diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..ab5f33014924edf37e0d1f162553d14004b778f2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+
+/* This test case is partially extracted from
+   gcc.dg/vect/vect-avg-16.c; it verifies we don't
+   cost a store with vec_to_scalar when we shouldn't.  */
+
+void
+test (signed char *restrict a, signed char *restrict b, signed char *restrict c,
+      int n)
+{
+  for (int j = 0; j < n; ++j)
+    {
+      for (int i = 0; i < 16; ++i)
+        a[i] = (b[i] + c[i]) >> 1;
+      a += 20;
+      b += 20;
+      c += 20;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "vec_to_scalar" 0 "vect" } } */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 048c14d291c4a65cc37bf278947ab2110dcdc0f2..3d01168080aed291a4011f0590bc3d0e91218498 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -964,7 +964,9 @@ vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
                        vec_load_store_type vls_type, slp_tree slp_node,
                        stmt_vector_for_cost *cost_vec)
 {
-  gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
+  gcc_assert (memory_access_type != VMAT_GATHER_SCATTER
+              && memory_access_type != VMAT_ELEMENTWISE
+              && memory_access_type != VMAT_STRIDED_SLP);
   unsigned int inside_cost = 0, prologue_cost = 0;
   stmt_vec_info first_stmt_info = stmt_info;
   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
@@ -1010,29 +1012,9 @@ vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
                          group_size);
     }
 
-  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   /* Costs of the stores.  */
-  if (memory_access_type == VMAT_ELEMENTWISE)
-    {
-      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
-      /* N scalar stores plus extracting the elements.  */
-      inside_cost += record_stmt_cost (cost_vec,
-                                       ncopies * assumed_nunits,
-                                       scalar_store, stmt_info, 0, vect_body);
-    }
-  else
-    vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
-                         misalignment, &inside_cost, cost_vec);
-
-  if (memory_access_type == VMAT_ELEMENTWISE
-      || memory_access_type == VMAT_STRIDED_SLP)
-    {
-      /* N scalar stores plus extracting the elements.  */
-      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
-      inside_cost += record_stmt_cost (cost_vec,
-                                       ncopies * assumed_nunits,
-                                       vec_to_scalar, stmt_info, 0, vect_body);
-    }
+  vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
+                       misalignment, &inside_cost, cost_vec);
 
   /* When vectorizing a store into the function result assign a penalty if
      the function returns in a multi-register location.
@@ -8416,6 +8398,18 @@ vectorizable_store (vec_info *vinfo,
                          "Vectorizing an unaligned access.\n");
 
       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
+
+      /* As function vect_transform_stmt shows, for interleaving stores
+         the whole chain is vectorized when the last store in the chain
+         is reached; the other stores in the group are skipped.  So we
+         only want to cost the last one here, but it's not trivial to
+         get the last one.  Since the first one is equivalent for
+         costing purposes, use the first one instead.  */
+      if (grouped_store
+          && !slp
+          && first_stmt_info != stmt_info
+          && memory_access_type == VMAT_ELEMENTWISE)
+        return true;
     }
   gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
 
@@ -8488,14 +8482,7 @@ vectorizable_store (vec_info *vinfo,
   if (memory_access_type == VMAT_ELEMENTWISE
       || memory_access_type == VMAT_STRIDED_SLP)
     {
-      if (costing_p)
-        {
-          vect_model_store_cost (vinfo, stmt_info, ncopies, memory_access_type,
-                                 alignment_support_scheme, misalignment,
-                                 vls_type, slp_node, cost_vec);
-          return true;
-        }
-
+      unsigned inside_cost = 0, prologue_cost = 0;
       gimple_stmt_iterator incr_gsi;
       bool insert_after;
       gimple *incr;
@@ -8503,7 +8490,7 @@ vectorizable_store (vec_info *vinfo,
       tree ivstep;
       tree running_off;
       tree stride_base, stride_step, alias_off;
-      tree vec_oprnd;
+      tree vec_oprnd = NULL_TREE;
      tree dr_offset;
       unsigned int g;
       /* Checked by get_load_store_type.  */
@@ -8609,26 +8596,30 @@ vectorizable_store (vec_info *vinfo,
                   lnel = const_nunits;
                   ltype = vectype;
                   lvectype = vectype;
+                  alignment_support_scheme = dr_align;
+                  misalignment = mis_align;
                 }
             }
           ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
           ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
         }
 
-      ivstep = stride_step;
-      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
-                            build_int_cst (TREE_TYPE (ivstep), vf));
+      if (!costing_p)
+        {
+          ivstep = stride_step;
+          ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
+                                build_int_cst (TREE_TYPE (ivstep), vf));
 
-      standard_iv_increment_position (loop, &incr_gsi, &insert_after);
+          standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 
-      stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
-      ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
-      create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
-                 loop, &incr_gsi, insert_after,
-                 &offvar, NULL);
-      incr = gsi_stmt (incr_gsi);
+          stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
+          ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
+          create_iv (stride_base, PLUS_EXPR, ivstep, NULL, loop, &incr_gsi,
+                     insert_after, &offvar, NULL);
+          incr = gsi_stmt (incr_gsi);
 
-      stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
+          stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
+        }
 
       alias_off = build_int_cst (ref_type, 0);
       stmt_vec_info next_stmt_info = first_stmt_info;
@@ -8636,39 +8627,76 @@ vectorizable_store (vec_info *vinfo,
       for (g = 0; g < group_size; g++)
         {
           running_off = offvar;
-          if (g)
+          if (!costing_p)
             {
-              tree size = TYPE_SIZE_UNIT (ltype);
-              tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
-                                      size);
-              tree newoff = copy_ssa_name (running_off, NULL);
-              incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
-                                          running_off, pos);
-              vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
-              running_off = newoff;
+              if (g)
+                {
+                  tree size = TYPE_SIZE_UNIT (ltype);
+                  tree pos
+                    = fold_build2 (MULT_EXPR, sizetype, size_int (g), size);
+                  tree newoff = copy_ssa_name (running_off, NULL);
+                  incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
+                                              running_off, pos);
+                  vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
+                  running_off = newoff;
+                }
             }
           if (!slp)
             op = vect_get_store_rhs (next_stmt_info);
-          vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
-                             op, &vec_oprnds);
+          if (!costing_p)
+            vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies, op,
+                               &vec_oprnds);
+          else if (!slp)
+            {
+              enum vect_def_type cdt;
+              gcc_assert (vect_is_simple_use (op, vinfo, &cdt));
+              if (cdt == vect_constant_def || cdt == vect_external_def)
+                prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
+                                                   stmt_info, 0, vect_prologue);
+            }
           unsigned int group_el = 0;
           unsigned HOST_WIDE_INT
             elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
           for (j = 0; j < ncopies; j++)
             {
-              vec_oprnd = vec_oprnds[j];
-              /* Pun the vector to extract from if necessary.  */
-              if (lvectype != vectype)
+              if (!costing_p)
                 {
-                  tree tem = make_ssa_name (lvectype);
-                  gimple *pun
-                    = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
-                                                        lvectype, vec_oprnd));
-                  vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
-                  vec_oprnd = tem;
+                  vec_oprnd = vec_oprnds[j];
+                  /* Pun the vector to extract from if necessary.  */
+                  if (lvectype != vectype)
+                    {
+                      tree tem = make_ssa_name (lvectype);
+                      tree cvt
+                        = build1 (VIEW_CONVERT_EXPR, lvectype, vec_oprnd);
+                      gimple *pun = gimple_build_assign (tem, cvt);
+                      vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
+                      vec_oprnd = tem;
+                    }
                 }
               for (i = 0; i < nstores; i++)
                 {
+                  if (costing_p)
+                    {
+                      /* We only need vector extracting when there is more
+                         than one store.  */
+                      if (nstores > 1)
+                        inside_cost
+                          += record_stmt_cost (cost_vec, 1, vec_to_scalar,
+                                               stmt_info, 0, vect_body);
+                      /* Take a store of a single-lane vector type as a
+                         scalar store to avoid ICEs like PR110776.  */
+                      if (VECTOR_TYPE_P (ltype)
+                          && known_ne (TYPE_VECTOR_SUBPARTS (ltype), 1U))
+                        vect_get_store_cost (vinfo, stmt_info, 1,
+                                             alignment_support_scheme,
+                                             misalignment, &inside_cost,
+                                             cost_vec);
+                      else
+                        inside_cost
+                          += record_stmt_cost (cost_vec, 1, scalar_store,
+                                               stmt_info, 0, vect_body);
+                      continue;
+                    }
                   tree newref, newoff;
                   gimple *incr, *assign;
                  tree size = TYPE_SIZE (ltype);
@@ -8719,6 +8747,12 @@ vectorizable_store (vec_info *vinfo,
                 break;
             }
 
+      if (costing_p && dump_enabled_p ())
+        dump_printf_loc (MSG_NOTE, vect_location,
+                         "vect_model_store_cost: inside_cost = %d, "
+                         "prologue_cost = %d .\n",
+                         inside_cost, prologue_cost);
+
       return true;
     }
 