diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e42270c40195b3f149472774c5dcaa6b83dcfa8b..eda66257b4708fdece1ffb39e5c9345204be1410 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2018-01-03 Richard Sandiford <richard.sandiford@linaro.org> + Alan Hayward <alan.hayward@arm.com> + David Sherwood <david.sherwood@arm.com> + + * tree.h (build_index_vector): Declare. + * tree.c (build_index_vector): New function. + * tree-vect-loop.c (get_initial_defs_for_reduction): Treat the number + of units as polynomial, forcibly converting it to a constant if + vectorizable_reduction has already enforced the condition. + (vect_create_epilog_for_reduction): Likewise. Use build_index_vector + to create a {1,2,3,...} vector. + (vectorizable_reduction): Treat the number of units as polynomial. + Choose vectype_in based on the largest scalar element size rather + than the smallest number of units. Enforce the restrictions + relied on above. + 2018-01-03 Richard Sandiford <richard.sandiford@linaro.org> Alan Hayward <alan.hayward@arm.com> David Sherwood <david.sherwood@arm.com> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 557522c2ee5c6349a1f86e907db5d401d17c896a..93e430c7a562f0884138039ec1e2b3abf8214719 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -4150,6 +4150,8 @@ get_initial_defs_for_reduction (slp_tree slp_node, vector_type = STMT_VINFO_VECTYPE (stmt_vinfo); scalar_type = TREE_TYPE (vector_type); + /* vectorizable_reduction has already rejected SLP reductions on + variable-length vectors. */ nunits = TYPE_VECTOR_SUBPARTS (vector_type); gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def); @@ -4537,8 +4539,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt, if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION) { tree indx_before_incr, indx_after_incr; - int nunits_out = TYPE_VECTOR_SUBPARTS (vectype); - int k; + poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype); gimple *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR); @@ -4554,10 +4555,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt, vector size (STEP). */ /* Create a {1,2,3,...} vector. */ - tree_vector_builder vtemp (cr_index_vector_type, 1, 3); - for (k = 0; k < 3; ++k) - vtemp.quick_push (build_int_cst (cr_index_scalar_type, k + 1)); - tree series_vect = vtemp.build (); + tree series_vect = build_index_vector (cr_index_vector_type, 1, 1); /* Create a vector of the step value. */ tree step = build_int_cst (cr_index_scalar_type, nunits_out); @@ -4935,8 +4933,11 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt, tree data_eltype = TREE_TYPE (TREE_TYPE (new_phi_result)); tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index)); unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype)); - unsigned HOST_WIDE_INT v_size - = el_size * TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index)); + poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index)); + /* Enforced by vectorizable_reduction, which ensures we have target + support before allowing a conditional reduction on variable-length + vectors. */ + unsigned HOST_WIDE_INT v_size = el_size * nunits.to_constant (); tree idx_val = NULL_TREE, val = NULL_TREE; for (unsigned HOST_WIDE_INT off = 0; off < v_size; off += el_size) { @@ -5055,6 +5056,9 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt, { bool reduce_with_shift = have_whole_vector_shift (mode); int element_bitsize = tree_to_uhwi (bitsize); + /* Enforced by vectorizable_reduction, which disallows SLP reductions + for variable-length vectors and also requires direct target support + for loop reductions. */ int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)); tree vec_temp; @@ -5743,10 +5747,10 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, if (k == 1 && gimple_assign_rhs_code (reduc_stmt) == COND_EXPR) continue; - tem = get_vectype_for_scalar_type (TREE_TYPE (op)); - if (! vectype_in - || TYPE_VECTOR_SUBPARTS (tem) < TYPE_VECTOR_SUBPARTS (vectype_in)) - vectype_in = tem; + if (!vectype_in + || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in))) + < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (op))))) + vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op)); break; } gcc_assert (vectype_in); @@ -5912,7 +5916,8 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, /* To properly compute ncopies we are interested in the widest input type in case we're looking at a widening accumulation. */ if (!vectype_in - || TYPE_VECTOR_SUBPARTS (vectype_in) > TYPE_VECTOR_SUBPARTS (tem)) + || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in))) + < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (tem))))) vectype_in = tem; } @@ -6097,6 +6102,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, gcc_assert (ncopies >= 1); vec_mode = TYPE_MODE (vectype_in); + poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); if (code == COND_EXPR) { @@ -6278,14 +6284,23 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, int scalar_precision = GET_MODE_PRECISION (SCALAR_TYPE_MODE (scalar_type)); cr_index_scalar_type = make_unsigned_type (scalar_precision); - cr_index_vector_type = build_vector_type - (cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype_out)); + cr_index_vector_type = build_vector_type (cr_index_scalar_type, + nunits_out); if (direct_internal_fn_supported_p (IFN_REDUC_MAX, cr_index_vector_type, OPTIMIZE_FOR_SPEED)) reduc_fn = IFN_REDUC_MAX; } + if (reduc_fn == IFN_LAST && !nunits_out.is_constant ()) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "missing target support for reduction on" + " variable-length vectors.\n"); + return false; + } + if ((double_reduc || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != TREE_CODE_REDUCTION) && ncopies > 1) @@ -6297,6 +6312,27 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, return false; } + if (double_reduc && !nunits_out.is_constant ()) + { + /* The current double-reduction code creates the initial value + element-by-element. */ + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "double reduction not supported for variable-length" + " vectors.\n"); + return false; + } + + if (slp_node && !nunits_out.is_constant ()) + { + /* The current SLP code creates the initial value element-by-element. */ + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "SLP reduction not supported for variable-length" + " vectors.\n"); + return false; + } + /* In case of widenning multiplication by a constant, we update the type of the constant to be the type of the other operand. We check that the constant fits the type in the pattern recognition pass. */ diff --git a/gcc/tree.c b/gcc/tree.c index f13e2d8227dc8ef75cb6a71bf4bfc8f6aa161945..93c4654309d597c57c8817a3fcd7d92b8e4ef5d4 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -1930,6 +1930,29 @@ build_vec_series (tree type, tree base, tree step) return build2 (VEC_SERIES_EXPR, type, base, step); } +/* Return a vector with the same number of units and number of bits + as VEC_TYPE, but in which the elements are a linear series of unsigned + integers { BASE, BASE + STEP, BASE + STEP * 2, ... }. */ + +tree +build_index_vector (tree vec_type, poly_uint64 base, poly_uint64 step) +{ + tree index_vec_type = vec_type; + tree index_elt_type = TREE_TYPE (vec_type); + poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vec_type); + if (!INTEGRAL_TYPE_P (index_elt_type) || !TYPE_UNSIGNED (index_elt_type)) + { + index_elt_type = build_nonstandard_integer_type + (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (index_elt_type)), true); + index_vec_type = build_vector_type (index_elt_type, nunits); + } + + tree_vector_builder v (index_vec_type, 1, 3); + for (unsigned int i = 0; i < 3; ++i) + v.quick_push (build_int_cstu (index_elt_type, base + i * step)); + return v.build (); +} + /* Something has messed with the elements of CONSTRUCTOR C after it was built; calculate TREE_CONSTANT and TREE_SIDE_EFFECTS. */ diff --git a/gcc/tree.h b/gcc/tree.h index f523a0850ca48a1c404b9cdc07504ecb05bccaec..a45c2cd5aa681ce87fd23a6257a7bcd2484003dd 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -4062,6 +4062,7 @@ extern tree make_vector (unsigned, unsigned CXX_MEM_STAT_INFO); extern tree build_vector_from_ctor (tree, vec<constructor_elt, va_gc> *); extern tree build_vector_from_val (tree, tree); extern tree build_vec_series (tree, tree, tree); +extern tree build_index_vector (tree, poly_uint64, poly_uint64); extern void recompute_constructor_flags (tree); extern void verify_constructor_flags (tree); extern tree build_constructor (tree, vec<constructor_elt, va_gc> *);