diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s353.c b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s353.c index 98ba75224710a4f62f66b9eee63aaed7712baa54..2c4fa3f599150cb5639924cd63ab27c47eedb627 100644 --- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s353.c +++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s353.c @@ -44,4 +44,4 @@ int main (int argc, char **argv) return 0; } -/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail { ! riscv_v } } } } */ +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-1.c b/gcc/testsuite/gcc.dg/vect/vect-gather-1.c index e3bbf5c0bf8db8cb258d8d05591c246d80c5e755..5f6640d9ab65a4b126e989e784d0d7a95745b732 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-gather-1.c +++ b/gcc/testsuite/gcc.dg/vect/vect-gather-1.c @@ -58,4 +58,4 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect { target vect_gather_load_ifn } } } */ +/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-2.c b/gcc/testsuite/gcc.dg/vect/vect-gather-2.c index a1f6ba458a97f4bf77466bb30fb5bd76537c5e5c..4c23b80833326a7d5242299ec32e959aa8095c04 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-gather-2.c +++ b/gcc/testsuite/gcc.dg/vect/vect-gather-2.c @@ -8,6 +8,7 @@ f1 (int *restrict y, int *restrict x1, int *restrict x2, { for (int i = 0; i < N; ++i) { + /* Different base. */ y[i * 2] = x1[indices[i * 2]] + 1; y[i * 2 + 1] = x2[indices[i * 2 + 1]] + 2; } @@ -18,8 +19,9 @@ f2 (int *restrict y, int *restrict x, int *restrict indices) { for (int i = 0; i < N; ++i) { - y[i * 2] = x[indices[i * 2]] + 1; - y[i * 2 + 1] = x[indices[i * 2 + 1] * 2] + 2; + /* Different scale. */ + y[i * 2] = *(int *)((char *)x + (__UINTPTR_TYPE__)indices[i * 2] * 4) + 1; + y[i * 2 + 1] = *(int *)((char *)x + (__UINTPTR_TYPE__)indices[i * 2 + 1] * 2) + 2; } } @@ -28,9 +30,12 @@ f3 (int *restrict y, int *restrict x, int *restrict indices) { for (int i = 0; i < N; ++i) { + /* Different type. */ y[i * 2] = x[indices[i * 2]] + 1; - y[i * 2 + 1] = x[(unsigned int) indices[i * 2 + 1]] + 2; + y[i * 2 + 1] = x[((unsigned int *) indices)[i * 2 + 1]] + 2; } } -/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect { target vect_gather_load_ifn } } } */ +/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect } } */ +/* { dg-final { scan-tree-dump "different gather base" vect { target { ! vect_gather_load_ifn } } } } */ +/* { dg-final { scan-tree-dump "different gather scale" vect { target { ! vect_gather_load_ifn } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-3.c b/gcc/testsuite/gcc.dg/vect/vect-gather-3.c index adfef3bf407fb46ef7a2ad01c495e44456b37b7b..30ba6789e0382d8c8e0be9da3cb64931dff6cdb4 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-gather-3.c +++ b/gcc/testsuite/gcc.dg/vect/vect-gather-3.c @@ -62,4 +62,4 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect { target { vect_gather_load_ifn && vect_masked_load } } } } */ +/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect { target { { vect_gather_load_ifn || avx2 } && vect_masked_load } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-4.c b/gcc/testsuite/gcc.dg/vect/vect-gather-4.c index ee2e4e4999a8b8b826081eb166f206ecd88abda5..1ce63e6919995d1242cc4d1e9dd94bd96f74bd43 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-gather-4.c +++ b/gcc/testsuite/gcc.dg/vect/vect-gather-4.c @@ -39,10 +39,10 @@ f3 (int *restrict y, int *restrict x, int *restrict indices) y[i * 2] = (indices[i * 2] < N * 2 ? x[indices[i * 2]] + 1 : 1); - y[i * 2 + 1] = (indices[i * 2 + 1] < N * 2 - ? x[(unsigned int) indices[i * 2 + 1]] + 2 + y[i * 2 + 1] = (((unsigned int *)indices)[i * 2 + 1] < N * 2 + ? x[((unsigned int *) indices)[i * 2 + 1]] + 2 : 2); } } -/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect { target vect_gather_load_ifn } } } */ +/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect } } */ diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index ebab1953b9c09fa4467b3b386349977e1e999472..8877ebde2466cbddd4446852f83d8a5765c70fd2 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -11362,8 +11362,7 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance) updated offset we set using ADVANCE. Instead we have to make sure the reference in the data references point to the corresponding copy of the original in the epilogue. */ - if (STMT_VINFO_MEMORY_ACCESS_TYPE (vect_stmt_to_vectorize (stmt_vinfo)) - == VMAT_GATHER_SCATTER) + if (STMT_VINFO_GATHER_SCATTER_P (vect_stmt_to_vectorize (stmt_vinfo))) { DR_REF (dr) = simplify_replace_tree (DR_REF (dr), NULL_TREE, NULL_TREE, @@ -11372,6 +11371,9 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance) = simplify_replace_tree (DR_BASE_ADDRESS (dr), NULL_TREE, NULL_TREE, &find_in_mapping, &mapping); } + else + gcc_assert (STMT_VINFO_MEMORY_ACCESS_TYPE (vect_stmt_to_vectorize (stmt_vinfo)) + != VMAT_GATHER_SCATTER); DR_STMT (dr) = STMT_VINFO_STMT (stmt_vinfo); stmt_vinfo->dr_aux.stmt = stmt_vinfo; /* The vector size of the epilogue is smaller than that of the main loop diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index d081999a763ced2ca29131284287bba99098a5ce..8efff2e912d7c1960a5251088020c05d593bfbd0 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -283,10 +283,11 @@ typedef struct _slp_oprnd_info vec<tree> ops; /* Information about the first statement, its vector def-type, type, the operand itself in case it's constant, and an indication if it's a pattern - stmt. */ + stmt and gather/scatter info. */ tree first_op_type; enum vect_def_type first_dt; bool any_pattern; + gather_scatter_info first_gs_info; } *slp_oprnd_info; @@ -609,6 +610,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap, unsigned int i, number_of_oprnds; enum vect_def_type dt = vect_uninitialized_def; slp_oprnd_info oprnd_info; + gather_scatter_info gs_info; unsigned int commutative_op = -1U; bool first = stmt_num == 0; @@ -660,6 +662,19 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap, oprnd_info = (*oprnds_info)[i]; + if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) + { + gcc_assert (number_of_oprnds == 1); + if (!is_a <loop_vec_info> (vinfo) + || !vect_check_gather_scatter (stmt_info, + as_a <loop_vec_info> (vinfo), + first ? &oprnd_info->first_gs_info + : &gs_info)) + return -1; + + oprnd = first ? oprnd_info->first_gs_info.offset : gs_info.offset; + } + stmt_vec_info def_stmt_info; if (!vect_is_simple_use (oprnd, vinfo, &dts[i], &def_stmt_info)) { @@ -792,6 +807,25 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap, return 1; } + if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) + { + if (!operand_equal_p (oprnd_info->first_gs_info.base, + gs_info.base)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "Build SLP failed: different gather base\n"); + return 1; + } + if (oprnd_info->first_gs_info.scale != gs_info.scale) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "Build SLP failed: different gather scale\n"); + return 1; + } + } + /* Not first stmt of the group, check that the def-stmt/s match the def-stmt/s of the first stmt. Allow different definition types for reduction chains: the first stmt must be a @@ -1235,6 +1269,9 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, || rhs_code == INDIRECT_REF || rhs_code == COMPONENT_REF || rhs_code == MEM_REF))) + || (ldst_p + && (STMT_VINFO_GATHER_SCATTER_P (stmt_info) + != STMT_VINFO_GATHER_SCATTER_P (first_stmt_info))) || first_stmt_ldst_p != ldst_p || first_stmt_phi_p != phi_p) { @@ -1357,12 +1394,12 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)) && rhs_code != CFN_GATHER_LOAD && rhs_code != CFN_MASK_GATHER_LOAD + && !STMT_VINFO_GATHER_SCATTER_P (stmt_info) /* Not grouped loads are handled as externals for BB vectorization. For loop vectorization we can handle splats the same we handle single element interleaving. */ && (is_a <bb_vec_info> (vinfo) - || stmt_info != first_stmt_info - || STMT_VINFO_GATHER_SCATTER_P (stmt_info))) + || stmt_info != first_stmt_info)) { /* Not grouped load. */ if (dump_enabled_p ()) @@ -1858,6 +1895,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD) || gimple_call_internal_p (stmt, IFN_GATHER_LOAD) || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD)); + else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) + gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))); else { *max_nunits = this_max_nunits;