diff --git a/gcc/testsuite/gcc.dg/vect/slp-53.c b/gcc/testsuite/gcc.dg/vect/slp-53.c new file mode 100644 index 0000000000000000000000000000000000000000..d8cd5f85b3c48b876261ea1c4725a3457ce70173 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/slp-53.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ + +void foo (int * __restrict x, int *y) +{ + x = __builtin_assume_aligned (x, __BIGGEST_ALIGNMENT__); + y = __builtin_assume_aligned (y, __BIGGEST_ALIGNMENT__); + for (int i = 0; i < 1024; ++i) + { + x[3*i+0] = y[2*i+0] * 7 + 5; + x[3*i+1] = y[2*i+1] * 2; + x[3*i+2] = y[2*i+0] + 3; + } +} + +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target { vect_int && vect_int_mult } xfail vect_load_lanes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-54.c b/gcc/testsuite/gcc.dg/vect/slp-54.c new file mode 100644 index 0000000000000000000000000000000000000000..ab66b349d1f405b20444858e4d2ba230a8ee197f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/slp-54.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ + +void foo (int * __restrict x, int *y) +{ + x = __builtin_assume_aligned (x, __BIGGEST_ALIGNMENT__); + y = __builtin_assume_aligned (y, __BIGGEST_ALIGNMENT__); + for (int i = 0; i < 1024; ++i) + { + x[6*i+0] = y[4*i+0] * 7 + 5; + x[6*i+1] = y[4*i+1] * 2; + x[6*i+2] = y[4*i+2] + 3; + x[6*i+3] = y[4*i+3] * 7 + 5; + x[6*i+4] = y[4*i+0] * 2; + x[6*i+5] = y[4*i+3] + 3; + } +} + +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target { vect_int && vect_int_mult } xfail riscv*-*-* } } } */ diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index a8bb08ea7bed9541000c959d0b50e5d5716fa142..d0a8531fd3b3f30b4dc10f341d089f25d0d4267b 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -3697,6 +3697,69 @@ vect_build_slp_instance (vec_info *vinfo, most two vector inputs to produce a single vector output. */ while (SLP_TREE_CHILDREN (perm).length () > 2) { + /* When we have three equal sized groups left the pairwise + reduction does not result in a scheme that avoids using + three vectors. Instead merge the first two groups + to the final size with do-not-care elements (chosen + from the first group) and then merge with the third. + { A0, B0, x, A1, B1, x, ... } + -> { A0, B0, C0, A1, B1, C1, ... } + This handles group size of three (and at least + power-of-two multiples of that). */ + if (SLP_TREE_CHILDREN (perm).length () == 3 + && (SLP_TREE_LANES (SLP_TREE_CHILDREN (perm)[0]) + == SLP_TREE_LANES (SLP_TREE_CHILDREN (perm)[1])) + && (SLP_TREE_LANES (SLP_TREE_CHILDREN (perm)[0]) + == SLP_TREE_LANES (SLP_TREE_CHILDREN (perm)[2]))) + { + int ai = 0; + int bi = 1; + slp_tree a = SLP_TREE_CHILDREN (perm)[ai]; + slp_tree b = SLP_TREE_CHILDREN (perm)[bi]; + unsigned n = SLP_TREE_LANES (perm); + + slp_tree permab + = vect_create_new_slp_node (2, VEC_PERM_EXPR); + SLP_TREE_LANES (permab) = n; + SLP_TREE_LANE_PERMUTATION (permab).create (n); + SLP_TREE_VECTYPE (permab) = SLP_TREE_VECTYPE (perm); + /* ??? Should be NULL but that's not expected. */ + SLP_TREE_REPRESENTATIVE (permab) + = SLP_TREE_REPRESENTATIVE (perm); + SLP_TREE_CHILDREN (permab).quick_push (a); + for (unsigned k = 0; k < SLP_TREE_LANES (a); ++k) + SLP_TREE_LANE_PERMUTATION (permab) + .quick_push (std::make_pair (0, k)); + SLP_TREE_CHILDREN (permab).quick_push (b); + for (unsigned k = 0; k < SLP_TREE_LANES (b); ++k) + SLP_TREE_LANE_PERMUTATION (permab) + .quick_push (std::make_pair (1, k)); + /* Push the do-not-care lanes. */ + for (unsigned k = 0; k < SLP_TREE_LANES (a); ++k) + SLP_TREE_LANE_PERMUTATION (permab) + .quick_push (std::make_pair (0, k)); + + /* Put the merged node into 'perm', in place of a. */ + SLP_TREE_CHILDREN (perm)[ai] = permab; + /* Adjust the references to b in the permutation + of perm and to the later children which we'll + remove. */ + for (unsigned k = 0; k < SLP_TREE_LANES (perm); ++k) + { + std::pair<unsigned, unsigned> &p + = SLP_TREE_LANE_PERMUTATION (perm)[k]; + if (p.first == (unsigned) bi) + { + p.first = ai; + p.second += SLP_TREE_LANES (a); + } + else if (p.first > (unsigned) bi) + p.first--; + } + SLP_TREE_CHILDREN (perm).ordered_remove (bi); + break; + } + /* Pick the two nodes with the least number of lanes, prefer the earliest candidate and maintain ai < bi. */ int ai = -1; @@ -3733,7 +3796,7 @@ vect_build_slp_instance (vec_info *vinfo, SLP_TREE_LANES (permab) = n; SLP_TREE_LANE_PERMUTATION (permab).create (n); SLP_TREE_VECTYPE (permab) = SLP_TREE_VECTYPE (perm); - /* ??? We should set this NULL but that's not expected. */ + /* ??? Should be NULL but that's not expected. */ SLP_TREE_REPRESENTATIVE (permab) = SLP_TREE_REPRESENTATIVE (perm); SLP_TREE_CHILDREN (permab).quick_push (a);