From 172a72da368146e0fe34194020eb7a6636db4438 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Fri, 17 Nov 2023 15:09:44 +0100
Subject: [PATCH] vect: Fix check_reduction_path [PR112374]

As mentioned in the PR, the intent of the r14-5076 changes was that it
doesn't count one of the uses on the use_stmt, but what actually got
implemented is that it does this processing on any op_use_stmt, even if
it is not the use_stmt statement, which means that it can increase count
even on debug stmts (-fcompare-debug failures), or if there would be some
other use stmt with 2+ uses it could count that as a single use. Though,
because it fails whenever cnt != 1 and I believe use_stmt must be one of
the uses, it would probably fail in the latter case anyway.

The following patch fixes that by doing this extra processing only when
op_use_stmt is use_stmt, and using the normal processing otherwise (so
ignore debug stmts, and increase on any uses on the stmt).

2023-11-17  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/112374
	* tree-vect-loop.cc (check_reduction_path): Perform the cond_fn_p
	special case only if op_use_stmt == use_stmt, use as_a rather than
	dyn_cast in that case.

	* gcc.dg/pr112374-1.c: New test.
	* gcc.dg/pr112374-2.c: New test.
	* g++.dg/opt/pr112374.C: New test.
--- gcc/testsuite/g++.dg/opt/pr112374.C | 24 +++++++++++++++++++++ gcc/testsuite/gcc.dg/pr112374-1.c | 20 +++++++++++++++++ gcc/testsuite/gcc.dg/pr112374-2.c | 33 +++++++++++++++++++++++++++++ gcc/tree-vect-loop.cc | 4 ++-- 4 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/g++.dg/opt/pr112374.C create mode 100644 gcc/testsuite/gcc.dg/pr112374-1.c create mode 100644 gcc/testsuite/gcc.dg/pr112374-2.c diff --git a/gcc/testsuite/g++.dg/opt/pr112374.C b/gcc/testsuite/g++.dg/opt/pr112374.C new file mode 100644 index 000000000000..15d8a067754e --- /dev/null +++ b/gcc/testsuite/g++.dg/opt/pr112374.C @@ -0,0 +1,24 @@ +// PR tree-optimization/112374 +// { dg-do compile { target c++11 } } +// { dg-options "-fcompare-debug -gno-statement-frontiers -O2" } +// { dg-additional-options "-march=skylake-avx512" { target i?86-*-* x86_64-*-* } } +// { dg-additional-options "-march=armv9-a" { target aarch64*-*-* } } + +struct t +{ + long coef[1]; + t(const unsigned long &a) : coef{(long)a} {}; + t(const t &a); +}; +extern void gen_int_mode(t, int); +struct expand_vec_perm_d { + unsigned char perm[64]; + int vmode; + unsigned char nelt; +}; +void expand_vec_perm_blend(struct expand_vec_perm_d *d) { + unsigned long mask = 0; + for (unsigned i = 0; i < 4; ++i) + mask |= (d->perm[i] >= 4 ? 
3 : 0) << (i * 2); + gen_int_mode(mask, 0); +} diff --git a/gcc/testsuite/gcc.dg/pr112374-1.c b/gcc/testsuite/gcc.dg/pr112374-1.c new file mode 100644 index 000000000000..7fbd67a12883 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr112374-1.c @@ -0,0 +1,20 @@ +/* PR tree-optimization/112374 */ +/* { dg-do compile } */ +/* { dg-options "-fcompare-debug -gno-statement-frontiers -O2 -w" } */ +/* { dg-additional-options "-march=skylake-avx512" { target i?86-*-* x86_64-*-* } } */ +/* { dg-additional-options "-march=armv9-a" { target aarch64*-*-* } } */ + +void foo (int, int); +struct S { char s[4]; }; +int a, b, c; + +void +bar () +{ + struct S d; + long e = 0; + for (c = 0; c < 4; ++c) + e |= (d.s[c] ? 3 : 0) << c; + if (e) + foo (a, b); +} diff --git a/gcc/testsuite/gcc.dg/pr112374-2.c b/gcc/testsuite/gcc.dg/pr112374-2.c new file mode 100644 index 000000000000..1598c49fc549 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr112374-2.c @@ -0,0 +1,33 @@ +/* PR tree-optimization/112374 */ +/* { dg-do compile } */ +/* { dg-options "-fcompare-debug -gno-statement-frontiers -O2" } */ +/* { dg-additional-options "-march=skylake-avx512" { target i?86-*-* x86_64-*-* } } */ +/* { dg-additional-options "-march=armv9-a" { target aarch64*-*-* } } */ + +void foo (int, int); +struct S { char s[64]; } *p; +char a, b; +unsigned char c; +int d, e; + +void +bar (void) +{ + unsigned i; + long j = 0; + for (i = 0; i < b; ++i) + j |= (p->s[i] ? 3 : 0) << i; + if (p->s[i + 1]) + lab: + for (;;) + ; + for (i = 0; i < 4; ++i) + j |= p->s[i] << i; + for (; i; i += 2) + if (c + 1 != a) + goto lab; + for (; i < 8; ++i) + j |= p->s[i] >= 6; + if (j) + foo (d, e); +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 3f59139cb013..e8b8be5b7e1c 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -4118,9 +4118,9 @@ pop: /* In case of a COND_OP (mask, op1, op2, op1) reduction we might have op1 twice (once as definition, once as else) in the same operation. Allow this. 
*/ - if (cond_fn_p) + if (cond_fn_p && op_use_stmt == use_stmt) { - gcall *call = dyn_cast<gcall *> (use_stmt); + gcall *call = as_a<gcall *> (use_stmt); unsigned else_pos = internal_fn_else_index (internal_fn (op.code)); -- GitLab