From 5977b746db3925aaba37722f5312419d5f2968a5 Mon Sep 17 00:00:00 2001 From: Richard Biener <rguenther@suse.de> Date: Tue, 8 Oct 2024 09:01:01 +0200 Subject: [PATCH] tree-optimization/117000 - elide .REDUC_IOR with compare against zero The following adds a pattern to elide a .REDUC_IOR operation when the result is compared against zero with a cbranch. I've resorted to using can_compare_p since that's what RTL expansion eventually checks - while GIMPLE has long allowed whole vector equality compares, I notice that vector lowering won't lower unsupported ones and RTL expansion doesn't seem to try using [u]cmp<vector-mode> optabs (and neither x86 nor aarch64 implements those). There's cstore but no target implements that for vector modes either. PR tree-optimization/117000 * match.pd (.REDUC_IOR !=/== 0): New pattern. * gimple-match-head.cc: Include memmodel.h and optabs.h. * generic-match-head.cc: Likewise. * gcc.target/i386/pr117000.c: New testcase. --- gcc/generic-match-head.cc | 2 ++ gcc/gimple-match-head.cc | 2 ++ gcc/match.pd | 9 +++++++++ gcc/testsuite/gcc.target/i386/pr117000.c | 13 +++++++++++++ 4 files changed, 26 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr117000.c diff --git a/gcc/generic-match-head.cc b/gcc/generic-match-head.cc index 42dee6266132..7d7e2a9f792d 100644 --- a/gcc/generic-match-head.cc +++ b/gcc/generic-match-head.cc @@ -35,6 +35,8 @@ along with GCC; see the file COPYING3. If not see #include "builtins.h" #include "case-cfn-macros.h" #include "gimplify.h" +#include "memmodel.h" +#include "optabs.h" #include "optabs-tree.h" #include "dbgcnt.h" #include "tm.h" diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc index 4147a0eb38a7..b9d5f751b7cb 100644 --- a/gcc/gimple-match-head.cc +++ b/gcc/gimple-match-head.cc @@ -41,6 +41,8 @@ along with GCC; see the file COPYING3. 
If not see #include "internal-fn.h" #include "case-cfn-macros.h" #include "gimplify.h" +#include "memmodel.h" +#include "optabs.h" #include "optabs-tree.h" #include "tree-eh.h" #include "dbgcnt.h" diff --git a/gcc/match.pd b/gcc/match.pd index e73bb7e2109b..755ed13e77d1 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -10474,6 +10474,15 @@ and, (simplify (reduc (op @0 VECTOR_CST@1)) (op (reduc:type @0) (reduc:type @1)))) +/* Simplify .REDUC_IOR (@0) ==/!= 0 to @0 ==/!= 0 when @0 has a vector mode and can_compare_p accepts that compare for a jump (ccp_jump). */ +(for cmp (eq ne) + (simplify + (cmp (IFN_REDUC_IOR @0) integer_zerop) + (if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (@0))) + && can_compare_p (cmp == EQ_EXPR ? EQ : NE, TYPE_MODE (TREE_TYPE (@0)), + ccp_jump)) + (cmp @0 { build_zero_cst (TREE_TYPE (@0)); })))) + /* Simplify vector floating point operations of alternating sub/add pairs into using an fneg of a wider element type followed by a normal add. under IEEE 754 the fneg of the wider type will negate every even entry diff --git a/gcc/testsuite/gcc.target/i386/pr117000.c b/gcc/testsuite/gcc.target/i386/pr117000.c new file mode 100644 index 000000000000..04f94344eb17 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117000.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4.1" { target sse4 } } */ + +int eq(unsigned long *x, unsigned long *y) +{ + unsigned long folded = 0; + for (int i = 0; i < 4; ++i) + folded |= x[i] ^ y[i]; + return folded == 0; +} + +/* We want to elide the .REDUC_IOR with the compare against zero; the scan below checks that a ptest instruction is emitted. */ +/* { dg-final { scan-assembler "ptest" } } */ -- GitLab