From 5977b746db3925aaba37722f5312419d5f2968a5 Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Tue, 8 Oct 2024 09:01:01 +0200
Subject: [PATCH] tree-optimization/117000 - elide .REDUC_IOR with compare
 against zero

The following adds a pattern to elide a .REDUC_IOR operation when
the result is compared against zero with a cbranch.  I've resorted
to using can_compare_p since that's what RTL expansion eventually
checks - while GIMPLE has allowed whole vector equality compares for
a long time, I notice vector lowering won't lower unsupported ones and
RTL expansion doesn't seem to try using [u]cmp<vector-mode> optabs
(and neither x86 nor aarch64 implements those).  There's cstore
but no target implements that for vector modes either.

	PR tree-optimization/117000
	* match.pd (.REDUC_IOR !=/== 0): New pattern.
	* gimple-match-head.cc: Include memmodel.h and optabs.h.
	* generic-match-head.cc: Likewise.

	* gcc.target/i386/pr117000.c: New testcase.
---
 gcc/generic-match-head.cc                |  2 ++
 gcc/gimple-match-head.cc                 |  2 ++
 gcc/match.pd                             |  9 +++++++++
 gcc/testsuite/gcc.target/i386/pr117000.c | 13 +++++++++++++
 4 files changed, 26 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr117000.c

diff --git a/gcc/generic-match-head.cc b/gcc/generic-match-head.cc
index 42dee6266132..7d7e2a9f792d 100644
--- a/gcc/generic-match-head.cc
+++ b/gcc/generic-match-head.cc
@@ -35,6 +35,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "case-cfn-macros.h"
 #include "gimplify.h"
+#include "memmodel.h"
+#include "optabs.h"
 #include "optabs-tree.h"
 #include "dbgcnt.h"
 #include "tm.h"
diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc
index 4147a0eb38a7..b9d5f751b7cb 100644
--- a/gcc/gimple-match-head.cc
+++ b/gcc/gimple-match-head.cc
@@ -41,6 +41,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "internal-fn.h"
 #include "case-cfn-macros.h"
 #include "gimplify.h"
+#include "memmodel.h"
+#include "optabs.h"
 #include "optabs-tree.h"
 #include "tree-eh.h"
 #include "dbgcnt.h"
diff --git a/gcc/match.pd b/gcc/match.pd
index e73bb7e2109b..755ed13e77d1 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -10474,6 +10474,15 @@ and,
   (simplify (reduc (op @0 VECTOR_CST@1))
     (op (reduc:type @0) (reduc:type @1))))
 
+/* Simplify .REDUC_IOR (@0) ==/!= 0 to the whole-vector compare @0 ==/!= 0.  */
+(for cmp (eq ne)
+ (simplify
+  (cmp (IFN_REDUC_IOR @0) integer_zerop)
+  (if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (@0)))
+       && can_compare_p (cmp == EQ_EXPR ? EQ : NE, TYPE_MODE (TREE_TYPE (@0)),
+			 ccp_jump))
+   (cmp @0 { build_zero_cst (TREE_TYPE (@0)); }))))
+
 /* Simplify vector floating point operations of alternating sub/add pairs
    into using an fneg of a wider element type followed by a normal add.
    under IEEE 754 the fneg of the wider type will negate every even entry
diff --git a/gcc/testsuite/gcc.target/i386/pr117000.c b/gcc/testsuite/gcc.target/i386/pr117000.c
new file mode 100644
index 000000000000..04f94344eb17
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117000.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1" { target sse4 } } */
+
+int eq(unsigned long *x, unsigned long *y)
+{
+    unsigned long folded = 0;  /* OR of all element-wise XORs so far.  */
+    for (int i = 0; i < 4; ++i)
+      folded |= x[i] ^ y[i];  /* Nonzero bits mark a mismatch in element i.  */
+    return folded == 0;  /* 1 iff all four element pairs are equal.  */
+}
+
+/* We want to elide the .REDUC_IOR with the compare against zero.  */
+/* { dg-final { scan-assembler "ptest" } } */
-- 
GitLab