From ad519f46194a7ab1671470a236c67ae17cb98ead Mon Sep 17 00:00:00 2001
From: Jovan Vukic <Jovan.Vukic@rt-rk.com>
Date: Sat, 14 Dec 2024 14:47:35 -0700
Subject: [PATCH] [PATCH v3] match.pd: Add pattern to simplify `(a - 1) & -a`
 to `0`

Thank you for the feedback. I have made the minor changes that were requested.
Additionally, I extracted the repetitive code into a reusable helper function,
match_plus_neg_pattern, making the code much more readable. Furthermore, the
logic, code, and tests remain the same as in version 2 of the patch.

gcc/ChangeLog:

	* match.pd: New pattern.
	* simplify-rtx.cc (match_plus_neg_pattern): New helper function.
	(simplify_context::simplify_binary_operation_1): New
	code to handle (a - 1) & -a, (a - 1) | -a and (a - 1) ^ -a.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/bitops-11.c: New test.
---
 gcc/match.pd                              |  16 +++
 gcc/simplify-rtx.cc                       |  41 ++++++++
 gcc/testsuite/gcc.dg/tree-ssa/bitops-11.c | 117 ++++++++++++++++++++++
 3 files changed, 174 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitops-11.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 4587658aa0a4..c43e51c96b7c 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1472,6 +1472,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (bit_and:c @0 (bit_not (bit_xor:c @0 @1)))
  (bit_and @0 @1))
 
+/* Since -a == ~(a - 1), the two operands are bitwise complements, so:
+   (a - 1) & -a ->  0.
+   (a - 1) | -a -> -1.
+   (a - 1) ^ -a -> -1.  */
+(for bit_op (bit_ior bit_xor bit_and)
+ (simplify
+  (bit_op:c (plus @0 integer_minus_onep) (negate @0))
+  (if (bit_op == BIT_AND_EXPR)
+    { build_zero_cst (type); }
+    { build_minus_one_cst (type); }))
+ (simplify
+  (bit_op:c (minus @0 integer_onep) (negate @0))
+  (if (bit_op == BIT_AND_EXPR)
+    { build_zero_cst (type); }
+    { build_minus_one_cst (type); })))
+
 /* a & (a == b)  -->  a & b (boolean version of the above). */
 (simplify
  (bit_and:c @0 (nop_convert? (eq:c @0 @1)))
diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index 86b3f3319285..223a00959786 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -2941,6 +2941,35 @@ simplify_rotate_op (rtx op0, rtx op1, machine_mode mode)
   return NULL_RTX;
 }
 
+/* Return true if OP0 and OP1, in either order, are (plus A -1) and (neg A)
+   for the same side-effect-free A; matching lowpart SUBREGs are stripped.  */
+
+static bool
+match_plus_neg_pattern (rtx op0, rtx op1, machine_mode mode)
+{
+  /* Look through SUBREGs only when both operands are lowpart SUBREGs.  */
+  if (GET_CODE (op0) == SUBREG
+      && GET_CODE (op1) == SUBREG
+      && subreg_lowpart_p (op0)
+      && subreg_lowpart_p (op1))
+    {
+      op0 = XEXP (op0, 0);
+      op1 = XEXP (op1, 0);
+    }
+
+  /* One must be (neg A) and the other (plus A CONSTM1_RTX (MODE)).  */
+  if (((GET_CODE (op1) == NEG
+	&& GET_CODE (op0) == PLUS
+	&& XEXP (op0, 1) == CONSTM1_RTX (mode))
+       || (GET_CODE (op0) == NEG
+	   && GET_CODE (op1) == PLUS
+	   && XEXP (op1, 1) == CONSTM1_RTX (mode)))
+      && rtx_equal_p (XEXP (op0, 0), XEXP (op1, 0))
+      && !side_effects_p (XEXP (op0, 0)))
+    return true;
+  return false;
+}
+
 /* Subroutine of simplify_binary_operation.  Simplify a binary operation
    CODE with result mode MODE, operating on OP0 and OP1.  If OP0 and/or
    OP1 are constant pool references, TRUEOP0 and TRUEOP1 represent the
@@ -3553,6 +3582,10 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
 	  && GET_MODE_CLASS (mode) != MODE_CC)
 	return CONSTM1_RTX (mode);
 
+      /* Convert (ior (plus A -1) (neg A)) to -1.  */
+      if (match_plus_neg_pattern (op0, op1, mode))
+	return CONSTM1_RTX (mode);
+
       /* (ior A C) is C if all bits of A that might be nonzero are on in C.  */
       if (CONST_INT_P (op1)
 	  && HWI_COMPUTABLE_MODE_P (mode)
@@ -3714,6 +3747,10 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
 	      & nonzero_bits (op1, mode)) == 0)
 	return (simplify_gen_binary (IOR, mode, op0, op1));
 
+      /* Convert (xor (plus A -1) (neg A)) to -1.  */
+      if (match_plus_neg_pattern (op0, op1, mode))
+	return CONSTM1_RTX (mode);
+
       /* Convert (XOR (NOT x) (NOT y)) to (XOR x y).
 	 Also convert (XOR (NOT x) y) to (NOT (XOR x y)), similarly for
 	 (NOT y).  */
@@ -3981,6 +4018,10 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
 	  && GET_MODE_CLASS (mode) != MODE_CC)
 	return CONST0_RTX (mode);
 
+      /* Convert (and (plus A -1) (neg A)) to 0.  */
+      if (match_plus_neg_pattern (op0, op1, mode))
+	return CONST0_RTX (mode);
+
       /* Transform (and (extend X) C) into (zero_extend (and X C)) if
 	 there are no nonzero bits of C outside of X's mode.  */
       if ((GET_CODE (op0) == SIGN_EXTEND
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-11.c b/gcc/testsuite/gcc.dg/tree-ssa/bitops-11.c
new file mode 100644
index 000000000000..63f30889a7c5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-11.c
@@ -0,0 +1,117 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized-raw -Wno-psabi" } */
+
+typedef int v4si __attribute__((vector_size(4 * sizeof(int))));
+
+/* Generic: one function per operator (&, |, ^) for each listed type.  */
+#define BIT_OPERATIONS(result_type, operand_type, suffix) \
+result_type foo_generic_1##suffix(operand_type a) {       \
+    return (a - 1) & -a;                                  \
+}                                                         \
+result_type foo_generic_2##suffix(operand_type a) {       \
+    return (a - 1) | -a;                                  \
+}                                                         \
+result_type foo_generic_3##suffix(operand_type a) {       \
+    return (a - 1) ^ -a;                                  \
+}
+
+BIT_OPERATIONS(signed char, signed char, 11)
+BIT_OPERATIONS(unsigned char, unsigned char, 12)
+
+BIT_OPERATIONS(signed short, signed short, 21)
+BIT_OPERATIONS(unsigned short, unsigned short, 22)
+
+BIT_OPERATIONS(signed int, signed int, 31)
+BIT_OPERATIONS(unsigned int, unsigned int, 32)
+
+BIT_OPERATIONS(signed long, signed long, 41)
+BIT_OPERATIONS(unsigned long, unsigned long, 42)
+
+/* Gimple */
+int
+foo_gimple_1(int a)
+{
+  int t1 = a - 1; 
+  int t2 = -a;
+  int t3 = t1 & t2;  /* Expected to fold to 0.  */
+  return t3;
+}
+
+short
+foo_gimple_2(short a)
+{
+  short t1 = a - 1; 
+  short t2 = -a;
+  short t3 = t1 | t2;  /* Expected to fold to -1.  */
+  return t3;
+}
+
+unsigned long
+foo_gimple_3(unsigned long a)
+{
+  unsigned long t1 = a - 1;
+  unsigned long t2 = -a;
+  unsigned long t3 = t1 ^ t2;  /* Expected to fold to all ones.  */
+  return t3;
+}
+
+int
+foo_gimple_4(int a, unsigned char b)
+{
+  /* The return expression should simplify to b + 7.  */
+  int t1 = b;
+  t1 |= (a - 1) | -a;  /* ORs in -1, so t1 is all ones from here on.  */
+  t1 |= b & (a >> 3);
+
+  int t2 = b + 7;
+  t2 &= ~((b - 1) & -b);  /* (b - 1) & -b is 0, so the mask is all ones.  */
+  t2 &= (a - 1) ^ -a;  /* Mask is -1, so t2 stays b + 7.  */
+
+  int t3 = t1 & t2;  /* -1 & (b + 7).  */
+  return t3;
+}
+
+/* Vectors */
+v4si
+foo_vector_1(v4si a)
+{
+  return (a - (v4si) {1, 1, 1, 1}) & -a;  /* Expected: all-zero vector.  */
+}
+
+v4si
+foo_vector_2(v4si a)
+{
+  v4si t0 = (v4si) {1, 1, 1, 1};
+  v4si t1 = a - t0;
+  v4si t2 = -a;
+  v4si t3 = t1 | t2;  /* Expected to fold to an all-ones vector.  */
+  return t3;
+}
+
+v4si
+foo_vector_3(v4si a)
+{
+  v4si t0 = (v4si) {1, 1, 1, 1};
+  v4si t1 = a - t0;
+  v4si t2 = -a;
+  v4si t3 = t1 ^ t2;  /* Expected to fold to an all-ones vector.  */
+  return t3;
+}
+
+v4si
+foo_vector_4(v4si a)
+{
+  v4si t0 = (v4si) {1, 1, 1, 1};
+  v4si t1 = (a - t0) & -a;  /* Expected: 0 in every lane.  */
+  v4si t2 = (a - t0) | -a;  /* Expected: -1 in every lane.  */
+  v4si t3 = (a - t0) ^ -a;  /* Expected: -1 in every lane.  */
+  v4si t4 = t1 - t2 + t3;  /* 0 - (-1) + (-1) == 0 per lane.  */
+  return t4;
+}
+
+/* { dg-final { scan-tree-dump-not   "bit_and_expr, "   "optimized" } } */
+/* { dg-final { scan-tree-dump-not   "bit_ior_expr, "   "optimized" } } */
+/* { dg-final { scan-tree-dump-not   "bit_xor_expr, "   "optimized" } } */
+/* { dg-final { scan-tree-dump-not   "negate_expr, "    "optimized" } } */
+/* { dg-final { scan-tree-dump-times "plus_expr, " 1 "optimized" } } */
+
-- 
GitLab