From c41492423140e1573df68d1c98e825ae7593741f Mon Sep 17 00:00:00 2001
From: Roger Sayle <roger@nextmovesoftware.com>
Date: Wed, 11 Oct 2023 08:08:04 +0100
Subject: [PATCH] Optimize (ne:SI (subreg:QI (ashift:SI x 7) 0) 0) as (and:SI x
 1).

This patch is the middle-end piece of an improvement to PRs 101955 and
106245, that adds a missing simplification to the RTL optimizers.
This transformation is to simplify (char)(x << 7) != 0 as x & 1.
Technically, the cast can be any truncation, where shift is by one
less than the narrower type's precision, setting the most significant
(only) bit from the least significant bit.

This transformation applies to any target, but it's easy to see
(and add a new test case) on x86, where the following function:

int f(int a) { return (a << 31) >> 31; }

currently gets compiled with -O2 to:

foo:    movl    %edi, %eax
        sall    $7, %eax
        sarb    $7, %al
        movsbl  %al, %eax
        ret

but with this patch, we now generate the slightly simpler.

foo:    movl    %edi, %eax
        sall    $31, %eax
        sarl    $31, %eax
        ret

2023-10-11  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
	PR middle-end/101955
	PR tree-optimization/106245
	* simplify-rtx.cc (simplify_relational_operation_1): Simplify
	the RTL (ne:SI (subreg:QI (ashift:SI x 7) 0) 0) to (and:SI x 1).

gcc/testsuite/ChangeLog
	* gcc.target/i386/pr106245-1.c: New test case.
---
 gcc/simplify-rtx.cc                        | 17 +++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr106245-1.c | 10 ++++++++++
 2 files changed, 27 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr106245-1.c

diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index bd9443dbcc28..69d87579d9c8 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -6109,6 +6109,23 @@ simplify_context::simplify_relational_operation_1 (rtx_code code,
 	break;
       }
 
+  /* (ne:SI (subreg:QI (ashift:SI x 7) 0) 0) -> (and:SI x 1).  */
+  if (code == NE
+      && op1 == const0_rtx
+      && (op0code == TRUNCATE
+	  || (partial_subreg_p (op0)
+	      && subreg_lowpart_p (op0)))
+      && SCALAR_INT_MODE_P (mode)
+      && STORE_FLAG_VALUE == 1)
+    {
+      rtx tmp = XEXP (op0, 0);
+      if (GET_CODE (tmp) == ASHIFT
+	  && GET_MODE (tmp) == mode
+	  && CONST_INT_P (XEXP (tmp, 1))
+	  && is_int_mode (GET_MODE (op0), &int_mode)
+	  && INTVAL (XEXP (tmp, 1)) == GET_MODE_PRECISION (int_mode) - 1)
+	return simplify_gen_binary (AND, mode, XEXP (tmp, 0), const1_rtx);
+    }
   return NULL_RTX;
 }
 
diff --git a/gcc/testsuite/gcc.target/i386/pr106245-1.c b/gcc/testsuite/gcc.target/i386/pr106245-1.c
new file mode 100644
index 000000000000..a0403e931b34
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr106245-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int f(int a)
+{
+    return (a << 31) >> 31;
+}
+
+/* { dg-final { scan-assembler-not "sarb" } } */
+/* { dg-final { scan-assembler-not "movsbl" } } */
-- 
GitLab