From 1dfc50232dcb703454db4f54c538042a32be2138 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Fri, 17 Apr 2020 16:56:12 +0200
Subject: [PATCH] i386: Fix up *testqi_ext_3 define_insn_and_split [PR94567]

As the testcase shows, there are unfortunately more problematic cases
in *testqi_ext_3 if the mode is not CCZmode, because the sign flag might
not behave the same between the insn with zero_extract and what we split it
into.

The previous fix to the insn condition was because *testdi_1 for mask with
upper 32-bits clear and bit 31 set is implemented using SImode test and thus
SF is set depending on that bit 31 rather than on always cleared.

But we can have other cases.  On the zero_extract (which has <MODE>mode),
we can have either the pos + len == precision of <MODE>mode, or
pos + len < precision of <MODE>mode cases.  The former one copies the most
significant bit into SF, the latter will have SF always cleared.

For the former case, either it is a zero_extract from a larger mode, but
then when we perform test in that larger mode, SF will be always clear and
thus mismatch from the zero_extract case (so we need to enforce CCZmode),
or it will be a zero_extract from same mode with pos 0 and len equal to
mode precision, such zero_extracts should have been really simplified
into their first operand.

For the latter case, when SF is always clear on the define_insn with
zero_extract, we need to split into something that doesn't sometimes set
SF, i.e. it has to be a test with mask that doesn't have the most
significant bit set.  In some cases it can be achieved through using test
in a wider mode (e.g. in the testcase, there is
(zero_extract:SI (reg:HI) (const_int 13) (const_int 3))
which will always set SF to 0, but we split it into
(and:HI (reg:HI) (const_int -8))
which will copy the MSB of (reg:HI) into SF, but we can do:
(and:SI (subreg:SI (reg:HI) 0) (const_int 0xfff8))
which will keep SF always cleared), but there are various cases where we
can't (when already using DImode, or when SImode and we'd turned it into
the problematic *testdi_1 implemented using SImode test, or when
the val operand is a MEM (we don't want to read from memory more than
the user originally wanted), paradoxical subreg of MEM could be problematic
too if we through the narrowing end up with a MEM).

So, the patch attempts to require CCZmode (and not CCNOmode) if it can't
really ensure the SF will have same meaning between the define_insn and what
we split it into, and if we decide we allow CCNOmode, it needs to avoid
performing narrowing and/or widen if pos + len would indicate we'd have MSB
set in the mask.

2020-04-17  Jakub Jelinek  <jakub@redhat.com>
	    Jeff Law  <law@redhat.com>

	PR target/94567
	* config/i386/i386.md (*testqi_ext_3): Use CCZmode rather than
	CCNOmode in ix86_match_ccmode if len is equal to <MODE>mode precision,
	or pos + len >= 32, or pos + len is equal to operands[2] precision
	and operands[2] is not a register operand.  During splitting perform
	SImode AND if operands[0] doesn't have CCZmode and pos + len is
	equal to mode precision.

	* gcc.c-torture/execute/pr94567.c: New test.

Co-Authored-By: Jeff Law <law@redhat.com>
---
 gcc/ChangeLog                                 | 11 ++++
 gcc/config/i386/i386.md                       | 63 ++++++++++++++++---
 gcc/testsuite/ChangeLog                       |  6 ++
 gcc/testsuite/gcc.c-torture/execute/pr94567.c | 26 ++++++++
 4 files changed, 99 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr94567.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d2aa482b2e08..1c010560c19f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2020-04-17  Jakub Jelinek  <jakub@redhat.com>
+	    Jeff Law  <law@redhat.com>
+
+	PR target/94567
+	* config/i386/i386.md (*testqi_ext_3): Use CCZmode rather than
+	CCNOmode in ix86_match_ccmode if len is equal to <MODE>mode precision,
+	or pos + len >= 32, or pos + len is equal to operands[2] precision
+	and operands[2] is not a register operand.  During splitting perform
+	SImode AND if operands[0] doesn't have CCZmode and pos + len is
+	equal to mode precision.
+
 2020-04-17  Richard Biener  <rguenther@suse.de>
 
 	PR other/94629
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 3051624d89fd..b426c21d3ddd 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -8730,10 +8730,38 @@
 	   && INTVAL (operands[3]) > 32
 	   && INTVAL (operands[3]) + INTVAL (operands[4]) == 64))
    && ix86_match_ccmode (insn,
-			 /* *testdi_1 requires CCZmode if the mask has bit
+			 /* If zero_extract mode precision is the same
+			    as len, the SF of the zero_extract
+			    comparison will be the most significant
+			    extracted bit, but this could be matched
+			    after splitting only for pos 0 len all bits
+			    trivial extractions.  Require CCZmode.  */
+			 (GET_MODE_PRECISION (<MODE>mode)
+			  == INTVAL (operands[3]))
+			 /* Otherwise, require CCZmode if we'd use a mask
+			    with the most significant bit set and can't
+			    widen it to wider mode.  *testdi_1 also
+			    requires CCZmode if the mask has bit
 			    31 set and all bits above it clear.  */
-			 GET_MODE (operands[2]) == DImode
-			 && INTVAL (operands[3]) + INTVAL (operands[4]) == 32
+			 || (INTVAL (operands[3]) + INTVAL (operands[4])
+			     >= 32)
+			 /* We can't widen also if val is not a REG.  */
+			 || (INTVAL (operands[3]) + INTVAL (operands[4])
+			     == GET_MODE_PRECISION (GET_MODE (operands[2]))
+			     && !register_operand (operands[2],
+						   GET_MODE (operands[2])))
+			 /* And we shouldn't widen if
+			    TARGET_PARTIAL_REG_STALL.  */
+			 || (TARGET_PARTIAL_REG_STALL
+			     && (INTVAL (operands[3]) + INTVAL (operands[4])
+				 >= (paradoxical_subreg_p (operands[2])
+				     && (GET_MODE_CLASS
+					  (GET_MODE (SUBREG_REG (operands[2])))
+					 == MODE_INT)
+				     ? GET_MODE_PRECISION
+					 (GET_MODE (SUBREG_REG (operands[2])))
+				     : GET_MODE_PRECISION
+					 (GET_MODE (operands[2])))))
 			 ? CCZmode : CCNOmode)"
   "#"
   "&& 1"
@@ -8750,7 +8778,10 @@
 
       /* Narrow paradoxical subregs to prevent partial register stalls.  */
       if (GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode)
-	  && GET_MODE_CLASS (submode) == MODE_INT)
+	  && GET_MODE_CLASS (submode) == MODE_INT
+	  && (GET_MODE (operands[0]) == CCZmode
+	      || pos + len < GET_MODE_PRECISION (submode)
+	      || REG_P (SUBREG_REG (val))))
 	{
 	  val = SUBREG_REG (val);
 	  mode = submode;
@@ -8758,14 +8789,32 @@
     }
 
   /* Small HImode tests can be converted to QImode.  */
-  if (register_operand (val, HImode) && pos + len <= 8)
+  if (pos + len <= 8
+      && register_operand (val, HImode))
     {
-      val = gen_lowpart (QImode, val);
-      mode = QImode;
+      rtx nval = gen_lowpart (QImode, val);
+      if (!MEM_P (nval)
+	  || GET_MODE (operands[0]) == CCZmode
+	  || pos + len < 8)
+	{
+	  val = nval;
+	  mode = QImode;
+	}
     }
 
   gcc_assert (pos + len <= GET_MODE_PRECISION (mode));
 
+  /* If the mask is going to have the sign bit set in the mode
+     we want to do the comparison in and user isn't interested just
+     in the zero flag, then we must widen the target mode.  */
+  if (pos + len == GET_MODE_PRECISION (mode)
+      && GET_MODE (operands[0]) != CCZmode)
+    {
+      gcc_assert (pos + len < 32 && !MEM_P (val));
+      mode = SImode;
+      val = gen_lowpart (mode, val);
+    }
+
   wide_int mask
     = wi::shifted_mask (pos, len, false, GET_MODE_PRECISION (mode));
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index b98c72cdd2af..830ee92357e5 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2020-04-17  Jakub Jelinek  <jakub@redhat.com>
+	    Jeff Law  <law@redhat.com>
+
+	PR target/94567
+	* gcc.c-torture/execute/pr94567.c: New test.
+
 2020-04-17  Nathan Sidwell  <nathan@acm.org>
 
 	PR c++/94608
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr94567.c b/gcc/testsuite/gcc.c-torture/execute/pr94567.c
new file mode 100644
index 000000000000..679d73d2ef42
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr94567.c
@@ -0,0 +1,26 @@
+/* PR target/94567 */
+
+volatile int a = 1, b;
+short c, d = 4, f = 2, g;
+unsigned short e = 53736;
+
+int
+foo (int i, int j)
+{
+  return i && j ? 0 : i + j;
+}
+
+int
+main ()
+{
+  for (; a; a = 0)
+    {
+      unsigned short k = e;
+      g = k >> 3;
+      if (foo (g < (f || c), b))
+	d = 0;
+    }
+  if (d != 4)
+    __builtin_abort ();
+  return 0;
+}
-- 
GitLab