From bef95ba085b0ae9bf3eb79a8eed685236d773116 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Fri, 14 Jul 2023 11:46:22 +0200
Subject: [PATCH] cprop: Do not set REG_EQUAL note when simplifying paradoxical
 subreg [PR110206]

cprop1 pass does not consider paradoxical subreg and for (insn 22) claims
that it equals 8 elements of HImodeby setting REG_EQUAL note:

(insn 21 19 22 4 (set (reg:V4QI 98)
        (mem/u/c:V4QI (symbol_ref/u:DI ("*.LC1") [flags 0x2]) [0  S4 A32])) "pr110206.c":12:42 1530 {*movv4qi_internal}
     (expr_list:REG_EQUAL (const_vector:V4QI [
                (const_int -52 [0xffffffffffffffcc]) repeated x4
            ])
        (nil)))
(insn 22 21 23 4 (set (reg:V8HI 100)
        (zero_extend:V8HI (vec_select:V8QI (subreg:V16QI (reg:V4QI 98) 0)
                (parallel [
                        (const_int 0 [0])
                        (const_int 1 [0x1])
                        (const_int 2 [0x2])
                        (const_int 3 [0x3])
                        (const_int 4 [0x4])
                        (const_int 5 [0x5])
                        (const_int 6 [0x6])
                        (const_int 7 [0x7])
                    ])))) "pr110206.c":12:42 7471 {sse4_1_zero_extendv8qiv8hi2}
     (expr_list:REG_EQUAL (const_vector:V8HI [
                (const_int 204 [0xcc]) repeated x8
            ])
        (expr_list:REG_DEAD (reg:V4QI 98)
            (nil))))

We rely on the "undefined" vals to have a specific value (from the earlier
REG_EQUAL note) but actual code generation doesn't ensure this (it doesn't
need to).  That said, the issue isn't the constant folding per-se but that
we do not actually constant fold but register an equality that doesn't hold.

	PR target/110206

gcc/ChangeLog:

	* fwprop.cc (contains_paradoxical_subreg_p): Move to ...
	* rtlanal.cc (contains_paradoxical_subreg_p): ... here.
	* rtlanal.h (contains_paradoxical_subreg_p): Add prototype.
	* cprop.cc (try_replace_reg): Do not set REG_EQUAL note
	when the original source contains a paradoxical subreg.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr110206.c: New test.

(cherry picked from commit 1815e313a8fb519a77c94a908eb6dafc4ce51ffe)
---
 gcc/cprop.cc                             |  4 ++-
 gcc/fwprop.cc                            | 16 +---------
 gcc/rtlanal.cc                           | 15 +++++++++
 gcc/rtlanal.h                            |  2 ++
 gcc/testsuite/gcc.target/i386/pr110206.c | 39 ++++++++++++++++++++++++
 5 files changed, 60 insertions(+), 16 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110206.c

diff --git a/gcc/cprop.cc b/gcc/cprop.cc
index 6ec0bda4a243..294fec871f0e 100644
--- a/gcc/cprop.cc
+++ b/gcc/cprop.cc
@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "coretypes.h"
 #include "backend.h"
 #include "rtl.h"
+#include "rtlanal.h"
 #include "cfghooks.h"
 #include "df.h"
 #include "insn-config.h"
@@ -795,7 +796,8 @@ try_replace_reg (rtx from, rtx to, rtx_insn *insn)
       /* If we've failed perform the replacement, have a single SET to
 	 a REG destination and don't yet have a note, add a REG_EQUAL note
 	 to not lose information.  */
-      if (!success && note == 0 && set != 0 && REG_P (SET_DEST (set)))
+      if (!success && note == 0 && set != 0 && REG_P (SET_DEST (set))
+	  && !contains_paradoxical_subreg_p (SET_SRC (set)))
 	note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
     }
 
diff --git a/gcc/fwprop.cc b/gcc/fwprop.cc
index ae342f59407b..0707a234726d 100644
--- a/gcc/fwprop.cc
+++ b/gcc/fwprop.cc
@@ -25,6 +25,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "coretypes.h"
 #include "backend.h"
 #include "rtl.h"
+#include "rtlanal.h"
 #include "df.h"
 #include "rtl-ssa.h"
 
@@ -353,21 +354,6 @@ reg_single_def_p (rtx x)
   return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x));
 }
 
-/* Return true if X contains a paradoxical subreg.  */
-
-static bool
-contains_paradoxical_subreg_p (rtx x)
-{
-  subrtx_var_iterator::array_type array;
-  FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
-    {
-      x = *iter;
-      if (SUBREG_P (x) && paradoxical_subreg_p (x))
-	return true;
-    }
-  return false;
-}
-
 /* Try to substitute (set DEST SRC), which defines DEF, into note NOTE of
    USE_INSN.  Return the number of substitutions on success, otherwise return
    -1 and leave USE_INSN unchanged.
diff --git a/gcc/rtlanal.cc b/gcc/rtlanal.cc
index e69d2e8a9f52..6b02428b7350 100644
--- a/gcc/rtlanal.cc
+++ b/gcc/rtlanal.cc
@@ -6969,3 +6969,18 @@ vec_series_lowpart_p (machine_mode result_mode, machine_mode op_mode, rtx sel)
     }
   return false;
 }
+
+/* Return true if X contains a paradoxical subreg.  */
+
+bool
+contains_paradoxical_subreg_p (rtx x)
+{
+  subrtx_var_iterator::array_type array;
+  FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
+    {
+      x = *iter;
+      if (SUBREG_P (x) && paradoxical_subreg_p (x))
+	return true;
+    }
+  return false;
+}
diff --git a/gcc/rtlanal.h b/gcc/rtlanal.h
index 5fbed816e202..8762276d687d 100644
--- a/gcc/rtlanal.h
+++ b/gcc/rtlanal.h
@@ -338,4 +338,6 @@ vec_series_highpart_p (machine_mode result_mode, machine_mode op_mode,
 bool
 vec_series_lowpart_p (machine_mode result_mode, machine_mode op_mode, rtx sel);
 
+bool
+contains_paradoxical_subreg_p (rtx x);
 #endif
diff --git a/gcc/testsuite/gcc.target/i386/pr110206.c b/gcc/testsuite/gcc.target/i386/pr110206.c
new file mode 100644
index 000000000000..af1455c5aa95
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110206.c
@@ -0,0 +1,39 @@
+/* PR target/110206 */
+/* { dg-do run } */
+/* { dg-options "-Os -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512BW
+#define AVX512VL
+
+#include "avx512f-check.h"
+
+typedef unsigned char __attribute__((__vector_size__ (4))) U;
+typedef unsigned char __attribute__((__vector_size__ (8))) V;
+typedef unsigned short u16;
+
+V g;
+
+void
+__attribute__((noinline))
+foo (U u, u16 c, V *r)
+{
+  if (!c)
+    abort ();
+  V x = __builtin_shufflevector (u, (204 >> u), 7, 0, 5, 1, 3, 5, 0, 2);
+  V y = __builtin_shufflevector (g, (V) { }, 7, 6, 6, 7, 2, 6, 3, 5);
+  V z = __builtin_shufflevector (y, 204 * x, 3, 9, 8, 1, 4, 6, 14, 5);
+  *r = z;
+}
+
+static void test_256 (void) { };
+
+static void
+test_128 (void)
+{
+  V r;
+  foo ((U){4}, 5, &r);
+  if (r[6] != 0x30)
+    abort();
+}
-- 
GitLab