From bef95ba085b0ae9bf3eb79a8eed685236d773116 Mon Sep 17 00:00:00 2001 From: Uros Bizjak <ubizjak@gmail.com> Date: Fri, 14 Jul 2023 11:46:22 +0200 Subject: [PATCH] cprop: Do not set REG_EQUAL note when simplifying paradoxical subreg [PR110206] cprop1 pass does not consider paradoxical subreg and for (insn 22) claims that it equals 8 elements of HImodeby setting REG_EQUAL note: (insn 21 19 22 4 (set (reg:V4QI 98) (mem/u/c:V4QI (symbol_ref/u:DI ("*.LC1") [flags 0x2]) [0 S4 A32])) "pr110206.c":12:42 1530 {*movv4qi_internal} (expr_list:REG_EQUAL (const_vector:V4QI [ (const_int -52 [0xffffffffffffffcc]) repeated x4 ]) (nil))) (insn 22 21 23 4 (set (reg:V8HI 100) (zero_extend:V8HI (vec_select:V8QI (subreg:V16QI (reg:V4QI 98) 0) (parallel [ (const_int 0 [0]) (const_int 1 [0x1]) (const_int 2 [0x2]) (const_int 3 [0x3]) (const_int 4 [0x4]) (const_int 5 [0x5]) (const_int 6 [0x6]) (const_int 7 [0x7]) ])))) "pr110206.c":12:42 7471 {sse4_1_zero_extendv8qiv8hi2} (expr_list:REG_EQUAL (const_vector:V8HI [ (const_int 204 [0xcc]) repeated x8 ]) (expr_list:REG_DEAD (reg:V4QI 98) (nil)))) We rely on the "undefined" vals to have a specific value (from the earlier REG_EQUAL note) but actual code generation doesn't ensure this (it doesn't need to). That said, the issue isn't the constant folding per-se but that we do not actually constant fold but register an equality that doesn't hold. PR target/110206 gcc/ChangeLog: * fwprop.cc (contains_paradoxical_subreg_p): Move to ... * rtlanal.cc (contains_paradoxical_subreg_p): ... here. * rtlanal.h (contains_paradoxical_subreg_p): Add prototype. * cprop.cc (try_replace_reg): Do not set REG_EQUAL note when the original source contains a paradoxical subreg. gcc/testsuite/ChangeLog: * gcc.target/i386/pr110206.c: New test. (cherry picked from commit 1815e313a8fb519a77c94a908eb6dafc4ce51ffe) --- gcc/cprop.cc | 4 ++- gcc/fwprop.cc | 16 +--------- gcc/rtlanal.cc | 15 +++++++++ gcc/rtlanal.h | 2 ++ gcc/testsuite/gcc.target/i386/pr110206.c | 39 ++++++++++++++++++++++++ 5 files changed, 60 insertions(+), 16 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr110206.c diff --git a/gcc/cprop.cc b/gcc/cprop.cc index 6ec0bda4a243..294fec871f0e 100644 --- a/gcc/cprop.cc +++ b/gcc/cprop.cc @@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see #include "coretypes.h" #include "backend.h" #include "rtl.h" +#include "rtlanal.h" #include "cfghooks.h" #include "df.h" #include "insn-config.h" @@ -795,7 +796,8 @@ try_replace_reg (rtx from, rtx to, rtx_insn *insn) /* If we've failed perform the replacement, have a single SET to a REG destination and don't yet have a note, add a REG_EQUAL note to not lose information. */ - if (!success && note == 0 && set != 0 && REG_P (SET_DEST (set))) + if (!success && note == 0 && set != 0 && REG_P (SET_DEST (set)) + && !contains_paradoxical_subreg_p (SET_SRC (set))) note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src)); } diff --git a/gcc/fwprop.cc b/gcc/fwprop.cc index ae342f59407b..0707a234726d 100644 --- a/gcc/fwprop.cc +++ b/gcc/fwprop.cc @@ -25,6 +25,7 @@ along with GCC; see the file COPYING3. If not see #include "coretypes.h" #include "backend.h" #include "rtl.h" +#include "rtlanal.h" #include "df.h" #include "rtl-ssa.h" @@ -353,21 +354,6 @@ reg_single_def_p (rtx x) return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x)); } -/* Return true if X contains a paradoxical subreg. */ - -static bool -contains_paradoxical_subreg_p (rtx x) -{ - subrtx_var_iterator::array_type array; - FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST) - { - x = *iter; - if (SUBREG_P (x) && paradoxical_subreg_p (x)) - return true; - } - return false; -} - /* Try to substitute (set DEST SRC), which defines DEF, into note NOTE of USE_INSN. Return the number of substitutions on success, otherwise return -1 and leave USE_INSN unchanged. diff --git a/gcc/rtlanal.cc b/gcc/rtlanal.cc index e69d2e8a9f52..6b02428b7350 100644 --- a/gcc/rtlanal.cc +++ b/gcc/rtlanal.cc @@ -6969,3 +6969,18 @@ vec_series_lowpart_p (machine_mode result_mode, machine_mode op_mode, rtx sel) } return false; } + +/* Return true if X contains a paradoxical subreg. */ + +bool +contains_paradoxical_subreg_p (rtx x) +{ + subrtx_var_iterator::array_type array; + FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST) + { + x = *iter; + if (SUBREG_P (x) && paradoxical_subreg_p (x)) + return true; + } + return false; +} diff --git a/gcc/rtlanal.h b/gcc/rtlanal.h index 5fbed816e202..8762276d687d 100644 --- a/gcc/rtlanal.h +++ b/gcc/rtlanal.h @@ -338,4 +338,6 @@ vec_series_highpart_p (machine_mode result_mode, machine_mode op_mode, bool vec_series_lowpart_p (machine_mode result_mode, machine_mode op_mode, rtx sel); +bool +contains_paradoxical_subreg_p (rtx x); #endif diff --git a/gcc/testsuite/gcc.target/i386/pr110206.c b/gcc/testsuite/gcc.target/i386/pr110206.c new file mode 100644 index 000000000000..af1455c5aa95 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr110206.c @@ -0,0 +1,39 @@ +/* PR target/110206 */ +/* { dg-do run } */ +/* { dg-options "-Os -mavx512bw -mavx512vl" } */ +/* { dg-require-effective-target avx512bw } */ +/* { dg-require-effective-target avx512vl } */ + +#define AVX512BW +#define AVX512VL + +#include "avx512f-check.h" + +typedef unsigned char __attribute__((__vector_size__ (4))) U; +typedef unsigned char __attribute__((__vector_size__ (8))) V; +typedef unsigned short u16; + +V g; + +void +__attribute__((noinline)) +foo (U u, u16 c, V *r) +{ + if (!c) + abort (); + V x = __builtin_shufflevector (u, (204 >> u), 7, 0, 5, 1, 3, 5, 0, 2); + V y = __builtin_shufflevector (g, (V) { }, 7, 6, 6, 7, 2, 6, 3, 5); + V z = __builtin_shufflevector (y, 204 * x, 3, 9, 8, 1, 4, 6, 14, 5); + *r = z; +} + +static void test_256 (void) { }; + +static void +test_128 (void) +{ + V r; + foo ((U){4}, 5, &r); + if (r[6] != 0x30) + abort(); +} -- GitLab