From e7287cbbb208b676991096dd9081ff8a61c49781 Mon Sep 17 00:00:00 2001
From: Pan Li <pan2.li@intel.com>
Date: Sat, 22 Feb 2025 19:34:52 +0800
Subject: [PATCH] RISC-V: Fix bug for expand_const_vector interleave [PR118931]

This patch fixes a bug when expanding a const vector in the interleave
case.  For example, suppose we have:

base1 = 151
step = 121

For vec_series, we generate a vector of the form v[i] = base + i * step.
The vec_series then produces the result below in HImode, where we can see
that the values overflow into the highest 8 bits of each HImode element.

v1.b = {151, 255, 7,  0, 119,  0, 231,  0, 87,  1, 199,  1, 55,   2, 167,   2}

That is, we expect v1.b to be:

v1.b = {151, 0, 7,  0, 119,  0, 231,  0, 87,  0, 199,  0, 55,   0, 167,   0}

After that, v1 is IORed with v2, which holds the series for base2 (i.e.
the other series).

v2.b =  {0,  17, 0, 33,   0, 49,   0, 65,  0, 81,   0, 97,  0, 113,   0, 129}

Unfortunately, base1 + i * step1 in HImode may overflow into the high
8 bits, and those high 8 bits then pollute v2 and result in incorrect
values in the const_vector.

This patch performs an overflow-to-smode check before the optimized
interleave code generation.  If the series overflows, or the step is not
a compile-time constant (VLA), we fall back to the default merge approach.

The following test suite passes with this patch:
* The rv64gcv full regression test.

	PR target/118931

gcc/ChangeLog:

	* config/riscv/riscv-v.cc (expand_const_vector): Add overflow to
	smode check and fall back to the default merge approach on overflow.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/base/pr118931-run-1.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/riscv-v.cc                   | 36 +++++++++++++++----
 .../riscv/rvv/base/pr118931-run-1.c           | 19 ++++++++++
 2 files changed, 48 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 7cc15f3d53c1..287eb3e54cf7 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1489,22 +1489,44 @@ expand_const_vector (rtx target, rtx src)
 
 		  EEW = 32, { 2, 4, ... }.
 
-	     This only works as long as the larger type does not overflow
-	     as we can't guarantee a zero value for each second element
-	     of the sequence with smaller EEW.
-	     ??? For now we assume that no overflow happens with positive
-	     steps and forbid negative steps altogether.  */
+	     Both the series1 and series2 may overflow before taking the IOR
+	     to generate the final result.  However, only series1 matters
+	     because the series2 will shift before IOR, thus the overflow
+	     bits will never pollute the final result.
+
+	     For now we forbid the negative steps and overflow, and they
+	     will fall back to the default merge way to generate the
+	     const_vector.  */
+
 	  unsigned int new_smode_bitsize = builder.inner_bits_size () * 2;
 	  scalar_int_mode new_smode;
 	  machine_mode new_mode;
 	  poly_uint64 new_nunits
 	    = exact_div (GET_MODE_NUNITS (builder.mode ()), 2);
+
+	  poly_int64 base1_poly = rtx_to_poly_int64 (base1);
+	  bool overflow_smode_p = false;
+
+	  if (!step1.is_constant ())
+	    overflow_smode_p = true;
+	  else
+	    {
+	      int elem_count = XVECLEN (src, 0);
+	      uint64_t step1_val = step1.to_constant ();
+	      uint64_t base1_val = base1_poly.to_constant ();
+	      uint64_t elem_val = base1_val + (elem_count - 1) * step1_val;
+
+	      if ((elem_val >> builder.inner_bits_size ()) != 0)
+		overflow_smode_p = true;
+	    }
+
 	  if (known_ge (step1, 0) && known_ge (step2, 0)
 	      && int_mode_for_size (new_smode_bitsize, 0).exists (&new_smode)
-	      && get_vector_mode (new_smode, new_nunits).exists (&new_mode))
+	      && get_vector_mode (new_smode, new_nunits).exists (&new_mode)
+	      && !overflow_smode_p)
 	    {
 	      rtx tmp1 = gen_reg_rtx (new_mode);
-	      base1 = gen_int_mode (rtx_to_poly_int64 (base1), new_smode);
+	      base1 = gen_int_mode (base1_poly, new_smode);
 	      expand_vec_series (tmp1, base1, gen_int_mode (step1, new_smode));
 
 	      if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0))
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c
new file mode 100644
index 000000000000..84c63b5040e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c
@@ -0,0 +1,19 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -fwhole-program -mrvv-vector-bits=zvl" } */
+
+long long m;
+char f = 151;
+char h = 103;
+unsigned char a = 109;
+
+int main() {
+  for (char l = 0; l < 255 - 241; l += h - 102)
+    a *= f;
+
+  m = a;
+
+  if (m != 29)
+    __builtin_abort ();
+
+  return 0;
+}
-- 
GitLab