From 52a392b8b797d01a7b0b06c8f20b0bf8374d489e Mon Sep 17 00:00:00 2001
From: Robin Dapp <rdapp@ventanamicro.com>
Date: Mon, 4 Nov 2024 15:34:50 +0100
Subject: [PATCH] RISC-V: Add VLS modes to strided loads and stores.

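This patch adds VLS modes to the strided load and store expanders
(mask_len_strided_load_<mode>, mask_len_strided_store_<mode>) and to the
pred_strided_load/pred_strided_store insn patterns.  The stride_predicate,
stride_load_constraint and stride_store_constraint mode attributes are
extended with the matching VLS entries.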

gcc/ChangeLog:

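	* config/riscv/autovec.md (mask_len_strided_load_<mode>): Add VLS
	modes.
	(mask_len_strided_store_<mode>): Ditto.
	* config/riscv/vector-iterators.md (stride_predicate): Ditto.
	(stride_load_constraint): Ditto.
	(stride_store_constraint): Ditto.
	* config/riscv/vector.md (@pred_strided_load<mode>): Ditto.
	(@pred_strided_store<mode>): Ditto.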
---
 gcc/config/riscv/autovec.md          |   4 +-
 gcc/config/riscv/vector-iterators.md | 243 +++++++++++++++++++++++++++
 gcc/config/riscv/vector.md           |  22 +--
 3 files changed, 256 insertions(+), 13 deletions(-)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index c64ef5a12b43..2529dc77f221 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2903,7 +2903,7 @@
 ;; == Strided Load/Store
 ;; =========================================================================
 (define_expand "mask_len_strided_load_<mode>"
-  [(match_operand:V     0 "register_operand")
+  [(match_operand:V_VLS     0 "register_operand")
    (match_operand       1 "pmode_reg_or_0_operand")
    (match_operand       2 "pmode_reg_or_0_operand")
    (match_operand:<VM>  3 "vector_mask_operand")
@@ -2919,7 +2919,7 @@
 (define_expand "mask_len_strided_store_<mode>"
   [(match_operand       0 "pmode_reg_or_0_operand")
    (match_operand       1 "pmode_reg_or_0_operand")
-   (match_operand:V     2 "register_operand")
+   (match_operand:V_VLS     2 "register_operand")
    (match_operand:<VM>  3 "vector_mask_operand")
    (match_operand       4 "autovec_length_operand")
    (match_operand       5 "const_0_operand")]
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index 43325d1ba87a..6a621459cc4a 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -3524,6 +3524,87 @@
 
   (RVVM8DF "vector_eew64_stride_operand") (RVVM4DF "vector_eew64_stride_operand")
   (RVVM2DF "vector_eew64_stride_operand") (RVVM1DF "vector_eew64_stride_operand")
+
+  (V1QI "vector_eew8_stride_operand")
+  (V2QI "vector_eew8_stride_operand")
+  (V4QI "vector_eew8_stride_operand")
+  (V8QI "vector_eew8_stride_operand")
+  (V16QI "vector_eew8_stride_operand")
+  (V32QI "vector_eew8_stride_operand")
+  (V64QI "vector_eew8_stride_operand")
+  (V128QI "vector_eew8_stride_operand")
+  (V256QI "vector_eew8_stride_operand")
+  (V512QI "vector_eew8_stride_operand")
+  (V1024QI "vector_eew8_stride_operand")
+  (V2048QI "vector_eew8_stride_operand")
+  (V4096QI "vector_eew8_stride_operand")
+  (V1HI "vector_eew16_stride_operand")
+  (V2HI "vector_eew16_stride_operand")
+  (V4HI "vector_eew16_stride_operand")
+  (V8HI "vector_eew16_stride_operand")
+  (V16HI "vector_eew16_stride_operand")
+  (V32HI "vector_eew16_stride_operand")
+  (V64HI "vector_eew16_stride_operand")
+  (V128HI "vector_eew16_stride_operand")
+  (V256HI "vector_eew16_stride_operand")
+  (V512HI "vector_eew16_stride_operand")
+  (V1024HI "vector_eew16_stride_operand")
+  (V2048HI "vector_eew16_stride_operand")
+  (V1SI "vector_eew32_stride_operand")
+  (V2SI "vector_eew32_stride_operand")
+  (V4SI "vector_eew32_stride_operand")
+  (V8SI "vector_eew32_stride_operand")
+  (V16SI "vector_eew32_stride_operand")
+  (V32SI "vector_eew32_stride_operand")
+  (V64SI "vector_eew32_stride_operand")
+  (V128SI "vector_eew32_stride_operand")
+  (V256SI "vector_eew32_stride_operand")
+  (V512SI "vector_eew32_stride_operand")
+  (V1024SI "vector_eew32_stride_operand")
+  (V1DI "vector_eew64_stride_operand")
+  (V2DI "vector_eew64_stride_operand")
+  (V4DI "vector_eew64_stride_operand")
+  (V8DI "vector_eew64_stride_operand")
+  (V16DI "vector_eew64_stride_operand")
+  (V32DI "vector_eew64_stride_operand")
+  (V64DI "vector_eew64_stride_operand")
+  (V128DI "vector_eew64_stride_operand")
+  (V256DI "vector_eew64_stride_operand")
+  (V512DI "vector_eew64_stride_operand")
+
+  (V1HF "vector_eew16_stride_operand")
+  (V2HF "vector_eew16_stride_operand")
+  (V4HF "vector_eew16_stride_operand")
+  (V8HF "vector_eew16_stride_operand")
+  (V16HF "vector_eew16_stride_operand")
+  (V32HF "vector_eew16_stride_operand")
+  (V64HF "vector_eew16_stride_operand")
+  (V128HF "vector_eew16_stride_operand")
+  (V256HF "vector_eew16_stride_operand")
+  (V512HF "vector_eew16_stride_operand")
+  (V1024HF "vector_eew16_stride_operand")
+  (V2048HF "vector_eew16_stride_operand")
+  (V1SF "vector_eew32_stride_operand")
+  (V2SF "vector_eew32_stride_operand")
+  (V4SF "vector_eew32_stride_operand")
+  (V8SF "vector_eew32_stride_operand")
+  (V16SF "vector_eew32_stride_operand")
+  (V32SF "vector_eew32_stride_operand")
+  (V64SF "vector_eew32_stride_operand")
+  (V128SF "vector_eew32_stride_operand")
+  (V256SF "vector_eew32_stride_operand")
+  (V512SF "vector_eew32_stride_operand")
+  (V1024SF "vector_eew32_stride_operand")
+  (V1DF "vector_eew64_stride_operand")
+  (V2DF "vector_eew64_stride_operand")
+  (V4DF "vector_eew64_stride_operand")
+  (V8DF "vector_eew64_stride_operand")
+  (V16DF "vector_eew64_stride_operand")
+  (V32DF "vector_eew64_stride_operand")
+  (V64DF "vector_eew64_stride_operand")
+  (V128DF "vector_eew64_stride_operand")
+  (V256DF "vector_eew64_stride_operand")
+  (V512DF "vector_eew64_stride_operand")
 ])
 
 (define_mode_attr stride_load_constraint [
@@ -3557,6 +3638,87 @@
 
   (RVVM8DF "rJ,rJ,rJ,c08,c08,c08") (RVVM4DF "rJ,rJ,rJ,c08,c08,c08")
   (RVVM2DF "rJ,rJ,rJ,c08,c08,c08") (RVVM1DF "rJ,rJ,rJ,c08,c08,c08")
+
+  (V1QI "rJ,rJ,rJ,c01,c01,c01")
+  (V2QI "rJ,rJ,rJ,c01,c01,c01")
+  (V4QI "rJ,rJ,rJ,c01,c01,c01")
+  (V8QI "rJ,rJ,rJ,c01,c01,c01")
+  (V16QI "rJ,rJ,rJ,c01,c01,c01")
+  (V32QI "rJ,rJ,rJ,c01,c01,c01")
+  (V64QI "rJ,rJ,rJ,c01,c01,c01")
+  (V128QI "rJ,rJ,rJ,c01,c01,c01")
+  (V256QI "rJ,rJ,rJ,c01,c01,c01")
+  (V512QI "rJ,rJ,rJ,c01,c01,c01")
+  (V1024QI "rJ,rJ,rJ,c01,c01,c01")
+  (V2048QI "rJ,rJ,rJ,c01,c01,c01")
+  (V4096QI "rJ,rJ,rJ,c01,c01,c01")
+  (V1HI "rJ,rJ,rJ,c02,c02,c02")
+  (V2HI "rJ,rJ,rJ,c02,c02,c02")
+  (V4HI "rJ,rJ,rJ,c02,c02,c02")
+  (V8HI "rJ,rJ,rJ,c02,c02,c02")
+  (V16HI "rJ,rJ,rJ,c02,c02,c02")
+  (V32HI "rJ,rJ,rJ,c02,c02,c02")
+  (V64HI "rJ,rJ,rJ,c02,c02,c02")
+  (V128HI "rJ,rJ,rJ,c02,c02,c02")
+  (V256HI "rJ,rJ,rJ,c02,c02,c02")
+  (V512HI "rJ,rJ,rJ,c02,c02,c02")
+  (V1024HI "rJ,rJ,rJ,c02,c02,c02")
+  (V2048HI "rJ,rJ,rJ,c02,c02,c02")
+  (V1SI "rJ,rJ,rJ,c04,c04,c04")
+  (V2SI "rJ,rJ,rJ,c04,c04,c04")
+  (V4SI "rJ,rJ,rJ,c04,c04,c04")
+  (V8SI "rJ,rJ,rJ,c04,c04,c04")
+  (V16SI "rJ,rJ,rJ,c04,c04,c04")
+  (V32SI "rJ,rJ,rJ,c04,c04,c04")
+  (V64SI "rJ,rJ,rJ,c04,c04,c04")
+  (V128SI "rJ,rJ,rJ,c04,c04,c04")
+  (V256SI "rJ,rJ,rJ,c04,c04,c04")
+  (V512SI "rJ,rJ,rJ,c04,c04,c04")
+  (V1024SI "rJ,rJ,rJ,c04,c04,c04")
+  (V1DI "rJ,rJ,rJ,c08,c08,c08")
+  (V2DI "rJ,rJ,rJ,c08,c08,c08")
+  (V4DI "rJ,rJ,rJ,c08,c08,c08")
+  (V8DI "rJ,rJ,rJ,c08,c08,c08")
+  (V16DI "rJ,rJ,rJ,c08,c08,c08")
+  (V32DI "rJ,rJ,rJ,c08,c08,c08")
+  (V64DI "rJ,rJ,rJ,c08,c08,c08")
+  (V128DI "rJ,rJ,rJ,c08,c08,c08")
+  (V256DI "rJ,rJ,rJ,c08,c08,c08")
+  (V512DI "rJ,rJ,rJ,c08,c08,c08")
+
+  (V1HF "rJ,rJ,rJ,c02,c02,c02")
+  (V2HF "rJ,rJ,rJ,c02,c02,c02")
+  (V4HF "rJ,rJ,rJ,c02,c02,c02")
+  (V8HF "rJ,rJ,rJ,c02,c02,c02")
+  (V16HF "rJ,rJ,rJ,c02,c02,c02")
+  (V32HF "rJ,rJ,rJ,c02,c02,c02")
+  (V64HF "rJ,rJ,rJ,c02,c02,c02")
+  (V128HF "rJ,rJ,rJ,c02,c02,c02")
+  (V256HF "rJ,rJ,rJ,c02,c02,c02")
+  (V512HF "rJ,rJ,rJ,c02,c02,c02")
+  (V1024HF "rJ,rJ,rJ,c02,c02,c02")
+  (V2048HF "rJ,rJ,rJ,c02,c02,c02")
+  (V1SF "rJ,rJ,rJ,c04,c04,c04")
+  (V2SF "rJ,rJ,rJ,c04,c04,c04")
+  (V4SF "rJ,rJ,rJ,c04,c04,c04")
+  (V8SF "rJ,rJ,rJ,c04,c04,c04")
+  (V16SF "rJ,rJ,rJ,c04,c04,c04")
+  (V32SF "rJ,rJ,rJ,c04,c04,c04")
+  (V64SF "rJ,rJ,rJ,c04,c04,c04")
+  (V128SF "rJ,rJ,rJ,c04,c04,c04")
+  (V256SF "rJ,rJ,rJ,c04,c04,c04")
+  (V512SF "rJ,rJ,rJ,c04,c04,c04")
+  (V1024SF "rJ,rJ,rJ,c04,c04,c04")
+  (V1DF "rJ,rJ,rJ,c08,c08,c08")
+  (V2DF "rJ,rJ,rJ,c08,c08,c08")
+  (V4DF "rJ,rJ,rJ,c08,c08,c08")
+  (V8DF "rJ,rJ,rJ,c08,c08,c08")
+  (V16DF "rJ,rJ,rJ,c08,c08,c08")
+  (V32DF "rJ,rJ,rJ,c08,c08,c08")
+  (V64DF "rJ,rJ,rJ,c08,c08,c08")
+  (V128DF "rJ,rJ,rJ,c08,c08,c08")
+  (V256DF "rJ,rJ,rJ,c08,c08,c08")
+  (V512DF "rJ,rJ,rJ,c08,c08,c08")
 ])
 
 (define_mode_attr stride_store_constraint [
@@ -3590,6 +3752,87 @@
 
   (RVVM8DF "rJ,c08") (RVVM4DF "rJ,c08")
   (RVVM2DF "rJ,c08") (RVVM1DF "rJ,c08")
+
+  (V1QI "rJ,c01")
+  (V2QI "rJ,c01")
+  (V4QI "rJ,c01")
+  (V8QI "rJ,c01")
+  (V16QI "rJ,c01")
+  (V32QI "rJ,c01")
+  (V64QI "rJ,c01")
+  (V128QI "rJ,c01")
+  (V256QI "rJ,c01")
+  (V512QI "rJ,c01")
+  (V1024QI "rJ,c01")
+  (V2048QI "rJ,c01")
+  (V4096QI "rJ,c01")
+  (V1HI "rJ,c02")
+  (V2HI "rJ,c02")
+  (V4HI "rJ,c02")
+  (V8HI "rJ,c02")
+  (V16HI "rJ,c02")
+  (V32HI "rJ,c02")
+  (V64HI "rJ,c02")
+  (V128HI "rJ,c02")
+  (V256HI "rJ,c02")
+  (V512HI "rJ,c02")
+  (V1024HI "rJ,c02")
+  (V2048HI "rJ,c02")
+  (V1SI "rJ,c04")
+  (V2SI "rJ,c04")
+  (V4SI "rJ,c04")
+  (V8SI "rJ,c04")
+  (V16SI "rJ,c04")
+  (V32SI "rJ,c04")
+  (V64SI "rJ,c04")
+  (V128SI "rJ,c04")
+  (V256SI "rJ,c04")
+  (V512SI "rJ,c04")
+  (V1024SI "rJ,c04")
+  (V1DI "rJ,c08")
+  (V2DI "rJ,c08")
+  (V4DI "rJ,c08")
+  (V8DI "rJ,c08")
+  (V16DI "rJ,c08")
+  (V32DI "rJ,c08")
+  (V64DI "rJ,c08")
+  (V128DI "rJ,c08")
+  (V256DI "rJ,c08")
+  (V512DI "rJ,c08")
+
+  (V1HF "rJ,c02")
+  (V2HF "rJ,c02")
+  (V4HF "rJ,c02")
+  (V8HF "rJ,c02")
+  (V16HF "rJ,c02")
+  (V32HF "rJ,c02")
+  (V64HF "rJ,c02")
+  (V128HF "rJ,c02")
+  (V256HF "rJ,c02")
+  (V512HF "rJ,c02")
+  (V1024HF "rJ,c02")
+  (V2048HF "rJ,c02")
+  (V1SF "rJ,c04")
+  (V2SF "rJ,c04")
+  (V4SF "rJ,c04")
+  (V8SF "rJ,c04")
+  (V16SF "rJ,c04")
+  (V32SF "rJ,c04")
+  (V64SF "rJ,c04")
+  (V128SF "rJ,c04")
+  (V256SF "rJ,c04")
+  (V512SF "rJ,c04")
+  (V1024SF "rJ,c04")
+  (V1DF "rJ,c08")
+  (V2DF "rJ,c08")
+  (V4DF "rJ,c08")
+  (V8DF "rJ,c08")
+  (V16DF "rJ,c08")
+  (V32DF "rJ,c08")
+  (V64DF "rJ,c08")
+  (V128DF "rJ,c08")
+  (V256DF "rJ,c08")
+  (V512DF "rJ,c08")
 ])
 
 (define_mode_attr gs_extension [
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index a75c7ab9d086..898cda847cb5 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -2292,8 +2292,8 @@
 ;; -------------------------------------------------------------------------------
 
 (define_insn "@pred_strided_load<mode>"
-  [(set (match_operand:V 0 "register_operand"              "=vr,    vr,    vd,    vr,    vr,    vd")
-	(if_then_else:V
+  [(set (match_operand:V_VLS 0 "register_operand"              "=vr,    vr,    vd,    vr,    vr,    vd")
+	(if_then_else:V_VLS
 	  (unspec:<VM>
 	    [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,   Wc1,    vm,    vmWc1,   Wc1,    vm")
 	     (match_operand 5 "vector_length_operand"    "   rK,    rK,    rK,       rK,    rK,    rK")
@@ -2302,10 +2302,10 @@
 	     (match_operand 8 "const_int_operand"        "    i,     i,     i,        i,     i,     i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
-	  (unspec:V
-	    [(match_operand:V 3 "memory_operand"         "     m,     m,     m,    m,     m,     m")
-	     (match_operand 4 "<V:stride_predicate>"     "<V:stride_load_constraint>")] UNSPEC_STRIDED)
-	  (match_operand:V 2 "vector_merge_operand"      "     0,    vu,    vu,    0,    vu,    vu")))]
+	  (unspec:V_VLS
+	    [(match_operand:V_VLS 3 "memory_operand"         "     m,     m,     m,    m,     m,     m")
+	     (match_operand 4 "<V_VLS:stride_predicate>"     "<V_VLS:stride_load_constraint>")] UNSPEC_STRIDED)
+	  (match_operand:V_VLS 2 "vector_merge_operand"      "     0,    vu,    vu,    0,    vu,    vu")))]
   "TARGET_VECTOR"
   "@
   vlse<sew>.v\t%0,%3,%z4%p1
@@ -2318,17 +2318,17 @@
    (set_attr "mode" "<MODE>")])
 
 (define_insn "@pred_strided_store<mode>"
-  [(set (match_operand:V 0 "memory_operand"                 "+m,    m")
-	(if_then_else:V
+  [(set (match_operand:V_VLS 0 "memory_operand"                 "+m,    m")
+	(if_then_else:V_VLS
 	  (unspec:<VM>
 	    [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,    vmWc1")
 	     (match_operand 4 "vector_length_operand"    "   rK,       rK")
 	     (match_operand 5 "const_int_operand"        "    i,        i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
-	  (unspec:V
-	    [(match_operand 2 "<V:stride_predicate>"     "<V:stride_store_constraint>")
-	     (match_operand:V 3 "register_operand"       "   vr,       vr")] UNSPEC_STRIDED)
+	  (unspec:V_VLS
+	    [(match_operand 2 "<V_VLS:stride_predicate>"     "<V_VLS:stride_store_constraint>")
+	     (match_operand:V_VLS 3 "register_operand"       "   vr,       vr")] UNSPEC_STRIDED)
 	  (match_dup 0)))]
   "TARGET_VECTOR"
   "@
-- 
GitLab