diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 78c47ece51e51cd683b3ec086d2a826b7f87ca54..e41f65a08941d435544227bce7056760044140e1 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -158,7 +158,9 @@ bool check_builtin_call (location_t, vec<location_t>, unsigned int, tree, unsigned int, tree *); bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); bool legitimize_move (rtx, rtx, machine_mode); +void emit_vlmax_vsetvl (machine_mode, rtx); void emit_vlmax_op (unsigned, rtx, rtx, machine_mode); +void emit_vlmax_op (unsigned, rtx, rtx, rtx, machine_mode); void emit_nonvlmax_op (unsigned, rtx, rtx, rtx, machine_mode); enum vlmul_type get_vlmul (machine_mode); unsigned int get_ratio (machine_mode); diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 9b83ef6ea5e7429f2f7c790f72409ebf4e143ebf..d7b77fd61238a738a01d8a604315e3a01dd575dc 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -98,6 +98,15 @@ private: expand_operand m_ops[MAX_OPERANDS]; }; +static unsigned +get_sew (machine_mode mode) +{ + unsigned int sew = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL + ? 8 + : GET_MODE_BITSIZE (GET_MODE_INNER (mode)); + return sew; +} + /* Return true if X is a const_vector with all duplicate elements, which is in the range between MINVAL and MAXVAL. */ bool @@ -109,13 +118,10 @@ const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT minval, && IN_RANGE (INTVAL (elt), minval, maxval)); } -static rtx -emit_vlmax_vsetvl (machine_mode vmode) +void +emit_vlmax_vsetvl (machine_mode vmode, rtx vl) { - rtx vl = gen_reg_rtx (Pmode); - unsigned int sew = GET_MODE_CLASS (vmode) == MODE_VECTOR_BOOL - ? 8 - : GET_MODE_BITSIZE (GET_MODE_INNER (vmode)); + unsigned int sew = get_sew (vmode); enum vlmul_type vlmul = get_vlmul (vmode); unsigned int ratio = calculate_ratio (sew, vlmul); @@ -125,8 +131,6 @@ emit_vlmax_vsetvl (machine_mode vmode) const0_rtx)); else emit_insn (gen_vlmax_avl (Pmode, vl, gen_int_mode (ratio, Pmode))); - - return vl; } /* Calculate SEW/LMUL ratio. */ @@ -166,7 +170,7 @@ calculate_ratio (unsigned int sew, enum vlmul_type vlmul) /* Emit an RVV unmask && vl mov from SRC to DEST. 
*/ static void emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len, - machine_mode mask_mode) + machine_mode mask_mode, bool vlmax_p) { insn_expander<8> e; machine_mode mode = GET_MODE (dest); @@ -186,17 +190,18 @@ emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len, e.add_input_operand (len, Pmode); else { - rtx vlmax = emit_vlmax_vsetvl (mode); + rtx vlmax = gen_reg_rtx (Pmode); + emit_vlmax_vsetvl (mode, vlmax); e.add_input_operand (vlmax, Pmode); } if (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL) e.add_policy_operand (get_prefer_tail_policy (), get_prefer_mask_policy ()); - if (len) - e.add_avl_type_operand (avl_type::NONVLMAX); - else + if (vlmax_p) e.add_avl_type_operand (avl_type::VLMAX); + else + e.add_avl_type_operand (avl_type::NONVLMAX); e.expand ((enum insn_code) icode, MEM_P (dest) || MEM_P (src)); } @@ -204,14 +209,21 @@ emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len, void emit_vlmax_op (unsigned icode, rtx dest, rtx src, machine_mode mask_mode) { - emit_pred_op (icode, NULL_RTX, dest, src, NULL_RTX, mask_mode); + emit_pred_op (icode, NULL_RTX, dest, src, NULL_RTX, mask_mode, true); +} + +void +emit_vlmax_op (unsigned icode, rtx dest, rtx src, rtx len, + machine_mode mask_mode) +{ + emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode, true); } void emit_nonvlmax_op (unsigned icode, rtx dest, rtx src, rtx len, machine_mode mask_mode) { - emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode); + emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode, false); } static void @@ -265,6 +277,20 @@ legitimize_move (rtx dest, rtx src, machine_mode mask_mode) expand_const_vector (dest, src, mask_mode); return true; } + + /* In order to decrease the memory traffic, we don't use whole register + * load/store for the LMUL less than 1 and mask mode, so those case will + * require one extra general purpose register, but it's not allowed during LRA + * process, so we have a special move pattern used for LRA, which will defer + * the expansion after LRA. */ + if ((known_lt (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR) + || GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) + && lra_in_progress) + { + emit_insn (gen_mov_lra (mode, Pmode, dest, src)); + return true; + } + if (known_ge (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR) && GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL) { @@ -274,6 +300,13 @@ legitimize_move (rtx dest, rtx src, machine_mode mask_mode) return false; } + + if (register_operand (src, mode) && register_operand (dest, mode)) + { + emit_insn (gen_rtx_SET (dest, src)); + return true; + } + if (!register_operand (src, mode) && !register_operand (dest, mode)) { rtx tmp = gen_reg_rtx (mode); @@ -540,9 +573,7 @@ force_vector_length_operand (rtx vl) static rtx gen_no_side_effects_vsetvl_rtx (machine_mode vmode, rtx vl, rtx avl) { - unsigned int sew = GET_MODE_CLASS (vmode) == MODE_VECTOR_BOOL - ? 
8 - : GET_MODE_BITSIZE (GET_MODE_INNER (vmode)); + unsigned int sew = get_sew (vmode); return gen_vsetvl_no_side_effects (Pmode, vl, avl, gen_int_mode (sew, Pmode), gen_int_mode (get_vlmul (vmode), Pmode), const0_rtx, const0_rtx); diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 20978a5851306a88cbd7f59217b1e6db7a896cae..1ddc1d3fd390a967239b26e5c19d78841d0e298f 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -635,6 +635,62 @@ [(set_attr "type" "vmov") (set_attr "mode" "<MODE>")]) +(define_expand "@mov<V_FRACT:mode><P:mode>_lra" + [(parallel + [(set (match_operand:V_FRACT 0 "reg_or_mem_operand") + (match_operand:V_FRACT 1 "reg_or_mem_operand")) + (clobber (match_scratch:P 2))])] + "TARGET_VECTOR && (lra_in_progress || reload_completed)" +{}) + +(define_expand "@mov<VB:mode><P:mode>_lra" + [(parallel + [(set (match_operand:VB 0 "reg_or_mem_operand") + (match_operand:VB 1 "reg_or_mem_operand")) + (clobber (match_scratch:P 2))])] + "TARGET_VECTOR && (lra_in_progress || reload_completed)" +{}) + +(define_insn_and_split "*mov<V_FRACT:mode><P:mode>_lra" + [(set (match_operand:V_FRACT 0 "reg_or_mem_operand" "=vr, m,vr") + (match_operand:V_FRACT 1 "reg_or_mem_operand" " m,vr,vr")) + (clobber (match_scratch:P 2 "=&r,&r,X"))] + "TARGET_VECTOR && (lra_in_progress || reload_completed)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + if (REG_P (operands[0]) && REG_P (operands[1])) + emit_insn (gen_rtx_SET (operands[0], operands[1])); + else + { + riscv_vector::emit_vlmax_vsetvl (<V_FRACT:MODE>mode, operands[2]); + riscv_vector::emit_vlmax_op (code_for_pred_mov (<V_FRACT:MODE>mode), + operands[0], operands[1], operands[2], <VM>mode); + } + DONE; +}) + +(define_insn_and_split "*mov<VB:mode><P:mode>_lra" + [(set (match_operand:VB 0 "reg_or_mem_operand" "=vr, m,vr") + (match_operand:VB 1 "reg_or_mem_operand" " m,vr,vr")) + (clobber (match_scratch:P 2 "=&r,&r,X"))] + "TARGET_VECTOR && (lra_in_progress || reload_completed)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + if (REG_P (operands[0]) && REG_P (operands[1])) + emit_insn (gen_rtx_SET (operands[0], operands[1])); + else + { + riscv_vector::emit_vlmax_vsetvl (<VB:MODE>mode, operands[2]); + riscv_vector::emit_vlmax_op (code_for_pred_mov (<VB:MODE>mode), + operands[0], operands[1], operands[2], <VB:MODE>mode); + } + DONE; +}) + ;; ----------------------------------------------------------------- ;; ---- Duplicate Operations ;; ----------------------------------------------------------------- diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/bug-18.C b/gcc/testsuite/g++.target/riscv/rvv/base/bug-18.C new file mode 100644 index 0000000000000000000000000000000000000000..868ec1ef90fe70b8b57282dc229ea47c55ecd836 --- /dev/null +++ b/gcc/testsuite/g++.target/riscv/rvv/base/bug-18.C @@ -0,0 +1,139 @@ +/* { dg-do compile { target { riscv_vector } } } */ + +#include <iostream> +#include "riscv_vector.h" +using std::cerr; +using std::endl; +template < class , class b > int c(b val) { + return val; +} +auto &f32(c< float, uint32_t >); +template < class d > +bool check(d , d , size_t ); +int main() { + size_t e ; + int16_t f[] {}; + size_t g ; + int32_t i[] {4784}; + size_t aa = 4; + int16_t ab[] {2313}; + int16_t j[] {7114 }; + int16_t k[] {7696 }; + uint32_t l[] {9951 }; + int32_t m[] {2659 }; + uint16_t n[] {7537 }; + int32_t o[] {05733} + ; + uint32_t p[] {7010090 }; + uint32_t q[] {21060 }; + uint32_t r[] {2273 }; + uint32_t s[] {4094366 }; + int16_t ac[] {11880 }; + int16_t t[] {10988}; + int16_t 
ad[] {30376}; + int8_t u[] {}; + int8_t ae[] {7}; + int8_t v[] {40}; + int8_t af[] {6}; + int16_t w[] {4077 }; + int16_t x[] {7932 }; + int8_t y[] {3}; + int8_t z[] {4}; + uint16_t ag[] {2831}; + int16_t ah[] {10412 }; + int16_t ai[] {6823}; + int32_t aj[] {8572 }; + int32_t ak[] {9999 }; + uint32_t al[] {50166962 }; + uint32_t am[] {9781 }; + int8_t an[] {9, 35}; + float ao[] {222.65, 22.79}; + float ap[] {126.10, 13.92}; + int64_t aq[] {508727, 5556}; + int16_t ar[] {2861 }; + int16_t as[] {21420}; + int16_t at[] {4706 }; + uint32_t au ; + uint32_t av = 600295662; + size_t aw ; + int16_t ax = 13015; + uint32_t ay ; + uint16_t az = 10652; + int32_t ba ; + int8_t bb ; + int64_t bc = 40183771683589512; + +asm volatile ("ttt":::"memory"); + vint16mf4_t bd = __riscv_vle16_v_i16mf4(j, 2); + vuint32mf2_t be = __riscv_vle32_v_u32mf2(l, 2); + vint32mf2_t bf = __riscv_vle32_v_i32mf2(m, 2); + vuint16mf4_t bg = __riscv_vle16_v_u16mf4(n, 2); + vint8mf4_t bh ; + vuint32m2_t bi = __riscv_vle32_v_u32m2(p, 2); + vuint32m2_t bj = __riscv_vle32_v_u32m2(q, 2); + vuint32m2_t bk = __riscv_vle32_v_u32m2(r, 2); + vuint32m2_t bl = __riscv_vle32_v_u32m2(s, 2); + vint16m1_t bm = __riscv_vle16_v_i16m1(ac, 2); + vint16m1_t bn = __riscv_vle16_v_i16m1(t, 2); + vint8mf2_t bo = __riscv_vle8_v_i8mf2(u, 1); + vint8mf2_t bp = __riscv_vle8_v_i8mf2(ae, 1); + vint8mf8_t bq = __riscv_vle8_v_i8mf8(af, 1); + vint16mf4_t br = __riscv_vle16_v_i16mf4(w, 2); + vint16mf4_t bs = __riscv_vle16_v_i16mf4(x, 2); + vint8mf8_t bt = __riscv_vle8_v_i8mf8(y, 1); + vint8mf8_t bu = __riscv_vle8_v_i8mf8(z, 1); + vuint16mf4_t bv = __riscv_vle16_v_u16mf4(ag, 1); + vint16mf4_t bw = __riscv_vle16_v_i16mf4(ah, 2); + vint16mf4_t bx = __riscv_vle16_v_i16mf4(ai, 2); + vint32mf2_t by = __riscv_vle32_v_i32mf2(aj, 2); + vint32mf2_t bz = __riscv_vle32_v_i32mf2(ak, 2); + vuint32mf2_t ca = __riscv_vle32_v_u32mf2(al, 2); + vuint32mf2_t cb = __riscv_vle32_v_u32mf2(am, 2); + vint8mf8_t cc = __riscv_vle8_v_i8mf8(an, 2); + vfloat32mf2_t cd = __riscv_vle32_v_f32mf2(ao, 2); + vfloat32mf2_t ce = __riscv_vle32_v_f32mf2(ap, 2); + vint64m1_t cf = __riscv_vle64_v_i64m1(aq, 2); + vint16mf4_t cg = __riscv_vle16_v_i16mf4(ar, 2); + vint16mf4_t ch = __riscv_vle16_v_i16mf4(as, 2); + vint16mf4_t var_62 = __riscv_vle16_v_i16mf4(at, 2); + vbool64_t var_20 = __riscv_vmadc_vx_u32mf2_b64(be, ay, 2); + int8_t var_17 = __riscv_vmv_x_s_i8mf4_i8(bh); + vbool16_t var_28 = __riscv_vmsltu_vv_u32m2_b16(bk, bl, 2); + vint8mf2_t var_14 = __riscv_vadd_vv_i8mf2(bo, bp, 1); + vbool64_t var_8 = __riscv_vmseq_vv_i16mf4_b64(br, bs, 2); + vbool64_t var_42 = __riscv_vmsbc_vx_u16mf4_b64(bv, az, 1); + vbool64_t var_46 = __riscv_vmsge_vx_i32mf2_b64(by, ba, 2); + vint16mf4_t var_4 = __riscv_vncvt_x_x_w_i16mf4(bz, 2); + vbool64_t var_51 = __riscv_vmsgt_vx_i8mf8_b64(cc, bb, 2); + vbool64_t var_56 = __riscv_vmfne_vv_f32mf2_b64(cd, ce, 2); + vbool64_t var_55 = __riscv_vmseq_vx_i64m1_b64(cf, bc, 2); + vuint32m2_t var_16 = __riscv_vslideup_vx_u32m2_mu(var_28, bi, bj, aw, 2); + vint8mf2_t var_12 = __riscv_vmulh_vv_i8mf2(var_14, var_14, 1); + vint16mf4_t var_0 = __riscv_vdiv_vv_i16mf4_mu(var_8, var_4, ch, var_62, 2); + vuint32m2_t var_13 = __riscv_vsub_vx_u32m2(var_16, av, 2); + int8_t var_9 = __riscv_vmv_x_s_i8mf2_i8(var_12); + vint16mf4_t var_19 = __riscv_vor_vx_i16mf4_mu(var_20, var_0, bd, ax, 2); + uint32_t var_10 = __riscv_vmv_x_s_u32m2_u32(var_13); + vint8mf8_t var_7 = __riscv_vmadd_vx_i8mf8_mu(var_42, bt, var_9, bu, 1); + __riscv_vse16_v_i16mf4(k, var_19, 2); + vuint32mf2_t var_3 = + 
__riscv_vslide1down_vx_u32mf2_mu(var_51, ca, cb, var_10, 2); + if (check(k, ab, aa)) + cerr << "check 8 fails" << endl; + vbool64_t var_2 = __riscv_vmsne_vx_u32mf2_b64_mu(var_55, var_56, var_3, au, 2); + vint16mf4_t var_1 = __riscv_vssub_vv_i16mf4_mu(var_2, var_0, var_4, cg, 2); + vint16mf4_t var_5 = __riscv_vxor_vv_i16mf4_mu(var_46, var_1, bw, bx, 2); + vint32mf2_t var_18 = __riscv_vwmaccsu_vv_i32mf2(bf, var_1, bg, 2); + vint8mf8_t var_6 = __riscv_vncvt_x_x_w_i8mf8_mu(var_8, var_7, var_5, 1); + vint16m1_t var_15 = __riscv_vredand_vs_i16mf4_i16m1_tu(bm, var_5, bn, 2); + __riscv_vse32_v_i32mf2(o, var_18, 2); + vbool64_t var_11 = __riscv_vmsge_vx_i8mf8_b64(var_6, var_17, 1); + __riscv_vse16_v_i16m1(ad, var_15, 1); + if (check(o, i, g)) + cerr << "check 1 fails" << endl; + __riscv_vse8_v_i8mf8_m(var_11, v, bq, 1); + if (check(ad, f, e)) + cerr << "check 4 fails" << endl; + cerr << "check 7 fails" << endl; + return 0; +} diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/pr109244.C b/gcc/testsuite/g++.target/riscv/rvv/base/pr109244.C new file mode 100644 index 0000000000000000000000000000000000000000..eebfc239d3a81a70bdb200699c6639da70e66382 --- /dev/null +++ b/gcc/testsuite/g++.target/riscv/rvv/base/pr109244.C @@ -0,0 +1,76 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O2" } */ +typedef int a; +using c = float; +template < typename > using e = int; +#pragma riscv intrinsic "vector" +template < typename, int, int f > struct aa { + using g = int; + template < typename > static constexpr int h() { return f; } + template < typename i > using ab = aa< i, 0, h< i >() >; +}; +template < int f > struct p { using j = aa< float, 6, f >; }; +template < int f > struct k { using j = typename p< f >::j; }; +template < typename, int f > using ac = typename k< f >::j; +template < class ad > using l = typename ad::g; +template < class g, class ad > using ab = typename ad::ab< g >; +template < class ad > using ae = ab< e< ad >, ad >; +template < int m > vuint32mf2_t ai(aa< a, m, -1 >, a aj) { + return __riscv_vmv_v_x_u32mf2(aj, 0); +} +template < int m > vfloat32mf2_t ai(aa< c, m, -1 >, c); +template < class ad > using ak = decltype(ai(ad(), l< ad >())); +template < class ad > ak< ad > al(ad d) { + ae< decltype(d) > am; + return an(d, ai(am, 0)); +} +template < typename g, int m > vuint8mf2_t ao(aa< g, m, -1 >, vuint32mf2_t n) { + return __riscv_vreinterpret_v_u32mf2_u8mf2(n); +} +template < int m > vuint32mf2_t ap(aa< a, m, -1 >, vuint8mf2_t n) { + return __riscv_vreinterpret_v_u8mf2_u32mf2(n); +} +template < typename g, int m > vuint8mf2_t ao(aa< g, m, -1 >, vfloat32mf2_t n) { + return __riscv_vreinterpret_v_u32mf2_u8mf2( + __riscv_vreinterpret_v_f32mf2_u32mf2(n)); +} +template < int m > vfloat32mf2_t ap(aa< c, m, -1 >, vuint8mf2_t); +template < class ad, class aq > ak< ad > an(ad d, aq n) { + return ap(d, ao(d, n)); +} +vbool64_t av(vuint32mf2_t, vuint32mf2_t); +template < class ad > bool ba(ad, vbool64_t); +template < class ad > using bb = decltype(al(ad())); +template < typename g > using be = ac< g, -1 >; +struct bf { + template < class ad > bool bh(ad, bb< ad > bi) { + ae< ad > am; + return ba(am, av(an(am, bi), al(am))); + } +}; +int bo; +template < class ad, class bl, typename g > void o(ad d, bl bn, g) { + bb< ad > bq = al(d); + for (; bo;) { + int br = bn.bh(d, bq); + if (__builtin_expect(br, 0)) + for (;;) + ; + } +} +template < class ad, class bl, typename g > void bs(ad d, bl bn, g) { + g bu; + o(d, bn, bu); +} +template < class ad, class bl, typename g > +void bv(ad 
d, bl bn, g *, int, g *bt) { + bs(d, bn, bt); +} +float by; +int bz; +float ca; +void b() { + be< float > d; + bf bn; + bv(d, bn, &by, bz, &ca); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-4.c index 1b0afed037ab735737974c7d91e62c04081cea7a..552c264d8951ccbd08ab49caefea63f843ec2161 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-4.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-4.c @@ -24,3 +24,4 @@ void f2 (void * in, void *out, int32_t x) __riscv_vsm_v_b32 (out, m4, 4); } +/* { dg-final { scan-assembler-times {vmv} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-6.c index 384e2301a69bb683654a71bce1802f4eb6d87382..6a65fb576e88d37653c107accd61453397dc21de 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-6.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-6.c @@ -24,4 +24,5 @@ void f2 (void * in, void *out, int32_t x) __riscv_vsm_v_b32 (out, m4, 4); } +/* { dg-final { scan-assembler-times {vmv} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-127.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-127.c index a353a7ab2d53a5f4a4fc26b6aad321bfe907ba3a..3933c35f4ce714cae2919e3450e219fd421b5e77 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-127.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-127.c @@ -24,4 +24,4 @@ void f2 (void * in, void *out, int32_t x) __riscv_vsm_v_b32 (out, m4, 4); } - +/* { dg-final { scan-assembler-times {vmv} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-3.c new file mode 100644 index 0000000000000000000000000000000000000000..d9cbc853918331abdd7b3964386b7731107fd410 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-3.c @@ -0,0 +1,95 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ + +#include "riscv_vector.h" + +void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift) +{ + vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl); + vuint16m1_t v2 = __riscv_vle16_v_u16m1 (base2, vl); + vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl); + vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl); + asm volatile("#" :: + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27", "v28"); + + vbool16_t v = __riscv_vmseq_vv_u16m1_b16_mu(m1,m2,v1,v2,vl); + asm volatile("#" :: + : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27"); + + __riscv_vsm_v_b16 (out,v,vl); +} + +void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t shift) +{ + vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl); + vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl); + vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl); + asm volatile("#" :: + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27", "v28", "v29"); + + vbool16_t v = 
__riscv_vmseq_vx_u16m1_b16_mu(m1,m2,v1,shift,vl); + asm volatile("#" :: + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27", "v28", "v29"); + + + __riscv_vsm_v_b16 (out,v,vl); +} + +void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift) +{ + vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl); + vuint16m1_t v2 = __riscv_vle16_v_u16m1 (base2, vl); + vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl); + vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl); + asm volatile("#" :: + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27", "v28"); + + vbool16_t v = __riscv_vmsltu_vv_u16m1_b16_mu(m1,m2,v1,v2,vl); + asm volatile("#" :: + : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27"); + + __riscv_vsm_v_b16 (out,v,vl); +} + +void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t shift) +{ + vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl); + vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl); + vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl); + asm volatile("#" :: + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27", "v28", "v29"); + + vbool16_t v = __riscv_vmsltu_vx_u16m1_b16_mu(m1,m2,v1,shift,vl); + asm volatile("#" :: + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27", "v28", "v29"); + + + __riscv_vsm_v_b16 (out,v,vl); +} + +/* { dg-final { scan-assembler-not {vmv} } } */ +/* { dg-final { scan-assembler-not {csrr} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-4.c new file mode 100644 index 0000000000000000000000000000000000000000..db245b025700c93a97e84257283cbbec6eb5cfac --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-4.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ + +#include "riscv_vector.h" + +void f (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t shift) +{ + vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl); + vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl); + vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl); + asm volatile("#" :: + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27", "v28", "v29"); + + vbool16_t v = __riscv_vmsltu_vx_u16m1_b16_mu(m1,m2,v1,shift,vl); + asm volatile("#" :: + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27", "v28", "v29", "v30", "v31"); + + + __riscv_vsm_v_b16 (out,v,vl); +} + +/* { dg-final { scan-assembler-times {vmv} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c index 
b1220c48f1b6a6fb0da498c9cb58bd235ef72780..2f2d85807ec66aece85611f83c0e8d53e097104f 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c @@ -10,20 +10,20 @@ ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** ... -** csrr\ta2,vlenb -** srli\ta2,a2,3 -** slli\ta3,a2,3 -** sub\ta3,a3,a2 -** add\ta3,a3,sp -** vse8.v\tv24,0\(a3\) -** ... -** csrr\ta2,vlenb -** srli\ta2,a2,3 -** slli\ta3,a2,3 -** sub\ta3,a3,a2 -** add\ta3,a3,sp -** vle8.v\tv24,0\(a3\) -** vse8.v\tv24,0\(a1\) +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,3 +** slli\t[a-x0-9]+,[a-x0-9]+,3 +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vse8.v\tv[0-9]+,0\([a-x0-9]+\) +** ... +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,3 +** slli\t[a-x0-9]+,[a-x0-9]+,3 +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vle8.v\tv[0-9]+,0\([a-x0-9]+\) +** vse8.v\tv[0-9]+,0\([a-x0-9]+\) ** csrr\tt0,vlenb ** add\tsp,sp,t0 ** ... @@ -42,21 +42,21 @@ spill_1 (int8_t *in, int8_t *out) ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** vsetvli\ta5,zero,e8,mf4,ta,ma -** vle8.v\tv24,0\(a0\) -** csrr\ta2,vlenb -** srli\ta2,a2,2 -** slli\ta3,a2,2 -** sub\ta3,a3,a2 -** add\ta3,a3,sp -** vse8.v\tv24,0\(a3\) -** ... -** csrr\ta2,vlenb -** srli\ta2,a2,2 -** slli\ta3,a2,2 -** sub\ta3,a3,a2 -** add\ta3,a3,sp -** vle8.v\tv24,0\(a3\) -** vse8.v\tv24,0\(a1\) +** vle8.v\tv[0-9]+,0\(a0\) +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,2 +** slli\t[a-x0-9]+,[a-x0-9]+,2 +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vse8.v\tv[0-9]+,0\([a-x0-9]+\) +** ... +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,2 +** slli\t[a-x0-9]+,[a-x0-9]+,2 +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vle8.v\tv[0-9]+,0\([a-x0-9]+\) +** vse8.v\tv[0-9]+,0\([a-x0-9]+\) ** csrr\tt0,vlenb ** add\tsp,sp,t0 ** ... @@ -75,17 +75,17 @@ spill_2 (int8_t *in, int8_t *out) ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** vsetvli\ta5,zero,e8,mf2,ta,ma -** vle8.v\tv24,0\(a0\) -** csrr\ta3,vlenb -** srli\ta3,a3,1 -** add\ta3,a3,sp -** vse8.v\tv24,0\(a3\) -** ... -** csrr\ta3,vlenb -** srli\ta3,a3,1 -** add\ta3,a3,sp -** vle8.v\tv24,0\(a3\) -** vse8.v\tv24,0\(a1\) +** vle8.v\tv[0-9]+,0\(a0\) +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,1 +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vse8.v\tv[0-9]+,0\([a-x0-9]+\) +** ... +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,1 +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vle8.v\tv[0-9]+,0\([a-x0-9]+\) +** vse8.v\tv[0-9]+,0\([a-x0-9]+\) ** csrr\tt0,vlenb ** add\tsp,sp,t0 ** ... @@ -104,10 +104,10 @@ spill_3 (int8_t *in, int8_t *out) ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** ... -** vs1r.v\tv24,0\(sp\) +** vs1r.v\tv[0-9]+,0\(sp\) ** ... ** vl1re8.v\tv2,0\(sp\) -** vs1r.v\tv2,0\(a1\) +** vs1r.v\tv2,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -128,10 +128,10 @@ spill_4 (int8_t *in, int8_t *out) ** slli\tt1,t0,1 ** sub\tsp,sp,t1 ** ... -** vs2r.v\tv24,0\(sp\) +** vs2r.v\tv[0-9]+,0\(sp\) ** ... ** vl2re8.v\tv4,0\(sp\) -** vs2r.v\tv4,0\(a1\) +** vs2r.v\tv4,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -152,10 +152,10 @@ spill_5 (int8_t *in, int8_t *out) ** slli\tt1,t0,2 ** sub\tsp,sp,t1 ** ... -** vs4r.v\tv24,0\(sp\) +** vs4r.v\tv[0-9]+,0\(sp\) ** ... ** vl4re8.v\tv8,0\(sp\) -** vs4r.v\tv8,0\(a1\) +** vs4r.v\tv8,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -176,10 +176,10 @@ spill_6 (int8_t *in, int8_t *out) ** slli\tt1,t0,3 ** sub\tsp,sp,t1 ** ... -** vs8r.v\tv24,0\(sp\) +** vs8r.v\tv[0-9]+,0\(sp\) ** ... 
** vl8re8.v\tv16,0\(sp\) -** vs8r.v\tv16,0\(a1\) +** vs8r.v\tv16,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -199,21 +199,21 @@ spill_7 (int8_t *in, int8_t *out) ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** vsetvli\ta5,zero,e8,mf8,ta,ma -** vle8.v\tv24,0\(a0\) -** csrr\ta2,vlenb -** srli\ta2,a2,3 -** slli\ta3,a2,3 -** sub\ta3,a3,a2 -** add\ta3,a3,sp -** vse8.v\tv24,0\(a3\) -** ... -** csrr\ta2,vlenb -** srli\ta2,a2,3 -** slli\ta3,a2,3 -** sub\ta3,a3,a2 -** add\ta3,a3,sp -** vle8.v\tv24,0\(a3\) -** vse8.v\tv24,0\(a1\) +** vle8.v\tv[0-9]+,0\(a0\) +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,3 +** slli\t[a-x0-9]+,[a-x0-9]+,3 +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vse8.v\tv[0-9]+,0\([a-x0-9]+\) +** ... +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,3 +** slli\t[a-x0-9]+,[a-x0-9]+,3 +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vle8.v\tv[0-9]+,0\([a-x0-9]+\) +** vse8.v\tv[0-9]+,0\([a-x0-9]+\) ** csrr\tt0,vlenb ** add\tsp,sp,t0 ** ... @@ -232,21 +232,21 @@ spill_8 (uint8_t *in, uint8_t *out) ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** vsetvli\ta5,zero,e8,mf4,ta,ma -** vle8.v\tv24,0\(a0\) -** csrr\ta2,vlenb -** srli\ta2,a2,2 -** slli\ta3,a2,2 -** sub\ta3,a3,a2 -** add\ta3,a3,sp -** vse8.v\tv24,0\(a3\) -** ... -** csrr\ta2,vlenb -** srli\ta2,a2,2 -** slli\ta3,a2,2 -** sub\ta3,a3,a2 -** add\ta3,a3,sp -** vle8.v\tv24,0\(a3\) -** vse8.v\tv24,0\(a1\) +** vle8.v\tv[0-9]+,0\(a0\) +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,2 +** slli\t[a-x0-9]+,[a-x0-9]+,2 +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vse8.v\tv[0-9]+,0\([a-x0-9]+\) +** ... +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,2 +** slli\t[a-x0-9]+,[a-x0-9]+,2 +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vle8.v\tv[0-9]+,0\([a-x0-9]+\) +** vse8.v\tv[0-9]+,0\([a-x0-9]+\) ** csrr\tt0,vlenb ** add\tsp,sp,t0 ** ... @@ -265,17 +265,17 @@ spill_9 (uint8_t *in, uint8_t *out) ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** vsetvli\ta5,zero,e8,mf2,ta,ma -** vle8.v\tv24,0\(a0\) -** csrr\ta3,vlenb -** srli\ta3,a3,1 -** add\ta3,a3,sp -** vse8.v\tv24,0\(a3\) -** ... -** csrr\ta3,vlenb -** srli\ta3,a3,1 -** add\ta3,a3,sp -** vle8.v\tv24,0\(a3\) -** vse8.v\tv24,0\(a1\) +** vle8.v\tv[0-9]+,0\(a0\) +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,1 +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vse8.v\tv[0-9]+,0\([a-x0-9]+\) +** ... +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,1 +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vle8.v\tv[0-9]+,0\([a-x0-9]+\) +** vse8.v\tv[0-9]+,0\([a-x0-9]+\) ** csrr\tt0,vlenb ** add\tsp,sp,t0 ** ... @@ -294,10 +294,10 @@ spill_10 (uint8_t *in, uint8_t *out) ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** ... -** vs1r.v\tv24,0\(sp\) +** vs1r.v\tv[0-9]+,0\(sp\) ** ... ** vl1re8.v\tv2,0\(sp\) -** vs1r.v\tv2,0\(a1\) +** vs1r.v\tv2,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -318,10 +318,10 @@ spill_11 (uint8_t *in, uint8_t *out) ** slli\tt1,t0,1 ** sub\tsp,sp,t1 ** ... -** vs2r.v\tv24,0\(sp\) +** vs2r.v\tv[0-9]+,0\(sp\) ** ... ** vl2re8.v\tv4,0\(sp\) -** vs2r.v\tv4,0\(a1\) +** vs2r.v\tv4,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -342,10 +342,10 @@ spill_12 (uint8_t *in, uint8_t *out) ** slli\tt1,t0,2 ** sub\tsp,sp,t1 ** ... -** vs4r.v\tv24,0\(sp\) +** vs4r.v\tv[0-9]+,0\(sp\) ** ... ** vl4re8.v\tv8,0\(sp\) -** vs4r.v\tv8,0\(a1\) +** vs4r.v\tv8,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -366,10 +366,10 @@ spill_13 (uint8_t *in, uint8_t *out) ** slli\tt1,t0,3 ** sub\tsp,sp,t1 ** ... 
-** vs8r.v\tv24,0\(sp\) +** vs8r.v\tv[0-9]+,0\(sp\) ** ... ** vl8re8.v\tv16,0\(sp\) -** vs8r.v\tv16,0\(a1\) +** vs8r.v\tv16,0\([a-x0-9]+\) ** ... ** jr\tra */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c index ca1904b830d1d4f80998a559523160baaef57e4f..4bcaf4dce79c105fa75ba63141fa34c3f5fbfe64 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c @@ -10,21 +10,21 @@ ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** vsetvli\ta5,zero,e16,mf4,ta,ma -** vle16.v\tv24,0\(a0\) -** csrr\ta2,vlenb -** srli\ta2,a2,2 -** slli\ta3,a2,2 -** sub\ta3,a3,a2 -** add\ta3,a3,sp -** vse16.v\tv24,0\(a3\) -** ... -** csrr\ta2,vlenb -** srli\ta2,a2,2 -** slli\ta3,a2,2 -** sub\ta3,a3,a2 -** add\ta3,a3,sp -** vle16.v\tv24,0\(a3\) -** vse16.v\tv24,0\(a1\) +** vle16.v\tv[0-9]+,0\(a0\) +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,2 +** slli\t[a-x0-9]+,[a-x0-9]+,2 +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vse16.v\tv[0-9]+,0\([a-x0-9]+\) +** ... +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,2 +** slli\t[a-x0-9]+,[a-x0-9]+,2 +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vle16.v\tv[0-9]+,0\([a-x0-9]+\) +** vse16.v\tv[0-9]+,0\([a-x0-9]+\) ** csrr\tt0,vlenb ** add\tsp,sp,t0 ** ... @@ -43,17 +43,17 @@ spill_2 (int16_t *in, int16_t *out) ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** vsetvli\ta5,zero,e16,mf2,ta,ma -** vle16.v\tv24,0\(a0\) -** csrr\ta3,vlenb -** srli\ta3,a3,1 -** add\ta3,a3,sp -** vse16.v\tv24,0\(a3\) -** ... -** csrr\ta3,vlenb -** srli\ta3,a3,1 -** add\ta3,a3,sp -** vle16.v\tv24,0\(a3\) -** vse16.v\tv24,0\(a1\) +** vle16.v\tv[0-9]+,0\(a0\) +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,1 +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vse16.v\tv[0-9]+,0\([a-x0-9]+\) +** ... +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,1 +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vle16.v\tv[0-9]+,0\([a-x0-9]+\) +** vse16.v\tv[0-9]+,0\([a-x0-9]+\) ** csrr\tt0,vlenb ** add\tsp,sp,t0 ** ... @@ -72,10 +72,10 @@ spill_3 (int16_t *in, int16_t *out) ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** ... -** vs1r.v\tv24,0\(sp\) +** vs1r.v\tv[0-9]+,0\(sp\) ** ... ** vl1re16.v\tv2,0\(sp\) -** vs1r.v\tv2,0\(a1\) +** vs1r.v\tv2,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -96,10 +96,10 @@ spill_4 (int16_t *in, int16_t *out) ** slli\tt1,t0,1 ** sub\tsp,sp,t1 ** ... -** vs2r.v\tv24,0\(sp\) +** vs2r.v\tv[0-9]+,0\(sp\) ** ... ** vl2re16.v\tv4,0\(sp\) -** vs2r.v\tv4,0\(a1\) +** vs2r.v\tv4,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -120,10 +120,10 @@ spill_5 (int16_t *in, int16_t *out) ** slli\tt1,t0,2 ** sub\tsp,sp,t1 ** ... -** vs4r.v\tv24,0\(sp\) +** vs4r.v\tv[0-9]+,0\(sp\) ** ... ** vl4re16.v\tv8,0\(sp\) -** vs4r.v\tv8,0\(a1\) +** vs4r.v\tv8,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -144,10 +144,10 @@ spill_6 (int16_t *in, int16_t *out) ** slli\tt1,t0,3 ** sub\tsp,sp,t1 ** ... -** vs8r.v\tv24,0\(sp\) +** vs8r.v\tv[0-9]+,0\(sp\) ** ... ** vl8re16.v\tv16,0\(sp\) -** vs8r.v\tv16,0\(a1\) +** vs8r.v\tv16,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -167,21 +167,21 @@ spill_7 (int16_t *in, int16_t *out) ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** vsetvli\ta5,zero,e16,mf4,ta,ma -** vle16.v\tv24,0\(a0\) -** csrr\ta2,vlenb -** srli\ta2,a2,2 -** slli\ta3,a2,2 -** sub\ta3,a3,a2 -** add\ta3,a3,sp -** vse16.v\tv24,0\(a3\) -** ... 
-** csrr\ta2,vlenb -** srli\ta2,a2,2 -** slli\ta3,a2,2 -** sub\ta3,a3,a2 -** add\ta3,a3,sp -** vle16.v\tv24,0\(a3\) -** vse16.v\tv24,0\(a1\) +** vle16.v\tv[0-9]+,0\(a0\) +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,2 +** slli\t[a-x0-9]+,[a-x0-9]+,2 +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vse16.v\tv[0-9]+,0\([a-x0-9]+\) +** ... +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,2 +** slli\t[a-x0-9]+,[a-x0-9]+,2 +** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vle16.v\tv[0-9]+,0\([a-x0-9]+\) +** vse16.v\tv[0-9]+,0\([a-x0-9]+\) ** csrr\tt0,vlenb ** add\tsp,sp,t0 ** ... @@ -200,17 +200,17 @@ spill_9 (uint16_t *in, uint16_t *out) ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** vsetvli\ta5,zero,e16,mf2,ta,ma -** vle16.v\tv24,0\(a0\) -** csrr\ta3,vlenb -** srli\ta3,a3,1 -** add\ta3,a3,sp -** vse16.v\tv24,0\(a3\) -** ... -** csrr\ta3,vlenb -** srli\ta3,a3,1 -** add\ta3,a3,sp -** vle16.v\tv24,0\(a3\) -** vse16.v\tv24,0\(a1\) +** vle16.v\tv[0-9]+,0\(a0\) +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,1 +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vse16.v\tv[0-9]+,0\([a-x0-9]+\) +** ... +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,1 +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vle16.v\tv[0-9]+,0\([a-x0-9]+\) +** vse16.v\tv[0-9]+,0\([a-x0-9]+\) ** csrr\tt0,vlenb ** add\tsp,sp,t0 ** ... @@ -229,10 +229,10 @@ spill_10 (uint16_t *in, uint16_t *out) ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** ... -** vs1r.v\tv24,0\(sp\) +** vs1r.v\tv[0-9]+,0\(sp\) ** ... ** vl1re16.v\tv2,0\(sp\) -** vs1r.v\tv2,0\(a1\) +** vs1r.v\tv2,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -253,10 +253,10 @@ spill_11 (uint16_t *in, uint16_t *out) ** slli\tt1,t0,1 ** sub\tsp,sp,t1 ** ... -** vs2r.v\tv24,0\(sp\) +** vs2r.v\tv[0-9]+,0\(sp\) ** ... ** vl2re16.v\tv4,0\(sp\) -** vs2r.v\tv4,0\(a1\) +** vs2r.v\tv4,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -277,10 +277,10 @@ spill_12 (uint16_t *in, uint16_t *out) ** slli\tt1,t0,2 ** sub\tsp,sp,t1 ** ... -** vs4r.v\tv24,0\(sp\) +** vs4r.v\tv[0-9]+,0\(sp\) ** ... ** vl4re16.v\tv8,0\(sp\) -** vs4r.v\tv8,0\(a1\) +** vs4r.v\tv8,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -301,10 +301,10 @@ spill_13 (uint16_t *in, uint16_t *out) ** slli\tt1,t0,3 ** sub\tsp,sp,t1 ** ... -** vs8r.v\tv24,0\(sp\) +** vs8r.v\tv[0-9]+,0\(sp\) ** ... ** vl8re16.v\tv16,0\(sp\) -** vs8r.v\tv16,0\(a1\) +** vs8r.v\tv16,0\([a-x0-9]+\) ** ... ** jr\tra */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c index 2039ca345167d328eaa806fc7f6a0aebade8bcae..82d685e029dcae6f33498763ffe11ee47f128a32 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c @@ -10,17 +10,17 @@ ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** vsetvli\ta5,zero,e32,mf2,ta,ma -** vle32.v\tv24,0\(a0\) -** csrr\ta3,vlenb -** srli\ta3,a3,1 -** add\ta3,a3,sp -** vse32.v\tv24,0\(a3\) -** ... -** csrr\ta3,vlenb -** srli\ta3,a3,1 -** add\ta3,a3,sp -** vle32.v\tv24,0\(a3\) -** vse32.v\tv24,0\(a1\) +** vle32.v\tv[0-9]+,0\(a0\) +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,1 +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vse32.v\tv[0-9]+,0\([a-x0-9]+\) +** ... +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,1 +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vle32.v\tv[0-9]+,0\([a-x0-9]+\) +** vse32.v\tv[0-9]+,0\([a-x0-9]+\) ** csrr\tt0,vlenb ** add\tsp,sp,t0 ** ... @@ -39,10 +39,10 @@ spill_3 (int32_t *in, int32_t *out) ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** ... 
-** vs1r.v\tv24,0\(sp\) +** vs1r.v\tv[0-9]+,0\(sp\) ** ... ** vl1re32.v\tv2,0\(sp\) -** vs1r.v\tv2,0\(a1\) +** vs1r.v\tv2,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -63,10 +63,10 @@ spill_4 (int32_t *in, int32_t *out) ** slli\tt1,t0,1 ** sub\tsp,sp,t1 ** ... -** vs2r.v\tv24,0\(sp\) +** vs2r.v\tv[0-9]+,0\(sp\) ** ... ** vl2re32.v\tv4,0\(sp\) -** vs2r.v\tv4,0\(a1\) +** vs2r.v\tv4,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -87,10 +87,10 @@ spill_5 (int32_t *in, int32_t *out) ** slli\tt1,t0,2 ** sub\tsp,sp,t1 ** ... -** vs4r.v\tv24,0\(sp\) +** vs4r.v\tv[0-9]+,0\(sp\) ** ... ** vl4re32.v\tv8,0\(sp\) -** vs4r.v\tv8,0\(a1\) +** vs4r.v\tv8,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -111,10 +111,10 @@ spill_6 (int32_t *in, int32_t *out) ** slli\tt1,t0,3 ** sub\tsp,sp,t1 ** ... -** vs8r.v\tv24,0\(sp\) +** vs8r.v\tv[0-9]+,0\(sp\) ** ... ** vl8re32.v\tv16,0\(sp\) -** vs8r.v\tv16,0\(a1\) +** vs8r.v\tv16,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -134,17 +134,17 @@ spill_7 (int32_t *in, int32_t *out) ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** vsetvli\ta5,zero,e32,mf2,ta,ma -** vle32.v\tv24,0\(a0\) -** csrr\ta3,vlenb -** srli\ta3,a3,1 -** add\ta3,a3,sp -** vse32.v\tv24,0\(a3\) -** ... -** csrr\ta3,vlenb -** srli\ta3,a3,1 -** add\ta3,a3,sp -** vle32.v\tv24,0\(a3\) -** vse32.v\tv24,0\(a1\) +** vle32.v\tv[0-9]+,0\(a0\) +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,1 +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vse32.v\tv[0-9]+,0\([a-x0-9]+\) +** ... +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,1 +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vle32.v\tv[0-9]+,0\([a-x0-9]+\) +** vse32.v\tv[0-9]+,0\([a-x0-9]+\) ** csrr\tt0,vlenb ** add\tsp,sp,t0 ** ... @@ -163,10 +163,10 @@ spill_10 (uint32_t *in, uint32_t *out) ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** ... -** vs1r.v\tv24,0\(sp\) +** vs1r.v\tv[0-9]+,0\(sp\) ** ... ** vl1re32.v\tv2,0\(sp\) -** vs1r.v\tv2,0\(a1\) +** vs1r.v\tv2,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -187,10 +187,10 @@ spill_11 (uint32_t *in, uint32_t *out) ** slli\tt1,t0,1 ** sub\tsp,sp,t1 ** ... -** vs2r.v\tv24,0\(sp\) +** vs2r.v\tv[0-9]+,0\(sp\) ** ... ** vl2re32.v\tv4,0\(sp\) -** vs2r.v\tv4,0\(a1\) +** vs2r.v\tv4,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -211,10 +211,10 @@ spill_12 (uint32_t *in, uint32_t *out) ** slli\tt1,t0,2 ** sub\tsp,sp,t1 ** ... -** vs4r.v\tv24,0\(sp\) +** vs4r.v\tv[0-9]+,0\(sp\) ** ... ** vl4re32.v\tv8,0\(sp\) -** vs4r.v\tv8,0\(a1\) +** vs4r.v\tv8,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -235,10 +235,10 @@ spill_13 (uint32_t *in, uint32_t *out) ** slli\tt1,t0,3 ** sub\tsp,sp,t1 ** ... -** vs8r.v\tv24,0\(sp\) +** vs8r.v\tv[0-9]+,0\(sp\) ** ... ** vl8re32.v\tv16,0\(sp\) -** vs8r.v\tv16,0\(a1\) +** vs8r.v\tv16,0\([a-x0-9]+\) ** ... ** jr\tra */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c index 3c228a00c4886460f8eee6b435a6b6410620f9fc..5b3f75f3552208a25f981748dfd64c1a54b8c98c 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c @@ -10,17 +10,17 @@ ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** vsetvli\ta5,zero,e32,mf2,ta,ma -** vle32.v\tv24,0\(a0\) -** csrr\ta3,vlenb -** srli\ta3,a3,1 -** add\ta3,a3,sp -** vse32.v\tv24,0\(a3\) +** vle32.v\tv[0-9]+,0\(a0\) +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,1 +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vse32.v\tv[0-9]+,0\([a-x0-9]+\) ** ... 
-** csrr\ta3,vlenb -** srli\ta3,a3,1 -** add\ta3,a3,sp -** vle32.v\tv24,0\(a3\) -** vse32.v\tv24,0\(a1\) +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,1 +** add\t[a-x0-9]+,[a-x0-9]+,sp +** vle32.v\tv[0-9]+,0\([a-x0-9]+\) +** vse32.v\tv[0-9]+,0\([a-x0-9]+\) ** csrr\tt0,vlenb ** add\tsp,sp,t0 ** ... @@ -39,10 +39,10 @@ spill_3 (float *in, float *out) ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** ... -** vs1r.v\tv24,0\(sp\) +** vs1r.v\tv[0-9]+,0\(sp\) ** ... ** vl1re32.v\tv2,0\(sp\) -** vs1r.v\tv2,0\(a1\) +** vs1r.v\tv2,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -63,10 +63,10 @@ spill_4 (float *in, float *out) ** slli\tt1,t0,1 ** sub\tsp,sp,t1 ** ... -** vs2r.v\tv24,0\(sp\) +** vs2r.v\tv[0-9]+,0\(sp\) ** ... ** vl2re32.v\tv4,0\(sp\) -** vs2r.v\tv4,0\(a1\) +** vs2r.v\tv4,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -87,10 +87,10 @@ spill_5 (float *in, float *out) ** slli\tt1,t0,2 ** sub\tsp,sp,t1 ** ... -** vs4r.v\tv24,0\(sp\) +** vs4r.v\tv[0-9]+,0\(sp\) ** ... ** vl4re32.v\tv8,0\(sp\) -** vs4r.v\tv8,0\(a1\) +** vs4r.v\tv8,0\([a-x0-9]+\) ** ... ** jr\tra */ @@ -111,10 +111,10 @@ spill_6 (float *in, float *out) ** slli\tt1,t0,3 ** sub\tsp,sp,t1 ** ... -** vs8r.v\tv24,0\(sp\) +** vs8r.v\tv[0-9]+,0\(sp\) ** ... ** vl8re32.v\tv16,0\(sp\) -** vs8r.v\tv16,0\(a1\) +** vs8r.v\tv16,0\([a-x0-9]+\) ** ... ** jr\tra */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c index cf1eea2fa3fc967c4e401bca1759a03825bb408c..2bc54557deecf3aaa1721b98d02d484ad29400af 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c @@ -7,89 +7,92 @@ /* ** spill: -** csrr\tt0,vlenb -** slli\tt1,t0,4 -** sub\tsp,sp,t1 -** vsetvli\ta3,zero,e8,mf8,ta,ma -** vle8.v\tv24,0\(a0\) -** csrr\ta5,vlenb -** srli\ta5,a5,3 -** add\ta5,a5,sp -** vse8.v\tv24,0\(a5\) -** addi\ta5,a0,1 -** vsetvli\ta4,zero,e8,mf4,ta,ma -** vle8.v\tv24,0\(a5\) -** csrr\ta5,vlenb -** srli\ta5,a5,2 -** add\ta5,a5,sp -** vse8.v\tv24,0\(a5\) -** addi\ta2,a0,2 -** vsetvli\ta5,zero,e8,mf2,ta,ma -** vle8.v\tv24,0\(a2\) -** csrr\ta2,vlenb -** srli\ta2,a2,1 -** add\ta2,a2,sp -** vse8.v\tv24,0\(a2\) -** addi\ta2,a0,3 -** vl1re8.v\tv24,0\(a2\) -** csrr\ta2,vlenb -** add\ta2,a2,sp -** vs1r.v\tv24,0\(a2\) -** addi\ta2,a0,4 -** vl2re8.v\tv24,0\(a2\) -** csrr\tt3,vlenb -** slli\ta2,t3,1 -** add\ta2,a2,sp -** vs2r.v\tv24,0\(a2\) -** addi\ta2,a0,5 -** vl4re8.v\tv24,0\(a2\) -** mv\ta2,t3 -** slli\tt3,t3,2 -** add\tt3,t3,sp -** vs4r.v\tv24,0\(t3\) -** addi\ta0,a0,6 -** vl8re8.v\tv24,0\(a0\) -** slli\ta0,a2,3 -** add\ta0,a0,sp -** vs8r.v\tv24,0\(a0\) +** csrr\t[a-x0-9]+,vlenb +** slli\t[a-x0-9]+,[a-x0-9]+,4 +** sub\tsp,[a-x0-9]+,[a-x0-9]+ +** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma +** vle8.v\tv[0-9]+,0\([a-x0-9]+\) +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,3 +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** vse8.v\tv[0-9]+,0\([a-x0-9]+\) +** addi\t[a-x0-9]+,[a-x0-9]+,1 +** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma +** vle8.v\tv[0-9]+,0\([a-x0-9]+\) +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,2 +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** vse8.v\tv[0-9]+,0\([a-x0-9]+\) +** addi\t[a-x0-9]+,[a-x0-9]+,2 +** vsetvli\t[a-x0-9]+,zero,e8,mf2,ta,ma +** vle8.v\tv[0-9]+,0\([a-x0-9]+\) +** csrr\t[a-x0-9]+,vlenb +** srli\t[a-x0-9]+,[a-x0-9]+,1 +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** vse8.v\tv[0-9]+,0\([a-x0-9]+\) +** addi\t[a-x0-9]+,[a-x0-9]+,3 +** vl1re8.v\tv[0-9]+,0\([a-x0-9]+\) +** csrr\t[a-x0-9]+,vlenb +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** 
vs1r.v\tv[0-9]+,0\([a-x0-9]+\) +** addi\t[a-x0-9]+,[a-x0-9]+,4 +** vl2re8.v\tv[0-9]+,0\([a-x0-9]+\) +** csrr\t[a-x0-9]+,vlenb +** slli\t[a-x0-9]+,[a-x0-9]+,1 +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** vs2r.v\tv[0-9]+,0\([a-x0-9]+\) +** addi\t[a-x0-9]+,[a-x0-9]+,5 +** vl4re8.v\tv[0-9]+,0\([a-x0-9]+\) +** mv\t[a-x0-9]+,[a-x0-9]+ +** slli\t[a-x0-9]+,[a-x0-9]+,2 +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** vs4r.v\tv[0-9]+,0\([a-x0-9]+\) +** addi\t[a-x0-9]+,[a-x0-9]+,6 +** vl8re8.v\tv[0-9]+,0\([a-x0-9]+\) ** ... -** srli\ta0,a2,3 -** add\ta0,a0,sp +** slli\t[a-x0-9]+,[a-x0-9]+,3 +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** vs8r.v\tv[0-9]+,0\([a-x0-9]+\) ** ... -** vle8.v\tv27,0\(a0\) -** vse8.v\tv27,0\(a1\) -** addi\ta3,a1,1 -** srli\ta0,a2,2 -** add\ta0,a0,sp +** srli\t[a-x0-9]+,[a-x0-9]+,3 +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ ** ... -** vle8.v\tv27,0\(a0\) -** vse8.v\tv27,0\(a3\) -** addi\ta4,a1,2 -** srli\ta3,a2,1 -** add\ta3,a3,sp +** vle8.v\tv[0-9]+,0\([a-x0-9]+\) +** vse8.v\tv[0-9]+,0\([a-x0-9]+\) +** addi\t[a-x0-9]+,[a-x0-9]+,1 +** srli\t[a-x0-9]+,[a-x0-9]+,2 +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ ** ... -** vle8.v\tv27,0\(a3\) -** vse8.v\tv27,0\(a4\) -** addi\ta5,a1,3 -** add\ta4,a2,sp -** vl1re8.v\tv25,0\(a4\) -** vs1r.v\tv25,0\(a5\) -** addi\ta5,a1,4 -** slli\ta4,a2,1 -** add\ta4,a4,sp -** vl2re8.v\tv26,0\(a4\) -** vs2r.v\tv26,0\(a5\) -** addi\ta5,a1,5 -** vl4re8.v\tv28,0\(t3\) -** vs4r.v\tv28,0\(a5\) -** addi\ta1,a1,6 -** slli\ta5,a2,3 -** add\ta5,a5,sp -** vl8re8.v\tv24,0\(a5\) -** vs8r.v\tv24,0\(a1\) -** csrr\tt0,vlenb -** slli\tt1,t0,4 -** add\tsp,sp,t1 +** vle8.v\tv[0-9]+,0\([a-x0-9]+\) +** vse8.v\tv[0-9]+,0\([a-x0-9]+\) +** addi\t[a-x0-9]+,[a-x0-9]+,2 +** srli\t[a-x0-9]+,[a-x0-9]+,1 +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** ... +** vle8.v\tv[0-9]+,0\([a-x0-9]+\) +** vse8.v\tv[0-9]+,0\([a-x0-9]+\) +** addi\t[a-x0-9]+,[a-x0-9]+,3 +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** vl1re8.v\tv[0-9]+,0\([a-x0-9]+\) +** vs1r.v\tv[0-9]+,0\([a-x0-9]+\) +** addi\t[a-x0-9]+,[a-x0-9]+,4 +** slli\t[a-x0-9]+,[a-x0-9]+,1 +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** vl2re8.v\tv[0-9]+,0\([a-x0-9]+\) +** vs2r.v\tv[0-9]+,0\([a-x0-9]+\) +** addi\t[a-x0-9]+,[a-x0-9]+,5 +** slli\t[a-x0-9]+,[a-x0-9]+,2 +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** vl4re8.v\tv[0-9]+,0\([a-x0-9]+\) +** vs4r.v\tv[0-9]+,0\([a-x0-9]+\) +** addi\t[a-x0-9]+,[a-x0-9]+,6 +** slli\t[a-x0-9]+,[a-x0-9]+,3 +** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+ +** vl8re8.v\tv[0-9]+,0\([a-x0-9]+\) +** vs8r.v\tv[0-9]+,0\([a-x0-9]+\) +** csrr\t[a-x0-9]+,vlenb +** slli\t[a-x0-9]+,[a-x0-9]+,4 +** add\tsp,[a-x0-9]+,[a-x0-9]+ ** ... ** jr\tra */
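
Note (illustrative, not part of the patch): the spill tests above already exercise the new LRA move path; for reference, a minimal standalone sketch of the scenario the added *mov<V_FRACT:mode><P:mode>_lra pattern targets might look like the function below. A fractional-LMUL (here mf8) value is kept live across an asm that clobbers every vector register, so LRA has to spill and reload it even though the partial load/store needs a scratch GPR for the vsetvl. The function name and the compile options (-march=rv64gcv -mabi=lp64d -O2, as used by the spill tests) are assumptions, not taken from the patch.

/* Hypothetical example, not from the patch: force a spill of a
   fractional-LMUL value so the deferred-expansion move pattern is used.  */
#include "riscv_vector.h"

void
spill_mf8 (int8_t *in, int8_t *out)
{
  size_t vl = __riscv_vsetvlmax_e8mf8 ();
  vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, vl);
  /* Clobber all 32 vector registers so V cannot stay in a register
     across the asm and must be spilled to the stack.  */
  asm volatile ("#" ::
                : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
                  "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
                  "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
                  "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
                  "memory");
  __riscv_vse8_v_i8mf8 (out, v, vl);
}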