diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 7fe26fcd9391661b3cbdfa8fccfa73f866c85bcf..b3f0bdb9924d380b984836f0ef157570526cb318 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -708,6 +708,7 @@ bool can_be_broadcasted_p (rtx);
 bool gather_scatter_valid_offset_p (machine_mode);
 HOST_WIDE_INT estimated_poly_value (poly_int64, unsigned int);
 bool whole_reg_to_reg_move_p (rtx *, machine_mode, int);
+bool splat_to_scalar_move_p (rtx *);
 }
 
 /* We classify builtin types into two classes:
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 93a1238a5abbd67a97f40d63ccffecd1dcd1e043..4bacb7fea45ffa1881ec6dc108109387dc60e5c1 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -5151,4 +5151,16 @@ whole_reg_to_reg_move_p (rtx *ops, machine_mode mode, int avl_type_index)
   return false;
 }
 
+/* Return true if we can transform vmv.v.x/vfmv.v.f to vmv.s.x/vfmv.s.f.  */
+bool
+splat_to_scalar_move_p (rtx *ops)
+{
+  return satisfies_constraint_Wc1 (ops[1])
+	 && satisfies_constraint_vu (ops[2])
+	 && !MEM_P (ops[3])
+	 && satisfies_constraint_c01 (ops[4])
+	 && INTVAL (ops[7]) == NONVLMAX
+	 && known_ge (GET_MODE_SIZE (Pmode), GET_MODE_SIZE (GET_MODE (ops[3])));
+}
+
 } // namespace riscv_vector
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 307d9a8c952e64016795c8d747b8744799a6a4fb..ab6e099852dc978342fa89ca09734cc07d2d8162 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1977,8 +1977,15 @@
 	  (match_operand:V_VLS 2 "vector_merge_operand")))]
   "TARGET_VECTOR"
 {
+  /* Transform vmv.v.x/vfmv.v.f (avl = 1) into vmv.s.x/vfmv.s.f, since the
+     latter has better chances of vsetvl fusion in the vsetvl pass.  */
+  if (riscv_vector::splat_to_scalar_move_p (operands))
+    {
+      operands[1] = riscv_vector::gen_scalar_move_mask (<VM>mode);
+      operands[3] = force_reg (<VEL>mode, operands[3]);
+    }
   /* Handle vmv.s.x instruction (Wb1 mask) which has memory scalar.  */
-  if (satisfies_constraint_Wdm (operands[3]))
+  else if (satisfies_constraint_Wdm (operands[3]))
     {
       if (satisfies_constraint_Wb1 (operands[1]))
	{
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/attribute-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/attribute-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..b3fec269301fd12c458637671d49d09d9c47919d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/attribute-2.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (uint32_t *outputMat, uint32_t *inputMat)
+{
+  vuint32m1_t matRegIn0 = __riscv_vle32_v_u32m1 (inputMat, 4);
+  vuint32m1_t matRegIn1 = __riscv_vle32_v_u32m1 (inputMat + 4, 4);
+  vuint32m1_t matRegIn2 = __riscv_vle32_v_u32m1 (inputMat + 8, 4);
+  vuint32m1_t matRegIn3 = __riscv_vle32_v_u32m1 (inputMat + 12, 4);
+
+  vbool32_t oddMask
+    = __riscv_vreinterpret_v_u32m1_b32 (__riscv_vmv_v_x_u32m1 (0xaaaa, 1));
+
+  vuint32m1_t smallTransposeMat0
+    = __riscv_vslideup_vx_u32m1_tumu (oddMask, matRegIn0, matRegIn1, 1, 4);
+  vuint32m1_t smallTransposeMat2
+    = __riscv_vslideup_vx_u32m1_tumu (oddMask, matRegIn2, matRegIn3, 1, 4);
+
+  vuint32m1_t outMat0 = __riscv_vslideup_vx_u32m1_tu (smallTransposeMat0,
+						      smallTransposeMat2, 2, 4);
+
+  __riscv_vse32_v_u32m1 (outputMat, outMat0, 4);
+}
+
+void
+foo2 (void *outputMat, void *inputMat)
+{
+  vfloat32m1_t v = __riscv_vfmv_v_f_f32m1 (0xaaaa, 1);
+  __riscv_vse32_v_f32m1 (outputMat, v, 4);
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*4,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 2 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/attribute-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/attribute-3.c
new file mode 100644
index 0000000000000000000000000000000000000000..643f6a96aec0060b783c8f3f371001f0c977b2af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/attribute-3.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void matrix_transpose_in_register(uint32_t* outputMat, uint32_t* inputMat) {
+  vuint32m1_t matRegIn0 = __riscv_vle32_v_u32m1(inputMat, 4);
+  vuint32m1_t matRegIn1 = __riscv_vle32_v_u32m1(inputMat + 4, 4);
+  vuint32m1_t matRegIn2 = __riscv_vle32_v_u32m1(inputMat + 8, 4);
+  vuint32m1_t matRegIn3 = __riscv_vle32_v_u32m1(inputMat + 12, 4);
+
+  vbool32_t oddMask = __riscv_vreinterpret_v_u32m1_b32(__riscv_vmv_v_x_u32m1(0xaaaa, 1));
+
+  vuint32m1_t smallTransposeMat0 = __riscv_vslideup_vx_u32m1_tumu(oddMask, matRegIn0, matRegIn1, 1, 4);
+  vuint32m1_t smallTransposeMat2 = __riscv_vslideup_vx_u32m1_tumu(oddMask, matRegIn2, matRegIn3, 1, 4);
+
+  vbool32_t evenMask = __riscv_vreinterpret_v_u32m1_b32(__riscv_vmv_v_x_u32m1(0x5555, 1));
+
+  vuint32m1_t smallTransposeMat1 = __riscv_vslidedown_vx_u32m1_tumu(evenMask, matRegIn1, matRegIn0, 1, 4);
+  vuint32m1_t smallTransposeMat3 = __riscv_vslidedown_vx_u32m1_tumu(evenMask, matRegIn3, matRegIn2, 1, 4);
+
+  vuint32m1_t outMat0 = __riscv_vslideup_vx_u32m1_tu(smallTransposeMat0, smallTransposeMat2, 2, 4);
+  vuint32m1_t outMat1 = __riscv_vslideup_vx_u32m1_tu(smallTransposeMat1, smallTransposeMat3, 2, 4);
+
+  vuint32m1_t outMat2 = __riscv_vslidedown_vx_u32m1_tu(smallTransposeMat2, smallTransposeMat0, 2, 2);
+  vuint32m1_t outMat3 = __riscv_vslidedown_vx_u32m1_tu(smallTransposeMat3, smallTransposeMat1, 2, 2);
+  __riscv_vse32_v_u32m1(outputMat, outMat0, 4);
+  __riscv_vse32_v_u32m1(outputMat + 4, outMat1, 4);
+  __riscv_vse32_v_u32m1(outputMat + 8, outMat2, 4);
+  __riscv_vse32_v_u32m1(outputMat + 12, outMat3, 4);
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*4,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*2,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 3 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
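
Note (not part of the patch): a minimal sketch of the codegen this enables,
mirroring foo2 from attribute-2.c above; the function name splat_then_store
is hypothetical.  Built with -march=rv64gcv -mabi=lp64d -O3, the AVL = 1
splat below previously expanded as vmv.v.x and demanded its own vset*
instruction; with the transformation it should expand as vmv.s.x, whose
vl/vtype demand the vsetvl pass can fuse with the AVL = 4 store, leaving a
single vsetivli zero,4,e32,m1 for the whole function (the behavior the
scan-assembler directives above check for).

#include "riscv_vector.h"

void
splat_then_store (uint32_t *out)
{
  /* AVL = 1 splat of a non-memory scalar with an all-ones mask and an
     undefined merge operand: splat_to_scalar_move_p accepts this shape,
     so the expander rewrites it into a scalar move (vmv.s.x).  */
  vuint32m1_t v = __riscv_vmv_v_x_u32m1 (0xaaaa, 1);

  /* AVL = 4 store; its vsetivli zero,4,e32,m1 now covers the splat too.  */
  __riscv_vse32_v_u32m1 (out, v, 4);
}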