diff --git a/gcc/config/riscv/riscv-selftests.cc b/gcc/config/riscv/riscv-selftests.cc index cdc863ee4f7659c1356c37c32d82a93da545b056..0ac17fb70a1ff89bd866dd56dae8d3926a430f1f 100644 --- a/gcc/config/riscv/riscv-selftests.cc +++ b/gcc/config/riscv/riscv-selftests.cc @@ -267,15 +267,14 @@ run_const_vector_selftests (void) rtx dup = gen_const_vec_duplicate (mode, GEN_INT (val)); emit_move_insn (dest, dup); rtx_insn *insn = get_last_insn (); - rtx src = XEXP (SET_SRC (PATTERN (insn)), 1); + rtx src = SET_SRC (PATTERN (insn)); /* 1. Should be vmv.v.i for in rang of -16 ~ 15. 2. Should be vmv.v.x for exceed -16 ~ 15. */ if (IN_RANGE (val, -16, 15)) - ASSERT_TRUE (rtx_equal_p (src, dup)); - else ASSERT_TRUE ( - rtx_equal_p (src, - gen_rtx_VEC_DUPLICATE (mode, XEXP (src, 0)))); + rtx_equal_p (XEXP (SET_SRC (PATTERN (insn)), 1), dup)); + else + ASSERT_TRUE (GET_CODE (src) == VEC_DUPLICATE); end_sequence (); } } @@ -294,10 +293,9 @@ run_const_vector_selftests (void) rtx dup = gen_const_vec_duplicate (mode, ele); emit_move_insn (dest, dup); rtx_insn *insn = get_last_insn (); - rtx src = XEXP (SET_SRC (PATTERN (insn)), 1); + rtx src = SET_SRC (PATTERN (insn)); /* Should always be vfmv.v.f. */ - ASSERT_TRUE ( - rtx_equal_p (src, gen_rtx_VEC_DUPLICATE (mode, XEXP (src, 0)))); + ASSERT_TRUE (GET_CODE (src) == VEC_DUPLICATE); end_sequence (); } } diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 895c11d13fcd501acdfcb1802f60edefdd1dcef6..6116f5df504461eadebf8ce10379af1d57cf10f2 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -1001,8 +1001,31 @@ expand_const_vector (rtx target, rtx src) } else { - rtx ops[] = {tmp, elt}; - emit_vlmax_insn (code_for_pred_broadcast (mode), UNARY_OP, ops); + /* Emit vec_duplicate<mode> split pattern before RA so that + we could have a better optimization opportunity in LICM + which will hoist vmv.v.x outside the loop and in fwprop && combine + which will transform 'vv' into 'vx' instruction. + + The reason we don't emit vec_duplicate<mode> split pattern during + RA since the split stage after RA is a too late stage to generate + RVV instruction which need an additional register (We can't + allocate a new register after RA) for VL operand of vsetvl + instruction (vsetvl a5, zero). */ + if (lra_in_progress) + { + rtx ops[] = {tmp, elt}; + emit_vlmax_insn (code_for_pred_broadcast (mode), UNARY_OP, ops); + } + else + { + struct expand_operand ops[2]; + enum insn_code icode = optab_handler (vec_duplicate_optab, mode); + gcc_assert (icode != CODE_FOR_nothing); + create_output_operand (&ops[0], tmp, mode); + create_input_operand (&ops[1], elt, GET_MODE_INNER (mode)); + expand_insn (icode, 2, ops); + tmp = ops[0].value; + } } if (tmp != target) diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c index 3dfc6f16a250bb69150f032d91d3e2ba4c12ede8..2a735d8c6b6570b885dad58d04b9815c2c0aa5b2 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c @@ -18,7 +18,8 @@ bar (int *x, int a, int b, int n) } /* { dg-final { scan-assembler {e32,m2} } } */ -/* { dg-final { scan-assembler-times {csrr} 1 } } */ +/* { dg-final { scan-assembler-not {jr} } } */ +/* { dg-final { scan-assembler-times {ret} 2 } } * /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111848.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111848.c new file mode 100644 index 0000000000000000000000000000000000000000..b203ca907fa1dc39183e2a43c33c541baa59f06e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111848.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -fdump-tree-vect-details" } */ + +void +f3 (uint8_t *restrict a, uint8_t *restrict b, + uint8_t *restrict c, uint8_t *restrict d, + int n) +{ + for (int i = 0; i < n; ++i) + { + a[i * 8] = c[i * 8] + d[i * 8]; + a[i * 8 + 1] = c[i * 8] + d[i * 8 + 1]; + a[i * 8 + 2] = c[i * 8 + 2] + d[i * 8 + 2]; + a[i * 8 + 3] = c[i * 8 + 2] + d[i * 8 + 3]; + a[i * 8 + 4] = c[i * 8 + 4] + d[i * 8 + 4]; + a[i * 8 + 5] = c[i * 8 + 4] + d[i * 8 + 5]; + a[i * 8 + 6] = c[i * 8 + 6] + d[i * 8 + 6]; + a[i * 8 + 7] = c[i * 8 + 6] + d[i * 8 + 7]; + b[i * 8] = c[i * 8 + 1] + d[i * 8]; + b[i * 8 + 1] = c[i * 8 + 1] + d[i * 8 + 1]; + b[i * 8 + 2] = c[i * 8 + 3] + d[i * 8 + 2]; + b[i * 8 + 3] = c[i * 8 + 3] + d[i * 8 + 3]; + b[i * 8 + 4] = c[i * 8 + 5] + d[i * 8 + 4]; + b[i * 8 + 5] = c[i * 8 + 5] + d[i * 8 + 5]; + b[i * 8 + 6] = c[i * 8 + 7] + d[i * 8 + 6]; + b[i * 8 + 7] = c[i * 8 + 7] + d[i * 8 + 7]; + } +} + +/* { dg-final { scan-assembler {e8,m4} } } */ +/* { dg-final { scan-assembler-not {jr} } } */ +/* { dg-final { scan-assembler-times {ret} 1 } } * +/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ +/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */