diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index b2f5d72f494dc2c752859e27195110f3a28444d6..d690162bb0c3a1138b66159bbecc534ebc86cc37 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -624,6 +624,7 @@ enum mask_policy enum tail_policy get_prefer_tail_policy (); enum mask_policy get_prefer_mask_policy (); rtx get_avl_type_rtx (enum avl_type); +opt_machine_mode get_lmul_mode (scalar_mode, int); opt_machine_mode get_vector_mode (scalar_mode, poly_uint64); opt_machine_mode get_tuple_mode (machine_mode, unsigned int); bool simm5_p (rtx); @@ -673,7 +674,7 @@ bool slide1_sew64_helper (int, machine_mode, machine_mode, machine_mode, rtx *); rtx gen_avl_for_scalar_move (rtx); void expand_tuple_move (rtx *); -bool expand_block_move (rtx, rtx, rtx); +bool expand_block_move (rtx, rtx, rtx, bool); machine_mode preferred_simd_mode (scalar_mode); machine_mode get_mask_mode (machine_mode); void expand_vec_series (rtx, rtx, rtx, rtx = 0); diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc index b590c516354355e3e8fd4c70915ba1cafdbae8a8..64fd6b2909211700fe5a0700eecf908cefbf6731 100644 --- a/gcc/config/riscv/riscv-string.cc +++ b/gcc/config/riscv/riscv-string.cc @@ -966,7 +966,7 @@ riscv_expand_block_move_scalar (rtx dest, rtx src, rtx length) /* This function delegates block-move expansion to either the vector implementation or the scalar one. Return TRUE if successful or FALSE - otherwise. */ + otherwise. Assume that the memory regions do not overlap. */ bool riscv_expand_block_move (rtx dest, rtx src, rtx length) @@ -974,7 +974,7 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length) if ((TARGET_VECTOR && !TARGET_XTHEADVECTOR) && stringop_strategy & STRATEGY_VECTOR) { - bool ok = riscv_vector::expand_block_move (dest, src, length); + bool ok = riscv_vector::expand_block_move (dest, src, length, false); if (ok) return true; } @@ -1054,7 +1054,7 @@ namespace riscv_vector { /* Used by cpymemsi in riscv.md . */ bool -expand_block_move (rtx dst_in, rtx src_in, rtx length_in) +expand_block_move (rtx dst_in, rtx src_in, rtx length_in, bool movmem_p) { /* memcpy: @@ -1085,10 +1085,9 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in) { HOST_WIDE_INT length = INTVAL (length_in); - /* By using LMUL=8, we can copy as many bytes in one go as there - are bits in a vector register. If the entire block thus fits, - we don't need a loop. */ - if (length <= TARGET_MIN_VLEN) + /* If the VLEN and preferred LMUL allow the entire block to be copied in + one go then no loop is needed. */ + if (known_le (length, BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL)) { need_loop = false; @@ -1114,19 +1113,32 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in) for small element widths, we might allow larger element widths for loops too. */ if (need_loop) - potential_ew = 1; + { + if (movmem_p) + /* Inlining general memmove is a pessimisation: we can't avoid + having to decide which direction to go at runtime, which is + costly in instruction count however for situations where the + entire move fits in one vector operation we can do all reads + before doing any writes so we don't have to worry so generate + the inline vector code in such situations. */ + return false; + potential_ew = 1; + } for (; potential_ew; potential_ew >>= 1) { scalar_int_mode elem_mode; unsigned HOST_WIDE_INT bits = potential_ew * BITS_PER_UNIT; - unsigned HOST_WIDE_INT per_iter; - HOST_WIDE_INT nunits; + poly_uint64 per_iter; + poly_int64 nunits; if (need_loop) - per_iter = TARGET_MIN_VLEN; + per_iter = BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL; else per_iter = length; - nunits = per_iter / potential_ew; + /* BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL may not be divisible by + this potential_ew. */ + if (!multiple_p (per_iter, potential_ew, &nunits)) + continue; /* Unless we get an implementation that's slow for small element size / non-word-aligned accesses, we assume that the hardware @@ -1137,6 +1149,8 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in) if (length % potential_ew != 0 || !int_mode_for_size (bits, 0).exists (&elem_mode)) continue; + + poly_uint64 mode_units; /* Find the mode to use for the copy inside the loop - or the sole copy, if there is no loop. */ if (!need_loop) @@ -1152,12 +1166,12 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in) pointless. Still, by choosing a lower LMUL factor that still allows an entire transfer, we can reduce register pressure. */ - for (unsigned lmul = 1; lmul <= 4; lmul <<= 1) - if (length * BITS_PER_UNIT <= TARGET_MIN_VLEN * lmul - && multiple_p (BYTES_PER_RISCV_VECTOR * lmul, potential_ew) + for (unsigned lmul = 1; lmul < TARGET_MAX_LMUL; lmul <<= 1) + if (known_le (length * BITS_PER_UNIT, TARGET_MIN_VLEN * lmul) + && multiple_p (BYTES_PER_RISCV_VECTOR * lmul, potential_ew, + &mode_units) && (riscv_vector::get_vector_mode - (elem_mode, exact_div (BYTES_PER_RISCV_VECTOR * lmul, - potential_ew)).exists (&vmode))) + (elem_mode, mode_units).exists (&vmode))) break; } @@ -1165,15 +1179,12 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in) if (vmode != VOIDmode) break; - /* The RVVM8?I modes are notionally 8 * BYTES_PER_RISCV_VECTOR bytes - wide. BYTES_PER_RISCV_VECTOR can't be evenly divided by - the sizes of larger element types; the LMUL factor of 8 can at - the moment be divided by the SEW, with SEW of up to 8 bytes, - but there are reserved encodings so there might be larger - SEW in the future. */ - if (riscv_vector::get_vector_mode - (elem_mode, exact_div (BYTES_PER_RISCV_VECTOR * 8, - potential_ew)).exists (&vmode)) + /* BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL will at least be divisible + by potential_ew 1, so this should succeed eventually. */ + if (multiple_p (BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL, + potential_ew, &mode_units) + && riscv_vector::get_vector_mode (elem_mode, + mode_units).exists (&vmode)) break; /* We may get here if we tried an element size that's larger than @@ -1186,7 +1197,7 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in) } else { - vmode = E_RVVM8QImode; + gcc_assert (get_lmul_mode (QImode, TARGET_MAX_LMUL).exists (&vmode)); } /* A memcpy libcall in the worst case takes 3 instructions to prepare the diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index fba35652cc285ce224a25dfae019d74b2c1e8c63..630fbd80e941171e81cc0ffd2778bcebfb14fd1f 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -1890,6 +1890,18 @@ get_mask_mode (machine_mode mode) return get_vector_mode (BImode, nunits).require (); } +/* Return the appropriate LMUL mode for MODE. */ + +opt_machine_mode +get_lmul_mode (scalar_mode mode, int lmul) +{ + poly_uint64 lmul_nunits; + unsigned int bytes = GET_MODE_SIZE (mode); + if (multiple_p (BYTES_PER_RISCV_VECTOR * lmul, bytes, &lmul_nunits)) + return get_vector_mode (mode, lmul_nunits); + return E_VOIDmode; +} + /* Return the appropriate M1 mode for MODE. */ static opt_machine_mode diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index f1e9bd588500baf7dcd5ef5b0cababadd0c88cdd..5b7b73535a3a9e48ce74f11a233a7c88a5141cf8 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -2761,12 +2761,6 @@ FAIL; }) -;; Inlining general memmove is a pessimisation: we can't avoid having to decide -;; which direction to go at runtime, which is costly in instruction count -;; however for situations where the entire move fits in one vector operation -;; we can do all reads before doing any writes so we don't have to worry -;; so generate the inline vector code in such situations -;; nb. prefer scalar path for tiny memmoves. (define_expand "movmem<mode>" [(parallel [(set (match_operand:BLK 0 "general_operand") (match_operand:BLK 1 "general_operand")) @@ -2774,10 +2768,8 @@ (use (match_operand:SI 3 "const_int_operand"))])] "TARGET_VECTOR" { - if ((INTVAL (operands[2]) >= TARGET_MIN_VLEN / 8) - && (INTVAL (operands[2]) <= TARGET_MIN_VLEN) - && riscv_vector::expand_block_move (operands[0], operands[1], - operands[2])) + if (riscv_vector::expand_block_move (operands[0], operands[1], operands[2], + true)) DONE; else FAIL; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113206-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113206-1.c index ef92c6f35d11ff6be8fb8e99d8a2b84e64b2fcef..45086182aa89959daca40031881b476e089aba71 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113206-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113206-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m8" } */ signed char e; short f = 8; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113206-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113206-2.c index cfce88988f7ebaa09faa64a3ffbbae3bd35813b0..a3c61b467dab41e6ddd33065f1508804ddf86945 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113206-2.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113206-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -frename-registers" } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -frename-registers -mrvv-max-lmul=m8" } */ signed char e; short f = 8; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-1.c index 82039f5ac4e362b292c81570a77ce2c1543743f8..86c2400ce513c0331156248e92f5bc12c156fde2 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */ +/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */ #include "def.h" @@ -143,10 +143,6 @@ DEF_RET1_ARG9 (v1024qi) DEF_RET1_ARG9 (v2048qi) DEF_RET1_ARG9 (v4096qi) -// RET1_ARG0 tests -/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 9 } } */ -/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */ - // v1qi tests: return value (lbu) and function prologue (sb) // 1 lbu per test, argnum sb's when args > 1 /* { dg-final { scan-assembler-times {lbu\s+a0,\s*[0-9]+\(sp\)} 8 } } */ @@ -169,7 +165,4 @@ DEF_RET1_ARG9 (v4096qi) /* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */ // v32-4096qi tests: return value (vse8.v) -/* { dg-final { scan-assembler-times {vse8.v\s+v[0-9],\s*[0-9]+\(a0\)} 74 } } */ -// v1024-4096qi_ARG1 tests: return value (vse64.v) -// for some reason ARG1 returns using vse64 instead of vse8 -/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)\s+ret} 3 } } */ +/* { dg-final { scan-assembler-times {vse8.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-2.c index af52b70398652b6965c7272a11017895711a8b19..c489a9ff796661c4783bde396cd5971f5ef2ef48 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-2.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */ +/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */ #include "def.h" @@ -133,10 +133,6 @@ DEF_RET1_ARG9 (v512hi) DEF_RET1_ARG9 (v1024hi) DEF_RET1_ARG9 (v2048hi) -// RET1_ARG0 tests -/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 8 } } */ -/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */ - // v1hi tests: return value (lhu) and function prologue (sh) // 1 lhu per test, argnum sh's when args > 1 /* { dg-final { scan-assembler-times {lhu\s+a0,\s*[0-9]+\(sp\)} 8 } } */ @@ -155,7 +151,4 @@ DEF_RET1_ARG9 (v2048hi) /* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */ // v16-2048hi tests: return value (vse16.v) -/* { dg-final { scan-assembler-times {vse16.v\s+v[0-9],\s*[0-9]+\(a0\)} 74 } } */ -// v512-2048qi_ARG1 tests: return value (vse64.v) -// for some reason ARG1 returns using vse64 instead of vse16 -/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)\s+ret} 3 } } */ +/* { dg-final { scan-assembler-times {vse16.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-3.c index 01c5a1a1ba286b60c81c9da7acfa57584b744d99..97a3282a6570aada9eadec39de99436f9b7ec724 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-3.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */ +/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */ #include "def.h" @@ -123,10 +123,6 @@ DEF_RET1_ARG9 (v256si) DEF_RET1_ARG9 (v512si) DEF_RET1_ARG9 (v1024si) -// RET1_ARG0 tests -/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 7 } } */ -/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */ - // v1si tests: return value (lw) and function prologue (sw) // 1 lw per test, argnum sw's when args > 1 /* { dg-final { scan-assembler-times {lw\s+a0,\s*[0-9]+\(sp\)} 8 } } */ @@ -140,7 +136,4 @@ DEF_RET1_ARG9 (v1024si) /* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */ // v8-1024si tests: return value (vse32.v) -/* { dg-final { scan-assembler-times {vse32.v\s+v[0-9],\s*[0-9]+\(a0\)} 74 } } */ -// 256-1024si tests: return value (vse64.v) -// for some reason ARG1 returns using vse64 instead of vse32 -/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)\s+ret} 3 } } */ +/* { dg-final { scan-assembler-times {vse32.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-4.c index 2c01aa8c260fb61fc8a27e5c3b5452702ba8eedb..a892919feb7c49490ad2f1dab70a27b49a36066d 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-4.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-4.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */ +/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */ #include "def.h" @@ -113,10 +113,6 @@ DEF_RET1_ARG9 (v128di) DEF_RET1_ARG9 (v256di) DEF_RET1_ARG9 (v512di) -// RET1_ARG0 tests -/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 6 } } */ -/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */ - // v1di and v2di tests: return value (ld) and function prologue (sd) // - 1 ld per v1di and 2 ld per v2di with args > 1 // - argnum sd's per v1di when argnum > 1 @@ -125,4 +121,4 @@ DEF_RET1_ARG9 (v512di) /* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */ // v4-512di tests: return value (vse64.v) -/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)} 77 } } */ +/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-5.c index 98d6d4a758a5a0d55617724d41e39f10c45bc092..0d40349fb0f81d775e9ddd3584a225b6815c283c 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-5.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-5.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */ #include "def.h" @@ -133,10 +133,6 @@ DEF_RET1_ARG9 (v512hf) DEF_RET1_ARG9 (v1024hf) DEF_RET1_ARG9 (v2048hf) -// RET1_ARG0 tests -/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 8 } } */ -/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */ - // v1hf tests: return value (lhu) and function prologue (sh) // 1 lhu per test, argnum sh's when args > 1 /* { dg-final { scan-assembler-times {lhu\s+a[0-1],\s*[0-9]+\(sp\)} 8 } } */ @@ -155,7 +151,4 @@ DEF_RET1_ARG9 (v2048hf) /* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */ // v16-2048hf tests: return value (vse16.v) -/* { dg-final { scan-assembler-times {vse16.v\s+v[0-9],\s*[0-9]+\(a0\)} 74 } } */ -// v512-2048qf_ARG1 tests: return value (vse64.v) -// for some reason ARG1 returns using vse64 instead of vse16 -/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)\s+ret} 3 } } */ +/* { dg-final { scan-assembler-times {vse16.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-6.c index 5f59f001969e935bc2664ac03c72f169190fcf72..8b5a779467db952365a4b546a514dcec6997b0ea 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-6.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-6.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */ +/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */ #include "def.h" @@ -123,10 +123,6 @@ DEF_RET1_ARG9 (v256sf) DEF_RET1_ARG9 (v512sf) DEF_RET1_ARG9 (v1024sf) -// RET1_ARG0 tests -/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 7 } } */ -/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */ - // v1sf tests: return value (lw) and function prologue (sw) // 1 lw per test, argnum sw's when args > 1 /* { dg-final { scan-assembler-times {lw\s+a[0-1],\s*[0-9]+\(sp\)} 8 } } */ @@ -140,7 +136,4 @@ DEF_RET1_ARG9 (v1024sf) /* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */ // v8-1024sf tests: return value (vse32.v) -/* { dg-final { scan-assembler-times {vse32.v\s+v[0-9],\s*[0-9]+\(a0\)} 74 } } */ -// 256-1024sf tests: return value (vse64.v) -// for some reason ARG1 returns using vse64 instead of vse32 -/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)\s+ret} 3 } } */ +/* { dg-final { scan-assembler-times {vse32.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-7.c index 1d427fd08d6c4213bc94260f7c5b101b4bf2fccd..3ba4e1f186427cee73cfe6889df929c28d5a10d8 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-7.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-7.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */ +/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */ #include "def.h" @@ -113,10 +113,6 @@ DEF_RET1_ARG9 (v128df) DEF_RET1_ARG9 (v256df) DEF_RET1_ARG9 (v512df) -// RET1_ARG0 tests -/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 6 } } */ -/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */ - // v1df and v2df tests: return value (ld) and function prologue (sd) // - 1 ld per v1df and 2 ld per v2df with args > 1 // - argnum sd's per v1df when argnum > 1 @@ -125,4 +121,4 @@ DEF_RET1_ARG9 (v512df) /* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */ // v4-512df tests: return value (vse64.v) -/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)} 77 } } */ +/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-4.c index 1faf31ffd8e07df9c573353c2a021f77046606e3..1df83847363aa9f96a70c41c0505e715a52612ff 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-4.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-4.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -mrvv-max-lmul=m8" } */ #include "def.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-7.c index e3980a2954064ded657e8be84835e98fe707a066..74b7b699f1af5e9c7f8bb472ce3e277b3d25f088 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-7.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-7.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -mrvv-max-lmul=m8" } */ #include "def.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-1.c index 0699cb78dd5125e651c5ede0463cdbf05865bc7a..6edb4c9253a4b65562c96b56ef96f15d557d9983 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-1.c @@ -12,7 +12,7 @@ extern void *memcpy(void *__restrict dest, const void *__restrict src, __SIZE_TY /* memcpy should be implemented using the cpymem pattern. ** f1: XX \.L\d+: # local label is ignored -** vsetvli\s+[ta][0-7],a2,e8,m8,ta,ma +** vsetvli\s+[ta][0-7],a2,e8,m1,ta,ma ** vle8\.v\s+v\d+,0\(a1\) ** vse8\.v\s+v\d+,0\(a0\) ** add\s+a1,a1,[ta][0-7] @@ -31,7 +31,7 @@ void f1 (void *a, void *b, __SIZE_TYPE__ l) overflow is undefined. ** f2: XX \.L\d+: # local label is ignored -** vsetvli\s+[ta][0-7],a2,e8,m8,ta,ma +** vsetvli\s+[ta][0-7],a2,e8,m1,ta,ma ** vle8\.v\s+v\d+,0\(a1\) ** vse8\.v\s+v\d+,0\(a0\) ** add\s+a1,a1,[ta][0-7] diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-2.c index 6a854c87cd061ed40fd3b10346c4662ccbff2811..7b6a429f34cf4b32264009eccaf8a87c73d9205b 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-2.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-additional-options "-O1 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-additional-options "-O1 -fno-schedule-insns -fno-schedule-insns2 -mrvv-max-lmul=m8" } */ /* { dg-add-options riscv_v } */ /* { dg-final { check-function-bodies "**" "" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/movmem-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/movmem-1.c index d9d4a70a392b35faf4ecd3f367bf248a993d5b9f..1f148bc70528ccbb15103b3f6a6e9722c44ec2fe 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/movmem-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/movmem-1.c @@ -7,13 +7,14 @@ /* Tiny memmoves should not be vectorised. ** f1: -** li\s+a2,\d+ -** tail\s+memmove +** lbu\s+[ta][0-7],0\(a1\) +** sb\s+[ta][0-7],0\(a0\) +** ret */ char * f1 (char *a, char const *b) { - return __builtin_memmove (a, b, MIN_VECTOR_BYTES - 1); + return __builtin_memmove (a, b, 1); } /* Vectorise+inline minimum vector register width with LMUL=1 diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-0.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-0.c index 8265105f4eb16e02ffb89676998ebf6ca1d73533..7e40ac583bb2f737485beba44a39878184ac5e66 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-0.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-0.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */ #include "riscv_vector.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-1.c index 682d3e9cb7e9dc606bbce0634c00fe9925906cf1..c5be5b1d28e30cc7053f4f903a031acd820f8218 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */ #include "riscv_vector.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-2.c index 73a9f51a16b67aeeacff6ed7099792ad7df33e8c..8f66d9670f3f532ce066a19f743f79d4700f7223 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-2.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */ #include "riscv_vector.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-3.c index bec9b28008d4f6b2004c73519b19cda3e968885b..3e23ae717ae612d024b769d95c3b20a6df217992 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-3.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */ #include "riscv_vector.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-4.c index c8978052b91b80fe56fe1c0617fe9a01c4086b50..11cdc74ec728a61c3477cf7ea1da82bf40cbb06d 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-4.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-4.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */ #include "riscv_vector.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-5.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-5.c index 5604ca280fe42190d8bffe97834ca8db4a311d0d..7a5d04e3c5ceb81cde7cf519f168a5432118af58 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-5.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-5.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */ #include "riscv_vector.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-6.c index 9c6484479cf7cd598c9f429c49ca381465f44c39..ba96b3402790fbfd82139480bd29409cc71822b1 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-6.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-6.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */ #include "riscv_vector.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-7.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-7.c index 0bb2260cf1cf1c9c231edbbd958789472f22c33a..c0e8d6f1b392f2d825a623bf654b717df3b43347 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-7.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-7.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */ #include "riscv_vector.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-8.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-8.c index 1ad588ff8adc2a6c2bd8772b36f56f86f0bc3b7a..917430096395dda355df283a7d7e6d0c66949ce8 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-8.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-8.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */ #include "riscv_vector.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-9.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-9.c index 5b28863b6ada8694bc8dd938b48814771605a60d..ac7ec74472dec7d4514de34350995a87a03dd793 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-9.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-9.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */ #include "riscv_vector.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929-1.c index e55604e1114c403207e8c8c3f8e563dfced18d7d..9ab04b07c1275924f9a78502114718eaa9a77777 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929-1.c @@ -52,7 +52,7 @@ int main() { printf("%d\n", m); } -/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */ +/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */ /* { dg-final { scan-assembler-not {vsetivli} } } */ -/* { dg-final { scan-assembler-times {vsetvli\tzero,\s*[a-x0-9]+,\s*e8,\s*m2,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */ -/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,\s*32} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */ +/* { dg-final { scan-assembler-times {vsetvli\t[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*m1,\s*t[au],\s*m[au]} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */ +/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,\s*32} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988-1.c index b20e46395aae2aaffe5dce6b929166b73f02623e..1facfd55d797196a5807db684196f8383bc66e28 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988-1.c @@ -62,7 +62,7 @@ int main() { return 0; } -/* { dg-final { scan-assembler-times {vsetvli} 4 } } */ +/* { dg-final { scan-assembler-times {vsetvli} 5 } } */ /* { dg-final { scan-assembler-not {vsetivli} } } */ -/* { dg-final { scan-assembler-times {vsetvli\tzero,\s*[a-x0-9]+,\s*e8,\s*m2,\s*t[au],\s*m[au]} 1 } } */ -/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,\s*32} 1 } } */ +/* { dg-final { scan-assembler-times {vsetvli\t[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*m1,\s*t[au],\s*m[au]} 2 } } */ +/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,\s*32} 2 } } */