diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index b2f5d72f494dc2c752859e27195110f3a28444d6..d690162bb0c3a1138b66159bbecc534ebc86cc37 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -624,6 +624,7 @@ enum mask_policy
 enum tail_policy get_prefer_tail_policy ();
 enum mask_policy get_prefer_mask_policy ();
 rtx get_avl_type_rtx (enum avl_type);
+opt_machine_mode get_lmul_mode (scalar_mode, int);
 opt_machine_mode get_vector_mode (scalar_mode, poly_uint64);
 opt_machine_mode get_tuple_mode (machine_mode, unsigned int);
 bool simm5_p (rtx);
@@ -673,7 +674,7 @@ bool slide1_sew64_helper (int, machine_mode, machine_mode,
 			  machine_mode, rtx *);
 rtx gen_avl_for_scalar_move (rtx);
 void expand_tuple_move (rtx *);
-bool expand_block_move (rtx, rtx, rtx);
+bool expand_block_move (rtx, rtx, rtx, bool);
 machine_mode preferred_simd_mode (scalar_mode);
 machine_mode get_mask_mode (machine_mode);
 void expand_vec_series (rtx, rtx, rtx, rtx = 0);
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index b590c516354355e3e8fd4c70915ba1cafdbae8a8..64fd6b2909211700fe5a0700eecf908cefbf6731 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -966,7 +966,7 @@ riscv_expand_block_move_scalar (rtx dest, rtx src, rtx length)
 
 /* This function delegates block-move expansion to either the vector
    implementation or the scalar one.  Return TRUE if successful or FALSE
-   otherwise.  */
+   otherwise.  Assume that the memory regions do not overlap.  */
 
 bool
 riscv_expand_block_move (rtx dest, rtx src, rtx length)
@@ -974,7 +974,7 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
   if ((TARGET_VECTOR && !TARGET_XTHEADVECTOR)
       && stringop_strategy & STRATEGY_VECTOR)
     {
-      bool ok = riscv_vector::expand_block_move (dest, src, length);
+      bool ok = riscv_vector::expand_block_move (dest, src, length, false);
       if (ok)
 	return true;
     }
@@ -1054,7 +1054,7 @@ namespace riscv_vector {
 /* Used by cpymemsi in riscv.md .  */
 
 bool
-expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
+expand_block_move (rtx dst_in, rtx src_in, rtx length_in, bool movmem_p)
 {
   /*
     memcpy:
@@ -1085,10 +1085,9 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
     {
       HOST_WIDE_INT length = INTVAL (length_in);
 
-      /* By using LMUL=8, we can copy as many bytes in one go as there
-	 are bits in a vector register.  If the entire block thus fits,
-	 we don't need a loop.  */
-      if (length <= TARGET_MIN_VLEN)
+      /* If the VLEN and preferred LMUL allow the entire block to be copied in
+	 one go then no loop is needed.  */
+      if (known_le (length, BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL))
 	{
 	  need_loop = false;
 
@@ -1114,19 +1113,32 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
 	 for small element widths, we might allow larger element widths for
 	 loops too.  */
       if (need_loop)
-	potential_ew = 1;
+	{
+	  if (movmem_p)
+	    /* Inlining general memmove is a pessimisation: we can't avoid
+	       having to decide which direction to go at runtime, which is
+	       costly in instruction count however for situations where the
+	       entire move fits in one vector operation we can do all reads
+	       before doing any writes so we don't have to worry so generate
+	       the inline vector code in such situations.  */
+	    return false;
+	  potential_ew = 1;
+	}
       for (; potential_ew; potential_ew >>= 1)
 	{
 	  scalar_int_mode elem_mode;
 	  unsigned HOST_WIDE_INT bits = potential_ew * BITS_PER_UNIT;
-	  unsigned HOST_WIDE_INT per_iter;
-	  HOST_WIDE_INT nunits;
+	  poly_uint64 per_iter;
+	  poly_int64 nunits;
 
 	  if (need_loop)
-	    per_iter = TARGET_MIN_VLEN;
+	    per_iter = BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL;
 	  else
 	    per_iter = length;
-	  nunits = per_iter / potential_ew;
+	  /* BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL may not be divisible by
+	     this potential_ew.  */
+	  if (!multiple_p (per_iter, potential_ew, &nunits))
+	    continue;
 
 	  /* Unless we get an implementation that's slow for small element
 	     size / non-word-aligned accesses, we assume that the hardware
@@ -1137,6 +1149,8 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
 	  if (length % potential_ew != 0
 	      || !int_mode_for_size (bits, 0).exists (&elem_mode))
 	    continue;
+
+	  poly_uint64 mode_units;
 	  /* Find the mode to use for the copy inside the loop - or the
 	     sole copy, if there is no loop.  */
 	  if (!need_loop)
@@ -1152,12 +1166,12 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
 		 pointless.
 		 Still, by choosing a lower LMUL factor that still allows
 		 an entire transfer, we can reduce register pressure.  */
-	      for (unsigned lmul = 1; lmul <= 4; lmul <<= 1)
-		if (length * BITS_PER_UNIT <= TARGET_MIN_VLEN * lmul
-		    && multiple_p (BYTES_PER_RISCV_VECTOR * lmul, potential_ew)
+	      for (unsigned lmul = 1; lmul < TARGET_MAX_LMUL; lmul <<= 1)
+		if (known_le (length * BITS_PER_UNIT, TARGET_MIN_VLEN * lmul)
+		    && multiple_p (BYTES_PER_RISCV_VECTOR * lmul, potential_ew,
+				   &mode_units)
 		    && (riscv_vector::get_vector_mode
-			 (elem_mode, exact_div (BYTES_PER_RISCV_VECTOR * lmul,
-				     potential_ew)).exists (&vmode)))
+			 (elem_mode, mode_units).exists (&vmode)))
 		  break;
 	    }
 
@@ -1165,15 +1179,12 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
 	  if (vmode != VOIDmode)
 	    break;
 
-	  /* The RVVM8?I modes are notionally 8 * BYTES_PER_RISCV_VECTOR bytes
-	     wide.  BYTES_PER_RISCV_VECTOR can't be evenly divided by
-	     the sizes of larger element types; the LMUL factor of 8 can at
-	     the moment be divided by the SEW, with SEW of up to 8 bytes,
-	     but there are reserved encodings so there might be larger
-	     SEW in the future.  */
-	  if (riscv_vector::get_vector_mode
-	      (elem_mode, exact_div (BYTES_PER_RISCV_VECTOR * 8,
-				     potential_ew)).exists (&vmode))
+	  /* BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL will at least be divisible
+	     by potential_ew 1, so this should succeed eventually.  */
+	  if (multiple_p (BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL,
+			  potential_ew, &mode_units)
+	      && riscv_vector::get_vector_mode (elem_mode,
+						mode_units).exists (&vmode))
 	    break;
 
 	  /* We may get here if we tried an element size that's larger than
@@ -1186,7 +1197,7 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
     }
   else
     {
-      vmode = E_RVVM8QImode;
+      gcc_assert (get_lmul_mode (QImode, TARGET_MAX_LMUL).exists (&vmode));
     }
 
   /* A memcpy libcall in the worst case takes 3 instructions to prepare the
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index fba35652cc285ce224a25dfae019d74b2c1e8c63..630fbd80e941171e81cc0ffd2778bcebfb14fd1f 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1890,6 +1890,18 @@ get_mask_mode (machine_mode mode)
   return get_vector_mode (BImode, nunits).require ();
 }
 
+/* Return the appropriate LMUL mode for MODE.  */
+
+opt_machine_mode
+get_lmul_mode (scalar_mode mode, int lmul)
+{
+  poly_uint64 lmul_nunits;
+  unsigned int bytes = GET_MODE_SIZE (mode);
+  if (multiple_p (BYTES_PER_RISCV_VECTOR * lmul, bytes, &lmul_nunits))
+    return get_vector_mode (mode, lmul_nunits);
+  return E_VOIDmode;
+}
+
 /* Return the appropriate M1 mode for MODE.  */
 
 static opt_machine_mode
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index f1e9bd588500baf7dcd5ef5b0cababadd0c88cdd..5b7b73535a3a9e48ce74f11a233a7c88a5141cf8 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2761,12 +2761,6 @@
     FAIL;
 })
 
-;; Inlining general memmove is a pessimisation: we can't avoid having to decide
-;; which direction to go at runtime, which is costly in instruction count
-;; however for situations where the entire move fits in one vector operation
-;; we can do all reads before doing any writes so we don't have to worry
-;; so generate the inline vector code in such situations
-;; nb. prefer scalar path for tiny memmoves.
 (define_expand "movmem<mode>"
   [(parallel [(set (match_operand:BLK 0 "general_operand")
    (match_operand:BLK 1 "general_operand"))
@@ -2774,10 +2768,8 @@
     (use (match_operand:SI 3 "const_int_operand"))])]
   "TARGET_VECTOR"
 {
-  if ((INTVAL (operands[2]) >= TARGET_MIN_VLEN / 8)
-	&& (INTVAL (operands[2]) <= TARGET_MIN_VLEN)
-	&& riscv_vector::expand_block_move (operands[0], operands[1],
-	     operands[2]))
+  if (riscv_vector::expand_block_move (operands[0], operands[1], operands[2],
+				       true))
     DONE;
   else
     FAIL;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113206-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113206-1.c
index ef92c6f35d11ff6be8fb8e99d8a2b84e64b2fcef..45086182aa89959daca40031881b476e089aba71 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113206-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113206-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m8" } */
 
 signed char e;
 short f = 8;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113206-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113206-2.c
index cfce88988f7ebaa09faa64a3ffbbae3bd35813b0..a3c61b467dab41e6ddd33065f1508804ddf86945 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113206-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113206-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -frename-registers" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -frename-registers -mrvv-max-lmul=m8" } */
 
 signed char e;
 short f = 8;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-1.c
index 82039f5ac4e362b292c81570a77ce2c1543743f8..86c2400ce513c0331156248e92f5bc12c156fde2 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */
 
 #include "def.h"
 
@@ -143,10 +143,6 @@ DEF_RET1_ARG9 (v1024qi)
 DEF_RET1_ARG9 (v2048qi)
 DEF_RET1_ARG9 (v4096qi)
 
-// RET1_ARG0 tests
-/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 9 } } */
-/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */
-
 // v1qi tests: return value (lbu) and function prologue (sb)
 // 1 lbu per test, argnum sb's when args > 1
 /* { dg-final { scan-assembler-times {lbu\s+a0,\s*[0-9]+\(sp\)} 8 } } */
@@ -169,7 +165,4 @@ DEF_RET1_ARG9 (v4096qi)
 /* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */
 
 // v32-4096qi tests: return value (vse8.v)
-/* { dg-final { scan-assembler-times {vse8.v\s+v[0-9],\s*[0-9]+\(a0\)} 74 } } */
-// v1024-4096qi_ARG1 tests: return value (vse64.v)
-// for some reason ARG1 returns using vse64 instead of vse8
-/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)\s+ret} 3 } } */
+/* { dg-final { scan-assembler-times {vse8.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-2.c
index af52b70398652b6965c7272a11017895711a8b19..c489a9ff796661c4783bde396cd5971f5ef2ef48 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */
 
 #include "def.h"
 
@@ -133,10 +133,6 @@ DEF_RET1_ARG9 (v512hi)
 DEF_RET1_ARG9 (v1024hi)
 DEF_RET1_ARG9 (v2048hi)
 
-// RET1_ARG0 tests
-/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 8 } } */
-/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */
-
 // v1hi tests: return value (lhu) and function prologue (sh)
 // 1 lhu per test, argnum sh's when args > 1
 /* { dg-final { scan-assembler-times {lhu\s+a0,\s*[0-9]+\(sp\)} 8 } } */
@@ -155,7 +151,4 @@ DEF_RET1_ARG9 (v2048hi)
 /* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */
 
 // v16-2048hi tests: return value (vse16.v)
-/* { dg-final { scan-assembler-times {vse16.v\s+v[0-9],\s*[0-9]+\(a0\)} 74 } } */
-// v512-2048qi_ARG1 tests: return value (vse64.v)
-// for some reason ARG1 returns using vse64 instead of vse16
-/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)\s+ret} 3 } } */
+/* { dg-final { scan-assembler-times {vse16.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-3.c
index 01c5a1a1ba286b60c81c9da7acfa57584b744d99..97a3282a6570aada9eadec39de99436f9b7ec724 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */
 
 #include "def.h"
 
@@ -123,10 +123,6 @@ DEF_RET1_ARG9 (v256si)
 DEF_RET1_ARG9 (v512si)
 DEF_RET1_ARG9 (v1024si)
 
-// RET1_ARG0 tests
-/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 7 } } */
-/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */
-
 // v1si tests: return value (lw) and function prologue (sw)
 // 1 lw per test, argnum sw's when args > 1
 /* { dg-final { scan-assembler-times {lw\s+a0,\s*[0-9]+\(sp\)} 8 } } */
@@ -140,7 +136,4 @@ DEF_RET1_ARG9 (v1024si)
 /* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */
 
 // v8-1024si tests: return value (vse32.v)
-/* { dg-final { scan-assembler-times {vse32.v\s+v[0-9],\s*[0-9]+\(a0\)} 74 } } */
-// 256-1024si tests: return value (vse64.v)
-// for some reason ARG1 returns using vse64 instead of vse32
-/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)\s+ret} 3 } } */
+/* { dg-final { scan-assembler-times {vse32.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-4.c
index 2c01aa8c260fb61fc8a27e5c3b5452702ba8eedb..a892919feb7c49490ad2f1dab70a27b49a36066d 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-4.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */
 
 #include "def.h"
 
@@ -113,10 +113,6 @@ DEF_RET1_ARG9 (v128di)
 DEF_RET1_ARG9 (v256di)
 DEF_RET1_ARG9 (v512di)
 
-// RET1_ARG0 tests
-/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 6 } } */
-/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */
-
 // v1di and v2di tests: return value (ld) and function prologue (sd)
 //   - 1 ld per v1di and 2 ld per v2di with args > 1
 //   - argnum sd's per v1di when argnum > 1 
@@ -125,4 +121,4 @@ DEF_RET1_ARG9 (v512di)
 /* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */
 
 // v4-512di tests: return value (vse64.v)
-/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)} 77 } } */
+/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-5.c
index 98d6d4a758a5a0d55617724d41e39f10c45bc092..0d40349fb0f81d775e9ddd3584a225b6815c283c 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-5.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-5.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */
 
 #include "def.h"
 
@@ -133,10 +133,6 @@ DEF_RET1_ARG9 (v512hf)
 DEF_RET1_ARG9 (v1024hf)
 DEF_RET1_ARG9 (v2048hf)
 
-// RET1_ARG0 tests
-/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 8 } } */
-/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */
-
 // v1hf tests: return value (lhu) and function prologue (sh)
 // 1 lhu per test, argnum sh's when args > 1
 /* { dg-final { scan-assembler-times {lhu\s+a[0-1],\s*[0-9]+\(sp\)} 8 } } */
@@ -155,7 +151,4 @@ DEF_RET1_ARG9 (v2048hf)
 /* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */
 
 // v16-2048hf tests: return value (vse16.v)
-/* { dg-final { scan-assembler-times {vse16.v\s+v[0-9],\s*[0-9]+\(a0\)} 74 } } */
-// v512-2048qf_ARG1 tests: return value (vse64.v)
-// for some reason ARG1 returns using vse64 instead of vse16
-/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)\s+ret} 3 } } */
+/* { dg-final { scan-assembler-times {vse16.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-6.c
index 5f59f001969e935bc2664ac03c72f169190fcf72..8b5a779467db952365a4b546a514dcec6997b0ea 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-6.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-6.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */
 
 #include "def.h"
 
@@ -123,10 +123,6 @@ DEF_RET1_ARG9 (v256sf)
 DEF_RET1_ARG9 (v512sf)
 DEF_RET1_ARG9 (v1024sf)
 
-// RET1_ARG0 tests
-/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 7 } } */
-/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */
-
 // v1sf tests: return value (lw) and function prologue (sw)
 // 1 lw per test, argnum sw's when args > 1
 /* { dg-final { scan-assembler-times {lw\s+a[0-1],\s*[0-9]+\(sp\)} 8 } } */
@@ -140,7 +136,4 @@ DEF_RET1_ARG9 (v1024sf)
 /* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */
 
 // v8-1024sf tests: return value (vse32.v)
-/* { dg-final { scan-assembler-times {vse32.v\s+v[0-9],\s*[0-9]+\(a0\)} 74 } } */
-// 256-1024sf tests: return value (vse64.v)
-// for some reason ARG1 returns using vse64 instead of vse32
-/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)\s+ret} 3 } } */
+/* { dg-final { scan-assembler-times {vse32.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-7.c
index 1d427fd08d6c4213bc94260f7c5b101b4bf2fccd..3ba4e1f186427cee73cfe6889df929c28d5a10d8 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-7.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/calling-convention-7.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */
 
 #include "def.h"
 
@@ -113,10 +113,6 @@ DEF_RET1_ARG9 (v128df)
 DEF_RET1_ARG9 (v256df)
 DEF_RET1_ARG9 (v512df)
 
-// RET1_ARG0 tests
-/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 6 } } */
-/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */
-
 // v1df and v2df tests: return value (ld) and function prologue (sd)
 //   - 1 ld per v1df and 2 ld per v2df with args > 1
 //   - argnum sd's per v1df when argnum > 1 
@@ -125,4 +121,4 @@ DEF_RET1_ARG9 (v512df)
 /* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */
 
 // v4-512df tests: return value (vse64.v)
-/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)} 77 } } */
+/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-4.c
index 1faf31ffd8e07df9c573353c2a021f77046606e3..1df83847363aa9f96a70c41c0505e715a52612ff 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-4.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -mrvv-max-lmul=m8" } */
 
 #include "def.h"
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-7.c
index e3980a2954064ded657e8be84835e98fe707a066..74b7b699f1af5e9c7f8bb472ce3e277b3d25f088 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-7.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/spill-7.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -mrvv-max-lmul=m8" } */
 
 #include "def.h"
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-1.c
index 0699cb78dd5125e651c5ede0463cdbf05865bc7a..6edb4c9253a4b65562c96b56ef96f15d557d9983 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-1.c
@@ -12,7 +12,7 @@ extern void *memcpy(void *__restrict dest, const void *__restrict src, __SIZE_TY
 /* memcpy should be implemented using the cpymem pattern.
 ** f1:
 XX	\.L\d+: # local label is ignored
-**	vsetvli\s+[ta][0-7],a2,e8,m8,ta,ma
+**	vsetvli\s+[ta][0-7],a2,e8,m1,ta,ma
 **	vle8\.v\s+v\d+,0\(a1\)
 **	vse8\.v\s+v\d+,0\(a0\)
 **	add\s+a1,a1,[ta][0-7]
@@ -31,7 +31,7 @@ void f1 (void *a, void *b, __SIZE_TYPE__ l)
    overflow is undefined.
 ** f2:
 XX	\.L\d+: # local label is ignored
-**	vsetvli\s+[ta][0-7],a2,e8,m8,ta,ma
+**	vsetvli\s+[ta][0-7],a2,e8,m1,ta,ma
 **	vle8\.v\s+v\d+,0\(a1\)
 **	vse8\.v\s+v\d+,0\(a0\)
 **	add\s+a1,a1,[ta][0-7]
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-2.c
index 6a854c87cd061ed40fd3b10346c4662ccbff2811..7b6a429f34cf4b32264009eccaf8a87c73d9205b 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-O1 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-additional-options "-O1 -fno-schedule-insns -fno-schedule-insns2 -mrvv-max-lmul=m8" } */
 /* { dg-add-options riscv_v } */
 /* { dg-final { check-function-bodies "**" "" } } */
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/movmem-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/movmem-1.c
index d9d4a70a392b35faf4ecd3f367bf248a993d5b9f..1f148bc70528ccbb15103b3f6a6e9722c44ec2fe 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/movmem-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/movmem-1.c
@@ -7,13 +7,14 @@
 
 /* Tiny memmoves should not be vectorised.
 ** f1:
-**  li\s+a2,\d+
-**  tail\s+memmove
+**  lbu\s+[ta][0-7],0\(a1\)
+**  sb\s+[ta][0-7],0\(a0\)
+**  ret
 */
 char *
 f1 (char *a, char const *b)
 {
-  return __builtin_memmove (a, b, MIN_VECTOR_BYTES - 1);
+  return __builtin_memmove (a, b, 1);
 }
 
 /* Vectorise+inline minimum vector register width with LMUL=1
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-0.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-0.c
index 8265105f4eb16e02ffb89676998ebf6ca1d73533..7e40ac583bb2f737485beba44a39878184ac5e66 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-0.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-0.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */
 
 #include "riscv_vector.h"
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-1.c
index 682d3e9cb7e9dc606bbce0634c00fe9925906cf1..c5be5b1d28e30cc7053f4f903a031acd820f8218 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */
 
 #include "riscv_vector.h"
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-2.c
index 73a9f51a16b67aeeacff6ed7099792ad7df33e8c..8f66d9670f3f532ce066a19f743f79d4700f7223 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */
 
 #include "riscv_vector.h"
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-3.c
index bec9b28008d4f6b2004c73519b19cda3e968885b..3e23ae717ae612d024b769d95c3b20a6df217992 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */
 
 #include "riscv_vector.h"
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-4.c
index c8978052b91b80fe56fe1c0617fe9a01c4086b50..11cdc74ec728a61c3477cf7ea1da82bf40cbb06d 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-4.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */
 
 #include "riscv_vector.h"
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-5.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-5.c
index 5604ca280fe42190d8bffe97834ca8db4a311d0d..7a5d04e3c5ceb81cde7cf519f168a5432118af58 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-5.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-5.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */
 
 #include "riscv_vector.h"
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-6.c
index 9c6484479cf7cd598c9f429c49ca381465f44c39..ba96b3402790fbfd82139480bd29409cc71822b1 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-6.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-6.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */
 
 #include "riscv_vector.h"
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-7.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-7.c
index 0bb2260cf1cf1c9c231edbbd958789472f22c33a..c0e8d6f1b392f2d825a623bf654b717df3b43347 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-7.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-7.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */
 
 #include "riscv_vector.h"
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-8.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-8.c
index 1ad588ff8adc2a6c2bd8772b36f56f86f0bc3b7a..917430096395dda355df283a7d7e6d0c66949ce8 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-8.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-8.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */
 
 #include "riscv_vector.h"
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-9.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-9.c
index 5b28863b6ada8694bc8dd938b48814771605a60d..ac7ec74472dec7d4514de34350995a87a03dd793 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-9.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-9.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */
 
 #include "riscv_vector.h"
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929-1.c
index e55604e1114c403207e8c8c3f8e563dfced18d7d..9ab04b07c1275924f9a78502114718eaa9a77777 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929-1.c
@@ -52,7 +52,7 @@ int main() {
   printf("%d\n", m);
 }
 
-/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
 /* { dg-final { scan-assembler-not {vsetivli} } } */
-/* { dg-final { scan-assembler-times {vsetvli\tzero,\s*[a-x0-9]+,\s*e8,\s*m2,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,\s*32} 2 { target { no-opts "-O0"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\t[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*m1,\s*t[au],\s*m[au]} 3 { target { no-opts "-O0"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,\s*32} 3 { target { no-opts "-O0"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988-1.c
index b20e46395aae2aaffe5dce6b929166b73f02623e..1facfd55d797196a5807db684196f8383bc66e28 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988-1.c
@@ -62,7 +62,7 @@ int main() {
   return 0;
 }
 
-/* { dg-final { scan-assembler-times {vsetvli} 4 } } */
+/* { dg-final { scan-assembler-times {vsetvli} 5 } } */
 /* { dg-final { scan-assembler-not {vsetivli} } } */
-/* { dg-final { scan-assembler-times {vsetvli\tzero,\s*[a-x0-9]+,\s*e8,\s*m2,\s*t[au],\s*m[au]} 1 } } */
-/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,\s*32} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\t[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*m1,\s*t[au],\s*m[au]} 2 } } */
+/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,\s*32} 2 } } */