diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 63139afd3e365ee5ebafdcbe4bddfdb3cc62c72b..0342906176244a23cfc6aff13c5f42f2e25e4a89 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -7900,6 +7900,35 @@ static const code_for_push_pop_t code_for_push_pop[ZCMP_MAX_GRP_SLOTS][ZCMP_OP_N
       code_for_gpr_multi_popret_up_to_s11,
       code_for_gpr_multi_popretz_up_to_s11}};
 
+/*  Set a probe loop for stack clash protection.  */
+static void
+riscv_allocate_and_probe_stack_loop (rtx tmp, enum rtx_code code,
+				     rtx op0, rtx op1, bool vector,
+				     HOST_WIDE_INT offset)
+{
+  tmp = riscv_force_temporary (tmp, gen_int_mode (offset, Pmode));
+
+  /* Loop.  */
+  rtx label = gen_label_rtx ();
+  emit_label (label);
+
+  /* Allocate and probe stack.  */
+  emit_insn (gen_sub3_insn (stack_pointer_rtx, stack_pointer_rtx, tmp));
+  emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+		    STACK_CLASH_CALLER_GUARD));
+  emit_insn (gen_blockage ());
+
+  /* Adjust the remaining vector length.  */
+  if (vector)
+    emit_insn (gen_sub3_insn (op0, op0, tmp));
+
+  /* Branch if there's still more bytes to probe.  */
+  riscv_expand_conditional_branch (label, code, op0, op1);
+  JUMP_LABEL (get_last_insn ()) = label;
+
+  emit_insn (gen_blockage ());
+}
+
 /* Adjust scalable frame of vector for prologue && epilogue. */
 
 static void
@@ -7912,6 +7941,49 @@ riscv_v_adjust_scalable_frame (rtx target, poly_int64 offset, bool epilogue)
   riscv_legitimize_poly_move (Pmode, adjust_size, tmp,
 			      gen_int_mode (offset, Pmode));
 
+  /* If doing stack clash protection then we use a loop to allocate and probe
+     the stack.  */
+  if (flag_stack_clash_protection && !epilogue)
+    {
+      HOST_WIDE_INT min_probe_threshold
+	= (1 << param_stack_clash_protection_guard_size) - STACK_CLASH_CALLER_GUARD;
+
+      if (!frame_pointer_needed)
+	{
+	  /* This is done to provide unwinding information for the stack
+	     adjustments we're about to do, however to prevent the optimizers
+	     from removing the T3 move and leaving the CFA note (which would be
+	     very wrong) we tie the old and new stack pointer together.
+	     The tie will expand to nothing but the optimizers will not touch
+	     the instruction.  */
+	  insn = get_last_insn ();
+	  rtx stack_ptr_copy = gen_rtx_REG (Pmode, RISCV_STACK_CLASH_VECTOR_CFA_REGNUM);
+	  emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
+	  riscv_emit_stack_tie (stack_ptr_copy);
+
+	  /* We want the CFA independent of the stack pointer for the
+	     duration of the loop.  */
+	  add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy);
+	  RTX_FRAME_RELATED_P (insn) = 1;
+	}
+
+      riscv_allocate_and_probe_stack_loop (tmp, GE, adjust_size, tmp, true,
+					   min_probe_threshold);
+
+      /* Allocate the residual.  */
+      insn = emit_insn (gen_sub3_insn (target, target, adjust_size));
+
+      /* Now reset the CFA register if needed.  */
+      if (!frame_pointer_needed)
+	{
+	  add_reg_note (insn, REG_CFA_DEF_CFA,
+			plus_constant (Pmode, stack_pointer_rtx, -offset));
+	  RTX_FRAME_RELATED_P (insn) = 1;
+	}
+
+      return;
+    }
+
   if (epilogue)
     insn = gen_add3_insn (target, target, adjust_size);
   else
@@ -8059,8 +8131,9 @@ riscv_allocate_and_probe_stack_space (rtx temp1, HOST_WIDE_INT size)
   else
     {
       /* Compute the ending address.  */
-      temp1 = riscv_force_temporary (temp1, gen_int_mode (rounded_size, Pmode));
-      insn = emit_insn (gen_sub3_insn (temp1, stack_pointer_rtx, temp1));
+      rtx temp2 = gen_rtx_REG (Pmode, RISCV_PROLOGUE_TEMP2_REGNUM);
+      temp2 = riscv_force_temporary (temp2, gen_int_mode (rounded_size, Pmode));
+      insn = emit_insn (gen_sub3_insn (temp2, stack_pointer_rtx, temp2));
 
       if (!frame_pointer_needed)
 	{
@@ -8071,25 +8144,9 @@ riscv_allocate_and_probe_stack_space (rtx temp1, HOST_WIDE_INT size)
 	  RTX_FRAME_RELATED_P (insn) = 1;
 	}
 
-      /* Allocate and probe the stack.  */
-
-      rtx temp2 = gen_rtx_REG (Pmode, RISCV_PROLOGUE_TEMP2_REGNUM);
-      temp2 = riscv_force_temporary (temp2, gen_int_mode (guard_size, Pmode));
-
-      /* Loop.  */
-      rtx label = gen_label_rtx ();
-      emit_label (label);
-
-      emit_insn (gen_sub3_insn (stack_pointer_rtx, stack_pointer_rtx, temp2));
-      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
-			   guard_used_by_caller));
-      emit_insn (gen_blockage ());
-
-      /* Check if the stack pointer is at the ending address.  */
-      riscv_expand_conditional_branch (label, NE, stack_pointer_rtx, temp1);
-      JUMP_LABEL (get_last_insn ()) = label;
-
-      emit_insn (gen_blockage ());
+      /* This allocates and probes the stack.  */
+      riscv_allocate_and_probe_stack_loop (temp1, NE, stack_pointer_rtx, temp2,
+					   false, guard_size);
 
       /* Now reset the CFA register if needed.  */
       if (!frame_pointer_needed)
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 7e9093fed3c80dfdfeabce27267e6f0272cf1181..58e17178212b6fe62bff7c10ddb98bee915be8bd 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -429,6 +429,11 @@ ASM_MISA_SPEC
 #define RISCV_PROLOGUE_TEMP2_REGNUM (GP_TEMP_FIRST + 1)
 #define RISCV_PROLOGUE_TEMP2(MODE) gen_rtx_REG (MODE, RISCV_PROLOGUE_TEMP2_REGNUM)
 
+/* Both prologue temp registers are used in the vector probe loop for when
+   stack-clash protection is enabled, so we need to copy SP to a new register
+   and set it as CFA during the loop, we are using T3 for that.  */
+#define RISCV_STACK_CLASH_VECTOR_CFA_REGNUM (GP_TEMP_FIRST + 23)
+
 #define RISCV_CALL_ADDRESS_TEMP_REGNUM (GP_TEMP_FIRST + 1)
 #define RISCV_CALL_ADDRESS_TEMP(MODE) \
   gen_rtx_REG (MODE, RISCV_CALL_ADDRESS_TEMP_REGNUM)
diff --git a/gcc/testsuite/gcc.target/riscv/stack-check-cfa-3.c b/gcc/testsuite/gcc.target/riscv/stack-check-cfa-3.c
new file mode 100644
index 0000000000000000000000000000000000000000..e45f7bb7df51b2218bc6a55ad97eaa3dfb7a07ca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/stack-check-cfa-3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -fstack-clash-protection -funwind-tables -fno-stack-protector" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+
+#include "stack-check-prologue-16.c"
+
+/* Checks that the CFA notes are correct for every sp adjustment, but we also
+   need to make sure we can unwind correctly before the frame is set up.  So
+   check that we're emitting t3 with a copy of sp an setting the CFA there.  */
+
+/* { dg-final { scan-assembler-times {mv\tt3,sp} 1 } } */
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa [0-9]+, 0} 1 } } */
+/* { dg-final { scan-assembler-times {\.cfi_escape 0xf,0xa,0x72,0,0x92,0xa2,0x38,0,0x9,0xec,0x1e,0x22} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/stack-check-prologue-16.c b/gcc/testsuite/gcc.target/riscv/stack-check-prologue-16.c
new file mode 100644
index 0000000000000000000000000000000000000000..c74dce04b23655566adafee7a123048d7dcabe98
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/stack-check-prologue-16.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -fstack-clash-protection" } */
+
+/* Invoke X (P##n) for n in [0, 7].  */
+#define REPEAT8(X, P) \
+  X (P##0) X (P##1) X (P##2) X (P##3) X (P##4) X (P##5) X (P##6) X (P##7)
+
+/* Invoke X (n) for all octal n in [0, 39].  */
+#define REPEAT40(X) \
+  REPEAT8 (X, 0) REPEAT8 (X, 1)  REPEAT8 (X, 2) REPEAT8 (X, 3) REPEAT8 (X, 4)
+
+/* Expect vector work to be done, with spilling of vector registers.  */
+void
+f2 (int x[40][100], int *y)
+{
+  /* Try to force some spilling.  */
+#define DECLARE(N) int y##N = y[N];
+  REPEAT40 (DECLARE);
+#pragma omp simd
+  for (int i = 0; i < 100; ++i)
+    {
+#define INC(N) x[N][i] += y##N;
+      REPEAT40 (INC);
+    }
+}
+
+/* Vector spill, requires probing as vector size is unknown at compile time.  */
+
+/* { dg-final { scan-assembler-times {sd\tzero,1024\(sp\)} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/struct_vect_24.c b/gcc/testsuite/gcc.target/riscv/struct_vect_24.c
new file mode 100644
index 0000000000000000000000000000000000000000..7c0852f1a552baf96f12e892e6d3a1a5ab3512d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/struct_vect_24.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -fstack-clash-protection -fno-stack-protector" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-O2" "-Og" "-Os" "-Oz" "-funroll-loops"} } */
+
+#include <stdint.h>
+
+#define N 50
+#define S 2 * 4 * 1024
+
+/* Invoke X (P##n) for n in [0, 9].  */
+#define REPEAT8(X, P) \
+  X (P##0) X (P##1) X (P##2) X (P##3) X (P##4) X (P##5) X (P##6) X (P##7) \
+  X (P##8)  X (P##9)
+
+/* Invoke X (n) for all n in [0, 49].  */
+#define REPEAT50(X) \
+  REPEAT8 (X, ) REPEAT8 (X, 1)  REPEAT8 (X, 2) REPEAT8 (X, 3) REPEAT8 (X, 4)
+
+  /* Try to force some spilling.  */
+#define DECLARE(N) int src##N = src[N * 4];
+#define INC(N) dest[i] += src##N;
+
+#define TEST_LOOP(NAME, TYPE)				\
+  void __attribute__ ((noinline))	\
+  NAME (TYPE *restrict dest, TYPE *restrict src)	\
+  {							\
+    REPEAT50 (DECLARE);					\
+    volatile char foo[S];				\
+    foo[S-1]=1;						\
+    for (int i = 0; i < N; i++)				\
+      {							\
+	REPEAT50 (INC);					\
+      }							\
+  }
+
+#define TEST(NAME) \
+  TEST_LOOP (NAME##_i32, int32_t) \
+  TEST_LOOP (NAME##_i64, int64_t)
+
+TEST (test)
+
+/* Check the vectorized loop for stack clash probing.  */
+
+/* { dg-final { scan-assembler-times {sd\tzero,1024\(sp\)} 6 } } */
+/* { dg-final { scan-assembler-times {bge\tt1,t0,.[^\\r\\n]*} 2 } } */
+/* { dg-final { scan-assembler-times {sub\s+t1,t1,t0} 2 } } */