From 8edfc4ccb46c6b8ccd48e7136d1ed5f5f646ff9a Mon Sep 17 00:00:00 2001
From: Mark Shinwell <shinwell@codesourcery.com>
Date: Wed, 13 Dec 2006 17:32:47 +0000
Subject: [PATCH] arm.c (arm_output_fldmx): Output FLDMD instead of FLDMX.

        gcc/
        * config/arm/arm.c (arm_output_fldmx): Output FLDMD instead of
        FLDMX.  Rename function to...
        (vfp_output_fldmd): ...this.
        (vfp_output_fstmx): Output FSTMD instead of FSTMX.  Rename
        function to...
        (vfp_output_fstmd): ...this.
        (vfp_emit_fstmx): Don't leave space in the frame layout for the
        FSTMX format word.  Rename function to...
        (vfp_emit_fstmd): ...this.
        (arm_get_vfp_saved_size): Don't add in space for the FSTMX format
        word.
        (arm_output_epilogue): Adjust comment to reflect use of FSTMD.
        (arm_unwind_emit_sequence): Don't compensate for the FSTMX format
        word.  Also emit "vsave" assembler directives in such cases rather
        than "save".
        * config/arm/libunwind.S (gnu_Unwind_Restore_VFP,
        gnu_Unwind_Save_VFP): Adjust comments.
        (gnu_Unwind_Restore_VFP_D, gnu_Unwind_Save_VFP_D): New functions
        for saving and restoring using FSTMD and FLDMD rather than
        FSTMX and FLDMX.
        (gnu_Unwind_Restore_VFP_D_16_to_31, gnu_Unwind_Restore_VFP_D_16_to_31):
        New functions for saving and restoring the VFPv3 registers 16 .. 31.
        * config/arm/pr-support.c (gnu_unwind_execute): Add conditional
        compilation case to correctly handle unwind opcode 0xc8 when using
        VFP.
        * config/arm/unwind-arm.c (struct vfpv3_regs): New.
        (DEMAND_SAVE_VFP_D, DEMAND_SAVE_VFP_V3): New flags.
        (__gnu_Unwind_Save_VFP_D, __gnu_Unwind_Restore_VFP_D,
        __gnu_Unwind_Save_VFP_D_16_to_31, __gnu_Unwind_Restore_VFP_D_16_to_31):
        Declare.
        (restore_non_core_regs): Restore registers using FLDMD rather than
        FLDMX if required.  Also handle restoration of VFPv3 registers.
        (_Unwind_VRS_Pop): Handle saving and restoring of registers using
        FSTMD and FLDMD if required; also handle VFPv3 registers 16 .. 31,
        including cases where the caller specifies a range of registers
        that overlaps the d15/d16 boundary.

From-SVN: r119838
---
 gcc/ChangeLog               |  39 ++++++++++
 gcc/config/arm/arm-protos.h |   2 +-
 gcc/config/arm/arm.c        |  52 +++++++-------
 gcc/config/arm/libunwind.S  |  30 +++++++-
 gcc/config/arm/pr-support.c |  22 ++++--
 gcc/config/arm/unwind-arm.c | 138 +++++++++++++++++++++++++++++++-----
 gcc/config/arm/vfp.md       |   2 +-
 7 files changed, 232 insertions(+), 53 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ccb1674465c1..6f579c7b8505 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,42 @@
+2006-12-13  Mark Shinwell  <shinwell@codesourcery.com>
+
+	* config/arm/arm.c (arm_output_fldmx): Output FLDMD instead of
+	FLDMX.  Rename function to...
+	(vfp_output_fldmd): ...this.
+	(vfp_output_fstmx): Output FSTMD instead of FSTMX.  Rename
+	function to...
+	(vfp_output_fstmd): ...this.
+	(vfp_emit_fstmx): Don't leave space in the frame layout for the
+	FSTMX format word.  Rename function to...
+	(vfp_emit_fstmd): ...this.
+	(arm_get_vfp_saved_size): Don't add in space for the FSTMX format
+	word.
+	(arm_output_epilogue): Adjust comment to reflect use of FSTMD.
+	(arm_unwind_emit_sequence): Don't compensate for the FSTMX format
+	word.  Also emit "vsave" assembler directives in such cases rather
+	than "save".
+	* config/arm/libunwind.S (gnu_Unwind_Restore_VFP,
+	gnu_Unwind_Save_VFP): Adjust comments.
+	(gnu_Unwind_Restore_VFP_D, gnu_Unwind_Save_VFP_D): New functions
+	for saving and restoring using FSTMD and FLDMD rather than
+	FSTMX and FLDMX.
+	(gnu_Unwind_Restore_VFP_D_16_to_31, gnu_Unwind_Restore_VFP_D_16_to_31):
+	New functions for saving and restoring the VFPv3 registers 16 .. 31.
+	* config/arm/pr-support.c (gnu_unwind_execute): Add conditional
+	compilation case to correctly handle unwind opcode 0xc8 when using
+	VFP.
+	* config/arm/unwind-arm.c (struct vfpv3_regs): New.
+	(DEMAND_SAVE_VFP_D, DEMAND_SAVE_VFP_V3): New flags.
+	(__gnu_Unwind_Save_VFP_D, __gnu_Unwind_Restore_VFP_D,
+	__gnu_Unwind_Save_VFP_D_16_to_31, __gnu_Unwind_Restore_VFP_D_16_to_31):
+	Declare.
+	(restore_non_core_regs): Restore registers using FLDMD rather than
+	FLDMX if required.  Also handle restoration of VFPv3 registers.
+	(_Unwind_VRS_Pop): Handle saving and restoring of registers using
+	FSTMD and FLDMD if required; also handle VFPv3 registers 16 .. 31,
+	including cases where the caller specifies a range of registers
+	that overlaps the d15/d16 boundary.
+
 2006-12-13  Diego Novillo  <dnovillo@redhat.com>
 
 	PR 30089
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 4e0d77b33412..032c4e6ec699 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -121,7 +121,7 @@ extern int arm_debugger_arg_offset (int, rtx);
 extern int arm_is_longcall_p (rtx, int, int);
 extern int    arm_emit_vector_const (FILE *, rtx);
 extern const char * arm_output_load_gr (rtx *);
-extern const char *vfp_output_fstmx (rtx *);
+extern const char *vfp_output_fstmd (rtx *);
 extern void arm_set_return_address (rtx, rtx);
 extern int arm_eliminable_register (rtx);
 
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 41c44e274dd9..c7bf05fc4e4e 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -8425,13 +8425,17 @@ print_multi_reg (FILE *stream, const char *instr, unsigned reg,
 }
 
 
-/* Output a FLDMX instruction to STREAM.
+/* Output a FLDMD instruction to STREAM.
    BASE if the register containing the address.
    REG and COUNT specify the register range.
-   Extra registers may be added to avoid hardware bugs.  */
+   Extra registers may be added to avoid hardware bugs.
+
+   We output FLDMD even for ARMv5 VFP implementations.  Although
+   FLDMD is technically not supported until ARMv6, it is believed
+   that all VFP implementations support its use in this context.  */
 
 static void
-arm_output_fldmx (FILE * stream, unsigned int base, int reg, int count)
+vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
 {
   int i;
 
@@ -8444,7 +8448,7 @@ arm_output_fldmx (FILE * stream, unsigned int base, int reg, int count)
     }
 
   fputc ('\t', stream);
-  asm_fprintf (stream, "fldmfdx\t%r!, {", base);
+  asm_fprintf (stream, "fldmfdd\t%r!, {", base);
 
   for (i = reg; i < reg + count; i++)
     {
@@ -8460,14 +8464,14 @@ arm_output_fldmx (FILE * stream, unsigned int base, int reg, int count)
 /* Output the assembly for a store multiple.  */
 
 const char *
-vfp_output_fstmx (rtx * operands)
+vfp_output_fstmd (rtx * operands)
 {
   char pattern[100];
   int p;
   int base;
   int i;
 
-  strcpy (pattern, "fstmfdx\t%m0!, {%P1");
+  strcpy (pattern, "fstmfdd\t%m0!, {%P1");
   p = strlen (pattern);
 
   gcc_assert (GET_CODE (operands[1]) == REG);
@@ -8488,7 +8492,7 @@ vfp_output_fstmx (rtx * operands)
    number of bytes pushed.  */
 
 static int
-vfp_emit_fstmx (int base_reg, int count)
+vfp_emit_fstmd (int base_reg, int count)
 {
   rtx par;
   rtx dwarf;
@@ -8505,10 +8509,6 @@ vfp_emit_fstmx (int base_reg, int count)
       count++;
     }
 
-  /* ??? The frame layout is implementation defined.  We describe
-     standard format 1 (equivalent to a FSTMD insn and unused pad word).
-     We really need some way of representing the whole block so that the
-     unwinder can figure it out at runtime.  */
   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
 
@@ -8525,7 +8525,7 @@ vfp_emit_fstmx (int base_reg, int count)
 				   UNSPEC_PUSH_MULT));
 
   tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
-		     plus_constant (stack_pointer_rtx, -(count * 8 + 4)));
+		     plus_constant (stack_pointer_rtx, -(count * 8)));
   RTX_FRAME_RELATED_P (tmp) = 1;
   XVECEXP (dwarf, 0, 0) = tmp;
 
@@ -8555,7 +8555,7 @@ vfp_emit_fstmx (int base_reg, int count)
 				       REG_NOTES (par));
   RTX_FRAME_RELATED_P (par) = 1;
 
-  return count * 8 + 4;
+  return count * 8;
 }
 
 
@@ -9451,7 +9451,7 @@ arm_get_vfp_saved_size (void)
 		  /* Workaround ARM10 VFPr1 bug.  */
 		  if (count == 2 && !arm_arch6)
 		    count++;
-		  saved += count * 8 + 4;
+		  saved += count * 8;
 		}
 	      count = 0;
 	    }
@@ -9462,7 +9462,7 @@ arm_get_vfp_saved_size (void)
 	{
 	  if (count == 2 && !arm_arch6)
 	    count++;
-	  saved += count * 8 + 4;
+	  saved += count * 8;
 	}
     }
   return saved;
@@ -9888,8 +9888,8 @@ arm_output_epilogue (rtx sibling)
 	{
 	  int saved_size;
 
-	  /* The fldmx insn does not have base+offset addressing modes,
-	     so we use IP to hold the address.  */
+	  /* The fldmd insns do not have base+offset addressing
+             modes, so we use IP to hold the address.  */
 	  saved_size = arm_get_vfp_saved_size ();
 
 	  if (saved_size > 0)
@@ -9905,14 +9905,14 @@ arm_output_epilogue (rtx sibling)
 		  && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
 		{
 		  if (start_reg != reg)
-		    arm_output_fldmx (f, IP_REGNUM,
+		    vfp_output_fldmd (f, IP_REGNUM,
 				      (start_reg - FIRST_VFP_REGNUM) / 2,
 				      (reg - start_reg) / 2);
 		  start_reg = reg + 2;
 		}
 	    }
 	  if (start_reg != reg)
-	    arm_output_fldmx (f, IP_REGNUM,
+	    vfp_output_fldmd (f, IP_REGNUM,
 			      (start_reg - FIRST_VFP_REGNUM) / 2,
 			      (reg - start_reg) / 2);
 	}
@@ -10036,14 +10036,14 @@ arm_output_epilogue (rtx sibling)
 		  && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
 		{
 		  if (start_reg != reg)
-		    arm_output_fldmx (f, SP_REGNUM,
+		    vfp_output_fldmd (f, SP_REGNUM,
 				      (start_reg - FIRST_VFP_REGNUM) / 2,
 				      (reg - start_reg) / 2);
 		  start_reg = reg + 2;
 		}
 	    }
 	  if (start_reg != reg)
-	    arm_output_fldmx (f, SP_REGNUM,
+	    vfp_output_fldmd (f, SP_REGNUM,
 			      (start_reg - FIRST_VFP_REGNUM) / 2,
 			      (reg - start_reg) / 2);
 	}
@@ -10841,13 +10841,13 @@ arm_expand_prologue (void)
 		  && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
 		{
 		  if (start_reg != reg)
-		    saved_regs += vfp_emit_fstmx (start_reg,
+		    saved_regs += vfp_emit_fstmd (start_reg,
 						  (reg - start_reg) / 2);
 		  start_reg = reg + 2;
 		}
 	    }
 	  if (start_reg != reg)
-	    saved_regs += vfp_emit_fstmx (start_reg,
+	    saved_regs += vfp_emit_fstmd (start_reg,
 					  (reg - start_reg) / 2);
 	}
     }
@@ -15407,12 +15407,12 @@ arm_unwind_emit_stm (FILE * asm_out_file, rtx p)
 	  offset -= 4;
 	}
       reg_size = 4;
+      fprintf (asm_out_file, "\t.save {");
     }
   else if (IS_VFP_REGNUM (reg))
     {
-      /* FPA register saves use an additional word.  */
-      offset -= 4;
       reg_size = 8;
+      fprintf (asm_out_file, "\t.vsave {");
     }
   else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
     {
@@ -15429,8 +15429,6 @@ arm_unwind_emit_stm (FILE * asm_out_file, rtx p)
   if (offset != nregs * reg_size)
     abort ();
 
-  fprintf (asm_out_file, "\t.save {");
-
   offset = 0;
   lastreg = 0;
   /* The remaining insns will describe the stores.  */
diff --git a/gcc/config/arm/libunwind.S b/gcc/config/arm/libunwind.S
index f53a20232b81..fd66724da723 100644
--- a/gcc/config/arm/libunwind.S
+++ b/gcc/config/arm/libunwind.S
@@ -68,20 +68,46 @@ ARM_FUNC_START restore_core_regs
 	FUNC_END restore_core_regs
 	UNPREFIX restore_core_regs
 
-/* Load VFP registers d0-d15 from the address in r0.  */
+/* Load VFP registers d0-d15 from the address in r0.
+   Use this to load from FSTMX format.  */
 ARM_FUNC_START gnu_Unwind_Restore_VFP
 	/* Use the generic coprocessor form so that gas doesn't complain
 	   on soft-float targets.  */
 	ldc   p11,cr0,[r0],{0x21} /* fldmiax r0, {d0-d15} */
 	RET
 
-/* Store VFR regsters d0-d15 to the address in r0.  */
+/* Store VFP registers d0-d15 to the address in r0.
+   Use this to store in FSTMX format.  */
 ARM_FUNC_START gnu_Unwind_Save_VFP
 	/* Use the generic coprocessor form so that gas doesn't complain
 	   on soft-float targets.  */
 	stc   p11,cr0,[r0],{0x21} /* fstmiax r0, {d0-d15} */
 	RET
 
+/* Load VFP registers d0-d15 from the address in r0.
+   Use this to load from FSTMD format.  */
+ARM_FUNC_START gnu_Unwind_Restore_VFP_D
+	ldc   p11,cr0,[r0],{0x20} /* fldmiad r0, {d0-d15} */
+	RET
+
+/* Store VFP registers d0-d15 to the address in r0.
+   Use this to store in FLDMD format.  */
+ARM_FUNC_START gnu_Unwind_Save_VFP_D
+	stc   p11,cr0,[r0],{0x20} /* fstmiad r0, {d0-d15} */
+	RET
+
+/* Load VFP registers d16-d31 from the address in r0.
+   Use this to load from FSTMD (=VSTM) format.  Needs VFPv3.  */
+ARM_FUNC_START gnu_Unwind_Restore_VFP_D_16_to_31
+	ldcl  p11,cr0,[r0],{0x20} /* vldm r0, {d16-d31} */
+	RET
+
+/* Store VFP registers d16-d31 to the address in r0.
+   Use this to store in FLDMD (=VLDM) format.  Needs VFPv3.  */
+ARM_FUNC_START gnu_Unwind_Save_VFP_D_16_to_31
+	stcl  p11,cr0,[r0],{0x20} /* vstm r0, {d16-d31} */
+	RET
+
 /* Wrappers to save core registers, then call the real routine.   */
 
 .macro  UNWIND_WRAPPER name nargs
diff --git a/gcc/config/arm/pr-support.c b/gcc/config/arm/pr-support.c
index f0e733a11a77..b5592b192fca 100644
--- a/gcc/config/arm/pr-support.c
+++ b/gcc/config/arm/pr-support.c
@@ -282,13 +282,23 @@ __gnu_unwind_execute (_Unwind_Context * context, __gnu_unwind_state * uws)
 	    }
 	  if (op == 0xc8)
 	    {
-	      /* Pop FPA registers.  */
-	      op = next_unwind_byte (uws);
+#ifndef __VFP_FP__
+ 	      /* Pop FPA registers.  */
+ 	      op = next_unwind_byte (uws);
 	      op = ((op & 0xf0) << 12) | ((op & 0xf) + 1);
-	      if (_Unwind_VRS_Pop (context, _UVRSC_FPA, op, _UVRSD_FPAX)
-		  != _UVRSR_OK)
-		return _URC_FAILURE;
-	      continue;
+ 	      if (_Unwind_VRS_Pop (context, _UVRSC_FPA, op, _UVRSD_FPAX)
+ 		  != _UVRSR_OK)
+ 		return _URC_FAILURE;
+ 	      continue;
+#else
+              /* Pop VFPv3 registers D[16+ssss]-D[16+ssss+cccc] with vldm.  */
+              op = next_unwind_byte (uws);
+              op = (((op & 0xf0) + 16) << 12) | ((op & 0xf) + 1);
+              if (_Unwind_VRS_Pop (context, _UVRSC_VFP, op, _UVRSD_DOUBLE)
+                  != _UVRSR_OK)
+                return _URC_FAILURE;
+              continue;
+#endif
 	    }
 	  if (op == 0xc9)
 	    {
diff --git a/gcc/config/arm/unwind-arm.c b/gcc/config/arm/unwind-arm.c
index 9b7229e9486d..4b75b0fe665f 100644
--- a/gcc/config/arm/unwind-arm.c
+++ b/gcc/config/arm/unwind-arm.c
@@ -74,6 +74,13 @@ struct vfp_regs
   _uw pad;
 };
 
+struct vfpv3_regs
+{
+  /* Always populated via VSTM, so no need for the "pad" field from
+     vfp_regs (which is used to store the format word for FSTMX).  */
+  _uw64 d[16];
+};
+
 struct fpa_reg
 {
   _uw w[3];
@@ -114,10 +121,14 @@ typedef struct
   struct core_regs core;
   _uw prev_sp; /* Only valid during forced unwinding.  */
   struct vfp_regs vfp;
+  struct vfpv3_regs vfp_regs_16_to_31;
   struct fpa_regs fpa;
 } phase1_vrs;
 
-#define DEMAND_SAVE_VFP 1
+#define DEMAND_SAVE_VFP 1	/* VFP state has been saved if not set */
+#define DEMAND_SAVE_VFP_D 2	/* VFP state is for FLDMD/FSTMD if set */
+#define DEMAND_SAVE_VFP_V3 4    /* VFPv3 state for regs 16 .. 31 has
+                                   been saved if not set */
 
 /* This must match the structure created by the assembly wrappers.  */
 typedef struct
@@ -143,15 +154,33 @@ void __attribute__((noreturn)) restore_core_regs (struct core_regs *);
 
 /* Coprocessor register state manipulation functions.  */
 
+/* Routines for FLDMX/FSTMX format...  */
 void __gnu_Unwind_Save_VFP (struct vfp_regs * p);
 void __gnu_Unwind_Restore_VFP (struct vfp_regs * p);
 
+/* ...and those for FLDMD/FSTMD format...  */
+void __gnu_Unwind_Save_VFP_D (struct vfp_regs * p);
+void __gnu_Unwind_Restore_VFP_D (struct vfp_regs * p);
+
+/* ...and those for VLDM/VSTM format, saving/restoring only registers
+   16 through 31.  */
+void __gnu_Unwind_Save_VFP_D_16_to_31 (struct vfpv3_regs * p);
+void __gnu_Unwind_Restore_VFP_D_16_to_31 (struct vfpv3_regs * p);
+
 /* Restore coprocessor state after phase1 unwinding.  */
 static void
 restore_non_core_regs (phase1_vrs * vrs)
 {
   if ((vrs->demand_save_flags & DEMAND_SAVE_VFP) == 0)
-    __gnu_Unwind_Restore_VFP (&vrs->vfp);
+    {
+      if (vrs->demand_save_flags & DEMAND_SAVE_VFP_D)
+        __gnu_Unwind_Restore_VFP_D (&vrs->vfp);
+      else
+        __gnu_Unwind_Restore_VFP (&vrs->vfp);
+    }
+
+  if ((vrs->demand_save_flags & DEMAND_SAVE_VFP_V3) == 0)
+    __gnu_Unwind_Restore_VFP_D_16_to_31 (&vrs->vfp_regs_16_to_31);
 }
 
 /* A better way to do this would probably be to compare the absolute address
@@ -274,35 +303,101 @@ _Unwind_VRS_Result _Unwind_VRS_Pop (_Unwind_Context *context,
 	_uw start = discriminator >> 16;
 	_uw count = discriminator & 0xffff;
 	struct vfp_regs tmp;
+	struct vfpv3_regs tmp_16_to_31;
+	int tmp_count;
 	_uw *sp;
 	_uw *dest;
+        int num_vfpv3_regs = 0;
 
+        /* We use an approximation here by bounding _UVRSD_DOUBLE
+           register numbers at 32 always, since we can't detect if
+           VFPv3 isn't present (in such a case the upper limit is 16).  */
 	if ((representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE)
-	    || start + count > 16)
+            || start + count > (representation == _UVRSD_VFPX ? 16 : 32)
+            || (representation == _UVRSD_VFPX && start >= 16))
 	  return _UVRSR_FAILED;
 
-	if (vrs->demand_save_flags & DEMAND_SAVE_VFP)
+        /* Check if we're being asked to pop VFPv3-only registers
+           (numbers 16 through 31).  */
+	if (start >= 16)
+          num_vfpv3_regs = count;
+        else if (start + count > 16)
+          num_vfpv3_regs = start + count - 16;
+
+        if (num_vfpv3_regs && representation != _UVRSD_DOUBLE)
+          return _UVRSR_FAILED;
+
+	/* Demand-save coprocessor registers for stage1.  */
+	if (start < 16 && (vrs->demand_save_flags & DEMAND_SAVE_VFP))
 	  {
-	    /* Demand-save resisters for stage1.  */
 	    vrs->demand_save_flags &= ~DEMAND_SAVE_VFP;
-	    __gnu_Unwind_Save_VFP (&vrs->vfp);
+
+            if (representation == _UVRSD_DOUBLE)
+              {
+                /* Save in FLDMD/FSTMD format.  */
+	        vrs->demand_save_flags |= DEMAND_SAVE_VFP_D;
+	        __gnu_Unwind_Save_VFP_D (&vrs->vfp);
+              }
+            else
+              {
+                /* Save in FLDMX/FSTMX format.  */
+	        vrs->demand_save_flags &= ~DEMAND_SAVE_VFP_D;
+	        __gnu_Unwind_Save_VFP (&vrs->vfp);
+              }
+	  }
+
+        if (num_vfpv3_regs > 0
+            && (vrs->demand_save_flags & DEMAND_SAVE_VFP_V3))
+	  {
+	    vrs->demand_save_flags &= ~DEMAND_SAVE_VFP_V3;
+            __gnu_Unwind_Save_VFP_D_16_to_31 (&vrs->vfp_regs_16_to_31);
 	  }
 
 	/* Restore the registers from the stack.  Do this by saving the
 	   current VFP registers to a memory area, moving the in-memory
 	   values into that area, and restoring from the whole area.
 	   For _UVRSD_VFPX we assume FSTMX standard format 1.  */
-	__gnu_Unwind_Save_VFP (&tmp);
-
-	/* The stack address is only guaranteed to be word aligned, so
+        if (representation == _UVRSD_VFPX)
+  	  __gnu_Unwind_Save_VFP (&tmp);
+        else
+          {
+	    /* Save registers 0 .. 15 if required.  */
+            if (start < 16)
+              __gnu_Unwind_Save_VFP_D (&tmp);
+
+	    /* Save VFPv3 registers 16 .. 31 if required.  */
+            if (num_vfpv3_regs)
+  	      __gnu_Unwind_Save_VFP_D_16_to_31 (&tmp_16_to_31);
+          }
+
+	/* Work out how many registers below register 16 need popping.  */
+	tmp_count = num_vfpv3_regs > 0 ? 16 - start : count;
+
+	/* Copy registers below 16, if needed.
+	   The stack address is only guaranteed to be word aligned, so
 	   we can't use doubleword copies.  */
 	sp = (_uw *) vrs->core.r[R_SP];
-	dest = (_uw *) &tmp.d[start];
-	count *= 2;
-	while (count--)
-	  *(dest++) = *(sp++);
-
-	/* Skip the pad word */
+        if (tmp_count > 0)
+          {
+	    tmp_count *= 2;
+	    dest = (_uw *) &tmp.d[start];
+	    while (tmp_count--)
+	      *(dest++) = *(sp++);
+          }
+
+	/* Copy VFPv3 registers numbered >= 16, if needed.  */
+        if (num_vfpv3_regs > 0)
+          {
+            /* num_vfpv3_regs is needed below, so copy it.  */
+            int tmp_count_2 = num_vfpv3_regs * 2;
+            int vfpv3_start = start < 16 ? 16 : start;
+
+	    dest = (_uw *) &tmp_16_to_31.d[vfpv3_start - 16];
+	    while (tmp_count_2--)
+	      *(dest++) = *(sp++);
+          }
+
+	/* Skip the format word space if using FLDMX/FSTMX format.  */
 	if (representation == _UVRSD_VFPX)
 	  sp++;
 
@@ -310,7 +405,18 @@ _Unwind_VRS_Result _Unwind_VRS_Pop (_Unwind_Context *context,
 	vrs->core.r[R_SP] = (_uw) sp;
 
 	/* Reload the registers.  */
-	__gnu_Unwind_Restore_VFP (&tmp);
+        if (representation == _UVRSD_VFPX)
+  	  __gnu_Unwind_Restore_VFP (&tmp);
+        else
+          {
+	    /* Restore registers 0 .. 15 if required.  */
+            if (start < 16)
+              __gnu_Unwind_Restore_VFP_D (&tmp);
+
+	    /* Restore VFPv3 registers 16 .. 31 if required.  */
+            if (num_vfpv3_regs > 0)
+  	      __gnu_Unwind_Restore_VFP_D_16_to_31 (&tmp_16_to_31);
+          }
       }
       return _UVRSR_OK;
 
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 2380c83cad9c..6d4d017f14dc 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -828,7 +828,7 @@
 	  (unspec:BLK [(match_operand:DF 1 "s_register_operand" "w")]
 		      UNSPEC_PUSH_MULT))])]
   "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP"
-  "* return vfp_output_fstmx (operands);"
+  "* return vfp_output_fstmd (operands);"
   [(set_attr "type" "f_stored")]
 )
 
-- 
GitLab