diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index bb9c7c3b5c443dc70aa58344070bd1e4924d4803..459b7e648abad0bbe72c51fae061a3be1702d26a 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -8858,6 +8858,28 @@ arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
 	    && INTVAL (index) > -1024
 	    && (INTVAL (index) & 3) == 0);
 
+  if (arm_address_register_rtx_p (index, strict_p)
+      && (GET_MODE_SIZE (mode) <= 4))
+    return 1;
+
+  /* This handles DFmode only if !TARGET_HARD_FLOAT.  */
+  if (mode == DImode || mode == DFmode)
+    {
+      if (code == CONST_INT)
+	{
+	  HOST_WIDE_INT val = INTVAL (index);
+
+	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
+	     If vldr is selected it uses arm_coproc_mem_operand.  */
+	  if (TARGET_LDRD)
+	    return val > -256 && val < 256;
+	  else
+	    return val > -4096 && val < 4092;
+	}
+
+      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
+    }
+
   /* For quad modes, we restrict the constant offset to be slightly less
      than what the instruction format permits.  We do this because for
      quad mode moves, we will actually decompose them into two separate
@@ -8870,7 +8892,7 @@ arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
 	    && (INTVAL (index) & 3) == 0);
 
   /* We have no such constraint on double mode offsets, so we permit the
-     full range of the instruction format.  */
+     full range of the instruction format.  Note DImode is included here.  */
   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
     return (code == CONST_INT
 	    && INTVAL (index) < 1024
@@ -8883,27 +8905,6 @@ arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
 	    && INTVAL (index) > -1024
 	    && (INTVAL (index) & 3) == 0);
 
-  if (arm_address_register_rtx_p (index, strict_p)
-      && (GET_MODE_SIZE (mode) <= 4))
-    return 1;
-
-  if (mode == DImode || mode == DFmode)
-    {
-      if (code == CONST_INT)
-	{
-	  HOST_WIDE_INT val = INTVAL (index);
-
-	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
-	     If vldr is selected it uses arm_coproc_mem_operand.  */
-	  if (TARGET_LDRD)
-	    return val > -256 && val < 256;
-	  else
-	    return val > -4096 && val < 4092;
-	}
-
-      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
-    }
-
   if (GET_MODE_SIZE (mode) <= 4
       && ! (arm_arch4
 	    && (mode == HImode
@@ -9006,7 +9007,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
 	    && (INTVAL (index) & 3) == 0);
 
   /* We have no such constraint on double mode offsets, so we permit the
-     full range of the instruction format.  */
+     full range of the instruction format.  Note DImode is included here.  */
   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
     return (code == CONST_INT
 	    && INTVAL (index) < 1024
@@ -9017,6 +9018,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
       && (GET_MODE_SIZE (mode) <= 4))
     return 1;
 
+  /* This handles DImode if !TARGET_NEON, and DFmode if !TARGET_VFP_BASE.  */
   if (mode == DImode || mode == DFmode)
     {
       if (code == CONST_INT)
@@ -20865,10 +20867,9 @@ output_move_neon (rtx *operands)
 	int overlap = -1;
 	for (i = 0; i < nregs; i++)
 	  {
-	    /* We're only using DImode here because it's a convenient
-	       size.  */
-	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
-	    ops[1] = adjust_address (mem, DImode, 8 * i);
+	    /* Use DFmode for vldr/vstr.  */
+	    ops[0] = gen_rtx_REG (DFmode, REGNO (reg) + 2 * i);
+	    ops[1] = adjust_address_nv (mem, DFmode, 8 * i);
 	    if (reg_overlap_mentioned_p (ops[0], mem))
 	      {
 		gcc_assert (overlap == -1);
@@ -20885,8 +20886,8 @@ output_move_neon (rtx *operands)
 	  }
 	if (overlap != -1)
 	  {
-	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
-	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
+	    ops[0] = gen_rtx_REG (DFmode, REGNO (reg) + 2 * overlap);
+	    ops[1] = adjust_address_nv (mem, DFmode, 8 * overlap);
 	    if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
 	      sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
 	    else
diff --git a/gcc/testsuite/gcc.target/arm/pr115153.c b/gcc/testsuite/gcc.target/arm/pr115153.c
new file mode 100644
index 0000000000000000000000000000000000000000..80b57acf87ec667123146873afab1cd4a581e7f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/pr115153.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -marm" } */
+/* { dg-require-effective-target arm_arch_v7ve_neon_ok } */
+/* { dg-add-options arm_arch_v7ve_neon } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** f1:
+**	add	r0, r0, #256
+**	ldrd	r0, r1, \[r0\]
+**	bx	lr
+*/
+long long f1 (long long *p)
+{
+  return __atomic_load_n (p + 32, __ATOMIC_RELAXED);
+}
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index e7a1984704abd7a46008bd2fe97f94257ea6ab76..d3edc7d839ec2d9460501c157c1129176542d870 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -5675,6 +5675,8 @@ foreach { armfunc armflag armdefs } {
 	v7em "-march=armv7e-m+fp -mthumb" __ARM_ARCH_7EM__
 	v7ve "-march=armv7ve+fp -marm"
 		"__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV"
+	v7ve_neon "-march=armv7ve+simd -mfpu=auto -mfloat-abi=softfp"
+		  "__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV && __ARM_NEON__"
 	v8a "-march=armv8-a+simd" __ARM_ARCH_8A__
 	v8a_hard "-march=armv8-a+simd -mfpu=auto -mfloat-abi=hard" __ARM_ARCH_8A__
 	v8_1a "-march=armv8.1-a+simd" __ARM_ARCH_8A__