diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc
index 958a785ffd0ec4f5333a078fa175ac8afef99b80..8ec9e8c5d3991f0bd046af8324d3d71d045a7a61 100644
--- a/gcc/config/alpha/alpha.cc
+++ b/gcc/config/alpha/alpha.cc
@@ -3931,14 +3931,44 @@ alpha_expand_block_move (rtx operands[])
     {
       words = bytes / 4;
 
-      for (i = 0; i < words; ++i)
-	data_regs[nregs + i] = gen_reg_rtx (SImode);
+      /* Load an even quantity of SImode data pieces only.  */
+      unsigned int hwords = words / 2;
+      for (i = 0; i / 2 < hwords; ++i)
+	{
+	  data_regs[nregs + i] = gen_reg_rtx (SImode);
+	  emit_move_insn (data_regs[nregs + i],
+			  adjust_address (orig_src, SImode, ofs + i * 4));
+	}
 
-      for (i = 0; i < words; ++i)
-	emit_move_insn (data_regs[nregs + i],
-			adjust_address (orig_src, SImode, ofs + i * 4));
+      /* If we'll be using unaligned stores, merge data from pairs
+	 of SImode registers into DImode registers so that we can
+	 store it more efficiently via quadword unaligned stores.  */
+      unsigned int j;
+      if (dst_align < 32)
+	for (i = 0, j = 0; i < words / 2; ++i, j = i * 2)
+	  {
+	    rtx hi = expand_simple_binop (DImode, ASHIFT,
+					  data_regs[nregs + j + 1],
+					  GEN_INT (32), NULL_RTX,
+					  1, OPTAB_WIDEN);
+	    data_regs[nregs + i] = expand_simple_binop (DImode, IOR, hi,
+							data_regs[nregs + j],
+							NULL_RTX,
+							1, OPTAB_WIDEN);
+	  }
+      else
+	j = i;
 
-      nregs += words;
+      /* Take care of any remaining odd trailing SImode data piece.  */
+      if (j < words)
+	{
+	  data_regs[nregs + i] = gen_reg_rtx (SImode);
+	  emit_move_insn (data_regs[nregs + i],
+			  adjust_address (orig_src, SImode, ofs + j * 4));
+	  ++i;
+	}
+
+      nregs += i;
       bytes -= words * 4;
       ofs += words * 4;
     }
@@ -4057,13 +4087,12 @@ alpha_expand_block_move (rtx operands[])
     }
 
   /* Due to the above, this won't be aligned.  */
-  /* ??? If we have more than one of these, consider constructing full
-     words in registers and using alpha_expand_unaligned_store_words.  */
   while (i < nregs && GET_MODE (data_regs[i]) == SImode)
     {
       alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
       ofs += 4;
       i++;
+      gcc_assert (i == nregs || GET_MODE (data_regs[i]) != SImode);
     }
 
   if (dst_align >= 16)
diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-si-aligned.c b/gcc/testsuite/gcc.target/alpha/memcpy-si-aligned.c
new file mode 100644
index 0000000000000000000000000000000000000000..2572a3187e9d0638f0b66de3b84d5aa5c7b70195
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/memcpy-si-aligned.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+/* Source and destination buffers, each pre-filled with its own
+   distinctive pattern.  The copy below leaves element 0 of both
+   arrays untouched.  */
+unsigned int aligned_src_si[17] = { [0 ... 16] = 0xeaebeced };
+unsigned int aligned_dst_si[17] = { [0 ... 16] = 0xdcdbdad9 };
+
+/* Copy 60 bytes from ALIGNED_SRC_SI to ALIGNED_DST_SI, starting at
+   element 1 of each array.  */
+void
+memcpy_aligned_data_si (void)
+{
+  __builtin_memcpy (aligned_dst_si + 1, aligned_src_si + 1, 60);
+}
+
+/* The output must contain exactly 15 LDL and 15 STL instructions and
+   no LDQ_U or STQ_U ones.  */
+/* { dg-final { scan-assembler-times "\\sldl\\s" 15 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s" 15 } } */
+/* { dg-final { scan-assembler-not "\\s(?:ldq_u|stq_u)\\s" } } */
diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned-dst.c b/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned-dst.c
new file mode 100644
index 0000000000000000000000000000000000000000..a2efade87ca47bc42ca071824bbbd67983e1fac3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned-dst.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+/* Source buffer pre-filled with a distinctive pattern.  The copy
+   below reads from element 1 onwards.  */
+unsigned int unaligned_src_si[17] = { [0 ... 16] = 0xfefdfcfb };
+
+/* Copy 60 bytes from UNALIGNED_SRC_SI, starting at element 1, to DST.
+   DST arrives as a plain `void *', so this function carries no
+   information about how the destination is aligned.  */
+void
+memcpy_unaligned_dst_si (void *dst)
+{
+  __builtin_memcpy (dst, unaligned_src_si + 1, 60);
+}
+
+/* The output must contain exactly 15 LDL, 4 LDQ_U and 10 STQ_U
+   instructions and no STL ones.  */
+/* { dg-final { scan-assembler-times "\\sldl\\s" 15 } } */
+/* { dg-final { scan-assembler-times "\\sldq_u\\s" 4 } } */
+/* { dg-final { scan-assembler-times "\\sstq_u\\s" 10 } } */
+/* { dg-final { scan-assembler-not "\\sstl\\s" } } */
diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned-src-bwx.c b/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned-src-bwx.c
new file mode 100644
index 0000000000000000000000000000000000000000..df25c772a6ca15c472942760190369c1bce1b426
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned-src-bwx.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-mbwx" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+#include "memcpy-si-unaligned-src.c"
+
+/* { dg-final { scan-assembler-times "\\sldbu\\s" 4 } } */
+/* { dg-final { scan-assembler-times "\\sldq_u\\s" 8 } } */
+/* { dg-final { scan-assembler-times "\\sstb\\s" 4 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s" 14 } } */
+/* { dg-final { scan-assembler-not "\\s(?:ldl|stq_u)\\s" } } */
diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned-src.c b/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned-src.c
new file mode 100644
index 0000000000000000000000000000000000000000..5140d2f8f47e9bb515d0f0d12bf4cc31427ddeeb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned-src.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mno-bwx" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+/* Destination buffer pre-filled with a distinctive pattern.  The copy
+   below writes from element 1 onwards.  */
+unsigned int unaligned_dst_si[17] = { [0 ... 16] = 0xc8c9cacb };
+
+/* Copy 60 bytes from SRC to UNALIGNED_DST_SI, starting at element 1.
+   SRC arrives as a plain `const void *', so this function carries no
+   information about how the source is aligned.  */
+void
+memcpy_unaligned_src_si (const void *src)
+{
+  __builtin_memcpy (unaligned_dst_si + 1, src, 60);
+}
+
+/* With the options above the output must contain exactly 10 LDQ_U and
+   15 STL instructions and no LDL or STQ_U ones.  */
+/* { dg-final { scan-assembler-times "\\sldq_u\\s" 10 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s" 15 } } */
+/* { dg-final { scan-assembler-not "\\s(?:ldl|stq_u)\\s" } } */
diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned.c b/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned.c
new file mode 100644
index 0000000000000000000000000000000000000000..9ce61ab801fb8ec9260cedfa7dc612b7fbdb3fd6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned.c
@@ -0,0 +1,66 @@
+/* { dg-do run } */
+/* { dg-additional-sources memcpy-si-aligned.c } */
+/* { dg-additional-sources memcpy-si-unaligned-src.c } */
+/* { dg-additional-sources memcpy-si-unaligned-dst.c } */
+/* { dg-options "" } */
+
+/* Copy routines and data buffers provided by the additional sources
+   named above.  */
+void memcpy_aligned_data_si (void);
+void memcpy_unaligned_dst_si (void *);
+void memcpy_unaligned_src_si (const void *);
+
+extern unsigned int aligned_src_si[];
+extern unsigned int aligned_dst_si[];
+extern unsigned int unaligned_src_si[];
+extern unsigned int unaligned_dst_si[];
+
+/* Pass a known sequence of words through all three copy routines in
+   turn, then check that it arrives intact and that elements 0 and 16
+   of every buffer still hold their initial fill pattern.  Return 0 on
+   success and 1 on any mismatch.  */
+int
+main (void)
+{
+  unsigned int v;
+  int i;
+
+  /* Seed elements 1 to 15 of the first buffer with distinct values.  */
+  for (i = 1, v = 0x04030201; i < 16; i++, v += 0x04040404)
+    unaligned_src_si[i] = v;
+  /* Each empty asm with a "memory" clobber makes the compiler treat
+     memory as possibly read and modified at that point.  */
+  asm ("" : : : "memory");
+  /* unaligned_src_si -> aligned_src_si.  */
+  memcpy_unaligned_dst_si (aligned_src_si + 1);
+  asm ("" : : : "memory");
+  /* aligned_src_si -> aligned_dst_si.  */
+  memcpy_aligned_data_si ();
+  asm ("" : : : "memory");
+  /* aligned_dst_si -> unaligned_dst_si.  */
+  memcpy_unaligned_src_si (aligned_dst_si + 1);
+  asm ("" : : : "memory");
+  /* The seeded values must have come through the chain unchanged.  */
+  for (i = 1, v = 0x04030201; i < 16; i++, v += 0x04040404)
+    if (unaligned_dst_si[i] != v)
+      return 1;
+  /* No copy above is meant to touch elements 0 and 16, so each must
+     still hold the value its array was initialized with.  */
+  if (unaligned_src_si[0] != 0xfefdfcfb)
+    return 1;
+  if (unaligned_src_si[16] != 0xfefdfcfb)
+    return 1;
+  if (aligned_src_si[0] != 0xeaebeced)
+    return 1;
+  if (aligned_src_si[16] != 0xeaebeced)
+    return 1;
+  if (aligned_dst_si[0] != 0xdcdbdad9)
+    return 1;
+  if (aligned_dst_si[16] != 0xdcdbdad9)
+    return 1;
+  if (unaligned_dst_si[0] != 0xc8c9cacb)
+    return 1;
+  if (unaligned_dst_si[16] != 0xc8c9cacb)
+    return 1;
+  return 0;
+}