diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc index 958a785ffd0ec4f5333a078fa175ac8afef99b80..8ec9e8c5d3991f0bd046af8324d3d71d045a7a61 100644 --- a/gcc/config/alpha/alpha.cc +++ b/gcc/config/alpha/alpha.cc @@ -3931,14 +3931,44 @@ alpha_expand_block_move (rtx operands[]) { words = bytes / 4; - for (i = 0; i < words; ++i) - data_regs[nregs + i] = gen_reg_rtx (SImode); + /* Load an even quantity of SImode data pieces only; an odd trailing piece, if any, is loaded separately below. */ + unsigned int hwords = words / 2; + for (i = 0; i / 2 < hwords; ++i) + { + data_regs[nregs + i] = gen_reg_rtx (SImode); + emit_move_insn (data_regs[nregs + i], + adjust_address (orig_src, SImode, ofs + i * 4)); + } - for (i = 0; i < words; ++i) - emit_move_insn (data_regs[nregs + i], - adjust_address (orig_src, SImode, ofs + i * 4)); + /* If we'll be using unaligned stores, merge data from pairs + of SImode registers into DImode registers so that we can + store it more efficiently via quadword unaligned stores. In each pair the piece loaded from the higher source offset is shifted left by 32 bits and IORed with the piece from the lower offset, and merged pair I replaces slot I of this group of DATA_REGS. NOTE(review): the literal 1 passed to expand_simple_binop is presumably UNSIGNEDP, which would zero-extend the SImode inputs; confirm. */ + unsigned int j; + if (dst_align < 32) + for (i = 0, j = 0; i < words / 2; ++i, j = i * 2) + { + rtx hi = expand_simple_binop (DImode, ASHIFT, + data_regs[nregs + j + 1], + GEN_INT (32), NULL_RTX, + 1, OPTAB_WIDEN); + data_regs[nregs + i] = expand_simple_binop (DImode, IOR, hi, + data_regs[nregs + j], + NULL_RTX, + 1, OPTAB_WIDEN); + } + else + j = i; /* No merging here, so I already counts every SImode piece loaded so far. */ - nregs += words; + /* Take care of any remaining odd trailing SImode data piece. J is the index of that piece within the source, I the next unused slot of this group. */ + if (j < words) + { + data_regs[nregs + i] = gen_reg_rtx (SImode); + emit_move_insn (data_regs[nregs + i], + adjust_address (orig_src, SImode, ofs + j * 4)); + ++i; + } + + nregs += i; bytes -= words * 4; ofs += words * 4; } @@ -4057,13 +4087,12 @@ alpha_expand_block_move (rtx operands[]) } /* Due to the above, this won't be aligned. */ - /* ??? If we have more than one of these, consider constructing full - words in registers and using alpha_expand_unaligned_store_words. 
*/ while (i < nregs && GET_MODE (data_regs[i]) == SImode) { alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs); ofs += 4; i++; + gcc_assert (i == nregs || GET_MODE (data_regs[i]) != SImode); /* This loop is expected to store a single SImode register at most. NOTE(review): presumably ensured by the code that fills DATA_REGS; confirm. */ } if (dst_align >= 16) diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-si-aligned.c b/gcc/testsuite/gcc.target/alpha/memcpy-si-aligned.c new file mode 100644 index 0000000000000000000000000000000000000000..2572a3187e9d0638f0b66de3b84d5aa5c7b70195 --- /dev/null +++ b/gcc/testsuite/gcc.target/alpha/memcpy-si-aligned.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ + +unsigned int aligned_src_si[17] = { [0 ... 16] = 0xeaebeced }; +unsigned int aligned_dst_si[17] = { [0 ... 16] = 0xdcdbdad9 }; +/* Copy 60 bytes from element 1 of aligned_src_si to element 1 of aligned_dst_si. */ +void +memcpy_aligned_data_si (void) +{ + __builtin_memcpy (aligned_dst_si + 1, aligned_src_si + 1, 60); +} + +/* { dg-final { scan-assembler-times "\\sldl\\s" 15 } } */ +/* { dg-final { scan-assembler-times "\\sstl\\s" 15 } } */ +/* { dg-final { scan-assembler-not "\\s(?:ldq_u|stq_u)\\s" } } */ diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned-dst.c b/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned-dst.c new file mode 100644 index 0000000000000000000000000000000000000000..a2efade87ca47bc42ca071824bbbd67983e1fac3 --- /dev/null +++ b/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned-dst.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ + +unsigned int unaligned_src_si[17] = { [0 ... 
16] = 0xfefdfcfb }; +/* Copy 60 bytes from element 1 of unaligned_src_si to DST, an arbitrary pointer supplied by the caller. */ +void +memcpy_unaligned_dst_si (void *dst) +{ + __builtin_memcpy (dst, unaligned_src_si + 1, 60); +} + +/* { dg-final { scan-assembler-times "\\sldl\\s" 15 } } */ +/* { dg-final { scan-assembler-times "\\sldq_u\\s" 4 } } */ +/* { dg-final { scan-assembler-times "\\sstq_u\\s" 10 } } */ +/* { dg-final { scan-assembler-not "\\sstl\\s" } } */ diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned-src-bwx.c b/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned-src-bwx.c new file mode 100644 index 0000000000000000000000000000000000000000..df25c772a6ca15c472942760190369c1bce1b426 --- /dev/null +++ b/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned-src-bwx.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-mbwx" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ +/* Repeat the copy test from the included file with the compiler options selected above. */ +#include "memcpy-si-unaligned-src.c" + +/* { dg-final { scan-assembler-times "\\sldbu\\s" 4 } } */ +/* { dg-final { scan-assembler-times "\\sldq_u\\s" 8 } } */ +/* { dg-final { scan-assembler-times "\\sstb\\s" 4 } } */ +/* { dg-final { scan-assembler-times "\\sstl\\s" 14 } } */ +/* { dg-final { scan-assembler-not "\\s(?:ldl|stq_u)\\s" } } */ diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned-src.c b/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned-src.c new file mode 100644 index 0000000000000000000000000000000000000000..5140d2f8f47e9bb515d0f0d12bf4cc31427ddeeb --- /dev/null +++ b/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned-src.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-mno-bwx" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ + +unsigned int unaligned_dst_si[17] = { [0 ... 
16] = 0xc8c9cacb }; +/* Copy 60 bytes from SRC, an arbitrary pointer supplied by the caller, to element 1 of unaligned_dst_si. */ +void +memcpy_unaligned_src_si (const void *src) +{ + __builtin_memcpy (unaligned_dst_si + 1, src, 60); +} + +/* { dg-final { scan-assembler-times "\\sldq_u\\s" 10 } } */ +/* { dg-final { scan-assembler-times "\\sstl\\s" 15 } } */ +/* { dg-final { scan-assembler-not "\\s(?:ldl|stq_u)\\s" } } */ diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned.c b/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned.c new file mode 100644 index 0000000000000000000000000000000000000000..9ce61ab801fb8ec9260cedfa7dc612b7fbdb3fd6 --- /dev/null +++ b/gcc/testsuite/gcc.target/alpha/memcpy-si-unaligned.c @@ -0,0 +1,51 @@ +/* { dg-do run } */ +/* { dg-additional-sources memcpy-si-aligned.c } */ +/* { dg-additional-sources memcpy-si-unaligned-src.c } */ +/* { dg-additional-sources memcpy-si-unaligned-dst.c } */ +/* { dg-options "" } */ + +void memcpy_aligned_data_si (void); +void memcpy_unaligned_dst_si (void *); +void memcpy_unaligned_src_si (const void *); + +extern unsigned int aligned_src_si[]; +extern unsigned int aligned_dst_si[]; +extern unsigned int unaligned_src_si[]; +extern unsigned int unaligned_dst_si[]; +/* Fill elements 1 to 15 of unaligned_src_si with a known pattern, pass the data through all three copy routines in turn and verify that it arrives in unaligned_dst_si while elements 0 and 16 of all four arrays still hold their initial values. Return 0 on success and 1 on any mismatch. */ +int +main (void) +{ + unsigned int v; + int i; + + for (i = 1, v = 0x04030201; i < 16; i++, v += 0x04040404) + unaligned_src_si[i] = v; + asm ("" : : : "memory"); + memcpy_unaligned_dst_si (aligned_src_si + 1); + asm ("" : : : "memory"); + memcpy_aligned_data_si (); + asm ("" : : : "memory"); + memcpy_unaligned_src_si (aligned_dst_si + 1); + asm ("" : : : "memory"); + for (i = 1, v = 0x04030201; i < 16; i++, v += 0x04040404) + if (unaligned_dst_si[i] != v) + return 1; + if (unaligned_src_si[0] != 0xfefdfcfb) + return 1; + if (unaligned_src_si[16] != 0xfefdfcfb) + return 1; + if (aligned_src_si[0] != 0xeaebeced) + return 1; + if (aligned_src_si[16] != 0xeaebeced) + return 1; + if (aligned_dst_si[0] != 0xdcdbdad9) + return 1; + if (aligned_dst_si[16] != 0xdcdbdad9) + return 1; + if (unaligned_dst_si[0] != 0xc8c9cacb) + return 1; + if 
(unaligned_dst_si[16] != 0xc8c9cacb) + return 1; + return 0; +}