From ed8cd42d138fa048e0c0eff1ea28b39f5abe1c29 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@orcam.me.uk>
Date: Sun, 12 Jan 2025 16:48:54 +0000
Subject: [PATCH] Alpha: Fix a block move pessimisation with zero-extension after LDWU

For the BWX case we have a pessimisation in `alpha_expand_block_move' for
HImode loads where we place the data loaded into a HImode register as
well, therefore losing information that indeed the data loaded has
already been zero-extended to the full DImode width of the register.
Later on when we store this data in QImode quantities into an unaligned
destination, we zero-extend it again for the purpose of right-shifting,
such as with the test case included producing code at `-O2' as follows:

	ldah $2,unaligned_src_hi($29)	!gprelhigh
	lda $1,unaligned_src_hi($2)	!gprellow
	ldwu $6,unaligned_src_hi($2)	!gprellow
	ldwu $5,2($1)
	ldwu $4,4($1)
	bis $31,$31,$31
	zapnot $6,3,$3	# Redundant!
	ldbu $7,6($1)
	zapnot $5,3,$2	# Redundant!
	stb $6,0($16)
	zapnot $4,3,$1	# Redundant!
	stb $5,2($16)
	srl $3,8,$3
	stb $4,4($16)
	srl $2,8,$2
	stb $3,1($16)
	srl $1,8,$1
	stb $2,3($16)
	stb $1,5($16)
	stb $7,6($16)

The non-BWX case is unaffected, because there we use byte insertion, so
we don't care that data is held in a HImode register.

Address this by making the holding RTX a HImode subreg of the original
DImode register, which the RTL passes can then see through and eliminate
the zero-extension where otherwise required, resulting in this shortened
code:

	ldah $2,unaligned_src_hi($29)	!gprelhigh
	lda $1,unaligned_src_hi($2)	!gprellow
	ldwu $4,unaligned_src_hi($2)	!gprellow
	ldwu $3,2($1)
	ldwu $2,4($1)
	bis $31,$31,$31
	srl $4,8,$6
	ldbu $1,6($1)
	srl $3,8,$5
	stb $4,0($16)
	stb $6,1($16)
	srl $2,8,$4
	stb $3,2($16)
	stb $5,3($16)
	stb $2,4($16)
	stb $4,5($16)
	stb $1,6($16)

While at it, reformat the enclosing do-while statement according to the
GNU Coding Standards, observing that in this case it does not obfuscate
the change owing to the odd original indentation.
gcc/ * config/alpha/alpha.cc (alpha_expand_block_move): Use a HImode subreg of a DImode register to hold data from an aligned HImode load. --- gcc/config/alpha/alpha.cc | 17 +++++++++++------ .../gcc.target/alpha/memcpy-hi-unaligned-dst.c | 16 ++++++++++++++++ 2 files changed, 27 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.target/alpha/memcpy-hi-unaligned-dst.c diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc index 8ec9e8c5d399..6965ece16d0b 100644 --- a/gcc/config/alpha/alpha.cc +++ b/gcc/config/alpha/alpha.cc @@ -3999,14 +3999,19 @@ alpha_expand_block_move (rtx operands[]) if (bytes >= 2) { if (src_align >= 16) - { - do { - data_regs[nregs++] = tmp = gen_reg_rtx (HImode); - emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs)); + do + { + tmp = gen_reg_rtx (DImode); + emit_move_insn (tmp, + expand_simple_unop (DImode, SET, + adjust_address (orig_src, + HImode, ofs), + NULL_RTX, 1)); + data_regs[nregs++] = gen_rtx_SUBREG (HImode, tmp, 0); bytes -= 2; ofs += 2; - } while (bytes >= 2); - } + } + while (bytes >= 2); else if (! TARGET_BWX) { data_regs[nregs++] = tmp = gen_reg_rtx (HImode); diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-hi-unaligned-dst.c b/gcc/testsuite/gcc.target/alpha/memcpy-hi-unaligned-dst.c new file mode 100644 index 000000000000..4e3c02f5b906 --- /dev/null +++ b/gcc/testsuite/gcc.target/alpha/memcpy-hi-unaligned-dst.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mbwx" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ + +unsigned short unaligned_src_hi[4]; + +void +memcpy_unaligned_dst_hi (void *dst) +{ + __builtin_memcpy (dst, unaligned_src_hi, 7); +} + +/* { dg-final { scan-assembler-times "\\sldwu\\s" 3 } } */ +/* { dg-final { scan-assembler-times "\\sldbu\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstb\\s" 7 } } */ +/* { dg-final { scan-assembler-not "\\szapnot\\s" } } */ -- GitLab