From 96559be74bfa355507472fc020c95c14587c227f Mon Sep 17 00:00:00 2001 From: Georg-Johann Lay <avr@gjlay.de> Date: Fri, 5 Jul 2024 23:49:43 +0200 Subject: [PATCH] AVR: Create more opportunities for -mfuse-add optimization. avr_split_tiny_move() was only run for AVR_TINY because it has no PLUS addressing modes. Same applies to the X register on ordinary cores, and also to the Z register when used with [E]LPM. For example, without this patch long long addLL (long long *a, long long *b) { return *a + *b; } compiles with "-mmcu=atmega128 -Os -dp" to: ... movw r26,r24 ; 80 [c=4 l=1] *movhi/0 movw r30,r22 ; 81 [c=4 l=1] *movhi/0 ld r18,X ; 82 [c=4 l=1] movqi_insn/3 adiw r26,1 ; 83 [c=4 l=3] movqi_insn/3 ld r19,X sbiw r26,1 adiw r26,2 ; 84 [c=4 l=3] movqi_insn/3 ld r20,X sbiw r26,2 adiw r26,3 ; 85 [c=4 l=3] movqi_insn/3 ld r21,X sbiw r26,3 adiw r26,4 ; 86 [c=4 l=3] movqi_insn/3 ld r22,X sbiw r26,4 adiw r26,5 ; 87 [c=4 l=3] movqi_insn/3 ld r23,X sbiw r26,5 adiw r26,6 ; 88 [c=4 l=3] movqi_insn/3 ld r24,X sbiw r26,6 adiw r26,7 ; 89 [c=4 l=2] movqi_insn/3 ld r25,X ld r10,Z ; 90 [c=4 l=1] movqi_insn/3 ... whereas with this patch it becomes: ... movw r26,r24 ; 80 [c=4 l=1] *movhi/0 movw r30,r22 ; 81 [c=4 l=1] *movhi/0 ld r18,X+ ; 140 [c=4 l=1] movqi_insn/3 ld r19,X+ ; 142 [c=4 l=1] movqi_insn/3 ld r20,X+ ; 144 [c=4 l=1] movqi_insn/3 ld r21,X+ ; 146 [c=4 l=1] movqi_insn/3 ld r22,X+ ; 148 [c=4 l=1] movqi_insn/3 ld r23,X+ ; 150 [c=4 l=1] movqi_insn/3 ld r24,X+ ; 152 [c=4 l=1] movqi_insn/3 ld r25,X ; 109 [c=4 l=1] movqi_insn/3 ld r10,Z ; 111 [c=4 l=1] movqi_insn/3 ... gcc/ * config/avr/avr.md: Also split with avr_split_tiny_move() for non-AVR_TINY. * config/avr/avr.cc (avr_split_tiny_move): Don't change memory references with base regs that can do PLUS addressing. (avr_out_lpm_no_lpmx) [POST_INC]: Don't output final ADIW when the address register is unused after. gcc/testsuite/ * gcc.target/avr/torture/fuse-add.c: New test. 
--- gcc/config/avr/avr.cc | 39 ++++++------ gcc/config/avr/avr.md | 3 +- .../gcc.target/avr/torture/fuse-add.c | 59 +++++++++++++++++++ 3 files changed, 80 insertions(+), 21 deletions(-) create mode 100644 gcc/testsuite/gcc.target/avr/torture/fuse-add.c diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index f048bf5fd41c..d299fceb7824 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -4471,28 +4471,21 @@ avr_out_lpm_no_lpmx (rtx_insn *insn, rtx *xop, int *plen) gcc_assert (REG_Z == REGNO (XEXP (addr, 0)) && n_bytes <= 4); - if (regno_dest == LPM_REGNO) - avr_asm_len ("%4lpm" CR_TAB - "adiw %2,1", xop, plen, 2); - else - avr_asm_len ("%4lpm" CR_TAB - "mov %A0,%3" CR_TAB - "adiw %2,1", xop, plen, 3); + for (int i = 0; i < n_bytes; ++i) + { + rtx reg = simplify_gen_subreg (QImode, dest, GET_MODE (dest), i); - if (n_bytes >= 2) - avr_asm_len ("%4lpm" CR_TAB - "mov %B0,%3" CR_TAB - "adiw %2,1", xop, plen, 3); + if (i > 0) + avr_asm_len ("adiw %2,1", xop, plen, 1); - if (n_bytes >= 3) - avr_asm_len ("%4lpm" CR_TAB - "mov %C0,%3" CR_TAB - "adiw %2,1", xop, plen, 3); + avr_asm_len ("%4lpm", xop, plen, 1); - if (n_bytes >= 4) - avr_asm_len ("%4lpm" CR_TAB - "mov %D0,%3" CR_TAB - "adiw %2,1", xop, plen, 3); + if (REGNO (reg) != LPM_REGNO) + avr_asm_len ("mov %0,r0", &reg, plen, 1); + } + + if (! _reg_unused_after (insn, xop[2], false)) + avr_asm_len ("adiw %2,1", xop, plen, 1); break; /* POST_INC */ @@ -6685,6 +6678,14 @@ avr_split_tiny_move (rtx_insn * /*insn*/, rtx *xop) if (REGNO (base) > REG_Z) return false; + if (! AVR_TINY + // Only keep base registers that can't do PLUS addressing. 
+ && ((REGNO (base) != REG_X + && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem))) + || avr_load_libgcc_p (mem) + || avr_mem_memx_p (mem))) + return false; + bool volatile_p = MEM_VOLATILE_P (mem); bool mem_volatile_p = false; if (frame_pointer_needed diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index dabf4c0fc5a8..2783b8c986f1 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -1035,8 +1035,7 @@ [(parallel [(set (match_operand:MOVMODE 0 "nonimmediate_operand") (match_operand:MOVMODE 1 "general_operand")) (clobber (reg:CC REG_CC))])] - "AVR_TINY - && reload_completed + "reload_completed && avr_fuse_add > 0 // Only split this for .split2 when we are before // pass .avr-fuse-add (which runs after proep). diff --git a/gcc/testsuite/gcc.target/avr/torture/fuse-add.c b/gcc/testsuite/gcc.target/avr/torture/fuse-add.c new file mode 100644 index 000000000000..b78b1aa9fc92 --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/torture/fuse-add.c @@ -0,0 +1,59 @@ +/* { dg-do run } */ +/* { dg-additional-options "-std=gnu99" } */ + +typedef __UINT64_TYPE__ uint64_t; + +extern const uint64_t aa __asm ("real_aa"); +extern const uint64_t bb __asm ("real_bb"); + +__attribute__((used)) const uint64_t real_aa = 0x1122334455667788; +__attribute__((used)) const uint64_t real_bb = 0x0908070605040302; + +__attribute__((noinline,noclone)) +uint64_t add1 (const uint64_t *aa, const uint64_t *bb) +{ + return *aa + *bb; +} + +#ifdef __FLASH +extern const __flash uint64_t fa __asm ("real_fa"); +extern const __flash uint64_t fb __asm ("real_fb"); + +__attribute__((used)) const __flash uint64_t real_fa = 0x1122334455667788; +__attribute__((used)) const __flash uint64_t real_fb = 0x0908070605040302; + +__attribute__((noinline,noclone)) +uint64_t add2 (const __flash uint64_t *aa, const uint64_t *bb) +{ + return *aa + *bb; +} + +uint64_t add3 (const uint64_t *aa, const __flash uint64_t *bb) +{ + return *aa + *bb; +} + +uint64_t add4 (const __flash uint64_t *aa, const __flash 
uint64_t *bb) +{ + return *aa + *bb; +} +#endif /* have __flash */ + +int main (void) +{ + if (add1 (&aa, &bb) != real_aa + real_bb) + __builtin_exit (__LINE__); + +#ifdef __FLASH + if (add2 (&fa, &bb) != real_fa + real_bb) + __builtin_exit (__LINE__); + + if (add3 (&aa, &fb) != real_aa + real_fb) + __builtin_exit (__LINE__); + + if (add4 (&fa, &fb) != real_fa + real_fb) + __builtin_exit (__LINE__); +#endif + + return 0; +} -- GitLab