From 9371640999eedb8bac3fb9d1429db8a1a905b853 Mon Sep 17 00:00:00 2001
From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Tue, 6 Jun 2023 10:51:34 +0100
Subject: [PATCH] aarch64: Reimplement URSHR,SRSHR patterns with standard RTL codes

Having converted the patterns for the URSRA, SRSRA instructions to
standard RTL codes, we can also easily convert the non-accumulating
forms URSHR, SRSHR.  This patch does that, reusing the helpers and
predicates introduced in that patch in a straightforward way.  This
allows GCC to perform the optimisations in the testcase, matching what
Clang does.

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md (aarch64_<sur>shr_n<mode>): Delete.
	(aarch64_<sra_op>rshr_n<mode><vczle><vczbe>_insn): New define_insn.
	(aarch64_<sra_op>rshr_n<mode>): New define_expand.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/simd/vrshr_1.c: New test.
---
 gcc/config/aarch64/aarch64-simd.md            | 44 ++++++++++++---
 .../gcc.target/aarch64/simd/vrshr_1.c         | 56 +++++++++++++++++++
 2 files changed, 93 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vrshr_1.c

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index f7cf39f930ca..dd1b084f8569 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6586,17 +6586,47 @@
 ;; vrshr_n
 
-(define_insn "aarch64_<sur>shr_n<mode>"
+(define_insn "aarch64_<sra_op>rshr_n<mode><vczle><vczbe>_insn"
   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
-	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
-			   (match_operand:SI 2
-			     "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
-		      VRSHR_N))]
-  "TARGET_SIMD"
-  "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
+	(truncate:VSDQ_I_DI
+	  (SHIFTRT:<V2XWIDE>
+	    (plus:<V2XWIDE>
+	      (<SHIFTEXTEND>:<V2XWIDE>
+		(match_operand:VSDQ_I_DI 1 "register_operand" "w"))
+	      (match_operand:<V2XWIDE> 3 "aarch64_simd_rsra_rnd_imm_vec"))
+	    (match_operand:VSDQ_I_DI 2 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>"))))]
+  "TARGET_SIMD
+   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
+  "<sra_op>rshr\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
   [(set_attr "type" "neon_sat_shift_imm<q>")]
 )
 
+(define_expand "aarch64_<sra_op>rshr_n<mode>"
+  [(match_operand:VSDQ_I_DI 0 "register_operand")
+   (SHIFTRT:VSDQ_I_DI
+     (match_operand:VSDQ_I_DI 1 "register_operand")
+     (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))]
+  "TARGET_SIMD"
+  {
+    /* Use this expander to create the rounding constant vector, which is
+       1 << (shift - 1).  Use wide_int here to ensure that the right TImode
+       RTL is generated when handling the DImode expanders.  */
+    int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
+    wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
+    rtx shft = gen_int_mode (INTVAL (operands[2]), DImode);
+    rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
+    if (VECTOR_MODE_P (<MODE>mode))
+      {
+	shft = gen_const_vec_duplicate (<MODE>mode, shft);
+	rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
+      }
+
+    emit_insn (gen_aarch64_<sra_op>rshr_n<mode>_insn (operands[0], operands[1],
+						      shft, rnd));
+    DONE;
+  }
+)
+
 ;; v(r)sra_n
 
 (define_insn "aarch64_<sur>sra_ndi"
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vrshr_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vrshr_1.c
new file mode 100644
index 000000000000..a5e10ff99e93
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vrshr_1.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+
+/*
+** foo1:
+**	srsra	v0\.16b, v1\.16b, 3
+**	ret
+*/
+
+int8x16_t
+foo1 (int8x16_t acc, int8x16_t a)
+{
+  return vaddq_s8 (acc, vrshrq_n_s8 (a, 3));
+}
+
+/*
+** foo2:
+**	srshr	v0\.16b, v1\.16b, 3
+**	ret
+*/
+
+int8x16_t
+foo2 (int8x16_t acc, int8x16_t a)
+{
+  int8x16_t z = vdupq_n_s8 (0);
+  return vrsraq_n_s8 (z, a, 3);
+}
+
+/*
+** foo3:
+**	ursra	v0\.16b, v1\.16b, 3
+**	ret
+*/
+
+uint8x16_t
+foo3 (uint8x16_t acc, uint8x16_t a)
+{
+  return vaddq_u8 (acc, vrshrq_n_u8 (a, 3));
+}
+
+/*
+** foo4:
+**	urshr	v0\.16b, v1\.16b, 3
+**	ret
+*/
+
+uint8x16_t
+foo4 (uint8x16_t acc, uint8x16_t a)
+{
+  uint8x16_t z = vdupq_n_u8 (0);
+  return vrsraq_n_u8 (z, a, 3);
+}
-- 
GitLab
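
For reference, the rounding right shift that URSHR/SRSHR perform is
(x + (1 << (shift - 1))) >> shift, with the addition done at twice the
element width so it cannot wrap.  The following is a minimal scalar
sketch of that semantics in C; it is illustrative only and not part of
the patch (the helper name urshr_n_u8 is invented here):

#include <stdint.h>
#include <stdio.h>

/* Scalar model of URSHR #shift on one unsigned byte: zero-extend,
   add the rounding constant 1 << (shift - 1), shift right, truncate.
   This mirrors the (truncate (lshiftrt (plus (zero_extend x) rnd) shift))
   shape that the new define_insn matches.  */
static uint8_t
urshr_n_u8 (uint8_t x, int shift)   /* assumes 1 <= shift <= 8 */
{
  uint16_t wide = (uint16_t) x + (uint16_t) (1u << (shift - 1));
  return (uint8_t) (wide >> shift);
}

int
main (void)
{
  /* 0xff + 4 = 0x103 and 0x103 >> 3 = 0x20.  An 8-bit addition would
     have wrapped to 0x03 and given 0 instead, which is why the pattern
     widens to <V2XWIDE> before performing the plus.  */
  printf ("%#x\n", urshr_n_u8 (0xff, 3));   /* prints 0x20 */
  return 0;
}

This widening is also why the expander builds the rounding constant with
wide_int: for the DImode variants the doubled-width mode is TImode, which
a plain HOST_WIDE_INT immediate could not represent safely.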