AArch64: add vec_widen_<sur>shiftl_{lo,hi}_<mode> expanders together with the
internal SHLL/SHLL2 patterns they emit, add a run-time test, and size
vec_oprnds1 from vec_oprnds0 when vectorizing WIDEN_LSHIFT_EXPR.

Note: leading whitespace in this patch was reconstructed following GNU
conventions; if context lines do not match the tree, apply with
--ignore-whitespace.  The post-image blob ids on the "index" lines are those
of the original submission.

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e8c951fe55e2698e47f25eb4376790b7e144ed15..68baf416045178b0ebcfeb8de2d201f625f1c317 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4664,8 +4664,80 @@
   [(set_attr "type" "neon_sat_shift_reg<q>")]
 )
 
+;; Widening left shift of the low half of operand 1 by immediate operand 2.
+;; Emits the aarch64_<sur>shll<mode>_internal pattern defined below.
+(define_expand "vec_widen_<sur>shiftl_lo_<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
+			 (match_operand:SI 2
+			   "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
+			 VSHLL))]
+  "TARGET_SIMD"
+  {
+    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
+    emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],
+						     p, operands[2]));
+    DONE;
+  }
+)
+
+;; Widening left shift of the high half of operand 1 by immediate operand 2.
+;; Emits the aarch64_<sur>shll2<mode>_internal pattern defined below.
+(define_expand "vec_widen_<sur>shiftl_hi_<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
+			 (match_operand:SI 2
+			   "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
+			 VSHLL))]
+  "TARGET_SIMD"
+  {
+    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
+    emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],
+						      p, operands[2]));
+    DONE;
+  }
+)
+
 ;; vshll_n
 
+(define_insn "aarch64_<sur>shll<mode>_internal"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+	(unspec:<VWIDE> [(vec_select:<VHALF>
+			    (match_operand:VQW 1 "register_operand" "w")
+			    (match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
+			 (match_operand:SI 3
+			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
+			 VSHLL))]
+  "TARGET_SIMD"
+  {
+    /* A shift by the element width is emitted with the unprefixed mnemonic.  */
+    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
+      return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
+    else
+      return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
+  }
+  [(set_attr "type" "neon_shift_imm_long")]
+)
+
+(define_insn "aarch64_<sur>shll2<mode>_internal"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+	(unspec:<VWIDE> [(vec_select:<VHALF>
+			    (match_operand:VQW 1 "register_operand" "w")
+			    (match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
+			 (match_operand:SI 3
+			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
+			 VSHLL))]
+  "TARGET_SIMD"
+  {
+    /* A shift by the element width is emitted with the unprefixed mnemonic.  */
+    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
+      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
+    else
+      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
+  }
+  [(set_attr "type" "neon_shift_imm_long")]
+)
+
 (define_insn "aarch64_<sur>shll_n<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
 	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c b/gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c
new file mode 100644
index 0000000000000000000000000000000000000000..48a3719d4baf17fcf1025e0871bae21ba47cc850
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps" } */
+#include <stdint.h>
+#include <string.h>
+
+#pragma GCC target "+nosve"
+
+#define ARR_SIZE 1024
+
+/* Should produce an shll,shll2 pair.  */
+void sshll_opt (int32_t *foo, int16_t *a, int16_t *b)
+{
+  for (int i = 0; i < ARR_SIZE - 3; i = i + 4)
+    {
+      foo[i] = a[i] << 16;
+      foo[i + 1] = a[i + 1] << 16;
+      foo[i + 2] = a[i + 2] << 16;
+      foo[i + 3] = a[i + 3] << 16;
+    }
+}
+
+/* Unoptimized reference for sshll_opt; main compares the two results.  */
+__attribute__((optimize (0)))
+void sshll_nonopt (int32_t *foo, int16_t *a, int16_t *b)
+{
+  for (int i = 0; i < ARR_SIZE - 3; i = i + 4)
+    {
+      foo[i] = a[i] << 16;
+      foo[i + 1] = a[i + 1] << 16;
+      foo[i + 2] = a[i + 2] << 16;
+      foo[i + 3] = a[i + 3] << 16;
+    }
+}
+
+/* Fill A and B with non-negative values that fit in int16_t.  */
+void __attribute__((optimize (0)))
+init (int16_t *a, int16_t *b)
+{
+  for (int i = 0; i < ARR_SIZE; i++)
+    {
+      a[i] = i;
+      b[i] = 2 * i;
+    }
+}
+
+int __attribute__((optimize (0)))
+main (void)
+{
+  int32_t foo_arr[ARR_SIZE];
+  int32_t bar_arr[ARR_SIZE];
+  int16_t a[ARR_SIZE];
+  int16_t b[ARR_SIZE];
+
+  init (a, b);
+  sshll_opt (foo_arr, a, b);
+  sshll_nonopt (bar_arr, a, b);
+  /* Compare the whole arrays: memcmp takes a byte count.  */
+  if (memcmp (foo_arr, bar_arr, sizeof (foo_arr)) != 0)
+    return 1;
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times {\tshll\t} 1} } */
+/* { dg-final { scan-assembler-times {\tshll2\t} 1} } */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index f88f07a62df9f4906ac8b74b69b5f1955c69656b..a4980a931a99047c0fba9397caa74b91efdf0611 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -4935,8 +4935,9 @@ vectorizable_conversion (vec_info *vinfo,
 			     &vec_oprnds1);
 	  if (code == WIDEN_LSHIFT_EXPR)
 	    {
-	      vec_oprnds1.create (ncopies * ninputs);
-	      for (i = 0; i < ncopies * ninputs; ++i)
+	      int oprnds_size = vec_oprnds0.length ();
+	      vec_oprnds1.create (oprnds_size);
+	      for (i = 0; i < oprnds_size; ++i)
 		vec_oprnds1.quick_push (op1);
 	    }
 	  /* Arguments are ready.  Create the new vector stmts.  */