# AArch64 copysign: build the sign-bit mask as a 128-bit integer vector.
#
# Summary of the hunks below:
#  * aarch64.md, copysign expander: the sign-bit constant is duplicated into
#    a <VQ_INT_EQUIV>mode vector and forced into a register up front.  That
#    register is used both by the negative-constant path (now
#    gen_ior<vq_int_equiv>3 on lowpart subregs) and as the last argument of
#    gen_copysign<mode>3_insn.
#  * aarch64.md, copysign insn: operand 3 changes from <V_INT_EQUIV> to
#    <VQ_INT_EQUIV>, and the `r`-constraint bfxil alternative is removed.
#  * aarch64.md comments: the example sequences for copysign and xorsign now
#    show movi + bit and movi + and/eor respectively.
#  * iterators.md: adds the VQ_INT_EQUIV / vq_int_equiv mode attributes
#    (DF -> V2DI/v2di, SF -> V4SI/v4si).
#  * testsuite: adds copysign_3.c (-O2) and copysign_4.c
#    (-O2 -march=armv8-a+sve), and updates both fneg-abs_2.c tests to expect
#    a .4s arrangement instead of .2s.
#
# NOTE(review): line breaks and leading whitespace in this copy were
# reconstructed from the hunk line counts; blank context lines and
# indentation may not match the target files byte for byte -- confirm
# against the tree before applying.
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c54b29cd64b9e0dc6c6d12735049386ccedc5408..ec9c731498815d6efa2066fe6d6d5bd19591a3f5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -7218,13 +7218,12 @@
 }
 )
 
-;; For copysign (x, y), we want to generate:
+;; For copysignf (x, y), we want to generate:
 ;;
-;; LDR d2, #(1 << 63)
-;; BSL v2.8b, [y], [x]
+;; movi v31.4s, 0x80, lsl 24
+;; bit v0.16b, v1.16b, v31.16b
 ;;
-;; or another, equivalent, sequence using one of BSL/BIT/BIF. Because
-;; we expect these operations to nearly always operate on
+;; Because we expect these operations to nearly always operate on
 ;; floating-point values, we do not want the operation to be
 ;; simplified into a bit-field insert operation that operates on the
 ;; integer side, since typically that would involve three inter-bank
@@ -7239,32 +7238,25 @@
    (match_operand:GPF 2 "nonmemory_operand")]
   "TARGET_SIMD"
 {
-  rtx signbit_const = GEN_INT (HOST_WIDE_INT_M1U
-                               << (GET_MODE_BITSIZE (<MODE>mode) - 1));
-  /* copysign (x, -1) should instead be expanded as orr with the sign
-     bit. */
+  rtx sign = GEN_INT (HOST_WIDE_INT_M1U << (GET_MODE_BITSIZE (<MODE>mode) - 1));
+  rtx v_bitmask = gen_const_vec_duplicate (<VQ_INT_EQUIV>mode, sign);
+  v_bitmask = force_reg (<VQ_INT_EQUIV>mode, v_bitmask);
+
+  /* copysign (x, -1) should instead be expanded as orr with the signbit. */
   rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
+
   if (GET_CODE (op2_elt) == CONST_DOUBLE
       && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
     {
-      rtx v_bitmask
-        = force_reg (V2<V_INT_EQUIV>mode,
-                     gen_const_vec_duplicate (V2<V_INT_EQUIV>mode,
-                                              signbit_const));
-
-      emit_insn (gen_iorv2<v_int_equiv>3 (
-        lowpart_subreg (V2<V_INT_EQUIV>mode, operands[0], <MODE>mode),
-        lowpart_subreg (V2<V_INT_EQUIV>mode, operands[1], <MODE>mode),
+      emit_insn (gen_ior<vq_int_equiv>3 (
+        lowpart_subreg (<VQ_INT_EQUIV>mode, operands[0], <MODE>mode),
+        lowpart_subreg (<VQ_INT_EQUIV>mode, operands[1], <MODE>mode),
         v_bitmask));
       DONE;
     }
-
-  machine_mode int_mode = <V_INT_EQUIV>mode;
-  rtx bitmask = gen_reg_rtx (int_mode);
-  emit_move_insn (bitmask, signbit_const);
   operands[2] = force_reg (<MODE>mode, operands[2]);
   emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2],
-                                       bitmask));
+                                       v_bitmask));
   DONE;
 }
 )
@@ -7273,23 +7265,21 @@
   [(set (match_operand:GPF 0 "register_operand")
         (unspec:GPF [(match_operand:GPF 1 "register_operand")
                      (match_operand:GPF 2 "register_operand")
-                     (match_operand:<V_INT_EQUIV> 3 "register_operand")]
+                     (match_operand:<VQ_INT_EQUIV> 3 "register_operand")]
          UNSPEC_COPYSIGN))]
   "TARGET_SIMD"
   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type ]
      [ w , w , w , 0 ; neon_bsl<q> ] bsl\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
      [ w , 0 , w , w ; neon_bsl<q> ] bit\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
      [ w , w , 0 , w ; neon_bsl<q> ] bif\t%0.<Vbtype>, %1.<Vbtype>, %3.<Vbtype>
-     [ r , r , 0 , X ; bfm ] bfxil\t%<w1>0, %<w1>1, #0, <sizem1>
   }
 )
 
-
-;; For xorsign (x, y), we want to generate:
+;; For xorsignf (x, y), we want to generate:
 ;;
-;; LDR d2, #1<<63
-;; AND v3.8B, v1.8B, v2.8B
-;; EOR v0.8B, v0.8B, v3.8B
+;; movi v31.4s, 0x80, lsl 24
+;; and v31.16b, v31.16b, v1.16b
+;; eor v0.16b, v31.16b, v0.16b
 ;;
 
 (define_expand "@xorsign<mode>3"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index efba78375c26d6a22bb1554ddccd1cec171c099a..e13837504208d55a1cd28fc9469d09a3298799bf 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1891,6 +1891,14 @@
                                (VNx8SF "vnx8si") (VNx16SF "vnx16si")
 ])
 
+;; Mode with floating-point values replaced by 128-bit vector integers.
+(define_mode_attr VQ_INT_EQUIV [(DF "V2DI") (SF "V4SI")
+])
+
+;; Lower case mode with floating-point values replaced by 128-bit vector integers.
+(define_mode_attr vq_int_equiv [(DF "v2di") (SF "v4si")
+])
+
 ;; Floating-point equivalent of selected modes.
 (define_mode_attr V_FP_EQUIV [(VNx8HI "VNx8HF") (VNx8HF "VNx8HF")
                               (VNx8BF "VNx8HF")
diff --git a/gcc/testsuite/gcc.target/aarch64/copysign_3.c b/gcc/testsuite/gcc.target/aarch64/copysign_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..be48682420f1ff84e80af9efd9d11f64bd6e8052
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/copysign_3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+float f1 (float x, float y)
+{
+  return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y);
+}
+
+double f2 (double x, double y)
+{
+  return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y);
+}
+
+/* { dg-final { scan-assembler-times "movi\t" 2 } } */
+/* { dg-final { scan-assembler-not "copysign\tw" } } */
+/* { dg-final { scan-assembler-not "dup\tw" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/copysign_4.c b/gcc/testsuite/gcc.target/aarch64/copysign_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..f3cec2fc9c21a4eaa3b6556479aeb15c04358a1c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/copysign_4.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8-a+sve" } */
+
+float f1 (float x, float y)
+{
+  return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y);
+}
+
+double f2 (double x, double y)
+{
+  return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y);
+}
+
+/* { dg-final { scan-assembler-times "movi\t" 1 } } */
+/* { dg-final { scan-assembler-times "mov\tz" 1 } } */
+/* { dg-final { scan-assembler-not "copysign\tw" } } */
+/* { dg-final { scan-assembler-not "dup\tw" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
index 18d10ee834d5d9b4361d890447060e78f09d3a73..9fe8e9bde6965875816e2aa722c36028ac233198 100644
--- a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
@@ -9,7 +9,7 @@
 
 /*
 ** f1:
-** orr v[0-9]+.2s, #?128, lsl #?24
+** orr v[0-9]+.4s, #?128, lsl #?24
 ** ret
 */
 float32_t f1 (float32_t a)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
index fe08fe31fe87aab4a7ce8497d05488a42fe9ae21..cc97c95d1521be6693f3182b485bab2aa4b1daa0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
@@ -7,7 +7,7 @@
 
 /*
 ** f1:
-** orr v0.2s, #?128, lsl #?24
+** orr v0.4s, #?128, lsl #?24
 ** ret
 */
 float32_t f1 (float32_t a)