diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 5770491b30ce13ae7d43a39e47fc47f0d4287ffe..e65b24e2ad6aec014811c847be29c2f6f83951dd 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -22899,19 +22899,19 @@ aarch64_advsimd_valid_immediate_hs (unsigned int val32,
   return false;
 }
 
-/* Return true if replicating VAL64 is a valid immediate for the
+/* Return true if replicating VAL64 with mode MODE is a valid immediate for the
    Advanced SIMD operation described by WHICH.  If INFO is nonnull,
    use it to describe valid immediates.  */
 static bool
 aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
+				 scalar_int_mode mode,
 				 simd_immediate_info *info,
 				 enum simd_immediate_check which)
 {
   unsigned int val32 = val64 & 0xffffffff;
-  unsigned int val16 = val64 & 0xffff;
   unsigned int val8 = val64 & 0xff;
 
-  if (val32 == (val64 >> 32))
+  if (mode != DImode)
     {
       if ((which & AARCH64_CHECK_ORR) != 0
 	  && aarch64_advsimd_valid_immediate_hs (val32, info, which,
@@ -22924,9 +22924,7 @@ aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
 	return true;
 
       /* Try using a replicated byte.  */
-      if (which == AARCH64_CHECK_MOV
-	  && val16 == (val32 >> 16)
-	  && val8 == (val16 >> 8))
+      if (which == AARCH64_CHECK_MOV && mode == QImode)
 	{
 	  if (info)
 	    *info = simd_immediate_info (QImode, val8);
@@ -22954,28 +22952,15 @@ aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
   return false;
 }
 
-/* Return true if replicating VAL64 gives a valid immediate for an SVE MOV
-   instruction.  If INFO is nonnull, use it to describe valid immediates.  */
+/* Return true if replicating IVAL with MODE gives a valid immediate for an SVE
+   MOV instruction.  If INFO is nonnull, use it to describe valid
+   immediates.  */
 static bool
-aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
+aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT ival, scalar_int_mode mode,
 			     simd_immediate_info *info)
 {
-  scalar_int_mode mode = DImode;
-  unsigned int val32 = val64 & 0xffffffff;
-  if (val32 == (val64 >> 32))
-    {
-      mode = SImode;
-      unsigned int val16 = val32 & 0xffff;
-      if (val16 == (val32 >> 16))
-	{
-	  mode = HImode;
-	  unsigned int val8 = val16 & 0xff;
-	  if (val8 == (val16 >> 8))
-	    mode = QImode;
-	}
-    }
-  HOST_WIDE_INT val = trunc_int_for_mode (val64, mode);
+  HOST_WIDE_INT val = trunc_int_for_mode (ival, mode);
 
   if (IN_RANGE (val, -0x80, 0x7f))
     {
       /* DUP with no shift.  */
@@ -22990,7 +22975,7 @@ aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
 	*info = simd_immediate_info (mode, val);
       return true;
     }
-  if (aarch64_bitmask_imm (val64, mode))
+  if (aarch64_bitmask_imm (ival, mode))
     {
       /* DUPM.  */
       if (info)
@@ -23071,6 +23056,91 @@ aarch64_sve_pred_valid_immediate (rtx x, simd_immediate_info *info)
   return false;
 }
 
+/* We can only represent floating point constants which will fit in
+   "quarter-precision" values.  These values are characterised by
+   a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
+   by:
+
+     (-1)^s * (n/16) * 2^r
+
+   Where:
+     's' is the sign bit.
+     'n' is an integer in the range 16 <= n <= 31.
+     'r' is an integer in the range -3 <= r <= 4.
+
+   Return true iff R represents a value encodable into an AArch64 floating
+   point move instruction as an immediate.  Otherwise return false.  */
+
+static bool
+aarch64_real_float_const_representable_p (REAL_VALUE_TYPE r)
+{
+  /* This represents our current view of how many bits
+     make up the mantissa.  */
+  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
+  int exponent;
+  unsigned HOST_WIDE_INT mantissa, mask;
+  REAL_VALUE_TYPE m;
+  bool fail = false;
+
+  /* We cannot represent infinities, NaNs or +/-zero.  We won't
+     know if we have +zero until we analyse the mantissa, but we
+     can reject the other invalid values.  */
+  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
+      || REAL_VALUE_MINUS_ZERO (r))
+    return false;
+
+  /* Extract exponent.  */
+  r = real_value_abs (&r);
+  exponent = REAL_EXP (&r);
+
+  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
+     highest (sign) bit, with a fixed binary point at bit point_pos.
+     m1 holds the low part of the mantissa, m2 the high part.
+     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
+     bits for the mantissa, this can fail (low bits will be lost).  */
+  real_ldexp (&m, &r, point_pos - exponent);
+  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
+
+  /* If the low part of the mantissa has bits set we cannot represent
+     the value.  */
+  if (fail || w.ulow () != 0)
+    return false;
+
+  /* We have rejected the lower HOST_WIDE_INT, so update our
+     understanding of how many bits lie in the mantissa and
+     look only at the high HOST_WIDE_INT.  */
+  mantissa = w.elt (1);
+  point_pos -= HOST_BITS_PER_WIDE_INT;
+
+  /* We can only represent values with a mantissa of the form 1.xxxx.  */
+  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
+  if ((mantissa & mask) != 0)
+    return false;
+
+  /* Having filtered unrepresentable values, we may now remove all
+     but the highest 5 bits.  */
+  mantissa >>= point_pos - 5;
+
+  /* We cannot represent the value 0.0, so reject it.  This is handled
+     elsewhere.  */
+  if (mantissa == 0)
+    return false;
+
+  /* Then, as bit 4 is always set, we can mask it off, leaving
+     the mantissa in the range [0, 15].  */
+  mantissa &= ~(1 << 4);
+  gcc_assert (mantissa <= 15);
+
+  /* GCC internally does not use IEEE754-like encoding (where normalized
+     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.cc).
+     Our mantissa values are shifted 4 places to the left relative to
+     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
+     by 5 places to correct for GCC's representation.  */
+  exponent = 5 - exponent;
+
+  return (exponent >= 0 && exponent <= 7);
+}
+
 /* Return true if OP is a valid SIMD immediate for the operation
    described by WHICH.  If INFO is nonnull, use it to describe valid
    immediates.  */
@@ -23124,20 +23194,6 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
   else
     return false;
 
-  scalar_float_mode elt_float_mode;
-  if (n_elts == 1
-      && is_a <scalar_float_mode> (elt_mode, &elt_float_mode))
-    {
-      rtx elt = CONST_VECTOR_ENCODED_ELT (op, 0);
-      if (aarch64_float_const_zero_rtx_p (elt)
-	  || aarch64_float_const_representable_p (elt))
-	{
-	  if (info)
-	    *info = simd_immediate_info (elt_float_mode, elt);
-	  return true;
-	}
-    }
-
   /* If all elements in an SVE vector have the same value, we have a free
      choice between using the element mode and using the container mode.
      Using the element mode means that unused parts of the vector are
@@ -23199,10 +23255,57 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
     val64 |= ((unsigned HOST_WIDE_INT) bytes[i % nbytes]
 	      << (i * BITS_PER_UNIT));
 
+  /* Try encoding the integer immediate as a floating point value if it's an
+     exact value.  */
+  scalar_float_mode fmode = DFmode;
+  scalar_int_mode imode = DImode;
+  unsigned HOST_WIDE_INT ival = val64;
+  unsigned int val32 = val64 & 0xffffffff;
+  if (val32 == (val64 >> 32))
+    {
+      fmode = SFmode;
+      imode = SImode;
+      ival = val32;
+      unsigned int val16 = val32 & 0xffff;
+      if (val16 == (val32 >> 16))
+	{
+	  fmode = HFmode;
+	  imode = HImode;
+	  ival = val16;
+	  unsigned int val8 = val16 & 0xff;
+	  if (val8 == (val16 >> 8))
+	    {
+	      imode = QImode;
+	      ival = val8;
+	    }
+	}
+    }
+
+  if (which == AARCH64_CHECK_MOV
+      && imode != QImode
+      && (imode != HImode || TARGET_FP_F16INST))
+    {
+      long int as_long_ints[2];
+      as_long_ints[0] = ival & 0xFFFFFFFF;
+      as_long_ints[1] = (ival >> 32) & 0xFFFFFFFF;
+
+      REAL_VALUE_TYPE r;
+      real_from_target (&r, as_long_ints, fmode);
+      if (aarch64_real_float_const_representable_p (r))
+	{
+	  if (info)
+	    {
+	      rtx float_val = const_double_from_real_value (r, fmode);
+	      *info = simd_immediate_info (fmode, float_val);
+	    }
+	  return true;
+	}
+    }
+
   if (vec_flags & VEC_SVE_DATA)
-    return aarch64_sve_valid_immediate (val64, info);
+    return aarch64_sve_valid_immediate (ival, imode, info);
   else
-    return aarch64_advsimd_valid_immediate (val64, info, which);
+    return aarch64_advsimd_valid_immediate (val64, imode, info, which);
 }
 
 /* Check whether X is a VEC_SERIES-like constant that starts at 0 and
@@ -25205,106 +25308,29 @@ aarch64_c_mode_for_suffix (char suffix)
   return VOIDmode;
 }
 
-/* We can only represent floating point constants which will fit in
-   "quarter-precision" values.  These values are characterised by
-   a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
-   by:
-
-     (-1)^s * (n/16) * 2^r
-
-   Where:
-     's' is the sign bit.
-     'n' is an integer in the range 16 <= n <= 31.
-     'r' is an integer in the range -3 <= r <= 4.  */
-
-/* Return true iff X can be represented by a quarter-precision
+/* Return true iff X with mode MODE can be represented by a quarter-precision
    floating point immediate operand X.  Note, we cannot represent 0.0.  */
+
 bool
 aarch64_float_const_representable_p (rtx x)
 {
-  /* This represents our current view of how many bits
-     make up the mantissa.  */
-  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
-  int exponent;
-  unsigned HOST_WIDE_INT mantissa, mask;
-  REAL_VALUE_TYPE r, m;
-  bool fail;
-
   x = unwrap_const_vec_duplicate (x);
+  machine_mode mode = GET_MODE (x);
   if (!CONST_DOUBLE_P (x))
     return false;
 
-  if (GET_MODE (x) == VOIDmode
-      || (GET_MODE (x) == HFmode && !TARGET_FP_F16INST))
-    return false;
-
-  r = *CONST_DOUBLE_REAL_VALUE (x);
-
-  /* We cannot represent infinities, NaNs or +/-zero.  We won't
-     know if we have +zero until we analyse the mantissa, but we
-     can reject the other invalid values.  */
-  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
-      || REAL_VALUE_MINUS_ZERO (r))
+  if ((mode == HFmode && !TARGET_FP_F16INST)
+      || mode == BFmode)
     return false;
 
-  /* For BFmode, only handle 0.0. */
-  if (GET_MODE (x) == BFmode)
-    return real_iszero (&r, false);
+  REAL_VALUE_TYPE r = *CONST_DOUBLE_REAL_VALUE (x);
 
-  /* Extract exponent.  */
-  r = real_value_abs (&r);
-  exponent = REAL_EXP (&r);
-
-  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
-     highest (sign) bit, with a fixed binary point at bit point_pos.
-     m1 holds the low part of the mantissa, m2 the high part.
-     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
-     bits for the mantissa, this can fail (low bits will be lost).  */
-  real_ldexp (&m, &r, point_pos - exponent);
-  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
-
-  /* If the low part of the mantissa has bits set we cannot represent
-     the value.  */
-  if (w.ulow () != 0)
-    return false;
-  /* We have rejected the lower HOST_WIDE_INT, so update our
-     understanding of how many bits lie in the mantissa and
-     look only at the high HOST_WIDE_INT.  */
-  mantissa = w.elt (1);
-  point_pos -= HOST_BITS_PER_WIDE_INT;
-
-  /* We can only represent values with a mantissa of the form 1.xxxx.  */
-  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
-  if ((mantissa & mask) != 0)
-    return false;
-
-  /* Having filtered unrepresentable values, we may now remove all
-     but the highest 5 bits.  */
-  mantissa >>= point_pos - 5;
-
-  /* We cannot represent the value 0.0, so reject it.  This is handled
-     elsewhere.  */
-  if (mantissa == 0)
-    return false;
-
-  /* Then, as bit 4 is always set, we can mask it off, leaving
-     the mantissa in the range [0, 15].  */
-  mantissa &= ~(1 << 4);
-  gcc_assert (mantissa <= 15);
-
-  /* GCC internally does not use IEEE754-like encoding (where normalized
-     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.cc).
-     Our mantissa values are shifted 4 places to the left relative to
-     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
-     by 5 places to correct for GCC's representation.  */
-  exponent = 5 - exponent;
-
-  return (exponent >= 0 && exponent <= 7);
+  return aarch64_real_float_const_representable_p (r);
 }
 
-/* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR or BIC
-   immediate with a CONST_VECTOR of MODE and WIDTH.  WHICH selects whether to
-   output MOVI/MVNI, ORR or BIC immediate. */
+/* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR, BIC or
+   FMOV immediate with a CONST_VECTOR of MODE and WIDTH.  WHICH selects whether
+   to output MOVI/MVNI, ORR or BIC immediate.  */
 char*
 aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
 				   enum simd_immediate_check which)
diff --git a/gcc/testsuite/gcc.target/aarch64/const_create_using_fmov.c b/gcc/testsuite/gcc.target/aarch64/const_create_using_fmov.c
new file mode 100644
index 0000000000000000000000000000000000000000..e080afed8aa3578660027979335bfc859ca6bc91
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/const_create_using_fmov.c
@@ -0,0 +1,87 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv9-a -Ofast" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+
+/*
+** g:
+**	fmov	v0\.4s, 1\.0e\+0
+**	ret
+*/
+float32x4_t g(){
+    return vdupq_n_f32(1);
+}
+
+/*
+** h:
+**	fmov	v0\.4s, 1\.0e\+0
+**	ret
+*/
+uint32x4_t h() {
+    return vreinterpretq_u32_f32(g());
+}
+
+/*
+** f1:
+**	fmov	v0\.4s, 1\.0e\+0
+**	ret
+*/
+uint32x4_t f1() {
+    return vdupq_n_u32(0x3f800000);
+}
+
+/*
+** f2:
+**	fmov	v0\.4s, 1\.5e\+0
+**	ret
+*/
+uint32x4_t f2() {
+    return vdupq_n_u32(0x3FC00000);
+}
+
+/*
+** f3:
+**	fmov	v0\.4s, 1\.25e\+0
+**	ret
+*/
+uint32x4_t f3() {
+    return vdupq_n_u32(0x3FA00000);
+}
+
+/*
+** f4:
+**	fmov	v0\.2d, 1\.0e\+0
+**	ret
+*/
+uint64x2_t f4() {
+    return vdupq_n_u64(0x3FF0000000000000);
+}
+
+/*
+** fn4:
+**	fmov	v0\.2d, -1\.0e\+0
+**	ret
+*/
+uint64x2_t fn4() {
+    return vdupq_n_u64(0xBFF0000000000000);
+}
+
+/*
+** f5:
+**	fmov	v0\.8h, 1\.5e\+0
+**	ret
+*/
+uint16x8_t f5() {
+    return vdupq_n_u16(0x3E00);
+}
+
+/*
+** f6:
+**	adrp	x0, \.LC0
+**	ldr	q0, \[x0, #:lo12:\.LC0\]
+**	ret
+*/
+uint32x4_t f6() {
+    return vdupq_n_u32(0x4f800000);
+}
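
Side note for reviewers, not part of the patch: a minimal standalone sketch of
the "quarter-precision" rule that aarch64_real_float_const_representable_p
implements.  A value is FMOV-encodable iff it equals (-1)^s * (n/16) * 2^r
with 16 <= n <= 31 and -3 <= r <= 4.  The helper name fmov_encodable below is
mine, not GCC's; it simply brute-forces that set.  1.5f (0x3FC00000, n = 24,
r = 0) passes, while 0x4f800000 (2^32) exceeds the exponent range, which is
why f6 above expects a literal-pool load rather than an fmov.

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Return 1 iff F (sign ignored) equals (n/16) * 2^r for some
   16 <= n <= 31 and -3 <= r <= 4.  All products are exact in float,
   so the equality comparison is safe here.  */
static int
fmov_encodable (float f)
{
  f = fabsf (f);
  for (int r = -3; r <= 4; r++)
    for (int n = 16; n <= 31; n++)
      if (f == (n / 16.0f) * exp2f (r))
        return 1;
  return 0;
}

int
main (void)
{
  float f;
  uint32_t bits = 0x3FC00000;   /* 1.5f: n = 24, r = 0 -> encodable.  */
  memcpy (&f, &bits, sizeof f);
  printf ("%08x -> %d\n", (unsigned) bits, fmov_encodable (f));  /* 1 */

  bits = 0x4f800000;            /* 2^32: exponent out of range.  */
  memcpy (&f, &bits, sizeof f);
  printf ("%08x -> %d\n", (unsigned) bits, fmov_encodable (f));  /* 0 */
  return 0;
}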