diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 886ace79075c77f8b0004c8470727d72e0540eb8..0d9f6a6fb72b8dab2aaaa1d1110994020761cb02 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -610,6 +610,9 @@ if (constant_generates_xxspltiw (&vsx_const)) return true; + + if (constant_generates_xxspltidp (&vsx_const)) + return true; } /* Otherwise consider floating point constants hard, so that the @@ -653,6 +656,9 @@ if (constant_generates_xxspltiw (&vsx_const)) return true; + if (constant_generates_xxspltidp (&vsx_const)) + return true; + return false; }) @@ -727,6 +733,9 @@ if (constant_generates_xxspltiw (&vsx_const)) return true; + + if (constant_generates_xxspltidp (&vsx_const)) + return true; } if (TARGET_P9_VECTOR diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 74699ab6f72f2c939622410f2661b65577d24cf7..3e03d37ac7ce7dec648fe1c2e3f5a9db512862df 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -253,6 +253,7 @@ extern bool vec_const_128bit_to_bytes (rtx, machine_mode, vec_const_128bit_type *); extern unsigned constant_generates_lxvkq (vec_const_128bit_type *); extern unsigned constant_generates_xxspltiw (vec_const_128bit_type *); +extern unsigned constant_generates_xxspltidp (vec_const_128bit_type *); #endif /* RTX_CODE */ #ifdef TREE_CODE diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 5c1b620a1e21edeb8d32d4fb6b3479c2c3cfa14a..e82a47f4c0ebb47ec91092f9eb87c4591efc04c7 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -6723,6 +6723,13 @@ output_vec_const_move (rtx *operands) operands[2] = GEN_INT (imm); return "xxspltiw %x0,%2"; } + + imm = constant_generates_xxspltidp (&vsx_const); + if (imm) + { + operands[2] = GEN_INT (imm); + return "xxspltidp %x0,%2"; + } } if (TARGET_P9_VECTOR @@ -26524,6 +26531,9 @@ prefixed_xxsplti_p (rtx_insn *insn) { if 
(constant_generates_xxspltiw (&vsx_const)) return true; + + if (constant_generates_xxspltidp (&vsx_const)) + return true; } return false; @@ -28731,6 +28741,104 @@ constant_generates_xxspltiw (vec_const_128bit_type *vsx_const) return vsx_const->words[0]; } +/* Determine if a vector constant can be loaded with XXSPLTIDP. Return zero if + the XXSPLTIDP instruction cannot be used. Otherwise return the immediate + value to be used with the XXSPLTIDP instruction. */ + +unsigned +constant_generates_xxspltidp (vec_const_128bit_type *vsx_const) +{ + if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX) + return 0; + + /* Reject if the two 64-bit segments are not the same. */ + if (!vsx_const->all_double_words_same) + return 0; + + /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP. + Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW). */ + if (vsx_const->all_bytes_same + || vsx_const->all_half_words_same + || vsx_const->all_words_same) + return 0; + + unsigned HOST_WIDE_INT value = vsx_const->double_words[0]; + + /* Avoid values that look like DFmode NaN's, except for the normal NaN bit + pattern and the signalling NaN bit pattern. Recognize infinity and + negative infinity. */ + + /* Bit representation of DFmode normal quiet NaN. */ +#define RS6000_CONST_DF_NAN HOST_WIDE_INT_UC (0x7ff8000000000000) + + /* Bit representation of DFmode normal signaling NaN. */ +#define RS6000_CONST_DF_NANS HOST_WIDE_INT_UC (0x7ff4000000000000) + + /* Bit representation of DFmode positive infinity. */ +#define RS6000_CONST_DF_INF HOST_WIDE_INT_UC (0x7ff0000000000000) + + /* Bit representation of DFmode negative infinity. 
*/ +#define RS6000_CONST_DF_NEG_INF HOST_WIDE_INT_UC (0xfff0000000000000) + + if (value != RS6000_CONST_DF_NAN + && value != RS6000_CONST_DF_NANS + && value != RS6000_CONST_DF_INF + && value != RS6000_CONST_DF_NEG_INF) + { + /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for + the exponent, and 52 bits for the mantissa (not counting the hidden + bit used for normal numbers). NaN values have the exponent set to all + 1 bits, and the mantissa non-zero (mantissa == 0 is infinity). */ + + int df_exponent = (value >> 52) & 0x7ff; + unsigned HOST_WIDE_INT + df_mantissa = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U); + + if (df_exponent == 0x7ff && df_mantissa != 0) /* other NaNs. */ + return 0; + + /* Avoid values that are DFmode subnormal values. Subnormal numbers have + the exponent all 0 bits, and the mantissa non-zero. If the value is + subnormal, then the hidden bit in the mantissa is not set. */ + if (df_exponent == 0 && df_mantissa != 0) /* subnormal. */ + return 0; + } + + /* Change the representation to DFmode constant. */ + long df_words[2] = { vsx_const->words[0], vsx_const->words[1] }; + + /* real_from_target takes the target words in target order. */ + if (!BYTES_BIG_ENDIAN) + std::swap (df_words[0], df_words[1]); + + REAL_VALUE_TYPE rv_type; + real_from_target (&rv_type, df_words, DFmode); + + const REAL_VALUE_TYPE *rv = &rv_type; + + /* Validate that the number can be stored as a SFmode value. */ + if (!exact_real_truncate (SFmode, rv)) + return 0; + + /* Validate that the number is not a SFmode subnormal value (exponent is 0, + mantissa field is non-zero) which is undefined for the XXSPLTIDP + instruction. */ + long sf_value; + real_to_target (&sf_value, rv, SFmode); + + /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent, + and 23 bits for the mantissa. Subnormal numbers have the exponent all + 0 bits, and the mantissa non-zero. 
*/ + long sf_exponent = (sf_value >> 23) & 0xFF; + long sf_mantissa = sf_value & 0x7FFFFF; + + if (sf_exponent == 0 && sf_mantissa != 0) + return 0; + + /* Return the immediate to be used. */ + return sf_value; +} + struct gcc_target targetm = TARGET_INITIALIZER; diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index ec7b106fddb4fce0f62c9b4093647a766fd20e93..c1d661d7e6bc0fb8d932b5b921ecc598e4d94227 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -644,6 +644,10 @@ msplat-word-constant Target Var(TARGET_SPLAT_WORD_CONSTANT) Init(1) Save Generate (do not generate) code that uses the XXSPLTIW instruction. +msplat-float-constant +Target Var(TARGET_SPLAT_FLOAT_CONSTANT) Init(1) Save +Generate (do not generate) code that uses the XXSPLTIDP instruction. + mieee128-constant Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save Generate (do not generate) code that uses the LXVKQ instruction. diff --git a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c index bd1502bb30a9aa13ba7f9790157b5703adcff137..dcb30e1d8865eee19cb57f1d9b9ade9dbc364504 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c +++ b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c @@ -24,11 +24,12 @@ vector signed long long splats4(void) return (vector signed long long) vec_sl(mzero, mzero); } -/* Codegen will consist of splat and shift instructions for most types. - If folding is enabled, the vec_sl tests using vector long long type will - generate a lvx instead of a vspltisw+vsld pair. */ +/* Codegen will consist of splat and shift instructions for most types. If + folding is enabled, the vec_sl tests using vector long long type will + generate a lvx instead of a vspltisw+vsld pair. On power10, it will + generate a xxspltidp instruction instead of the lvx. 
*/ /* { dg-final { scan-assembler-times {\mvspltis[bhw]\M} 0 } } */ /* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 0 } } */ -/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M|\mxxspltidp\M} 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c new file mode 100644 index 0000000000000000000000000000000000000000..82ffc86f8aaf322e887bcf3625ea71ec8fd07f0a --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c @@ -0,0 +1,64 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +#include <math.h> + +/* Test generating V2DFmode constants with the ISA 3.1 (power10) XXSPLTIDP + instruction. */ + +vector double +v2df_double_0 (void) +{ + return (vector double) { 0.0, 0.0 }; /* XXSPLTIB or XXLXOR. */ +} + +vector double +v2df_double_1 (void) +{ + return (vector double) { 1.0, 1.0 }; /* XXSPLTIDP. */ +} + +#ifndef __FAST_MATH__ +vector double +v2df_double_m0 (void) +{ + return (vector double) { -0.0, -0.0 }; /* XXSPLTIDP. */ +} + +vector double +v2df_double_nan (void) +{ + return (vector double) { __builtin_nan (""), + __builtin_nan ("") }; /* XXSPLTIDP. */ +} + +vector double +v2df_double_inf (void) +{ + return (vector double) { __builtin_inf (), + __builtin_inf () }; /* XXSPLTIDP. */ +} + +vector double +v2df_double_m_inf (void) +{ + return (vector double) { - __builtin_inf (), + - __builtin_inf () }; /* XXSPLTIDP. */ +} +#endif + +vector double +v2df_double_pi (void) +{ + return (vector double) { M_PI, M_PI }; /* PLXV. */ +} + +vector double +v2df_double_denorm (void) +{ + return (vector double) { (double)0x1p-149f, + (double)0x1p-149f }; /* PLXV. 
*/ +} + +/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c new file mode 100644 index 0000000000000000000000000000000000000000..4d44f943d266b03d4cbc3e9c408f4805e6fa7f3f --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c @@ -0,0 +1,50 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test generating V2DImode constants that have the same bit pattern as + V2DFmode constants that can be loaded with the XXSPLTIDP instruction with + the ISA 3.1 (power10). */ + +vector long long +vector_0 (void) +{ + /* XXSPLTIB or XXLXOR. */ + return (vector long long) { 0LL, 0LL }; +} + +vector long long +vector_1 (void) +{ + /* XXSPLTIB and VEXTSB2D. */ + return (vector long long) { 1LL, 1LL }; +} + +/* 0x8000000000000000LL is the bit pattern for -0.0, which can be generated + with XXSPLTIDP. */ +vector long long +vector_float_neg_0 (void) +{ + /* XXSPLTIDP. */ + return (vector long long) { 0x8000000000000000LL, 0x8000000000000000LL }; +} + +/* 0x3ff0000000000000LL is the bit pattern for 1.0 which can be generated with + XXSPLTIDP. */ +vector long long +vector_float_1_0 (void) +{ + /* XXSPLTIDP. */ + return (vector long long) { 0x3ff0000000000000LL, 0x3ff0000000000000LL }; +} + +/* 0x400921fb54442d18LL is the bit pattern for PI, which cannot be generated + with XXSPLTIDP. */ +vector long long +scalar_pi (void) +{ + /* PLXV. */ + return (vector long long) { 0x400921fb54442d18LL, 0x400921fb54442d18LL }; +} + +/* { dg-final { scan-assembler-times {\mxxspltidp\M} 2 } } */