diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index e72132b4c28111aabfcbca7ff8acef5bee27909a..a4b05837fa6152aa607d7271254a2b26463300d9 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -213,6 +213,12 @@ "A signed 34-bit integer constant if prefixed instructions are supported." (match_operand 0 "cint34_operand")) +;; A SF/DF scalar constant or a vector constant that can be loaded into vector +;; registers with one prefixed instruction such as XXSPLTIDP or XXSPLTIW. +(define_constraint "eP" + "A constant that can be loaded into a VSX register with one prefixed insn." + (match_operand 0 "vsx_prefixed_constant")) + ;; A TF/KF scalar constant or a vector constant that can load certain IEEE ;; 128-bit constants into vector registers using LXVKQ. (define_constraint "eQ" diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index be721673cba9c794a81c3da70d4af4f33f1240d2..886ace79075c77f8b0004c8470727d72e0540eb8 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -605,7 +605,10 @@ vec_const_128bit_type vsx_const; if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const)) { - if (constant_generates_lxvkq (&vsx_const) != 0) + if (constant_generates_lxvkq (&vsx_const)) + return true; + + if (constant_generates_xxspltiw (&vsx_const)) return true; } @@ -617,6 +620,42 @@ return 0; }) +;; Return 1 if the operand is a 64-bit floating point scalar constant or a +;; vector constant that can be loaded to a VSX register with one prefixed +;; instruction, such as XXSPLTIDP or XXSPLTIW. +;; +;; In addition to regular constants, we also recognize constants formed with +;; the VEC_DUPLICATE insn from scalar constants. +;; +;; We don't handle scalar integer constants here because the assumption is the +;; normal integer constants will be loaded into GPR registers. 
For the +;; constants that need to be loaded into vector registers, the instructions +;; don't work well with TImode variables assigned a constant. This is because +;; the 64-bit scalar constants are splatted into both halves of the register. + +(define_predicate "vsx_prefixed_constant" + (match_code "const_double,const_vector,vec_duplicate") +{ + /* If we can generate the constant with a few Altivec instructions, don't + generate a prefixed instruction. */ + if (CONST_VECTOR_P (op) && easy_altivec_constant (op, mode)) + return false; + + /* Do we have prefixed instructions and are VSX registers available? Is the + constant recognized? */ + if (!TARGET_PREFIXED || !TARGET_VSX) + return false; + + vec_const_128bit_type vsx_const; + if (!vec_const_128bit_to_bytes (op, mode, &vsx_const)) + return false; + + if (constant_generates_xxspltiw (&vsx_const)) + return true; + + return false; +}) + ;; Return 1 if the operand is a special IEEE 128-bit value that can be loaded ;; via the LXVKQ instruction. 
@@ -683,7 +722,10 @@ vec_const_128bit_type vsx_const; if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const)) { - if (constant_generates_lxvkq (&vsx_const) != 0) + if (constant_generates_lxvkq (&vsx_const)) + return true; + + if (constant_generates_xxspltiw (&vsx_const)) return true; } diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 4a2e7fa7af1d639d53d1fbc501cc6ed2dcd0d949..74699ab6f72f2c939622410f2661b65577d24cf7 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -198,6 +198,7 @@ enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode); extern bool prefixed_load_p (rtx_insn *); extern bool prefixed_store_p (rtx_insn *); extern bool prefixed_paddi_p (rtx_insn *); +extern bool prefixed_xxsplti_p (rtx_insn *); extern void rs6000_asm_output_opcode (FILE *); extern void output_pcrel_opt_reloc (rtx); extern void rs6000_final_prescan_insn (rtx_insn *, rtx [], int); @@ -251,6 +252,7 @@ typedef struct { extern bool vec_const_128bit_to_bytes (rtx, machine_mode, vec_const_128bit_type *); extern unsigned constant_generates_lxvkq (vec_const_128bit_type *); +extern unsigned constant_generates_xxspltiw (vec_const_128bit_type *); #endif /* RTX_CODE */ #ifdef TREE_CODE diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 0bc384464f637a95a7384c47ed830802b1fb12e2..5c1b620a1e21edeb8d32d4fb6b3479c2c3cfa14a 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -6649,6 +6649,13 @@ xxspltib_constant_p (rtx op, else if (IN_RANGE (value, -1, 0)) *num_insns_ptr = 1; + /* Do not generate XXSPLTIB and a sign extend operation if we can generate a + single XXSPLTIW or XXSPLTIDP instruction. */ + else if (vsx_prefixed_constant (op, mode)) + return false; + + /* Return XXSPLTIB followed by a sign extend operation to convert the + constant to V8HImode or V4SImode. 
*/ else *num_insns_ptr = 2; @@ -6709,6 +6716,13 @@ output_vec_const_move (rtx *operands) operands[2] = GEN_INT (imm); return "lxvkq %x0,%2"; } + + imm = constant_generates_xxspltiw (&vsx_const); + if (imm) + { + operands[2] = GEN_INT (imm); + return "xxspltiw %x0,%2"; + } } if (TARGET_P9_VECTOR @@ -26480,6 +26494,41 @@ prefixed_paddi_p (rtx_insn *insn) return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL); } +/* Whether an instruction is a prefixed XXSPLTI* instruction. This is called + from the prefixed attribute processing. */ + +bool +prefixed_xxsplti_p (rtx_insn *insn) +{ + rtx set = single_set (insn); + if (!set) + return false; + + rtx dest = SET_DEST (set); + rtx src = SET_SRC (set); + machine_mode mode = GET_MODE (dest); + + if (!REG_P (dest) && !SUBREG_P (dest)) + return false; + + if (GET_CODE (src) == UNSPEC) + { + int unspec = XINT (src, 1); + return (unspec == UNSPEC_XXSPLTIW + || unspec == UNSPEC_XXSPLTIDP + || unspec == UNSPEC_XXSPLTI32DX); + } + + vec_const_128bit_type vsx_const; + if (vec_const_128bit_to_bytes (src, mode, &vsx_const)) + { + if (constant_generates_xxspltiw (&vsx_const)) + return true; + } + + return false; +} + /* Whether the next instruction needs a 'p' prefix issued before the instruction is printed out. */ static bool prepend_p_to_next_insn; @@ -28648,6 +28697,40 @@ constant_generates_lxvkq (vec_const_128bit_type *vsx_const) return 0; } +/* Determine if a vector constant can be loaded with XXSPLTIW. Return zero if + the XXSPLTIW instruction cannot be used. Otherwise return the immediate + value to be used with the XXSPLTIW instruction. */ + +unsigned +constant_generates_xxspltiw (vec_const_128bit_type *vsx_const) +{ + if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX) + return 0; + + if (!vsx_const->all_words_same) + return 0; + + /* If we can use XXSPLTIB, don't generate XXSPLTIW. */ + if (vsx_const->all_bytes_same) + return 0; + + /* See if we can use VSPLTISH or VSPLTISW. 
*/ + if (vsx_const->all_half_words_same) + { + unsigned short h_word = vsx_const->half_words[0]; + short sign_h_word = ((h_word & 0xffff) ^ 0x8000) - 0x8000; + if (EASY_VECTOR_15 (sign_h_word)) + return 0; + } + + unsigned int word = vsx_const->words[0]; + int sign_word = ((word & 0xffffffff) ^ 0x80000000) - 0x80000000; + if (EASY_VECTOR_15 (sign_word)) + return 0; + + return vsx_const->words[0]; +} + struct gcc_target targetm = TARGET_INITIALIZER; diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 6bec2bddbdee23fef7356636da0e6a1c17c21e62..3a7bcd2426e7a13564e61da2069ecebce71783bd 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -314,6 +314,11 @@ (eq_attr "type" "integer,add") (if_then_else (match_test "prefixed_paddi_p (insn)") + (const_string "yes") + (const_string "no")) + + (eq_attr "type" "vecperm") + (if_then_else (match_test "prefixed_xxsplti_p (insn)") (const_string "yes") (const_string "no"))] diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index b7433ec4e30d854c3021b7a33dd5eb0851be9079..ec7b106fddb4fce0f62c9b4093647a766fd20e93 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -640,6 +640,10 @@ mprivileged Target Var(rs6000_privileged) Init(0) Generate code that will run in privileged state. +msplat-word-constant +Target Var(TARGET_SPLAT_WORD_CONSTANT) Init(1) Save +Generate (do not generate) code that uses the XXSPLTIW instruction. + mieee128-constant Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save Generate (do not generate) code that uses the LXVKQ instruction. 
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index de048408a03d1b6a8cd373a381be2c0dc8995e64..802db0d112b4a9946737c498e6268040eddc1ea0 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -1192,19 +1192,19 @@ ;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR) ;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW -;; LXVKQ +;; LXVKQ XXSPLTI* ;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX) (define_insn "vsx_mov<mode>_64bit" [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=ZwO, wa, wa, r, we, ?wQ, ?&r, ??r, ??Y, <??r>, wa, v, - wa, + wa, wa, ?wa, v, <??r>, wZ, v") (match_operand:VSX_M 1 "input_operand" "wa, ZwO, wa, we, r, r, wQ, Y, r, r, wE, jwM, - eQ, + eQ, eP, ?jwM, W, <nW>, v, wZ"))] "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) @@ -1216,43 +1216,43 @@ [(set_attr "type" "vecstore, vecload, vecsimple, mtvsr, mfvsr, load, store, load, store, *, vecsimple, vecsimple, - vecperm, + vecperm, vecperm, vecsimple, *, *, vecstore, vecload") (set_attr "num_insns" "*, *, *, 2, *, 2, 2, 2, 2, 2, *, *, - *, + *, *, *, 5, 2, *, *") (set_attr "max_prefixed_insns" "*, *, *, *, *, 2, 2, 2, 2, 2, *, *, - *, + *, *, *, *, *, *, *") (set_attr "length" "*, *, *, 8, *, 8, 8, 8, 8, 8, *, *, - *, + *, *, *, 20, 8, *, *") (set_attr "isa" "<VSisa>, <VSisa>, <VSisa>, *, *, *, *, *, *, *, p9v, *, - p10, + p10, p10, <VSisa>, *, *, *, *")]) ;; VSX store VSX load VSX move GPR load GPR store GPR move -;; LXVKQ +;; LXVKQ XXSPLTI* ;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const ;; LVX (VMX) STVX (VMX) (define_insn "*vsx_mov<mode>_32bit" [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=ZwO, wa, wa, ??r, ??Y, <??r>, - wa, + wa, wa, wa, v, ?wa, v, <??r>, wZ, v") (match_operand:VSX_M 1 "input_operand" "wa, ZwO, wa, Y, r, r, - eQ, + eQ, eP, wE, jwM, ?jwM, W, <nW>, v, wZ"))] @@ -1264,17 +1264,17 @@ } [(set_attr "type" "vecstore, vecload, vecsimple, load, store, *, - vecperm, + vecperm, vecperm, vecsimple, vecsimple, vecsimple, *, *, 
vecstore, vecload") (set_attr "length" "*, *, *, 16, 16, 16, - *, + *, *, *, *, *, 20, 16, *, *") (set_attr "isa" "<VSisa>, <VSisa>, <VSisa>, *, *, *, - p10, + p10, p10, p9v, *, <VSisa>, *, *, *, *")]) diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 69cb7e3bfde203a022143d47f42dae8ab603efd9..9ec051e94e10cca9eec2773e1b8c01b74b6ea4db 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -3336,6 +3336,10 @@ A constant whose negation is a signed 16-bit constant. @item eI A signed 34-bit integer constant if prefixed instructions are supported. +@item eP +A scalar floating point constant or a vector constant that can be +loaded to a VSX register with one prefixed instruction. + @item eQ An IEEE 128-bit constant that can be loaded into a VSX register with the @code{lxvkq} instruction. diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c new file mode 100644 index 0000000000000000000000000000000000000000..27764ddbc83e8289443c741732124b6fdf4adcb4 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +#include <altivec.h> + +/* Test whether XXSPLTIW is generated for V16QI vector constants where the + first 4 elements are the same as the next 4 elements, etc. */ + +vector unsigned char +v16qi_const_1 (void) +{ + return (vector unsigned char) { 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, }; /* VSPLTISB. */ +} + +vector unsigned char +v16qi_const_2 (void) +{ + return (vector unsigned char) { 1, 2, 3, 4, 1, 2, 3, 4, + 1, 2, 3, 4, 1, 2, 3, 4, }; /* XXSPLTIW. 
*/ +} + +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvspltisb\M|\mxxspltib\M} 1 } } */ +/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mplxv\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c new file mode 100644 index 0000000000000000000000000000000000000000..1f0475cf47ac4edef746433a0aeb5d48c4de561b --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c @@ -0,0 +1,67 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +#include <altivec.h> + +/* Test whether XXSPLTIW is generated for V4SF vector constants. */ + +vector float +v4sf_const_1 (void) +{ + return (vector float) { 1.0f, 1.0f, 1.0f, 1.0f }; /* XXSPLTIW. */ +} + +vector float +v4sf_const_nan (void) +{ + return (vector float) { __builtin_nanf (""), + __builtin_nanf (""), + __builtin_nanf (""), + __builtin_nanf ("") }; /* XXSPLTIW. */ +} + +vector float +v4sf_const_inf (void) +{ + return (vector float) { __builtin_inff (), + __builtin_inff (), + __builtin_inff (), + __builtin_inff () }; /* XXSPLTIW. */ +} + +vector float +v4sf_const_m0 (void) +{ + return (vector float) { -0.0f, -0.0f, -0.0f, -0.0f }; /* XXSPLTIB/VSLW. */ +} + +vector float +v4sf_splats_1 (void) +{ + return vec_splats (1.0f); /* XXSPLTIW. */ +} + +vector float +v4sf_splats_nan (void) +{ + return vec_splats (__builtin_nanf ("")); /* XXSPLTIW. */ +} + +vector float +v4sf_splats_inf (void) +{ + return vec_splats (__builtin_inff ()); /* XXSPLTIW. */ +} + +vector float +v8hi_splats_m0 (void) +{ + return vec_splats (-0.0f); /* XXSPLTIB/VSLW. 
*/ +} + +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mxxspltib\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvslw\M} 2 } } */ +/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mplxv\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c new file mode 100644 index 0000000000000000000000000000000000000000..02d0c6d66a26da9f188e8bdc97f6f11c76ca9c1e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +#include <altivec.h> + +/* Test whether XXSPLTIW is generated for V4SI vector constants. We make sure + the power9 support (XXSPLTIB/VEXTSB2W) is not done. */ + +vector int +v4si_const_1 (void) +{ + return (vector int) { 1, 1, 1, 1 }; /* VSPLTISW. */ +} + +vector int +v4si_const_126 (void) +{ + return (vector int) { 126, 126, 126, 126 }; /* XXSPLTIW. */ +} + +vector int +v4si_const_1023 (void) +{ + return (vector int) { 1023, 1023, 1023, 1023 }; /* XXSPLTIW. */ +} + +vector int +v4si_splats_1 (void) +{ + return vec_splats (1); /* VSPLTISW. */ +} + +vector int +v4si_splats_126 (void) +{ + return vec_splats (126); /* XXSPLTIW. */ +} + +vector int +v8hi_splats_1023 (void) +{ + return vec_splats (1023); /* XXSPLTIW. 
*/ +} + +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mvspltisw\M} 2 } } */ +/* { dg-final { scan-assembler-not {\mxxspltib\M} } } */ +/* { dg-final { scan-assembler-not {\mvextsb2w\M} } } */ +/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mplxv\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c new file mode 100644 index 0000000000000000000000000000000000000000..59418d3bb0a8ef0d1142843f2b73add30f1ef676 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c @@ -0,0 +1,62 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +#include <altivec.h> + +/* Test whether XXSPLTIW is generated for V8HI vector constants. We make sure + the power9 support (XXSPLTIB/VUPKLSB) is not done. */ + +vector short +v8hi_const_1 (void) +{ + return (vector short) { 1, 1, 1, 1, 1, 1, 1, 1 }; /* VSPLTISH. */ +} + +vector short +v8hi_const_126 (void) +{ + return (vector short) { 126, 126, 126, 126, + 126, 126, 126, 126 }; /* XXSPLTIW. */ +} + +vector short +v8hi_const_1023 (void) +{ + return (vector short) { 1023, 1023, 1023, 1023, + 1023, 1023, 1023, 1023 }; /* XXSPLTIW. */ +} + +vector short +v8hi_splats_1 (void) +{ + return vec_splats ((short)1); /* VSPLTISH. */ +} + +vector short +v8hi_splats_126 (void) +{ + return vec_splats ((short)126); /* XXSPLTIW. */ +} + +vector short +v8hi_splats_1023 (void) +{ + return vec_splats ((short)1023); /* XXSPLTIW. */ +} + +/* Test that we can optimize V8HI where all of the even elements are the same + and all of the odd elements are the same. */ +vector short +v8hi_const_1023_1000 (void) +{ + return (vector short) { 1023, 1000, 1023, 1000, + 1023, 1000, 1023, 1000 }; /* XXSPLTIW. 
*/ +} + +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 5 } } */ +/* { dg-final { scan-assembler-times {\mvspltish\M} 2 } } */ +/* { dg-final { scan-assembler-not {\mxxspltib\M} } } */ +/* { dg-final { scan-assembler-not {\mvupklsb\M} } } */ +/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mplxv\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c b/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c index a135279b1d7f4e9fa516f21aba039683ac364140..6c01666b6257b897e8a8480c4c76694977e5061a 100644 --- a/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c +++ b/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c @@ -149,8 +149,8 @@ main (int argc, char *argv []) return 0; } -/* { dg-final { scan-assembler-times {\mxxspltiw\M} 2 } } */ -/* { dg-final { scan-assembler-times {\mxxspltidp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mxxspltidp\M} 3 } } */ /* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */