From acef130fabdabd9c37b27e7ae1ad6c943a34a405 Mon Sep 17 00:00:00 2001 From: Roger Sayle <roger@eyesopen.com> Date: Mon, 10 Apr 2006 21:01:19 +0000 Subject: [PATCH] i386.c (ix86_expand_vector_init_one_nonzero): Renamed from ix86_expand_vector_init_low_nonzero. * config/i386/i386.c (ix86_expand_vector_init_one_nonzero): Renamed from ix86_expand_vector_init_low_nonzero. Take an additional one_var argument indicating which element is non-zero. Support one_var != 0 for V4SFmode and V4SImode by permuting the result. (ix86_expand_vector_init): Call ix86_expand_vector_init_one_nonzero with one_var instead of ix86_expand_vector_init_low_nonzero. * gcc.target/i386/vecinit-1.c: New test case. * gcc.target/i386/vecinit-2.c: Likewise. From-SVN: r112832 --- gcc/ChangeLog | 9 +++ gcc/config/i386/i386.c | 71 ++++++++++++++++++++--- gcc/testsuite/ChangeLog | 5 ++ gcc/testsuite/gcc.target/i386/vecinit-1.c | 11 ++++ gcc/testsuite/gcc.target/i386/vecinit-2.c | 11 ++++ 5 files changed, 98 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/vecinit-1.c create mode 100644 gcc/testsuite/gcc.target/i386/vecinit-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 923be6c3e14a..a50a06ed8467 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2006-04-10 Roger Sayle <roger@eyesopen.com> + + * config/i386/i386.c (ix86_expand_vector_init_one_nonzero): Renamed + from ix86_expand_vector_init_low_nonzero. Take an additional + one_var argument indicating which element is non-zero. Support + one_var != 0 for V4SFmode and V4SImode by permuting the result. + (ix86_expand_vector_init): Call ix86_expand_vector_init_one_nonzero + with one_var instead of ix86_expand_vector_init_low_nonzero. + 2006-04-10 Kazu Hirata <kazu@codesourcery.com> * Makefile.in (tree-into-ssa.o, tree-outof-ssa.o, diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 907682547aa8..656268567961 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -17880,15 +17880,16 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, } /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector - whose low element is VAR, and other elements are zero. Return true + whose ONE_VAR element is VAR, and other elements are zero. Return true if successful. */ static bool -ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode, - rtx target, rtx var) +ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode, + rtx target, rtx var, int one_var) { enum machine_mode vsimode; - rtx x; + rtx new_target; + rtx x, tmp; switch (mode) { @@ -17900,6 +17901,8 @@ ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode, case V2DFmode: case V2DImode: + if (one_var != 0) + return false; var = force_reg (GET_MODE_INNER (mode), var); x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode))); emit_insn (gen_rtx_SET (VOIDmode, target, x)); @@ -17907,10 +17910,55 @@ ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode, case V4SFmode: case V4SImode: + if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER) + new_target = gen_reg_rtx (mode); + else + new_target = target; var = force_reg (GET_MODE_INNER (mode), var); x = gen_rtx_VEC_DUPLICATE (mode, var); x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx); - emit_insn (gen_rtx_SET (VOIDmode, target, x)); + emit_insn (gen_rtx_SET (VOIDmode, new_target, x)); + if (one_var != 0) + { + /* We need to shuffle the value to the correct position, so + create a new pseudo to store the intermediate result. */ + + /* With SSE2, we can use the integer shuffle insns. */ + if (mode != V4SFmode && TARGET_SSE2) + { + emit_insn (gen_sse2_pshufd_1 (new_target, new_target, + GEN_INT (1), + GEN_INT (one_var == 1 ? 0 : 1), + GEN_INT (one_var == 2 ? 0 : 1), + GEN_INT (one_var == 3 ? 0 : 1))); + if (target != new_target) + emit_move_insn (target, new_target); + return true; + } + + /* Otherwise convert the intermediate result to V4SFmode and + use the SSE1 shuffle instructions. */ + if (mode != V4SFmode) + { + tmp = gen_reg_rtx (V4SFmode); + emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target)); + } + else + tmp = new_target; + + emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp, + GEN_INT (1), + GEN_INT (one_var == 1 ? 0 : 1), + GEN_INT (one_var == 2 ? 0+4 : 1+4), + GEN_INT (one_var == 3 ? 0+4 : 1+4))); + + if (mode != V4SFmode) + emit_move_insn (target, gen_lowpart (V4SImode, tmp)); + else if (tmp != target) + emit_move_insn (target, tmp); + } + else if (target != new_target) + emit_move_insn (target, new_target); return true; case V8HImode: @@ -17924,11 +17972,15 @@ ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode, vsimode = V2SImode; goto widen; widen: + if (one_var != 0) + return false; + /* Zero extend the variable element to SImode and recurse. */ var = convert_modes (SImode, GET_MODE_INNER (mode), var, true); x = gen_reg_rtx (vsimode); - if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var)) + if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x, + var, one_var)) gcc_unreachable (); emit_move_insn (target, gen_lowpart (mode, x)); @@ -18185,9 +18237,10 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) the pool and overwritten via move later. */ if (n_var == 1) { - if (all_const_zero && one_var == 0 - && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target, - XVECEXP (vals, 0, 0))) + if (all_const_zero + && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target, + XVECEXP (vals, 0, one_var), + one_var)) return; if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var)) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index becca442bf9f..7cf2a77e0aa2 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2006-04-10 Roger Sayle <roger@eyesopen.com> + + * gcc.target/i386/vecinit-1.c: New test case. + * gcc.target/i386/vecinit-2.c: Likewise. + 2006-04-10 Jakub Jelinek <jakub@redhat.com> PR debug/27057 diff --git a/gcc/testsuite/gcc.target/i386/vecinit-1.c b/gcc/testsuite/gcc.target/i386/vecinit-1.c new file mode 100644 index 000000000000..86cad897669a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vecinit-1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2" } */ +#define vector __attribute__((vector_size(16))) + +float a; +vector float f1(void) { return (vector float){ a, 0.0, 0.0, 0.0}; } +vector float f2(void) { return (vector float){ 0.0, a, 0.0, 0.0}; } +vector float f3(void) { return (vector float){ 0.0, 0.0, a, 0.0}; } +vector float f4(void) { return (vector float){ 0.0, 0.0, 0.0, a}; } +/* { dg-final { scan-assembler-not "movaps" } } */ +/* { dg-final { scan-assembler-not "xor" } } */ diff --git a/gcc/testsuite/gcc.target/i386/vecinit-2.c b/gcc/testsuite/gcc.target/i386/vecinit-2.c new file mode 100644 index 000000000000..41e502711a7c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vecinit-2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2" } */ +#define vector __attribute__((vector_size(16))) + +int a; +vector int f1(void) { return (vector int){ a, 0, 0, 0}; } +vector int f2(void) { return (vector int){ 0, a, 0, 0}; } +vector int f3(void) { return (vector int){ 0, 0, a, 0}; } +vector int f4(void) { return (vector int){ 0, 0, 0, a}; } +/* { dg-final { scan-assembler-not "movaps" } } */ +/* { dg-final { scan-assembler-not "xor" } } */ -- GitLab