From ca659f6ed947ad43caf21917dca2aced344dc35b Mon Sep 17 00:00:00 2001 From: Richard Henderson <rth@redhat.com> Date: Mon, 30 Nov 2009 09:36:07 -0800 Subject: [PATCH] i386.c (avx_vperm2f128_parallel): New. * config/i386/i386.c (avx_vperm2f128_parallel): New. * config/i386/i386-protos.h: Declare it. * config/i386/predicates.md (avx_vperm2f128_v8sf_operand, avx_vperm2f128_v8si_operand, avx_vperm2f128_v4df_operand): New. * config/i386/sse.md (avx_vperm2f128<mode>3): Change to expander. (*avx_vperm2f128<mode>_full): Renamed from avx_vperm2f128<mode>3. (*avx_vperm2f128<mode>_nozero): New. From-SVN: r154832 --- gcc/ChangeLog | 10 ++++++ gcc/config/i386/i386-protos.h | 1 + gcc/config/i386/i386.c | 52 ++++++++++++++++++++++++++++++ gcc/config/i386/predicates.md | 14 +++++++++ gcc/config/i386/sse.md | 59 ++++++++++++++++++++++++++++++++++- 5 files changed, 135 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8260bb6f2f85..96cacc0e0f78 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2009-11-30 Richard Henderson <rth@redhat.com> + + * config/i386/i386.c (avx_vperm2f128_parallel): New. + * config/i386/i386-protos.h: Declare it. + * config/i386/predicates.md (avx_vperm2f128_v8sf_operand, + avx_vperm2f128_v8si_operand, avx_vperm2f128_v4df_operand): New. + * config/i386/sse.md (avx_vperm2f128<mode>3): Change to expander. + (*avx_vperm2f128<mode>_full): Renamed from avx_vperm2f128<mode>3. + (*avx_vperm2f128<mode>_nozero): New. + 2009-11-30 Richard Henderson <rth@redhat.com> * config/i386/i386-builtin-types.def (V4DF_FTYPE_V4DF_V4DF_V4DI): New. 
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 88acc1f82a6f..1e94cde200d5 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -48,6 +48,7 @@ extern bool x86_extended_reg_mentioned_p (rtx);
 extern enum machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx);
 
 extern int avx_vpermilp_parallel (rtx par, enum machine_mode mode);
+extern int avx_vperm2f128_parallel (rtx par, enum machine_mode mode);
 
 extern int ix86_expand_movmem (rtx, rtx, rtx, rtx, rtx, rtx);
 extern int ix86_expand_setmem (rtx, rtx, rtx, rtx, rtx, rtx);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index be8f38ba1841..fa84e32a997c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -24646,6 +24646,58 @@ avx_vpermilp_parallel (rtx par, enum machine_mode mode)
   /* Make sure success has a non-zero value by adding one.  */
   return mask + 1;
 }
+
+/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
+   the expansion functions to turn the parallel back into a mask.
+   The return value is 0 for no match and the imm8+1 for a match.  */
+
+int
+avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
+{
+  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
+  unsigned mask = 0;
+  unsigned char ipar[8];        /* At most 8 elements (V8SF, V8SI).  */
+
+  if (XVECLEN (par, 0) != (int) nelt)
+    return 0;
+
+  /* Validate that all of the elements are constants, and not totally
+     out of range.  Copy the data into an integral array to make the
+     subsequent checks easier.  */
+  for (i = 0; i < nelt; ++i)
+    {
+      rtx er = XVECEXP (par, 0, i);
+      unsigned HOST_WIDE_INT ei;
+
+      if (!CONST_INT_P (er))
+        return 0;
+      ei = INTVAL (er);
+      if (ei >= 2 * nelt)
+        return 0;
+      ipar[i] = ei;
+    }
+
+  /* Validate that each half of the permute selects consecutive elements.  */
+  for (i = 0; i < nelt2 - 1; ++i)
+    if (ipar[i] + 1 != ipar[i + 1])
+      return 0;
+  for (i = nelt2; i < nelt - 1; ++i)
+    if (ipar[i] + 1 != ipar[i + 1])
+      return 0;
+
+  /* Reconstruct the mask: half I's 2-bit source selector sits at bit 4*I.  */
+  for (i = 0; i < 2; ++i)
+    {
+      unsigned e = ipar[i * nelt2];
+      if (e % nelt2)
+        return 0;
+      e /= nelt2;
+      mask |= e << (i * 4);
+    }
+
+  /* Make sure success has a non-zero value by adding one.  */
+  return mask + 1;
+}
 
 
 /* Store OPERAND to the memory after reload is completed.  This means
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 7200a6a21670..50a68d97ff8b 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1227,3 +1227,17 @@
 (define_predicate "avx_vpermilp_v2df_operand"
   (and (match_code "parallel")
        (match_test "avx_vpermilp_parallel (op, V2DFmode)")))
+
+;; Return 1 if OP is a parallel for a vperm2f128 permute.
+
+(define_predicate "avx_vperm2f128_v8sf_operand"
+  (and (match_code "parallel")
+       (match_test "avx_vperm2f128_parallel (op, V8SFmode)")))
+
+(define_predicate "avx_vperm2f128_v8si_operand"
+  (and (match_code "parallel")
+       (match_test "avx_vperm2f128_parallel (op, V8SImode)")))
+
+(define_predicate "avx_vperm2f128_v4df_operand"
+  (and (match_code "parallel")
+       (match_test "avx_vperm2f128_parallel (op, V4DFmode)")))
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 27c7a8b48426..b73820bc1d0e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -11917,7 +11917,44 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "avx_vperm2f128<mode>3"
+(define_expand "avx_vperm2f128<mode>3"
+  [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
+        (unspec:AVX256MODE2P
+          [(match_operand:AVX256MODE2P 1 "register_operand" "")
+           (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
+           (match_operand:SI 3 "const_0_to_255_operand" "")]
+          UNSPEC_VPERMIL2F128))]
+  "TARGET_AVX"
+{
+  int mask = INTVAL (operands[3]);      /* The imm8 is operand 3.  */
+  if ((mask & 0x88) == 0)               /* Neither result half is zeroed.  */
+    {
+      rtx perm[<ssescalarnum>], t1, t2;
+      int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
+
+      base = (mask & 3) * nelt2;
+      for (i = 0; i < nelt2; ++i)
+        perm[i] = GEN_INT (base + i);
+
+      base = ((mask >> 4) & 3) * nelt2;
+      for (i = 0; i < nelt2; ++i)
+        perm[i + nelt2] = GEN_INT (base + i);
+
+      t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
+                               operands[1], operands[2]);
+      t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
+      t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
+      t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
+      emit_insn (t2);
+      DONE;
+    }
+})
+
+;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
+;; means that in order to represent this properly in rtl we'd have to
+;; nest *another* vec_concat with a zero operand and do the select from
+;; a 4x wide vector.  That doesn't seem very nice.
+(define_insn "*avx_vperm2f128<mode>_full"
   [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
         (unspec:AVX256MODE2P
           [(match_operand:AVX256MODE2P 1 "register_operand" "x")
@@ -11932,6 +11969,26 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
 
+(define_insn "*avx_vperm2f128<mode>_nozero"
+  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
+        (vec_select:AVX256MODE2P
+          (vec_concat:<ssedoublesizemode>
+            (match_operand:AVX256MODE2P 1 "register_operand" "x")
+            (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
+          (match_parallel 3 "avx_vperm2f128_<mode>_operand"
+            [(match_operand 4 "const_int_operand" "")])))]
+  "TARGET_AVX"
+{
+  int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
+  operands[3] = GEN_INT (mask);
+  return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "V8SF")])
+
 (define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>"
   [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x")
         (vec_concat:AVXMODEF4P
-- 
GitLab