diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 79a0f905abc176098390ccfc2b3dbb899e2d6164..ddc5149027fa355f3be21ae8f9f9ac8b543a8fe8 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -53,6 +53,11 @@
 	* config/sparc/sparc.md: Use register_or_zero_operand where rJ
 	is the constraint.
 
+	* config/sparc/sparc.md (vec_perm_constv8qi, vec_perm<mode>): New
+	patterns.
+	* config/sparc/sparc.c (sparc_expand_vec_perm_bmask): New function.
+	* config/sparc/sparc-protos.h (sparc_expand_vec_perm_bmask): Declare.
+
 2011-10-17  David S. Miller  <davem@davemloft.net>
 
 	* config/sparc/sparc-modes.def: Add single entry vector modes for
diff --git a/gcc/config/sparc/sparc-protos.h b/gcc/config/sparc/sparc-protos.h
index 744747a34d29c19ceae4ea8a9189f3f4a75b386e..4b15b82c07a33ece84a3ed65a6b36f85a825520c 100644
--- a/gcc/config/sparc/sparc-protos.h
+++ b/gcc/config/sparc/sparc-protos.h
@@ -107,6 +107,7 @@ extern rtx gen_df_reg (rtx, int);
 extern void sparc_expand_compare_and_swap_12 (rtx, rtx, rtx, rtx);
 extern const char *output_v8plus_mult (rtx, rtx *, const char *);
 extern void sparc_expand_vector_init (rtx, rtx);
+extern void sparc_expand_vec_perm_bmask(enum machine_mode, rtx);
 #endif /* RTX_CODE */
 
 #endif /* __SPARC_PROTOS_H__ */
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index b495690b0e6d2fa5fd342210acd075458085949c..a6601def3be8e754afb7640002a9ace57cb9f5be 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -10863,6 +10863,113 @@ sparc_expand_compare_and_swap_12 (rtx result, rtx mem, rtx oldval, rtx newval)
   emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
 }
 
+void
+sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
+{
+  rtx t_1, t_2, t_3;
+
+  sel = gen_lowpart (DImode, sel);
+  switch (vmode)
+    {
+    case V2SImode:
+      /* inp = xxxxxxxAxxxxxxxB */
+      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
+                                 NULL_RTX, 1, OPTAB_DIRECT);
+      /* t_1 = ....xxxxxxxAxxx. */
+      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
+                                 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
+      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
+                                 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
+      /* sel = .......B */
+      /* t_1 = ...A.... */
+      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
+      /* sel = ...A...B */
+      sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
+      /* sel = AAAABBBB * 4 */
+      t_1 = force_reg (SImode, GEN_INT (0x01230123));
+      /* sel = { A*4, A*4+1, A*4+2, ... } */
+      break;
+
+    case V4HImode:
+      /* inp = xxxAxxxBxxxCxxxD */
+      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
+                                 NULL_RTX, 1, OPTAB_DIRECT);
+      t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
+                                 NULL_RTX, 1, OPTAB_DIRECT);
+      t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
+                                 NULL_RTX, 1, OPTAB_DIRECT);
+      /* t_1 = ..xxxAxxxBxxxCxx */
+      /* t_2 = ....xxxAxxxBxxxC */
+      /* t_3 = ......xxxAxxxBxx */
+      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
+                                 GEN_INT (0x07),
+                                 NULL_RTX, 1, OPTAB_DIRECT);
+      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
+                                 GEN_INT (0x0700),
+                                 NULL_RTX, 1, OPTAB_DIRECT);
+      t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
+                                 GEN_INT (0x070000),
+                                 NULL_RTX, 1, OPTAB_DIRECT);
+      t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
+                                 GEN_INT (0x07000000),
+                                 NULL_RTX, 1, OPTAB_DIRECT);
+      /* sel = .......D */
+      /* t_1 = .....C.. */
+      /* t_2 = ...B.... */
+      /* t_3 = .A...... */
+      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
+      t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
+      sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
+      /* sel = .A.B.C.D */
+      sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
+      /* sel = AABBCCDD * 2 */
+      t_1 = force_reg (SImode, GEN_INT (0x01010101));
+      /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
+      break;
+
+    case V8QImode:
+      /* input = xAxBxCxDxExFxGxH */
+      sel = expand_simple_binop (DImode, AND, sel,
+                                 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
+                                          | 0x0f0f0f0f),
+                                 NULL_RTX, 1, OPTAB_DIRECT);
+      /* sel = .A.B.C.D.E.F.G.H */
+      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
+                                 NULL_RTX, 1, OPTAB_DIRECT);
+      /* t_1 = ..A.B.C.D.E.F.G. */
+      sel = expand_simple_binop (DImode, IOR, sel, t_1,
+                                 NULL_RTX, 1, OPTAB_DIRECT);
+      /* sel = .AABBCCDDEEFFGGH */
+      sel = expand_simple_binop (DImode, AND, sel,
+                                 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
+                                          | 0xff00ff),
+                                 NULL_RTX, 1, OPTAB_DIRECT);
+      /* sel = ..AB..CD..EF..GH */
+      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
+                                 NULL_RTX, 1, OPTAB_DIRECT);
+      /* t_1 = ....AB..CD..EF.. */
+      sel = expand_simple_binop (DImode, IOR, sel, t_1,
+                                 NULL_RTX, 1, OPTAB_DIRECT);
+      /* sel = ..ABABCDCDEFEFGH */
+      sel = expand_simple_binop (DImode, AND, sel,
+                                 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
+                                 NULL_RTX, 1, OPTAB_DIRECT);
+      /* sel = ....ABCD....EFGH */
+      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
+                                 NULL_RTX, 1, OPTAB_DIRECT);
+      /* t_1 = ........ABCD.... */
+      sel = gen_lowpart (SImode, sel);
+      t_1 = gen_lowpart (SImode, t_1);
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  /* Always perform the final addition/merge within the bmask insn. */
+  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
+}
+
 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
 
 static bool
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 75238c52acbd7469e4dda68a657782229918823d..d89f6f93b58b8e760a9c094728f3ddc429d5209a 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -8350,6 +8350,43 @@
   [(set_attr "type" "fga")
    (set_attr "fptype" "double")])
 
+;; The rtl expanders will happily convert constant permutations on other
+;; modes down to V8QI. Rely on this to avoid the complexity of the byte
+;; order of the permutation.
+(define_expand "vec_perm_constv8qi"
+  [(match_operand:V8QI 0 "register_operand" "")
+   (match_operand:V8QI 1 "register_operand" "")
+   (match_operand:V8QI 2 "register_operand" "")
+   (match_operand:V8QI 3 "" "")]
+  "TARGET_VIS2"
+{
+  unsigned int i, mask;
+  rtx sel = operands[3];
+
+  for (i = mask = 0; i < 8; ++i)
+    mask |= (INTVAL (XVECEXP (sel, 0, i)) & 0xf) << (28 - i*4);
+  sel = force_reg (SImode, gen_int_mode (mask, SImode));
+
+  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
+  emit_insn (gen_bshufflev8qi_vis (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+;; Unlike constant permutation, we can vastly simplify the compression of
+;; the 64-bit selector input to the 32-bit %gsr value by knowing what the
+;; width of the input is.
+(define_expand "vec_perm<mode>"
+  [(match_operand:VM64 0 "register_operand" "")
+   (match_operand:VM64 1 "register_operand" "")
+   (match_operand:VM64 2 "register_operand" "")
+   (match_operand:VM64 3 "register_operand" "")]
+  "TARGET_VIS2"
+{
+  sparc_expand_vec_perm_bmask (<MODE>mode, operands[3]);
+  emit_insn (gen_bshuffle<mode>_vis (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
 ;; VIS 2.0 adds edge variants which do not set the condition codes
 (define_insn "edge8n<P:mode>_vis"
   [(set (match_operand:P 0 "register_operand" "=r")
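
The V4HImode arm of sparc_expand_vec_perm_bmask above can be sanity-checked on the host with plain integer arithmetic. The sketch below is illustrative only and not part of the patch; the function name and the test indices are invented. It mirrors the RTL steps: isolate the low three bits of each halfword index, pack one index per byte, then multiply by 0x22 so every index v becomes the nibble pair (2v,2v); the bmask insn supplies the final +0x01010101 that turns that into the byte pairs (2v,2v+1) expected by bshuffle.

/* Host-side sketch (not part of the patch): mimic the V4HImode selector
   compression on plain integers.  The input packs four halfword indices
   A,B,C,D (each 0..7) into a 64-bit value the way a big-endian V4HI
   register lays them out; the output is the 32-bit value handed to the
   bmask insn, which adds 0x01010101 and writes %gsr.  */
#include <stdint.h>
#include <stdio.h>

static uint32_t v4hi_sel_to_bmask (uint64_t sel)
{
  uint64_t t_1 = sel >> 8;
  uint64_t t_2 = sel >> 16;
  uint64_t t_3 = sel >> 24;
  uint32_t b0 = (uint32_t) sel & 0x07;          /* .......D */
  uint32_t b1 = (uint32_t) t_1 & 0x0700;        /* .....C.. */
  uint32_t b2 = (uint32_t) t_2 & 0x070000;      /* ...B.... */
  uint32_t b3 = (uint32_t) t_3 & 0x07000000;    /* .A...... */
  uint32_t packed = b0 | b1 | b2 | b3;          /* .A.B.C.D */
  return packed * 0x22;                         /* AABBCCDD * 2 */
}

int main (void)
{
  /* Test indices 5,1,6,2: output halfword 0 takes source halfword 5, etc.  */
  uint64_t sel = (5ULL << 48) | (1ULL << 32) | (6ULL << 16) | 2ULL;
  uint32_t m = v4hi_sel_to_bmask (sel);
  /* The bmask insn itself performs the final +0x01010101 merge.  */
  printf ("%08x -> gsr mask %08x\n", m, m + 0x01010101);
  return 0;
}

The V2SImode arm works the same way, just with a 3-bit-per-word extraction, a 0x4444 multiplier and a 0x01230123 addend, because each word index has to expand into four consecutive byte indices rather than two.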
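
The V8QImode arm is a straight bit compression: eight 4-bit indices, one per byte of the 64-bit selector, are folded into 32 bits with three mask/shift/IOR rounds, and the bmask insn adds the two remaining halves together while writing %gsr. Below is a host-side sketch of the same dataflow; again the name and the test selector are invented for illustration.

/* Host-side sketch (not part of the patch): fold the eight nibble indices
   of a big-endian V8QI selector down to the 32-bit bshuffle byte mask.  */
#include <stdint.h>
#include <stdio.h>

static uint32_t v8qi_sel_to_bmask (uint64_t sel)
{
  uint64_t t_1;

  sel &= 0x0f0f0f0f0f0f0f0fULL;   /* .A.B.C.D.E.F.G.H */
  t_1 = sel >> 4;                 /* ..A.B.C.D.E.F.G. */
  sel |= t_1;                     /* .AABBCCDDEEFFGGH */
  sel &= 0x00ff00ff00ff00ffULL;   /* ..AB..CD..EF..GH */
  t_1 = sel >> 8;                 /* ....AB..CD..EF.. */
  sel |= t_1;                     /* ..ABABCDCDEFEFGH */
  sel &= 0x0000ffff0000ffffULL;   /* ....ABCD....EFGH */
  t_1 = sel >> 16;                /* ........ABCD.... */

  /* The bmask insn does this final add of the two SImode lowparts.  */
  return (uint32_t) sel + (uint32_t) t_1;       /* ABCDEFGH */
}

int main (void)
{
  /* Reverse the eight bytes: element 0 takes index 7, ..., element 7 index 0.  */
  uint64_t sel = 0x0706050403020100ULL;
  printf ("gsr mask %08x\n", v8qi_sel_to_bmask (sel));
  return 0;
}

With the byte-reversal selector in main, the printed mask is 76543210, i.e. output byte 0 is taken from input byte 7 and so on down.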
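
For constant permutations, vec_perm_constv8qi skips the runtime compression entirely and assembles the 32-bit bmask operand at compile time, one nibble per output byte with element 0 in the most significant nibble; indices 8-15 reach into the second source operand, since bshuffle indexes the 16-byte concatenation of its two inputs. A minimal sketch of that loop, with a hypothetical helper name and test data:

/* Host-side sketch (not part of the patch): the constant-mask construction
   used by the vec_perm_constv8qi expander, on a plain index array.  */
#include <stdint.h>
#include <stdio.h>

static uint32_t const_v8qi_bmask (const unsigned char idx[8])
{
  unsigned int i;
  uint32_t mask = 0;

  for (i = 0; i < 8; ++i)
    mask |= (uint32_t) (idx[i] & 0xf) << (28 - i * 4);
  return mask;
}

int main (void)
{
  /* Select the odd-numbered bytes of the 16-byte concatenation op1:op2.  */
  static const unsigned char idx[8] = { 1, 3, 5, 7, 9, 11, 13, 15 };
  printf ("bmask operand %08x\n", const_v8qi_bmask (idx));
  return 0;
}

For the odd-byte selection shown, the expander would load 0x13579bdf into a register, feed it to bmasksi_vis with a zero second operand, and let bshufflev8qi_vis perform the extraction.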