diff --git a/gcc/config/mips/mips-msa.md b/gcc/config/mips/mips-msa.md index 0081b688ce97bd74d07ce0cd58a0f5a9d67c3a20..377c63f0d357a458ce93153b1eb5a00092cedd75 100644 --- a/gcc/config/mips/mips-msa.md +++ b/gcc/config/mips/mips-msa.md @@ -125,9 +125,6 @@ ;; Only floating-point modes. (define_mode_iterator FMSA [V2DF V4SF]) -;; Only used for immediate set shuffle elements instruction. -(define_mode_iterator MSA_WHB_W [V4SI V8HI V16QI V4SF]) - ;; The attribute gives the integer vector mode with same size. (define_mode_attr VIMODE [(V2DF "V2DI") @@ -2520,21 +2517,29 @@ (set_attr "mode" "<MODE>")]) (define_insn "msa_shf_<msafmt_f>" - [(set (match_operand:MSA_WHB_W 0 "register_operand" "=f") - (vec_select:MSA_WHB_W - (match_operand:MSA_WHB_W 1 "register_operand" "f") + [(set (match_operand:MSA 0 "register_operand" "=f") + (vec_select:MSA + (match_operand:MSA 1 "register_operand" "f") (match_operand 2 "par_const_vector_shf_set_operand" "")))] "ISA_HAS_MSA" { - HOST_WIDE_INT val = 0; - unsigned int i; - - /* We convert the selection to an immediate. */ - for (i = 0; i < 4; i++) - val |= INTVAL (XVECEXP (operands[2], 0, i)) << (2 * i); - - operands[2] = GEN_INT (val); - return "shf.<msafmt>\t%w0,%w1,%X2"; + HOST_WIDE_INT rval = mips_msa_shf_i8 (operands); + /* 0b11100100 means that there is no shf needed at all. This RTL + should be optimized out in some pass. 
*/ + if ((rval & 0xff) == 0xe4) + gcc_unreachable (); + operands[2] = GEN_INT (rval & 0xff); + switch (rval & 0xff00) + { + default: gcc_unreachable (); + case 0x400: + return "shf.w\t%w0,%w1,%X2"; + case 0x200: + return "shf.h\t%w0,%w1,%X2"; + case 0x100: + return "shf.b\t%w0,%w1,%X2"; + } + gcc_unreachable (); } [(set_attr "type" "simd_shf") (set_attr "mode" "<MODE>")]) diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h index 75f80984c030ba4656071a85689b3737a996c643..90b4c87fdea15618ba8efec575fd084547281f33 100644 --- a/gcc/config/mips/mips-protos.h +++ b/gcc/config/mips/mips-protos.h @@ -387,6 +387,7 @@ extern mulsidi3_gen_fn mips_mulsidi3_gen_fn (enum rtx_code); extern void mips_register_frame_header_opt (void); extern void mips_expand_vec_cond_expr (machine_mode, machine_mode, rtx *, bool); extern void mips_expand_vec_cmp_expr (rtx *); +extern HOST_WIDE_INT mips_msa_shf_i8 (rtx *); extern void mips_emit_speculation_barrier_function (void); diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc index 7d4791157d1258232fc7c8a9000086f323a0df32..6c797b6216435597ae9aa45b106dac209e6afbff 100644 --- a/gcc/config/mips/mips.cc +++ b/gcc/config/mips/mips.cc @@ -2079,6 +2079,72 @@ mips_const_vector_shuffle_set_p (rtx op, machine_mode mode) int nsets = nunits / 4; int set = 0; int i, j; + int val[4]; + bool ok; + + /* We support swapping 2 Doubleword part with shf.w. */ + if (ISA_HAS_MSA && (mode == V2DFmode || mode == V2DImode)) + { + if (!IN_RANGE (INTVAL (XVECEXP (op, 0, 0)), 0, 1) + || !IN_RANGE (INTVAL (XVECEXP (op, 0, 1)), 0, 1)) + return false; + } + + if (ISA_HAS_MSA && mode == V16QImode) + { + /* We can use shf.w if the elements are in-order inner 32bit. 
*/ + ok = true; + for (j = 0; j < 4; j++) + { + val[0] = INTVAL (XVECEXP (op, 0, j * 4)); + val[1] = INTVAL (XVECEXP (op, 0, j * 4 + 1)); + val[2] = INTVAL (XVECEXP (op, 0, j * 4 + 2)); + val[3] = INTVAL (XVECEXP (op, 0, j * 4 + 3)); + if (val[0] != val[1] - 1 + || val[1] != val[2] - 1 + || val[2] != val[3] - 1) + ok = false; + if (val[0] != 0 && val[0] != 4 && val[0] != 8 && val[0] != 12) + ok = false; + } + if (ok) + return ok; + + /* We can use shf.h if the elements are in order inner 16bit. */ + ok = true; + for (j = 0; j < 4; j++) + { + val[0] = INTVAL (XVECEXP (op, 0, j * 2)); + val[1] = INTVAL (XVECEXP (op, 0, j * 2 + 1)); + val[2] = INTVAL (XVECEXP (op, 0, j * 2 + 8)); + val[3] = INTVAL (XVECEXP (op, 0, j * 2 + 1 + 8)); + if (val[0] != val[1] - 1 || val[2] != val[3] - 1) + ok = false; + if (val[0] != val[2] - 8 || val[1] != val[3] - 8) + ok = false; + if (val[0] != 0 && val[0] != 2 && val[0] != 4 && val[0] != 6) + ok = false; + } + if (ok) + return ok; + } + + if (ISA_HAS_MSA && mode == V8HImode) + { + /* We can use shf.w if the elements are in-order inner 32bit. */ + ok = true; + for (j = 0; j < 4; j++) + { + val[0] = INTVAL (XVECEXP (op, 0, j * 2)); + val[1] = INTVAL (XVECEXP (op, 0, j * 2 + 1)); + if (val[0] != val[1] - 1) + ok = false; + if (val[0] != 0 && val[0] != 2 && val[0] != 4 && val[0] != 6) + ok = false; + } + if (ok) + return ok; + } /* Check if we have the same 4-element sets. */ for (j = 0; j < nsets; j++, set = 4 * j) @@ -22304,6 +22370,89 @@ mips_msa_vec_parallel_const_half (machine_mode mode, bool high_p) return gen_rtx_PARALLEL (VOIDmode, v); } +/* Construct and return i8 of SHF.df. No error will happen since tt has + been constrained by mips_const_vector_shuffle_set_p. + Return (IMM | (INSN << 8)): The range of IMM is [0, 0xFF]. + The INSN can be 0 (error)/1 (SHF.B)/2 (SHF.H)/4 (SHF.W). 
*/
+
+HOST_WIDE_INT
+mips_msa_shf_i8 (rtx *operands)
+{
+  /* OPERANDS[0] is the destination, whose mode gives the element size,
+     and OPERANDS[2] is the PARALLEL of CONST_INT element selectors.
+     The value returned is (IMM | (DF << 8)), where IMM is the 8-bit
+     SHF immediate and DF is the size in bytes of the data format to
+     use: 4 (SHF.W), 2 (SHF.H) or 1 (SHF.B).  0 is returned if no
+     SHF.df can perform the selection or if the mode is not one of the
+     vector modes handled below.  */
+  machine_mode mode = GET_MODE (operands[0]);
+  HOST_WIDE_INT byte[16];	/* Source byte of each result byte.  */
+  HOST_WIDE_INT unit[16];	/* Source unit of each result unit.  */
+  HOST_WIDE_INT rval;
+  int nunits, esize, df, i, k;
+
+  /* All handled modes are 16 bytes wide; only the element count varies.  */
+  if (mode == V2DImode || mode == V2DFmode)
+    nunits = 2;
+  else if (mode == V4SImode || mode == V4SFmode)
+    nunits = 4;
+  else if (mode == V8HImode)
+    nunits = 8;
+  else if (mode == V16QImode)
+    nunits = 16;
+  else
+    return 0;
+
+  /* Size of one element in bytes.  */
+  esize = 16 / nunits;
+
+  /* Rewrite the selection at byte granularity, so that every mode,
+     including the doubleword ones that SHF.W handles as two words per
+     element, goes through the same checks below.  */
+  for (i = 0; i < 16; i++)
+    byte[i] = (INTVAL (XVECEXP (operands[2], 0, i / esize)) * esize
+	       + i % esize);
+
+  /* Prefer the widest data format.  A format is usable only if the whole
+     selection agrees with it; judging by the leading elements alone would
+     take e.g. the V16QI selection { 0, 1, 1, 2, 4, 5, 5, 6, ... } for a
+     halfword shuffle although only SHF.B can express it.  */
+  for (df = 4; df >= 1; df /= 2)
+    {
+      bool ok = true;
+
+      for (i = 0; i < 16 / df && ok; i++)
+	{
+	  unit[i] = byte[i * df] / df;
+
+	  /* The unit must be an aligned, in-order run of DF bytes.  */
+	  for (k = 0; k < df; k++)
+	    if (byte[i * df + k] != unit[i] * df + k)
+	      ok = false;
+
+	  /* SHF applies one 4-unit pattern to every set of 4 units and
+	     cannot move data across sets, so unit I of a later set must
+	     select the same position as unit I % 4 of the first set.  */
+	  if (unit[i % 4] < 0 || unit[i % 4] > 3
+	      || unit[i] - (i - i % 4) != unit[i % 4])
+	    ok = false;
+	}
+
+      if (ok)
+	{
+	  /* Two bits per unit of the first set form the immediate.  */
+	  rval = df << 8;
+	  for (i = 0; i < 4; i++)
+	    rval |= unit[i] << (2 * i);
+	  return rval;
+	}
+    }
+
+  /* No data format fits: report INSN 0 (error) instead of encoding an
+     immediate for a shuffle that SHF cannot perform.  */
+  return 0;
+}
+
 /* A subroutine of mips_expand_vec_init, match constant vector elements.  */
 
 static inline bool