diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index 5e3a7ffacb42b947231e44e528ccee8e0915fdaf..effc2f24494766331ab9978eb15e707d1a9d4d90 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -919,8 +919,7 @@ general_scalar_chain::convert_op (rtx *op, rtx_insn *insn) rtx scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn) { - rtx tmp = gen_reg_rtx (vmode); - rtx src; + rtx src, tmp; /* Comparison against anything other than zero, requires an XOR. */ if (op2 != const0_rtx) { @@ -929,6 +928,7 @@ scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn) /* If both operands are MEMs, explicitly load the OP1 into TMP. */ if (MEM_P (op1) && MEM_P (op2)) { + tmp = gen_reg_rtx (vmode); emit_insn_before (gen_rtx_SET (tmp, op1), insn); src = tmp; } @@ -943,34 +943,56 @@ scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn) rtx op12 = XEXP (op1, 1); convert_op (&op11, insn); convert_op (&op12, insn); - if (MEM_P (op11)) + if (!REG_P (op11)) { + tmp = gen_reg_rtx (vmode); emit_insn_before (gen_rtx_SET (tmp, op11), insn); op11 = tmp; } src = gen_rtx_AND (vmode, gen_rtx_NOT (vmode, op11), op12); } + else if (GET_CODE (op1) == AND) + { + rtx op11 = XEXP (op1, 0); + rtx op12 = XEXP (op1, 1); + convert_op (&op11, insn); + convert_op (&op12, insn); + if (!REG_P (op11)) + { + tmp = gen_reg_rtx (vmode); + emit_insn_before (gen_rtx_SET (tmp, op11), insn); + op11 = tmp; + } + return gen_rtx_UNSPEC (CCmode, gen_rtvec (2, op11, op12), + UNSPEC_PTEST); + } else { convert_op (&op1, insn); src = op1; } - emit_insn_before (gen_rtx_SET (tmp, src), insn); + + if (!REG_P (src)) + { + tmp = gen_reg_rtx (vmode); + emit_insn_before (gen_rtx_SET (tmp, src), insn); + src = tmp; + } if (vmode == V2DImode) - emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (tmp), - copy_rtx_if_shared (tmp), - copy_rtx_if_shared (tmp)), - insn); + { + tmp = gen_reg_rtx (vmode); + emit_insn_before (gen_vec_interleave_lowv2di (tmp, src, src), insn); + src = tmp; + } else if (vmode == V4SImode) - emit_insn_before (gen_sse2_pshufd (copy_rtx_if_shared (tmp), - copy_rtx_if_shared (tmp), - const0_rtx), - insn); - - return gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (tmp), - copy_rtx_if_shared (tmp)), - UNSPEC_PTEST); + { + tmp = gen_reg_rtx (vmode); + emit_insn_before (gen_sse2_pshufd (tmp, src, const0_rtx), insn); + src = tmp; + } + + return gen_rtx_UNSPEC (CCmode, gen_rtvec (2, src, src), UNSPEC_PTEST); } /* Helper function for converting INSN to vector mode. */ @@ -1289,6 +1311,9 @@ timode_scalar_chain::fix_debug_reg_uses (rtx reg) void timode_scalar_chain::convert_op (rtx *op, rtx_insn *insn) { + if (GET_MODE (*op) == V1TImode) + return; + *op = copy_rtx_if_shared (*op); if (REG_P (*op)) @@ -1296,19 +1321,19 @@ timode_scalar_chain::convert_op (rtx *op, rtx_insn *insn) else if (MEM_P (*op)) { rtx tmp = gen_reg_rtx (V1TImode); - emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (V1TImode, tmp, 0), + emit_insn_before (gen_rtx_SET (tmp, gen_gpr_to_xmm_move_src (V1TImode, *op)), insn); - *op = gen_rtx_SUBREG (V1TImode, tmp, 0); + *op = tmp; if (dump_file) fprintf (dump_file, " Preloading operand for insn %d into r%d\n", INSN_UID (insn), REGNO (tmp)); } - else if (CONST_INT_P (*op)) + else if (CONST_SCALAR_INT_P (*op)) { rtx vec_cst; - rtx tmp = gen_rtx_SUBREG (V1TImode, gen_reg_rtx (TImode), 0); + rtx tmp = gen_reg_rtx (V1TImode); /* Prefer all ones vector in case of -1. */ if (constm1_operand (*op, TImode)) @@ -1329,7 +1354,7 @@ timode_scalar_chain::convert_op (rtx *op, rtx_insn *insn) emit_insn_before (seq, insn); } - emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn); + emit_insn_before (gen_move_insn (tmp, vec_cst), insn); *op = tmp; } else @@ -1609,14 +1634,26 @@ convertible_comparison_p (rtx_insn *insn, enum machine_mode mode) rtx op2 = XEXP (src, 1); /* *cmp<dwi>_doubleword. */ - if ((CONST_INT_P (op1) + if ((CONST_SCALAR_INT_P (op1) || ((REG_P (op1) || MEM_P (op1)) && GET_MODE (op1) == mode)) - && (CONST_INT_P (op2) + && (CONST_SCALAR_INT_P (op2) || ((REG_P (op2) || MEM_P (op2)) && GET_MODE (op2) == mode))) return true; + /* *testti_doubleword. */ + if (op2 == const0_rtx + && GET_CODE (op1) == AND + && REG_P (XEXP (op1, 0))) + { + rtx op12 = XEXP (op1, 1); + return GET_MODE (XEXP (op1, 0)) == TImode + && (CONST_SCALAR_INT_P (op12) + || ((REG_P (op12) || MEM_P (op12)) + && GET_MODE (op12) == TImode)); + } + /* *test<dwi>_not_doubleword. */ if (op2 == const0_rtx && GET_CODE (op1) == AND @@ -1803,15 +1840,21 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn) if (!MEM_P (dst) && GET_CODE (XEXP (src, 0)) == NOT && REG_P (XEXP (XEXP (src, 0), 0)) - && (REG_P (XEXP (src, 1)) || timode_mem_p (XEXP (src, 1)))) + && (REG_P (XEXP (src, 1)) + || CONST_SCALAR_INT_P (XEXP (src, 1)) + || timode_mem_p (XEXP (src, 1)))) return true; return REG_P (XEXP (src, 0)) - && (REG_P (XEXP (src, 1)) || timode_mem_p (XEXP (src, 1))); + && (REG_P (XEXP (src, 1)) + || CONST_SCALAR_INT_P (XEXP (src, 1)) + || timode_mem_p (XEXP (src, 1))); case IOR: case XOR: return REG_P (XEXP (src, 0)) - && (REG_P (XEXP (src, 1)) || timode_mem_p (XEXP (src, 1))); + && (REG_P (XEXP (src, 1)) + || CONST_SCALAR_INT_P (XEXP (src, 1)) + || timode_mem_p (XEXP (src, 1))); case NOT: return REG_P (XEXP (src, 0)) || timode_mem_p (XEXP (src, 0)); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 5e30dc884bf981408d9a8013f3f64c0ef276115b..5be76e1dd6ffb8288b31a15635b05e85cce245e1 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -21063,11 +21063,25 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, case UNSPEC: if (XINT (x, 1) == UNSPEC_TP) *total = 0; - else if (XINT(x, 1) == UNSPEC_VTERNLOG) + else if (XINT (x, 1) == UNSPEC_VTERNLOG) { *total = cost->sse_op; return true; } + else if (XINT (x, 1) == UNSPEC_PTEST) + { + *total = cost->sse_op; + if (XVECLEN (x, 0) == 2 + && GET_CODE (XVECEXP (x, 0, 0)) == AND) + { + rtx andop = XVECEXP (x, 0, 0); + *total += rtx_cost (XEXP (andop, 0), GET_MODE (andop), + AND, opno, speed) + + rtx_cost (XEXP (andop, 1), GET_MODE (andop), + AND, opno, speed); + return true; + } + } return false; case VEC_SELECT: diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index fd30c573c270f147c944061d56bfa390f4bf0a95..2fde8cdf48b3b7afc1967ac111b7ab820be22408 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -9756,6 +9756,27 @@ [(set_attr "type" "test") (set_attr "mode" "QI")]) +;; Provide a *testti instruction that STV can implement using ptest. +;; This pattern splits into *andti3_doubleword and *cmpti_doubleword. +(define_insn_and_split "*testti_doubleword" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ + (and:TI (match_operand:TI 0 "register_operand") + (match_operand:TI 1 "general_operand")) + (const_int 0)))] + "TARGET_64BIT + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(parallel [(set (match_dup 2) (and:TI (match_dup 0) (match_dup 1))) + (clobber (reg:CC FLAGS_REG))]) + (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 2) (const_int 0)))] +{ + operands[2] = gen_reg_rtx (TImode); + if (!x86_64_hilo_general_operand (operands[1], TImode)) + operands[1] = force_reg (TImode, operands[1]); +}) + ;; Combine likes to form bit extractions for some tests. Humor it. (define_insn_and_split "*testqi_ext_3" [(set (match_operand 0 "flags_reg_operand") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 14d12d16c34f0b0948132ccd1913732237f45442..ccd9d002e9315a0ed9f80c14cd1dd1abff79b0e0 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -23021,6 +23021,19 @@ (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) +(define_insn_and_split "*ptest<mode>_and" + [(set (reg:CC FLAGS_REG) + (unspec:CC [(and:V_AVX (match_operand:V_AVX 0 "register_operand") + (match_operand:V_AVX 1 "vector_operand")) + (and:V_AVX (match_dup 0) (match_dup 1))] + UNSPEC_PTEST))] + "TARGET_SSE4_1 + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0) (match_dup 1)] UNSPEC_PTEST))]) + (define_expand "nearbyint<mode>2" [(set (match_operand:VFH 0 "register_operand") (unspec:VFH diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-stv-8.c b/gcc/testsuite/gcc.target/i386/sse4_1-stv-8.c new file mode 100644 index 0000000000000000000000000000000000000000..5c5d803797be9f784e8e2699ea0ac7af942dab35 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-stv-8.c @@ -0,0 +1,11 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2 -msse4.1 -mstv -mno-stackrealign" } */ + +__int128 a,b; +int foo() +{ + return (a & b) != 0; +} + +/* { dg-final { scan-assembler-not "pand" } } */ +/* { dg-final { scan-assembler "ptest" } } */