diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 5e3a7ffacb42b947231e44e528ccee8e0915fdaf..effc2f24494766331ab9978eb15e707d1a9d4d90 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -919,8 +919,7 @@ general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
 rtx
 scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn)
 {
-  rtx tmp = gen_reg_rtx (vmode);
-  rtx src;
+  rtx src, tmp;
   /* Comparison against anything other than zero, requires an XOR.  */
   if (op2 != const0_rtx)
     {
@@ -929,6 +928,7 @@ scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn)
       /* If both operands are MEMs, explicitly load the OP1 into TMP.  */
       if (MEM_P (op1) && MEM_P (op2))
 	{
+	  tmp = gen_reg_rtx (vmode);
 	  emit_insn_before (gen_rtx_SET (tmp, op1), insn);
 	  src = tmp;
 	}
@@ -943,34 +943,56 @@ scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn)
       rtx op12 = XEXP (op1, 1);
       convert_op (&op11, insn);
       convert_op (&op12, insn);
-      if (MEM_P (op11))
+      if (!REG_P (op11))
 	{
+	  tmp = gen_reg_rtx (vmode);
 	  emit_insn_before (gen_rtx_SET (tmp, op11), insn);
 	  op11 = tmp;
 	}
       src = gen_rtx_AND (vmode, gen_rtx_NOT (vmode, op11), op12);
     }
+  else if (GET_CODE (op1) == AND)
+    {
+      rtx op11 = XEXP (op1, 0);
+      rtx op12 = XEXP (op1, 1);
+      convert_op (&op11, insn);
+      convert_op (&op12, insn);
+      if (!REG_P (op11))
+	{
+	  tmp = gen_reg_rtx (vmode);
+	  emit_insn_before (gen_rtx_SET (tmp, op11), insn);
+	  op11 = tmp;
+	}
+      return gen_rtx_UNSPEC (CCmode, gen_rtvec (2, op11, op12),
+			     UNSPEC_PTEST);
+    }
   else
     {
       convert_op (&op1, insn);
       src = op1;
     }
-  emit_insn_before (gen_rtx_SET (tmp, src), insn);
+
+  if (!REG_P (src))
+    {
+      tmp = gen_reg_rtx (vmode);
+      emit_insn_before (gen_rtx_SET (tmp, src), insn);
+      src = tmp;
+    }
 
   if (vmode == V2DImode)
-    emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (tmp),
-						  copy_rtx_if_shared (tmp),
-						  copy_rtx_if_shared (tmp)),
-		      insn);
+    {
+      tmp = gen_reg_rtx (vmode);
+      emit_insn_before (gen_vec_interleave_lowv2di (tmp, src, src), insn);
+      src = tmp;
+    }
   else if (vmode == V4SImode)
-    emit_insn_before (gen_sse2_pshufd (copy_rtx_if_shared (tmp),
-				       copy_rtx_if_shared (tmp),
-				       const0_rtx),
-		      insn);
-
-  return gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (tmp),
-					       copy_rtx_if_shared (tmp)),
-			 UNSPEC_PTEST);
+    {
+      tmp = gen_reg_rtx (vmode);
+      emit_insn_before (gen_sse2_pshufd (tmp, src, const0_rtx), insn);
+      src = tmp;
+    }
+
+  return gen_rtx_UNSPEC (CCmode, gen_rtvec (2, src, src), UNSPEC_PTEST);
 }
 
 /* Helper function for converting INSN to vector mode.  */
@@ -1289,6 +1311,9 @@ timode_scalar_chain::fix_debug_reg_uses (rtx reg)
 void
 timode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
 {
+  if (GET_MODE (*op) == V1TImode)
+    return;
+
   *op = copy_rtx_if_shared (*op);
 
   if (REG_P (*op))
@@ -1296,19 +1321,19 @@ timode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
   else if (MEM_P (*op))
     {
       rtx tmp = gen_reg_rtx (V1TImode);
-      emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (V1TImode, tmp, 0),
+      emit_insn_before (gen_rtx_SET (tmp,
 				     gen_gpr_to_xmm_move_src (V1TImode, *op)),
 			insn);
-      *op = gen_rtx_SUBREG (V1TImode, tmp, 0);
+      *op = tmp;
 
       if (dump_file)
 	fprintf (dump_file, "  Preloading operand for insn %d into r%d\n",
 		 INSN_UID (insn), REGNO (tmp));
     }
-  else if (CONST_INT_P (*op))
+  else if (CONST_SCALAR_INT_P (*op))
     {
       rtx vec_cst;
-      rtx tmp = gen_rtx_SUBREG (V1TImode, gen_reg_rtx (TImode), 0);
+      rtx tmp = gen_reg_rtx (V1TImode);
 
       /* Prefer all ones vector in case of -1.  */
       if (constm1_operand (*op, TImode))
@@ -1329,7 +1354,7 @@ timode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
 	  emit_insn_before (seq, insn);
 	}
 
-      emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
+      emit_insn_before (gen_move_insn (tmp, vec_cst), insn);
       *op = tmp;
     }
   else
@@ -1609,14 +1634,26 @@ convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
   rtx op2 = XEXP (src, 1);
 
   /* *cmp<dwi>_doubleword.  */
-  if ((CONST_INT_P (op1)
+  if ((CONST_SCALAR_INT_P (op1)
        || ((REG_P (op1) || MEM_P (op1))
 	   && GET_MODE (op1) == mode))
-      && (CONST_INT_P (op2)
+      && (CONST_SCALAR_INT_P (op2)
 	  || ((REG_P (op2) || MEM_P (op2))
 	      && GET_MODE (op2) == mode)))
     return true;
 
+  /* *testti_doubleword.  */
+  if (op2 == const0_rtx
+      && GET_CODE (op1) == AND
+      && REG_P (XEXP (op1, 0)))
+    {
+      rtx op12 = XEXP (op1, 1);
+      return GET_MODE (XEXP (op1, 0)) == TImode
+	     && (CONST_SCALAR_INT_P (op12)
+		 || ((REG_P (op12) || MEM_P (op12))
+		     && GET_MODE (op12) == TImode));
+    }
+
   /* *test<dwi>_not_doubleword.  */
   if (op2 == const0_rtx
       && GET_CODE (op1) == AND
@@ -1803,15 +1840,21 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn)
       if (!MEM_P (dst)
 	  && GET_CODE (XEXP (src, 0)) == NOT
 	  && REG_P (XEXP (XEXP (src, 0), 0))
-	  && (REG_P (XEXP (src, 1)) || timode_mem_p (XEXP (src, 1))))
+	  && (REG_P (XEXP (src, 1))
+	      || CONST_SCALAR_INT_P (XEXP (src, 1))
+	      || timode_mem_p (XEXP (src, 1))))
 	return true;
       return REG_P (XEXP (src, 0))
-	     && (REG_P (XEXP (src, 1)) || timode_mem_p (XEXP (src, 1)));
+	     && (REG_P (XEXP (src, 1))
+		 || CONST_SCALAR_INT_P (XEXP (src, 1))
+		 || timode_mem_p (XEXP (src, 1)));
 
     case IOR:
     case XOR:
       return REG_P (XEXP (src, 0))
-	     && (REG_P (XEXP (src, 1)) || timode_mem_p (XEXP (src, 1)));
+	     && (REG_P (XEXP (src, 1))
+		 || CONST_SCALAR_INT_P (XEXP (src, 1))
+		 || timode_mem_p (XEXP (src, 1)));
 
     case NOT:
       return REG_P (XEXP (src, 0)) || timode_mem_p (XEXP (src, 0));
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 5e30dc884bf981408d9a8013f3f64c0ef276115b..5be76e1dd6ffb8288b31a15635b05e85cce245e1 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -21063,11 +21063,25 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
     case UNSPEC:
       if (XINT (x, 1) == UNSPEC_TP)
 	*total = 0;
-      else if (XINT(x, 1) == UNSPEC_VTERNLOG)
+      else if (XINT (x, 1) == UNSPEC_VTERNLOG)
 	{
 	  *total = cost->sse_op;
 	  return true;
 	}
+      else if (XINT (x, 1) == UNSPEC_PTEST)
+	{
+	  *total = cost->sse_op;
+	  if (XVECLEN (x, 0) == 2
+	      && GET_CODE (XVECEXP (x, 0, 0)) == AND)
+	    {
+	      rtx andop = XVECEXP (x, 0, 0);
+	      *total += rtx_cost (XEXP (andop, 0), GET_MODE (andop),
+				  AND, opno, speed)
+			+ rtx_cost (XEXP (andop, 1), GET_MODE (andop),
+				    AND, opno, speed);
+	      return true;
+	    }
+	}
       return false;
 
     case VEC_SELECT:
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index fd30c573c270f147c944061d56bfa390f4bf0a95..2fde8cdf48b3b7afc1967ac111b7ab820be22408 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -9756,6 +9756,31 @@
   [(set_attr "type" "test")
    (set_attr "mode" "QI")])
 
+;; Provide a *testti instruction that STV can implement using ptest.
+;; This pattern splits into *andti3_doubleword and *cmpti_doubleword.
+(define_insn_and_split "*testti_doubleword"
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ
+	  (and:TI (match_operand:TI 0 "register_operand")
+		  (match_operand:TI 1 "general_operand"))
+	  (const_int 0)))]
+  "TARGET_64BIT
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 2) (and:TI (match_dup 0) (match_dup 1)))
+              (clobber (reg:CC FLAGS_REG))])
+   (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 2) (const_int 0)))]
+{
+  /* Fresh pseudo that receives the AND result and is then compared
+     against zero by the second insn of the split.  */
+  operands[2] = gen_reg_rtx (TImode);
+  /* Operands that x86_64_hilo_general_operand rejects are loaded into
+     a register before being used as the AND's second input.  */
+  if (!x86_64_hilo_general_operand (operands[1], TImode))
+    operands[1] = force_reg (TImode, operands[1]);
+})
+
 ;; Combine likes to form bit extractions for some tests.  Humor it.
 (define_insn_and_split "*testqi_ext_3"
   [(set (match_operand 0 "flags_reg_operand")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 14d12d16c34f0b0948132ccd1913732237f45442..ccd9d002e9315a0ed9f80c14cd1dd1abff79b0e0 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -23021,6 +23021,25 @@
    (set_attr "prefix" "orig,orig,vex")
    (set_attr "mode" "TI")])
 
+;; A PTEST whose two inputs are the same (and X Y) is rewritten before
+;; reload as a PTEST of X and Y themselves, dropping the explicit AND.
+;; NOTE(review): PTEST derives ZF from (op0 & op1) but CF from
+;; (~op0 & op1), so this rewrite appears to preserve only ZF; both sides
+;; use CCmode, which does not say which flag is consumed -- confirm that
+;; no user of this result reads CF.
+(define_insn_and_split "*ptest<mode>_and"
+  [(set (reg:CC FLAGS_REG)
+	(unspec:CC [(and:V_AVX (match_operand:V_AVX 0 "register_operand")
+			       (match_operand:V_AVX 1 "vector_operand"))
+		    (and:V_AVX (match_dup 0) (match_dup 1))]
+		   UNSPEC_PTEST))]
+  "TARGET_SSE4_1
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (reg:CC FLAGS_REG)
+	(unspec:CC [(match_dup 0) (match_dup 1)] UNSPEC_PTEST))])
+
 (define_expand "nearbyint<mode>2"
   [(set (match_operand:VFH 0 "register_operand")
 	(unspec:VFH
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-stv-8.c b/gcc/testsuite/gcc.target/i386/sse4_1-stv-8.c
new file mode 100644
index 0000000000000000000000000000000000000000..5c5d803797be9f784e8e2699ea0ac7af942dab35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_1-stv-8.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse4.1 -mstv -mno-stackrealign" } */
+
+/* Check that the 128-bit test (a & b) != 0 is compiled to a ptest
+   without a separate pand.  */
+__int128 a,b;
+int foo()
+{
+  return (a & b) != 0;
+}
+
+/* { dg-final { scan-assembler-not "pand" } } */
+/* { dg-final { scan-assembler "ptest" } } */