diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 16e80f1497719fcc82b2214e8518cd22f30cdf1a..2f29cb5c235590125c377ea1fc24bf35f595b334 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2009-12-02  Sebastian Pop  <sebastian.pop@amd.com>
+	    Richard Henderson  <rth@redhat.com>
+
+	* config/i386/i386-protos.h (ix86_fma4_valid_op_p): Removed.
+	* config/i386/i386.c (ix86_fma4_valid_op_p): Removed.
+	* config/i386/i386.md: Do not use ix86_fma4_valid_op_p.
+	* config/i386/sse.md (fma4_*): Remove alternative with operand 1
+	matching a memory access.  Do not use ix86_fma4_valid_op_p.
+	(xop_*): Same.
+	Do not use ix86_fma4_valid_op_p in FMA4 and XOP splitters.
+
 2009-12-02  Richard Henderson  <rth@redhat.com>
 
 	* config/i386/i386.c (ix86_fixup_binary_operands): For FMA4, force
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index bb55da16f2f74996e1505ada85407d3d1c70e6f0..cf29cc7068046aec8535524d19d3fd0d50a458b8 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -218,8 +218,7 @@ extern void ix86_expand_vector_set (bool, rtx, rtx, int);
 extern void ix86_expand_vector_extract (bool, rtx, rtx, int);
 extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx);
 
-extern bool ix86_fma4_valid_op_p (rtx [], rtx, int, bool, int, bool);
-extern void ix86_expand_fma4_multiple_memory (rtx [], enum machine_mode);
+extern bool ix86_expand_fma4_multiple_memory (rtx [], enum machine_mode);
 
 extern void ix86_expand_vec_extract_even_odd (rtx, rtx, rtx, unsigned);
 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 436e93585a35a859ebcc1391d79396d4eea92c98..ade3a7d6d327e73981a440fb281165fe2f0fa9d0 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -28807,197 +28807,35 @@ ix86_expand_round (rtx operand0, rtx operand1)
   emit_move_insn (operand0, res);
 }
 
-/* Validate whether a FMA4 instruction is valid or not.
-   OPERANDS is the array of operands.
-   NUM is the number of operands.
-   USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
-   NUM_MEMORY is the maximum number of memory operands to accept.
-   NUM_MEMORY less than zero is a special case to allow an operand
-   of an instruction to be memory operation.
-   when COMMUTATIVE is set, operand 1 and 2 can be swapped.  */
 
-bool
-ix86_fma4_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
-		      bool uses_oc0, int num_memory, bool commutative)
-{
-  int mem_mask;
-  int mem_count;
-  int i;
-
-  /* Count the number of memory arguments */
-  mem_mask = 0;
-  mem_count = 0;
-  for (i = 0; i < num; i++)
-    {
-      enum machine_mode mode = GET_MODE (operands[i]);
-      if (register_operand (operands[i], mode))
-	;
-
-      else if (memory_operand (operands[i], mode))
-	{
-	  mem_mask |= (1 << i);
-	  mem_count++;
-	}
-
-      else
-	{
-	  rtx pattern = PATTERN (insn);
-
-	  /* allow 0 for pcmov */
-	  if (GET_CODE (pattern) != SET
-	      || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
-	      || i < 2
-	      || operands[i] != CONST0_RTX (mode))
-	    return false;
-	}
-    }
-
-  /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
-     a memory operation.  */
-  if (num_memory < 0)
-    {
-      num_memory = -num_memory;
-      if ((mem_mask & (1 << (num-1))) != 0)
-	{
-	  mem_mask &= ~(1 << (num-1));
-	  mem_count--;
-	}
-    }
-
-  /* If there were no memory operations, allow the insn */
-  if (mem_mask == 0)
-    return true;
-
-  /* Do not allow the destination register to be a memory operand.  */
-  else if (mem_mask & (1 << 0))
-    return false;
-
-  /* If there are too many memory operations, disallow the instruction.  While
-     the hardware only allows 1 memory reference, before register allocation
-     for some insns, we allow two memory operations sometimes in order to allow
-     code like the following to be optimized:
+/* Fixup an FMA4 or XOP instruction that has 2 memory input references
+   into a form the hardware will allow by using the destination
+   register to load one of the memory operations.  Presently this is
+   used by the multiply/add routines to allow 2 memory references.  */
 
-	float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
-
-    or similar cases that are vectorized into using the vfmaddss
-    instruction.  */
-  else if (mem_count > num_memory)
-    return false;
-
-  /* Don't allow more than one memory operation if not optimizing.  */
-  else if (mem_count > 1 && !optimize)
-    return false;
-
-  else if (num == 4 && mem_count == 1)
-    {
-      /* formats (destination is the first argument), example vfmaddss:
-	 xmm1, xmm1, xmm2, xmm3/mem
-	 xmm1, xmm1, xmm2/mem, xmm3
-	 xmm1, xmm2, xmm3/mem, xmm1
-	 xmm1, xmm2/mem, xmm3, xmm1 */
-      if (uses_oc0)
-	return ((mem_mask == (1 << 1))
-		|| (mem_mask == (1 << 2))
-		|| (mem_mask == (1 << 3)));
-
-      /* format, example vpmacsdd:
-	 xmm1, xmm2, xmm3/mem, xmm1 */
-      if (commutative)
-	return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
-      else
-	return (mem_mask == (1 << 2));
-    }
-
-  else if (num == 4 && num_memory == 2)
-    {
-      /* If there are two memory operations, we can load one of the memory ops
-	 into the destination register.  This is for optimizing the
-	 multiply/add ops, which the combiner has optimized both the multiply
-	 and the add insns to have a memory operation.  We have to be careful
-	 that the destination doesn't overlap with the inputs.  */
-      rtx op0 = operands[0];
-
-      if (reg_mentioned_p (op0, operands[1])
-	  || reg_mentioned_p (op0, operands[2])
-	  || reg_mentioned_p (op0, operands[3]))
-	return false;
-
-      /* formats (destination is the first argument), example vfmaddss:
-	 xmm1, xmm1, xmm2, xmm3/mem
-	 xmm1, xmm1, xmm2/mem, xmm3
-	 xmm1, xmm2, xmm3/mem, xmm1
-	 xmm1, xmm2/mem, xmm3, xmm1
-
-         For the oc0 case, we will load either operands[1] or operands[3] into
-         operands[0], so any combination of 2 memory operands is ok.  */
-      if (uses_oc0)
-	return true;
-
-      /* format, example vpmacsdd:
-	 xmm1, xmm2, xmm3/mem, xmm1
-
-         For the integer multiply/add instructions be more restrictive and
-         require operands[2] and operands[3] to be the memory operands.  */
-      if (commutative)
-	return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
-      else
-	return (mem_mask == ((1 << 2) | (1 << 3)));
-    }
-
-  else if (num == 3 && num_memory == 1)
-    {
-      /* formats, example vprotb:
-	 xmm1, xmm2, xmm3/mem
-	 xmm1, xmm2/mem, xmm3 */
-      if (uses_oc0)
-	return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
-
-      /* format, example vpcomeq:
-	 xmm1, xmm2, xmm3/mem */
-      else
-	return (mem_mask == (1 << 2));
-    }
-
-  else
-    gcc_unreachable ();
-
-  return false;
-}
-
-
-/* Fixup an FMA4 instruction that has 2 memory input references into a form the
-   hardware will allow by using the destination register to load one of the
-   memory operations.  Presently this is used by the multiply/add routines to
-   allow 2 memory references.  */
-
-void
+bool
 ix86_expand_fma4_multiple_memory (rtx operands[],
 				  enum machine_mode mode)
 {
-  rtx op0 = operands[0];
+  rtx scratch = operands[0];
 
-  if (memory_operand (op0, mode)
-      || reg_mentioned_p (op0, operands[1])
-      || reg_mentioned_p (op0, operands[2])
-      || reg_mentioned_p (op0, operands[3]))
-    gcc_unreachable ();
+  gcc_assert (register_operand (operands[0], mode));
+  gcc_assert (register_operand (operands[1], mode));
+  gcc_assert (MEM_P (operands[2]) && MEM_P (operands[3]));
 
-  /* For 2 memory operands, pick either operands[1] or operands[3] to move into
-     the destination register.  */
-  if (memory_operand (operands[1], mode))
+  if (reg_mentioned_p (scratch, operands[1]))
     {
-      emit_move_insn (op0, operands[1]);
-      operands[1] = op0;
-    }
-  else if (memory_operand (operands[3], mode))
-    {
-      emit_move_insn (op0, operands[3]);
-      operands[3] = op0;
+      if (!can_create_pseudo_p ())
+	return false;
+      scratch = gen_reg_rtx (mode);
     }
-  else
-    gcc_unreachable ();
 
-  return;
+  emit_move_insn (scratch, operands[3]);
+  if (rtx_equal_p (operands[2], operands[3]))
+    operands[2] = operands[3] = scratch;
+  else
+    operands[3] = scratch;
+  return true;
 }
 
 /* Table of valid machine attributes.  */
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 851061dcd8db99d86203fd353e2e3aa64a84142d..1ef3025ef56ad242cfe13568069146751f55ebc4 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -19248,7 +19248,7 @@
 	  (match_operand:MODEF 1 "register_operand" "x")
 	  (match_operand:MODEF 2 "register_operand" "x")
 	  (match_operand:MODEF 3 "register_operand" "x")))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+  "TARGET_XOP"
   "vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
   [(set_attr "type" "sse4arg")])
 
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 4899c0ae8e087cda9c6736cdc256c1597e24cf63..78e4b6af1d898ba77a4f86576ae96490d7827d24 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1703,14 +1703,13 @@
 ;;	(set (reg3) (plus (reg2) (mem (addr3))))
 
 (define_insn "fma4_fmadd<mode>4256"
-  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x,x")
+  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
 	(plus:FMA4MODEF4
 	 (mult:FMA4MODEF4
-	  (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x,xm")
-	  (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm,x"))
-	 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x,x")))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+	  (match_operand:FMA4MODEF4 1 "register_operand" "%x,x")
+	  (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
+	 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -1720,34 +1719,29 @@
   [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
 	(plus:FMA4MODEF4
 	 (mult:FMA4MODEF4
-	  (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "")
-	  (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))
-	 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))]
-  "TARGET_FMA4
-   && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
-   && !reg_mentioned_p (operands[0], operands[1])
-   && !reg_mentioned_p (operands[0], operands[2])
-   && !reg_mentioned_p (operands[0], operands[3])"
-  [(const_int 0)]
+	  (match_operand:FMA4MODEF4 1 "register_operand" "")
+	  (match_operand:FMA4MODEF4 2 "memory_operand" ""))
+	 (match_operand:FMA4MODEF4 3 "memory_operand" "")))]
+  "TARGET_FMA4"
+  [(set (match_dup 0)
+        (plus:FMA4MODEF4
+         (mult:FMA4MODEF4 (match_dup 1) (match_dup 2))
+         (match_dup 3)))]
 {
-  ix86_expand_fma4_multiple_memory (operands, <MODE>mode);
-  emit_insn (gen_fma4_fmadd<mode>4256 (operands[0], operands[1],
-				    operands[2], operands[3]));
-  DONE;
+ if (!ix86_expand_fma4_multiple_memory (operands, <MODE>mode))
+   FAIL;
 })
 
 ;; Floating multiply and subtract
 ;; Allow two memory operands the same as fmadd
 (define_insn "fma4_fmsub<mode>4256"
-  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x,x")
+  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
 	(minus:FMA4MODEF4
 	 (mult:FMA4MODEF4
-	  (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x,xm")
-	  (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm,x"))
-	 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x,x")))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+	  (match_operand:FMA4MODEF4 1 "register_operand" "%x,x")
+	  (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
+	 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -1757,21 +1751,17 @@
   [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
 	(minus:FMA4MODEF4
 	 (mult:FMA4MODEF4
-	  (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "")
-	  (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))
-	 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))]
-  "TARGET_FMA4
-   && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
-   && !reg_mentioned_p (operands[0], operands[1])
-   && !reg_mentioned_p (operands[0], operands[2])
-   && !reg_mentioned_p (operands[0], operands[3])"
-  [(const_int 0)]
+	  (match_operand:FMA4MODEF4 1 "register_operand" "")
+	  (match_operand:FMA4MODEF4 2 "memory_operand" ""))
+	 (match_operand:FMA4MODEF4 3 "memory_operand" "")))]
+  "TARGET_FMA4"
+  [(set (match_dup 0)
+        (minus:FMA4MODEF4
+         (mult:FMA4MODEF4 (match_dup 1) (match_dup 2))
+         (match_dup 3)))]
 {
-  ix86_expand_fma4_multiple_memory (operands, <MODE>mode);
-  emit_insn (gen_fma4_fmsub<mode>4256 (operands[0], operands[1],
-				    operands[2], operands[3]));
-  DONE;
+ if (!ix86_expand_fma4_multiple_memory (operands, <MODE>mode))
+   FAIL;
 })
 
 ;; Floating point negative multiply and add
@@ -1779,14 +1769,13 @@
 ;; Note operands are out of order to simplify call to ix86_fma4_valid_p
 ;; Allow two memory operands to help in optimizing.
 (define_insn "fma4_fnmadd<mode>4256"
-  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x,x")
+  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
 	(minus:FMA4MODEF4
-	 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x,x")
+	 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
 	 (mult:FMA4MODEF4
-	  (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x,xm")
-	  (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm,x"))))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+	  (match_operand:FMA4MODEF4 1 "register_operand" "%x,x")
+	  (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))))]
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -1795,22 +1784,18 @@
 (define_split
   [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
 	(minus:FMA4MODEF4
-	 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")
+	 (match_operand:FMA4MODEF4 3 "memory_operand" "")
 	 (mult:FMA4MODEF4
-	  (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "")
-	  (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))))]
-  "TARGET_FMA4
-   && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
-   && !reg_mentioned_p (operands[0], operands[1])
-   && !reg_mentioned_p (operands[0], operands[2])
-   && !reg_mentioned_p (operands[0], operands[3])"
-  [(const_int 0)]
+	  (match_operand:FMA4MODEF4 1 "register_operand" "")
+	  (match_operand:FMA4MODEF4 2 "memory_operand" ""))))]
+  "TARGET_FMA4"
+  [(set (match_dup 0)
+        (minus:FMA4MODEF4
+	 (match_dup 3)
+         (mult:FMA4MODEF4 (match_dup 1) (match_dup 2))))]
 {
-  ix86_expand_fma4_multiple_memory (operands, <MODE>mode);
-  emit_insn (gen_fma4_fnmadd<mode>4256 (operands[0], operands[1],
-				     operands[2], operands[3]));
-  DONE;
+  if (!ix86_expand_fma4_multiple_memory (operands, <MODE>mode))
+    FAIL;
 })
 
 ;; Floating point negative multiply and subtract
@@ -1821,11 +1806,10 @@
 	(minus:FMA4MODEF4
 	 (mult:FMA4MODEF4
 	  (neg:FMA4MODEF4
-	   (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x"))
-	  (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm"))
+	   (match_operand:FMA4MODEF4 1 "register_operand" "%x,x"))
+	  (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
 	 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -1836,33 +1820,30 @@
 	(minus:FMA4MODEF4
 	 (mult:FMA4MODEF4
 	  (neg:FMA4MODEF4
-	   (match_operand:FMA4MODEF4 1 "nonimmediate_operand" ""))
-	  (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))
-	 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))]
-  "TARGET_FMA4
-   && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)
-   && !reg_mentioned_p (operands[0], operands[1])
-   && !reg_mentioned_p (operands[0], operands[2])
-   && !reg_mentioned_p (operands[0], operands[3])"
-  [(const_int 0)]
+	   (match_operand:FMA4MODEF4 1 "register_operand" ""))
+	  (match_operand:FMA4MODEF4 2 "memory_operand" ""))
+	 (match_operand:FMA4MODEF4 3 "memory_operand" "")))]
+  "TARGET_FMA4"
+  [(set (match_dup 0)
+        (minus:FMA4MODEF4
+         (mult:FMA4MODEF4
+	  (neg:FMA4MODEF4 (match_dup 1))
+	  (match_dup 2))
+         (match_dup 3)))]
 {
-  ix86_expand_fma4_multiple_memory (operands, <MODE>mode);
-  emit_insn (gen_fma4_fnmsub<mode>4256 (operands[0], operands[1],
-				        operands[2], operands[3]));
-  DONE;
+  if (!ix86_expand_fma4_multiple_memory (operands, <MODE>mode))
+    FAIL;
 })
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 (define_insn "fma4_fmadd<mode>4"
-  [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x")
+  [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
 	(plus:SSEMODEF4
 	 (mult:SSEMODEF4
-	  (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x,xm")
-	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,x"))
-	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,x")))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+	  (match_operand:SSEMODEF4 1 "register_operand" "%x,x")
+	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
+	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -1872,21 +1853,17 @@
   [(set (match_operand:SSEMODEF4 0 "register_operand" "")
 	(plus:SSEMODEF4
 	 (mult:SSEMODEF4
-	  (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
-	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
-	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
-  "TARGET_FMA4
-   && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
-   && !reg_mentioned_p (operands[0], operands[1])
-   && !reg_mentioned_p (operands[0], operands[2])
-   && !reg_mentioned_p (operands[0], operands[3])"
-  [(const_int 0)]
+	  (match_operand:SSEMODEF4 1 "register_operand" "")
+	  (match_operand:SSEMODEF4 2 "memory_operand" ""))
+	 (match_operand:SSEMODEF4 3 "memory_operand" "")))]
+  "TARGET_FMA4"
+  [(set (match_dup 0)
+        (plus:SSEMODEF4
+         (mult:SSEMODEF4 (match_dup 1) (match_dup 2))
+         (match_dup 3)))]
 {
-  ix86_expand_fma4_multiple_memory (operands, <MODE>mode);
-  emit_insn (gen_fma4_fmadd<mode>4 (operands[0], operands[1],
-				    operands[2], operands[3]));
-  DONE;
+  if (!ix86_expand_fma4_multiple_memory (operands, <MODE>mode))
+    FAIL;
 })
 
 ;; For the scalar operations, use operand1 for the upper words that aren't
@@ -1897,13 +1874,12 @@
 	(vec_merge:SSEMODEF2P
 	 (plus:SSEMODEF2P
 	  (mult:SSEMODEF2P
-	   (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
-	   (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+	   (match_operand:SSEMODEF2P 1 "register_operand" "%x,x")
+	   (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
 	  (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
 	 (match_dup 0)
 	 (const_int 1)))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -1911,14 +1887,13 @@
 ;; Floating multiply and subtract
 ;; Allow two memory operands the same as fmadd
 (define_insn "fma4_fmsub<mode>4"
-  [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x")
+  [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
 	(minus:SSEMODEF4
 	 (mult:SSEMODEF4
-	  (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x,xm")
-	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,x"))
-	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,x")))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+	  (match_operand:SSEMODEF4 1 "register_operand" "%x,x")
+	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
+	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -1928,21 +1903,17 @@
   [(set (match_operand:SSEMODEF4 0 "register_operand" "")
 	(minus:SSEMODEF4
 	 (mult:SSEMODEF4
-	  (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
-	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
-	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
-  "TARGET_FMA4
-   && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
-   && !reg_mentioned_p (operands[0], operands[1])
-   && !reg_mentioned_p (operands[0], operands[2])
-   && !reg_mentioned_p (operands[0], operands[3])"
-  [(const_int 0)]
+	  (match_operand:SSEMODEF4 1 "register_operand" "")
+	  (match_operand:SSEMODEF4 2 "memory_operand" ""))
+	 (match_operand:SSEMODEF4 3 "memory_operand" "")))]
+  "TARGET_FMA4"
+  [(set (match_dup 0)
+        (minus:SSEMODEF4
+         (mult:SSEMODEF4 (match_dup 1) (match_dup 2))
+         (match_dup 3)))]
 {
-  ix86_expand_fma4_multiple_memory (operands, <MODE>mode);
-  emit_insn (gen_fma4_fmsub<mode>4 (operands[0], operands[1],
-				    operands[2], operands[3]));
-  DONE;
+  if (!ix86_expand_fma4_multiple_memory (operands, <MODE>mode))
+    FAIL;
 })
 
 ;; For the scalar operations, use operand1 for the upper words that aren't
@@ -1953,13 +1924,12 @@
 	(vec_merge:SSEMODEF2P
 	 (minus:SSEMODEF2P
 	  (mult:SSEMODEF2P
-	   (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
-	   (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+	   (match_operand:SSEMODEF2P 1 "register_operand" "%x,x")
+	   (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
 	  (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
 	 (match_dup 0)
 	 (const_int 1)))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -1969,14 +1939,13 @@
 ;; Note operands are out of order to simplify call to ix86_fma4_valid_p
 ;; Allow two memory operands to help in optimizing.
 (define_insn "fma4_fnmadd<mode>4"
-  [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x")
+  [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
 	(minus:SSEMODEF4
-	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,x")
+	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")
 	 (mult:SSEMODEF4
-	  (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x,xm")
-	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,x"))))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+	  (match_operand:SSEMODEF4 1 "register_operand" "%x,x")
+	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))))]
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -1985,22 +1954,18 @@
 (define_split
   [(set (match_operand:SSEMODEF4 0 "register_operand" "")
 	(minus:SSEMODEF4
-	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
+	 (match_operand:SSEMODEF4 3 "memory_operand" "")
 	 (mult:SSEMODEF4
-	  (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
-	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
-  "TARGET_FMA4
-   && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
-   && !reg_mentioned_p (operands[0], operands[1])
-   && !reg_mentioned_p (operands[0], operands[2])
-   && !reg_mentioned_p (operands[0], operands[3])"
-  [(const_int 0)]
+	  (match_operand:SSEMODEF4 1 "register_operand" "")
+	  (match_operand:SSEMODEF4 2 "memory_operand" ""))))]
+  "TARGET_FMA4"
+  [(set (match_dup 0)
+        (minus:SSEMODEF4
+	 (match_dup 3)
+         (mult:SSEMODEF4 (match_dup 1) (match_dup 2))))]
 {
-  ix86_expand_fma4_multiple_memory (operands, <MODE>mode);
-  emit_insn (gen_fma4_fnmadd<mode>4 (operands[0], operands[1],
-				     operands[2], operands[3]));
-  DONE;
+  if (!ix86_expand_fma4_multiple_memory (operands, <MODE>mode))
+    FAIL;
 })
 
 ;; For the scalar operations, use operand1 for the upper words that aren't
@@ -2012,12 +1977,11 @@
 	 (minus:SSEMODEF2P
 	  (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
 	  (mult:SSEMODEF2P
-	   (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
-	   (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
+	   (match_operand:SSEMODEF2P 1 "register_operand" "%x,x")
+	   (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
 	 (match_dup 0)
 	 (const_int 1)))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -2030,11 +1994,10 @@
 	(minus:SSEMODEF4
 	 (mult:SSEMODEF4
 	  (neg:SSEMODEF4
-	   (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x"))
-	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
+	   (match_operand:SSEMODEF4 1 "register_operand" "%x,x"))
+	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
 	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -2045,21 +2008,19 @@
 	(minus:SSEMODEF4
 	 (mult:SSEMODEF4
 	  (neg:SSEMODEF4
-	   (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
-	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
-	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
-  "TARGET_FMA4
-   && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)
-   && !reg_mentioned_p (operands[0], operands[1])
-   && !reg_mentioned_p (operands[0], operands[2])
-   && !reg_mentioned_p (operands[0], operands[3])"
-  [(const_int 0)]
+	   (match_operand:SSEMODEF4 1 "register_operand" ""))
+	  (match_operand:SSEMODEF4 2 "memory_operand" ""))
+	 (match_operand:SSEMODEF4 3 "memory_operand" "")))]
+  "TARGET_FMA4"
+  [(set (match_dup 0)
+        (minus:SSEMODEF4
+         (mult:SSEMODEF4
+	  (neg:SSEMODEF4 (match_dup 1))
+	  (match_dup 2))
+         (match_dup 3)))]
 {
-  ix86_expand_fma4_multiple_memory (operands, <MODE>mode);
-  emit_insn (gen_fma4_fnmsub<mode>4 (operands[0], operands[1],
-				     operands[2], operands[3]));
-  DONE;
+  if (!ix86_expand_fma4_multiple_memory (operands, <MODE>mode))
+    FAIL;
 })
 
 ;; For the scalar operations, use operand1 for the upper words that aren't
@@ -2071,13 +2032,12 @@
 	 (minus:SSEMODEF2P
 	  (mult:SSEMODEF2P
 	   (neg:SSEMODEF2P
-	    (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x"))
-	   (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+	    (match_operand:SSEMODEF2P 1 "register_operand" "%x,x"))
+	   (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
 	  (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
 	 (match_dup 0)
 	 (const_int 1)))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -2089,11 +2049,11 @@
 	(unspec:FMA4MODEF4
 	 [(plus:FMA4MODEF4
 	   (mult:FMA4MODEF4
-	    (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x")
-	    (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm"))
+	    (match_operand:FMA4MODEF4 1 "register_operand" "%x,x")
+	    (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
 	   (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
 	 UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -2103,11 +2063,11 @@
 	(unspec:FMA4MODEF4
 	 [(minus:FMA4MODEF4
 	   (mult:FMA4MODEF4
-	    (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x")
-	    (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm"))
+	    (match_operand:FMA4MODEF4 1 "register_operand" "%x,x")
+	    (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
 	   (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
 	 UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -2118,10 +2078,10 @@
 	 [(minus:FMA4MODEF4
 	   (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
 	   (mult:FMA4MODEF4
-	    (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x")
-	    (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm")))]
+	    (match_operand:FMA4MODEF4 1 "register_operand" "%x,x")
+	    (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")))]
 	 UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -2132,11 +2092,11 @@
 	 [(minus:FMA4MODEF4
 	   (mult:FMA4MODEF4
 	    (neg:FMA4MODEF4
-	     (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x"))
-	    (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm"))
+	     (match_operand:FMA4MODEF4 1 "register_operand" "%x,x"))
+	    (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
 	   (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
 	 UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -2147,11 +2107,11 @@
 	(unspec:SSEMODEF2P
 	 [(plus:SSEMODEF2P
 	   (mult:SSEMODEF2P
-	    (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
-	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+	    (match_operand:SSEMODEF2P 1 "register_operand" "%x,x")
+	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
 	   (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
 	 UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -2161,11 +2121,11 @@
 	(unspec:SSEMODEF2P
 	 [(minus:SSEMODEF2P
 	   (mult:SSEMODEF2P
-	    (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
-	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+	    (match_operand:SSEMODEF2P 1 "register_operand" "%x,x")
+	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
 	   (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
 	 UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -2176,10 +2136,10 @@
 	 [(minus:SSEMODEF2P
 	   (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
 	   (mult:SSEMODEF2P
-	    (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
-	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))]
+	    (match_operand:SSEMODEF2P 1 "register_operand" "%x,x")
+	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))]
 	 UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -2190,11 +2150,11 @@
 	 [(minus:SSEMODEF2P
 	   (mult:SSEMODEF2P
 	    (neg:SSEMODEF2P
-	     (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x"))
-	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+	     (match_operand:SSEMODEF2P 1 "register_operand" "%x,x"))
+	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
 	   (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
 	 UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -2207,13 +2167,13 @@
 	 [(vec_merge:SSEMODEF2P
 	   (plus:SSEMODEF2P
 	    (mult:SSEMODEF2P
-	     (match_operand:SSEMODEF2P 1 "register_operand" "x,x")
-	     (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+	     (match_operand:SSEMODEF2P 1 "register_operand" "%x,x")
+	     (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
 	    (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
 	   (match_dup 0)
 	   (const_int 1))]
 	 UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<ssescalarmode>")])
@@ -2224,13 +2184,13 @@
 	 [(vec_merge:SSEMODEF2P
 	   (minus:SSEMODEF2P
 	    (mult:SSEMODEF2P
-	     (match_operand:SSEMODEF2P 1 "register_operand" "x,x")
-	     (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+	     (match_operand:SSEMODEF2P 1 "register_operand" "%x,x")
+	     (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
 	    (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
 	   (match_dup 0)
 	   (const_int 1))]
 	 UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<ssescalarmode>")])
@@ -2242,12 +2202,12 @@
 	   (minus:SSEMODEF2P
 	    (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
 	    (mult:SSEMODEF2P
-	     (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
-	     (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
+	     (match_operand:SSEMODEF2P 1 "register_operand" "%x,x")
+	     (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
 	   (match_dup 0)
 	   (const_int 1))]
 	 UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<ssescalarmode>")])
@@ -2259,13 +2219,13 @@
 	   (minus:SSEMODEF2P
 	    (mult:SSEMODEF2P
 	     (neg:SSEMODEF2P
-	      (match_operand:SSEMODEF2P 1 "register_operand" "x,x"))
-	     (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+	      (match_operand:SSEMODEF2P 1 "register_operand" "%x,x"))
+	     (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
 	    (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
 	   (match_dup 0)
 	   (const_int 1))]
 	 UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<ssescalarmode>")])
@@ -2281,8 +2241,8 @@
 	(vec_merge:V8SF
 	  (plus:V8SF
 	    (mult:V8SF
-	      (match_operand:V8SF 1 "nonimmediate_operand" "x,x")
-	      (match_operand:V8SF 2 "nonimmediate_operand" "x,xm"))
+	      (match_operand:V8SF 1 "register_operand" "%x,x")
+	      (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
 	    (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
 	  (minus:V8SF
 	    (mult:V8SF
@@ -2290,8 +2250,7 @@
 	      (match_dup 2))
 	    (match_dup 3))
 	  (const_int 170)))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "V8SF")])
@@ -2301,8 +2260,8 @@
 	(vec_merge:V4DF
 	  (plus:V4DF
 	    (mult:V4DF
-	      (match_operand:V4DF 1 "nonimmediate_operand" "x,x")
-	      (match_operand:V4DF 2 "nonimmediate_operand" "x,xm"))
+	      (match_operand:V4DF 1 "register_operand" "%x,x")
+	      (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
 	    (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
 	  (minus:V4DF
 	    (mult:V4DF
@@ -2310,8 +2269,7 @@
 	      (match_dup 2))
 	    (match_dup 3))
 	  (const_int 10)))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "V4DF")])
@@ -2321,8 +2279,8 @@
 	(vec_merge:V4SF
 	  (plus:V4SF
 	    (mult:V4SF
-	      (match_operand:V4SF 1 "nonimmediate_operand" "x,x")
-	      (match_operand:V4SF 2 "nonimmediate_operand" "x,xm"))
+	      (match_operand:V4SF 1 "register_operand" "%x,x")
+	      (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
 	    (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
 	  (minus:V4SF
 	    (mult:V4SF
@@ -2330,8 +2288,7 @@
 	      (match_dup 2))
 	    (match_dup 3))
 	  (const_int 10)))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "V4SF")])
@@ -2341,8 +2298,8 @@
 	(vec_merge:V2DF
 	  (plus:V2DF
 	    (mult:V2DF
-	      (match_operand:V2DF 1 "nonimmediate_operand" "x,x")
-	      (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))
+	      (match_operand:V2DF 1 "register_operand" "%x,x")
+	      (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
 	    (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
 	  (minus:V2DF
 	    (mult:V2DF
@@ -2350,8 +2307,7 @@
 	      (match_dup 2))
 	    (match_dup 3))
 	  (const_int 2)))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "V2DF")])
@@ -2361,8 +2317,8 @@
 	(vec_merge:V8SF
 	  (plus:V8SF
 	    (mult:V8SF
-	      (match_operand:V8SF 1 "nonimmediate_operand" "x,x")
-	      (match_operand:V8SF 2 "nonimmediate_operand" "x,xm"))
+	      (match_operand:V8SF 1 "register_operand" "%x,x")
+	      (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
 	    (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
 	  (minus:V8SF
 	    (mult:V8SF
@@ -2370,8 +2326,7 @@
 	      (match_dup 2))
 	    (match_dup 3))
 	  (const_int 85)))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "V8SF")])
@@ -2381,8 +2336,8 @@
 	(vec_merge:V4DF
 	  (plus:V4DF
 	    (mult:V4DF
-	      (match_operand:V4DF 1 "nonimmediate_operand" "x,x")
-	      (match_operand:V4DF 2 "nonimmediate_operand" "x,xm"))
+	      (match_operand:V4DF 1 "register_operand" "%x,x")
+	      (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
 	    (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
 	  (minus:V4DF
 	    (mult:V4DF
@@ -2390,8 +2345,7 @@
 	      (match_dup 2))
 	    (match_dup 3))
 	  (const_int 5)))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "V4DF")])
@@ -2401,8 +2355,8 @@
 	(vec_merge:V4SF
 	  (plus:V4SF
 	    (mult:V4SF
-	      (match_operand:V4SF 1 "nonimmediate_operand" "x,x")
-	      (match_operand:V4SF 2 "nonimmediate_operand" "x,xm"))
+	      (match_operand:V4SF 1 "register_operand" "%x,x")
+	      (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
 	    (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
 	  (minus:V4SF
 	    (mult:V4SF
@@ -2410,8 +2364,7 @@
 	      (match_dup 2))
 	    (match_dup 3))
 	  (const_int 5)))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "V4SF")])
@@ -2421,8 +2374,8 @@
 	(vec_merge:V2DF
 	  (plus:V2DF
 	    (mult:V2DF
-	      (match_operand:V2DF 1 "nonimmediate_operand" "x,x")
-	      (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))
+	      (match_operand:V2DF 1 "register_operand" "%x,x")
+	      (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
 	    (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
 	  (minus:V2DF
 	    (mult:V2DF
@@ -2430,8 +2383,7 @@
 	      (match_dup 2))
 	    (match_dup 3))
 	  (const_int 1)))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "V2DF")])
@@ -2444,8 +2396,8 @@
 	 [(vec_merge:V8SF
 	   (plus:V8SF
 	     (mult:V8SF
-	       (match_operand:V8SF 1 "nonimmediate_operand" "x,x")
-	       (match_operand:V8SF 2 "nonimmediate_operand" "x,xm"))
+	       (match_operand:V8SF 1 "register_operand" "%x,x")
+	       (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
 	     (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
 	   (minus:V8SF
 	     (mult:V8SF
@@ -2454,8 +2406,7 @@
 	     (match_dup 3))
 	   (const_int 170))]
 	 UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "V8SF")])
@@ -2466,8 +2417,8 @@
 	 [(vec_merge:V4DF
 	   (plus:V4DF
 	     (mult:V4DF
-	       (match_operand:V4DF 1 "nonimmediate_operand" "x,x")
-	       (match_operand:V4DF 2 "nonimmediate_operand" "x,xm"))
+	       (match_operand:V4DF 1 "register_operand" "%x,x")
+	       (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
 	     (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
 	   (minus:V4DF
 	     (mult:V4DF
@@ -2476,8 +2427,7 @@
 	     (match_dup 3))
 	   (const_int 10))]
 	 UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "V4DF")])
@@ -2488,8 +2438,8 @@
 	 [(vec_merge:V4SF
 	   (plus:V4SF
 	     (mult:V4SF
-	       (match_operand:V4SF 1 "nonimmediate_operand" "x,x")
-	       (match_operand:V4SF 2 "nonimmediate_operand" "x,xm"))
+	       (match_operand:V4SF 1 "register_operand" "%x,x")
+	       (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
 	     (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
 	   (minus:V4SF
 	     (mult:V4SF
@@ -2498,8 +2448,7 @@
 	     (match_dup 3))
 	   (const_int 10))]
 	 UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "V4SF")])
@@ -2510,8 +2459,8 @@
 	 [(vec_merge:V2DF
 	   (plus:V2DF
 	     (mult:V2DF
-	       (match_operand:V2DF 1 "nonimmediate_operand" "x,x")
-	       (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))
+	       (match_operand:V2DF 1 "register_operand" "%x,x")
+	       (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
 	     (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
 	   (minus:V2DF
 	     (mult:V2DF
@@ -2520,8 +2469,7 @@
 	     (match_dup 3))
 	   (const_int 2))]
 	 UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "V2DF")])
@@ -2532,8 +2480,8 @@
 	 [(vec_merge:V8SF
 	   (plus:V8SF
 	     (mult:V8SF
-	       (match_operand:V8SF 1 "nonimmediate_operand" "x,x")
-	       (match_operand:V8SF 2 "nonimmediate_operand" "x,xm"))
+	       (match_operand:V8SF 1 "register_operand" "%x,x")
+	       (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
 	     (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
 	   (minus:V8SF
 	     (mult:V8SF
@@ -2542,8 +2490,7 @@
 	     (match_dup 3))
 	   (const_int 85))]
 	 UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "V8SF")])
@@ -2554,8 +2501,8 @@
 	 [(vec_merge:V4DF
 	   (plus:V4DF
 	     (mult:V4DF
-	       (match_operand:V4DF 1 "nonimmediate_operand" "x,x")
-	       (match_operand:V4DF 2 "nonimmediate_operand" "x,xm"))
+	       (match_operand:V4DF 1 "register_operand" "%x,x")
+	       (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
 	     (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
 	   (minus:V4DF
 	     (mult:V4DF
@@ -2564,8 +2511,7 @@
 	     (match_dup 3))
 	   (const_int 5))]
 	 UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "V4DF")])
@@ -2576,8 +2522,8 @@
 	 [(vec_merge:V4SF
 	   (plus:V4SF
 	     (mult:V4SF
-	       (match_operand:V4SF 1 "nonimmediate_operand" "x,x")
-	       (match_operand:V4SF 2 "nonimmediate_operand" "x,xm"))
+	       (match_operand:V4SF 1 "register_operand" "%x,x")
+	       (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
 	     (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
 	   (minus:V4SF
 	     (mult:V4SF
@@ -2586,8 +2532,7 @@
 	     (match_dup 3))
 	   (const_int 5))]
 	 UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "V4SF")])
@@ -2598,8 +2543,8 @@
 	 [(vec_merge:V2DF
 	   (plus:V2DF
 	     (mult:V2DF
-	       (match_operand:V2DF 1 "nonimmediate_operand" "x,x")
-	       (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))
+	       (match_operand:V2DF 1 "register_operand" "%x,x")
+	       (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
 	     (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
 	   (minus:V2DF
 	     (mult:V2DF
@@ -2608,8 +2553,7 @@
 	     (match_dup 3))
 	   (const_int 1))]
 	 UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4
-   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
+  "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "V2DF")])
@@ -10356,16 +10300,14 @@
 ;; that it does and splitting it later allows the following to be recognized:
 ;;	a[i] = b[i] * c[i] + d[i];
 (define_insn "xop_pmacsww"
-  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
         (plus:V8HI
 	 (mult:V8HI
-	  (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
-	  (match_operand:V8HI 2 "nonimmediate_operand" "xm,x"))
-	 (match_operand:V8HI 3 "register_operand" "x,x")))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)"
-  "@
-   vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
-   vpmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+	  (match_operand:V8HI 1 "register_operand" "%x")
+	  (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+	 (match_operand:V8HI 3 "register_operand" "x")))]
+  "TARGET_XOP"
+  "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
@@ -10373,33 +10315,27 @@
 (define_split
   [(set (match_operand:V8HI 0 "register_operand" "")
 	(plus:V8HI
-	 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
-		    (match_operand:V8HI 2 "nonimmediate_operand" ""))
-	 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
-  "TARGET_XOP
-   && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)
-   && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)
-   && !reg_mentioned_p (operands[0], operands[1])
-   && !reg_mentioned_p (operands[0], operands[2])
-   && !reg_mentioned_p (operands[0], operands[3])"
-  [(const_int 0)]
+	 (mult:V8HI (match_operand:V8HI 1 "register_operand" "")
+		    (match_operand:V8HI 2 "memory_operand" ""))
+	 (match_operand:V8HI 3 "memory_operand" "")))]
+  "TARGET_XOP"
+  [(set (match_dup 0)
+        (plus:V8HI
+         (mult:V8HI (match_dup 1) (match_dup 2))
+         (match_dup 3)))]
 {
-  ix86_expand_fma4_multiple_memory (operands, V8HImode);
-  emit_insn (gen_xop_pmacsww (operands[0], operands[1], operands[2],
-			      operands[3]));
-  DONE;
+  if (!ix86_expand_fma4_multiple_memory (operands, V8HImode))
+    FAIL;
 })
 
 (define_insn "xop_pmacssww"
-  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
         (ss_plus:V8HI
-	 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
-		    (match_operand:V8HI 2 "nonimmediate_operand" "xm,x"))
-	 (match_operand:V8HI 3 "register_operand" "x,x")))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
-  "@
-   vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
-   vpmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+	 (mult:V8HI (match_operand:V8HI 1 "register_operand" "%x")
+		    (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+	 (match_operand:V8HI 3 "register_operand" "x")))]
+  "TARGET_XOP"
+  "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
@@ -10408,16 +10344,14 @@
 ;; that it does and splitting it later allows the following to be recognized:
 ;;	a[i] = b[i] * c[i] + d[i];
 (define_insn "xop_pmacsdd"
-  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
         (plus:V4SI
 	 (mult:V4SI
-	  (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
-	  (match_operand:V4SI 2 "nonimmediate_operand" "xm,x"))
-	 (match_operand:V4SI 3 "register_operand" "x,x")))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)"
-  "@
-   vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
-   vpmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+	  (match_operand:V4SI 1 "register_operand" "%x")
+	  (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+	 (match_operand:V4SI 3 "register_operand" "x")))]
+  "TARGET_XOP"
+  "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
@@ -10425,117 +10359,105 @@
 (define_split
   [(set (match_operand:V4SI 0 "register_operand" "")
 	(plus:V4SI
-	 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
-		    (match_operand:V4SI 2 "nonimmediate_operand" ""))
-	 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
-  "TARGET_XOP
-   && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)
-   && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)
-   && !reg_mentioned_p (operands[0], operands[1])
-   && !reg_mentioned_p (operands[0], operands[2])
-   && !reg_mentioned_p (operands[0], operands[3])"
-  [(const_int 0)]
+	 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
+		    (match_operand:V4SI 2 "memory_operand" ""))
+	 (match_operand:V4SI 3 "memory_operand" "")))]
+  "TARGET_XOP"
+  [(set (match_dup 0)
+        (plus:V4SI
+         (mult:V4SI (match_dup 1) (match_dup 2))
+         (match_dup 3)))]
 {
-  ix86_expand_fma4_multiple_memory (operands, V4SImode);
-  emit_insn (gen_xop_pmacsdd (operands[0], operands[1], operands[2],
-			      operands[3]));
-  DONE;
+  if (!ix86_expand_fma4_multiple_memory (operands, V4SImode))
+    FAIL;
 })
 
 (define_insn "xop_pmacssdd"
-  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
         (ss_plus:V4SI
-	 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
-		    (match_operand:V4SI 2 "nonimmediate_operand" "xm,x"))
-	 (match_operand:V4SI 3 "register_operand" "x,x")))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
-  "@
-   vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
-   vpmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+	 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
+		    (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+	 (match_operand:V4SI 3 "register_operand" "x")))]
+  "TARGET_XOP"
+  "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
 (define_insn "xop_pmacssdql"
-  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
 	(ss_plus:V2DI
 	 (mult:V2DI
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
+	    (match_operand:V4SI 1 "register_operand" "%x")
 	    (parallel [(const_int 1)
 		       (const_int 3)])))
 	  (vec_select:V2SI
-	   (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
+	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")
 	   (parallel [(const_int 1)
 		      (const_int 3)])))
-	 (match_operand:V2DI 3 "register_operand" "x,x")))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
-  "@
-   vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
-   vpmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+	 (match_operand:V2DI 3 "register_operand" "x")))]
+  "TARGET_XOP"
+  "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
 (define_insn "xop_pmacssdqh"
-  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
 	(ss_plus:V2DI
 	 (mult:V2DI
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
+	    (match_operand:V4SI 1 "register_operand" "%x")
 	    (parallel [(const_int 0)
 		       (const_int 2)])))
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
+	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
 	    (parallel [(const_int 0)
 		       (const_int 2)]))))
-	 (match_operand:V2DI 3 "register_operand" "x,x")))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
-  "@
-   vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
-   vpmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+	 (match_operand:V2DI 3 "register_operand" "x")))]
+  "TARGET_XOP"
+  "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
 (define_insn "xop_pmacsdql"
-  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
 	(plus:V2DI
 	 (mult:V2DI
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
+	    (match_operand:V4SI 1 "register_operand" "%x")
 	    (parallel [(const_int 1)
 		       (const_int 3)])))
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
+	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
 	    (parallel [(const_int 1)
 		       (const_int 3)]))))
-	 (match_operand:V2DI 3 "register_operand" "x,x")))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
-  "@
-   vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
-   vpmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+	 (match_operand:V2DI 3 "register_operand" "x")))]
+  "TARGET_XOP"
+  "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
 (define_insn_and_split "*xop_pmacsdql_mem"
-  [(set (match_operand:V2DI 0 "register_operand" "=&x,&x")
+  [(set (match_operand:V2DI 0 "register_operand" "=&x")
 	(plus:V2DI
 	 (mult:V2DI
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
+	    (match_operand:V4SI 1 "register_operand" "%x")
 	    (parallel [(const_int 1)
 		       (const_int 3)])))
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
+	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
 	    (parallel [(const_int 1)
 		       (const_int 3)]))))
-	 (match_operand:V2DI 3 "memory_operand" "m,m")))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)"
+	 (match_operand:V2DI 3 "memory_operand" "m")))]
+  "TARGET_XOP"
   "#"
   "&& reload_completed"
   [(set (match_dup 0)
@@ -10564,7 +10486,7 @@
 	(mult:V2DI
 	  (sign_extend:V2DI
 	    (vec_select:V2SI
-	      (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+	      (match_operand:V4SI 1 "register_operand" "%x")
 	      (parallel [(const_int 1)
 			 (const_int 3)])))
 	  (sign_extend:V2DI
@@ -10598,43 +10520,41 @@
    (set_attr "mode" "TI")])
 
 (define_insn "xop_pmacsdqh"
-  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
 	(plus:V2DI
 	 (mult:V2DI
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
+	    (match_operand:V4SI 1 "register_operand" "%x")
 	    (parallel [(const_int 0)
 		       (const_int 2)])))
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
+	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
 	    (parallel [(const_int 0)
 		       (const_int 2)]))))
-	 (match_operand:V2DI 3 "register_operand" "x,x")))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
-  "@
-   vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
-   vpmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+	 (match_operand:V2DI 3 "register_operand" "x")))]
+  "TARGET_XOP"
+  "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
 (define_insn_and_split "*xop_pmacsdqh_mem"
-  [(set (match_operand:V2DI 0 "register_operand" "=&x,&x")
+  [(set (match_operand:V2DI 0 "register_operand" "=&x")
 	(plus:V2DI
 	 (mult:V2DI
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
+	    (match_operand:V4SI 1 "register_operand" "%x")
 	    (parallel [(const_int 0)
 		       (const_int 2)])))
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
+	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
 	    (parallel [(const_int 0)
 		       (const_int 2)]))))
-	 (match_operand:V2DI 3 "memory_operand" "m,m")))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)"
+	 (match_operand:V2DI 3 "memory_operand" "m")))]
+  "TARGET_XOP"
   "#"
   "&& reload_completed"
   [(set (match_dup 0)
@@ -10663,7 +10583,7 @@
 	(mult:V2DI
 	  (sign_extend:V2DI
 	    (vec_select:V2SI
-	      (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+	      (match_operand:V4SI 1 "register_operand" "%x")
 	      (parallel [(const_int 0)
 			 (const_int 2)])))
 	  (sign_extend:V2DI
@@ -10698,72 +10618,68 @@
 
 ;; XOP parallel integer multiply/add instructions for the intrinisics
 (define_insn "xop_pmacsswd"
-  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
 	(ss_plus:V4SI
 	 (mult:V4SI
 	  (sign_extend:V4SI
 	   (vec_select:V4HI
-	    (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
+	    (match_operand:V8HI 1 "register_operand" "%x")
 	    (parallel [(const_int 1)
 		       (const_int 3)
 		       (const_int 5)
 		       (const_int 7)])))
 	  (sign_extend:V4SI
 	   (vec_select:V4HI
-	    (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")
+	    (match_operand:V8HI 2 "nonimmediate_operand" "xm")
 	    (parallel [(const_int 1)
 		       (const_int 3)
 		       (const_int 5)
 		       (const_int 7)]))))
-	 (match_operand:V4SI 3 "register_operand" "x,x")))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
-  "@
-   vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
-   vpmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+	 (match_operand:V4SI 3 "register_operand" "x")))]
+  "TARGET_XOP"
+  "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
 (define_insn "xop_pmacswd"
-  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
 	(plus:V4SI
 	 (mult:V4SI
 	  (sign_extend:V4SI
 	   (vec_select:V4HI
-	    (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
+	    (match_operand:V8HI 1 "register_operand" "%x")
 	    (parallel [(const_int 1)
 		       (const_int 3)
 		       (const_int 5)
 		       (const_int 7)])))
 	  (sign_extend:V4SI
 	   (vec_select:V4HI
-	    (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")
+	    (match_operand:V8HI 2 "nonimmediate_operand" "xm")
 	    (parallel [(const_int 1)
 		       (const_int 3)
 		       (const_int 5)
 		       (const_int 7)]))))
-	 (match_operand:V4SI 3 "register_operand" "x,x")))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
-  "@
-   vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
-   vpmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+	 (match_operand:V4SI 3 "register_operand" "x")))]
+  "TARGET_XOP"
+  "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
 (define_insn "xop_pmadcsswd"
-  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
 	(ss_plus:V4SI
 	 (plus:V4SI
 	  (mult:V4SI
 	   (sign_extend:V4SI
 	    (vec_select:V4HI
-	     (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
+	     (match_operand:V8HI 1 "register_operand" "%x")
 	     (parallel [(const_int 0)
 			(const_int 2)
 			(const_int 4)
 			(const_int 6)])))
 	   (sign_extend:V4SI
 	    (vec_select:V4HI
-	     (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")
+	     (match_operand:V8HI 2 "nonimmediate_operand" "xm")
 	     (parallel [(const_int 0)
 			(const_int 2)
 			(const_int 4)
@@ -10783,29 +10699,27 @@
 			(const_int 3)
 			(const_int 5)
 			(const_int 7)])))))
-	 (match_operand:V4SI 3 "register_operand" "x,x")))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
-  "@
-   vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
-   vpmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+	 (match_operand:V4SI 3 "register_operand" "x")))]
+  "TARGET_XOP"
+  "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
 (define_insn "xop_pmadcswd"
-  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
 	(plus:V4SI
 	 (plus:V4SI
 	  (mult:V4SI
 	   (sign_extend:V4SI
 	    (vec_select:V4HI
-	     (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
+	     (match_operand:V8HI 1 "register_operand" "%x")
 	     (parallel [(const_int 0)
 			(const_int 2)
 			(const_int 4)
 			(const_int 6)])))
 	   (sign_extend:V4SI
 	    (vec_select:V4HI
-	     (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")
+	     (match_operand:V8HI 2 "nonimmediate_operand" "xm")
 	     (parallel [(const_int 0)
 			(const_int 2)
 			(const_int 4)
@@ -10825,32 +10739,30 @@
 			(const_int 3)
 			(const_int 5)
 			(const_int 7)])))))
-	 (match_operand:V4SI 3 "register_operand" "x,x")))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
-  "@
-   vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
-   vpmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+	 (match_operand:V4SI 3 "register_operand" "x")))]
+  "TARGET_XOP"
+  "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
 ;; XOP parallel XMM conditional moves
 (define_insn "xop_pcmov_<mode>"
-  [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x")
+  [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
 	(if_then_else:SSEMODE
-	  (match_operand:SSEMODE 3 "nonimmediate_operand" "x,x,m")
-	  (match_operand:SSEMODE 1 "vector_move_operand" "x,m,x")
-	  (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x")))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+	  (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
+	  (match_operand:SSEMODE 1 "vector_move_operand" "x,x")
+	  (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))]
+  "TARGET_XOP"
   "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "sse4arg")])
 
 (define_insn "xop_pcmov_<mode>256"
-  [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
+  [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
 	(if_then_else:AVX256MODE
-	  (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,x,m")
-	  (match_operand:AVX256MODE 1 "vector_move_operand" "x,m,x")
-	  (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x,x")))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+	  (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
+	  (match_operand:AVX256MODE 1 "vector_move_operand" "x,x")
+	  (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))]
+  "TARGET_XOP"
   "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "sse4arg")])
 
@@ -11296,53 +11208,53 @@
 
 ;; XOP permute instructions
 (define_insn "xop_pperm"
-  [(set (match_operand:V16QI 0 "register_operand" "=x,x,x")
+  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
 	(unspec:V16QI
-	  [(match_operand:V16QI 1 "nonimmediate_operand" "x,x,m")
-	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m,x")
-	   (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x")]
+	  [(match_operand:V16QI 1 "register_operand" "x,x")
+	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
+	   (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
 	  UNSPEC_XOP_PERMUTE))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "sse4arg")
    (set_attr "mode" "TI")])
 
 ;; XOP pack instructions that combine two vectors into a smaller vector
 (define_insn "xop_pperm_pack_v2di_v4si"
-  [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
 	(vec_concat:V4SI
 	 (truncate:V2SI
-	  (match_operand:V2DI 1 "nonimmediate_operand" "x,x,m"))
+	  (match_operand:V2DI 1 "register_operand" "x,x"))
 	 (truncate:V2SI
-	  (match_operand:V2DI 2 "nonimmediate_operand" "x,m,x"))))
-   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+	  (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
+   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
+  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "sse4arg")
    (set_attr "mode" "TI")])
 
 (define_insn "xop_pperm_pack_v4si_v8hi"
-  [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
+  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
 	(vec_concat:V8HI
 	 (truncate:V4HI
-	  (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m"))
+	  (match_operand:V4SI 1 "register_operand" "x,x"))
 	 (truncate:V4HI
-	  (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))))
-   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+	  (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
+   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
+  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "sse4arg")
    (set_attr "mode" "TI")])
 
 (define_insn "xop_pperm_pack_v8hi_v16qi"
-  [(set (match_operand:V16QI 0 "register_operand" "=x,x,x")
+  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
 	(vec_concat:V16QI
 	 (truncate:V8QI
-	  (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m"))
+	  (match_operand:V8HI 1 "register_operand" "x,x"))
 	 (truncate:V8QI
-	  (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))))
-   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+	  (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
+   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
+  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "sse4arg")
    (set_attr "mode" "TI")])
@@ -11471,7 +11383,7 @@
 	 (rotatert:SSEMODE1248
 	  (match_dup 1)
 	  (neg:SSEMODE1248 (match_dup 2)))))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
+  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseishft")
    (set_attr "prefix_data16" "0")
@@ -11526,7 +11438,7 @@
 	 (ashiftrt:SSEMODE1248
 	  (match_dup 1)
 	  (neg:SSEMODE1248 (match_dup 2)))))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
+  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseishft")
    (set_attr "prefix_data16" "0")
@@ -11545,7 +11457,7 @@
 	 (lshiftrt:SSEMODE1248
 	  (match_dup 1)
 	  (neg:SSEMODE1248 (match_dup 2)))))]
-  "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
+  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseishft")
    (set_attr "prefix_data16" "0")