From 7b1980026cceb8cdd46dc796b8be79245366f1f7 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Wed, 2 Apr 2008 21:07:27 +0200
Subject: [PATCH] i386.md (*float<SSEMODEI24:mode><X87MODEF:mode>2_1): Emit
 gen_floatdi<X87MODEF:mode>2_i387_with_xmm for DImode values in 32bit mode...

        * config/i386/i386.md (*float<SSEMODEI24:mode><X87MODEF:mode>2_1):
        Emit gen_floatdi<X87MODEF:mode>2_i387_with_xmm for DImode values
        in 32bit mode when XMM registers are available to avoid store
        forwarding stalls.
        (floatdi<X87MODEF:mode>2_i387_with_xmm): New insn pattern and
        corresponding post-reload splitters.

From-SVN: r133845
---
 gcc/ChangeLog           |  9 ++++++
 gcc/config/i386/i386.md | 71 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 703fd01589aa..984ff05559c7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2008-04-02  Uros Bizjak  <ubizjak@gmail.com>
+
+	* config/i386/i386.md (*float<SSEMODEI24:mode><X87MODEF:mode>2_1):
+	Emit gen_floatdi<X87MODEF:mode>2_i387_with_xmm for DImode values
+	in 32bit mode when XMM registers are available to avoid store
+	forwarding stalls.
+	(floatdi<X87MODEF:mode>2_i387_with_xmm): New insn pattern and
+	corresponding post-reload splitters.
+
 2008-04-02  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* config/i386/i386.c (bdesc_sse_3arg): Add __builtin_ia32_shufps
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e2d68bb8209d..adeafc2f3f5e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4925,7 +4925,21 @@
   "&& 1"
   [(parallel [(set (match_dup 0) (float:X87MODEF (match_dup 1)))
 	      (clobber (match_dup 2))])]
-  "operands[2] = assign_386_stack_local (<SSEMODEI24:MODE>mode, SLOT_TEMP);")
+{
+  operands[2] = assign_386_stack_local (<SSEMODEI24:MODE>mode, SLOT_TEMP);
+
+  /* Avoid store forwarding (partial memory) stall penalty
+     by passing DImode value through XMM registers.  */
+  if (<SSEMODEI24:MODE>mode == DImode && !TARGET_64BIT 
+      && TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES 
+      && !optimize_size)
+    {
+      emit_insn (gen_floatdi<X87MODEF:mode>2_i387_with_xmm (operands[0],
+							    operands[1],
+							    operands[2]));
+      DONE;
+    }
+})
 
 (define_insn "*floatsi<mode>2_vector_mixed_with_temp"
   [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x,x")
@@ -5310,6 +5324,61 @@
   [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
   "")
 
+;; Avoid store forwarding (partial memory) stall penalty by passing the
+;; DImode value through XMM registers.  Clobbers two V4SI scratch registers
+;; (operands 3, 4) and a DImode memory temporary (operand 2); output is "#".
+(define_insn "floatdi<X87MODEF:mode>2_i387_with_xmm"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+	(float:X87MODEF
+	  (match_operand:DI 1 "nonimmediate_operand" "m,?r")))
+   (clobber (match_scratch:V4SI 3 "=&x,x"))
+   (clobber (match_scratch:V4SI 4 "=&x,x"))
+   (clobber (match_operand:DI 2 "memory_operand" "=m,m"))]
+  "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+   && !TARGET_64BIT && !optimize_size"
+  "#"
+  [(set_attr "type" "multi")
+   (set_attr "mode" "<X87MODEF:MODE>")
+   (set_attr "unit" "i387")
+   (set_attr "fp_int_src" "true")])
+
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(float:X87MODEF (match_operand:DI 1 "register_operand" "")))
+   (clobber (match_operand:V4SI 3 "register_operand" ""))
+   (clobber (match_operand:V4SI 4 "register_operand" ""))
+   (clobber (match_operand:DI 2 "memory_operand" ""))]
+  "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+   && !TARGET_64BIT && !optimize_size
+   && reload_completed
+   && FP_REG_P (operands[0])"
+  [(set (match_dup 2) (match_dup 3))	; store the xmm copy to the memory temp
+   (set (match_dup 0) (float:X87MODEF (match_dup 2)))]	; convert from memory
+{
+  /* The DImode value arrived in a pair of integer registers (e.g. %edx:%eax).
+     Load each SImode half into an xmm scratch and merge them in operand 3.  */
+  emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode),
+			      gen_rtx_SUBREG (SImode, operands[1], 0)));
+  emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
+			      gen_rtx_SUBREG (SImode, operands[1], 4)));
+  emit_insn (gen_sse2_punpckldq (operands[3], operands[3], operands[4]));
+  /* Refer to the merged xmm register in DImode for the store to operand 2.  */
+  operands[3] = gen_rtx_REG (DImode, REGNO (operands[3]));
+})
+
+(define_split	; Memory source: convert operand 1 directly, dropping the clobbers.
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(float:X87MODEF (match_operand:DI 1 "memory_operand" "")))
+   (clobber (match_operand:V4SI 3 "register_operand" ""))
+   (clobber (match_operand:V4SI 4 "register_operand" ""))
+   (clobber (match_operand:DI 2 "memory_operand" ""))]	; numbered as in the insn
+  "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+   && !TARGET_64BIT && !optimize_size
+   && reload_completed
+   && FP_REG_P (operands[0])"
+  [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
+  "")
+
 ;; Avoid store forwarding (partial memory) stall penalty by extending
 ;; SImode value to DImode through XMM register instead of pushing two
 ;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES
-- 
GitLab