From f19e3a64adb7011b6077943eef5a2a6f305fa1e1 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@gcc.gnu.org>
Date: Tue, 5 Apr 2005 22:20:42 +0200
Subject: [PATCH] i386: Use SSE registers for float args/returns of local functions

2005-04-05  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/i386.c (init_cumulative_args): For -m32 -mfpmath=sse
	and local functions, set sse_nregs to 8 and float_in_sse.
	(function_arg_advance, function_arg): If float_in_sse, pass
	SFmode and DFmode arguments in SSE registers.
	* config/i386/i386.h (CUMULATIVE_ARGS): Add float_in_sse field.

	* config/i386/i386.c (ix86_value_regno): Only optimize local functions
	if -funit-at-a-time.

2005-04-05  Paolo Bonzini  <bonzini@gnu.org>

	* config/i386/i386-protos.h (ix86_function_value): Accept two
	arguments, like the target macro.
	* config/i386/i386.h (FUNCTION_VALUE): Pass both arguments.
	* config/i386/i386.c (ix86_function_value): Accept the second
	argument of the target macro.
	(ix86_function_ok_for_sibcall): Pass a function pointer to
	ix86_function_value.
	(ix86_function_value, ix86_libcall_value) [!TARGET_64BIT]: Adjust
	call to ix86_value_regno.
	(ix86_value_regno): Add support for returning floating point values
	in SSE registers.

2005-04-05  Paolo Bonzini  <bonzini@gnu.org>

	* gcc.dg/i386-ssefn-1.c: New test.
	* gcc.dg/i386-ssefn-2.c: New test.
	* gcc.dg/i386-ssefn-3.c: New test.
	* gcc.dg/i386-ssefn-4.c: New test.

From-SVN: r97654
---
 gcc/ChangeLog                       | 25 ++++++++++
 gcc/config/i386/i386-protos.h       |  2 +-
 gcc/config/i386/i386.c              | 77 ++++++++++++++++++++++++-----
 gcc/config/i386/i386.h              |  4 +-
 gcc/testsuite/ChangeLog             |  7 +++
 gcc/testsuite/gcc.dg/i386-ssefn-1.c | 30 +++++++++++
 gcc/testsuite/gcc.dg/i386-ssefn-2.c | 30 +++++++++++
 gcc/testsuite/gcc.dg/i386-ssefn-3.c | 43 ++++++++++++++++
 gcc/testsuite/gcc.dg/i386-ssefn-4.c | 43 ++++++++++++++++
 9 files changed, 246 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/i386-ssefn-1.c
 create mode 100644 gcc/testsuite/gcc.dg/i386-ssefn-2.c
 create mode 100644 gcc/testsuite/gcc.dg/i386-ssefn-3.c
 create mode 100644 gcc/testsuite/gcc.dg/i386-ssefn-4.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f9034674f747..12c9ab95891c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,28 @@
+2005-04-05  Jakub Jelinek  <jakub@redhat.com>
+
+	* config/i386/i386.c (init_cumulative_args): For -m32 -mfpmath=sse
+	and local functions, set sse_nregs to 8 and float_in_sse.
+	(function_arg_advance, function_arg): If float_in_sse, pass
+	SFmode and DFmode arguments in SSE registers.
+	* config/i386/i386.h (CUMULATIVE_ARGS): Add float_in_sse field.
+
+	* config/i386/i386.c (ix86_value_regno): Only optimize local functions
+	if -funit-at-a-time.
+
+2005-04-05  Paolo Bonzini  <bonzini@gnu.org>
+
+	* config/i386/i386-protos.h (ix86_function_value): Accept two
+	arguments, like the target macro.
+	* config/i386/i386.h (FUNCTION_VALUE): Pass both arguments.
+	* config/i386/i386.c (ix86_function_value): Accept the second
+	argument of the target macro.
+	(ix86_function_ok_for_sibcall): Pass a function pointer to
+	ix86_function_value.
+	(ix86_function_value, ix86_libcall_value) [!TARGET_64BIT]: Adjust
+	call to ix86_value_regno.
+	(ix86_value_regno): Add support for returning floating point values
+	in SSE registers.
+
 2005-04-05  Jakub Jelinek  <jakub@redhat.com>
 
 	PR tree-optimization/20076
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 2f949bd1f1cb..fa9c03af9261 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -204,7 +204,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
 extern rtx function_arg (CUMULATIVE_ARGS *, enum machine_mode, tree, int);
 extern void function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
 				  tree, int);
-extern rtx ix86_function_value (tree);
+extern rtx ix86_function_value (tree, tree);
 #endif
 
 #endif
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 802103531d67..5eeb54867c4a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -915,7 +915,7 @@ const struct attribute_spec ix86_attribute_table[];
 static bool ix86_function_ok_for_sibcall (tree, tree);
 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
-static int ix86_value_regno (enum machine_mode);
+static int ix86_value_regno (enum machine_mode, tree);
 static bool contains_128bit_aligned_vector_p (tree);
 static rtx ix86_struct_value_rtx (tree, int);
 static bool ix86_ms_bitfield_layout_p (tree);
@@ -1645,19 +1645,27 @@ const struct attribute_spec ix86_attribute_table[] =
 static bool
 ix86_function_ok_for_sibcall (tree decl, tree exp)
 {
+  tree func;
+
   /* If we are generating position-independent code, we cannot sibcall
      optimize any indirect call, or a direct call to a global function,
      as the PLT requires %ebx be live.  */
   if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
     return false;
 
+  if (decl)
+    func = decl;
+  else
+    func = NULL;
+
   /* If we are returning floats on the 80387 register stack, we cannot
      make a sibcall from a function that doesn't return a float to a
      function that does or, conversely, from a function that does return
      a float to a function that doesn't; the necessary stack adjustment
      would not be executed.  */
-  if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
-      != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
+  if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp), func))
+      != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
+					   cfun->decl)))
     return false;
 
   /* If this call is indirect, we'll need to be able to use a call-clobbered
@@ -2037,7 +2045,22 @@ init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
     }
   if ((!fntype && !libname)
       || (fntype && !TYPE_ARG_TYPES (fntype)))
-    cum->maybe_vaarg = 1;
+    cum->maybe_vaarg = true;
+
+  /* For local functions, pass SFmode (and DFmode for SSE2) arguments
+     in SSE registers even for 32-bit mode and not just 3, but up to
+     8 SSE arguments in registers.  */
+  if (!TARGET_64BIT && !cum->maybe_vaarg && !cum->fastcall
+      && cum->sse_nregs == SSE_REGPARM_MAX && fndecl
+      && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
+    {
+      struct cgraph_local_info *i = cgraph_local_info (fndecl);
+      if (i && i->local)
+	{
+	  cum->sse_nregs = 8;
+	  cum->float_in_sse = true;
+	}
+    }
 
   if (TARGET_DEBUG_ARG)
     fprintf (stderr, ", nregs=%d )\n", cum->nregs);
@@ -2728,6 +2751,14 @@ function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
 	    }
 	  break;
 
+	case DFmode:
+	  if (!TARGET_SSE2)
+	    break;
+	case SFmode:
+	  if (!cum->float_in_sse)
+	    break;
+	  /* FALLTHRU */
+
 	case TImode:
 	case V16QImode:
 	case V8HImode:
@@ -2849,6 +2880,13 @@ function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
 	    ret = gen_rtx_REG (mode, regno);
 	  }
 	break;
+      case DFmode:
+	if (!TARGET_SSE2)
+	  break;
+      case SFmode:
+	if (!cum->float_in_sse)
+	  break;
+	/* FALLTHRU */
       case TImode:
       case V16QImode:
       case V8HImode:
@@ -3040,7 +3078,7 @@ ix86_function_value_regno_p (int regno)
    If the precise function being called is known, FUNC is its FUNCTION_DECL;
    otherwise, FUNC is 0.  */
 rtx
-ix86_function_value (tree valtype)
+ix86_function_value (tree valtype, tree func)
 {
   enum machine_mode natmode = type_natural_mode (valtype);
 
@@ -3056,7 +3094,7 @@ ix86_function_value (tree valtype)
       return ret;
     }
   else
-    return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode));
+    return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode, func));
 }
 
 /* Return false iff type is returned in memory.  */
@@ -3158,23 +3196,36 @@ ix86_libcall_value (enum machine_mode mode)
 	}
     }
   else
-    return gen_rtx_REG (mode, ix86_value_regno (mode));
+    return gen_rtx_REG (mode, ix86_value_regno (mode, NULL));
 }
 
 /* Given a mode, return the register to use for a return value.  */
 
 static int
-ix86_value_regno (enum machine_mode mode)
+ix86_value_regno (enum machine_mode mode, tree func)
 {
-  /* Floating point return values in %st(0).  */
-  if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
-    return FIRST_FLOAT_REG;
+  gcc_assert (!TARGET_64BIT);
+
   /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
      we prevent this case when sse is not available.  */
   if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
     return FIRST_SSE_REG;
-  /* Everything else in %eax.  */
-  return 0;
+
+  /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
+  if (GET_MODE_CLASS (mode) != MODE_FLOAT || !TARGET_FLOAT_RETURNS_IN_80387)
+    return 0;
+
+  /* Floating point return values in %st(0), except for local functions when
+     SSE math is enabled.  */
+  if (func && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH
+      && flag_unit_at_a_time)
+    {
+      struct cgraph_local_info *i = cgraph_local_info (func);
+      if (i && i->local)
+	return FIRST_SSE_REG;
+    }
+
+  return FIRST_FLOAT_REG;
 }
 
 /* Create the va_list data type.  */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 559ea82b47da..819b0b2367d7 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1698,7 +1698,7 @@ enum reg_class
    If the precise function being called is known, FUNC is its FUNCTION_DECL;
    otherwise, FUNC is 0.  */
 #define FUNCTION_VALUE(VALTYPE, FUNC)  \
-   ix86_function_value (VALTYPE)
+   ix86_function_value (VALTYPE, FUNC)
 
 #define FUNCTION_VALUE_REGNO_P(N) \
   ix86_function_value_regno_p (N)
@@ -1738,6 +1738,8 @@ typedef struct ix86_args {
   int mmx_nregs;		/* # mmx registers available for passing */
   int mmx_regno;		/* next available mmx register number */
   int maybe_vaarg;		/* true for calls to possibly vardic fncts.  */
+  int float_in_sse;		/* true if in 32-bit mode SFmode/DFmode should
+				   be passed in SSE registers.  */
 } CUMULATIVE_ARGS;
 
 /* Initialize a variable CUM of type CUMULATIVE_ARGS
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 79bfe2300b56..78e6d98282ba 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2005-04-05  Paolo Bonzini  <bonzini@gnu.org>
+
+	* gcc.dg/i386-ssefn-1.c: New test.
+	* gcc.dg/i386-ssefn-2.c: New test.
+	* gcc.dg/i386-ssefn-3.c: New test.
+	* gcc.dg/i386-ssefn-4.c: New test.
+
 2005-04-05  Jakub Jelinek  <jakub@redhat.com>
 
 	PR tree-optimization/20076
diff --git a/gcc/testsuite/gcc.dg/i386-ssefn-1.c b/gcc/testsuite/gcc.dg/i386-ssefn-1.c
new file mode 100644
index 000000000000..a8b0b14e73cd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/i386-ssefn-1.c
@@ -0,0 +1,30 @@
+/* Test argument passing with SSE and local functions
+   Written by Paolo Bonzini, 25 January 2005 */
+
+/* { dg-do compile { target i?86-*-* } } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler "mulss" } } */
+/* { dg-final { scan-assembler-not "movsd" } } */
+/* { dg-final { scan-assembler-not "mulsd" } } */
+/* { dg-options "-O2 -msse -mfpmath=sse -fno-inline" } */
+
+static float xs (void)
+{
+  return 3.14159265;
+}
+
+float ys (float a)
+{
+  return xs () * a;
+}
+
+static double xd (void)
+{
+  return 3.1415926535;
+}
+
+double yd (double a)
+{
+  return xd () * a;
+}
+
diff --git a/gcc/testsuite/gcc.dg/i386-ssefn-2.c b/gcc/testsuite/gcc.dg/i386-ssefn-2.c
new file mode 100644
index 000000000000..0e1970032e9a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/i386-ssefn-2.c
@@ -0,0 +1,30 @@
+/* Test argument passing with SSE2 and local functions
+   Written by Paolo Bonzini, 25 January 2005 */
+
+/* { dg-do compile { target i?86-*-* } } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler "mulss" } } */
+/* { dg-final { scan-assembler "movsd" } } */
+/* { dg-final { scan-assembler "mulsd" } } */
+/* { dg-options "-O2 -msse2 -mfpmath=sse -fno-inline" } */
+
+static float xs (void)
+{
+  return 3.14159265;
+}
+
+float ys (float a)
+{
+  return xs () * a;
+}
+
+static double xd (void)
+{
+  return 3.1415926535;
+}
+
+double yd (double a)
+{
+  return xd () * a;
+}
+
diff --git a/gcc/testsuite/gcc.dg/i386-ssefn-3.c b/gcc/testsuite/gcc.dg/i386-ssefn-3.c
new file mode 100644
index 000000000000..2816919f7c27
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/i386-ssefn-3.c
@@ -0,0 +1,43 @@
+/* Execution test for argument passing with SSE and local functions
+   Written by Paolo Bonzini, 25 January 2005 */
+
+/* { dg-do run { target i?86-*-* } } */
+/* { dg-options "-O2 -msse -mfpmath=sse" } */
+#include <assert.h>
+#include "i386-cpuid.h"
+
+static float xs (void)
+{
+  return 3.14159265;
+}
+
+float ys (float a)
+{
+  return xs () * a;
+}
+
+static double xd (void)
+{
+  return 3.1415926535;
+}
+
+double yd (double a)
+{
+  return xd () * a;
+}
+
+int main()
+{
+  unsigned long cpu_facilities;
+
+  cpu_facilities = i386_cpuid ();
+
+  if (cpu_facilities & bit_SSE)
+    {
+      assert (ys (1) == xs ());
+      assert (ys (2) == xs () * 2);
+      assert (yd (1) == xd ());
+      assert (yd (2) == xd () * 2);
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/i386-ssefn-4.c b/gcc/testsuite/gcc.dg/i386-ssefn-4.c
new file mode 100644
index 000000000000..353afcec0a17
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/i386-ssefn-4.c
@@ -0,0 +1,43 @@
+/* Execution test for argument passing with SSE2 and local functions
+   Written by Paolo Bonzini, 25 January 2005 */
+
+/* { dg-do run { target i?86-*-* } } */
+/* { dg-options "-O2 -msse2 -mfpmath=sse" } */
+#include <assert.h>
+#include "i386-cpuid.h"
+
+static float xs (void)
+{
+  return 3.14159265;
+}
+
+float ys (float a)
+{
+  return xs () * a;
+}
+
+static double xd (void)
+{
+  return 3.1415926535;
+}
+
+double yd (double a)
+{
+  return xd () * a;
+}
+
+int main()
+{
+  unsigned long cpu_facilities;
+
+  cpu_facilities = i386_cpuid ();
+
+  if (cpu_facilities & bit_SSE2)
+    {
+      assert (ys (1) == xs ());
+      assert (ys (2) == xs () * 2);
+      assert (yd (1) == xd ());
+      assert (yd (2) == xd () * 2);
+    }
+  return 0;
+}
-- 
GitLab