From f19e3a64adb7011b6077943eef5a2a6f305fa1e1 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek <jakub@gcc.gnu.org> Date: Tue, 5 Apr 2005 22:20:42 +0200 Subject: [PATCH] [multiple changes] 2005-04-05 Jakub Jelinek <jakub@redhat.com> * config/i386/i386.c (init_cumulative_args): For -m32 -mfpmath=sse and local functions, set sse_nregs to 8 and float_in_sse. (function_arg_advance, function_arg): If float_in_sse, pass SFmode and DFmode arguments in SSE registers. * config/i386/i386.h (CUMULATIVE_ARGS): Add float_in_sse field. * config/i386/i386.c (ix86_value_regno): Only optimize local functions if -funit-at-a-time. 2005-04-05 Paolo Bonzini <bonzini@gnu.org> * config/i386/i386-protos.h (ix86_function_value): Accept two arguments, like the target macro. * config/i386/i386.h (FUNCTION_VALUE): Pass both arguments. * config/i386/i386.c (ix86_function_value): Accept the second argument of the target macro. (ix86_function_ok_for_sibcall): Pass a function pointer to ix86_function_value. (ix86_function_value, ix86_libcall_value) [!TARGET_64BIT]: Adjust call to ix86_value_regno. (ix86_value_regno): Add support for returning floating point values in SSE registers. 2005-04-05 Paolo Bonzini <bonzini@gnu.org> * gcc.dg/i386-ssefn-1.c: New test. * gcc.dg/i386-ssefn-2.c: New test. * gcc.dg/i386-ssefn-3.c: New test. * gcc.dg/i386-ssefn-4.c: New test. 
From-SVN: r97654 --- gcc/ChangeLog | 25 ++++++++++ gcc/config/i386/i386-protos.h | 2 +- gcc/config/i386/i386.c | 77 ++++++++++++++++++++++++----- gcc/config/i386/i386.h | 4 +- gcc/testsuite/ChangeLog | 7 +++ gcc/testsuite/gcc.dg/i386-ssefn-1.c | 30 +++++++++++ gcc/testsuite/gcc.dg/i386-ssefn-2.c | 30 +++++++++++ gcc/testsuite/gcc.dg/i386-ssefn-3.c | 43 ++++++++++++++++ gcc/testsuite/gcc.dg/i386-ssefn-4.c | 43 ++++++++++++++++ 9 files changed, 246 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/i386-ssefn-1.c create mode 100644 gcc/testsuite/gcc.dg/i386-ssefn-2.c create mode 100644 gcc/testsuite/gcc.dg/i386-ssefn-3.c create mode 100644 gcc/testsuite/gcc.dg/i386-ssefn-4.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f9034674f747..12c9ab95891c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2005-04-05 Jakub Jelinek <jakub@redhat.com> + + * config/i386/i386.c (init_cumulative_args): For -m32 -mfpmath=sse + and local functions, set sse_nregs to 8 and float_in_sse. + (function_arg_advance, function_arg): If float_in_sse, pass + SFmode and DFmode arguments in SSE registers. + * config/i386/i386.h (CUMULATIVE_ARGS): Add float_in_sse field. + + * config/i386/i386.c (ix86_value_regno): Only optimize local functions + if -funit-at-a-time. + +2005-04-05 Paolo Bonzini <bonzini@gnu.org> + + * config/i386/i386-protos.h (ix86_function_value): Accept two + arguments, like the target macro. + * config/i386/i386.h (FUNCTION_VALUE): Pass both arguments. + * config/i386/i386.c (ix86_function_value): Accept the second + argument of the target macro. + (ix86_function_ok_for_sibcall): Pass a function pointer to + ix86_function_value. + (ix86_function_value, ix86_libcall_value) [!TARGET_64BIT]: Adjust + call to ix86_value_regno. + (ix86_value_regno): Add support for returning floating point values + in SSE registers. 
+ 2005-04-05 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/20076 diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 2f949bd1f1cb..fa9c03af9261 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -204,7 +204,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); extern rtx function_arg (CUMULATIVE_ARGS *, enum machine_mode, tree, int); extern void function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode, tree, int); -extern rtx ix86_function_value (tree); +extern rtx ix86_function_value (tree, tree); #endif #endif diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 802103531d67..5eeb54867c4a 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -915,7 +915,7 @@ const struct attribute_spec ix86_attribute_table[]; static bool ix86_function_ok_for_sibcall (tree, tree); static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *); static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *); -static int ix86_value_regno (enum machine_mode); +static int ix86_value_regno (enum machine_mode, tree); static bool contains_128bit_aligned_vector_p (tree); static rtx ix86_struct_value_rtx (tree, int); static bool ix86_ms_bitfield_layout_p (tree); @@ -1645,19 +1645,27 @@ const struct attribute_spec ix86_attribute_table[] = static bool ix86_function_ok_for_sibcall (tree decl, tree exp) { + tree func; + /* If we are generating position-independent code, we cannot sibcall optimize any indirect call, or a direct call to a global function, as the PLT requires %ebx be live. 
*/ if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl))) return false; + if (decl) + func = decl; + else + func = NULL; + /* If we are returning floats on the 80387 register stack, we cannot make a sibcall from a function that doesn't return a float to a function that does or, conversely, from a function that does return a float to a function that doesn't; the necessary stack adjustment would not be executed. */ - if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp))) - != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl))))) + if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp), func)) + != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), + cfun->decl))) return false; /* If this call is indirect, we'll need to be able to use a call-clobbered @@ -2037,7 +2045,22 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ } if ((!fntype && !libname) || (fntype && !TYPE_ARG_TYPES (fntype))) - cum->maybe_vaarg = 1; + cum->maybe_vaarg = true; + + /* For local functions, pass SFmode (and DFmode for SSE2) arguments + in SSE registers even for 32-bit mode and not just 3, but up to + 8 SSE arguments in registers. 
*/ + if (!TARGET_64BIT && !cum->maybe_vaarg && !cum->fastcall + && cum->sse_nregs == SSE_REGPARM_MAX && fndecl + && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag) + { + struct cgraph_local_info *i = cgraph_local_info (fndecl); + if (i && i->local) + { + cum->sse_nregs = 8; + cum->float_in_sse = true; + } + } if (TARGET_DEBUG_ARG) fprintf (stderr, ", nregs=%d )\n", cum->nregs); @@ -2728,6 +2751,14 @@ function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, } break; + case DFmode: + if (!TARGET_SSE2) + break; + case SFmode: + if (!cum->float_in_sse) + break; + /* FALLTHRU */ + case TImode: case V16QImode: case V8HImode: @@ -2849,6 +2880,13 @@ function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode, ret = gen_rtx_REG (mode, regno); } break; + case DFmode: + if (!TARGET_SSE2) + break; + case SFmode: + if (!cum->float_in_sse) + break; + /* FALLTHRU */ case TImode: case V16QImode: case V8HImode: @@ -3040,7 +3078,7 @@ ix86_function_value_regno_p (int regno) If the precise function being called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0. */ rtx -ix86_function_value (tree valtype) +ix86_function_value (tree valtype, tree func) { enum machine_mode natmode = type_natural_mode (valtype); @@ -3056,7 +3094,7 @@ ix86_function_value (tree valtype) return ret; } else - return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode)); + return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode, func)); } /* Return false iff type is returned in memory. */ @@ -3158,23 +3196,36 @@ ix86_libcall_value (enum machine_mode mode) } } else - return gen_rtx_REG (mode, ix86_value_regno (mode)); + return gen_rtx_REG (mode, ix86_value_regno (mode, NULL)); } /* Given a mode, return the register to use for a return value. */ static int -ix86_value_regno (enum machine_mode mode) +ix86_value_regno (enum machine_mode mode, tree func) { - /* Floating point return values in %st(0). 
*/ - if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387) - return FIRST_FLOAT_REG; + gcc_assert (!TARGET_64BIT); + /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where we prevent this case when sse is not available. */ if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) return FIRST_SSE_REG; - /* Everything else in %eax. */ - return 0; + + /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */ + if (GET_MODE_CLASS (mode) != MODE_FLOAT || !TARGET_FLOAT_RETURNS_IN_80387) + return 0; + + /* Floating point return values in %st(0), except for local functions when + SSE math is enabled. */ + if (func && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && flag_unit_at_a_time) + { + struct cgraph_local_info *i = cgraph_local_info (func); + if (i && i->local) + return FIRST_SSE_REG; + } + + return FIRST_FLOAT_REG; } /* Create the va_list data type. */ diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 559ea82b47da..819b0b2367d7 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1698,7 +1698,7 @@ enum reg_class If the precise function being called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0. */ #define FUNCTION_VALUE(VALTYPE, FUNC) \ - ix86_function_value (VALTYPE) + ix86_function_value (VALTYPE, FUNC) #define FUNCTION_VALUE_REGNO_P(N) \ ix86_function_value_regno_p (N) @@ -1738,6 +1738,8 @@ typedef struct ix86_args { int mmx_nregs; /* # mmx registers available for passing */ int mmx_regno; /* next available mmx register number */ int maybe_vaarg; /* true for calls to possibly vardic fncts. */ + int float_in_sse; /* true if in 32-bit mode SFmode/DFmode should + be passed in SSE registers. 
*/ } CUMULATIVE_ARGS; /* Initialize a variable CUM of type CUMULATIVE_ARGS diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 79bfe2300b56..78e6d98282ba 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2005-04-05 Paolo Bonzini <bonzini@gnu.org> + + * gcc.dg/i386-ssefn-1.c: New test. + * gcc.dg/i386-ssefn-2.c: New test. + * gcc.dg/i386-ssefn-3.c: New test. + * gcc.dg/i386-ssefn-4.c: New test. + 2005-04-05 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/20076 diff --git a/gcc/testsuite/gcc.dg/i386-ssefn-1.c b/gcc/testsuite/gcc.dg/i386-ssefn-1.c new file mode 100644 index 000000000000..a8b0b14e73cd --- /dev/null +++ b/gcc/testsuite/gcc.dg/i386-ssefn-1.c @@ -0,0 +1,30 @@ +/* Test argument passing with SSE and local functions + Written by Paolo Bonzini, 25 January 2005 */ + +/* { dg-do compile { target i?86-*-* } } */ +/* { dg-final { scan-assembler "movss" } } */ +/* { dg-final { scan-assembler "mulss" } } */ +/* { dg-final { scan-assembler-not "movsd" } } */ +/* { dg-final { scan-assembler-not "mulsd" } } */ +/* { dg-options "-O2 -msse -mfpmath=sse -fno-inline" } */ + +static float xs (void) +{ + return 3.14159265; +} + +float ys (float a) +{ + return xs () * a; +} + +static double xd (void) +{ + return 3.1415926535; +} + +double yd (double a) +{ + return xd () * a; +} + diff --git a/gcc/testsuite/gcc.dg/i386-ssefn-2.c b/gcc/testsuite/gcc.dg/i386-ssefn-2.c new file mode 100644 index 000000000000..0e1970032e9a --- /dev/null +++ b/gcc/testsuite/gcc.dg/i386-ssefn-2.c @@ -0,0 +1,30 @@ +/* Test argument passing with SSE2 and local functions + Written by Paolo Bonzini, 25 January 2005 */ + +/* { dg-do compile { target i?86-*-* } } */ +/* { dg-final { scan-assembler "movss" } } */ +/* { dg-final { scan-assembler "mulss" } } */ +/* { dg-final { scan-assembler "movsd" } } */ +/* { dg-final { scan-assembler "mulsd" } } */ +/* { dg-options "-O2 -msse2 -mfpmath=sse -fno-inline" } */ + +static float xs (void) +{ + return 
3.14159265; +} + +float ys (float a) +{ + return xs () * a; +} + +static double xd (void) +{ + return 3.1415926535; +} + +double yd (double a) +{ + return xd () * a; +} + diff --git a/gcc/testsuite/gcc.dg/i386-ssefn-3.c b/gcc/testsuite/gcc.dg/i386-ssefn-3.c new file mode 100644 index 000000000000..2816919f7c27 --- /dev/null +++ b/gcc/testsuite/gcc.dg/i386-ssefn-3.c @@ -0,0 +1,43 @@ +/* Execution test for argument passing with SSE and local functions + Written by Paolo Bonzini, 25 January 2005 */ + +/* { dg-do run { target i?86-*-* } } */ +/* { dg-options "-O2 -msse -mfpmath=sse" } */ +#include <assert.h> +#include "i386-cpuid.h" + +static float xs (void) +{ + return 3.14159265; +} + +float ys (float a) +{ + return xs () * a; +} + +static double xd (void) +{ + return 3.1415926535; +} + +double yd (double a) +{ + return xd () * a; +} + +int main() +{ + unsigned long cpu_facilities; + + cpu_facilities = i386_cpuid (); + + if (cpu_facilities & bit_SSE) + { + assert (ys (1) == xs ()); + assert (ys (2) == xs () * 2); + assert (yd (1) == xd ()); + assert (yd (2) == xd () * 2); + } + return 0; +} diff --git a/gcc/testsuite/gcc.dg/i386-ssefn-4.c b/gcc/testsuite/gcc.dg/i386-ssefn-4.c new file mode 100644 index 000000000000..353afcec0a17 --- /dev/null +++ b/gcc/testsuite/gcc.dg/i386-ssefn-4.c @@ -0,0 +1,43 @@ +/* Execution test for argument passing with SSE2 and local functions + Written by Paolo Bonzini, 25 January 2005 */ + +/* { dg-do run { target i?86-*-* } } */ +/* { dg-options "-O2 -msse2 -mfpmath=sse" } */ +#include <assert.h> +#include "i386-cpuid.h" + +static float xs (void) +{ + return 3.14159265; +} + +float ys (float a) +{ + return xs () * a; +} + +static double xd (void) +{ + return 3.1415926535; +} + +double yd (double a) +{ + return xd () * a; +} + +int main() +{ + unsigned long cpu_facilities; + + cpu_facilities = i386_cpuid (); + + if (cpu_facilities & bit_SSE2) + { + assert (ys (1) == xs ()); + assert (ys (2) == xs () * 2); + assert (yd (1) == xd ()); + 
assert (yd (2) == xd () * 2); + } + return 0; +} -- GitLab