From 7cbe41d35e6a60776484e04e42e408de9fc82954 Mon Sep 17 00:00:00 2001 From: Tamar Christina <tamar.christina@arm.com> Date: Wed, 10 Jan 2024 17:18:28 +0000 Subject: [PATCH] middle-end: Don't apply copysign optimization if target does not implement optab [PR112468] Currently GCC does not treat IFN_COPYSIGN the same as the copysign tree expr. The latter has a libcall fallback and the IFN can only do optabs. Because of this the change I made to optimize copysign only works if the target has implemented the optab, but it should work for those that have the libcall too. More annoyingly if a target has vector versions of ABS and NEG but not COPYSIGN then the change made them lose vectorization. The proper fix for this is to treat the IFN the same as the tree EXPR and to enhance expand_COPYSIGN to also support vector calls. I have such a patch for GCC 15 but it's quite big and too invasive for stage-4. As such this is a minimal fix, just don't apply the transformation and leave targets which don't have the optab unoptimized. The target list for check_effective_target_ifn_copysign was obtained by grepping for copysign and looking at the optab. gcc/ChangeLog: PR tree-optimization/112468 * doc/sourcebuild.texi: Document ifn_copysign. * match.pd: Only apply transformation if target supports the IFN. gcc/testsuite/ChangeLog: PR tree-optimization/112468 * gcc.dg/fold-copysign-1.c: Modify tests based on if target supports IFN_COPYSIGN. * gcc.dg/pr55152-2.c: Likewise. * gcc.dg/tree-ssa/abs-4.c: Likewise. * gcc.dg/tree-ssa/backprop-6.c: Likewise. * gcc.dg/tree-ssa/copy-sign-2.c: Likewise. * gcc.dg/tree-ssa/mult-abs-2.c: Likewise. * lib/target-supports.exp (check_effective_target_ifn_copysign): New. 
--- gcc/doc/sourcebuild.texi | 4 ++++ gcc/match.pd | 22 ++++++++++++++----- gcc/testsuite/gcc.dg/fold-copysign-1.c | 7 ++++-- gcc/testsuite/gcc.dg/pr55152-2.c | 6 ++++-- gcc/testsuite/gcc.dg/tree-ssa/abs-4.c | 9 +++++--- gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c | 9 +++++--- gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c | 6 ++++-- gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c | 4 ++-- gcc/testsuite/lib/target-supports.exp | 24 +++++++++++++++++++++ 9 files changed, 72 insertions(+), 19 deletions(-) diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index 3a394e7739b6..2a96927e8bb5 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -2681,6 +2681,10 @@ Target requires a command line argument to enable a SIMD instruction set. @item xorsign Target supports the xorsign optab expansion. +@item ifn_copysign +Target supports the copysign optab expansion of float and double for +both scalar and vector modes. + @end table @subsubsection Environment attributes diff --git a/gcc/match.pd b/gcc/match.pd index 7b4b15acc412..d75babd86c25 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -1160,18 +1160,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (hypots @0 (copysigns @1 @2)) (hypots @0 @1)))) -/* copysign(x, CST) -> abs (x). */ +/* copysign(x, CST) -> abs (x). If the target does not + support the copysign optab then canonicalize + copysign(x, -CST) -> fneg (abs (x)). */ (for copysigns (COPYSIGN_ALL) (simplify (copysigns @0 REAL_CST@1) (if (!REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1))) - (abs @0)))) + (abs @0) +#if GIMPLE + (if (!direct_internal_fn_supported_p (IFN_COPYSIGN, type, + OPTIMIZE_FOR_BOTH)) + (negate (abs @0))) +#endif + ))) -/* Transform fneg (fabs (X)) -> copysign (X, -1). */ +#if GIMPLE +/* Transform fneg (fabs (X)) -> copysign (X, -1) as the canonical + representation if the target supports the copysign optab. 
*/ (simplify (negate (abs @0)) - (IFN_COPYSIGN @0 { build_minus_one_cst (type); })) - + (if (direct_internal_fn_supported_p (IFN_COPYSIGN, type, + OPTIMIZE_FOR_BOTH)) + (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))) +#endif /* copysign(copysign(x, y), z) -> copysign(x, z). */ (for copysigns (COPYSIGN_ALL) (simplify diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c b/gcc/testsuite/gcc.dg/fold-copysign-1.c index f9cafd14ab05..96b80c733794 100644 --- a/gcc/testsuite/gcc.dg/fold-copysign-1.c +++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O -fdump-tree-cddce1" } */ +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */ double foo (double x) { @@ -12,5 +13,7 @@ double bar (double x) return __builtin_copysign (x, minuszero); } -/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */ -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */ +/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" { target { ! ifn_copysign } } } } */ +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" { target { ! 
ifn_copysign } } } } */ diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c index 605f202ed6bc..24068cffa4a8 100644 --- a/gcc/testsuite/gcc.dg/pr55152-2.c +++ b/gcc/testsuite/gcc.dg/pr55152-2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O -ffinite-math-only -fno-signed-zeros -fstrict-overflow -fdump-tree-optimized" } */ +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */ double g (double a) { @@ -10,5 +11,6 @@ int f(int a) return (a<-a)?a:-a; } -/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */ -/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" { target { ! ifn_copysign } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c index e1b825f37f69..80fa448df125 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O1 -fdump-tree-optimized" } */ +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */ /* PR tree-optimization/109829 */ float abs_f(float x) { return __builtin_signbit(x) ? x : -x; } @@ -9,6 +10,8 @@ long double abs_ld(long double x) { return __builtin_signbit(x) ? x : -x; } /* __builtin_signbit(x) ? x : -x. 
Should be convert into - ABS_EXP<x> */ /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */ -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */ -/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */ -/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */ +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times "= -" 1 "optimized" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized" { target { ! ifn_copysign } } } } */ +/* { dg-final { scan-tree-dump-times "= -" 3 "optimized" { target { ! ifn_copysign } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c index c3a138642d6f..4087ba93018b 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O -fdump-tree-backprop-details" } */ +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */ void start (void *); void end (void *); @@ -26,6 +27,8 @@ TEST_FUNCTION (float, f) TEST_FUNCTION (double, ) TEST_FUNCTION (long double, l) -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */ -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" } } */ -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" } } */ +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times 
{Deleting[^\n]* = -} 6 "backprop" { target { ! ifn_copysign } } } } */ +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" { target { ! ifn_copysign } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c index e5d565c4b983..e43bc315bef2 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c @@ -1,4 +1,5 @@ /* { dg-options "-O2 -ffast-math -fdump-tree-optimized" } */ +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */ /* { dg-do compile } */ float f(float x) { @@ -10,5 +11,6 @@ float f1(float x) float t = __builtin_copysignf (1.0f, -x); return x * t; } -/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */ -/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"} } */ +/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized" { target { ! ifn_copysign } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c index a22896b21c8b..675127cfe56b 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c @@ -34,5 +34,5 @@ float i1(float x) { return x * (x <= 0.f ? 
1.f : -1.f); } -/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */ -/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} } */ + +/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 4e329d207d77..43aa24a7c1d2 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -7926,6 +7926,30 @@ proc check_effective_target_xorsign { } { || [istarget aarch64*-*-*] || [istarget arm*-*-*] }}] } +# Return 1 if the target plus current options supports folding of +# copysign into IFN_COPYSIGN. +# +# This won't change for different subtargets so cache the result. + +proc check_effective_target_ifn_copysign { } { + return [check_cached_effective_target_indexed ifn_copysign { + expr { + (([istarget i?86-*-*] || [istarget x86_64-*-*]) + && [is-effective-target sse]) + || ([istarget loongarch*-*-*] + && [check_effective_target_hard_float]) + || ([istarget powerpc*-*-*] + && ![istarget powerpc-*-linux*paired*]) + || [istarget alpha*-*-*] + || [istarget aarch64*-*-*] + || [is-effective-target arm_neon] + || ([istarget s390*-*-*] + && [check_effective_target_s390_vx]) + || ([istarget riscv*-*-*] + && [check_effective_target_hard_float]) + }}] +} + # Return 1 if the target plus current options supports a vector # widening summation of *short* args into *int* result, 0 otherwise. # -- GitLab