From 9c4397cafc5ded9b008a92a55d4e5207e1c2e4e4 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Thu, 16 Jan 2025 09:20:15 +0100
Subject: [PATCH] tailc: Virtually undo IPA-VRP return value optimization for
 tail calls [PR118430]

When we have return somefn (whatever); where somefn is normally tail
callable and IPA-VRP determines somefn returns a singleton range, VRP
just changes the IL to
  somefn (whatever);
  return 42;
(or whatever the value in that range is).  The introduction of IPA-VRP
return value tracking then effectively regresses the tail call optimization.
This is even more important if the call is [[gnu::musttail]].

So, the following patch asks IPA-VRP whether the called function returns a
singleton range and, if it does and the value returned by the caller is
identical to the only value in that range, marks the call as [tail call]
anyway.  If expansion decides it can't use the tail call, we'll still expand
the return 42; or similar statement, and if it decides it can use the tail
call, that statement will be ignored and we'll emit a normal tail call.

The reason it works is that the expand pass relies on the tailc pass to
do its job properly.
E.g. when we have
  <bb 2> [local count: 1073741824]:
  foo (x_2(D));
  baz (&v);
  v ={v} {CLOBBER(eos)};
  bar (x_2(D)); [tail call]
  return 1;
when expand_gimple_basic_block handles the bar (x_2(D)); call, it uses
          if (call_stmt && gimple_call_tail_p (call_stmt))
            {
              bool can_fallthru;
              new_bb = expand_gimple_tailcall (bb, call_stmt, &can_fallthru);
              if (new_bb)
                {
                  if (can_fallthru)
                    bb = new_bb;
                  else
                    {
                      currently_expanding_gimple_stmt = NULL;
                      return new_bb;
                    }
                }
            }
As it is actually tail callable during expansion of the bar (x_2(D)); call
stmt, expand_gimple_tailcall returns non-NULL and sets can_fallthru to false,
plus emits
;; bar (x_2(D)); [tail call]

(insn 11 10 12 2 (set (reg:SI 5 di)
        (reg/v:SI 99 [ x ])) "pr118430.c":35:10 -1
     (nil))

(call_insn/j 12 11 13 2 (set (reg:SI 0 ax)
        (call (mem:QI (symbol_ref:DI ("bar") [flags 0x3]  <function_decl 0x7fb39020bd00 bar>) [0 bar S1 A8])
            (const_int 0 [0]))) "pr118430.c":35:10 -1
     (expr_list:REG_CALL_DECL (symbol_ref:DI ("bar") [flags 0x3]  <function_decl 0x7fb39020bd00 bar>)
        (expr_list:REG_EH_REGION (const_int 0 [0])
            (nil)))
    (expr_list:SI (use (reg:SI 5 di))
        (nil)))

(barrier 13 12 0)
Because it doesn't fall through, no further statements in the same bb are
expanded.  Now, if the return happened to be in a different basic block
than the [tail call], it could be expanded, but because the bb with the
tail call ends with a barrier, control doesn't fall through to it, and if
nothing else could reach it, the unreachable bb would be removed soon
afterwards.

2025-01-16  Jakub Jelinek  <jakub@redhat.com>
	    Andrew Pinski  <quic_apinski@quicinc.com>

	PR tree-optimization/118430
	* tree-tailcall.cc: Include gimple-range.h, alloc-pool.h, sreal.h,
	symbol-summary.h, ipa-cp.h and ipa-prop.h.
	(find_tail_calls): If ass_var is NULL and ret_var is not, check if
	IPA-VRP has not found singleton return range for it.  In that case,
	don't punt if ret_var is the only value in that range.  Adjust the
	maybe_error_musttail message otherwise to diagnose different value
	being returned from the caller and callee rather than using return
	slot.  Formatting fixes.

	* c-c++-common/musttail14.c: New test.
	* c-c++-common/pr118430.c: New test.
---
 gcc/testsuite/c-c++-common/musttail14.c | 90 +++++++++++++++++++++++++
 gcc/testsuite/c-c++-common/pr118430.c   | 89 ++++++++++++++++++++++++
 gcc/tree-tailcall.cc                    | 56 ++++++++++++---
 3 files changed, 224 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/musttail14.c
 create mode 100644 gcc/testsuite/c-c++-common/pr118430.c

diff --git a/gcc/testsuite/c-c++-common/musttail14.c b/gcc/testsuite/c-c++-common/musttail14.c
new file mode 100644
index 000000000000..e2ab20bc5a46
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/musttail14.c
@@ -0,0 +1,90 @@
+/* PR tree-optimization/118430 */
+/* { dg-do compile { target musttail } } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "  bar \\\(\[^\n\r]\*\\\); \\\[tail call\\\] \\\[must tail call\\\]" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "  freddy \\\(\[^\n\r]\*\\\); \\\[tail call\\\] \\\[must tail call\\\]" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "  (?:bar|freddy) \\\(\[^\n\r]\*\\\); \\\[tail call\\\]" 2 "optimized" } } */
+
+__attribute__ ((noipa)) void
+foo (int x)
+{
+  (void) x;
+}
+
+__attribute__ ((noinline)) int
+bar (int x)
+{
+  foo (x);
+  return 1;
+}
+
+__attribute__ ((noinline)) int
+baz (int *x)
+{
+  foo (*x);
+  return 2;
+}
+
+__attribute__((noipa)) int
+qux (int x)
+{
+  {
+    int v;
+    foo (x);
+    baz (&v);
+  }
+  [[gnu::musttail]]
+  return bar (x);
+}
+
+__attribute__((noipa)) int
+corge (int x)
+{
+  {
+    int v;
+    foo (x);
+    baz (&v);
+  }
+  return bar (x) + 1;
+}
+
+__attribute__ ((noinline)) float
+freddy (int x)
+{
+  foo (x);
+  return 1.75f;
+}
+
+__attribute__((noipa)) float
+garply (int x)
+{
+  {
+    int v;
+    foo (x);
+    baz (&v);
+  }
+  [[gnu::musttail]]
+  return freddy (x);
+}
+
+__attribute__((noipa)) float
+quux (int x)
+{
+  {
+    int v;
+    foo (x);
+    baz (&v);
+  }
+  return freddy (x) + 0.25f;
+}
+
+int v;
+
+int
+main ()
+{
+  qux (v);
+  corge (v);
+  garply (v);
+  quux (v);
+}
diff --git a/gcc/testsuite/c-c++-common/pr118430.c b/gcc/testsuite/c-c++-common/pr118430.c
new file mode 100644
index 000000000000..e14592f79807
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/pr118430.c
@@ -0,0 +1,89 @@
+/* PR tree-optimization/118430 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "  bar \\\(\[^\n\r]\*\\\); \\\[tail call\\\]" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "  freddy \\\(\[^\n\r]\*\\\); \\\[tail call\\\]" 2 "optimized" } } */
+
+__attribute__ ((noipa)) void
+foo (int x)
+{
+  (void) x;
+}
+
+__attribute__ ((noinline)) int
+bar (int x)
+{
+  foo (x);
+  return 1;
+}
+
+__attribute__ ((noinline)) int
+baz (int *x)
+{
+  foo (*x);
+  return 2;
+}
+
+__attribute__((noipa)) int
+qux (int x)
+{
+  {
+    int v;
+    foo (x);
+    baz (&v);
+  }
+  return bar (x);
+}
+
+__attribute__((noipa)) int
+corge (int x)
+{
+  {
+    int v;
+    foo (x);
+    baz (&v);
+  }
+  bar (x);
+  return 1;
+}
+
+__attribute__ ((noinline)) float
+freddy (int x)
+{
+  foo (x);
+  return 1.75f;
+}
+
+__attribute__((noipa)) float
+garply (int x)
+{
+  {
+    int v;
+    foo (x);
+    baz (&v);
+  }
+  return freddy (x);
+}
+
+__attribute__((noipa)) float
+quux (int x)
+{
+  {
+    int v;
+    foo (x);
+    baz (&v);
+  }
+  freddy (x);
+  return 1.75f;
+}
+
+int v;
+
+int
+main ()
+{
+  qux (v);
+  corge (v);
+  garply (v);
+  quux (v);
+}
diff --git a/gcc/tree-tailcall.cc b/gcc/tree-tailcall.cc
index d6d7eb4b47c3..f97df31eb3cf 100644
--- a/gcc/tree-tailcall.cc
+++ b/gcc/tree-tailcall.cc
@@ -45,6 +45,12 @@ along with GCC; see the file COPYING3.  If not see
 #include "ipa-utils.h"
 #include "tree-ssa-live.h"
 #include "diagnostic-core.h"
+#include "gimple-range.h"
+#include "alloc-pool.h"
+#include "sreal.h"
+#include "symbol-summary.h"
+#include "ipa-cp.h"
+#include "ipa-prop.h"
 
 /* The file implements the tail recursion elimination.  It is also used to
    analyze the tail calls in general, passing the results to the rtl level
@@ -483,7 +489,7 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail,
 	{
 	  if (dump_file)
 	    fprintf (dump_file, "Basic block %d has extra exit edges\n",
-			    bb->index);
+		     bb->index);
 	  return;
 	}
       if (!cfun->has_musttail)
@@ -517,7 +523,8 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail,
 	  if (bad_stmt)
 	    {
 	      maybe_error_musttail (call,
-			      _("memory reference or volatile after call"));
+				    _("memory reference or volatile after "
+				      "call"));
 	      return;
 	    }
 	  ass_var = gimple_call_lhs (call);
@@ -597,10 +604,10 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail,
   {
     if (stmt == last_stmt)
       maybe_error_musttail (call,
-			  _("call may throw exception that does not propagate"));
+			    _("call may throw exception that does not "
+			      "propagate"));
     else
-      maybe_error_musttail (call,
-			  _("code between call and return"));
+      maybe_error_musttail (call, _("code between call and return"));
     return;
   }
 
@@ -715,7 +722,8 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail,
 	    {
 	      if (local_live_vars)
 		BITMAP_FREE (local_live_vars);
-	      maybe_error_musttail (call, _("call invocation refers to locals"));
+	      maybe_error_musttail (call,
+				    _("call invocation refers to locals"));
 	      return;
 	    }
 	  else
@@ -724,7 +732,8 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail,
 	      if (bitmap_bit_p (local_live_vars, *v))
 		{
 		  BITMAP_FREE (local_live_vars);
-		  maybe_error_musttail (call, _("call invocation refers to locals"));
+		  maybe_error_musttail (call,
+					_("call invocation refers to locals"));
 		  return;
 		}
 	    }
@@ -833,15 +842,39 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail,
       && (ret_var != ass_var
 	  && !(is_empty_type (TREE_TYPE (ret_var)) && !ass_var)))
     {
-      maybe_error_musttail (call, _("call uses return slot"));
-      return;
+      bool ok = false;
+      value_range val;
+      tree valr;
+      /* If IPA-VRP proves called function always returns a singleton range,
+	 the return value is replaced by the only value in that range.
+	 For tail call purposes, pretend such replacement didn't happen.  */
+      if (ass_var == NULL_TREE
+	  && !tail_recursion
+	  && TREE_CONSTANT (ret_var))
+	if (tree type = gimple_range_type (call))
+	  if (tree callee = gimple_call_fndecl (call))
+	    if ((INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
+		&& useless_type_conversion_p (TREE_TYPE (TREE_TYPE (callee)),
+					      type)
+		&& useless_type_conversion_p (TREE_TYPE (ret_var), type)
+		&& ipa_return_value_range (val, callee)
+		&& val.singleton_p (&valr)
+		&& operand_equal_p (ret_var, valr, 0))
+	      ok = true;
+      if (!ok)
+	{
+	  maybe_error_musttail (call,
+				_("call and return value are different"));
+	  return;
+	}
     }
 
   /* If this is not a tail recursive call, we cannot handle addends or
      multiplicands.  */
   if (!tail_recursion && (m || a))
     {
-      maybe_error_musttail (call, _("operations after non tail recursive call"));
+      maybe_error_musttail (call,
+			    _("operations after non tail recursive call"));
       return;
     }
 
@@ -849,7 +882,8 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail,
   if (m && POINTER_TYPE_P (TREE_TYPE (DECL_RESULT (current_function_decl))))
     {
       maybe_error_musttail (call,
-		      _("tail recursion with pointers can only use additions"));
+			    _("tail recursion with pointers can only use "
+			      "additions"));
       return;
     }
 
-- 
GitLab