From 87217bea3aa556779a111cec0ef45dcefd1736f6 Mon Sep 17 00:00:00 2001
From: Jennifer Schmitz <jschmitz@nvidia.com>
Date: Fri, 30 Aug 2024 06:56:52 -0700
Subject: [PATCH] SVE intrinsics: Refactor const_binop to allow constant
 folding of intrinsics.

This patch sets the stage for constant folding of binary operations for SVE
intrinsics:
In fold-const.cc, the code for folding vector constants was moved from
const_binop to a new function vector_const_binop. This function takes a
function pointer as argument specifying how to fold the vector elements.
The intention is to call vector_const_binop from the backend with an
aarch64-specific callback function.
The code in const_binop for folding operations where the first operand is a
vector constant and the second argument is an integer constant was also moved
into vector_const_binop to allow folding of binary SVE intrinsics where
the second operand is an integer (_n).
To allow calling poly_int_binop from the backend, the latter was made public.

The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
OK for mainline?

Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>

gcc/
	* fold-const.h: Declare vector_const_binop and poly_int_binop.
	* fold-const.cc (const_binop): Remove cases for vector constants.
	(vector_const_binop): New function that folds vector constants
	element-wise.
	(int_const_binop): Remove call to wide_int_binop.
	(poly_int_binop): Make public.  Add call to wide_int_binop.
---
 gcc/fold-const.cc | 189 ++++++++++++++++++++++++----------------------
 gcc/fold-const.h  |   5 ++
 2 files changed, 105 insertions(+), 89 deletions(-)

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 81dcc13925a7..2ada59f712bb 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -1236,13 +1236,24 @@ can_min_p (const_tree arg1, const_tree arg2, poly_wide_int &res)
    produce a new constant in RES.  Return FALSE if we don't know how
    to evaluate CODE at compile-time.  */
 
-static bool
+bool
 poly_int_binop (poly_wide_int &res, enum tree_code code,
 		const_tree arg1, const_tree arg2,
 		signop sign, wi::overflow_type *overflow)
 {
   gcc_assert (NUM_POLY_INT_COEFFS != 1);
   gcc_assert (poly_int_tree_p (arg1) && poly_int_tree_p (arg2));
+
+  if (TREE_CODE (arg1) == INTEGER_CST && TREE_CODE (arg2) == INTEGER_CST)
+    {
+      wide_int warg1 = wi::to_wide (arg1), wi_res;
+      wide_int warg2 = wi::to_wide (arg2, TYPE_PRECISION (TREE_TYPE (arg1)));
+      if (!wide_int_binop (wi_res, code, warg1, warg2, sign, overflow))
+	return NULL_TREE;
+      res = wi_res;
+      return true;
+    }
+
   switch (code)
     {
     case PLUS_EXPR:
@@ -1304,17 +1315,9 @@ int_const_binop (enum tree_code code, const_tree arg1, const_tree arg2,
   signop sign = TYPE_SIGN (type);
   wi::overflow_type overflow = wi::OVF_NONE;
 
-  if (TREE_CODE (arg1) == INTEGER_CST && TREE_CODE (arg2) == INTEGER_CST)
-    {
-      wide_int warg1 = wi::to_wide (arg1), res;
-      wide_int warg2 = wi::to_wide (arg2, TYPE_PRECISION (type));
-      if (!wide_int_binop (res, code, warg1, warg2, sign, &overflow))
-	return NULL_TREE;
-      poly_res = res;
-    }
-  else if (!poly_int_tree_p (arg1)
-	   || !poly_int_tree_p (arg2)
-	   || !poly_int_binop (poly_res, code, arg1, arg2, sign, &overflow))
+  if (!poly_int_tree_p (arg1)
+      || !poly_int_tree_p (arg2)
+      || !poly_int_binop (poly_res, code, arg1, arg2, sign, &overflow))
     return NULL_TREE;
   return force_fit_type (type, poly_res, overflowable,
 			 (((sign == SIGNED || overflowable == -1)
@@ -1365,6 +1368,90 @@ simplify_const_binop (tree_code code, tree op, tree other_op,
   return NULL_TREE;
 }
 
+/* If ARG1 and ARG2 are VECTOR_CSTs with equal TYPE_VECTOR_SUBPARTS, or
+   ARG1 is a VECTOR_CST and ARG2 an INTEGER_CST, try to fold CODE to a
+   constant vector, using ELT_CONST_BINOP to fold each encoded element.
+   Return the folded value on success, otherwise return NULL_TREE.  */
+tree
+vector_const_binop (tree_code code, tree arg1, tree arg2,
+		    tree (*elt_const_binop) (enum tree_code, tree, tree))
+{
+  if (TREE_CODE (arg1) == VECTOR_CST && TREE_CODE (arg2) == VECTOR_CST
+      && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)),
+		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg2))))
+    {
+      tree type = TREE_TYPE (arg1);
+      bool step_ok_p;
+      if (VECTOR_CST_STEPPED_P (arg1)
+	  && VECTOR_CST_STEPPED_P (arg2))
+	/* We can operate directly on the encoding if:
+
+	      a3 - a2 == a2 - a1 && b3 - b2 == b2 - b1
+	    implies
+	      (a3 op b3) - (a2 op b2) == (a2 op b2) - (a1 op b1)
+
+	   Addition and subtraction are the supported operators
+	   for which this is true.  */
+	step_ok_p = (code == PLUS_EXPR || code == MINUS_EXPR);
+      else if (VECTOR_CST_STEPPED_P (arg1))
+	/* We can operate directly on stepped encodings if:
+
+	     a3 - a2 == a2 - a1
+	   implies:
+	     (a3 op c) - (a2 op c) == (a2 op c) - (a1 op c)
+
+	   which is true if (x -> x op c) distributes over addition.  */
+	step_ok_p = distributes_over_addition_p (code, 1);
+      else
+	/* Similarly in reverse.  */
+	step_ok_p = distributes_over_addition_p (code, 2);
+      tree_vector_builder elts;
+      if (!elts.new_binary_operation (type, arg1, arg2, step_ok_p))
+	return NULL_TREE;
+      unsigned int count = elts.encoded_nelts ();
+      for (unsigned int i = 0; i < count; ++i)
+	{
+	  tree elem1 = VECTOR_CST_ELT (arg1, i);
+	  tree elem2 = VECTOR_CST_ELT (arg2, i);
+
+	  tree elt = elt_const_binop (code, elem1, elem2);
+
+	  /* It is possible that ELT_CONST_BINOP cannot handle the given
+	     code and returns NULL_TREE.  */
+	  if (elt == NULL_TREE)
+	    return NULL_TREE;
+	  elts.quick_push (elt);
+	}
+
+      return elts.build ();
+    }
+
+  if (TREE_CODE (arg1) == VECTOR_CST
+      && TREE_CODE (arg2) == INTEGER_CST)
+    {
+      tree type = TREE_TYPE (arg1);
+      bool step_ok_p = distributes_over_addition_p (code, 1);
+      tree_vector_builder elts;
+      if (!elts.new_unary_operation (type, arg1, step_ok_p))
+	return NULL_TREE;
+      unsigned int count = elts.encoded_nelts ();
+      for (unsigned int i = 0; i < count; ++i)
+	{
+	  tree elem1 = VECTOR_CST_ELT (arg1, i);
+
+	  tree elt = elt_const_binop (code, elem1, arg2);
+
+	  /* It is possible that ELT_CONST_BINOP cannot handle the given
+	     code and returns NULL_TREE.  */
+	  if (elt == NULL_TREE)
+	    return NULL_TREE;
+	  elts.quick_push (elt);
+	}
+
+      return elts.build ();
+    }
+  return NULL_TREE;
+}
 
 /* Combine two constants ARG1 and ARG2 under operation CODE to produce a new
    constant.  We assume ARG1 and ARG2 have the same data type, or at least
@@ -1677,83 +1764,7 @@ const_binop (enum tree_code code, tree arg1, tree arg2)
       && (simplified = simplify_const_binop (code, arg2, arg1, 1)))
     return simplified;
 
-  if (TREE_CODE (arg1) == VECTOR_CST
-      && TREE_CODE (arg2) == VECTOR_CST
-      && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)),
-		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg2))))
-    {
-      tree type = TREE_TYPE (arg1);
-      bool step_ok_p;
-      if (VECTOR_CST_STEPPED_P (arg1)
-	  && VECTOR_CST_STEPPED_P (arg2))
-	/* We can operate directly on the encoding if:
-
-	      a3 - a2 == a2 - a1 && b3 - b2 == b2 - b1
-	    implies
-	      (a3 op b3) - (a2 op b2) == (a2 op b2) - (a1 op b1)
-
-	   Addition and subtraction are the supported operators
-	   for which this is true.  */
-	step_ok_p = (code == PLUS_EXPR || code == MINUS_EXPR);
-      else if (VECTOR_CST_STEPPED_P (arg1))
-	/* We can operate directly on stepped encodings if:
-
-	     a3 - a2 == a2 - a1
-	   implies:
-	     (a3 op c) - (a2 op c) == (a2 op c) - (a1 op c)
-
-	   which is true if (x -> x op c) distributes over addition.  */
-	step_ok_p = distributes_over_addition_p (code, 1);
-      else
-	/* Similarly in reverse.  */
-	step_ok_p = distributes_over_addition_p (code, 2);
-      tree_vector_builder elts;
-      if (!elts.new_binary_operation (type, arg1, arg2, step_ok_p))
-	return NULL_TREE;
-      unsigned int count = elts.encoded_nelts ();
-      for (unsigned int i = 0; i < count; ++i)
-	{
-	  tree elem1 = VECTOR_CST_ELT (arg1, i);
-	  tree elem2 = VECTOR_CST_ELT (arg2, i);
-
-	  tree elt = const_binop (code, elem1, elem2);
-
-	  /* It is possible that const_binop cannot handle the given
-	     code and return NULL_TREE */
-	  if (elt == NULL_TREE)
-	    return NULL_TREE;
-	  elts.quick_push (elt);
-	}
-
-      return elts.build ();
-    }
-
-  /* Shifts allow a scalar offset for a vector.  */
-  if (TREE_CODE (arg1) == VECTOR_CST
-      && TREE_CODE (arg2) == INTEGER_CST)
-    {
-      tree type = TREE_TYPE (arg1);
-      bool step_ok_p = distributes_over_addition_p (code, 1);
-      tree_vector_builder elts;
-      if (!elts.new_unary_operation (type, arg1, step_ok_p))
-	return NULL_TREE;
-      unsigned int count = elts.encoded_nelts ();
-      for (unsigned int i = 0; i < count; ++i)
-	{
-	  tree elem1 = VECTOR_CST_ELT (arg1, i);
-
-	  tree elt = const_binop (code, elem1, arg2);
-
-	  /* It is possible that const_binop cannot handle the given
-	     code and return NULL_TREE.  */
-	  if (elt == NULL_TREE)
-	    return NULL_TREE;
-	  elts.quick_push (elt);
-	}
-
-      return elts.build ();
-    }
-  return NULL_TREE;
+  return vector_const_binop (code, arg1, arg2, const_binop);
 }
 
 /* Overload that adds a TYPE parameter to be able to dispatch
diff --git a/gcc/fold-const.h b/gcc/fold-const.h
index b82ef137e2f2..3e3998b57b04 100644
--- a/gcc/fold-const.h
+++ b/gcc/fold-const.h
@@ -126,6 +126,9 @@ extern tree fold_vec_perm (tree, tree, tree, const vec_perm_indices &);
 extern bool wide_int_binop (wide_int &res, enum tree_code,
 			    const wide_int &arg1, const wide_int &arg2,
 			    signop, wi::overflow_type *);
+extern bool poly_int_binop (poly_wide_int &res, enum tree_code,
+			    const_tree, const_tree, signop,
+			    wi::overflow_type *);
 extern tree int_const_binop (enum tree_code, const_tree, const_tree, int = 1);
 #define build_fold_addr_expr(T)\
         build_fold_addr_expr_loc (UNKNOWN_LOCATION, (T))
@@ -218,6 +221,8 @@ extern bool simple_condition_p (tree);
 extern tree exact_inverse (tree, tree);
 extern bool expr_not_equal_to (tree t, const wide_int &);
 extern tree const_unop (enum tree_code, tree, tree);
+extern tree vector_const_binop (enum tree_code, tree, tree,
+				tree (*) (enum tree_code, tree, tree));
 extern tree const_binop (enum tree_code, tree, tree, tree);
 extern bool negate_mathfn_p (combined_fn);
 extern const char *getbyterep (tree, unsigned HOST_WIDE_INT *);
-- 
GitLab