From 87217bea3aa556779a111cec0ef45dcefd1736f6 Mon Sep 17 00:00:00 2001
From: Jennifer Schmitz <jschmitz@nvidia.com>
Date: Fri, 30 Aug 2024 06:56:52 -0700
Subject: [PATCH] SVE intrinsics: Refactor const_binop to allow constant
 folding of intrinsics.

This patch sets the stage for constant folding of binary operations for SVE
intrinsics: In fold-const.cc, the code for folding vector constants was moved
from const_binop to a new function vector_const_binop.  This function takes a
function pointer as an argument specifying how to fold the vector elements.
The intention is to call vector_const_binop from the backend with an
aarch64-specific callback function.  The code in const_binop for folding
operations where the first operand is a vector constant and the second
operand is an integer constant was also moved into vector_const_binop to
allow folding of binary SVE intrinsics where the second operand is an
integer (_n).

To allow calling poly_int_binop from the backend, the latter was made public.

The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
OK for mainline?

Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>

gcc/
	* fold-const.h: Declare vector_const_binop.
	* fold-const.cc (const_binop): Remove cases for vector constants.
	(vector_const_binop): New function that folds vector constants
	element-wise.
	(int_const_binop): Remove call to wide_int_binop.
	(poly_int_binop): Add call to wide_int_binop.

---
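Note (not part of the patch): as a rough sketch of the intended use, a backend
could wire its own element-folding callback into vector_const_binop along the
lines below.  The names aarch64_const_binop and aarch64_fold_const_operands
are hypothetical and only illustrate the interface; this patch itself adds no
backend code, and a real callback would implement intrinsic-specific
semantics rather than just deferring to int_const_binop.

/* Hypothetical backend callback: fold one pair of scalar elements.
   This sketch only folds integer elements via int_const_binop; a real
   implementation would handle aarch64-specific operation semantics.  */
static tree
aarch64_const_binop (enum tree_code code, tree arg1, tree arg2)
{
  if (TREE_CODE (arg1) == INTEGER_CST && TREE_CODE (arg2) == INTEGER_CST)
    return int_const_binop (code, arg1, arg2);
  return NULL_TREE;
}

/* Hypothetical caller: try to fold two constant operands of an intrinsic,
   delegating the per-element work to the callback above.  This covers both
   the vector/vector case and the vector/integer (_n) case and returns
   NULL_TREE if the operation cannot be folded.  */
static tree
aarch64_fold_const_operands (enum tree_code code, tree op1, tree op2)
{
  return vector_const_binop (code, op1, op2, aarch64_const_binop);
}

Because the element folder is passed as a plain function pointer, the generic
vector iteration stays in fold-const.cc and the backend only supplies the
per-element semantics.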
 gcc/fold-const.cc | 189 ++++++++++++++++++++++++----------------------
 gcc/fold-const.h  |   5 ++
 2 files changed, 105 insertions(+), 89 deletions(-)

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 81dcc13925a7..2ada59f712bb 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -1236,13 +1236,24 @@ can_min_p (const_tree arg1, const_tree arg2, poly_wide_int &res)
    produce a new constant in RES.  Return FALSE if we don't know how
    to evaluate CODE at compile-time.  */
 
-static bool
+bool
 poly_int_binop (poly_wide_int &res, enum tree_code code,
		const_tree arg1, const_tree arg2,
		signop sign, wi::overflow_type *overflow)
 {
   gcc_assert (NUM_POLY_INT_COEFFS != 1);
   gcc_assert (poly_int_tree_p (arg1) && poly_int_tree_p (arg2));
+
+  if (TREE_CODE (arg1) == INTEGER_CST && TREE_CODE (arg2) == INTEGER_CST)
+    {
+      wide_int warg1 = wi::to_wide (arg1), wi_res;
+      wide_int warg2 = wi::to_wide (arg2, TYPE_PRECISION (TREE_TYPE (arg1)));
+      if (!wide_int_binop (wi_res, code, warg1, warg2, sign, overflow))
+	return NULL_TREE;
+      res = wi_res;
+      return true;
+    }
+
   switch (code)
     {
     case PLUS_EXPR:
@@ -1304,17 +1315,9 @@ int_const_binop (enum tree_code code, const_tree arg1, const_tree arg2,
   signop sign = TYPE_SIGN (type);
   wi::overflow_type overflow = wi::OVF_NONE;
 
-  if (TREE_CODE (arg1) == INTEGER_CST && TREE_CODE (arg2) == INTEGER_CST)
-    {
-      wide_int warg1 = wi::to_wide (arg1), res;
-      wide_int warg2 = wi::to_wide (arg2, TYPE_PRECISION (type));
-      if (!wide_int_binop (res, code, warg1, warg2, sign, &overflow))
-	return NULL_TREE;
-      poly_res = res;
-    }
-  else if (!poly_int_tree_p (arg1)
-	   || !poly_int_tree_p (arg2)
-	   || !poly_int_binop (poly_res, code, arg1, arg2, sign, &overflow))
+  if (!poly_int_tree_p (arg1)
+      || !poly_int_tree_p (arg2)
+      || !poly_int_binop (poly_res, code, arg1, arg2, sign, &overflow))
     return NULL_TREE;
   return force_fit_type (type, poly_res, overflowable,
			 (((sign == SIGNED || overflowable == -1)
@@ -1365,6 +1368,90 @@ simplify_const_binop (tree_code code, tree op, tree other_op,
 
   return NULL_TREE;
 }
+/* If ARG1 and ARG2 are constants, and if performing CODE on them would
+   be an elementwise vector operation, try to fold the operation to a
+   constant vector, using ELT_CONST_BINOP to fold each element.  Return
+   the folded value on success, otherwise return null.  */
+tree
+vector_const_binop (tree_code code, tree arg1, tree arg2,
+		    tree (*elt_const_binop) (enum tree_code, tree, tree))
+{
+  if (TREE_CODE (arg1) == VECTOR_CST && TREE_CODE (arg2) == VECTOR_CST
+      && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)),
+		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg2))))
+    {
+      tree type = TREE_TYPE (arg1);
+      bool step_ok_p;
+      if (VECTOR_CST_STEPPED_P (arg1)
+	  && VECTOR_CST_STEPPED_P (arg2))
+	/* We can operate directly on the encoding if:
+
+	     a3 - a2 == a2 - a1 && b3 - b2 == b2 - b1
+	   implies
+	     (a3 op b3) - (a2 op b2) == (a2 op b2) - (a1 op b1)
+
+	   Addition and subtraction are the supported operators
+	   for which this is true.  */
+	step_ok_p = (code == PLUS_EXPR || code == MINUS_EXPR);
+      else if (VECTOR_CST_STEPPED_P (arg1))
+	/* We can operate directly on stepped encodings if:
+
+	     a3 - a2 == a2 - a1
+	   implies:
+	     (a3 op c) - (a2 op c) == (a2 op c) - (a1 op c)
+
+	   which is true if (x -> x op c) distributes over addition.  */
+	step_ok_p = distributes_over_addition_p (code, 1);
+      else
+	/* Similarly in reverse.  */
+	step_ok_p = distributes_over_addition_p (code, 2);
+      tree_vector_builder elts;
+      if (!elts.new_binary_operation (type, arg1, arg2, step_ok_p))
+	return NULL_TREE;
+      unsigned int count = elts.encoded_nelts ();
+      for (unsigned int i = 0; i < count; ++i)
+	{
+	  tree elem1 = VECTOR_CST_ELT (arg1, i);
+	  tree elem2 = VECTOR_CST_ELT (arg2, i);
+
+	  tree elt = elt_const_binop (code, elem1, elem2);
+
+	  /* It is possible that const_binop cannot handle the given
+	     code and return NULL_TREE */
+	  if (elt == NULL_TREE)
+	    return NULL_TREE;
+	  elts.quick_push (elt);
+	}
+
+      return elts.build ();
+    }
+
+  if (TREE_CODE (arg1) == VECTOR_CST
+      && TREE_CODE (arg2) == INTEGER_CST)
+    {
+      tree type = TREE_TYPE (arg1);
+      bool step_ok_p = distributes_over_addition_p (code, 1);
+      tree_vector_builder elts;
+      if (!elts.new_unary_operation (type, arg1, step_ok_p))
+	return NULL_TREE;
+      unsigned int count = elts.encoded_nelts ();
+      for (unsigned int i = 0; i < count; ++i)
+	{
+	  tree elem1 = VECTOR_CST_ELT (arg1, i);
+
+	  tree elt = elt_const_binop (code, elem1, arg2);
+
+	  /* It is possible that const_binop cannot handle the given
+	     code and return NULL_TREE.  */
+	  if (elt == NULL_TREE)
+	    return NULL_TREE;
+	  elts.quick_push (elt);
+	}
+
+      return elts.build ();
+    }
+  return NULL_TREE;
+}
 
 /* Combine two constants ARG1 and ARG2 under operation CODE to produce a new
    constant.  We assume ARG1 and ARG2 have the same data type, or at least
@@ -1677,83 +1764,7 @@ const_binop (enum tree_code code, tree arg1, tree arg2)
       && (simplified = simplify_const_binop (code, arg2, arg1, 1)))
     return simplified;
 
-  if (TREE_CODE (arg1) == VECTOR_CST
-      && TREE_CODE (arg2) == VECTOR_CST
-      && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)),
-		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg2))))
-    {
-      tree type = TREE_TYPE (arg1);
-      bool step_ok_p;
-      if (VECTOR_CST_STEPPED_P (arg1)
-	  && VECTOR_CST_STEPPED_P (arg2))
-	/* We can operate directly on the encoding if:
-
-	     a3 - a2 == a2 - a1 && b3 - b2 == b2 - b1
-	   implies
-	     (a3 op b3) - (a2 op b2) == (a2 op b2) - (a1 op b1)
-
-	   Addition and subtraction are the supported operators
-	   for which this is true.  */
-	step_ok_p = (code == PLUS_EXPR || code == MINUS_EXPR);
-      else if (VECTOR_CST_STEPPED_P (arg1))
-	/* We can operate directly on stepped encodings if:
-
-	     a3 - a2 == a2 - a1
-	   implies:
-	     (a3 op c) - (a2 op c) == (a2 op c) - (a1 op c)
-
-	   which is true if (x -> x op c) distributes over addition.  */
-	step_ok_p = distributes_over_addition_p (code, 1);
-      else
-	/* Similarly in reverse.  */
-	step_ok_p = distributes_over_addition_p (code, 2);
-      tree_vector_builder elts;
-      if (!elts.new_binary_operation (type, arg1, arg2, step_ok_p))
-	return NULL_TREE;
-      unsigned int count = elts.encoded_nelts ();
-      for (unsigned int i = 0; i < count; ++i)
-	{
-	  tree elem1 = VECTOR_CST_ELT (arg1, i);
-	  tree elem2 = VECTOR_CST_ELT (arg2, i);
-
-	  tree elt = const_binop (code, elem1, elem2);
-
-	  /* It is possible that const_binop cannot handle the given
-	     code and return NULL_TREE */
-	  if (elt == NULL_TREE)
-	    return NULL_TREE;
-	  elts.quick_push (elt);
-	}
-
-      return elts.build ();
-    }
-
-  /* Shifts allow a scalar offset for a vector.  */
-  if (TREE_CODE (arg1) == VECTOR_CST
-      && TREE_CODE (arg2) == INTEGER_CST)
-    {
-      tree type = TREE_TYPE (arg1);
-      bool step_ok_p = distributes_over_addition_p (code, 1);
-      tree_vector_builder elts;
-      if (!elts.new_unary_operation (type, arg1, step_ok_p))
-	return NULL_TREE;
-      unsigned int count = elts.encoded_nelts ();
-      for (unsigned int i = 0; i < count; ++i)
-	{
-	  tree elem1 = VECTOR_CST_ELT (arg1, i);
-
-	  tree elt = const_binop (code, elem1, arg2);
-
-	  /* It is possible that const_binop cannot handle the given
-	     code and return NULL_TREE.  */
-	  if (elt == NULL_TREE)
-	    return NULL_TREE;
-	  elts.quick_push (elt);
-	}
-
-      return elts.build ();
-    }
-  return NULL_TREE;
+  return vector_const_binop (code, arg1, arg2, const_binop);
 }
 
 /* Overload that adds a TYPE parameter to be able to dispatch
diff --git a/gcc/fold-const.h b/gcc/fold-const.h
index b82ef137e2f2..3e3998b57b04 100644
--- a/gcc/fold-const.h
+++ b/gcc/fold-const.h
@@ -126,6 +126,9 @@ extern tree fold_vec_perm (tree, tree, tree, const vec_perm_indices &);
 extern bool wide_int_binop (wide_int &res, enum tree_code,
			     const wide_int &arg1, const wide_int &arg2,
			     signop, wi::overflow_type *);
+extern bool poly_int_binop (poly_wide_int &res, enum tree_code,
+			    const_tree, const_tree, signop,
+			    wi::overflow_type *);
 extern tree int_const_binop (enum tree_code, const_tree, const_tree, int = 1);
 #define build_fold_addr_expr(T)\
	build_fold_addr_expr_loc (UNKNOWN_LOCATION, (T))
@@ -218,6 +221,8 @@ extern bool simple_condition_p (tree);
 extern tree exact_inverse (tree, tree);
 extern bool expr_not_equal_to (tree t, const wide_int &);
 extern tree const_unop (enum tree_code, tree, tree);
+extern tree vector_const_binop (enum tree_code, tree, tree,
+				tree (*) (enum tree_code, tree, tree));
 extern tree const_binop (enum tree_code, tree, tree, tree);
 extern bool negate_mathfn_p (combined_fn);
 extern const char *getbyterep (tree, unsigned HOST_WIDE_INT *);
-- 
GitLab