diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount-max.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount-max.c new file mode 100644 index 0000000000000000000000000000000000000000..ca7204cbc3cea636183408e24d7dd36d702ffdb2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount-max.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-tree-loop-optimize -fdump-tree-optimized" } */ + +#define PREC (__CHAR_BIT__) + +int count1 (unsigned char b) { + int c = 0; + + while (b) { + b &= b - 1; + c++; + } + if (c <= PREC) + return 0; + else + return 34567; +} + +int count2 (unsigned char b) { + int c = 0; + + while (b) { + b &= b - 1; + c++; + } + if (c <= PREC - 1) + return 0; + else + return 76543; +} + +/* { dg-final { scan-tree-dump-times "34567" 0 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "76543" 1 "optimized" } } */ diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc index 0af34e46580bb9a6f9b40e09c9f29b8454a4aaf6..fece876099c1687569d6351e7d2416ea6acae5b5 100644 --- a/gcc/tree-ssa-loop-niter.cc +++ b/gcc/tree-ssa-loop-niter.cc @@ -2026,6 +2026,48 @@ number_of_iterations_cond (class loop *loop, return ret; } +/* Return an expression that computes the popcount of src. */ + +static tree +build_popcount_expr (tree src) +{ + tree fn; + int prec = TYPE_PRECISION (TREE_TYPE (src)); + int i_prec = TYPE_PRECISION (integer_type_node); + int li_prec = TYPE_PRECISION (long_integer_type_node); + int lli_prec = TYPE_PRECISION (long_long_integer_type_node); + if (prec <= i_prec) + fn = builtin_decl_implicit (BUILT_IN_POPCOUNT); + else if (prec == li_prec) + fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL); + else if (prec == lli_prec || prec == 2 * lli_prec) + fn = builtin_decl_implicit (BUILT_IN_POPCOUNTLL); + else + return NULL_TREE; + + tree utype = unsigned_type_for (TREE_TYPE (src)); + src = fold_convert (utype, src); + if (prec < i_prec) + src = fold_convert (unsigned_type_node, src); + tree call; + if (prec == 2 * lli_prec) + { + tree src1 = fold_convert (long_long_unsigned_type_node, + fold_build2 (RSHIFT_EXPR, TREE_TYPE (src), + unshare_expr (src), + build_int_cst (integer_type_node, + lli_prec))); + tree src2 = fold_convert (long_long_unsigned_type_node, src); + tree call1 = build_call_expr (fn, 1, src1); + tree call2 = build_call_expr (fn, 1, src2); + call = fold_build2 (PLUS_EXPR, integer_type_node, call1, call2); + } + else + call = build_call_expr (fn, 1, src); + + return call; +} + /* Utility function to check if OP is defined by a stmt that is a val - 1. */ @@ -2041,45 +2083,18 @@ ssa_defined_by_minus_one_stmt_p (tree op, tree val) && integer_minus_onep (gimple_assign_rhs2 (stmt))); } -/* See if LOOP is a popcout implementation, determine NITER for the loop +/* See comment below for number_of_iterations_bitcount. + For popcount, we have: - We match: - <bb 2> - goto <bb 4> + modify: + _1 = iv_1 + -1 + iv_2 = iv_1 & _1 - <bb 3> - _1 = b_11 + -1 - b_6 = _1 & b_11 - - <bb 4> - b_11 = PHI <b_5(D)(2), b_6(3)> + test: + if (iv != 0) - exit block - if (b_11 != 0) - goto <bb 3> - else - goto <bb 5> - - OR we match copy-header version: - if (b_5 != 0) - goto <bb 3> - else - goto <bb 4> - - <bb 3> - b_11 = PHI <b_5(2), b_6(3)> - _1 = b_11 + -1 - b_6 = _1 & b_11 - - exit block - if (b_6 != 0) - goto <bb 3> - else - goto <bb 4> - - If popcount pattern, update NITER accordingly. - i.e., set NITER to __builtin_popcount (b) - return true if we did, false otherwise. + modification count: + popcount (src) */ @@ -2088,138 +2103,150 @@ number_of_iterations_popcount (loop_p loop, edge exit, enum tree_code code, class tree_niter_desc *niter) { - bool adjust = true; - tree iter; + bool modify_before_test = true; HOST_WIDE_INT max; - adjust = true; - tree fn = NULL_TREE; - - /* Check loop terminating branch is like - if (b != 0). */ - gimple *stmt = last_stmt (exit->src); - if (!stmt - || gimple_code (stmt) != GIMPLE_COND + + /* Check that condition for staying inside the loop is like + if (iv != 0). */ + gimple *cond_stmt = last_stmt (exit->src); + if (!cond_stmt + || gimple_code (cond_stmt) != GIMPLE_COND || code != NE_EXPR - || !integer_zerop (gimple_cond_rhs (stmt)) - || TREE_CODE (gimple_cond_lhs (stmt)) != SSA_NAME) + || !integer_zerop (gimple_cond_rhs (cond_stmt)) + || TREE_CODE (gimple_cond_lhs (cond_stmt)) != SSA_NAME) return false; - gimple *and_stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt)); + tree iv_2 = gimple_cond_lhs (cond_stmt); + gimple *iv_2_stmt = SSA_NAME_DEF_STMT (iv_2); - /* Depending on copy-header is performed, feeding PHI stmts might be in - the loop header or loop latch, handle this. */ - if (gimple_code (and_stmt) == GIMPLE_PHI - && gimple_bb (and_stmt) == loop->header - && gimple_phi_num_args (and_stmt) == 2 - && (TREE_CODE (gimple_phi_arg_def (and_stmt, + /* If the test comes before the iv modification, then these will actually be + iv_1 and a phi node. */ + if (gimple_code (iv_2_stmt) == GIMPLE_PHI + && gimple_bb (iv_2_stmt) == loop->header + && gimple_phi_num_args (iv_2_stmt) == 2 + && (TREE_CODE (gimple_phi_arg_def (iv_2_stmt, loop_latch_edge (loop)->dest_idx)) == SSA_NAME)) { - /* SSA used in exit condition is defined by PHI stmt - b_11 = PHI <b_5(D)(2), b_6(3)> - from the PHI stmt, get the and_stmt - b_6 = _1 & b_11. */ - tree t = gimple_phi_arg_def (and_stmt, loop_latch_edge (loop)->dest_idx); - and_stmt = SSA_NAME_DEF_STMT (t); - adjust = false; + /* iv_2 is actually one of the inputs to the phi. */ + iv_2 = gimple_phi_arg_def (iv_2_stmt, loop_latch_edge (loop)->dest_idx); + iv_2_stmt = SSA_NAME_DEF_STMT (iv_2); + modify_before_test = false; } - /* Make sure it is indeed an and stmt (b_6 = _1 & b_11). */ - if (!is_gimple_assign (and_stmt) - || gimple_assign_rhs_code (and_stmt) != BIT_AND_EXPR) + /* Make sure iv_2_stmt is an and stmt (iv_2 = _1 & iv_1). */ + if (!is_gimple_assign (iv_2_stmt) + || gimple_assign_rhs_code (iv_2_stmt) != BIT_AND_EXPR) return false; - tree b_11 = gimple_assign_rhs1 (and_stmt); - tree _1 = gimple_assign_rhs2 (and_stmt); + tree iv_1 = gimple_assign_rhs1 (iv_2_stmt); + tree _1 = gimple_assign_rhs2 (iv_2_stmt); - /* Check that _1 is defined by _b11 + -1 (_1 = b_11 + -1). - Also make sure that b_11 is the same in and_stmt and _1 defining stmt. + /* Check that _1 is defined by (_1 = iv_1 + -1). + Also make sure that _1 is the same in and_stmt and _1 defining stmt. Also canonicalize if _1 and _b11 are revrsed. */ - if (ssa_defined_by_minus_one_stmt_p (b_11, _1)) - std::swap (b_11, _1); - else if (ssa_defined_by_minus_one_stmt_p (_1, b_11)) + if (ssa_defined_by_minus_one_stmt_p (iv_1, _1)) + std::swap (iv_1, _1); + else if (ssa_defined_by_minus_one_stmt_p (_1, iv_1)) ; else return false; - /* Check the recurrence: - ... = PHI <b_5(2), b_6(3)>. */ - gimple *phi = SSA_NAME_DEF_STMT (b_11); + + /* Check the recurrence. */ + gimple *phi = SSA_NAME_DEF_STMT (iv_1); if (gimple_code (phi) != GIMPLE_PHI || (gimple_bb (phi) != loop_latch_edge (loop)->dest) - || (gimple_assign_lhs (and_stmt) - != gimple_phi_arg_def (phi, loop_latch_edge (loop)->dest_idx))) + || (iv_2 != gimple_phi_arg_def (phi, loop_latch_edge (loop)->dest_idx))) return false; - /* We found a match. Get the corresponding popcount builtin. */ + /* We found a match. */ tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx); - if (TYPE_PRECISION (TREE_TYPE (src)) <= TYPE_PRECISION (integer_type_node)) - fn = builtin_decl_implicit (BUILT_IN_POPCOUNT); - else if (TYPE_PRECISION (TREE_TYPE (src)) - == TYPE_PRECISION (long_integer_type_node)) - fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL); - else if (TYPE_PRECISION (TREE_TYPE (src)) - == TYPE_PRECISION (long_long_integer_type_node) - || (TYPE_PRECISION (TREE_TYPE (src)) - == 2 * TYPE_PRECISION (long_long_integer_type_node))) - fn = builtin_decl_implicit (BUILT_IN_POPCOUNTLL); + int src_precision = TYPE_PRECISION (TREE_TYPE (src)); - if (!fn) + /* Get the corresponding popcount builtin. */ + tree expr = build_popcount_expr (src); + + if (!expr) return false; - /* Update NITER params accordingly */ - tree utype = unsigned_type_for (TREE_TYPE (src)); - src = fold_convert (utype, src); - if (TYPE_PRECISION (TREE_TYPE (src)) < TYPE_PRECISION (integer_type_node)) - src = fold_convert (unsigned_type_node, src); - tree call; - if (TYPE_PRECISION (TREE_TYPE (src)) - == 2 * TYPE_PRECISION (long_long_integer_type_node)) + max = src_precision; + + tree may_be_zero = boolean_false_node; + + if (modify_before_test) { - int prec = TYPE_PRECISION (long_long_integer_type_node); - tree src1 = fold_convert (long_long_unsigned_type_node, - fold_build2 (RSHIFT_EXPR, TREE_TYPE (src), - unshare_expr (src), - build_int_cst (integer_type_node, - prec))); - tree src2 = fold_convert (long_long_unsigned_type_node, src); - call = build_call_expr (fn, 1, src1); - call = fold_build2 (PLUS_EXPR, TREE_TYPE (call), call, - build_call_expr (fn, 1, src2)); - call = fold_convert (utype, call); + expr = fold_build2 (MINUS_EXPR, integer_type_node, expr, + integer_one_node); + max = max - 1; + may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src, + build_zero_cst (TREE_TYPE (src))); } - else - call = fold_convert (utype, build_call_expr (fn, 1, src)); - if (adjust) - iter = fold_build2 (MINUS_EXPR, utype, call, build_int_cst (utype, 1)); - else - iter = call; - if (TREE_CODE (call) == INTEGER_CST) - max = tree_to_uhwi (call); - else - max = TYPE_PRECISION (TREE_TYPE (src)); - if (adjust) - max = max - 1; + expr = fold_convert (unsigned_type_node, expr); - niter->niter = iter; niter->assumptions = boolean_true_node; + niter->may_be_zero = simplify_using_initial_conditions (loop, may_be_zero); + niter->niter = simplify_using_initial_conditions(loop, expr); - if (adjust) - { - tree may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src, - build_zero_cst (TREE_TYPE (src))); - niter->may_be_zero - = simplify_using_initial_conditions (loop, may_be_zero); - } + if (TREE_CODE (niter->niter) == INTEGER_CST) + niter->max = tree_to_uhwi (niter->niter); else - niter->may_be_zero = boolean_false_node; + niter->max = max; - niter->max = max; niter->bound = NULL_TREE; niter->cmp = ERROR_MARK; return true; } +/* See if LOOP contains a bit counting idiom. The idiom consists of two parts: + 1. A modification to the induction variabler;. + 2. A test to determine whether or not to exit the loop. + + These can come in either order - i.e.: + + <bb 3> + iv_1 = PHI <src(2), iv_2(4)> + if (test (iv_1)) + goto <bb 4> + else + goto <bb 5> + + <bb 4> + iv_2 = modify (iv_1) + goto <bb 3> + + OR + + <bb 3> + iv_1 = PHI <src(2), iv_2(4)> + iv_2 = modify (iv_1) + + <bb 4> + if (test (iv_2)) + goto <bb 3> + else + goto <bb 5> + + The second form can be generated by copying the loop header out of the loop. + + In the first case, the number of latch executions will be equal to the + number of induction variable modifications required before the test fails. + + In the second case (modify_before_test), if we assume that the number of + modifications required before the test fails is nonzero, then the number of + latch executions will be one less than this number. + + If we recognise the pattern, then we update niter accordingly, and return + true. */ + +static bool +number_of_iterations_bitcount (loop_p loop, edge exit, + enum tree_code code, + class tree_niter_desc *niter) +{ + return number_of_iterations_popcount (loop, exit, code, niter); +} + /* Substitute NEW_TREE for OLD in EXPR and fold the result. If VALUEIZE is non-NULL then OLD and NEW_TREE are ignored and instead all SSA names are replaced with the result of calling the VALUEIZE @@ -2758,7 +2785,7 @@ number_of_iterations_exit_assumptions (class loop *loop, edge exit, tree iv0_niters = NULL_TREE; if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt), op0, &iv0, safe ? &iv0_niters : NULL, false)) - return number_of_iterations_popcount (loop, exit, code, niter); + return number_of_iterations_bitcount (loop, exit, code, niter); tree iv1_niters = NULL_TREE; if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt), op1, &iv1, safe ? &iv1_niters : NULL, false))