From 1081f4cb34ea22e6ba07ddcb88cada3ec60bc9c4 Mon Sep 17 00:00:00 2001 From: Richard Biener <rguenther@suse.de> Date: Thu, 17 Oct 2024 10:27:58 +0200 Subject: [PATCH] tree-optimization/117172 - single lane SLP for non-linear inductions The following adds single-lane SLP support for vectorizing non-linear inductions. This fixes a bunch of i386-specific testcases with --param vect-force-slp=1. PR tree-optimization/117172 * tree-vect-loop.cc (vectorizable_nonlinear_induction): Add single-lane SLP support. --- gcc/tree-vect-loop.cc | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index d1f1edc704c3..50a1531f4c3a 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -10006,10 +10006,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, gcc_assert (induction_type > vect_step_op_add); - if (slp_node) - ncopies = 1; - else - ncopies = vect_get_num_copies (loop_vinfo, vectype); + ncopies = vect_get_num_copies (loop_vinfo, slp_node, vectype); gcc_assert (ncopies >= 1); /* FORNOW. Only handle nonlinear induction in the same loop. */ @@ -10024,9 +10021,10 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, iv_loop = loop; gcc_assert (iv_loop == (gimple_bb (phi))->loop_father); - /* TODO: Support slp for nonlinear iv. There should be separate vector iv - update for each iv and a permutation to generate wanted vector iv. */ - if (slp_node) + /* TODO: Support multi-lane SLP for nonlinear iv. There should be separate + vector iv update for each iv and a permutation to generate wanted + vector iv. 
*/ + if (slp_node && SLP_TREE_LANES (slp_node) > 1) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -10237,8 +10235,13 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), UNKNOWN_LOCATION); - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (induction_phi); - *vec_stmt = induction_phi; + if (slp_node) + slp_node->push_vec_def (induction_phi); + else + { + STMT_VINFO_VEC_STMTS (stmt_info).safe_push (induction_phi); + *vec_stmt = induction_phi; + } /* In case that vectorization factor (VF) is bigger than the number of elements that we can fit in a vectype (nunits), we have to generate @@ -10268,7 +10271,10 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, induction_type); gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); new_stmt = SSA_NAME_DEF_STMT (vec_def); - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); + if (slp_node) + slp_node->push_vec_def (new_stmt); + else + STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); } } -- GitLab