diff --git a/gcc/testsuite/gcc.target/aarch64/pr110449.c b/gcc/testsuite/gcc.target/aarch64/pr110449.c index bb3b6dcfe08d3bd2a763990d1d7458036383eed7..51ca3f4b816c4dd931a18324d26ecb11f5382fd9 100644 --- a/gcc/testsuite/gcc.target/aarch64/pr110449.c +++ b/gcc/testsuite/gcc.target/aarch64/pr110449.c @@ -1,8 +1,10 @@ /* { dg-do compile } */ /* { dg-options "-Ofast -mcpu=neoverse-n2 --param aarch64-vect-unroll-limit=2" } */ -/* { dg-final { scan-assembler-not "8.0e\\+0" } } */ +/* { dg-final { scan-assembler {, #?8.0e\+0} } } */ +/* { dg-final { scan-assembler-not {\tmov\tv} } } */ -/* Calcualte the vectorized induction with smaller step for an unrolled loop. +/* Insert the induction IV updates before the exit condition, rather than + at the start of the loop body. before (suggested_unroll_factor=2): fmov s30, 8.0e+0 @@ -19,15 +21,16 @@ bne .L6 after: - fmov s31, 4.0e+0 - dup v29.4s, v31.s[0] - .L6: - fadd v30.4s, v31.4s, v29.4s - stp q31, q30, [x0] - add x0, x0, 32 - fadd v31.4s, v29.4s, v30.4s - cmp x0, x1 - bne .L6 */ + fmov s31, 8.0e+0 + fmov s29, 4.0e+0 + dup v31.4s, v31.s[0] + dup v29.4s, v29.s[0] + .L2: + fadd v30.4s, v0.4s, v29.4s + stp q0, q30, [x0], 32 + fadd v0.4s, v0.4s, v31.4s + cmp x1, x0 + bne .L2 */ void foo2 (float *arr, float freq, float step) diff --git a/gcc/tree-ssa-loop-manip.cc b/gcc/tree-ssa-loop-manip.cc index 6ceb9df370b25786c18af6289b6146fcb474109e..2907fa6532d831f0001fa1df9a7623c06849c91f 100644 --- a/gcc/tree-ssa-loop-manip.cc +++ b/gcc/tree-ssa-loop-manip.cc @@ -47,6 +47,39 @@ along with GCC; see the file COPYING3. If not see so that we can free them all at once. */ static bitmap_obstack loop_renamer_obstack; +/* Insert IV increment statements STMTS after INCR_POS if AFTER is true, + otherwise before it. INCR_POS and AFTER can be computed using + standard_iv_increment_position. STMTS may be given a new location. 
*/ + +void +insert_iv_increment (gimple_stmt_iterator *incr_pos, bool after, + gimple_seq stmts) +{ + /* Prevent the increment from inheriting a bogus location if it is not put + immediately after a statement whose location is known. */ + if (after) + { + gimple_stmt_iterator gsi = *incr_pos; + if (!gsi_end_p (gsi)) + gsi_next_nondebug (&gsi); + if (gsi_end_p (gsi)) + { + edge e = single_succ_edge (gsi_bb (*incr_pos)); + gimple_seq_set_location (stmts, e->goto_locus); + } + gsi_insert_seq_after (incr_pos, stmts, GSI_NEW_STMT); + } + else + { + gimple_stmt_iterator gsi = *incr_pos; + if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi))) + gsi_next_nondebug (&gsi); + if (!gsi_end_p (gsi)) + gimple_seq_set_location (stmts, gimple_location (gsi_stmt (gsi))); + gsi_insert_seq_before (incr_pos, stmts, GSI_NEW_STMT); + } +} + /* Creates an induction variable with value BASE (+/-) STEP * iteration in LOOP. If INCR_OP is PLUS_EXPR, the induction variable is BASE + STEP * iteration. If INCR_OP is MINUS_EXPR, the induction variable is BASE - STEP * iteration. @@ -63,7 +96,6 @@ create_iv (tree base, tree_code incr_op, tree step, tree var, class loop *loop, gimple_stmt_iterator *incr_pos, bool after, tree *var_before, tree *var_after) { - gassign *stmt; gphi *phi; tree initial, step1; gimple_seq stmts; @@ -126,30 +158,10 @@ create_iv (tree base, tree_code incr_op, tree step, tree var, class loop *loop, if (stmts) gsi_insert_seq_on_edge_immediate (pe, stmts); - stmt = gimple_build_assign (va, incr_op, vb, step); - /* Prevent the increment from inheriting a bogus location if it is not put - immediately after a statement whose location is known. 
*/ - if (after) - { - gimple_stmt_iterator gsi = *incr_pos; - if (!gsi_end_p (gsi)) - gsi_next_nondebug (&gsi); - if (gsi_end_p (gsi)) - { - edge e = single_succ_edge (gsi_bb (*incr_pos)); - gimple_set_location (stmt, e->goto_locus); - } - gsi_insert_after (incr_pos, stmt, GSI_NEW_STMT); - } - else - { - gimple_stmt_iterator gsi = *incr_pos; - if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi))) - gsi_next_nondebug (&gsi); - if (!gsi_end_p (gsi)) - gimple_set_location (stmt, gimple_location (gsi_stmt (gsi))); - gsi_insert_before (incr_pos, stmt, GSI_NEW_STMT); - } + gimple_seq incr_stmts = nullptr; + gimple_seq_add_stmt (&incr_stmts, + gimple_build_assign (va, incr_op, vb, step)); + insert_iv_increment (incr_pos, after, incr_stmts); initial = force_gimple_operand (base, &stmts, true, var); if (stmts) diff --git a/gcc/tree-ssa-loop-manip.h b/gcc/tree-ssa-loop-manip.h index b1f65e3c0717e64e0723f892f40d5bb499c30f81..80f680565c0bb1872d56e2089d7ed959a1d19c07 100644 --- a/gcc/tree-ssa-loop-manip.h +++ b/gcc/tree-ssa-loop-manip.h @@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see typedef void (*transform_callback)(class loop *, void *); +extern void insert_iv_increment (gimple_stmt_iterator *, bool, gimple_seq); extern void create_iv (tree, tree_code, tree, tree, class loop *, gimple_stmt_iterator *, bool, tree *, tree *); extern void rewrite_into_loop_closed_ssa (bitmap, unsigned); diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 03426207879fe506f7d31c61512c74fe27fb8281..eea0b89db69e280046821ac7f5704ff3e19bcdbe 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -10580,6 +10580,10 @@ vectorizable_induction (loop_vec_info loop_vinfo, [i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2]. */ if (slp_node) { + gimple_stmt_iterator incr_si; + bool insert_after; + standard_iv_increment_position (iv_loop, &incr_si, &insert_after); + /* The initial values are vectorized, but any lanes > group_size need adjustment. 
*/ slp_tree init_node @@ -10810,7 +10814,7 @@ vectorizable_induction (loop_vec_info loop_vinfo, vec_def = gimple_build (&stmts, PLUS_EXPR, step_vectype, vec_def, up); vec_def = gimple_convert (&stmts, vectype, vec_def); - gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); + insert_iv_increment (&incr_si, insert_after, stmts); add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), UNKNOWN_LOCATION);