diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-conditional_store_1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-conditional_store_1.c new file mode 100644 index 0000000000000000000000000000000000000000..650a3bfbfb1dd44afc2d58bbe85f75f1d28b9bd0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-conditional_store_1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_float } */ + +/* { dg-additional-options "-mavx2" { target avx2 } } */ +/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */ + +void foo3 (float *restrict a, int *restrict c) +{ +#pragma GCC unroll 8 + for (int i = 0; i < 8; i++) + c[i] = a[i] > 1.0; +} + +/* { dg-final { scan-tree-dump "vectorized using SLP" "slp1" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_5.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_5.c new file mode 100644 index 0000000000000000000000000000000000000000..37d60fa76351c13980427751be4450c14617a9a9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_5.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_masked_store } */ + +/* { dg-additional-options "-mavx2" { target avx2 } } */ +/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */ + +#include <stdbool.h> + +void foo3 (float *restrict a, int *restrict b, int *restrict c, int n, int stride) +{ + if (stride <= 1) + return; + + bool ai = a[0]; + + for (int i = 0; i < n; i++) + { + int res = c[i]; + int t = b[i+stride]; + if (ai) + t = res; + c[i] = t; + } +} + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { target aarch64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_6.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_6.c new file mode 100644 index 
0000000000000000000000000000000000000000..5e1aedf3726b073c132bb64a9b474592ceb8e9b9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_6.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_masked_store } */ + +/* { dg-additional-options "-mavx2" { target avx2 } } */ +/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */ + +void foo3 (unsigned long long *restrict a, int *restrict b, int *restrict c, int n, int stride) +{ + if (stride <= 1) + return; + + for (int i = 0; i < n; i++) + { + int res = c[i]; + int t = b[i+stride]; + if (a[i]) + t = res; + c[i] = t; + } +} + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { target aarch64-*-* } } } */ diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 0f54ebe7dc48d173a620c91b61304db23c271a35..5cd4bdb32e040d83d4ceadaaf94187823f6cf070 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -12356,6 +12356,18 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) vect_schedule_slp (loop_vinfo, LOOP_VINFO_SLP_INSTANCES (loop_vinfo)); } + /* Generate the loop invariant statements. */ + if (!gimple_seq_empty_p (LOOP_VINFO_INV_PATTERN_DEF_SEQ (loop_vinfo))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "------>generating loop invariant statements\n"); + gimple_stmt_iterator gsi; + gsi = gsi_after_labels (loop_preheader_edge (loop)->src); + gsi_insert_seq_before (&gsi, LOOP_VINFO_INV_PATTERN_DEF_SEQ (loop_vinfo), + GSI_CONTINUE_LINKING); + } + /* FORNOW: the vectorizer supports only loops which body consist of one basic block (header + empty latch). 
When the vectorizer will support more involved loop forms, the order by which the BBs are diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index a2bf90a575ae84c2ab25dc5a066f0cdb660d7e65..e7e877dd2adb55262822f1660f8d92b42d44e6d0 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -183,6 +183,17 @@ append_pattern_def_seq (vec_info *vinfo, new_stmt); } + +/* Add NEW_STMT to VINFO's invariant pattern definition statements. These + statements are not vectorized but are materialized as scalar in the loop + preheader or, for basic-block SLP, in the first block of the region. */ + +static inline void +append_inv_pattern_def_seq (vec_info *vinfo, gimple *new_stmt) +{ + gimple_seq_add_stmt_without_update (&vinfo->inv_pattern_def_seq, new_stmt); +} + /* The caller wants to perform new operations on vect_external variable VAR, so that the result of the operations would also be vect_external. Return the edge on which the operations can be performed, if one exists. @@ -6055,12 +6066,34 @@ vect_recog_bool_pattern (vec_info *vinfo, var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo); else if (integer_type_for_mask (var, vinfo)) return NULL; + else if (TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE + && !vect_get_internal_def (vinfo, var)) + { + /* If the condition is already a boolean then manually convert it to a + mask of the given integer type but don't set a vectype. 
*/ + tree lhs_ivar = vect_recog_temp_ssa_var (type, NULL); + pattern_stmt = gimple_build_assign (lhs_ivar, COND_EXPR, var, + build_all_ones_cst (type), + build_zero_cst (type)); + append_inv_pattern_def_seq (vinfo, pattern_stmt); + var = lhs_ivar; + } + + tree lhs_var = vect_recog_temp_ssa_var (boolean_type_node, NULL); + pattern_stmt = gimple_build_assign (lhs_var, NE_EXPR, var, + build_zero_cst (TREE_TYPE (var))); + + tree new_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (var)); + if (!new_vectype) + return NULL; + + new_vectype = truth_type_for (new_vectype); + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype, + TREE_TYPE (var)); lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); pattern_stmt - = gimple_build_assign (lhs, COND_EXPR, - build2 (NE_EXPR, boolean_type_node, - var, build_int_cst (TREE_TYPE (var), 0)), + = gimple_build_assign (lhs, COND_EXPR, lhs_var, gimple_assign_rhs2 (last_stmt), gimple_assign_rhs3 (last_stmt)); *type_out = vectype; diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 9c817de18bd14e16653882c07c3cdb200206b026..600987dd6e5d506aa5fbb02350f9dab77793d382 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -9159,6 +9159,20 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs, vect_location = saved_vect_location; } + + + /* Generate the invariant statements. 
*/ + if (!gimple_seq_empty_p (bb_vinfo->inv_pattern_def_seq)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "------>generating invariant statements\n"); + + gimple_stmt_iterator gsi; + gsi = gsi_after_labels (bb_vinfo->bbs[0]); + gsi_insert_seq_before (&gsi, bb_vinfo->inv_pattern_def_seq, + GSI_CONTINUE_LINKING); + } } else { diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 45003f762ddfd74e82c068ef5a77ce6e3963b196..b72b54d666879d8485f8d972b4e8d9dc64bc86b3 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -12736,11 +12736,7 @@ vectorizable_comparison_1 (vec_info *vinfo, tree vectype, /* Invariant comparison. */ if (!vectype) { - if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))) - vectype = mask_type; - else - vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1), - slp_node); + vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1), slp_node); if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits)) return false; } diff --git a/gcc/tree-vectorizer.cc b/gcc/tree-vectorizer.cc index 4279b6db4cfccda759c3dc9c630f999b7e697fc7..d4ab47349a3a80181710698274acfcf80963231f 100644 --- a/gcc/tree-vectorizer.cc +++ b/gcc/tree-vectorizer.cc @@ -466,7 +466,8 @@ vec_info::vec_info (vec_info::vec_kind kind_in, vec_info_shared *shared_) shared (shared_), stmt_vec_info_ro (false), bbs (NULL), - nbbs (0) + nbbs (0), + inv_pattern_def_seq (NULL) { stmt_vec_infos.create (50); } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 53105f9292f78c922925813bca9c5af5263fb4e0..490061aea2f6d465d9589eb97bbd34a920d76b1c 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -512,6 +512,12 @@ public: /* The count of the basic blocks in the vectorization region. */ unsigned int nbbs; + /* Used to keep a sequence of def stmts of a pattern stmt that are loop + invariant if they exist. 
+ The sequence is emitted in the loop preheader should the loop be vectorized and is reset when undoing patterns. */ + gimple_seq inv_pattern_def_seq; + private: stmt_vec_info new_stmt_vec_info (gimple *stmt); void set_vinfo_for_stmt (gimple *, stmt_vec_info, bool = true); @@ -1042,6 +1048,7 @@ public: #define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info #define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond #define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor +#define LOOP_VINFO_INV_PATTERN_DEF_SEQ(L) (L)->inv_pattern_def_seq #define LOOP_VINFO_FULLY_MASKED_P(L) \ (LOOP_VINFO_USING_PARTIAL_VECTORS_P (L) \