From 4150bcd205ebb60b949224758c05012c0dfab7a7 Mon Sep 17 00:00:00 2001
From: Tamar Christina <tamar.christina@arm.com>
Date: Sun, 22 Sep 2024 13:38:49 +0100
Subject: [PATCH] middle-end: lower COND_EXPR into gimple form in
 vect_recog_bool_pattern

Currently the vectorizer cheats when lowering COND_EXPR during bool recog.
In the cases where the conditonal is loop invariant or non-boolean it instead
converts the operation back into GENERIC and hides much of the operation from
the analysis part of the vectorizer.

i.e.

  a ? b : c

is transformed into:

  a != 0 ? b : c

however by doing so we can't perform any optimization on the mask as they aren't
explicit until quite late during codegen.

To fix this this patch lowers booleans earlier and so ensures that we are always
in GIMPLE.

For when the value is a loop invariant boolean we have to generate an additional
conversion from bool to the integer mask form.

This is done by creating a loop invariant a ? -1 : 0 with the target mask
precision and then doing a normal != 0 comparison on that.

To support this the patch also adds the ability to during pattern matching
create a loop invariant pattern that won't be seen by the vectorizer and will
instead me materialized inside the loop preheader in the case of loops, or in
the case of BB vectorization it materializes it in the first BB in the region.

gcc/ChangeLog:

	* tree-vect-patterns.cc (append_inv_pattern_def_seq): New.
	(vect_recog_bool_pattern): Lower COND_EXPRs.
	* tree-vect-slp.cc (vect_slp_region): Materialize loop invariant
	statements.
	* tree-vect-loop.cc (vect_transform_loop): Likewise.
	* tree-vect-stmts.cc (vectorizable_comparison_1): Remove
	VECT_SCALAR_BOOLEAN_TYPE_P handling for vectype.
	* tree-vectorizer.cc (vec_info::vec_info): Initialize
	inv_pattern_def_seq.
	* tree-vectorizer.h (LOOP_VINFO_INV_PATTERN_DEF_SEQ): New.
	(class vec_info): Add inv_pattern_def_seq.

gcc/testsuite/ChangeLog:

	* gcc.dg/vect/bb-slp-conditional_store_1.c: New test.
	* gcc.dg/vect/vect-conditional_store_5.c: New test.
	* gcc.dg/vect/vect-conditional_store_6.c: New test.
---
 .../gcc.dg/vect/bb-slp-conditional_store_1.c  | 15 +++++++
 .../gcc.dg/vect/vect-conditional_store_5.c    | 28 +++++++++++++
 .../gcc.dg/vect/vect-conditional_store_6.c    | 24 ++++++++++++
 gcc/tree-vect-loop.cc                         | 12 ++++++
 gcc/tree-vect-patterns.cc                     | 39 +++++++++++++++++--
 gcc/tree-vect-slp.cc                          | 14 +++++++
 gcc/tree-vect-stmts.cc                        |  6 +--
 gcc/tree-vectorizer.cc                        |  3 +-
 gcc/tree-vectorizer.h                         |  7 ++++
 9 files changed, 139 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-conditional_store_1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-conditional_store_5.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-conditional_store_6.c

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-conditional_store_1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-conditional_store_1.c
new file mode 100644
index 000000000000..650a3bfbfb1d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-conditional_store_1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_float } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+void foo3 (float *restrict a, int *restrict c)
+{
+#pragma GCC unroll 8
+  for (int i = 0; i < 8; i++)
+    c[i] = a[i] > 1.0;
+}
+
+/* { dg-final { scan-tree-dump "vectorized using SLP" "slp1" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_5.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_5.c
new file mode 100644
index 000000000000..37d60fa76351
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_5.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_masked_store } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+#include <stdbool.h>
+
+void foo3 (float *restrict a, int *restrict b, int *restrict c, int n, int stride)
+{
+  if (stride <= 1)
+    return;
+
+  bool ai = a[0];
+
+  for (int i = 0; i < n; i++)
+    {
+      int res = c[i];
+      int t = b[i+stride];
+      if (ai)
+        t = res;
+      c[i] = t;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { target aarch64-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_6.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_6.c
new file mode 100644
index 000000000000..5e1aedf3726b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_6.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_masked_store } */
+
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
+
+void foo3 (unsigned long long *restrict a, int *restrict b, int *restrict c, int n, int stride)
+{
+  if (stride <= 1)
+    return;
+
+  for (int i = 0; i < n; i++)
+    {
+      int res = c[i];
+      int t = b[i+stride];
+      if (a[i])
+        t = res;
+      c[i] = t;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { target aarch64-*-* } } } */
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 0f54ebe7dc48..5cd4bdb32e04 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -12356,6 +12356,18 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
       vect_schedule_slp (loop_vinfo, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
     }
 
+  /* Generate the loop invariant statements.  */
+  if (!gimple_seq_empty_p (LOOP_VINFO_INV_PATTERN_DEF_SEQ (loop_vinfo)))
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "------>generating loop invariant statements\n");
+      gimple_stmt_iterator gsi;
+      gsi = gsi_after_labels (loop_preheader_edge (loop)->src);
+      gsi_insert_seq_before (&gsi, LOOP_VINFO_INV_PATTERN_DEF_SEQ (loop_vinfo),
+			     GSI_CONTINUE_LINKING);
+    }
+
   /* FORNOW: the vectorizer supports only loops which body consist
      of one basic block (header + empty latch). When the vectorizer will
      support more involved loop forms, the order by which the BBs are
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index a2bf90a575ae..e7e877dd2adb 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -183,6 +183,17 @@ append_pattern_def_seq (vec_info *vinfo,
 				      new_stmt);
 }
 
+
+/* Add NEW_STMT to VINFO's invariant pattern definition statements.  These
+   statements are not vectorized but are materialized as scalar in the loop
+   preheader.  */
+
+static inline void
+append_inv_pattern_def_seq (vec_info *vinfo, gimple *new_stmt)
+{
+  gimple_seq_add_stmt_without_update (&vinfo->inv_pattern_def_seq, new_stmt);
+}
+
 /* The caller wants to perform new operations on vect_external variable
    VAR, so that the result of the operations would also be vect_external.
    Return the edge on which the operations can be performed, if one exists.
@@ -6055,12 +6066,34 @@ vect_recog_bool_pattern (vec_info *vinfo,
 	var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
       else if (integer_type_for_mask (var, vinfo))
 	return NULL;
+      else if (TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE
+	       && !vect_get_internal_def (vinfo, var))
+	{
+	  /* If the condition is already a boolean then manually convert it to a
+	     mask of the given integer type but don't set a vectype.  */
+	  tree lhs_ivar = vect_recog_temp_ssa_var (type, NULL);
+	  pattern_stmt = gimple_build_assign (lhs_ivar, COND_EXPR, var,
+					      build_all_ones_cst (type),
+					      build_zero_cst (type));
+	  append_inv_pattern_def_seq (vinfo, pattern_stmt);
+	  var = lhs_ivar;
+	}
+
+      tree lhs_var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
+      pattern_stmt = gimple_build_assign (lhs_var, NE_EXPR, var,
+					  build_zero_cst (TREE_TYPE (var)));
+
+      tree new_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (var));
+      if (!new_vectype)
+	return NULL;
+
+      new_vectype = truth_type_for (new_vectype);
+      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype,
+			      TREE_TYPE (var));
 
       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
       pattern_stmt 
-	= gimple_build_assign (lhs, COND_EXPR,
-			       build2 (NE_EXPR, boolean_type_node,
-				       var, build_int_cst (TREE_TYPE (var), 0)),
+	= gimple_build_assign (lhs, COND_EXPR, lhs_var,
 			       gimple_assign_rhs2 (last_stmt),
 			       gimple_assign_rhs3 (last_stmt));
       *type_out = vectype;
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 9c817de18bd1..600987dd6e5d 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -9159,6 +9159,20 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
 
 	      vect_location = saved_vect_location;
 	    }
+
+
+	  /* Generate the invariant statements.  */
+	  if (!gimple_seq_empty_p (bb_vinfo->inv_pattern_def_seq))
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_NOTE, vect_location,
+			 "------>generating invariant statements\n");
+
+	      gimple_stmt_iterator gsi;
+	      gsi = gsi_after_labels (bb_vinfo->bbs[0]);
+	      gsi_insert_seq_after (&gsi, bb_vinfo->inv_pattern_def_seq,
+				    GSI_CONTINUE_LINKING);
+	    }
 	}
       else
 	{
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 45003f762ddf..b72b54d66687 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -12736,11 +12736,7 @@ vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
   /* Invariant comparison.  */
   if (!vectype)
     {
-      if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
-	vectype = mask_type;
-      else
-	vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
-					       slp_node);
+      vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1), slp_node);
       if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
 	return false;
     }
diff --git a/gcc/tree-vectorizer.cc b/gcc/tree-vectorizer.cc
index 4279b6db4cfc..d4ab47349a3a 100644
--- a/gcc/tree-vectorizer.cc
+++ b/gcc/tree-vectorizer.cc
@@ -466,7 +466,8 @@ vec_info::vec_info (vec_info::vec_kind kind_in, vec_info_shared *shared_)
     shared (shared_),
     stmt_vec_info_ro (false),
     bbs (NULL),
-    nbbs (0)
+    nbbs (0),
+    inv_pattern_def_seq (NULL)
 {
   stmt_vec_infos.create (50);
 }
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 53105f9292f7..490061aea2f6 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -512,6 +512,12 @@ public:
   /* The count of the basic blocks in the vectorization region.  */
   unsigned int nbbs;
 
+  /* Used to keep a sequence of def stmts of a pattern stmt that are loop
+    invariant if they exists.
+    The sequence is emitted in the loop preheader should the loop be vectorized
+    and are reset when undoing patterns.  */
+  gimple_seq inv_pattern_def_seq;
+
 private:
   stmt_vec_info new_stmt_vec_info (gimple *stmt);
   void set_vinfo_for_stmt (gimple *, stmt_vec_info, bool = true);
@@ -1042,6 +1048,7 @@ public:
 #define LOOP_VINFO_ORIG_LOOP_INFO(L)       (L)->orig_loop_info
 #define LOOP_VINFO_SIMD_IF_COND(L)         (L)->simd_if_cond
 #define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor
+#define LOOP_VINFO_INV_PATTERN_DEF_SEQ(L)  (L)->inv_pattern_def_seq
 
 #define LOOP_VINFO_FULLY_MASKED_P(L)		\
   (LOOP_VINFO_USING_PARTIAL_VECTORS_P (L)	\
-- 
GitLab