From 1b4dbccc1f828fa00e6acc8b88d24301c65552df Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Thu, 26 Sep 2019 16:52:50 +0000
Subject: [PATCH] tree-vect-loop.c (vect_analyze_loop_operations): Analyze
 loop-closed PHIs that are vect_internal_def.

2019-09-26  Richard Biener  <rguenther@suse.de>

	* tree-vect-loop.c (vect_analyze_loop_operations): Analyze
	loop-closed PHIs that are vect_internal_def.
	(vect_create_epilog_for_reduction): Exit early for nested cycles.
	Simplify.
	(vectorizable_lc_phi): New.
	* tree-vect-stmts.c (vect_analyze_stmt): Call vectorizable_lc_phi.
	(vect_transform_stmt): Likewise.
	* tree-vectorizer.h (stmt_vec_info_type): Add lc_phi_info_type.
	(vectorizable_lc_phi): Declare.

From-SVN: r276157
---
 gcc/ChangeLog         | 12 ++++++
 gcc/tree-vect-loop.c  | 98 ++++++++++++++++++++++++++++++++++++-------
 gcc/tree-vect-stmts.c |  8 +++-
 gcc/tree-vectorizer.h |  2 +
 4 files changed, 104 insertions(+), 16 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 19892af07c06..66d7d86ba2f9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2019-09-26  Richard Biener  <rguenther@suse.de>
+
+	* tree-vect-loop.c (vect_analyze_loop_operations): Analyze
+	loop-closed PHIs that are vect_internal_def.
+	(vect_create_epilog_for_reduction): Exit early for nested cycles.
+	Simplify.
+	(vectorizable_lc_phi): New.
+	* tree-vect-stmts.c (vect_analyze_stmt): Call vectorizable_lc_phi.
+	(vect_transform_stmt): Likewise.
+	* tree-vectorizer.h (stmt_vec_info_type): Add lc_phi_info_type.
+	(vectorizable_lc_phi): Declare.
+
 2019-09-26  Richard Biener  <rguenther@suse.de>
 
 	* tree-vect-loop.c (vect_analyze_loop_operations): Also call
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 1a561f9d16f2..237d28b3ce3d 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1519,12 +1519,16 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
                   phi_op = PHI_ARG_DEF (phi, 0);
 		  stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op);
 		  if (!op_def_info)
-		    return opt_result::failure_at (phi, "unsupported phi");
+		    return opt_result::failure_at (phi, "unsupported phi\n");
 
 		  if (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer
 		      && (STMT_VINFO_RELEVANT (op_def_info)
 			  != vect_used_in_outer_by_reduction))
-		    return opt_result::failure_at (phi, "unsupported phi");
+		    return opt_result::failure_at (phi, "unsupported phi\n");
+
+		  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
+		      && !vectorizable_lc_phi (stmt_info, NULL, NULL))
+		    return opt_result::failure_at (phi, "unsupported phi\n");
                 }
 
               continue;
@@ -4396,6 +4400,10 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
         }
     }
 
+  /* For vectorizing nested cycles the above is all we need to do.  */
+  if (nested_in_vect_loop && !double_reduc)
+    return;
+
   /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
      which is updated with the current index of the loop for every match of
      the original loop's cond_expr (VEC_STMT).  This results in a vector
@@ -4588,16 +4596,6 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
   bitsize = TYPE_SIZE (scalar_type);
 
-  /* In case this is a reduction in an inner-loop while vectorizing an outer
-     loop - we don't need to extract a single scalar result at the end of the
-     inner-loop (unless it is double reduction, i.e., the use of reduction is
-     outside the outer-loop).  The final vector of partial results will be used
-     in the vectorized outer-loop, or reduced to a scalar result at the end of
-     the outer-loop.  */
-  if (nested_in_vect_loop && !double_reduc)
-    ;
-  else
-    {
   /* SLP reduction without reduction chain, e.g.,
      # a1 = phi <a2, a0>
      # b1 = phi <b2, b0>
@@ -5313,7 +5311,6 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
 
       new_phis[0] = epilog_stmt;
     }
-    }
 
   if (double_reduc)
     loop = loop->inner;
@@ -5473,7 +5470,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
           if (double_reduc)
             loop = outer_loop;
           else
-            continue;
+	    gcc_unreachable ();
         }
 
       phis.create (3);
@@ -7167,6 +7164,76 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   return true;
 }
 
+/* Analyze (!VEC_STMT) or vectorize LC PHIs of nested cycles (so far).  */
+
+bool
+vectorizable_lc_phi (stmt_vec_info stmt_info, stmt_vec_info *vec_stmt,
+		     slp_tree slp_node)
+{
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  if (!loop_vinfo
+      || !is_a <gphi *> (stmt_info->stmt)
+      || gimple_phi_num_args (stmt_info->stmt) != 1)
+    return false;
+
+  /* To handle the nested_cycle_def for double-reductions we have to
+     refactor epilogue generation more.  */
+  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
+      /* && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def */)
+    return false;
+
+  if (!vec_stmt) /* Analysis only: record the type, no transformation.  */
+    {
+      STMT_VINFO_TYPE (stmt_info) = lc_phi_info_type;
+      return true;
+    }
+
+  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  tree scalar_dest = gimple_phi_result (stmt_info->stmt);
+  basic_block bb = gimple_bb (stmt_info->stmt);
+  edge e = single_pred_edge (bb);
+  tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
+  vec<tree> vec_oprnds = vNULL;
+  vect_get_vec_defs (gimple_phi_arg_def (stmt_info->stmt, 0), NULL_TREE,
+		     stmt_info, &vec_oprnds, NULL, slp_node);
+  if (slp_node)
+    {
+      unsigned vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+      gcc_assert (vec_oprnds.length () == vec_num);
+      for (unsigned i = 0; i < vec_num; i++)
+	{
+	  /* Create the vectorized LC PHI node for the I-th vector def.  */
+	  gphi *new_phi = create_phi_node (vec_dest, bb);
+	  add_phi_arg (new_phi, vec_oprnds[i], e, UNKNOWN_LOCATION);
+	  stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi);
+	  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi_info);
+	}
+    }
+  else
+    {
+      unsigned ncopies = vect_get_num_copies (loop_vinfo, vectype);
+      stmt_vec_info prev_phi_info = NULL;
+      for (unsigned i = 0; i < ncopies; i++)
+	{
+	  if (i != 0)
+	    vect_get_vec_defs_for_stmt_copy (loop_vinfo, &vec_oprnds, NULL);
+	  /* Create the vectorized LC PHI node for copy I and chain it.  */
+	  gphi *new_phi = create_phi_node (vec_dest, bb);
+	  add_phi_arg (new_phi, vec_oprnds[0], e, UNKNOWN_LOCATION);
+	  stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi);
+	  if (i == 0)
+	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_phi_info;
+	  else
+	    STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi_info;
+	  prev_phi_info = new_phi_info;
+	}
+    }
+  vec_oprnds.release ();
+
+  return true;
+}
+
+
 /* Function vect_min_worthwhile_factor.
 
    For a loop where we could vectorize the operation indicated by CODE,
@@ -8399,7 +8466,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
 	  if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
 	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
 	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def
-	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
+	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
+	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def)
 	      && ! PURE_SLP_STMT (stmt_info))
 	    {
 	      if (dump_enabled_p ())
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index b1e97f85d96b..5734068eabc1 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -10671,7 +10671,8 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
 	  || vectorizable_condition (stmt_info, NULL, NULL, false, -1, node,
 				     cost_vec)
 	  || vectorizable_comparison (stmt_info, NULL, NULL, node,
-				      cost_vec));
+				      cost_vec)
+	  || vectorizable_lc_phi (stmt_info, NULL, node));
   else
     {
       if (bb_vinfo)
@@ -10820,6 +10821,11 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       gcc_assert (done);
       break;
 
+    case lc_phi_info_type:
+      done = vectorizable_lc_phi (stmt_info, &vec_stmt, slp_node);
+      gcc_assert (done);
+      break;
+
     default:
       if (!STMT_VINFO_LIVE_P (stmt_info))
 	{
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index f140405bbd62..1ab4af7236f9 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -694,6 +694,7 @@ enum stmt_vec_info_type {
   type_promotion_vec_info_type,
   type_demotion_vec_info_type,
   type_conversion_vec_info_type,
+  lc_phi_info_type,
   loop_exit_ctrl_vec_info_type
 };
 
@@ -1653,6 +1654,7 @@ extern bool vectorizable_reduction (stmt_vec_info, gimple_stmt_iterator *,
 extern bool vectorizable_induction (stmt_vec_info, gimple_stmt_iterator *,
 				    stmt_vec_info *, slp_tree,
 				    stmt_vector_for_cost *);
+extern bool vectorizable_lc_phi (stmt_vec_info, stmt_vec_info *, slp_tree);
 extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code);
 extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
 					stmt_vector_for_cost *,
-- 
GitLab