From 70c3db511ba14ff5fa68cb41d0714a9fb957ea5d Mon Sep 17 00:00:00 2001 From: liuhongt <hongtao.liu@intel.com> Date: Mon, 25 Mar 2024 21:28:14 -0700 Subject: [PATCH] Enable vectorization for unknown tripcount in very cheap cost model but disable epilog vectorization. gcc/ChangeLog: * tree-vect-loop.cc (vect_analyze_loop_costing): Enable vectorization for LOOP_VINFO_PEELING_FOR_NITER in very cheap cost model. (vect_analyze_loop): Disable epilogue vectorization in very cheap cost model. * doc/invoke.texi: Adjust documents for very-cheap cost model. --- gcc/doc/invoke.texi | 11 ++++------- gcc/tree-vect-loop.cc | 6 +++--- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index c0c8bf1c29a9..12477e6f9df3 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -14315,13 +14315,10 @@ counts that will likely execute faster than when executing the original scalar loop. The @samp{cheap} model disables vectorization of loops where doing so would be cost prohibitive for example due to required runtime checks for data dependence or alignment but otherwise -is equal to the @samp{dynamic} model. The @samp{very-cheap} model only -allows vectorization if the vector code would entirely replace the -scalar code that is being vectorized. For example, if each iteration -of a vectorized loop would only be able to handle exactly four iterations -of the scalar loop, the @samp{very-cheap} model would only allow -vectorization if the scalar iteration count is known to be a multiple -of four. +is equal to the @samp{dynamic} model. The @samp{very-cheap} model disables +vectorization of loops when any runtime check for data dependence or alignment +is required; it also disables vectorization of epilogue loops but otherwise is +equal to the @samp{cheap} model. The default cost model depends on other optimization flags and is either @samp{dynamic} or @samp{cheap}. 
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 9be50aaa621c..ade72a5124f7 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -2375,8 +2375,7 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo, a copy of the scalar code (even if we might be able to vectorize it). */ if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP && (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) - || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) - || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))) + || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -3683,7 +3682,8 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, /* No code motion support for multiple epilogues so for now not supported when multiple exits. */ && !LOOP_VINFO_EARLY_BREAKS (first_loop_vinfo) - && !loop->simduid); + && !loop->simduid + && loop_cost_model (loop) > VECT_COST_MODEL_VERY_CHEAP); if (!vect_epilogues) return first_loop_vinfo; -- GitLab