Skip to content
Snippets Groups Projects
Commit 83869ff4 authored by Juzhe-Zhong's avatar Juzhe-Zhong Committed by Pan Li
Browse files

RISC-V: Refine LMUL computation for MASK_LEN_LOAD/MASK_LEN_STORE IFN

Notice a case has "Maximum lmul = 16" which is incorrect.
Correct LMUL estimation for MASK_LEN_LOAD/MASK_LEN_STORE.

Committed.

gcc/ChangeLog:

	* config/riscv/riscv-vector-costs.cc (variable_vectorized_p): New function.
	(compute_nregs_for_mode): Refine LMUL.
	(max_number_of_live_regs): Ditto.
	(compute_estimated_lmul): Ditto.
	(has_unexpected_spills_p): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-11.c: New test.
parent 49b2387b
No related branches found
No related tags found
No related merge requests found
......@@ -230,6 +230,42 @@ get_biggest_mode (machine_mode mode1, machine_mode mode2)
return mode1_size >= mode2_size ? mode1 : mode2;
}
/* Return true if the variable should be counted into liveness. */
static bool
variable_vectorized_p (stmt_vec_info stmt_info, tree var, bool lhs_p)
{
if (!var)
return false;
gimple *stmt = STMT_VINFO_STMT (stmt_info);
enum stmt_vec_info_type type
= STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
if (is_gimple_call (stmt) && gimple_call_internal_p (stmt))
{
if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE
|| gimple_call_internal_fn (stmt) == IFN_MASK_LOAD)
{
/* .MASK_LOAD (_5, 32B, _33)
^ ^ ^
Only the 3rd argument will be vectorized and consume
a vector register. */
if (TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE
|| (is_gimple_reg (var) && !POINTER_TYPE_P (TREE_TYPE (var))))
return true;
else
return false;
}
}
if (lhs_p)
return is_gimple_reg (var)
&& (!POINTER_TYPE_P (TREE_TYPE (var))
|| type != store_vec_info_type);
else
return poly_int_tree_p (var)
|| (is_gimple_val (var)
&& (!POINTER_TYPE_P (TREE_TYPE (var))
|| type != load_vec_info_type));
}
/* Compute local live ranges of each vectorized variable.
Note that we only compute local live ranges (within a block) since
local live ranges information is accurate enough for us to determine
......@@ -277,13 +313,8 @@ compute_local_live_ranges (
{
unsigned int point = program_point.point;
gimple *stmt = program_point.stmt;
stmt_vec_info stmt_info = program_point.stmt_info;
tree lhs = gimple_get_lhs (stmt);
enum stmt_vec_info_type type
= STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
if (lhs != NULL_TREE && is_gimple_reg (lhs)
&& (!POINTER_TYPE_P (TREE_TYPE (lhs))
|| type != store_vec_info_type))
if (variable_vectorized_p (program_point.stmt_info, lhs, true))
{
biggest_mode = get_biggest_mode (biggest_mode,
TYPE_MODE (TREE_TYPE (lhs)));
......@@ -307,10 +338,8 @@ compute_local_live_ranges (
TODO: We may elide the cases that the unnecessary IMM in
the future. */
if (poly_int_tree_p (var)
|| (is_gimple_val (var)
&& (!POINTER_TYPE_P (TREE_TYPE (var))
|| type != load_vec_info_type)))
if (variable_vectorized_p (program_point.stmt_info, var,
false))
{
biggest_mode
= get_biggest_mode (biggest_mode,
......@@ -383,7 +412,9 @@ compute_nregs_for_mode (loop_vec_info loop_vinfo, machine_mode mode,
unsigned int biggest_size = GET_MODE_SIZE (biggest_mode).to_constant ();
gcc_assert (biggest_size >= mode_size);
unsigned int ratio = biggest_size / mode_size;
return MAX (lmul / ratio, 1) * rgroup_size;
/* RVV mask bool modes always consume 1 vector register regardless LMUL. */
unsigned int nregs = mode == BImode ? 1 : lmul / ratio;
return MAX (nregs, 1) * rgroup_size;
}
/* This function helps to determine whether current LMUL will cause
......@@ -414,7 +445,9 @@ max_number_of_live_regs (loop_vec_info loop_vinfo, const basic_block bb,
pair live_range = (*iter).second;
for (i = live_range.first + 1; i <= live_range.second; i++)
{
machine_mode mode = TYPE_MODE (TREE_TYPE (var));
machine_mode mode = TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE
? BImode
: TYPE_MODE (TREE_TYPE (var));
unsigned int nregs
= compute_nregs_for_mode (loop_vinfo, mode, biggest_mode, lmul);
live_vars_vec[i] += nregs;
......@@ -508,8 +541,12 @@ compute_estimated_lmul (loop_vec_info loop_vinfo, machine_mode mode)
else if (known_eq (LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo), 1U))
{
int estimated_vf = vect_vf_for_cost (loop_vinfo);
return estimated_vf * GET_MODE_BITSIZE (mode).to_constant ()
/ TARGET_MIN_VLEN;
int estimated_lmul = estimated_vf * GET_MODE_BITSIZE (mode).to_constant ()
/ TARGET_MIN_VLEN;
if (estimated_lmul > RVV_M8)
return regno_alignment;
else
return estimated_lmul;
}
else
{
......@@ -733,6 +770,7 @@ has_unexpected_spills_p (loop_vec_info loop_vinfo)
live_ranges_per_bb, &biggest_mode);
int lmul = compute_estimated_lmul (loop_vinfo, biggest_mode);
gcc_assert (lmul <= RVV_M8);
/* TODO: We calculate the maximum live vars base on current STMTS
sequence. We can support live range shrink if it can give us
big improvement in the future. */
......
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic -fdump-tree-vect-details" } */
void
foo (int *__restrict a, int *__restrict b, int *__restrict c, int *__restrict d,
int *__restrict e, int *__restrict f, int n)
{
for (int i = 0; i < n; i++)
a[i] = b[i] != f[i] ? c[i] * d[i] : e[i];
}
/* { dg-final { scan-assembler {e32,m4} } } */
/* { dg-final { scan-assembler-not {jr} } } */
/* { dg-final { scan-assembler-times {ret} 1 } } */
/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 1 "vect" } } */
/* { dg-final { scan-tree-dump-not "Maximum lmul = 16" "vect" } } */
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment