From 83869ff4bcd694634fca969993af4c0dbc51e2bb Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong <juzhe.zhong@rivai.ai> Date: Thu, 4 Jan 2024 14:52:33 +0800 Subject: [PATCH] RISC-V: Refine LMUL computation for MASK_LEN_LOAD/MASK_LEN_STORE IFN Notice a case has "Maximum lmul = 16" which is incorrect. Correct LMUL estimation for MASK_LEN_LOAD/MASK_LEN_STORE. Committed. gcc/ChangeLog: * config/riscv/riscv-vector-costs.cc (variable_vectorized_p): New function. (compute_nregs_for_mode): Refine LMUL. (max_number_of_live_regs): Ditto. (compute_estimated_lmul): Ditto. (has_unexpected_spills_p): Ditto. gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-11.c: New test. --- gcc/config/riscv/riscv-vector-costs.cc | 66 +++++++++++++++---- .../costmodel/riscv/rvv/dynamic-lmul4-11.c | 16 +++++ 2 files changed, 68 insertions(+), 14 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-11.c diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index b9fdfdc5e3a9..21f8a81c89cb 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -230,6 +230,42 @@ get_biggest_mode (machine_mode mode1, machine_mode mode2) return mode1_size >= mode2_size ? mode1 : mode2; } +/* Return true if the variable should be counted into liveness. */ +static bool +variable_vectorized_p (stmt_vec_info stmt_info, tree var, bool lhs_p) +{ + if (!var) + return false; + gimple *stmt = STMT_VINFO_STMT (stmt_info); + enum stmt_vec_info_type type + = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info)); + if (is_gimple_call (stmt) && gimple_call_internal_p (stmt)) + { + if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE + || gimple_call_internal_fn (stmt) == IFN_MASK_LOAD) + { + /* .MASK_LOAD (_5, 32B, _33) + ^ ^ ^ + Only the 3rd argument will be vectorized and consume + a vector register. */ + if (TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE + || (is_gimple_reg (var) && !POINTER_TYPE_P (TREE_TYPE (var)))) + return true; + else + return false; + } + } + if (lhs_p) + return is_gimple_reg (var) + && (!POINTER_TYPE_P (TREE_TYPE (var)) + || type != store_vec_info_type); + else + return poly_int_tree_p (var) + || (is_gimple_val (var) + && (!POINTER_TYPE_P (TREE_TYPE (var)) + || type != load_vec_info_type)); +} + /* Compute local live ranges of each vectorized variable. Note that we only compute local live ranges (within a block) since local live ranges information is accurate enough for us to determine @@ -277,13 +313,8 @@ compute_local_live_ranges ( { unsigned int point = program_point.point; gimple *stmt = program_point.stmt; - stmt_vec_info stmt_info = program_point.stmt_info; tree lhs = gimple_get_lhs (stmt); - enum stmt_vec_info_type type - = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info)); - if (lhs != NULL_TREE && is_gimple_reg (lhs) - && (!POINTER_TYPE_P (TREE_TYPE (lhs)) - || type != store_vec_info_type)) + if (variable_vectorized_p (program_point.stmt_info, lhs, true)) { biggest_mode = get_biggest_mode (biggest_mode, TYPE_MODE (TREE_TYPE (lhs))); @@ -307,10 +338,8 @@ compute_local_live_ranges ( TODO: We may elide the cases that the unnecessary IMM in the future. */ - if (poly_int_tree_p (var) - || (is_gimple_val (var) - && (!POINTER_TYPE_P (TREE_TYPE (var)) - || type != load_vec_info_type))) + if (variable_vectorized_p (program_point.stmt_info, var, + false)) { biggest_mode = get_biggest_mode (biggest_mode, @@ -383,7 +412,9 @@ compute_nregs_for_mode (loop_vec_info loop_vinfo, machine_mode mode, unsigned int biggest_size = GET_MODE_SIZE (biggest_mode).to_constant (); gcc_assert (biggest_size >= mode_size); unsigned int ratio = biggest_size / mode_size; - return MAX (lmul / ratio, 1) * rgroup_size; + /* RVV mask bool modes always consume 1 vector register regardless LMUL. */ + unsigned int nregs = mode == BImode ? 1 : lmul / ratio; + return MAX (nregs, 1) * rgroup_size; } /* This function helps to determine whether current LMUL will cause @@ -414,7 +445,9 @@ max_number_of_live_regs (loop_vec_info loop_vinfo, const basic_block bb, pair live_range = (*iter).second; for (i = live_range.first + 1; i <= live_range.second; i++) { - machine_mode mode = TYPE_MODE (TREE_TYPE (var)); + machine_mode mode = TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE + ? BImode + : TYPE_MODE (TREE_TYPE (var)); unsigned int nregs = compute_nregs_for_mode (loop_vinfo, mode, biggest_mode, lmul); live_vars_vec[i] += nregs; @@ -508,8 +541,12 @@ compute_estimated_lmul (loop_vec_info loop_vinfo, machine_mode mode) else if (known_eq (LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo), 1U)) { int estimated_vf = vect_vf_for_cost (loop_vinfo); - return estimated_vf * GET_MODE_BITSIZE (mode).to_constant () - / TARGET_MIN_VLEN; + int estimated_lmul = estimated_vf * GET_MODE_BITSIZE (mode).to_constant () + / TARGET_MIN_VLEN; + if (estimated_lmul > RVV_M8) + return regno_alignment; + else + return estimated_lmul; } else { @@ -733,6 +770,7 @@ has_unexpected_spills_p (loop_vec_info loop_vinfo) live_ranges_per_bb, &biggest_mode); int lmul = compute_estimated_lmul (loop_vinfo, biggest_mode); + gcc_assert (lmul <= RVV_M8); /* TODO: We calculate the maximum live vars base on current STMTS sequence. We can support live range shrink if it can give us big improvement in the future. */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-11.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-11.c new file mode 100644 index 000000000000..48b24279b553 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-11.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic -fdump-tree-vect-details" } */ + +void +foo (int *__restrict a, int *__restrict b, int *__restrict c, int *__restrict d, + int *__restrict e, int *__restrict f, int n) +{ + for (int i = 0; i < n; i++) + a[i] = b[i] != f[i] ? c[i] * d[i] : e[i]; +} + +/* { dg-final { scan-assembler {e32,m4} } } */ +/* { dg-final { scan-assembler-not {jr} } } */ +/* { dg-final { scan-assembler-times {ret} 1 } } */ +/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-not "Maximum lmul = 16" "vect" } } */ -- GitLab