From 330782a1b6cfe881ad884617ffab441aeb1c2b5c Mon Sep 17 00:00:00 2001 From: liuhongt <hongtao.liu@intel.com> Date: Mon, 14 Oct 2024 17:16:13 +0800 Subject: [PATCH] Canonicalize (vec_merge (fma op2 op1 op3) op1 mask) to (vec_merge (fma op1 op2 op3) op1 mask). For x86 masked fma, there're 2 rtl representations 1) (vec_merge (fma op2 op1 op3) op1 mask) 2) (vec_merge (fma op1 op2 op3) op1 mask). 5894(define_insn "<avx512>_fmadd_<mode>_mask<round_name>" 5895 [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v,v") 5896 (vec_merge:VFH_AVX512VL 5897 (fma:VFH_AVX512VL 5898 (match_operand:VFH_AVX512VL 1 "nonimmediate_operand" "0,0") 5899 (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v") 5900 (match_operand:VFH_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")) 5901 (match_dup 1) 5902 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] 5903 "TARGET_AVX512F && <round_mode_condition>" 5904 "@ 5905 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} 5906 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" 5907 [(set_attr "type" "ssemuladd") 5908 (set_attr "prefix" "evex") 5909 (set_attr "mode" "<MODE>")]) Here op1 has constraint "0", and the scecond op1 is (match_dup 1), we once tried to replace it with (match_operand:M 5 "nonimmediate_operand" "0")) to enable more flexibility for pattern match and recog, but it triggered an ICE in reload(reload can handle at most one perand with "0" constraint). So we need either add 2 patterns in the backend or just do the canonicalization in the middle-end. gcc/ChangeLog: PR middle-end/117072 * combine.cc (maybe_swap_commutative_operands): Canonicalize (vec_merge (fma op2 op1 op3) op1 mask) to (vec_merge (fma op1 op2 op3) op1 mask). --- gcc/combine.cc | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/gcc/combine.cc b/gcc/combine.cc index fef06a6cdc08..3400dfebd848 100644 --- a/gcc/combine.cc +++ b/gcc/combine.cc @@ -5656,6 +5656,31 @@ maybe_swap_commutative_operands (rtx x) SUBST (XEXP (x, 1), temp); } + /* Canonicalize (vec_merge (fma op2 op1 op3) op1 mask) to + (vec_merge (fma op1 op2 op3) op1 mask). */ + if (GET_CODE (x) == VEC_MERGE + && GET_CODE (XEXP (x, 0)) == FMA) + { + rtx fma_op1 = XEXP (XEXP (x, 0), 0); + rtx fma_op2 = XEXP (XEXP (x, 0), 1); + rtx masked_op = XEXP (x, 1); + if (rtx_equal_p (masked_op, fma_op2)) + { + if (GET_CODE (fma_op1) == NEG) + { + /* Keep the negate canonicalized to the first operand. */ + fma_op1 = XEXP (fma_op1, 0); + SUBST (XEXP (XEXP (XEXP (x, 0), 0), 0), fma_op2); + SUBST (XEXP (XEXP (x, 0), 1), fma_op1); + } + else + { + SUBST (XEXP (XEXP (x, 0), 0), fma_op2); + SUBST (XEXP (XEXP (x, 0), 1), fma_op1); + } + } + } + unsigned n_elts = 0; if (GET_CODE (x) == VEC_MERGE && CONST_INT_P (XEXP (x, 2)) -- GitLab