From 3b1decef83003db9cf8667977c293435c0f3d024 Mon Sep 17 00:00:00 2001 From: "Hu, Lin1" <lin1.hu@intel.com> Date: Mon, 2 Sep 2024 10:24:36 +0800 Subject: [PATCH] i386: Optimize generate insn for AVX10.2 compare gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_expand_fp_compare): Add UNSPEC to support the optimization. * config/i386/i386.cc (ix86_fp_compare_code_to_integer): Add NE/EQ. * config/i386/i386.md (*cmpx<unord><MODEF:mode>): New define_insn. (*cmpx<unord>hf): Ditto. * config/i386/predicates.md (ix86_trivial_fp_comparison_operator): Add ne/eq. gcc/testsuite/ChangeLog: * gcc.target/i386/avx10_2-compare-1b.c: New test. --- gcc/config/i386/i386-expand.cc | 5 + gcc/config/i386/i386.cc | 5 + gcc/config/i386/i386.md | 31 +++++- gcc/config/i386/predicates.md | 12 +++ .../gcc.target/i386/avx10_2-compare-1b.c | 96 +++++++++++++++++++ 5 files changed, 147 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-compare-1b.c diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index d692008ffe7e..53327544620f 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -2916,6 +2916,11 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1) switch (ix86_fp_comparison_strategy (code)) { case IX86_FPCMP_COMI: + tmp = gen_rtx_COMPARE (CCFPmode, op0, op1); + if (TARGET_AVX10_2_256 && (code == EQ || code == NE)) + tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_OPTCOMX); + if (unordered_compare) + tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP); cmp_mode = CCFPmode; emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp)); break; diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 546c964d2a47..7af9ceca429f 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -16634,6 +16634,11 @@ ix86_fp_compare_code_to_integer (enum rtx_code code) return LEU; case LTGT: return NE; + case EQ: + case NE: + if (TARGET_AVX10_2_256) + return code; + /* FALLTHRU. */ default: return UNKNOWN; } diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index b56a51be09fb..0fae3c1eb878 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -117,6 +117,7 @@ UNSPEC_STC UNSPEC_PUSHFL UNSPEC_POPFL + UNSPEC_OPTCOMX ;; For SSE/MMX support: UNSPEC_FIX_NOTRUNC @@ -1736,7 +1737,7 @@ (compare:CC (match_operand:XF 1 "nonmemory_operand") (match_operand:XF 2 "nonmemory_operand"))) (set (pc) (if_then_else - (match_operator 0 "ix86_fp_comparison_operator" + (match_operator 0 "ix86_fp_comparison_operator_xf" [(reg:CC FLAGS_REG) (const_int 0)]) (label_ref (match_operand 3)) @@ -1753,7 +1754,7 @@ (compare:CC (match_operand:XF 2 "nonmemory_operand") (match_operand:XF 3 "nonmemory_operand"))) (set (match_operand:QI 0 "register_operand") - (match_operator 1 "ix86_fp_comparison_operator" + (match_operator 1 "ix86_fp_comparison_operator_xf" [(reg:CC FLAGS_REG) (const_int 0)]))] "TARGET_80387" @@ -2017,6 +2018,32 @@ (set_attr "bdver1_decode" "double") (set_attr "znver1_decode" "double")]) +(define_insn "*cmpx<unord><MODEF:mode>" + [(set (reg:CCFP FLAGS_REG) + (unspec:CCFP [ + (compare:CCFP + (match_operand:MODEF 0 "register_operand" "v") + (match_operand:MODEF 1 "nonimmediate_operand" "vm"))] + UNSPEC_OPTCOMX))] + "TARGET_AVX10_2_256" + "%v<unord>comx<MODEF:ssemodesuffix>\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecomi") + (set_attr "prefix" "evex") + (set_attr "mode" "<MODEF:MODE>")]) + +(define_insn "*cmpx<unord>hf" + [(set (reg:CCFP FLAGS_REG) + (unspec:CCFP [ + (compare:CCFP + (match_operand:HF 0 "register_operand" "v") + (match_operand:HF 1 "nonimmediate_operand" "vm"))] + UNSPEC_OPTCOMX))] + "TARGET_AVX10_2_256" + "v<unord>comxsh\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecomi") + (set_attr "prefix" "evex") + (set_attr "mode" "HF")]) + (define_insn "*cmpi<unord><MODEF:mode>" [(set (reg:CCFP FLAGS_REG) (compare:CCFP diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index ab6a2e14d355..053312bbe27c 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1633,7 +1633,13 @@ }) ;; Return true if this comparison only requires testing one flag bit. +;; VCOMX/VUCOMX set ZF, SF, OF, differently from COMI/UCOMI. (define_predicate "ix86_trivial_fp_comparison_operator" + (if_then_else (match_test "TARGET_AVX10_2_256") + (match_code "gt,ge,unlt,unle,eq,uneq,ne,ltgt,ordered,unordered") + (match_code "gt,ge,unlt,unle,uneq,ltgt,ordered,unordered"))) + +(define_predicate "ix86_trivial_fp_comparison_operator_xf" (match_code "gt,ge,unlt,unle,uneq,ltgt,ordered,unordered")) ;; Return true if we know how to do this comparison. Others require @@ -1645,6 +1651,12 @@ (match_operand 0 "comparison_operator") (match_operand 0 "ix86_trivial_fp_comparison_operator"))) +(define_predicate "ix86_fp_comparison_operator_xf" + (if_then_else (match_test "ix86_fp_comparison_strategy (GET_CODE (op)) + == IX86_FPCMP_ARITH") + (match_operand 0 "comparison_operator") + (match_operand 0 "ix86_trivial_fp_comparison_operator_xf"))) + ;; Return true if we can perform this comparison on TImode operands. (define_predicate "ix86_timode_comparison_operator" (if_then_else (match_test "TARGET_64BIT") diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-compare-1b.c b/gcc/testsuite/gcc.target/i386/avx10_2-compare-1b.c new file mode 100644 index 000000000000..15989eca3297 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-compare-1b.c @@ -0,0 +1,96 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx10.2 -mfpmath=sse" } */ +/* { dg-final { scan-assembler-times "comi" 6 } } */ +/* { dg-final { scan-assembler-times "comx" 12 } } */ + +#include <immintrin.h> + +int is_equal_sd (double a, double b) +{ + return a == b; +} + +int is_not_equal_sd (double a, double b) +{ + return a != b; +} + +int is_equal_ss (float a, float b) +{ + return a == b; +} + +int is_not_equal_ss (float a, float b) +{ + return a != b; +} + +int is_equal_sh (_Float16 a, _Float16 b) +{ + return a == b; +} + +int is_not_equal_sh (_Float16 a, _Float16 b) +{ + return a != b; +} + +int is_unordered_or_equal_sd (double a, double b) +{ + return __builtin_isunordered (a, b) || a == b; +} + +int is_unordered_or_nonequal_sd (double a, double b) +{ + return __builtin_isunordered (a, b) || a != b; +} + +int is_unordered_or_equal_ss (float a, float b) +{ + return __builtin_isunordered (a, b) || a == b; +} + +int is_unordered_or_nonequal_ss (float a, float b) +{ + return __builtin_isunordered (a, b) || a != b; +} + +int is_unordered_or_equal_sh (_Float16 a, _Float16 b) +{ + return __builtin_isunordered (a, b) || a == b; +} + +int is_unordered_or_nonequal_sh (_Float16 a, _Float16 b) +{ + return __builtin_isunordered (a, b) || a != b; +} + +int is_ordered_and_equal_sd (double a, double b) +{ + return !__builtin_isunordered (a, b) && a == b; +} + +int is_ordered_and_nonequal_sd (double a, double b) +{ + return !__builtin_isunordered (a, b) && a != b; +} + +int is_ordered_and_equal_ss (float a, float b) +{ + return !__builtin_isunordered (a, b) && a == b; +} + +int is_ordered_and_nonequal_ss (float a, float b) +{ + return !__builtin_isunordered (a, b) && a != b; +} + +int is_ordered_and_equal_sh (_Float16 a, _Float16 b) +{ + return !__builtin_isunordered (a, b) && a == b; +} + +int is_ordered_and_nonequal_sh (_Float16 a, _Float16 b) +{ + return !__builtin_isunordered (a, b) && a != b; +} -- GitLab