From 19b24f4aae6c59d70cc882623b1e9b279b3872f3 Mon Sep 17 00:00:00 2001 From: Hongyu Wang <hongyu.wang@intel.com> Date: Tue, 12 Nov 2024 13:04:46 +0800 Subject: [PATCH] i386: Fix cstorebf4 fp comparison operand [PR117495] cstorebf4 uses comparison_operator for the BFmode compare, which is incorrect when it calls ix86_expand_setcc directly, because ix86_expand_setcc does not canonicalize the input comparison by swapping operands to correct the compare code. The original code without AVX10.2 calls emit_store_flag_force, which actually calls emit_store_flag_1 and recursively invokes this expander again with swapped operands and flag. Therefore, we can avoid the redundant recursive call by changing comparison_operator to ix86_fp_comparison_operator and calling ix86_expand_setcc directly. gcc/ChangeLog: PR target/117495 * config/i386/i386.md (cstorebf4): Use ix86_fp_comparison_operator and call ix86_expand_setcc directly. gcc/testsuite/ChangeLog: PR target/117495 * gcc.target/i386/pr117495.c: New test. --- gcc/config/i386/i386.md | 18 +++++++--------- gcc/testsuite/gcc.target/i386/pr117495.c | 26 ++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr117495.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index f4aae80b7a95..03b0f548467e 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1853,23 +1853,19 @@ (compare:CC (match_operand:BF 2 "cmp_fp_expander_operand") (match_operand:BF 3 "cmp_fp_expander_operand"))) (set (match_operand:QI 0 "register_operand") - (match_operator 1 "comparison_operator" + (match_operator 1 "ix86_fp_comparison_operator" [(reg:CC FLAGS_REG) (const_int 0)]))] "TARGET_80387 || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)" { - if (TARGET_AVX10_2_256 && !flag_trapping_math) - ix86_expand_setcc (operands[0], GET_CODE (operands[1]), - operands[2], operands[3]); - else + rtx op2 = operands[2], op3 = operands[3]; + if (!TARGET_AVX10_2_256 || 
flag_trapping_math) { - rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[2]); - rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[3]); - rtx res = emit_store_flag_force (operands[0], GET_CODE (operands[1]), - op1, op2, SFmode, 0, 1); - if (!rtx_equal_p (res, operands[0])) - emit_move_insn (operands[0], res); + op2 = ix86_expand_fast_convert_bf_to_sf (operands[2]); + op3 = ix86_expand_fast_convert_bf_to_sf (operands[3]); } + ix86_expand_setcc (operands[0], GET_CODE (operands[1]), + op2, op3); DONE; }) diff --git a/gcc/testsuite/gcc.target/i386/pr117495.c b/gcc/testsuite/gcc.target/i386/pr117495.c new file mode 100644 index 000000000000..274b6cef361e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117495.c @@ -0,0 +1,26 @@ +/* PR target/117495 */ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v3 -fno-trapping-math" } */ +/* { dg-final { scan-assembler-times "vcomsbf16" 2 } } */ + +__attribute__((target("avx10.2"))) +int foo (int b, int x) +{ + return (__bf16) b < x; +} + +int foo2 (int b, int x) +{ + return (__bf16) b < x; +} + +__attribute__((target("avx10.2"))) +int foo3 (__bf16 b, __bf16 x) +{ + return (__bf16) b < x; +} + +int foo4 (__bf16 b, __bf16 x) +{ + return (__bf16) b < x; +} -- GitLab