From 62df24e50039ae04aa3b940e680cffd9041ef5bf Mon Sep 17 00:00:00 2001 From: Levy Hsu <admin@levyhsu.com> Date: Tue, 27 Aug 2024 14:22:20 +0930 Subject: [PATCH] i386: Support partial vectorized V2BF/V4BF smaxmin This patch supports smaxmin for partial vectorized V2BF/V4BF. gcc/ChangeLog: * config/i386/mmx.md (<code><mode>3): New define_expand for V2BF/V4BF smaxmin. gcc/testsuite/ChangeLog: * gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c: New test. --- gcc/config/i386/mmx.md | 19 ++++++++++ .../avx10_2-partial-bf-vector-smaxmin-1.c | 36 +++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 076ea2e2fb24..fac90cfd4d4c 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -2098,6 +2098,25 @@ DONE; }) +(define_expand "<code><mode>3" + [(set (match_operand:VBF_32_64 0 "register_operand") + (smaxmin:VBF_32_64 + (match_operand:VBF_32_64 1 "nonimmediate_operand") + (match_operand:VBF_32_64 2 "nonimmediate_operand")))] + "TARGET_AVX10_2_256" +{ + rtx op0 = gen_reg_rtx (V8BFmode); + rtx op1 = lowpart_subreg (V8BFmode, + force_reg (<MODE>mode, operands[1]), <MODE>mode); + rtx op2 = lowpart_subreg (V8BFmode, + force_reg (<MODE>mode, operands[2]), <MODE>mode); + + emit_insn (gen_<code>v8bf3 (op0, op1, op2)); + + emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8BFmode)); + DONE; +}) + (define_expand "sqrt<mode>2" [(set (match_operand:VHF_32_64 0 "register_operand") (sqrt:VHF_32_64 diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c new file mode 100644 index 000000000000..0a7cc58e29d4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c @@ -0,0 +1,36 @@ +/* { dg-do compile { target { ! 
ia32 } } } */
+/* { dg-options "-mavx10.2 -Ofast" } */
+/* { dg-final { scan-assembler-times "vmaxpbf16" 2 } } */
+/* { dg-final { scan-assembler-times "vminpbf16" 2 } } */
+
+void
+maxpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{
+  int i;
+  for (i = 0; i < 4; i++) /* 4 x __bf16 = 64 bits: the V4BF max case.  */
+    dest[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+void
+maxpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{
+  int i;
+  for (i = 0; i < 2; i++) /* 2 x __bf16 = 32 bits: the V2BF max case.  */
+    dest[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+void
+minpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{
+  int i;
+  for (i = 0; i < 4; i++) /* 4 x __bf16 = 64 bits: the V4BF min case.  */
+    dest[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}
+
+void
+minpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{
+  int i;
+  for (i = 0; i < 2; i++) /* 2 x __bf16 = 32 bits: the V2BF min case.  */
+    dest[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}
-- 
GitLab