From 3f1a08d9d731975d4061c306837ab28d52f37c7e Mon Sep 17 00:00:00 2001 From: liuhongt <hongtao.liu@intel.com> Date: Mon, 24 May 2021 10:57:52 +0800 Subject: [PATCH] For 128/256-bit vec_cond_expr, When mask operands is lt reg const0_rtx, blendv can be used instead of avx512 mask. gcc/ChangeLog: PR target/100648 * config/i386/sse.md (*avx_cmp<mode>3_lt): New define_insn_and_split. (*avx_cmp<mode>3_ltint): Ditto. (*avx2_pcmp<mode>3_3): Ditto. (*avx2_pcmp<mode>3_4): Ditto. (*avx2_pcmp<mode>3_5): Ditto. gcc/testsuite/ChangeLog: PR target/100648 * g++.target/i386/avx2-pr54700-2.C: Adjust testcase. * g++.target/i386/avx512vl-pr54700-1a.C: New test. * g++.target/i386/avx512vl-pr54700-1b.C: New test. * g++.target/i386/avx512vl-pr54700-2a.C: New test. * g++.target/i386/avx512vl-pr54700-2b.C: New test. * gcc.target/i386/avx512vl-pr100648.c: New test. * gcc.target/i386/avx512vl-blendv-1.c: New test. * gcc.target/i386/avx512vl-blendv-2.c: New test. --- gcc/config/i386/sse.md | 152 ++++++++++++++++++ .../g++.target/i386/avx2-pr54700-2.C | 8 +- .../g++.target/i386/avx512vl-pr54700-1a.C | 9 ++ .../g++.target/i386/avx512vl-pr54700-1b.C | 9 ++ .../g++.target/i386/avx512vl-pr54700-2a.C | 17 ++ .../g++.target/i386/avx512vl-pr54700-2b.C | 17 ++ .../gcc.target/i386/avx512vl-blendv-1.c | 51 ++++++ .../gcc.target/i386/avx512vl-blendv-2.c | 41 +++++ .../gcc.target/i386/avx512vl-pr100648.c | 21 +++ 9 files changed, 324 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.target/i386/avx512vl-pr54700-1a.C create mode 100644 gcc/testsuite/g++.target/i386/avx512vl-pr54700-1b.C create mode 100644 gcc/testsuite/g++.target/i386/avx512vl-pr54700-2a.C create mode 100644 gcc/testsuite/g++.target/i386/avx512vl-pr54700-2b.C create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-blendv-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-blendv-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr100648.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 310063544a61..ffcc0c819645 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3048,6 +3048,68 @@ UNSPEC_PCMP))] "operands[5] = GEN_INT (INTVAL (operands[5]) ^ 4);") +(define_insn_and_split "*avx_cmp<mode>3_lt" + [(set (match_operand:VF_128_256 0 "register_operand") + (vec_merge:VF_128_256 + (match_operand:VF_128_256 1 "vector_operand") + (match_operand:VF_128_256 2 "vector_operand") + (unspec:<avx512fmaskmode> + [(match_operand:<sseintvecmode> 3 "register_operand") + (match_operand:<sseintvecmode> 4 "const0_operand") + (match_operand:SI 5 "const_0_to_7_operand")] + UNSPEC_PCMP)))] + "TARGET_AVX512VL && ix86_pre_reload_split () + /* LT or GE 0 */ + && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2])) + || (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:VF_128_256 + [(match_dup 2) + (match_dup 1) + (lt:VF_128_256 + (match_dup 3) + (match_dup 4))] + UNSPEC_BLENDV))] +{ + if (INTVAL (operands[5]) == 5) + std::swap (operands[1], operands[2]); +}) + +(define_insn_and_split "*avx_cmp<mode>3_ltint" + [(set (match_operand:VI48_AVX 0 "register_operand") + (vec_merge:VI48_AVX + (match_operand:VI48_AVX 1 "vector_operand") + (match_operand:VI48_AVX 2 "vector_operand") + (unspec:<avx512fmaskmode> + [(match_operand:VI48_AVX 3 "register_operand") + (match_operand:VI48_AVX 4 "const0_operand") + (match_operand:SI 5 "const_0_to_7_operand")] + UNSPEC_PCMP)))] + "TARGET_AVX512VL && ix86_pre_reload_split () + /* LT or GE 0 */ + && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2])) + || (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:<ssebytemode> + [(match_dup 2) + (match_dup 1) + (subreg:<ssebytemode> + (lt:VI48_AVX + (match_dup 3) + (match_dup 4)) 0)] + UNSPEC_BLENDV))] +{ + if (INTVAL (operands[5]) == 5) + std::swap (operands[1], operands[2]); + operands[0] = gen_lowpart (<ssebytemode>mode, operands[0]); + operands[1] = gen_lowpart (<ssebytemode>mode, operands[1]); + operands[2] = gen_lowpart (<ssebytemode>mode, operands[2]); +}) + (define_insn "avx_vmcmp<mode>3" [(set (match_operand:VF_128 0 "register_operand" "=x") (vec_merge:VF_128 @@ -13063,6 +13125,96 @@ DONE; }) +(define_insn_and_split "*avx2_pcmp<mode>3_3" + [(set (match_operand:VI1_AVX2 0 "register_operand") + (vec_merge:VI1_AVX2 + (match_operand:VI1_AVX2 1 "vector_operand") + (match_operand:VI1_AVX2 2 "vector_operand") + (unspec:<avx512fmaskmode> + [(match_operand:VI1_AVX2 3 "register_operand") + (match_operand:VI1_AVX2 4 "const0_operand") + (match_operand:SI 5 "const_0_to_7_operand")] + UNSPEC_PCMP)))] + "TARGET_AVX512VL && ix86_pre_reload_split () + /* LT or GE 0 */ + && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2])) + || (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:VI1_AVX2 + [(match_dup 2) + (match_dup 1) + (lt:VI1_AVX2 + (match_dup 3) + (match_dup 4))] + UNSPEC_BLENDV))] +{ + if (INTVAL (operands[5]) == 5) + std::swap (operands[1], operands[2]); +}) + +(define_insn_and_split "*avx2_pcmp<mode>3_4" + [(set (match_operand:VI1_AVX2 0 "register_operand") + (vec_merge:VI1_AVX2 + (match_operand:VI1_AVX2 1 "vector_operand") + (match_operand:VI1_AVX2 2 "vector_operand") + (unspec:<avx512fmaskmode> + [(subreg:VI1_AVX2 (not (match_operand 3 "register_operand")) 0) + (match_operand:VI1_AVX2 4 "const0_operand") + (match_operand:SI 5 "const_0_to_7_operand")] + UNSPEC_PCMP)))] + "TARGET_AVX512VL && ix86_pre_reload_split () + && GET_MODE_CLASS (GET_MODE (operands[3])) == MODE_VECTOR_INT + && GET_MODE_SIZE (GET_MODE (operands[3])) == <MODE_SIZE> + /* LT or GE 0 */ + && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[1])) + || (INTVAL (operands[5]) == 5 && !MEM_P (operands[2])))" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:VI1_AVX2 + [(match_dup 1) + (match_dup 2) + (lt:VI1_AVX2 + (match_dup 3) + (match_dup 4))] + UNSPEC_BLENDV))] +{ + if (INTVAL (operands[5]) == 1) + std::swap (operands[1], operands[2]); + operands[3] = gen_lowpart (<MODE>mode, operands[3]); +}) + +(define_insn_and_split "*avx2_pcmp<mode>3_5" + [(set (match_operand:VI1_AVX2 0 "register_operand") + (vec_merge:VI1_AVX2 + (match_operand:VI1_AVX2 1 "vector_operand") + (match_operand:VI1_AVX2 2 "vector_operand") + (unspec:<avx512fmaskmode> + [(not:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand")) + (match_operand:VI1_AVX2 4 "const0_operand") + (match_operand:SI 5 "const_0_to_7_operand")] + UNSPEC_PCMP)))] + "TARGET_AVX512VL && ix86_pre_reload_split () + /* LT or GE 0 */ + && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[1])) + || (INTVAL (operands[5]) == 5 && !MEM_P (operands[2])))" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:VI1_AVX2 + [(match_dup 1) + (match_dup 2) + (lt:VI1_AVX2 + (match_dup 3) + (match_dup 4))] + UNSPEC_BLENDV))] +{ + if (INTVAL (operands[5]) == 1) + std::swap (operands[1], operands[2]); +}) + (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand") (unspec:<avx512fmaskmode> diff --git a/gcc/testsuite/g++.target/i386/avx2-pr54700-2.C b/gcc/testsuite/g++.target/i386/avx2-pr54700-2.C index c9054e5dee7b..e7a85c34b175 100644 --- a/gcc/testsuite/g++.target/i386/avx2-pr54700-2.C +++ b/gcc/testsuite/g++.target/i386/avx2-pr54700-2.C @@ -2,9 +2,15 @@ /* { dg-do run { target avx2 } } */ /* { dg-options "-O2 -std=c++14 -mavx2 -mno-xop -mno-avx512f" } */ -#include "avx2-check.h" +#ifndef CHECK_H +#define CHECK_H "avx2-check.h" +#endif +#ifndef TEST #define TEST avx2_test +#endif + +#include CHECK_H #include "avx2-pr54700-1.C" diff --git a/gcc/testsuite/g++.target/i386/avx512vl-pr54700-1a.C b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-1a.C new file mode 100644 index 000000000000..fedc3aad0190 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-1a.C @@ -0,0 +1,9 @@ +/* PR target/100648 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -std=c++14 -mavx2 -mno-xop -mavx512vl -mavx512bw" } */ +/* { dg-final { scan-assembler-not "vpcmpgt\[bdq]" } } */ +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */ +/* { dg-final { scan-assembler-times "vblendvps" 4 } } */ +/* { dg-final { scan-assembler-times "vblendvpd" 4 } } */ + +#include "avx2-pr54700-1.C" diff --git a/gcc/testsuite/g++.target/i386/avx512vl-pr54700-1b.C b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-1b.C new file mode 100644 index 000000000000..03f934356505 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-1b.C @@ -0,0 +1,9 @@ +/* PR target/100648 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -std=c++14 -mavx512vl -mavx512bw -mno-xop" } */ +/* { dg-final { scan-assembler-not "pcmpgt\[bdq]" } } */ +/* { dg-final { scan-assembler-times "pblendvb" 2 } } */ +/* { dg-final { scan-assembler-times "blendvps" 4 } } */ +/* { dg-final { scan-assembler-times "blendvpd" 4 } } */ + +#include "sse4_1-pr54700-1.C" diff --git a/gcc/testsuite/g++.target/i386/avx512vl-pr54700-2a.C b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-2a.C new file mode 100644 index 000000000000..687a8c4fd8f0 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-2a.C @@ -0,0 +1,17 @@ +/* PR target/100648 */ +/* { dg-do run { target avx2 } } */ +/* { dg-options "-O2 -std=c++14 -mavx2 -mavx512vl -mavx512bw" } */ + +#ifndef CHECK_H +#define CHECK_H "avx512f-helper.h" +#endif + +#ifndef TEST +#define TEST_test_256 +#endif + +#include CHECK_H +#include "avx2-pr54700-2.C" + +#define AVX512VL +#define AVX512BW diff --git a/gcc/testsuite/g++.target/i386/avx512vl-pr54700-2b.C b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-2b.C new file mode 100644 index 000000000000..40450a90c52b --- /dev/null +++ b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-2b.C @@ -0,0 +1,17 @@ +/* PR target/pr100648 */ +/* { dg-do run { target sse4 } } */ +/* { dg-options "-O2 -std=c++14 -msse4 -mavx512vl -mavx512bw -mno-xop" } */ + +#ifndef CHECK_H +#define CHECK_H "avx512f-helper.h" +#endif + +#ifndef TEST +#define TEST_test_128 +#endif + +#include CHECK_H +#include "sse4_1-pr54700-2.C" + +#define AVX512VL +#define AVX512BW diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-blendv-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-blendv-1.c new file mode 100644 index 000000000000..6aa004b5e9fc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-blendv-1.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512vl" } */ +/* { dg-final { scan-assembler-times {pblendvb[\t ]*%xmm} 1 } } */ +/* { dg-final { scan-assembler-times {pblendvb[\t ]*%ymm} 1 } } */ +/* { dg-final { scan-assembler-times {blendvps[\t ]*%xmm} 1 } } */ +/* { dg-final { scan-assembler-times {blendvps[\t ]*%ymm} 1 } } */ +/* { dg-final { scan-assembler-times {blendvpd[\t ]*%xmm} 1 } } */ +/* { dg-final { scan-assembler-times {blendvpd[\t ]*%ymm} 1 } } */ + +typedef float v4sf __attribute__ ((vector_size (16))); +typedef float v8sf __attribute__ ((vector_size (32))); +typedef double v2df __attribute__ ((vector_size (16))); +typedef double v4df __attribute__ ((vector_size (32))); +typedef char v16qi __attribute__ ((vector_size (16))); +typedef char v32qi __attribute__ ((vector_size (32))); + +v4sf +foo (v4sf a, v4sf b, v4sf c) +{ + return __builtin_ia32_blendvps (a, b, c); +} + +v8sf +foo2 (v8sf a, v8sf b, v8sf c) +{ + return __builtin_ia32_blendvps256 (a, b, c); +} + +v2df +foo3 (v2df a, v2df b, v2df c) +{ + return __builtin_ia32_blendvpd (a, b, c); +} + +v4df +foo4 (v4df a, v4df b, v4df c) +{ + return __builtin_ia32_blendvpd256 (a, b, c); +} + +v16qi +foo5 (v16qi a, v16qi b, v16qi c) +{ + return __builtin_ia32_pblendvb128 (a, b, c); +} + +v32qi +foo6 (v32qi a, v32qi b, v32qi c) +{ + return __builtin_ia32_pblendvb256 (a, b, c); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-blendv-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-blendv-2.c new file mode 100644 index 000000000000..daddcd5a6859 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-blendv-2.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512vl" } */ +/* { dg-final { scan-assembler-not {pblendv} } } */ +/* { dg-final { scan-assembler-not {blendvp} } } */ + +#include <x86intrin.h> +__m128 +foo (__m128 a, __m128 b) +{ + return _mm_blendv_ps (a, b, _mm_setzero_ps ()); +} + +__m256 +foo2 (__m256 a, __m256 b) +{ + return _mm256_blendv_ps (a, b, _mm256_set1_ps (-1.0)); +} + +__m128d +foo3 (__m128d a, __m128d b, __m128d c) +{ + return _mm_blendv_pd (a, b, _mm_set1_pd (1.0)); +} + +__m256d +foo4 (__m256d a, __m256d b, __m256d c) +{ + return _mm256_blendv_pd (a, b, _mm256_set1_pd (-134.3)); +} + +__m128i +foo5 (__m128i a, __m128i b, __m128i c) +{ + return _mm_blendv_epi8 (a, b, _mm_set1_epi8 (3)); +} + +__m256i +foo6 (__m256i a, __m256i b, __m256i c) +{ + return _mm256_blendv_epi8 (a, b, _mm256_set1_epi8 (-22)); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr100648.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr100648.c new file mode 100644 index 000000000000..2ef7c7bcd964 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr100648.c @@ -0,0 +1,21 @@ +/* PR target/100648. */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512vl -mavx512bw -masm=att" } */ +/* { dg-final { scan-assembler-times "\tvpblendvb\t" 2 } } */ +/* { dg-final { scan-assembler-not "\tvpcmpeq" } } */ +/* { dg-final { scan-assembler-not "\tvpandn" } } */ +#include <x86intrin.h> + +__m256i +f1 (__m256i a, __m256i b, __m256i mask) +{ + return _mm256_blendv_epi8(a, b, + _mm256_andnot_si256(mask, _mm256_set1_epi8(255))); +} + +__m128i +f2 (__m128i a, __m128i b, __m128i mask) +{ + return _mm_blendv_epi8(a, b, + _mm_andnot_si128(mask, _mm_set1_epi8(255))); +} -- GitLab