From 3f1a08d9d731975d4061c306837ab28d52f37c7e Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Mon, 24 May 2021 10:57:52 +0800
Subject: [PATCH] For 128/256-bit vec_cond_expr, When mask operands is lt reg
 const0_rtx, blendv can be used instead of avx512 mask.

gcc/ChangeLog:

	PR target/100648
	* config/i386/sse.md (*avx_cmp<mode>3_lt): New
	define_insn_and_split.
	(*avx_cmp<mode>3_ltint): Ditto.
	(*avx2_pcmp<mode>3_3): Ditto.
	(*avx2_pcmp<mode>3_4): Ditto.
	(*avx2_pcmp<mode>3_5): Ditto.

gcc/testsuite/ChangeLog:

	PR target/100648
	* g++.target/i386/avx2-pr54700-2.C: Adjust testcase.
	* g++.target/i386/avx512vl-pr54700-1a.C: New test.
	* g++.target/i386/avx512vl-pr54700-1b.C: New test.
	* g++.target/i386/avx512vl-pr54700-2a.C: New test.
	* g++.target/i386/avx512vl-pr54700-2b.C: New test.
	* gcc.target/i386/avx512vl-pr100648.c: New test.
	* gcc.target/i386/avx512vl-blendv-1.c: New test.
	* gcc.target/i386/avx512vl-blendv-2.c: New test.
---
 gcc/config/i386/sse.md                        | 152 ++++++++++++++++++
 .../g++.target/i386/avx2-pr54700-2.C          |   8 +-
 .../g++.target/i386/avx512vl-pr54700-1a.C     |   9 ++
 .../g++.target/i386/avx512vl-pr54700-1b.C     |   9 ++
 .../g++.target/i386/avx512vl-pr54700-2a.C     |  17 ++
 .../g++.target/i386/avx512vl-pr54700-2b.C     |  17 ++
 .../gcc.target/i386/avx512vl-blendv-1.c       |  51 ++++++
 .../gcc.target/i386/avx512vl-blendv-2.c       |  41 +++++
 .../gcc.target/i386/avx512vl-pr100648.c       |  21 +++
 9 files changed, 324 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.target/i386/avx512vl-pr54700-1a.C
 create mode 100644 gcc/testsuite/g++.target/i386/avx512vl-pr54700-1b.C
 create mode 100644 gcc/testsuite/g++.target/i386/avx512vl-pr54700-2a.C
 create mode 100644 gcc/testsuite/g++.target/i386/avx512vl-pr54700-2b.C
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-blendv-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-blendv-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr100648.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 310063544a61..ffcc0c819645 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -3048,6 +3048,68 @@
 	  UNSPEC_PCMP))]
   "operands[5] = GEN_INT (INTVAL (operands[5]) ^ 4);")
 
+(define_insn_and_split "*avx_cmp<mode>3_lt"
+ [(set (match_operand:VF_128_256  0 "register_operand")
+	(vec_merge:VF_128_256
+	  (match_operand:VF_128_256 1 "vector_operand")
+	  (match_operand:VF_128_256 2 "vector_operand")
+	  (unspec:<avx512fmaskmode>
+	    [(match_operand:<sseintvecmode> 3 "register_operand")
+	     (match_operand:<sseintvecmode> 4 "const0_operand")
+	     (match_operand:SI 5 "const_0_to_7_operand")]
+	     UNSPEC_PCMP)))]
+  "TARGET_AVX512VL && ix86_pre_reload_split ()
+  /* LT or GE 0 */
+  && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2]))
+      || (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:VF_128_256
+	  [(match_dup 2)
+	   (match_dup 1)
+	   (lt:VF_128_256
+	     (match_dup 3)
+	     (match_dup 4))]
+	    UNSPEC_BLENDV))]
+{
+  if (INTVAL (operands[5]) == 5)
+    std::swap (operands[1], operands[2]);
+})
+
+(define_insn_and_split "*avx_cmp<mode>3_ltint"
+ [(set (match_operand:VI48_AVX  0 "register_operand")
+	(vec_merge:VI48_AVX
+	  (match_operand:VI48_AVX 1 "vector_operand")
+	  (match_operand:VI48_AVX 2 "vector_operand")
+	  (unspec:<avx512fmaskmode>
+	    [(match_operand:VI48_AVX 3 "register_operand")
+	     (match_operand:VI48_AVX 4 "const0_operand")
+	     (match_operand:SI 5 "const_0_to_7_operand")]
+	     UNSPEC_PCMP)))]
+  "TARGET_AVX512VL && ix86_pre_reload_split ()
+  /* LT or GE 0 */
+  && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2]))
+      || (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:<ssebytemode>
+	  [(match_dup 2)
+	   (match_dup 1)
+	   (subreg:<ssebytemode>
+	     (lt:VI48_AVX
+	      (match_dup 3)
+	      (match_dup 4)) 0)]
+	    UNSPEC_BLENDV))]
+{
+  if (INTVAL (operands[5]) == 5)
+    std::swap (operands[1], operands[2]);
+  operands[0] = gen_lowpart (<ssebytemode>mode, operands[0]);
+  operands[1] = gen_lowpart (<ssebytemode>mode, operands[1]);
+  operands[2] = gen_lowpart (<ssebytemode>mode, operands[2]);
+})
+
 (define_insn "avx_vmcmp<mode>3"
   [(set (match_operand:VF_128 0 "register_operand" "=x")
 	(vec_merge:VF_128
@@ -13063,6 +13125,96 @@
   DONE;
 })
 
+(define_insn_and_split "*avx2_pcmp<mode>3_3"
+ [(set (match_operand:VI1_AVX2  0 "register_operand")
+	(vec_merge:VI1_AVX2
+	  (match_operand:VI1_AVX2 1 "vector_operand")
+	  (match_operand:VI1_AVX2 2 "vector_operand")
+	  (unspec:<avx512fmaskmode>
+	    [(match_operand:VI1_AVX2 3 "register_operand")
+	     (match_operand:VI1_AVX2 4 "const0_operand")
+	     (match_operand:SI 5 "const_0_to_7_operand")]
+	     UNSPEC_PCMP)))]
+  "TARGET_AVX512VL && ix86_pre_reload_split ()
+  /* LT or GE 0 */
+  && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2]))
+      || (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:VI1_AVX2
+	  [(match_dup 2)
+	   (match_dup 1)
+	   (lt:VI1_AVX2
+	     (match_dup 3)
+	     (match_dup 4))]
+	     UNSPEC_BLENDV))]
+{
+  if (INTVAL (operands[5]) == 5)
+    std::swap (operands[1], operands[2]);
+})
+
+(define_insn_and_split "*avx2_pcmp<mode>3_4"
+ [(set (match_operand:VI1_AVX2  0 "register_operand")
+	(vec_merge:VI1_AVX2
+	  (match_operand:VI1_AVX2 1 "vector_operand")
+	  (match_operand:VI1_AVX2 2 "vector_operand")
+	  (unspec:<avx512fmaskmode>
+	    [(subreg:VI1_AVX2 (not (match_operand 3 "register_operand")) 0)
+	     (match_operand:VI1_AVX2 4 "const0_operand")
+	     (match_operand:SI 5 "const_0_to_7_operand")]
+	     UNSPEC_PCMP)))]
+  "TARGET_AVX512VL && ix86_pre_reload_split ()
+  && GET_MODE_CLASS (GET_MODE (operands[3])) == MODE_VECTOR_INT
+  && GET_MODE_SIZE (GET_MODE (operands[3])) == <MODE_SIZE>
+  /* LT or GE 0 */
+  && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[1]))
+      || (INTVAL (operands[5]) == 5 && !MEM_P (operands[2])))"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:VI1_AVX2
+	  [(match_dup 1)
+	   (match_dup 2)
+	   (lt:VI1_AVX2
+	     (match_dup 3)
+	     (match_dup 4))]
+	     UNSPEC_BLENDV))]
+{
+  if (INTVAL (operands[5]) == 1)
+    std::swap (operands[1], operands[2]);
+  operands[3] = gen_lowpart (<MODE>mode, operands[3]);
+})
+
+(define_insn_and_split "*avx2_pcmp<mode>3_5"
+ [(set (match_operand:VI1_AVX2  0 "register_operand")
+	(vec_merge:VI1_AVX2
+	  (match_operand:VI1_AVX2 1 "vector_operand")
+	  (match_operand:VI1_AVX2 2 "vector_operand")
+	  (unspec:<avx512fmaskmode>
+	    [(not:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand"))
+	     (match_operand:VI1_AVX2 4 "const0_operand")
+	     (match_operand:SI 5 "const_0_to_7_operand")]
+	     UNSPEC_PCMP)))]
+  "TARGET_AVX512VL && ix86_pre_reload_split ()
+  /* LT or GE 0 */
+  && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[1]))
+      || (INTVAL (operands[5]) == 5 && !MEM_P (operands[2])))"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:VI1_AVX2
+	  [(match_dup 1)
+	   (match_dup 2)
+	   (lt:VI1_AVX2
+	     (match_dup 3)
+	     (match_dup 4))]
+	     UNSPEC_BLENDV))]
+{
+  if (INTVAL (operands[5]) == 1)
+    std::swap (operands[1], operands[2]);
+})
+
 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
 	(unspec:<avx512fmaskmode>
diff --git a/gcc/testsuite/g++.target/i386/avx2-pr54700-2.C b/gcc/testsuite/g++.target/i386/avx2-pr54700-2.C
index c9054e5dee7b..e7a85c34b175 100644
--- a/gcc/testsuite/g++.target/i386/avx2-pr54700-2.C
+++ b/gcc/testsuite/g++.target/i386/avx2-pr54700-2.C
@@ -2,9 +2,15 @@
 /* { dg-do run { target avx2 } } */
 /* { dg-options "-O2 -std=c++14 -mavx2 -mno-xop -mno-avx512f" } */
 
-#include "avx2-check.h"
+#ifndef CHECK_H
+#define CHECK_H "avx2-check.h"
+#endif
 
+#ifndef TEST
 #define TEST avx2_test
+#endif
+
+#include CHECK_H
 
 #include "avx2-pr54700-1.C"
 
diff --git a/gcc/testsuite/g++.target/i386/avx512vl-pr54700-1a.C b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-1a.C
new file mode 100644
index 000000000000..fedc3aad0190
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-1a.C
@@ -0,0 +1,9 @@
+/* PR target/100648 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -std=c++14 -mavx2 -mno-xop -mavx512vl -mavx512bw" } */
+/* { dg-final { scan-assembler-not "vpcmpgt\[bdq]" } } */
+/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
+/* { dg-final { scan-assembler-times "vblendvps" 4 } } */
+/* { dg-final { scan-assembler-times "vblendvpd" 4 } } */
+
+#include "avx2-pr54700-1.C"
diff --git a/gcc/testsuite/g++.target/i386/avx512vl-pr54700-1b.C b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-1b.C
new file mode 100644
index 000000000000..03f934356505
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-1b.C
@@ -0,0 +1,9 @@
+/* PR target/100648 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -std=c++14 -mavx512vl -mavx512bw -mno-xop" } */
+/* { dg-final { scan-assembler-not "pcmpgt\[bdq]" } } */
+/* { dg-final { scan-assembler-times "pblendvb" 2 } } */
+/* { dg-final { scan-assembler-times "blendvps" 4 } } */
+/* { dg-final { scan-assembler-times "blendvpd" 4 } } */
+
+#include "sse4_1-pr54700-1.C"
diff --git a/gcc/testsuite/g++.target/i386/avx512vl-pr54700-2a.C b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-2a.C
new file mode 100644
index 000000000000..687a8c4fd8f0
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-2a.C
@@ -0,0 +1,17 @@
+/* PR target/100648  */
+/* { dg-do run { target avx2 } } */
+/* { dg-options "-O2 -std=c++14 -mavx2 -mavx512vl -mavx512bw" } */
+
+#ifndef CHECK_H
+#define CHECK_H "avx512f-helper.h"
+#endif
+
+#ifndef TEST
+#define TEST_test_256
+#endif
+
+#include CHECK_H
+#include "avx2-pr54700-2.C"
+
+#define AVX512VL
+#define AVX512BW
diff --git a/gcc/testsuite/g++.target/i386/avx512vl-pr54700-2b.C b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-2b.C
new file mode 100644
index 000000000000..40450a90c52b
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-2b.C
@@ -0,0 +1,17 @@
+/* PR target/pr100648 */
+/* { dg-do run { target sse4 } } */
+/* { dg-options "-O2 -std=c++14 -msse4 -mavx512vl -mavx512bw -mno-xop" } */
+
+#ifndef CHECK_H
+#define CHECK_H "avx512f-helper.h"
+#endif
+
+#ifndef TEST
+#define TEST_test_128
+#endif
+
+#include CHECK_H
+#include "sse4_1-pr54700-2.C"
+
+#define AVX512VL
+#define AVX512BW
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-blendv-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-blendv-1.c
new file mode 100644
index 000000000000..6aa004b5e9fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-blendv-1.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times {pblendvb[\t ]*%xmm} 1 } } */
+/* { dg-final { scan-assembler-times {pblendvb[\t ]*%ymm} 1 } } */
+/* { dg-final { scan-assembler-times {blendvps[\t ]*%xmm} 1 } } */
+/* { dg-final { scan-assembler-times {blendvps[\t ]*%ymm} 1 } } */
+/* { dg-final { scan-assembler-times {blendvpd[\t ]*%xmm} 1 } } */
+/* { dg-final { scan-assembler-times {blendvpd[\t ]*%ymm} 1 } } */
+
+typedef float v4sf __attribute__ ((vector_size (16)));
+typedef float v8sf __attribute__ ((vector_size (32)));
+typedef double v2df __attribute__ ((vector_size (16)));
+typedef double v4df __attribute__ ((vector_size (32)));
+typedef char v16qi __attribute__ ((vector_size (16)));
+typedef char v32qi __attribute__ ((vector_size (32)));
+
+v4sf
+foo (v4sf a, v4sf b, v4sf c)
+{
+  return __builtin_ia32_blendvps (a, b, c);
+}
+
+v8sf
+foo2 (v8sf a, v8sf b, v8sf c)
+{
+  return __builtin_ia32_blendvps256 (a, b, c);
+}
+
+v2df
+foo3 (v2df a, v2df b, v2df c)
+{
+  return __builtin_ia32_blendvpd (a, b, c);
+}
+
+v4df
+foo4 (v4df a, v4df b, v4df c)
+{
+  return __builtin_ia32_blendvpd256 (a, b, c);
+}
+
+v16qi
+foo5 (v16qi a, v16qi b, v16qi c)
+{
+  return __builtin_ia32_pblendvb128 (a, b, c);
+}
+
+v32qi
+foo6 (v32qi a, v32qi b, v32qi c)
+{
+  return __builtin_ia32_pblendvb256 (a, b, c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-blendv-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-blendv-2.c
new file mode 100644
index 000000000000..daddcd5a6859
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-blendv-2.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-not {pblendv} } } */
+/* { dg-final { scan-assembler-not {blendvp} } } */
+
+#include <x86intrin.h>
+__m128
+foo (__m128 a, __m128 b)
+{
+  return _mm_blendv_ps (a, b, _mm_setzero_ps ());
+}
+
+__m256
+foo2 (__m256 a, __m256 b)
+{
+  return _mm256_blendv_ps (a, b, _mm256_set1_ps (-1.0));
+}
+
+__m128d
+foo3 (__m128d a, __m128d b, __m128d c)
+{
+  return _mm_blendv_pd (a, b, _mm_set1_pd (1.0));
+}
+
+__m256d
+foo4 (__m256d a, __m256d b, __m256d c)
+{
+  return _mm256_blendv_pd (a, b, _mm256_set1_pd (-134.3));
+}
+
+__m128i
+foo5 (__m128i a, __m128i b, __m128i c)
+{
+  return _mm_blendv_epi8 (a, b, _mm_set1_epi8 (3));
+}
+
+__m256i
+foo6 (__m256i a, __m256i b, __m256i c)
+{
+  return _mm256_blendv_epi8 (a, b, _mm256_set1_epi8 (-22));
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr100648.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr100648.c
new file mode 100644
index 000000000000..2ef7c7bcd964
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr100648.c
@@ -0,0 +1,21 @@
+/* PR target/100648.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl -mavx512bw -masm=att" } */
+/* { dg-final { scan-assembler-times "\tvpblendvb\t" 2 } } */
+/* { dg-final { scan-assembler-not "\tvpcmpeq" } } */
+/* { dg-final { scan-assembler-not "\tvpandn" } } */
+#include <x86intrin.h>
+
+__m256i
+f1 (__m256i a, __m256i b, __m256i mask)
+{
+  return _mm256_blendv_epi8(a, b, 
+    _mm256_andnot_si256(mask, _mm256_set1_epi8(255)));
+}
+
+__m128i
+f2 (__m128i a, __m128i b, __m128i mask)
+{
+  return _mm_blendv_epi8(a, b, 
+    _mm_andnot_si128(mask, _mm_set1_epi8(255)));
+}
-- 
GitLab