From f6fe2962daf7b8d8532c768c3b9eab00f99cce5b Mon Sep 17 00:00:00 2001
From: "Zhang, Jun" <jun.zhang@intel.com>
Date: Mon, 26 Aug 2024 10:53:52 +0800
Subject: [PATCH] AVX10.2: Support vector copy instructions

gcc/ChangeLog:

	* config.gcc: Add avx10_2copyintrin.h.
	* config/i386/i386.md (avx10_2): New isa attribute.
	* config/i386/immintrin.h: Include avx10_2copyintrin.h.
	* config/i386/sse.md
	(sse_movss_<mode>): Add new constraints to handle AVX10.2.
	(vec_set<mode>_0): Ditto.
	(@vec_set<mode>_0): Ditto.
	(vec_set<mode>_0): Ditto.
	(avx512fp16_mov<mode>): Ditto.
	(*vec_set<mode>_0_1): New split.
	* config/i386/avx10_2copyintrin.h: New file.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx10_2-vmovd-1.c: New test.
	* gcc.target/i386/avx10_2-vmovd-2.c: Ditto.
	* gcc.target/i386/avx10_2-vmovw-1.c: Ditto.
	* gcc.target/i386/avx10_2-vmovw-2.c: Ditto.
---
 gcc/config.gcc                                |   3 +-
 gcc/config/i386/avx10_2copyintrin.h           |  38 +++++
 gcc/config/i386/i386.md                       |   3 +-
 gcc/config/i386/immintrin.h                   |   2 +
 gcc/config/i386/sse.md                        | 138 +++++++++++-------
 .../gcc.target/i386/avx10_2-vmovd-1.c         |  48 ++++++
 .../gcc.target/i386/avx10_2-vmovd-2.c         |  44 ++++++
 .../gcc.target/i386/avx10_2-vmovw-1.c         |  69 +++++++++
 .../gcc.target/i386/avx10_2-vmovw-2.c         |  64 ++++++++
 9 files changed, 356 insertions(+), 53 deletions(-)
 create mode 100644 gcc/config/i386/avx10_2copyintrin.h
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vmovd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vmovd-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vmovw-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vmovw-2.c

diff --git a/gcc/config.gcc b/gcc/config.gcc
index cd8a34b292fd..e887c9c74321 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -457,7 +457,8 @@ i[34567]86-*-* | x86_64-*-*)
 		       avx10_2convertintrin.h avx10_2-512convertintrin.h
 		       avx10_2bf16intrin.h avx10_2-512bf16intrin.h
 		       avx10_2satcvtintrin.h avx10_2-512satcvtintrin.h
-		       avx10_2minmaxintrin.h avx10_2-512minmaxintrin.h"
+		       avx10_2minmaxintrin.h avx10_2-512minmaxintrin.h
+		       avx10_2copyintrin.h"
 	;;
 ia64-*-*)
 	extra_headers=ia64intrin.h
diff --git a/gcc/config/i386/avx10_2copyintrin.h b/gcc/config/i386/avx10_2copyintrin.h
new file mode 100644
index 000000000000..f1150c71dbf0
--- /dev/null
+++ b/gcc/config/i386/avx10_2copyintrin.h
@@ -0,0 +1,38 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of GCC.
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <avx10_2copyintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX10_2COPYINTRIN_H_INCLUDED
+#define _AVX10_2COPYINTRIN_H_INCLUDED
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_epi32 (__m128i __A)
+{
+  return _mm_set_epi32 (0, 0, 0, ((__v4si) __A)[0]);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_epi16 (__m128i __A)
+{
+  return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, ((__v8hi) __A)[0]);
+}
+
+#endif /* _AVX10_2COPYINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 36108e5c2c9e..34f9214115ea 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -579,7 +579,7 @@
 		    noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni,
 		    avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert,
 		    avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl,
-		    vaes_avx512vl,noapx_nf"
+		    vaes_avx512vl,noapx_nf,avx10_2"
   (const_string "base"))
 
 ;; The (bounding maximum) length of an instruction immediate.
@@ -976,6 +976,7 @@
 	   (symbol_ref "TARGET_APX_NDD && Pmode == DImode")
 	 (eq_attr "isa" "vaes_avx512vl")
 	   (symbol_ref "TARGET_VAES && TARGET_AVX512VL")
+	 (eq_attr "isa" "avx10_2") (symbol_ref "TARGET_AVX10_2_256")
 
 	 (eq_attr "mmx_isa" "native")
 	   (symbol_ref "!TARGET_MMX_WITH_SSE")
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index 0d5af155c367..6b8035e64671 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -160,4 +160,6 @@
 
 #include <avx10_2-512minmaxintrin.h>
 
+#include <avx10_2copyintrin.h>
+
 #endif /* _IMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index f5956f170625..a6d844d00b4c 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -11505,19 +11505,20 @@
    (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
 
 (define_insn "sse_movss_<mode>"
-  [(set (match_operand:VI4F_128 0 "register_operand"   "=x,v")
+  [(set (match_operand:VI4F_128 0 "register_operand"   "=x,v,v")
 	(vec_merge:VI4F_128
-	  (match_operand:VI4F_128 2 "register_operand" " x,v")
-	  (match_operand:VI4F_128 1 "register_operand" " 0,v")
+	  (match_operand:VI4F_128 2 "register_operand" " x,v,v")
+	  (match_operand:VI4F_128 1 "reg_or_0_operand" " 0,v,C")
 	  (const_int 1)))]
   "TARGET_SSE"
   "@
    movss\t{%2, %0|%0, %2}
-   vmovss\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,avx")
+   vmovss\t{%2, %1, %0|%0, %1, %2}
+   vmovd\t{%2, %0|%0, %2}"
+  [(set_attr "isa" "noavx,avx,avx10_2")
    (set_attr "type" "ssemov")
-   (set_attr "prefix" "orig,maybe_evex")
-   (set_attr "mode" "SF")])
+   (set_attr "prefix" "orig,maybe_evex,evex")
+   (set_attr "mode" "SF,SF,SI")])
 
 (define_insn "avx2_vec_dup<mode>"
   [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
@@ -11687,18 +11688,19 @@
 ;; see comment above inline_secondary_memory_needed function in i386.cc
 (define_insn "vec_set<mode>_0"
   [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
-	  "=Yr,*x,v,v,v,x,x,v,Yr ,?x ,x  ,m ,m   ,m")
+	  "=Yr,*x,v,v,v,v,x,x,v,Yr ,?x ,x  ,m ,m   ,m")
 	(vec_merge:VI4F_128
 	  (vec_duplicate:VI4F_128
 	    (match_operand:<ssescalarmode> 2 "general_operand"
-	  " Yr,*x,v,m,r ,m,x,v,?jrjm,?jrjm,?rm,!x,?re,!*fF"))
+	  " Yr,*x,v,v,m,r ,m,x,v,?jrjm,?jrjm,?rm,!x,?re,!*fF"))
 	  (match_operand:VI4F_128 1 "nonimm_or_0_operand"
-	  " C , C,C,C,C ,C,0,v,0  ,0  ,x  ,0 ,0   ,0")
+	  " C , C,C,C,C,C ,C,0,v,0  ,0  ,x  ,0 ,0   ,0")
 	  (const_int 1)))]
   "TARGET_SSE"
   "@
    insertps\t{$0xe, %2, %0|%0, %2, 0xe}
    insertps\t{$0xe, %2, %0|%0, %2, 0xe}
+   vmovd\t{%2, %0|%0, %2}
    vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
    %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
    %vmovd\t{%2, %0|%0, %2}
@@ -11712,22 +11714,24 @@
    #
    #"
   [(set (attr "isa")
-     (cond [(eq_attr "alternative" "0,1,8,9")
+     (cond [(eq_attr "alternative" "0,1,9,10")
 	      (const_string "sse4_noavx")
-	    (eq_attr "alternative" "2,7,10")
+		(eq_attr "alternative" "2")
+		  (const_string "avx10_2")
+	    (eq_attr "alternative" "3,8,11")
 	      (const_string "avx")
-	    (eq_attr "alternative" "3,4")
+	    (eq_attr "alternative" "4,5")
 	      (const_string "sse2")
-	    (eq_attr "alternative" "5,6")
+	    (eq_attr "alternative" "6,7")
 	      (const_string "noavx")
 	   ]
 	   (const_string "*")))
    (set (attr "type")
-     (cond [(eq_attr "alternative" "0,1,2,8,9,10")
+     (cond [(eq_attr "alternative" "0,1,3,9,10,11")
 	      (const_string "sselog")
-	    (eq_attr "alternative" "12")
-	      (const_string "imov")
 	    (eq_attr "alternative" "13")
+	      (const_string "imov")
+	    (eq_attr "alternative" "14")
 	      (const_string "fmov")
 	   ]
 	   (const_string "ssemov")))
@@ -11736,45 +11740,46 @@
 		   (const_string "gpr16")
 		   (const_string "*")))
    (set (attr "prefix_extra")
-     (if_then_else (eq_attr "alternative" "8,9,10")
+     (if_then_else (eq_attr "alternative" "9,10,11")
 		   (const_string "1")
 		   (const_string "*")))
    (set (attr "length_immediate")
-     (if_then_else (eq_attr "alternative" "8,9,10")
+     (if_then_else (eq_attr "alternative" "9,10,11")
 		   (const_string "1")
 		   (const_string "*")))
    (set (attr "prefix")
-     (cond [(eq_attr "alternative" "0,1,5,6,8,9")
+     (cond [(eq_attr "alternative" "0,1,6,7,9,10")
 	      (const_string "orig")
-	    (eq_attr "alternative" "2")
+	    (eq_attr "alternative" "2,3")
 	      (const_string "maybe_evex")
-	    (eq_attr "alternative" "3,4")
+	    (eq_attr "alternative" "4,5")
 	      (const_string "maybe_vex")
-	    (eq_attr "alternative" "7,10")
+	    (eq_attr "alternative" "8,11")
 	      (const_string "vex")
 	   ]
 	   (const_string "*")))
-   (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")
+   (set_attr "mode" "SF,SF,SI,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")
    (set (attr "preferred_for_speed")
-     (cond [(eq_attr "alternative" "4")
+     (cond [(eq_attr "alternative" "5")
 	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
 	   ]
 	   (symbol_ref "true")))])
 
 (define_insn "@vec_set<mode>_0"
   [(set (match_operand:V8_128 0 "register_operand"
-	  "=v,v,v,x,x,Yr,*x,x,x,x,v,v")
+	  "=v,v,v,v,x,x,Yr,*x,x,x,x,v,v")
 	(vec_merge:V8_128
 	  (vec_duplicate:V8_128
 	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand"
-	  " r,m,v,r,m,Yr,*x,r,m,x,r,m"))
+	  " r,m,v,v,r,m,Yr,*x,r,m,x,r,m"))
 	  (match_operand:V8_128 1 "reg_or_0_operand"
-	  " C,C,v,0,0,0 ,0 ,x,x,x,v,v")
+	  " C,C,C,v,0,0,0 ,0 ,x,x,x,v,v")
 	  (const_int 1)))]
   "TARGET_SSE2"
   "@
    vmovw\t{%k2, %0|%0, %k2}
    vmovw\t{%2, %0|%0, %2}
+   vmovw\t{%2, %0|%0, %2}
    vmovsh\t{%2, %1, %0|%0, %1, %2}
    pinsrw\t{$0, %k2, %0|%0, %k2, 0}
    pinsrw\t{$0, %2, %0|%0, %2, 0}
@@ -11786,65 +11791,92 @@
    vpinsrw\t{$0, %k2, %1, %0|%0, %1, %k2, 0}
    vpinsrw\t{$0, %2, %1, %0|%0, %1, %2, 0}"
   [(set (attr "isa")
-	(cond [(eq_attr "alternative" "0,1,2")
+	(cond [(eq_attr "alternative" "0,1,3")
 		 (const_string "avx512fp16")
-	       (eq_attr "alternative" "3,4")
+	       (eq_attr "alternative" "2")
+		 (const_string "avx10_2")
+	       (eq_attr "alternative" "4,5")
 		 (const_string "noavx")
-	       (eq_attr "alternative" "5,6")
+	       (eq_attr "alternative" "6,7")
 		 (const_string "sse4_noavx")
-	       (eq_attr "alternative" "7,8,9")
+	       (eq_attr "alternative" "8,9,10")
 		 (const_string "avx")
-	       (eq_attr "alternative" "10,11")
+	       (eq_attr "alternative" "11,12")
 		 (const_string "avx512bw")
 	      ]
 	      (const_string "*")))
    (set (attr "type")
-     (if_then_else (eq_attr "alternative" "0,1,2,5,6,9")
+     (if_then_else (eq_attr "alternative" "0,1,2,3,6,7,10")
 		   (const_string "ssemov")
 		   (const_string "sselog")))
    (set (attr "prefix_data16")
-     (if_then_else (eq_attr "alternative" "3,4")
+     (if_then_else (eq_attr "alternative" "4,5")
 		   (const_string "1")
 		   (const_string "*")))
    (set (attr "prefix_extra")
-     (if_then_else (eq_attr "alternative" "5,6,9")
+     (if_then_else (eq_attr "alternative" "6,7,10")
 		   (const_string "1")
 		   (const_string "*")))
    (set (attr "length_immediate")
-     (if_then_else (eq_attr "alternative" "0,1,2")
+     (if_then_else (eq_attr "alternative" "0,1,2,3")
 		   (const_string "*")
 		   (const_string "1")))
    (set (attr "prefix")
-	(cond [(eq_attr "alternative" "0,1,2,10,11")
+	(cond [(eq_attr "alternative" "0,1,2,3,11,12")
 		 (const_string "evex")
-	       (eq_attr "alternative" "7,8,9")
+	       (eq_attr "alternative" "8,9,10")
 		 (const_string "vex")
 	      ]
 	      (const_string "orig")))
    (set (attr "mode")
-     (if_then_else (eq_attr "alternative" "0,1,2")
+     (if_then_else (eq_attr "alternative" "0,1,2,3")
 		   (const_string "HF")
 		   (const_string "TI")))
    (set (attr "enabled")
      (cond [(and (not (match_test "<MODE>mode == V8HFmode || <MODE>mode == V8BFmode"))
-		 (eq_attr "alternative" "2"))
+		 (eq_attr "alternative" "3"))
 	      (symbol_ref "false")
 	   ]
 	   (const_string "*")))])
 
+(define_insn_and_split "*vec_set<mode>_0_1"
+  [(set (match_operand:V8_128 0 "register_operand")
+	(vec_merge:V8_128
+	  (vec_duplicate:V8_128
+	    (vec_select:<ssescalarmode>
+	      (match_operand:V8_128 2 "nonimmediate_operand")
+	      (parallel [(const_int 0)])))
+	  (match_operand:V8_128 1 "reg_or_0_operand")
+	  (const_int 1)))]
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(vec_merge:V8_128
+	  (vec_duplicate:V8_128 (match_dup 2))
+	  (match_dup 1)
+	  (const_int 1)))]
+{
+  if (register_operand (operands[2], <MODE>mode))
+    operands[2] = force_reg (<MODE>mode, operands[2]);
+  operands[2] = gen_lowpart (<ssescalarmode>mode, operands[2]);
+})
+
 ;; vmovw clears also the higer bits
 (define_insn "vec_set<mode>_0"
-  [(set (match_operand:VI2F_256_512 0 "register_operand" "=v,v")
+  [(set (match_operand:VI2F_256_512 0 "register_operand" "=v,v,v")
 	(vec_merge:VI2F_256_512
 	  (vec_duplicate:VI2F_256_512
-	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m"))
+	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,v"))
 	  (match_operand:VI2F_256_512 1 "const0_operand")
 	  (const_int 1)))]
   "TARGET_AVX512FP16"
   "@
    vmovw\t{%k2, %x0|%x0, %k2}
+   vmovw\t{%2, %x0|%x0, %2}
    vmovw\t{%2, %x0|%x0, %2}"
-  [(set_attr "type" "ssemov")
+  [(set_attr "isa" "*,*,avx10_2")
+   (set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "HF")])
 
@@ -11889,16 +11921,20 @@
 })
 
 (define_insn "avx512fp16_mov<mode>"
-  [(set (match_operand:V8_128 0 "register_operand" "=v")
+  [(set (match_operand:V8_128 0 "register_operand" "=v,v")
 	(vec_merge:V8_128
-	  (match_operand:V8_128 2 "register_operand" "v")
-	  (match_operand:V8_128 1 "register_operand" "v")
+	  (match_operand:V8_128 2 "register_operand" "v,v")
+	  (match_operand:V8_128 1 "reg_or_0_operand" "v,C")
 	  (const_int 1)))]
-  "TARGET_AVX512FP16"
-  "vmovsh\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "ssemov")
+  "TARGET_AVX512FP16
+  || (TARGET_AVX10_2_256 && const0_operand (operands[1], <MODE>mode))"
+  "@
+    vmovsh\t{%2, %1, %0|%0, %1, %2}
+    vmovw\t{%2, %0|%0, %2}"
+  [(set_attr "isa" "*,avx10_2")
+   (set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
-   (set_attr "mode" "HF")])
+   (set_attr "mode" "HF,HI")])
 
 ;; A subset is vec_setv4sf.
 (define_insn "*vec_setv4sf_sse4_1"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-1.c
new file mode 100644
index 000000000000..275bbade1066
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-1.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-final { scan-assembler-times "vmovd\t4\\(%esp\\), %xmm0" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovss\t4\\(%esp\\), %xmm0" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovd\t%xmm0, %xmm0" 3 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovd\t%edi, %xmm0" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vmovd\t%xmm0, %xmm0" 4 { target { ! ia32 } } } } */
+
+
+#include<immintrin.h>
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef float v4sf __attribute__((vector_size(16)));
+
+v4si
+__attribute__((noipa, unused))
+f1 (int a)
+{
+  return __extension__(v4si){a, 0, 0, 0};
+}
+
+v4sf
+__attribute__((noipa, unused))
+f2 (float a)
+{
+  return __extension__(v4sf){a, 0, 0, 0};
+}
+
+v4si
+__attribute__((noipa, unused))
+f3 (v4si a)
+{
+  return __extension__(v4si){a[0], 0, 0, 0};
+}
+
+v4sf
+__attribute__((noipa, unused))
+f4 (v4sf a)
+{
+  return __extension__(v4sf){a[0], 0, 0, 0};
+}
+
+__m128i
+__attribute__((noipa, unused))
+f5 (__m128i a)
+{
+  return _mm_set_epi32 (0, 0, 0,((__v4si)a)[0]);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-2.c
new file mode 100644
index 000000000000..7d659300d817
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-2.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+
+#include "avx10-helper.h"
+#include "avx10_2-vmovd-1.c"
+
+static void
+TEST (void)
+{
+  union128i_d u1, s1;
+  int e1[4] = {0};
+
+  s1.x = _mm_set_epi32(-12876, -12886, -12776, 3376590);
+  e1[0] = s1.a[0];
+
+  u1.x = _mm_set_epi32(-1, -1, -1, -1);
+  u1.x = (__m128i)f1((int)s1.a[0]);
+  if (check_union128i_d (u1, e1))
+    abort ();
+
+  u1.x = _mm_set_epi32(-1, -1, -1, -1);
+  u1.x = (__m128i)f2(((float*)s1.a)[0]);
+  if (check_union128i_d (u1, e1))
+    abort ();
+  
+  u1.x = _mm_set_epi32(-1, -1, -1, -1);
+  u1.x = (__m128i)f3((v4si)s1.x);
+  if (check_union128i_d (u1, e1))
+    abort ();
+  
+  u1.x = _mm_set_epi32(-1, -1, -1, -1);
+  u1.x = (__m128i)f4((v4sf)s1.x);
+  if (check_union128i_d (u1, e1))
+    abort ();
+  
+  u1.x = _mm_set_epi32(-1, -1, -1, -1);
+  u1.x = (__m128i)f5((__m128i)s1.x);
+  if (check_union128i_d (u1, e1))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-1.c
new file mode 100644
index 000000000000..ec19a9a263ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-1.c
@@ -0,0 +1,69 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-final { scan-assembler-times "vmovw\t4\\(%esp\\), %xmm0" 3 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovw\t8\\(%ebp\\), %xmm0" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovw\t%xmm0, %xmm0" 4 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovw\t%edi, %xmm0" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vmovw\t%xmm0, %xmm0" 7 { target { ! ia32 } } } } */
+
+#include<immintrin.h>
+
+typedef _Float16 v8hf __attribute__((vector_size(16)));
+typedef __bf16 v8bf __attribute__((vector_size(16)));
+typedef short v8hi __attribute__((vector_size(16)));
+
+v8hf
+__attribute__((noipa, unused))
+f1 (_Float16 a)
+{
+  return __extension__(v8hf){a, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v8bf
+__attribute__((noipa, unused))
+f2 (__bf16 a)
+{
+  return __extension__(v8bf){a, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v8hi
+__attribute__((noipa, unused))
+f3 (short a)
+{
+  return __extension__(v8hi){a, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v8hf
+__attribute__((noipa, unused))
+f4 (v8hf a)
+{
+  return __extension__(v8hf){a[0], 0, 0, 0, 0, 0, 0, 0};
+}
+
+v8bf
+__attribute__((noipa, unused))
+f5 (v8bf a)
+{
+  return __extension__(v8bf){a[0], 0, 0, 0, 0, 0, 0, 0};
+}
+
+v8hi
+__attribute__((noipa, unused))
+f6 (v8hi a)
+{
+  return __extension__(v8hi){a[0], 0, 0, 0, 0, 0, 0, 0};
+}
+
+__m128i
+__attribute__((noipa, unused))
+f7 (__m128i a)
+{
+  return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, ((__v8hi)a)[0]);
+}
+
+__m256h
+__attribute__((noipa, unused))
+f8 (_Float16 a)
+{
+  return _mm256_set_ph (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, a);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-2.c
new file mode 100644
index 000000000000..d63739e68874
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+
+#include "avx10-helper.h"
+#include "avx10_2-vmovw-1.c"
+
+static void
+TEST (void)
+{
+  union128i_w u1, s1;
+  union256i_w u2, s2;
+  short e1[8] = {0};
+  short e2[16] = {0};
+
+  s1.x = _mm_set_epi16(-12876, -12886, -12776, -22876, -22886, -22776, -32766, 30158);
+  e1[0] = s1.a[0];
+
+  u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+  u1.x = (__m128i)f1(((_Float16*)s1.a)[0]);
+  if (check_union128i_w (u1, e1))
+    abort ();
+
+  u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+  u1.x = (__m128i)f2(((__bf16*)s1.a)[0]);
+  if (check_union128i_w (u1, e1))
+    abort ();
+  
+  u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+  u1.x = (__m128i)f3((short)s1.a[0]);
+  if (check_union128i_w (u1, e1))
+    abort ();
+  
+  u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+  u1.x = (__m128i)f4((v8hf)s1.x);
+  if (check_union128i_w (u1, e1))
+    abort ();
+  
+  u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+  u1.x = (__m128i)f5((v8bf)s1.x);
+  if (check_union128i_w (u1, e1))
+    abort ();
+  
+  u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+  u1.x = (__m128i)f6((v8hi)s1.x);
+  if (check_union128i_w (u1, e1))
+    abort ();
+
+  u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+  u1.x = (__m128i)f7((__m128i)s1.x);
+  if (check_union128i_w (u1, e1))
+    abort ();
+
+  s2.x = _mm256_set_epi16(-12876, -12886, -12776, -22876, -22886, -22776, -32766, 30158,
+                          -12876, -12886, -12776, -22876, -22886, -22776, -32766, 30158);
+  e2[0] = s2.a[0];  
+  u2.x = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  u2.x = (__m256i)f8(((_Float16*)s2.a)[0]);
+  if (check_union256i_w (u2, e2))
+    abort ();
+}
-- 
GitLab