diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h index baa5be485e9fa2dc81af5a9ab8fc57d2993cab7e..e57f6f45e24163b193e4c33dba7894252f5f451b 100644 --- a/gcc/config/i386/avx512fp16intrin.h +++ b/gcc/config/i386/avx512fp16intrin.h @@ -2512,6 +2512,531 @@ _mm_maskz_move_sh (__mmask8 __A, __m128h __B, __m128h __C) return __builtin_ia32_vmovsh_mask (__B, __C, _mm_setzero_ph (), __A); } +/* Intrinsics vcvtph2dq. Every wrapper below forwards to __builtin_ia32_vcvtph2dq512_mask_round: unmasked forms pass a zero vector and an all-ones mask, mask forms pass __A as the second operand, maskz forms pass a zero vector, and the non-round forms pass _MM_FROUND_CUR_DIRECTION. */ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_epi32 (__m256h __A) +{ + return (__m512i) + __builtin_ia32_vcvtph2dq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_epi32 (__m512i __A, __mmask16 __B, __m256h __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2dq512_mask_round (__C, + (__v16si) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_epi32 (__mmask16 __A, __m256h __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2dq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_epi32 (__m256h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2dq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_epi32 (__m512i __A, __mmask16 __B, __m256h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvtph2dq512_mask_round (__C, + (__v16si) __A, + __B, + __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_maskz_cvt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2dq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + __C); +} + +#else /* !__OPTIMIZE__: macro forms of the three cvt_roundph wrappers above, expanding to the same builtin calls. */ +#define _mm512_cvt_roundph_epi32(A, B) \ + ((__m512i) \ + __builtin_ia32_vcvtph2dq512_mask_round ((A), \ + (__v16si) \ + _mm512_setzero_si512 (), \ + (__mmask16)-1, \ + (B))) + +#define _mm512_mask_cvt_roundph_epi32(A, B, C, D) \ + ((__m512i) \ + __builtin_ia32_vcvtph2dq512_mask_round ((C), (__v16si)(A), (B), (D))) + +#define _mm512_maskz_cvt_roundph_epi32(A, B, C) \ + ((__m512i) \ + __builtin_ia32_vcvtph2dq512_mask_round ((B), \ + (__v16si) \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtph2udq. Unmasked, mask and maskz wrappers (plus rounding variants) forwarding to __builtin_ia32_vcvtph2udq512_mask_round; operands and result use the same (__v16si) and (__m512i) casts as the vcvtph2dq group. */ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_epu32 (__m256h __A) +{ + return (__m512i) + __builtin_ia32_vcvtph2udq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_epu32 (__m512i __A, __mmask16 __B, __m256h __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2udq512_mask_round (__C, + (__v16si) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_epu32 (__mmask16 __A, __m256h __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2udq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_epu32 (__m256h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2udq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_epu32 (__m512i __A, __mmask16 __B, __m256h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvtph2udq512_mask_round (__C, + (__v16si) __A, + __B, + __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2udq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + __C); +} + +#else /* !__OPTIMIZE__: macro forms of the three cvt_roundph wrappers above, expanding to the same builtin calls. */ +#define _mm512_cvt_roundph_epu32(A, B) \ + ((__m512i) \ + __builtin_ia32_vcvtph2udq512_mask_round ((A), \ + (__v16si) \ + _mm512_setzero_si512 (), \ + (__mmask16)-1, \ + (B))) + +#define _mm512_mask_cvt_roundph_epu32(A, B, C, D) \ + ((__m512i) \ + __builtin_ia32_vcvtph2udq512_mask_round ((C), (__v16si)(A), (B), (D))) + +#define _mm512_maskz_cvt_roundph_epu32(A, B, C) \ + ((__m512i) \ + __builtin_ia32_vcvtph2udq512_mask_round ((B), \ + (__v16si) \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtph2qq. 
*/ /* Wrappers forward to __builtin_ia32_vcvtph2qq512_mask_round with a __m128h source and an __mmask8 mask; operands and result are passed without vector casts. */ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_epi64 (__m128h __A) +{ + return __builtin_ia32_vcvtph2qq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_epi64 (__m512i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2qq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_epi64 (__m128h __A, int __B) +{ + return __builtin_ia32_vcvtph2qq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D) +{ + return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B, __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C) +{ + return __builtin_ia32_vcvtph2qq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + __C); +} + +#else /* !__OPTIMIZE__: macro forms of the three cvt_roundph wrappers above, expanding to the same builtin calls. */ +#define _mm512_cvt_roundph_epi64(A, B) \ + (__builtin_ia32_vcvtph2qq512_mask_round ((A), \ + _mm512_setzero_si512 (), \ + (__mmask8)-1, \ + (B))) + +#define _mm512_mask_cvt_roundph_epi64(A, B, C, D) \ + (__builtin_ia32_vcvtph2qq512_mask_round ((C), (A), (B), (D))) + +#define _mm512_maskz_cvt_roundph_epi64(A, B, C) \ + (__builtin_ia32_vcvtph2qq512_mask_round ((B), \ + _mm512_setzero_si512 
(), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtph2uqq. Wrappers forward to __builtin_ia32_vcvtph2uqq512_mask_round with a __m128h source and an __mmask8 mask; operands and result are passed without vector casts. */ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_epu64 (__m128h __A) +{ + return __builtin_ia32_vcvtph2uqq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_epu64 (__m512i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2uqq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_epu64 (__m128h __A, int __B) +{ + return __builtin_ia32_vcvtph2uqq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D) +{ + return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B, __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C) +{ + return __builtin_ia32_vcvtph2uqq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + __C); +} + +#else /* !__OPTIMIZE__: macro forms of the three cvt_roundph wrappers above, expanding to the same builtin calls. */ +#define _mm512_cvt_roundph_epu64(A, B) \ + (__builtin_ia32_vcvtph2uqq512_mask_round ((A), \ + _mm512_setzero_si512 (), \ + (__mmask8)-1, \ + (B))) + +#define _mm512_mask_cvt_roundph_epu64(A, B, C, D) \ + (__builtin_ia32_vcvtph2uqq512_mask_round ((C), (A), (B), (D))) + +#define 
_mm512_maskz_cvt_roundph_epu64(A, B, C) \ + (__builtin_ia32_vcvtph2uqq512_mask_round ((B), \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtph2w. Wrappers forward to __builtin_ia32_vcvtph2w512_mask_round with a __m512h source and an __mmask32 mask; __m512i values are cast through (__v32hi). */ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_epi16 (__m512h __A) +{ + return (__m512i) + __builtin_ia32_vcvtph2w512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_epi16 (__m512i __A, __mmask32 __B, __m512h __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2w512_mask_round (__C, + (__v32hi) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_epi16 (__mmask32 __A, __m512h __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2w512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_epi16 (__m512h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2w512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_epi16 (__m512i __A, __mmask32 __B, __m512h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvtph2w512_mask_round (__C, + (__v32hi) __A, + __B, + __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2w512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + __C); +} + +#else /* !__OPTIMIZE__: macro forms of the three cvt_roundph wrappers above, expanding to the same builtin calls. */ +#define _mm512_cvt_roundph_epi16(A, B) \ + 
((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((A), \ + (__v32hi) \ + _mm512_setzero_si512 (), \ + (__mmask32)-1, \ + (B))) + +#define _mm512_mask_cvt_roundph_epi16(A, B, C, D) \ + ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((C), \ + (__v32hi)(A), \ + (B), \ + (D))) + +#define _mm512_maskz_cvt_roundph_epi16(A, B, C) \ + ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((B), \ + (__v32hi) \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtph2uw. Wrappers forward to __builtin_ia32_vcvtph2uw512_mask_round with a __m512h source and an __mmask32 mask; __m512i values are cast through (__v32hi). */ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_epu16 (__m512h __A) +{ + return (__m512i) + __builtin_ia32_vcvtph2uw512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_epu16 (__m512i __A, __mmask32 __B, __m512h __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_epu16 (__mmask32 __A, __m512h __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2uw512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_epu16 (__m512h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2uw512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_epu16 (__m512i __A, __mmask32 __B, __m512h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B, __D); +} + +extern __inline __m512i +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2uw512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + __C); +} + +#else /* !__OPTIMIZE__: macro forms of the three cvt_roundph wrappers above, expanding to the same builtin calls. */ +#define _mm512_cvt_roundph_epu16(A, B) \ + ((__m512i) \ + __builtin_ia32_vcvtph2uw512_mask_round ((A), \ + (__v32hi) \ + _mm512_setzero_si512 (), \ + (__mmask32)-1, (B))) + +#define _mm512_mask_cvt_roundph_epu16(A, B, C, D) \ + ((__m512i) \ + __builtin_ia32_vcvtph2uw512_mask_round ((C), (__v32hi)(A), (B), (D))) + +#define _mm512_maskz_cvt_roundph_epu16(A, B, C) \ + ((__m512i) \ + __builtin_ia32_vcvtph2uw512_mask_round ((B), \ + (__v32hi) \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + #ifdef __DISABLE_AVX512FP16__ #undef __DISABLE_AVX512FP16__ #pragma GCC pop_options diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h index e9478792a033b3681d8c5e6c3ddc044e5812841a..92195f2cd8181d8c895e5006ae0bbda62bb48367 100644 --- a/gcc/config/i386/avx512fp16vlintrin.h +++ b/gcc/config/i386/avx512fp16vlintrin.h @@ -930,6 +930,351 @@ _mm_maskz_getmant_ph (__mmask8 __U, __m128h __A, #endif /* __OPTIMIZE__ */ +/* Intrinsics vcvtph2dq. 
*/ /* 128- and 256-bit wrappers forward to __builtin_ia32_vcvtph2dq128_mask and __builtin_ia32_vcvtph2dq256_mask (no rounding argument); both widths take a __m128h source and an __mmask8 mask. */ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_epi32 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvtph2dq128_mask (__A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_epi32 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvtph2dq128_mask (__C, ( __v4si) __A, __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_epi32 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvtph2dq128_mask (__B, + (__v4si) _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_epi32 (__m128h __A) +{ + return (__m256i) + __builtin_ia32_vcvtph2dq256_mask (__A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_epi32 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return (__m256i) + __builtin_ia32_vcvtph2dq256_mask (__C, ( __v8si) __A, __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtph_epi32 (__mmask8 __A, __m128h __B) +{ + return (__m256i) + __builtin_ia32_vcvtph2dq256_mask (__B, + (__v8si) + _mm256_setzero_si256 (), + __A); +} + +/* Intrinsics vcvtph2udq. 
*/ /* 128- and 256-bit wrappers forward to __builtin_ia32_vcvtph2udq128_mask and __builtin_ia32_vcvtph2udq256_mask (no rounding argument); both widths take a __m128h source and an __mmask8 mask. */ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_epu32 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvtph2udq128_mask (__A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_epu32 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvtph2udq128_mask (__C, ( __v4si) __A, __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvtph2udq128_mask (__B, + (__v4si) + _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_epu32 (__m128h __A) +{ + return (__m256i) + __builtin_ia32_vcvtph2udq256_mask (__A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_epu32 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return (__m256i) + __builtin_ia32_vcvtph2udq256_mask (__C, ( __v8si) __A, __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B) +{ + return (__m256i) + __builtin_ia32_vcvtph2udq256_mask (__B, + (__v8si) _mm256_setzero_si256 (), + __A); +} + +/* Intrinsics vcvtph2qq. 
*/ /* 128- and 256-bit wrappers forward to __builtin_ia32_vcvtph2qq128_mask and __builtin_ia32_vcvtph2qq256_mask (no rounding argument, no vector casts); both widths take a __m128h source and an __mmask8 mask. */ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_epi64 (__m128h __A) +{ + return + __builtin_ia32_vcvtph2qq128_mask (__A, + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_epi64 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2qq128_mask (__C, __A, __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2qq128_mask (__B, + _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_epi64 (__m128h __A) +{ + return __builtin_ia32_vcvtph2qq256_mask (__A, + _mm256_setzero_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_epi64 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2qq256_mask (__C, __A, __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2qq256_mask (__B, + _mm256_setzero_si256 (), + __A); +} + +/* Intrinsics vcvtph2uqq. 
*/ /* 128- and 256-bit wrappers forward to __builtin_ia32_vcvtph2uqq128_mask and __builtin_ia32_vcvtph2uqq256_mask (no rounding argument, no vector casts); both widths take a __m128h source and an __mmask8 mask. */ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_epu64 (__m128h __A) +{ + return __builtin_ia32_vcvtph2uqq128_mask (__A, + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_epu64 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2uqq128_mask (__C, __A, __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2uqq128_mask (__B, + _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_epu64 (__m128h __A) +{ + return __builtin_ia32_vcvtph2uqq256_mask (__A, + _mm256_setzero_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_epu64 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2uqq256_mask (__C, __A, __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2uqq256_mask (__B, + _mm256_setzero_si256 (), + __A); +} + +/* Intrinsics vcvtph2w. 
*/ /* Wrappers forward to __builtin_ia32_vcvtph2w128_mask and __builtin_ia32_vcvtph2w256_mask (no rounding argument); the 128-bit forms take a __m128h source and an __mmask8 mask, the 256-bit forms a __m256h source and an __mmask16 mask. */ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_epi16 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvtph2w128_mask (__A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_epi16 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvtph2w128_mask (__C, ( __v8hi) __A, __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_epi16 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvtph2w128_mask (__B, + (__v8hi) + _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_epi16 (__m256h __A) +{ + return (__m256i) + __builtin_ia32_vcvtph2w256_mask (__A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_epi16 (__m256i __A, __mmask16 __B, __m256h __C) +{ + return (__m256i) + __builtin_ia32_vcvtph2w256_mask (__C, ( __v16hi) __A, __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtph_epi16 (__mmask16 __A, __m256h __B) +{ + return (__m256i) + __builtin_ia32_vcvtph2w256_mask (__B, + (__v16hi) + _mm256_setzero_si256 (), + __A); +} + +/* Intrinsics vcvtph2uw. 
*/ /* Wrappers forward to __builtin_ia32_vcvtph2uw128_mask and __builtin_ia32_vcvtph2uw256_mask (no rounding argument); the 128-bit forms take a __m128h source and an __mmask8 mask, the 256-bit forms a __m256h source and an __mmask16 mask. */ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_epu16 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvtph2uw128_mask (__A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_epu16 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvtph2uw128_mask (__C, ( __v8hi) __A, __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_epu16 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvtph2uw128_mask (__B, + (__v8hi) + _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_epu16 (__m256h __A) +{ + return (__m256i) + __builtin_ia32_vcvtph2uw256_mask (__A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_epu16 (__m256i __A, __mmask16 __B, __m256h __C) +{ + return (__m256i) + __builtin_ia32_vcvtph2uw256_mask (__C, ( __v16hi) __A, __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtph_epu16 (__mmask16 __A, __m256h __B) +{ + return (__m256i) + __builtin_ia32_vcvtph2uw256_mask (__B, + (__v16hi) + _mm256_setzero_si256 (), + __A); +} + #ifdef __DISABLE_AVX512FP16VL__ #undef __DISABLE_AVX512FP16VL__ #pragma GCC pop_options diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 126cc0c45ce54436c6e3bbf7e82da702023d3ed8..87ea9dfa6a90528191f6a99d3c6697d9ded1c85c 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -1311,21 +1311,30 @@ DEF_FUNCTION_TYPE (SI, V32HF, INT, USI) DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF) 
DEF_FUNCTION_TYPE (VOID, PCFLOAT16, V8HF, UQI) DEF_FUNCTION_TYPE (V8HF, PCFLOAT16, V8HF, UQI) +DEF_FUNCTION_TYPE (V2DI, V8HF, V2DI, UQI) +DEF_FUNCTION_TYPE (V4DI, V8HF, V4DI, UQI) +DEF_FUNCTION_TYPE (V4SI, V8HF, V4SI, UQI) +DEF_FUNCTION_TYPE (V8SI, V8HF, V8SI, UQI) +DEF_FUNCTION_TYPE (V8HI, V8HF, V8HI, UQI) DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, UQI) DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT) DEF_FUNCTION_TYPE (V8HF, V8HF, INT, V8HF, UQI) DEF_FUNCTION_TYPE (UQI, V8HF, V8HF, INT, UQI) DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, V8HF, UQI) DEF_FUNCTION_TYPE (UQI, V8HF, V8HF, INT, UQI, INT) +DEF_FUNCTION_TYPE (V8DI, V8HF, V8DI, UQI, INT) DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, V8HF, UQI, INT) DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT, V8HF, UQI, INT) DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF) +DEF_FUNCTION_TYPE (V16HI, V16HF, V16HI, UHI) DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, UHI) +DEF_FUNCTION_TYPE (V16SI, V16HF, V16SI, UHI, INT) DEF_FUNCTION_TYPE (V16HF, V16HF, INT, V16HF, UHI) DEF_FUNCTION_TYPE (UHI, V16HF, V16HF, INT, UHI) DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, V16HF, UHI) DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, USI) DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, INT) +DEF_FUNCTION_TYPE (V32HI, V32HF, V32HI, USI, INT) DEF_FUNCTION_TYPE (USI, V32HF, V32HF, INT, USI) DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, USI, INT) DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, USI) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index b8f782b27502648ee5e872474e714987e4e1068e..6381653ecd2c5b6635e60187f7e55c850f799428 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2831,6 +2831,18 @@ BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp1 BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_getmantv16hf_mask, "__builtin_ia32_getmantph256_mask", IX86_BUILTIN_GETMANTPH256, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, 
CODE_FOR_avx512fp16_getmantv8hf_mask, "__builtin_ia32_getmantph128_mask", IX86_BUILTIN_GETMANTPH128, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_movhf_mask, "__builtin_ia32_vmovsh_mask", IX86_BUILTIN_VMOVSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2dq_v4si_mask, "__builtin_ia32_vcvtph2dq128_mask", IX86_BUILTIN_VCVTPH2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HF_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2dq_v8si_mask, "__builtin_ia32_vcvtph2dq256_mask", IX86_BUILTIN_VCVTPH2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HF_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2udq_v4si_mask, "__builtin_ia32_vcvtph2udq128_mask", IX86_BUILTIN_VCVTPH2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HF_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2udq_v8si_mask, "__builtin_ia32_vcvtph2udq256_mask", IX86_BUILTIN_VCVTPH2UDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HF_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2qq_v2di_mask, "__builtin_ia32_vcvtph2qq128_mask", IX86_BUILTIN_VCVTPH2QQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HF_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2qq_v4di_mask, "__builtin_ia32_vcvtph2qq256_mask", IX86_BUILTIN_VCVTPH2QQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HF_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uqq_v2di_mask, "__builtin_ia32_vcvtph2uqq128_mask", IX86_BUILTIN_VCVTPH2UQQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HF_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uqq_v4di_mask, "__builtin_ia32_vcvtph2uqq256_mask", IX86_BUILTIN_VCVTPH2UQQ256_MASK, UNKNOWN, 
(int) V4DI_FTYPE_V8HF_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2w_v8hi_mask, "__builtin_ia32_vcvtph2w128_mask", IX86_BUILTIN_VCVTPH2W128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HF_V8HI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2w_v16hi_mask, "__builtin_ia32_vcvtph2w256_mask", IX86_BUILTIN_VCVTPH2W256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HF_V16HI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uw_v8hi_mask, "__builtin_ia32_vcvtph2uw128_mask", IX86_BUILTIN_VCVTPH2UW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HF_V8HI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uw_v16hi_mask, "__builtin_ia32_vcvtph2uw256_mask", IX86_BUILTIN_VCVTPH2UW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HF_V16HI_UHI) /* Builtins with rounding support. */ BDESC_END (ARGS, ROUND_ARGS) @@ -3058,6 +3070,12 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getexpv32hf_mask_round, BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_sgetexpv8hf_mask_round, "__builtin_ia32_getexpsh_mask_round", IX86_BUILTIN_GETEXPSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getmantv32hf_mask_round, "__builtin_ia32_getmantph512_mask", IX86_BUILTIN_GETMANTPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vgetmantv8hf_mask_round, "__builtin_ia32_getmantsh_mask_round", IX86_BUILTIN_GETMANTSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2dq_v16si_mask_round, "__builtin_ia32_vcvtph2dq512_mask_round", IX86_BUILTIN_VCVTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2udq_v16si_mask_round, 
"__builtin_ia32_vcvtph2udq512_mask_round", IX86_BUILTIN_VCVTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2qq_v8di_mask_round, "__builtin_ia32_vcvtph2qq512_mask_round", IX86_BUILTIN_VCVTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uqq_v8di_mask_round, "__builtin_ia32_vcvtph2uqq512_mask_round", IX86_BUILTIN_VCVTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2w_v32hi_mask_round, "__builtin_ia32_vcvtph2w512_mask_round", IX86_BUILTIN_VCVTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uw_v32hi_mask_round, "__builtin_ia32_vcvtph2uw512_mask_round", IX86_BUILTIN_VCVTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) BDESC_END (ROUND_ARGS, MULTI_ARG) diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 7e830873691be259ebcce5ba0ff2955e0214da27..990d013d73ca5c7bf6ac5b49f17d16b3aeafd5d0 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -9743,9 +9743,13 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V16HF_FTYPE_V16HF_V16HF_UHI: case V8SF_FTYPE_V8HI_V8SF_UQI: case V4SF_FTYPE_V8HI_V4SF_UQI: + case V8SI_FTYPE_V8HF_V8SI_UQI: /* Type of __builtin_ia32_vcvtph2{,u}dq256_mask. */ case V8SI_FTYPE_V8SF_V8SI_UQI: case V4SI_FTYPE_V4SF_V4SI_UQI: + case V4SI_FTYPE_V8HF_V4SI_UQI: /* Type of __builtin_ia32_vcvtph2{,u}dq128_mask. */ + case V4DI_FTYPE_V8HF_V4DI_UQI: /* Type of __builtin_ia32_vcvtph2{,u}qq256_mask. */ case V4DI_FTYPE_V4SF_V4DI_UQI: + case V2DI_FTYPE_V8HF_V2DI_UQI: /* Type of __builtin_ia32_vcvtph2{,u}qq128_mask. */ case V2DI_FTYPE_V4SF_V2DI_UQI: case V8HF_FTYPE_V8HF_V8HF_UQI: case V4SF_FTYPE_V4DI_V4SF_UQI: @@ -9756,6 +9760,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V16QI_FTYPE_V16HI_V16QI_UHI: case V16QI_FTYPE_V4SI_V16QI_UQI: case V16QI_FTYPE_V8SI_V16QI_UQI: + case V8HI_FTYPE_V8HF_V8HI_UQI: /* Type of __builtin_ia32_vcvtph2{,u}w128_mask. */ case 
V8HI_FTYPE_V4SI_V8HI_UQI: case V8HI_FTYPE_V8SI_V8HI_UQI: case V16QI_FTYPE_V2DI_V16QI_UQI: @@ -9813,6 +9818,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V8DI_FTYPE_DI_V8DI_UQI: case V16SF_FTYPE_V8SF_V16SF_UHI: case V16SI_FTYPE_V8SI_V16SI_UHI: + case V16HI_FTYPE_V16HF_V16HI_UHI: /* Type of __builtin_ia32_vcvtph2{,u}w256_mask. */ case V16HI_FTYPE_V16HI_V16HI_UHI: case V8HI_FTYPE_V16QI_V8HI_UQI: case V16HI_FTYPE_V16QI_V16HI_UHI: @@ -10679,7 +10685,9 @@ ix86_expand_round_builtin (const struct builtin_description *d, break; case V8SF_FTYPE_V8DF_V8SF_QI_INT: case V8DF_FTYPE_V8DF_V8DF_QI_INT: + case V32HI_FTYPE_V32HF_V32HI_USI_INT: /* Type of __builtin_ia32_vcvtph2{,u}w512_mask_round. */ case V8SI_FTYPE_V8DF_V8SI_QI_INT: + case V8DI_FTYPE_V8HF_V8DI_UQI_INT: /* Type of __builtin_ia32_vcvtph2{,u}qq512_mask_round. */ case V8DI_FTYPE_V8DF_V8DI_QI_INT: case V8SF_FTYPE_V8DI_V8SF_QI_INT: case V8DF_FTYPE_V8DI_V8DF_QI_INT: @@ -10688,6 +10696,7 @@ ix86_expand_round_builtin (const struct builtin_description *d, case V8DI_FTYPE_V8SF_V8DI_QI_INT: case V16SF_FTYPE_V16SI_V16SF_HI_INT: case V16SI_FTYPE_V16SF_V16SI_HI_INT: + case V16SI_FTYPE_V16HF_V16SI_UHI_INT: /* Type of __builtin_ia32_vcvtph2{,u}dq512_mask_round. */ case V8DF_FTYPE_V8SF_V8DF_QI_INT: case V16SF_FTYPE_V16HI_V16SF_HI_INT: case V2DF_FTYPE_V2DF_V2DF_V2DF_INT: diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 5dbbed0c09d573c790d5845e6e35467ed3a66f51..dec31f4bda382066bebc083834edfba723d45ff6 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -738,6 +738,11 @@ [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI") (V16SI "V64QI") (V8SI "V32QI") (V4SI "V16QI")]) +(define_mode_attr sseintconvert + [(V32HI "w") (V16HI "w") (V8HI "w") + (V16SI "dq") (V8SI "dq") (V4SI "dq") + (V8DI "qq") (V4DI "qq") (V2DI "qq")]) + ;; All 128bit vector integer modes (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI]) @@ -984,6 +989,12 @@ (V4SF "v2sf") (V32HF "v16hf") (V16HF "v8hf") (V8HF "v4hf")]) +;; Mapping of vector modes to vector hf modes of conversion. 
+(define_mode_attr ssePHmode + [(V32HI "V32HF") (V16HI "V16HF") (V8HI "V8HF") + (V16SI "V16HF") (V8SI "V8HF") (V4SI "V8HF") + (V8DI "V8HF") (V4DI "V8HF") (V2DI "V8HF")]) + ;; Mapping of vector modes to packed single mode of the same size (define_mode_attr ssePSmode [(V16SI "V16SF") (V8DF "V16SF") @@ -5713,6 +5724,30 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel half-precision floating point conversion operations +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_int_iterator UNSPEC_US_FIX_NOTRUNC + [UNSPEC_UNSIGNED_FIX_NOTRUNC UNSPEC_FIX_NOTRUNC]) + +(define_int_attr sseintconvertsignprefix + [(UNSPEC_UNSIGNED_FIX_NOTRUNC "u") + (UNSPEC_FIX_NOTRUNC "")]) + +(define_insn "avx512fp16_vcvtph2<sseintconvertsignprefix><sseintconvert>_<mode><mask_name><round_name>" + [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v") + (unspec:VI248_AVX512VL + [(match_operand:<ssePHmode> 1 "<round_nimm_predicate>" "<round_constraint>")] + UNSPEC_US_FIX_NOTRUNC))] + "TARGET_AVX512FP16" + "vcvtph2<sseintconvertsignprefix><sseintconvert>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel single-precision floating point conversion operations diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index 3a96e5864185f7618fe9007081a7ff482a0475e9..1214a7d0b2f87356d43dff63c94efd3da0a2fd42 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -719,6 +719,12 @@ #define __builtin_ia32_getexpsh_mask_round(A, B, C, D, E) __builtin_ia32_getexpsh_mask_round(A, B, C, D, 4) #define __builtin_ia32_getmantph512_mask(A, F, C, D, E) __builtin_ia32_getmantph512_mask(A, 1, C, 
D, 8) #define __builtin_ia32_getmantsh_mask_round(A, B, C, W, U, D) __builtin_ia32_getmantsh_mask_round(A, B, 1, W, U, 4) +#define __builtin_ia32_vcvtph2dq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2dq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2udq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2udq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2qq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2qq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2uqq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uqq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2w512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2w512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2uw512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uw512_mask_round(A, B, C, 8) /* avx512fp16vlintrin.h */ #define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D) diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index aafcd4145308f77eea202c5d3e4c0cceddd5fb6a..21fb919c919a96d76050f14fc58c2c2ef4d10810 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -736,6 +736,12 @@ #define __builtin_ia32_getexpsh_mask_round(A, B, C, D, E) __builtin_ia32_getexpsh_mask_round(A, B, C, D, 4) #define __builtin_ia32_getmantph512_mask(A, F, C, D, E) __builtin_ia32_getmantph512_mask(A, 1, C, D, 8) #define __builtin_ia32_getmantsh_mask_round(A, B, C, W, U, D) __builtin_ia32_getmantsh_mask_round(A, B, 1, W, U, 4) +#define __builtin_ia32_vcvtph2dq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2dq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2udq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2udq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2qq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2qq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2uqq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uqq512_mask_round(A, B, C, 8) +#define 
__builtin_ia32_vcvtph2w512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2w512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2uw512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uw512_mask_round(A, B, C, 8) /* avx512fp16vlintrin.h */ #define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D) diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index 04163874f90bfb741a387e971786a79c18112f44..32aa4518703591dc5d4186eb1be8a4e3f41c73ea 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -678,6 +678,12 @@ test_1 (_mm_roundscale_ph, __m128h, __m128h, 123) test_1 (_mm256_roundscale_ph, __m256h, __m256h, 123) test_1 (_mm512_roundscale_ph, __m512h, __m512h, 123) test_1 (_mm512_getexp_round_ph, __m512h, __m512h, 8) +test_1 (_mm512_cvt_roundph_epi16, __m512i, __m512h, 8) +test_1 (_mm512_cvt_roundph_epu16, __m512i, __m512h, 8) +test_1 (_mm512_cvt_roundph_epi32, __m512i, __m256h, 8) +test_1 (_mm512_cvt_roundph_epu32, __m512i, __m256h, 8) +test_1 (_mm512_cvt_roundph_epi64, __m512i, __m128h, 8) +test_1 (_mm512_cvt_roundph_epu64, __m512i, __m128h, 8) test_1x (_mm512_reduce_round_ph, __m512h, __m512h, 123, 8) test_1x (_mm512_roundscale_round_ph, __m512h, __m512h, 123, 8) test_1x (_mm512_getmant_ph, __m512h, __m512h, 1, 1) @@ -710,6 +716,12 @@ test_2 (_mm512_maskz_roundscale_ph, __m512h, __mmask32, __m512h, 123) test_2 (_mm_roundscale_sh, __m128h, __m128h, __m128h, 123) test_2 (_mm512_maskz_getexp_round_ph, __m512h, __mmask32, __m512h, 8) test_2 (_mm_getexp_round_sh, __m128h, __m128h, __m128h, 8) +test_2 (_mm512_maskz_cvt_roundph_epi16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_cvt_roundph_epu16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_cvt_roundph_epi32, __m512i, __mmask16, __m256h, 8) +test_2 (_mm512_maskz_cvt_roundph_epu32, __m512i, __mmask16, __m256h, 8) +test_2 (_mm512_maskz_cvt_roundph_epi64, __m512i, __mmask8, __m128h, 8) +test_2 
(_mm512_maskz_cvt_roundph_epu64, __m512i, __mmask8, __m128h, 8) test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8) test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8) test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8) @@ -748,6 +760,12 @@ test_3 (_mm512_mask_roundscale_ph, __m512h, __m512h, __mmask32, __m512h, 123) test_3 (_mm_maskz_roundscale_sh, __m128h, __mmask8, __m128h, __m128h, 123) test_3 (_mm_maskz_getexp_round_sh, __m128h, __mmask8, __m128h, __m128h, 8) test_3 (_mm512_mask_getexp_round_ph, __m512h, __m512h, __mmask32, __m512h, 8) +test_3 (_mm512_mask_cvt_roundph_epi16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_cvt_roundph_epu16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_cvt_roundph_epi32, __m512i, __m512i, __mmask16, __m256h, 8) +test_3 (_mm512_mask_cvt_roundph_epu32, __m512i, __m512i, __mmask16, __m256h, 8) +test_3 (_mm512_mask_cvt_roundph_epi64, __m512i, __m512i, __mmask8, __m128h, 8) +test_3 (_mm512_mask_cvt_roundph_epu64, __m512i, __m512i, __mmask8, __m128h, 8) test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8) test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8) test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8) diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 008600a393ddcf257bff6a54ee4a35375c4b5001..44ac10d602fa4ccd901cc826dd1523b27ad9e6ac 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -783,6 +783,12 @@ test_1 (_mm_roundscale_ph, __m128h, __m128h, 123) test_1 (_mm256_roundscale_ph, __m256h, __m256h, 123) test_1 (_mm512_roundscale_ph, __m512h, __m512h, 123) test_1 (_mm512_getexp_round_ph, __m512h, __m512h, 8) +test_1 (_mm512_cvt_roundph_epi16, __m512i, __m512h, 8) +test_1 (_mm512_cvt_roundph_epu16, __m512i, __m512h, 8) +test_1 (_mm512_cvt_roundph_epi32, __m512i, __m256h, 8) 
+test_1 (_mm512_cvt_roundph_epu32, __m512i, __m256h, 8) +test_1 (_mm512_cvt_roundph_epi64, __m512i, __m128h, 8) +test_1 (_mm512_cvt_roundph_epu64, __m512i, __m128h, 8) test_1x (_mm512_reduce_round_ph, __m512h, __m512h, 123, 8) test_1x (_mm512_roundscale_round_ph, __m512h, __m512h, 123, 8) test_1x (_mm512_getmant_ph, __m512h, __m512h, 1, 1) @@ -814,6 +820,12 @@ test_2 (_mm512_maskz_roundscale_ph, __m512h, __mmask32, __m512h, 123) test_2 (_mm_roundscale_sh, __m128h, __m128h, __m128h, 123) test_2 (_mm512_maskz_getexp_round_ph, __m512h, __mmask32, __m512h, 8) test_2 (_mm_getexp_round_sh, __m128h, __m128h, __m128h, 8) +test_2 (_mm512_maskz_cvt_roundph_epi16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_cvt_roundph_epu16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_cvt_roundph_epi32, __m512i, __mmask16, __m256h, 8) +test_2 (_mm512_maskz_cvt_roundph_epu32, __m512i, __mmask16, __m256h, 8) +test_2 (_mm512_maskz_cvt_roundph_epi64, __m512i, __mmask8, __m128h, 8) +test_2 (_mm512_maskz_cvt_roundph_epu64, __m512i, __mmask8, __m128h, 8) test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8) test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8) test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8) @@ -851,6 +863,12 @@ test_3 (_mm512_mask_roundscale_ph, __m512h, __m512h, __mmask32, __m512h, 123) test_3 (_mm_maskz_roundscale_sh, __m128h, __mmask8, __m128h, __m128h, 123) test_3 (_mm_maskz_getexp_round_sh, __m128h, __mmask8, __m128h, __m128h, 8) test_3 (_mm512_mask_getexp_round_ph, __m512h, __m512h, __mmask32, __m512h, 8) +test_3 (_mm512_mask_cvt_roundph_epi16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_cvt_roundph_epu16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_cvt_roundph_epi32, __m512i, __m512i, __mmask16, __m256h, 8) +test_3 (_mm512_mask_cvt_roundph_epu32, __m512i, __m512i, __mmask16, __m256h, 8) +test_3 (_mm512_mask_cvt_roundph_epi64, __m512i, __m512i, __mmask8, __m128h, 8) +test_3 
(_mm512_mask_cvt_roundph_epu64, __m512i, __m512i, __mmask8, __m128h, 8) test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8) test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8) test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 8b600282c6729c8bcec11e0964c3234c29427974..f023d4fa2a4a4857cf1d6374bf051e151713112c 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -737,6 +737,12 @@ #define __builtin_ia32_getexpsh_mask_round(A, B, C, D, E) __builtin_ia32_getexpsh_mask_round(A, B, C, D, 4) #define __builtin_ia32_getmantph512_mask(A, F, C, D, E) __builtin_ia32_getmantph512_mask(A, 1, C, D, 8) #define __builtin_ia32_getmantsh_mask_round(A, B, C, W, U, D) __builtin_ia32_getmantsh_mask_round(A, B, 1, W, U, 4) +#define __builtin_ia32_vcvtph2dq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2dq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2udq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2udq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2qq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2qq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2uqq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uqq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2w512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2w512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2uw512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uw512_mask_round(A, B, C, 8) /* avx512fp16vlintrin.h */ #define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D)