diff --git a/gcc/config/i386/avx10_2-512convertintrin.h b/gcc/config/i386/avx10_2-512convertintrin.h index 4ad339bbbf97f264408f5b15829562bcf6cc7fb4..dfbdfc3e51bb12dc696d8340b39c19f74bfa9290 100644 --- a/gcc/config/i386/avx10_2-512convertintrin.h +++ b/gcc/config/i386/avx10_2-512convertintrin.h @@ -540,6 +540,30 @@ _mm512_maskz_cvtnesph_phf8 (__mmask32 __U, __m512h __A) (__mmask32) __U); } +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtpbf8_ph (__m256i __A) +{ + return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 ( + (__m512i) _mm512_cvtepi8_epi16 (__A), 8)); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtpbf8_ph (__m512h __S, __mmask16 __U, __m256i __A) +{ + return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_mask_slli_epi16 ( + (__m512i) __S, __U, (__m512i) _mm512_cvtepi8_epi16 (__A), 8)); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtpbf8_ph (__mmask16 __U, __m256i __A) +{ + return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 ( + (__m512i) _mm512_maskz_cvtepi8_epi16 (__U, __A), 8)); +} + #ifdef __DISABLE_AVX10_2_512__ #undef __DISABLE_AVX10_2_512__ #pragma GCC pop_options diff --git a/gcc/config/i386/avx10_2convertintrin.h b/gcc/config/i386/avx10_2convertintrin.h index ac62d1290a5c2b059639004a6f9b286fd811cfa9..8d2c1a54147ad261150e6c4a7984c1f9be6d7875 100644 --- a/gcc/config/i386/avx10_2convertintrin.h +++ b/gcc/config/i386/avx10_2convertintrin.h @@ -970,6 +970,54 @@ _mm256_maskz_cvtnesph_phf8 (__mmask16 __U, __m256h __A) (__mmask16) __U); } +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtpbf8_ph (__m128i __A) +{ + return (__m128h) _mm_castsi128_ph ((__m128i) _mm_slli_epi16 ( + (__m128i) _mm_cvtepi8_epi16 (__A), 8)); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtpbf8_ph (__m128h __S, __mmask8 __U, __m128i __A) +{ + return (__m128h) _mm_castsi128_ph ((__m128i) _mm_mask_slli_epi16 ( + (__m128i) __S, __U, (__m128i) _mm_cvtepi8_epi16 (__A), 8)); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtpbf8_ph (__mmask8 __U, __m128i __A) +{ + return (__m128h) _mm_castsi128_ph ((__m128i) _mm_slli_epi16 ( + (__m128i) _mm_maskz_cvtepi8_epi16 (__U, __A), 8)); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtpbf8_ph (__m128i __A) +{ + return (__m256h) _mm256_castsi256_ph ((__m256i) _mm256_slli_epi16 ( + (__m256i) _mm256_cvtepi8_epi16 (__A), 8)); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtpbf8_ph (__m256h __S, __mmask8 __U, __m128i __A) +{ + return (__m256h) _mm256_castsi256_ph ((__m256i) _mm256_mask_slli_epi16 ( + (__m256i) __S, __U, (__m256i) _mm256_cvtepi8_epi16 (__A), 8)); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtpbf8_ph (__mmask8 __U, __m128i __A) +{ + return (__m256h) _mm256_castsi256_ph ((__m256i) _mm256_slli_epi16 ( + (__m256i) _mm256_maskz_cvtepi8_epi16 (__U, __A), 8)); +} + #ifdef __DISABLE_AVX10_2_256__ #undef __DISABLE_AVX10_2_256__ #pragma GCC pop_options diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-convert-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-convert-1.c index bbbff186d0a2efd9dca1640694b4885c80f174da..f67138c237ca1ec58a08b25209686661566a5928 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-convert-1.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-convert-1.c @@ -45,13 +45,17 @@ /* { dg-final { scan-assembler-times "vcvtneph2hf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtneph2hf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtneph2hf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpsllw\[ \t]\+\\\$8, %zmm\[0-9]\+, %zmm\[0-9]\+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vpsllw\[ \t]\+\\\$8, %zmm\[0-9]\+, %zmm\[0-9]\+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovsxbw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%zmm\[0-9\](?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vpmovsxbw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> -volatile __m256i x256i; +volatile __m256i x256i, z1; volatile __m512i x512i; volatile __m512 x, a1, b1; -volatile __m512h y, x512h; +volatile __m512h y, x512h, z; volatile __mmask16 m16; volatile __mmask32 m32; volatile __mmask64 m64; @@ -174,3 +178,11 @@ avx10_2_512_vcvtneph2hf8s_test (void) x256i = _mm512_mask_cvtnesph_phf8 (x256i, m32, x512h); x256i = _mm512_maskz_cvtnesph_phf8 (m32, x512h); } + +void extern +avx10_2_512_cvtbf8_fp16_test (void) +{ + y = _mm512_cvtpbf8_ph (z1); + y = _mm512_mask_cvtpbf8_ph (z, m16, z1); + y = _mm512_maskz_cvtpbf8_ph (m16, z1); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c index 015474f8cf3344a95ef6aaaa62b2f293d2a39fc3..9c3e85718f2f1a485f24c609cecae5eff21d8404 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c @@ -87,14 +87,22 @@ /* { dg-final { scan-assembler-times "vcvtneph2hf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtneph2hf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtneph2hf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovsxbw\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%ymm\[0-9\](?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vpsllw\[ \t]\+\\\$8, %ymm\[0-9]\+, %ymm\[0-9]\+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vpsllw\[ \t]\+\\\$8, %ymm\[0-9]\+, %ymm\[0-9]\+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovsxbw\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovsxbw\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vpsllw\[ \t]\+\\\$8, %xmm\[0-9]\+, %xmm\[0-9]\+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vpsllw\[ \t]\+\\\$8, %xmm\[0-9]\+, %xmm\[0-9]\+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovsxbw\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> volatile __m128 x1,a1,b1; volatile __m256 x2,a2,b2; -volatile __m128h y,x128h; -volatile __m256h y2,x256h; -volatile __m128i x128i; +volatile __m128h y,x128h,z; +volatile __m256h y2,x256h,z2; +volatile __m128i x128i,z3; volatile __m256i x256i; volatile __mmask8 m8; volatile __mmask16 m16; @@ -272,3 +280,15 @@ avx10_2_vcvtneph2hf8s_test (void) x128i = _mm256_mask_cvtnesph_phf8 (x128i, m16, x256h); x128i = _mm256_maskz_cvtnesph_phf8 (m16, x256h); } + +void extern +avx10_2_cvtbf8_fp16_test (void) +{ + y = _mm_cvtpbf8_ph (z3); + y = _mm_mask_cvtpbf8_ph (z, m8, z3); + y = _mm_maskz_cvtpbf8_ph (m8, z3); + + y2 = _mm256_cvtpbf8_ph (z3); + y2 = _mm256_mask_cvtpbf8_ph (z2, m8, z3); + y2 = _mm256_maskz_cvtpbf8_ph (m8, z3); +}