diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1630efd1ac0a854ba6c5f95c6e46bd399f060a5a..111a1470953337c18aad91ae5a04ae502172a7d4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2019-12-29 Jakub Jelinek <jakub@redhat.com> + + PR target/93078 + * config/i386/i386-builtins.c (ix86_builtin_vectorized_function): + Remove CASE_CFN_RINT handling. + * config/i386/i386-builtin.def (IX86_BUILTIN_RINTPD, + IX86_BUILTIN_RINTPS, IX86_BUILTIN_RINTPD256, IX86_BUILTIN_RINTPS256): + Remove. + * config/i386/sse.md (nearbyint<mode>2, rint<mode>2): New expanders + with VF iterator. + 2019-12-29 Richard Sandiford <richard.sandiford@arm.com> * tree-vect-stmts.c (vect_get_strided_load_store_ops): Copy diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index a6500f9d9b597516b8708072b591566bd549be36..fd9c2723c569a9faec636a76e88acc5be919062f 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -913,7 +913,6 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundss, "__builtin_ia32_round BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND) BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND) BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND) -BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND) BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND) BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND) @@ -924,7 +923,6 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND) BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND) BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND) -BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND) BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND) BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND) @@ -1047,7 +1045,6 @@ BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps2 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND) BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND) BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND) -BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND) BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF) BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF) @@ -1058,7 +1055,6 @@ BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND) BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND) BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND) -BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND) BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND) BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND) diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c index 4646d044086ada9bbcf5e71ea3a5c44866c0933f..1d4db2b1a70eebafef97270e323f0ef128a0e4a6 100644 --- a/gcc/config/i386/i386-builtins.c +++ b/gcc/config/i386/i386-builtins.c @@ -1661,27 +1661,6 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, } break; - CASE_CFN_RINT: - /* The round insn does not trap on denormals. */ - if (flag_trapping_math || !TARGET_SSE4_1) - break; - - if (out_mode == DFmode && in_mode == DFmode) - { - if (out_n == 2 && in_n == 2) - return ix86_get_builtin (IX86_BUILTIN_RINTPD); - else if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_RINTPD256); - } - if (out_mode == SFmode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_RINTPS); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_RINTPS256); - } - break; - CASE_CFN_FMA: if (out_mode == DFmode && in_mode == DFmode) { diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index bbceb8b83ad0822a28b69e4a5144daf9145b6b87..b3ef215da5e8952b5e106db6bd56a2a1c0b7ba80 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -17977,6 +17977,24 @@ (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) +(define_expand "nearbyint<mode>2" + [(set (match_operand:VF 0 "register_operand") + (unspec:VF + [(match_operand:VF 1 "vector_operand") + (match_dup 2)] + UNSPEC_ROUND))] + "TARGET_SSE4_1" + "operands[2] = GEN_INT (ROUND_MXCSR | ROUND_NO_EXC);") + +(define_expand "rint<mode>2" + [(set (match_operand:VF 0 "register_operand") + (unspec:VF + [(match_operand:VF 1 "vector_operand") + (match_dup 2)] + UNSPEC_ROUND))] + "TARGET_SSE4_1" + "operands[2] = GEN_INT (ROUND_MXCSR);") + (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>" [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") (unspec:VF_128_256 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d3079d30304459d1b2ff0dbede2db889d7a875b4..8de7d8eb783aee1fae148a7bdce1e7e1dac90850 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2019-12-29 Jakub Jelinek <jakub@redhat.com> + + PR target/93078 + * gcc.target/i386/sse4_1-pr93078.c: New test. + * gcc.target/i386/avx-pr93078.c: New test. + * gcc.target/i386/avx512f-pr93078.c: New test. + 2019-12-29 Richard Sandiford <richard.sandiford@arm.com> * gcc.dg/vect/vect-strided-epilogue-1.c: New test. diff --git a/gcc/testsuite/gcc.target/i386/avx-pr93078.c b/gcc/testsuite/gcc.target/i386/avx-pr93078.c new file mode 100644 index 0000000000000000000000000000000000000000..3fedeaa0c656e98f59374c9a12af76d4f98c5b75 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-pr93078.c @@ -0,0 +1,9 @@ +/* PR target/93078 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -mavx -mno-avx2 -mprefer-vector-width=256 -masm=att" } */ +/* { dg-final { scan-assembler "vroundps\[ \t]\+\\\$12,\[^\n\r]*%y" } } */ +/* { dg-final { scan-assembler "vroundps\[ \t]\+\\\$4,\[^\n\r]*%y" } } */ +/* { dg-final { scan-assembler "vroundpd\[ \t]\+\\\$12,\[^\n\r]*%y" } } */ +/* { dg-final { scan-assembler "vroundpd\[ \t]\+\\\$4,\[^\n\r]*%y" } } */ + +#include "sse4_1-pr93078.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr93078.c b/gcc/testsuite/gcc.target/i386/avx512f-pr93078.c new file mode 100644 index 0000000000000000000000000000000000000000..72d6c25bff14ac383903258a9e5a3eb8eb8a5df0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr93078.c @@ -0,0 +1,9 @@ +/* PR target/93078 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -mavx512f -mprefer-vector-width=512 -masm=att" } */ +/* { dg-final { scan-assembler "vrndscaleps\[ \t]\+\\\$12,\[^\n\r]*%z" } } */ +/* { dg-final { scan-assembler "vrndscaleps\[ \t]\+\\\$4,\[^\n\r]*%z" } } */ +/* { dg-final { scan-assembler "vrndscalepd\[ \t]\+\\\$12,\[^\n\r]*%z" } } */ +/* { dg-final { scan-assembler "vrndscalepd\[ \t]\+\\\$4,\[^\n\r]*%z" } } */ + +#include "sse4_1-pr93078.c" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pr93078.c b/gcc/testsuite/gcc.target/i386/sse4_1-pr93078.c new file mode 100644 index 0000000000000000000000000000000000000000..9ad0813388ff67ad687216896177bab7fe5b4903 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pr93078.c @@ -0,0 +1,42 @@ +/* PR target/93078 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msse4.1 -mno-sse4.2 -masm=att" } */ +/* { dg-final { scan-assembler "roundps\[ \t]\+\\\$12," } } */ +/* { dg-final { scan-assembler "roundps\[ \t]\+\\\$4," } } */ +/* { dg-final { scan-assembler "roundpd\[ \t]\+\\\$12," } } */ +/* { dg-final { scan-assembler "roundpd\[ \t]\+\\\$4," } } */ + +float a[16], b[16]; +double c[8], d[8]; + +void +foo (void) +{ + int i; + for (i = 0; i < 16; ++i) + b[i] = __builtin_nearbyintf (a[i]); +} + +void +bar (void) +{ + int i; + for (i = 0; i < 16; ++i) + b[i] = __builtin_rintf (a[i]); +} + +void +baz (void) +{ + int i; + for (i = 0; i < 8; ++i) + d[i] = __builtin_nearbyint (c[i]); +} + +void +qux (void) +{ + int i; + for (i = 0; i < 8; ++i) + d[i] = __builtin_rint (c[i]); +}