From 6ec067548fa994158819db0a62a8b5356d452c2c Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Sun, 29 Dec 2019 12:03:25 +0100
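Subject: [PATCH] re PR target/93078 (Missing fma and round functions
 auto-vectorization with x86-64 (sse2))

Vectorize rint and nearbyint through new standard-named expanders in
sse.md instead of the rint-only handling in
ix86_builtin_vectorized_function, which gave up whenever
flag_trapping_math was set.  The new expanders require only SSE4.1 and
differ solely in the rounding immediate: ROUND_MXCSR for rint and
ROUND_MXCSR | ROUND_NO_EXC for nearbyint.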

	PR target/93078
	* config/i386/i386-builtins.c (ix86_builtin_vectorized_function):
	Remove CASE_CFN_RINT handling.
	* config/i386/i386-builtin.def (IX86_BUILTIN_RINTPD,
	IX86_BUILTIN_RINTPS, IX86_BUILTIN_RINTPD256, IX86_BUILTIN_RINTPS256):
	Remove.
	* config/i386/sse.md (nearbyint<mode>2, rint<mode>2): New expanders
	with VF iterator.

	* gcc.target/i386/sse4_1-pr93078.c: New test.
	* gcc.target/i386/avx-pr93078.c: New test.
	* gcc.target/i386/avx512f-pr93078.c: New test.

From-SVN: r279754
---
 gcc/ChangeLog                                 | 11 +++++
 gcc/config/i386/i386-builtin.def              |  4 --
 gcc/config/i386/i386-builtins.c               | 21 ----------
 gcc/config/i386/sse.md                        | 18 ++++++++
 gcc/testsuite/ChangeLog                       |  7 ++++
 gcc/testsuite/gcc.target/i386/avx-pr93078.c   |  9 ++++
 .../gcc.target/i386/avx512f-pr93078.c         |  9 ++++
 .../gcc.target/i386/sse4_1-pr93078.c          | 42 +++++++++++++++++++
 8 files changed, 96 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx-pr93078.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-pr93078.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse4_1-pr93078.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 1630efd1ac0a..111a14709533 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2019-12-29  Jakub Jelinek  <jakub@redhat.com>
+
+	PR target/93078
+	* config/i386/i386-builtins.c (ix86_builtin_vectorized_function):
+	Remove CASE_CFN_RINT handling.
+	* config/i386/i386-builtin.def (IX86_BUILTIN_RINTPD,
+	IX86_BUILTIN_RINTPS, IX86_BUILTIN_RINTPD256, IX86_BUILTIN_RINTPS256):
+	Remove.
+	* config/i386/sse.md (nearbyint<mode>2, rint<mode>2): New expanders
+	with VF iterator.
+
 2019-12-29  Richard Sandiford  <richard.sandiford@arm.com>
 
 	* tree-vect-stmts.c (vect_get_strided_load_store_ops): Copy
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index a6500f9d9b59..fd9c2723c569 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -913,7 +913,6 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundss, "__builtin_ia32_round
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND)
-BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND)
 
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND)
@@ -924,7 +923,6 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND)
-BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND)
 
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND)
@@ -1047,7 +1045,6 @@ BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps2
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND)
 
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF)
@@ -1058,7 +1055,6 @@ BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND)
 
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND)
diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c
index 4646d044086a..1d4db2b1a70e 100644
--- a/gcc/config/i386/i386-builtins.c
+++ b/gcc/config/i386/i386-builtins.c
@@ -1661,27 +1661,6 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
 	}
       break;
 
-    CASE_CFN_RINT:
-      /* The round insn does not trap on denormals.  */
-      if (flag_trapping_math || !TARGET_SSE4_1)
-	break;
-
-      if (out_mode == DFmode && in_mode == DFmode)
-	{
-	  if (out_n == 2 && in_n == 2)
-	    return ix86_get_builtin (IX86_BUILTIN_RINTPD);
-	  else if (out_n == 4 && in_n == 4)
-	    return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
-	}
-      if (out_mode == SFmode && in_mode == SFmode)
-	{
-	  if (out_n == 4 && in_n == 4)
-	    return ix86_get_builtin (IX86_BUILTIN_RINTPS);
-	  else if (out_n == 8 && in_n == 8)
-	    return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
-	}
-      break;
-
     CASE_CFN_FMA:
       if (out_mode == DFmode && in_mode == DFmode)
 	{
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index bbceb8b83ad0..b3ef215da5e8 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17977,6 +17977,24 @@
    (set_attr "prefix" "orig,orig,vex")
    (set_attr "mode" "TI")])
 
+(define_expand "nearbyint<mode>2"
+  [(set (match_operand:VF 0 "register_operand")
+	(unspec:VF
+	  [(match_operand:VF 1 "vector_operand")
+	   (match_dup 2)]	;; operand 2 is set by the preparation code below
+	  UNSPEC_ROUND))]
+  "TARGET_SSE4_1"
+  "operands[2] = GEN_INT (ROUND_MXCSR | ROUND_NO_EXC);")	;; NOTE(review): NO_EXC presumably suppresses inexact -- confirm
+
+(define_expand "rint<mode>2"
+  [(set (match_operand:VF 0 "register_operand")
+	(unspec:VF
+	  [(match_operand:VF 1 "vector_operand")
+	   (match_dup 2)]	;; operand 2 is set by the preparation code below
+	  UNSPEC_ROUND))]
+  "TARGET_SSE4_1"
+  "operands[2] = GEN_INT (ROUND_MXCSR);")	;; MXCSR rounding mode only, no ROUND_NO_EXC
+
 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
   [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
 	(unspec:VF_128_256
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index d3079d303044..8de7d8eb783a 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2019-12-29  Jakub Jelinek  <jakub@redhat.com>
+
+	PR target/93078
+	* gcc.target/i386/sse4_1-pr93078.c: New test.
+	* gcc.target/i386/avx-pr93078.c: New test.
+	* gcc.target/i386/avx512f-pr93078.c: New test.
+
 2019-12-29  Richard Sandiford  <richard.sandiford@arm.com>
 
 	* gcc.dg/vect/vect-strided-epilogue-1.c: New test.
diff --git a/gcc/testsuite/gcc.target/i386/avx-pr93078.c b/gcc/testsuite/gcc.target/i386/avx-pr93078.c
new file mode 100644
index 000000000000..3fedeaa0c656
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-pr93078.c
@@ -0,0 +1,9 @@
+/* PR target/93078 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mavx -mno-avx2 -mprefer-vector-width=256 -masm=att" } */
+/* { dg-final { scan-assembler "vroundps\[ \t]\+\\\$12,\[^\n\r]*%y" } } */
+/* { dg-final { scan-assembler "vroundps\[ \t]\+\\\$4,\[^\n\r]*%y" } } */
+/* { dg-final { scan-assembler "vroundpd\[ \t]\+\\\$12,\[^\n\r]*%y" } } */
+/* { dg-final { scan-assembler "vroundpd\[ \t]\+\\\$4,\[^\n\r]*%y" } } */
+
+#include "sse4_1-pr93078.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr93078.c b/gcc/testsuite/gcc.target/i386/avx512f-pr93078.c
new file mode 100644
index 000000000000..72d6c25bff14
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr93078.c
@@ -0,0 +1,9 @@
+/* PR target/93078 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mavx512f -mprefer-vector-width=512 -masm=att" } */
+/* { dg-final { scan-assembler "vrndscaleps\[ \t]\+\\\$12,\[^\n\r]*%z" } } */
+/* { dg-final { scan-assembler "vrndscaleps\[ \t]\+\\\$4,\[^\n\r]*%z" } } */
+/* { dg-final { scan-assembler "vrndscalepd\[ \t]\+\\\$12,\[^\n\r]*%z" } } */
+/* { dg-final { scan-assembler "vrndscalepd\[ \t]\+\\\$4,\[^\n\r]*%z" } } */
+
+#include "sse4_1-pr93078.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pr93078.c b/gcc/testsuite/gcc.target/i386/sse4_1-pr93078.c
new file mode 100644
index 000000000000..9ad0813388ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_1-pr93078.c
@@ -0,0 +1,42 @@
+/* PR target/93078 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse4.1 -mno-sse4.2 -masm=att" } */
+/* { dg-final { scan-assembler "roundps\[ \t]\+\\\$12," } } */
+/* { dg-final { scan-assembler "roundps\[ \t]\+\\\$4," } } */
+/* { dg-final { scan-assembler "roundpd\[ \t]\+\\\$12," } } */
+/* { dg-final { scan-assembler "roundpd\[ \t]\+\\\$4," } } */
+
+float a[16], b[16];
+double c[8], d[8];
+
+void
+foo (void)
+{
+  int i;
+  for (i = 0; i < 16; ++i)  /* Covers all of a[] and b[].  */
+    b[i] = __builtin_nearbyintf (a[i]);  /* Presumably the "$12" ps scan.  */
+}
+
+void
+bar (void)
+{
+  int i;
+  for (i = 0; i < 16; ++i)  /* Covers all of a[] and b[].  */
+    b[i] = __builtin_rintf (a[i]);  /* Presumably the "$4" ps scan.  */
+}
+
+void
+baz (void)
+{
+  int i;
+  for (i = 0; i < 8; ++i)  /* Covers all of c[] and d[].  */
+    d[i] = __builtin_nearbyint (c[i]);  /* Presumably the "$12" pd scan.  */
+}
+
+void
+qux (void)
+{
+  int i;
+  for (i = 0; i < 8; ++i)  /* Covers all of c[] and d[].  */
+    d[i] = __builtin_rint (c[i]);  /* Presumably the "$4" pd scan.  */
+}
-- 
GitLab