From 3eb9f6eab9802d5ae65ead6b1f2ae6fe0833e06e Mon Sep 17 00:00:00 2001
From: Tamar Christina <tamar.christina@arm.com>
Date: Wed, 5 Jun 2024 19:32:16 +0100
Subject: [PATCH] AArch64: enable new predicate tuning for Neoverse cores.

This enables the new tuning flag for Neoverse V1, Neoverse V2 and Neoverse N2.
It is kept off for generic codegen.

Note the reason for the +sve even though they are in aarch64-sve.exp is if the
testsuite is ran with a forced SVE off option, e.g. -march=armv8-a+nosve then
the intrinsics end up being disabled because the -march is preferred over the
-mcpu even though the -mcpu comes later.

This prevents the tests from failing in such runs.

gcc/ChangeLog:

	* config/aarch64/tuning_models/neoversen2.h (neoversen2_tunings): Add
	AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.
	* config/aarch64/tuning_models/neoversev1.h (neoversev1_tunings): Add
	AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.
	* config/aarch64/tuning_models/neoversev2.h (neoversev2_tunings): Add
	AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/sve/pred_clobber_1.c: New test.
	* gcc.target/aarch64/sve/pred_clobber_2.c: New test.
	* gcc.target/aarch64/sve/pred_clobber_3.c: New test.
	* gcc.target/aarch64/sve/pred_clobber_4.c: New test.
---
 gcc/config/aarch64/tuning_models/neoversen2.h |  3 ++-
 gcc/config/aarch64/tuning_models/neoversev1.h |  3 ++-
 gcc/config/aarch64/tuning_models/neoversev2.h |  3 ++-
 .../gcc.target/aarch64/sve/pred_clobber_1.c   | 22 ++++++++++++++++++
 .../gcc.target/aarch64/sve/pred_clobber_2.c   | 22 ++++++++++++++++++
 .../gcc.target/aarch64/sve/pred_clobber_3.c   | 23 +++++++++++++++++++
 .../gcc.target/aarch64/sve/pred_clobber_4.c   | 22 ++++++++++++++++++
 7 files changed, 95 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c

diff --git a/gcc/config/aarch64/tuning_models/neoversen2.h b/gcc/config/aarch64/tuning_models/neoversen2.h
index 7e799bbe762f..be9a48ac3adc 100644
--- a/gcc/config/aarch64/tuning_models/neoversen2.h
+++ b/gcc/config/aarch64/tuning_models/neoversen2.h
@@ -236,7 +236,8 @@ static const struct tune_params neoversen2_tunings =
   (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
    | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
    | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
-   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT),	/* tune_flags.  */
+   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
+   | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW),	/* tune_flags.  */
   &generic_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
   AARCH64_LDP_STP_POLICY_ALWAYS	   /* stp_policy_model.  */
diff --git a/gcc/config/aarch64/tuning_models/neoversev1.h b/gcc/config/aarch64/tuning_models/neoversev1.h
index 9363f2ad98a5..0fc41ce6a41b 100644
--- a/gcc/config/aarch64/tuning_models/neoversev1.h
+++ b/gcc/config/aarch64/tuning_models/neoversev1.h
@@ -227,7 +227,8 @@ static const struct tune_params neoversev1_tunings =
   (AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
    | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
    | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
-   | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND),	/* tune_flags.  */
+   | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
+   | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW),	/* tune_flags.  */
   &generic_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
   AARCH64_LDP_STP_POLICY_ALWAYS    /* stp_policy_model.  */
diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h b/gcc/config/aarch64/tuning_models/neoversev2.h
index bc01ed767c9b..f76e4ef358f7 100644
--- a/gcc/config/aarch64/tuning_models/neoversev2.h
+++ b/gcc/config/aarch64/tuning_models/neoversev2.h
@@ -236,7 +236,8 @@ static const struct tune_params neoversev2_tunings =
   (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
    | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
    | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
-   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT),	/* tune_flags.  */
+   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
+   | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW),	/* tune_flags.  */
   &generic_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
   AARCH64_LDP_STP_POLICY_ALWAYS	   /* stp_policy_model.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
new file mode 100644
index 000000000000..25129e8d6f27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-n2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+**	...
+**	ptrue	p([1-3]).b, all
+**	cmplo	p0.h, p\1/z, z0.h, z[0-9]+.h
+**	...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+    svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+    use (p0);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
new file mode 100644
index 000000000000..58badb66a43b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-v2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+**	...
+**	ptrue	p([1-9][0-9]?).b, all
+**	cmplo	p0.h, p\1/z, z0.h, z[0-9]+.h
+**	...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+    svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+    use (p0);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
new file mode 100644
index 000000000000..c67c2bd3422e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-v1" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+**	...
+**	ptrue	p([1-9][0-9]?).b, all
+**	cmplo	p0.h, p\1/z, z0.h, z[0-9]+.h
+**	...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+    svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+    use (p0);
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c
new file mode 100644
index 000000000000..c0120afe5d52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+**	...
+**	ptrue	p0.b, all
+**	cmplo	p0.h, p0/z, z0.h, z[0-9]+.h
+**	...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+    svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+    use (p0);
+}
-- 
GitLab