From 9a99559a478111f7fbeec29bd78344df7651c707 Mon Sep 17 00:00:00 2001
From: Kyrylo Tkachov <ktkachov@nvidia.com>
Date: Wed, 11 Sep 2024 06:58:35 -0700
Subject: [PATCH] aarch64: Define l1_cache_line_size for -mcpu=neoverse-v2

This is a small patch that sets the L1 cache line size for Neoverse V2.
Unlike the other cache-related constants in there this value is not used just
for SW prefetch generation (which we want to avoid for Neoverse V2 presently).
It's also used to set std::hardware_destructive_interference_size.
See the links and recent discussions in PR116662 for reference.
Some CPU tunings in aarch64 set this value to something useful, but for
generic tuning we use the conservative 256, which forces 256-byte alignment
in such atomic structures.  Using a smaller value can decrease the size of such
structs during layout and should not present an ABI problem as
std::hardware_destructive_interference_size is not intended to be used for structs
in an external interface, and GCC warns about such uses.
Another place where the L1 cache line size is used is in phiopt for
-fhoist-adjacent-loads where conditional accesses to adjacent struct members
can be speculatively loaded as long as they are within the same L1 cache line.
e.g.
struct S { int i; int j; };

int
bar (struct S *x, int y)
{
  int r;
  if (y)
    r = x->i;
  else
    r = x->j;
  return r;
}

The Neoverse V2 L1 cache line is 64 bytes according to the TRM, so set it to
that. The rest of the prefetch parameters inherit from the generic tuning so
we don't do anything extra for software prefeteches.

Bootstrapped and tested on aarch64-none-linux-gnu.

Signed-off-by: Kyrylo Tkachov <ktkachov@nvidia.com>

	* config/aarch64/tuning_models/neoversev2.h (neoversev2_prefetch_tune):
	Define.
	(neoversev2_tunings): Use it.
---
 gcc/config/aarch64/tuning_models/neoversev2.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h b/gcc/config/aarch64/tuning_models/neoversev2.h
index 52aad7d4a433..e7e37e6b3b6e 100644
--- a/gcc/config/aarch64/tuning_models/neoversev2.h
+++ b/gcc/config/aarch64/tuning_models/neoversev2.h
@@ -206,6 +206,19 @@ static const struct cpu_vector_cost neoversev2_vector_cost =
   &neoversev2_vec_issue_info /* issue_info  */
 };
 
+/* Prefetch settings.  Disable software prefetch generation but set L1 cache
+   line size.  */
+static const cpu_prefetch_tune neoversev2_prefetch_tune =
+{
+  0,			/* num_slots  */
+  -1,			/* l1_cache_size  */
+  64,			/* l1_cache_line_size  */
+  -1,			/* l2_cache_size  */
+  true,			/* prefetch_dynamic_strides */
+  -1,			/* minimum_stride */
+  -1			/* default_opt_level  */
+};
+
 static const struct tune_params neoversev2_tunings =
 {
   &cortexa76_extra_costs,
@@ -244,7 +257,7 @@ static const struct tune_params neoversev2_tunings =
    | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
    | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW
    | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA),	/* tune_flags.  */
-  &generic_prefetch_tune,
+  &neoversev2_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
   AARCH64_LDP_STP_POLICY_ALWAYS	   /* stp_policy_model.  */
 };
-- 
GitLab