From 2713f6bb90765de81954275a30c62432d30e1d68 Mon Sep 17 00:00:00 2001 From: Wilco Dijkstra <wilco.dijkstra@arm.com> Date: Thu, 14 Nov 2024 14:34:17 +0000 Subject: [PATCH] AArch64: Add FULLY_PIPELINED_FMA to tune baseline Add FULLY_PIPELINED_FMA to tune baseline - this is a generic feature that is already enabled for some cores, but benchmarking it shows it is faster on all modern cores (SPECFP improves ~0.17% on Neoverse V1 and 0.04% on Neoverse N1). gcc: * config/aarch64/aarch64-tuning-flags.def (AARCH64_EXTRA_TUNE_BASE): Add AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA. * config/aarch64/tuning_models/ampere1b.h: Remove redundant AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA. * config/aarch64/tuning_models/neoversev2.h: Likewise. --- gcc/config/aarch64/aarch64-tuning-flags.def | 3 ++- gcc/config/aarch64/tuning_models/ampere1b.h | 3 +-- gcc/config/aarch64/tuning_models/neoversev2.h | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def index 7ebf390ef818..60967aac9037 100644 --- a/gcc/config/aarch64/aarch64-tuning-flags.def +++ b/gcc/config/aarch64/aarch64-tuning-flags.def @@ -49,6 +49,7 @@ AARCH64_EXTRA_TUNING_OPTION ("fully_pipelined_fma", FULLY_PIPELINED_FMA) AARCH64_EXTRA_TUNING_OPTION ("avoid_pred_rmw", AVOID_PRED_RMW) /* Baseline tuning settings suitable for all modern cores. */ -#define AARCH64_EXTRA_TUNE_BASE (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND) +#define AARCH64_EXTRA_TUNE_BASE (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND \ + | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA) #undef AARCH64_EXTRA_TUNING_OPTION diff --git a/gcc/config/aarch64/tuning_models/ampere1b.h b/gcc/config/aarch64/tuning_models/ampere1b.h index c541623993d2..2ad6003d65c1 100644 --- a/gcc/config/aarch64/tuning_models/ampere1b.h +++ b/gcc/config/aarch64/tuning_models/ampere1b.h @@ -103,8 +103,7 @@ static const struct tune_params ampere1b_tunings = 0, /* max_case_values. 
*/ tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model. */ (AARCH64_EXTRA_TUNE_BASE - | AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA - | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA), /* tune_flags. */ + | AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA), /* tune_flags. */ &ampere1b_prefetch_tune, AARCH64_LDP_STP_POLICY_ALIGNED, /* ldp_policy_model. */ AARCH64_LDP_STP_POLICY_ALIGNED /* stp_policy_model. */ diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h b/gcc/config/aarch64/tuning_models/neoversev2.h index 4fabe4df2de7..b000fb465709 100644 --- a/gcc/config/aarch64/tuning_models/neoversev2.h +++ b/gcc/config/aarch64/tuning_models/neoversev2.h @@ -220,8 +220,7 @@ static const struct tune_params neoversev2_tunings = (AARCH64_EXTRA_TUNE_BASE | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT - | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW - | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA), /* tune_flags. */ + | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */ &generic_armv9a_prefetch_tune, AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */ AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */ -- GitLab