Skip to content
Snippets Groups Projects
Commit 2713f6bb authored by Wilco Dijkstra
Browse files

AArch64: Add FULLY_PIPELINED_FMA to tune baseline

Add FULLY_PIPELINED_FMA to tune baseline - this is a generic feature that is
already enabled for some cores, but benchmarking it shows it is faster on all
modern cores (SPECFP improves ~0.17% on Neoverse V1 and 0.04% on Neoverse N1).

gcc:
	* config/aarch64/aarch64-tuning-flags.def (AARCH64_EXTRA_TUNE_BASE):
	Add AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA.
	* config/aarch64/tuning_models/ampere1b.h: Remove redundant
	AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA.
	* config/aarch64/tuning_models/neoversev2.h: Likewise.
parent 625ea3c6
No related branches found
No related tags found
No related merge requests found
@@ -49,6 +49,7 @@ AARCH64_EXTRA_TUNING_OPTION ("fully_pipelined_fma", FULLY_PIPELINED_FMA)
 AARCH64_EXTRA_TUNING_OPTION ("avoid_pred_rmw", AVOID_PRED_RMW)
 /* Baseline tuning settings suitable for all modern cores. */
-#define AARCH64_EXTRA_TUNE_BASE (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND)
+#define AARCH64_EXTRA_TUNE_BASE (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND \
+				 | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA)
 #undef AARCH64_EXTRA_TUNING_OPTION
@@ -103,8 +103,7 @@ static const struct tune_params ampere1b_tunings =
   0, /* max_case_values. */
   tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model. */
   (AARCH64_EXTRA_TUNE_BASE
-   | AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA
-   | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA), /* tune_flags. */
+   | AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA), /* tune_flags. */
   &ampere1b_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALIGNED, /* ldp_policy_model. */
   AARCH64_LDP_STP_POLICY_ALIGNED /* stp_policy_model. */
......
@@ -220,8 +220,7 @@ static const struct tune_params neoversev2_tunings =
   (AARCH64_EXTRA_TUNE_BASE
    | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
    | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
-   | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW
-   | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA), /* tune_flags. */
+   | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
   &generic_armv9a_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
   AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment