From 44c4a72061e86259d3defd3d1c7911f453043e3c Mon Sep 17 00:00:00 2001 From: Haochen Jiang <haochen.jiang@intel.com> Date: Wed, 26 Feb 2025 11:28:45 +0800 Subject: [PATCH] i386: Treat Granite Rapids/Granite Rapids-D/Diamond Rapids similar as Sapphire Rapids in x86-tune.def Since GNR, GNR-D, DMR are both P-core based, we should treat them just like SPR for now. gcc/ChangeLog: * config/i386/x86-tune.def (X86_TUNE_DEST_FALSE_DEP_FOR_GLC): Add GNR, GNR-D, DMR. (X86_TUNE_AVOID_256FMA_CHAINS): Ditto. (X86_TUNE_AVX512_MOVE_BY_PIECES): Ditto. (X86_TUNE_AVX512_STORE_BY_PIECES): Ditto. --- gcc/config/i386/x86-tune.def | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index df7b4ed22bc8..0bdad7234a6a 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -87,7 +87,8 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY, several insns to break false dependency on the dest register for GLC micro-architecture. */ DEF_TUNE (X86_TUNE_DEST_FALSE_DEP_FOR_GLC, - "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_CORE_HYBRID + "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_GRANITERAPIDS + | m_GRANITERAPIDS_D | m_DIAMONDRAPIDS | m_CORE_HYBRID | m_CORE_ATOM) /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies @@ -527,7 +528,8 @@ DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER smaller FMA chain. */ DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ZNVER5 | m_CORE_HYBRID - | m_SAPPHIRERAPIDS | m_CORE_ATOM | m_GENERIC) + | m_SAPPHIRERAPIDS | m_GRANITERAPIDS | m_GRANITERAPIDS_D + | m_DIAMONDRAPIDS | m_CORE_ATOM | m_GENERIC) /* X86_TUNE_AVOID_512FMA_CHAINS: Avoid creating loops with tight 512bit or smaller FMA chain. */ @@ -594,12 +596,14 @@ DEF_TUNE (X86_TUNE_AVX256_STORE_BY_PIECES, "avx256_store_by_pieces", /* X86_TUNE_AVX512_MOVE_BY_PIECES: Optimize move_by_pieces with 512-bit AVX instructions. */ DEF_TUNE (X86_TUNE_AVX512_MOVE_BY_PIECES, "avx512_move_by_pieces", - m_SAPPHIRERAPIDS | m_ZNVER4 | m_ZNVER5) + m_SAPPHIRERAPIDS | m_GRANITERAPIDS | m_GRANITERAPIDS_D + | m_DIAMONDRAPIDS | m_ZNVER4 | m_ZNVER5) /* X86_TUNE_AVX512_STORE_BY_PIECES: Optimize store_by_pieces with 512-bit AVX instructions. */ DEF_TUNE (X86_TUNE_AVX512_STORE_BY_PIECES, "avx512_store_by_pieces", - m_SAPPHIRERAPIDS | m_ZNVER4 | m_ZNVER5) + m_SAPPHIRERAPIDS | m_GRANITERAPIDS | m_GRANITERAPIDS_D + | m_DIAMONDRAPIDS | m_ZNVER4 | m_ZNVER5) /* X86_TUNE_AVX512_TWO_EPILOGUES: Use two vector epilogues for 512-bit vectorized loops. */ -- GitLab