diff --git a/gcc/common/config/aarch64/cpuinfo.h b/gcc/common/config/aarch64/cpuinfo.h new file mode 100644 index 0000000000000000000000000000000000000000..1690b6eee48e960d0ae675f8e8b05e6f182b56a3 --- /dev/null +++ b/gcc/common/config/aarch64/cpuinfo.h @@ -0,0 +1,94 @@ +/* CPU feature detection for AArch64 architecture. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of GCC. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This enum is used in libgcc feature detection, and in the function + multiversioning implementation in aarch64.cc. The enum should use the same + values as the corresponding enum in LLVM's compiler-rt, to facilitate + compatibility between compilers. 
*/ + +enum CPUFeatures { + FEAT_RNG, + FEAT_FLAGM, + FEAT_FLAGM2, + FEAT_FP16FML, + FEAT_DOTPROD, + FEAT_SM4, + FEAT_RDM, + FEAT_LSE, + FEAT_FP, + FEAT_SIMD, + FEAT_CRC, + FEAT_SHA1, + FEAT_SHA2, + FEAT_SHA3, + FEAT_AES, + FEAT_PMULL, + FEAT_FP16, + FEAT_DIT, + FEAT_DPB, + FEAT_DPB2, + FEAT_JSCVT, + FEAT_FCMA, + FEAT_RCPC, + FEAT_RCPC2, + FEAT_FRINTTS, + FEAT_DGH, + FEAT_I8MM, + FEAT_BF16, + FEAT_EBF16, + FEAT_RPRES, + FEAT_SVE, + FEAT_SVE_BF16, + FEAT_SVE_EBF16, + FEAT_SVE_I8MM, + FEAT_SVE_F32MM, + FEAT_SVE_F64MM, + FEAT_SVE2, + FEAT_SVE_AES, + FEAT_SVE_PMULL128, + FEAT_SVE_BITPERM, + FEAT_SVE_SHA3, + FEAT_SVE_SM4, + FEAT_SME, + FEAT_MEMTAG, + FEAT_MEMTAG2, + FEAT_MEMTAG3, + FEAT_SB, + FEAT_PREDRES, + FEAT_SSBS, + FEAT_SSBS2, + FEAT_BTI, + FEAT_LS64, + FEAT_LS64_V, + FEAT_LS64_ACCDATA, + FEAT_WFXT, + FEAT_SME_F64, + FEAT_SME_I64, + FEAT_SME2, + FEAT_RCPC3, + FEAT_MAX, + FEAT_EXT = 62, /* Reserved to indicate presence of additional features field + in __aarch64_cpu_features. */ + FEAT_INIT /* Used as flag of features initialization completion. */ +}; diff --git a/gcc/config/aarch64/aarch64-feature-deps.h b/gcc/config/aarch64/aarch64-feature-deps.h index 7b85a8860de57f6727644c03296cef192ad0990c..8f20582e1efdd4817138480bee8cdb27fa7f3dfe 100644 --- a/gcc/config/aarch64/aarch64-feature-deps.h +++ b/gcc/config/aarch64/aarch64-feature-deps.h @@ -115,6 +115,13 @@ get_flags_off (aarch64_feature_flags mask) constexpr auto cpu_##CORE_IDENT = ARCH_IDENT ().enable | get_enable FEATURES; #include "config/aarch64/aarch64-cores.def" +/* Define fmv_deps_<NAME> variables for each FMV feature, giving the transitive + closure of all the features that the FMV feature enables. 
*/ +#define AARCH64_FMV_FEATURE(A, FEAT_NAME, OPT_FLAGS) \ + constexpr auto fmv_deps_##FEAT_NAME = get_enable OPT_FLAGS; +#include "config/aarch64/aarch64-option-extensions.def" + + } } diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 5aa37ac4e0edacfeffbbf6944a19311455d49288..d8118d8579ce3b24d85063c55587c80a2ea4d5eb 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -17,17 +17,22 @@ along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ -/* This is a list of ISA extentsions in AArch64. +/* This is a list of ISA extensions in AArch64. - Before using #include to read this file, define a macro: + Before using #include to read this file, define one of the following + macros: AARCH64_OPT_EXTENSION(NAME, IDENT, REQUIRES, EXPLICIT_ON, EXPLICIT_OFF, FEATURE_STRING) + AARCH64_FMV_FEATURE(NAME, FEAT_NAME, OPT_FLAGS) + - NAME is the name of the extension, represented as a string constant. - IDENT is the canonical internal name for this flag. + - FEAT_NAME is the unprefixed name used in the CPUFeatures enum. + - REQUIRES is a list of features that must be enabled whenever this feature is enabled. The relationship is implicitly transitive: if A appears in B's REQUIRES and B appears in C's REQUIRES then @@ -58,45 +63,96 @@ that are required. Their order is not important. An empty string means do not detect this feature during auto detection. - The list of features must follow topological order wrt REQUIRES - and EXPLICIT_ON. For example, if A is in B's REQUIRES list, A must - come before B. This is enforced by aarch64-feature-deps.h. + - OPT_FLAGS is a list of feature IDENTS that should be enabled (along with + their transitive dependencies) when the specified FMV feature is present. 
+ + Where a feature is present as both an extension and a function + multiversioning feature, and IDENT matches the FEAT_NAME suffix, then these + can be listed here simultaneously using the macro: + + AARCH64_OPT_FMV_EXTENSION(NAME, IDENT, REQUIRES, EXPLICIT_ON, + EXPLICIT_OFF, FEATURE_STRING) + + The list of features extensions must follow topological order wrt REQUIRES + and EXPLICIT_ON. For example, if A is in B's REQUIRES list, A must come + before B. This is enforced by aarch64-feature-deps.h. + + The list of multiversioning features must be ordered by increasing priority, + as defined in https://github.com/ARM-software/acle/blob/main/main/acle.md NOTE: Any changes to the AARCH64_OPT_EXTENSION macro need to be mirrored in config.gcc. */ +#ifndef AARCH64_OPT_EXTENSION +#define AARCH64_OPT_EXTENSION(NAME, IDENT, REQUIRES, EXPLICIT_ON, \ + EXPLICIT_OFF, FEATURE_STRING) +#endif + +#ifndef AARCH64_FMV_FEATURE +#define AARCH64_FMV_FEATURE(NAME, FEAT_NAME, OPT_FLAGS) +#endif + +#define AARCH64_OPT_FMV_EXTENSION(NAME, IDENT, REQUIRES, EXPLICIT_ON, \ + EXPLICIT_OFF, FEATURE_STRING) \ +AARCH64_OPT_EXTENSION(NAME, IDENT, REQUIRES, EXPLICIT_ON, EXPLICIT_OFF, \ + FEATURE_STRING) \ +AARCH64_FMV_FEATURE(NAME, IDENT, (IDENT)) + + AARCH64_OPT_EXTENSION("fp", FP, (), (), (), "fp") AARCH64_OPT_EXTENSION("simd", SIMD, (FP), (), (), "asimd") -AARCH64_OPT_EXTENSION("crc", CRC, (), (), (), "crc32") +AARCH64_OPT_FMV_EXTENSION("rng", RNG, (), (), (), "rng") -AARCH64_OPT_EXTENSION("lse", LSE, (), (), (), "atomics") +AARCH64_OPT_FMV_EXTENSION("flagm", FLAGM, (), (), (), "flagm") -/* +nofp16 disables an implicit F16FML, even though an implicit F16FML - does not imply F16. See F16FML for more details. 
*/ -AARCH64_OPT_EXTENSION("fp16", F16, (FP), (), (F16FML), "fphp asimdhp") +AARCH64_FMV_FEATURE("flagm2", FLAGM2, (FLAGM)) + +AARCH64_FMV_FEATURE("fp16fml", FP16FML, (F16FML)) + +AARCH64_OPT_FMV_EXTENSION("dotprod", DOTPROD, (SIMD), (), (), "asimddp") -AARCH64_OPT_EXTENSION("rcpc", RCPC, (), (), (), "lrcpc") +AARCH64_OPT_FMV_EXTENSION("sm4", SM4, (SIMD), (), (), "sm3 sm4") /* An explicit +rdma implies +simd, but +rdma+nosimd still enables scalar RDMA instructions. */ AARCH64_OPT_EXTENSION("rdma", RDMA, (), (SIMD), (), "asimdrdm") -AARCH64_OPT_EXTENSION("dotprod", DOTPROD, (SIMD), (), (), "asimddp") +AARCH64_FMV_FEATURE("rdm", RDM, (RDMA)) + +AARCH64_OPT_FMV_EXTENSION("lse", LSE, (), (), (), "atomics") + +AARCH64_FMV_FEATURE("fp", FP, (FP)) + +AARCH64_FMV_FEATURE("simd", SIMD, (SIMD)) + +AARCH64_OPT_FMV_EXTENSION("crc", CRC, (), (), (), "crc32") + +AARCH64_FMV_FEATURE("sha1", SHA1, ()) -AARCH64_OPT_EXTENSION("aes", AES, (SIMD), (), (), "aes") +AARCH64_OPT_FMV_EXTENSION("sha2", SHA2, (SIMD), (), (), "sha1 sha2") -AARCH64_OPT_EXTENSION("sha2", SHA2, (SIMD), (), (), "sha1 sha2") +AARCH64_FMV_FEATURE("sha3", SHA3, (SHA3)) + +AARCH64_OPT_FMV_EXTENSION("aes", AES, (SIMD), (), (), "aes") + +AARCH64_FMV_FEATURE("pmull", PMULL, ()) /* +nocrypto disables AES, SHA2 and SM4, and anything that depends on them (such as SHA3 and the SVE2 crypto extensions). */ AARCH64_OPT_EXTENSION("crypto", CRYPTO, (AES, SHA2), (), (AES, SHA2, SM4), "aes pmull sha1 sha2") +/* Listing sha3 after crypto means we pass "+aes+sha3" to the assembler + instead of "+sha3+crypto". */ AARCH64_OPT_EXTENSION("sha3", SHA3, (SHA2), (), (), "sha3 sha512") -AARCH64_OPT_EXTENSION("sm4", SM4, (SIMD), (), (), "sm3 sm4") +/* +nofp16 disables an implicit F16FML, even though an implicit F16FML + does not imply F16. See F16FML for more details. 
*/ +AARCH64_OPT_EXTENSION("fp16", F16, (FP), (), (F16FML), "fphp asimdhp") + +AARCH64_FMV_FEATURE("fp16", FP16, (F16)) /* An explicit +fp16fml implies +fp16, but a dependence on it does not. Thus -march=armv8.4-a implies F16FML but not F16. -march=armv8.4-a+fp16 @@ -104,60 +160,120 @@ AARCH64_OPT_EXTENSION("sm4", SM4, (SIMD), (), (), "sm3 sm4") -march=armv8.4-a+nofp16+fp16 enables F16 but not F16FML. */ AARCH64_OPT_EXTENSION("fp16fml", F16FML, (), (F16), (), "asimdfhm") -AARCH64_OPT_EXTENSION("sve", SVE, (SIMD, F16), (), (), "sve") +AARCH64_FMV_FEATURE("dit", DIT, ()) -AARCH64_OPT_EXTENSION("profile", PROFILE, (), (), (), "") +AARCH64_FMV_FEATURE("dpb", DPB, ()) -AARCH64_OPT_EXTENSION("rng", RNG, (), (), (), "rng") +AARCH64_FMV_FEATURE("dpb2", DPB2, ()) -AARCH64_OPT_EXTENSION("memtag", MEMTAG, (), (), (), "") +AARCH64_FMV_FEATURE("jscvt", JSCVT, ()) -AARCH64_OPT_EXTENSION("sb", SB, (), (), (), "sb") +AARCH64_FMV_FEATURE("fcma", FCMA, (SIMD)) -AARCH64_OPT_EXTENSION("ssbs", SSBS, (), (), (), "ssbs") +AARCH64_OPT_FMV_EXTENSION("rcpc", RCPC, (), (), (), "lrcpc") -AARCH64_OPT_EXTENSION("predres", PREDRES, (), (), (), "") +AARCH64_FMV_FEATURE("rcpc2", RCPC2, (RCPC)) -AARCH64_OPT_EXTENSION("sve2", SVE2, (SVE), (), (), "sve2") +AARCH64_OPT_FMV_EXTENSION("rcpc3", RCPC3, (), (), (), "rcpc3") -AARCH64_OPT_EXTENSION("sve2-sm4", SVE2_SM4, (SVE2, SM4), (), (), "svesm4") +AARCH64_FMV_FEATURE("frintts", FRINTTS, ()) + +AARCH64_FMV_FEATURE("dgh", DGH, ()) + +AARCH64_OPT_FMV_EXTENSION("i8mm", I8MM, (SIMD), (), (), "i8mm") + +/* An explicit +bf16 implies +simd, but +bf16+nosimd still enables scalar BF16 + instructions. 
*/ +AARCH64_OPT_FMV_EXTENSION("bf16", BF16, (FP), (SIMD), (), "bf16") + +AARCH64_FMV_FEATURE("ebf16", EBF16, (BF16)) + +AARCH64_FMV_FEATURE("rpres", RPRES, ()) + +AARCH64_OPT_FMV_EXTENSION("sve", SVE, (SIMD, F16), (), (), "sve") + +AARCH64_FMV_FEATURE("sve-bf16", SVE_BF16, (SVE, BF16)) + +AARCH64_FMV_FEATURE("sve-ebf16", SVE_EBF16, (SVE, BF16)) + +AARCH64_FMV_FEATURE("sve-i8mm", SVE_I8MM, (SVE, I8MM)) + +AARCH64_OPT_EXTENSION("f32mm", F32MM, (SVE), (), (), "f32mm") + +AARCH64_FMV_FEATURE("f32mm", SVE_F32MM, (F32MM)) + +AARCH64_OPT_EXTENSION("f64mm", F64MM, (SVE), (), (), "f64mm") + +AARCH64_FMV_FEATURE("f64mm", SVE_F64MM, (F64MM)) + +AARCH64_OPT_FMV_EXTENSION("sve2", SVE2, (SVE), (), (), "sve2") AARCH64_OPT_EXTENSION("sve2-aes", SVE2_AES, (SVE2, AES), (), (), "sveaes") -AARCH64_OPT_EXTENSION("sve2-sha3", SVE2_SHA3, (SVE2, SHA3), (), (), "svesha3") +AARCH64_FMV_FEATURE("sve2-aes", SVE_AES, (SVE2_AES)) + +AARCH64_FMV_FEATURE("sve2-pmull128", SVE_PMULL128, (SVE2)) AARCH64_OPT_EXTENSION("sve2-bitperm", SVE2_BITPERM, (SVE2), (), (), "svebitperm") -AARCH64_OPT_EXTENSION("tme", TME, (), (), (), "") +AARCH64_FMV_FEATURE("sve2-bitperm", SVE_BITPERM, (SVE2_BITPERM)) -AARCH64_OPT_EXTENSION("i8mm", I8MM, (SIMD), (), (), "i8mm") +AARCH64_OPT_EXTENSION("sve2-sha3", SVE2_SHA3, (SVE2, SHA3), (), (), "svesha3") -AARCH64_OPT_EXTENSION("f32mm", F32MM, (SVE), (), (), "f32mm") +AARCH64_FMV_FEATURE("sve2-sha3", SVE_SHA3, (SVE2_SHA3)) -AARCH64_OPT_EXTENSION("f64mm", F64MM, (SVE), (), (), "f64mm") +AARCH64_OPT_EXTENSION("sve2-sm4", SVE2_SM4, (SVE2, SM4), (), (), "svesm4") -/* An explicit +bf16 implies +simd, but +bf16+nosimd still enables scalar BF16 - instructions. 
*/ -AARCH64_OPT_EXTENSION("bf16", BF16, (FP), (SIMD), (), "bf16") +AARCH64_FMV_FEATURE("sve2-sm4", SVE_SM4, (SVE2_SM4)) + +AARCH64_OPT_FMV_EXTENSION("sme", SME, (BF16, SVE2), (), (), "sme") + +AARCH64_OPT_FMV_EXTENSION("memtag", MEMTAG, (), (), (), "") + +AARCH64_FMV_FEATURE("memtag2", MEMTAG2, (MEMTAG)) + +AARCH64_FMV_FEATURE("memtag3", MEMTAG3, (MEMTAG)) -AARCH64_OPT_EXTENSION("flagm", FLAGM, (), (), (), "flagm") +AARCH64_OPT_FMV_EXTENSION("sb", SB, (), (), (), "sb") + +AARCH64_OPT_FMV_EXTENSION("predres", PREDRES, (), (), (), "") + +AARCH64_OPT_FMV_EXTENSION("ssbs", SSBS, (), (), (), "ssbs") + +AARCH64_FMV_FEATURE("ssbs2", SSBS2, (SSBS)) + +AARCH64_FMV_FEATURE("bti", BTI, ()) + +AARCH64_OPT_EXTENSION("profile", PROFILE, (), (), (), "") + +AARCH64_OPT_EXTENSION("tme", TME, (), (), (), "") AARCH64_OPT_EXTENSION("pauth", PAUTH, (), (), (), "paca pacg") AARCH64_OPT_EXTENSION("ls64", LS64, (), (), (), "") -AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "") +AARCH64_FMV_FEATURE("ls64", LS64, ()) -AARCH64_OPT_EXTENSION("cssc", CSSC, (), (), (), "cssc") +AARCH64_FMV_FEATURE("ls64_v", LS64_V, ()) -AARCH64_OPT_EXTENSION("sme", SME, (BF16, SVE2), (), (), "sme") +AARCH64_FMV_FEATURE("ls64_accdata", LS64_ACCDATA, (LS64)) -AARCH64_OPT_EXTENSION("sme-i16i64", SME_I16I64, (SME), (), (), "") +AARCH64_FMV_FEATURE("wfxt", WFXT, ()) AARCH64_OPT_EXTENSION("sme-f64f64", SME_F64F64, (SME), (), (), "") -AARCH64_OPT_EXTENSION("sme2", SME2, (SME), (), (), "sme2") +AARCH64_FMV_FEATURE("sme-f64f64", SME_F64, (SME_F64F64)) + +AARCH64_OPT_EXTENSION("sme-i16i64", SME_I16I64, (SME), (), (), "") + +AARCH64_FMV_FEATURE("sme-i16i64", SME_I64, (SME_I16I64)) + +AARCH64_OPT_FMV_EXTENSION("sme2", SME2, (SME), (), (), "sme2") + +AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "") + +AARCH64_OPT_EXTENSION("cssc", CSSC, (), (), (), "cssc") AARCH64_OPT_EXTENSION("d128", D128, (), (), (), "d128") @@ -165,5 +281,6 @@ AARCH64_OPT_EXTENSION("the", THE, (), (), (), "the") AARCH64_OPT_EXTENSION("gcs", GCS, 
(), (), (), "gcs") -AARCH64_OPT_EXTENSION("rcpc3", RCPC3, (), (), (), "rcpc3") +#undef AARCH64_OPT_FMV_EXTENSION #undef AARCH64_OPT_EXTENSION +#undef AARCH64_FMV_FEATURE diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 776fb36f21e8c7d83a4ce446ccc51d661b4e7bbd..f9850320f61c5ddccf47e6583d304e5f405a484f 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -85,6 +85,7 @@ #include "aarch64-feature-deps.h" #include "config/arm/aarch-common.h" #include "config/arm/aarch-common-protos.h" +#include "common/config/aarch64/cpuinfo.h" #include "ssa.h" #include "except.h" #include "tree-pass.h" @@ -19375,6 +19376,8 @@ aarch64_process_target_attr (tree args) return true; } +static bool aarch64_process_target_version_attr (tree args); + /* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to process attribute ((target ("..."))). */ @@ -19430,6 +19433,19 @@ aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int) TREE_TARGET_OPTION (target_option_current_node)); ret = aarch64_process_target_attr (args); + if (ret) + { + tree version_attr = lookup_attribute ("target_version", + DECL_ATTRIBUTES (fndecl)); + if (version_attr != NULL_TREE) + { + /* Reapply any target_version attribute after target attribute. + This should be equivalent to applying the target_version once + after processing all target attributes. */ + tree version_args = TREE_VALUE (version_attr); + ret = aarch64_process_target_version_attr (version_args); + } + } /* Set up any additional state. 
*/ if (ret) @@ -19460,6 +19477,829 @@ aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int) return ret; }
+/* Bitmask of function multiversioning (FMV) features.  Each row of the
+   table below owns the single bit 1ULL << FEAT_<name>.  */
+typedef unsigned long long aarch64_fmv_feature_mask;
+
+/* One row of the FMV feature table.  */
+typedef struct
+{
+  /* Feature name, matched against the '+'-separated names of an FMV
+     feature string.  */
+  const char *name;
+  /* The bit 1ULL << FEAT_<name> for this feature.  */
+  aarch64_fmv_feature_mask feature_mask;
+  /* The value of feature_deps::fmv_deps_<name> for this feature.  */
+  aarch64_feature_flags opt_flags;
+} aarch64_fmv_feature_datum;
+
+/* Expand every AARCH64_FMV_FEATURE entry of the .def file into one table
+   row.  The third macro argument is unused here: the flags are taken from
+   the matching feature_deps::fmv_deps_<FEAT_NAME> constant instead.  */
+#define AARCH64_FMV_FEATURE(NAME, FEAT_NAME, C) \
+  {NAME, 1ULL << FEAT_##FEAT_NAME, ::feature_deps::fmv_deps_##FEAT_NAME},
+
+/* FMV features are listed in priority order, to make it easier to sort target
+   strings.  The row order is therefore that of the .def file, which need not
+   be the numeric order of the FEAT_* enumerators.  */
+static aarch64_fmv_feature_datum aarch64_fmv_feature_data[] = {
+#include "config/aarch64/aarch64-option-extensions.def"
+};
+
+/* Parse a function multiversioning feature string STR, as found in a
+   target_version or target_clones attribute.
+
+   If ISA_FLAGS is nonnull, then update it with the specified architecture
+   features turned on.  If FEATURE_MASK is nonnull, then assign to it a bitmask
+   representing the set of features explicitly specified in the feature string.
+   Return an aarch_parse_opt_result describing the result.
+
+   When the STR string contains an invalid or duplicate extension, a copy of
+   the extension string is created and stored to INVALID_EXTENSION.  
*/
+
+static enum aarch_parse_opt_result
+aarch64_parse_fmv_features (const char *str, aarch64_feature_flags *isa_flags,
+                            aarch64_fmv_feature_mask *feature_mask,
+                            std::string *invalid_extension)
+{
+  if (feature_mask)
+    *feature_mask = 0ULL;
+
+  /* "default" names the fallback version and requests no features.  */
+  if (strcmp (str, "default") == 0)
+    return AARCH_PARSE_OK;
+
+  /* STR is a '+'-separated list of feature names; consume one name per
+     iteration.  */
+  while (*str != 0)
+    {
+      const char *ext = strchr (str, '+');
+      size_t len = ext ? (size_t) (ext - str) : strlen (str);
+
+      /* An empty name: a leading '+' or two adjacent '+' characters.  */
+      if (len == 0)
+        return AARCH_PARSE_MISSING_ARG;
+
+      /* Look the name up in the FMV feature table.  */
+      const aarch64_fmv_feature_datum *found = NULL;
+      for (const auto &datum : aarch64_fmv_feature_data)
+        if (strlen (datum.name) == len
+            && strncmp (datum.name, str, len) == 0)
+          {
+            found = &datum;
+            break;
+          }
+
+      if (found == NULL)
+        {
+          /* Feature not found in list.  */
+          if (invalid_extension)
+            *invalid_extension = std::string (str, len);
+          return AARCH_PARSE_INVALID_FEATURE;
+        }
+
+      if (isa_flags)
+        *isa_flags |= found->opt_flags;
+
+      if (feature_mask)
+        {
+          if (*feature_mask & found->feature_mask)
+            {
+              /* Duplicate feature.  */
+              if (invalid_extension)
+                *invalid_extension = std::string (str, len);
+              return AARCH_PARSE_DUPLICATE_FEATURE;
+            }
+          *feature_mask |= found->feature_mask;
+        }
+
+      if (ext == NULL)
+        break;
+
+      /* Skip over the '+'.  A '+' must be followed by another feature name,
+         so reject a trailing '+' in the same way as a leading or doubled
+         one instead of silently accepting it.  */
+      str = ext + 1;
+      if (*str == 0)
+        return AARCH_PARSE_MISSING_ARG;
+    }
+
+  return AARCH_PARSE_OK;
+}
+
+/* Parse the tree in ARGS that contains the target_version attribute
+   information and update the global target options space.  
*/
+
+static bool
+aarch64_process_target_version_attr (tree args)
+{
+  /* The operand may arrive wrapped in a TREE_LIST; accept exactly one
+     element and unwrap it.  */
+  if (TREE_CODE (args) == TREE_LIST)
+    {
+      if (TREE_CHAIN (args))
+        {
+          error ("attribute %<target_version%> has multiple values");
+          return false;
+        }
+      args = TREE_VALUE (args);
+    }
+
+  if (!args || TREE_CODE (args) != STRING_CST)
+    {
+      error ("attribute %<target_version%> argument not a string");
+      return false;
+    }
+
+  const char *version_str = TREE_STRING_POINTER (args);
+
+  /* Parse into a copy so the global flags only change on success.  */
+  auto new_isa_flags = aarch64_asm_isa_flags;
+  std::string bad_feature;
+  enum aarch_parse_opt_result result
+    = aarch64_parse_fmv_features (version_str, &new_isa_flags, NULL,
+                                  &bad_feature);
+
+  switch (result)
+    {
+    case AARCH_PARSE_OK:
+      aarch64_set_asm_isa_flags (new_isa_flags);
+      return true;
+
+    case AARCH_PARSE_MISSING_ARG:
+      error ("missing value in %<target_version%> attribute");
+      break;
+
+    case AARCH_PARSE_INVALID_FEATURE:
+      error ("invalid feature modifier %qs of value %qs in "
+             "%<target_version%> attribute", bad_feature.c_str (),
+             version_str);
+      break;
+
+    case AARCH_PARSE_DUPLICATE_FEATURE:
+      error ("duplicate feature modifier %qs of value %qs in "
+             "%<target_version%> attribute", bad_feature.c_str (),
+             version_str);
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  return false;
+}
+
+/* Implement TARGET_OPTION_VALID_VERSION_ATTRIBUTE_P.  Process
+   attribute ((target_version ("..."))) in ARGS for FNDECL and return true
+   if it was accepted.  The global options are left as they were found.  */
+
+static bool
+aarch64_option_valid_version_attribute_p (tree fndecl, tree, tree args, int)
+{
+  tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
+
+  /* Remember the current target options so they can be put back below.  */
+  struct cl_target_option saved_options;
+  cl_target_option_save (&saved_options, &global_options,
+                         &global_options_set);
+
+  /* Start from the options FNDECL already carries, if any, so that this
+     attribute is applied on top of them; otherwise start from the current
+     target option node.  */
+  if (!existing_target)
+    cl_target_option_restore (&global_options, &global_options_set,
+                              TREE_TARGET_OPTION (target_option_current_node));
+  else if (struct cl_target_option *existing_options
+             = TREE_TARGET_OPTION (existing_target))
+    cl_target_option_restore (&global_options, &global_options_set,
+                              existing_options);
+
+  const bool ret = aarch64_process_target_version_attr (args);
+
+  if (ret)
+    {
+      /* Set up any additional state and record the result on FNDECL.  */
+      aarch64_override_options_internal (&global_options);
+      tree new_target = build_target_option_node (&global_options,
+                                                  &global_options_set);
+      if (fndecl)
+        DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
+    }
+
+  cl_target_option_restore (&global_options, &global_options_set,
+                            &saved_options);
+
+  return ret;
+}
+
+/* Parse the argument of the target_version attribute on DECL and return the
+   mask of the features it names, or 0 if DECL has no such attribute.  No
+   adjustments are made to add or remove redundant feature requirements.  */
+
+static aarch64_fmv_feature_mask
+get_feature_mask_for_version (tree decl)
+{
+  tree attr = lookup_attribute ("target_version", DECL_ATTRIBUTES (decl));
+  if (attr == NULL)
+    return 0;
+
+  tree version_arg = TREE_VALUE (TREE_VALUE (attr));
+  aarch64_fmv_feature_mask mask;
+  enum aarch_parse_opt_result result
+    = aarch64_parse_fmv_features (TREE_STRING_POINTER (version_arg), NULL,
+                                  &mask, NULL);
+
+  /* We should have detected any errors before getting here.  */
+  gcc_assert (result == AARCH_PARSE_OK);
+
+  return mask;
+}
+
+/* Compare priorities of two feature masks.  Return:
+    1: mask1 is higher priority
+   -1: mask2 is higher priority
+    0: masks are equal.  
*/
+
+static int
+compare_feature_masks (aarch64_fmv_feature_mask mask1,
+                       aarch64_fmv_feature_mask mask2)
+{
+  /* The mask naming more features wins outright.  */
+  const int count1 = popcount_hwi (mask1);
+  const int count2 = popcount_hwi (mask2);
+  if (count1 != count2)
+    return count1 > count2 ? 1 : -1;
+
+  const auto differing = mask1 ^ mask2;
+  if (differing == 0ULL)
+    return 0;
+
+  /* Same number of features: walk the table from its last row downwards
+     and let the first row whose bit differs decide.  */
+  int idx = FEAT_MAX;
+  while (--idx > 0)
+    {
+      const auto row_bit = aarch64_fmv_feature_data[idx].feature_mask;
+      if ((differing & row_bit) == 0)
+        continue;
+      return (mask1 & row_bit) ? 1 : -1;
+    }
+  gcc_unreachable ();
+}
+
+/* Compare the priorities of the version decls DECL1 and DECL2, with the
+   same return convention as compare_feature_masks.  */
+
+int
+aarch64_compare_version_priority (tree decl1, tree decl2)
+{
+  return compare_feature_masks (get_feature_mask_for_version (decl1),
+                                get_feature_mask_for_version (decl2));
+}
+
+/* Build a record type laid out as
+
+     struct __ifunc_arg_t
+     {
+       unsigned long _size; // Size of the struct, so it can grow.
+       unsigned long _hwcap;
+       unsigned long _hwcap2;
+     }
+
+   and return a pointer type to its const-qualified variant.  Every call
+   creates a new record type.  */
+
+static tree
+build_ifunc_arg_type ()
+{
+  static const char *const field_names[] = { "_size", "_hwcap", "_hwcap2" };
+
+  tree record = lang_hooks.types.make_type (RECORD_TYPE);
+
+  /* Append the fields in declaration order.  */
+  tree *tail = &TYPE_FIELDS (record);
+  for (const char *field_name : field_names)
+    {
+      tree field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+                               get_identifier (field_name),
+                               long_unsigned_type_node);
+      DECL_FIELD_CONTEXT (field) = record;
+      *tail = field;
+      tail = &DECL_CHAIN (field);
+    }
+
+  layout_type (record);
+
+  return build_pointer_type (build_qualified_type (record, TYPE_QUAL_CONST));
+}
+
+/* Make the resolver function decl to dispatch the versions of
+   a multi-versioned function, DEFAULT_DECL.  
IFUNC_ALIAS_DECL is
+   ifunc alias that will point to the created resolver.  Create an
+   empty basic block in the resolver and store the pointer in
+   EMPTY_BB.  Return the decl of the resolver function.  */
+
+static tree
+make_resolver_func (const tree default_decl,
+                    const tree ifunc_alias_decl,
+                    basic_block *empty_bb)
+{
+  tree decl, type, t;
+
+  /* Create resolver function name based on default_decl.  */
+  tree decl_name = clone_function_name (default_decl, "resolver");
+  const char *resolver_name = IDENTIFIER_POINTER (decl_name);
+
+  /* The resolver function should have signature
+     (void *) resolver (uint64_t, const __ifunc_arg_t *) */
+  type = build_function_type_list (ptr_type_node,
+                                   uint64_type_node,
+                                   build_ifunc_arg_type (),
+                                   NULL_TREE);
+
+  decl = build_fn_decl (resolver_name, type);
+  SET_DECL_ASSEMBLER_NAME (decl, decl_name);
+
+  /* The resolver is compiler-generated: mark it artificial, hidden from
+     debug info, not public and not inlinable.  */
+  DECL_NAME (decl) = decl_name;
+  TREE_USED (decl) = 1;
+  DECL_ARTIFICIAL (decl) = 1;
+  DECL_IGNORED_P (decl) = 1;
+  TREE_PUBLIC (decl) = 0;
+  DECL_UNINLINABLE (decl) = 1;
+
+  /* Resolver is not external, body is generated.  */
+  DECL_EXTERNAL (decl) = 0;
+  DECL_EXTERNAL (ifunc_alias_decl) = 0;
+
+  DECL_CONTEXT (decl) = NULL_TREE;
+  DECL_INITIAL (decl) = make_node (BLOCK);
+  DECL_STATIC_CONSTRUCTOR (decl) = 0;
+
+  if (DECL_COMDAT_GROUP (default_decl)
+      || TREE_PUBLIC (default_decl))
+    {
+      /* In this case, each translation unit with a call to this
+         versioned function will put out a resolver.  Ensure it
+         is comdat to keep just one copy.  */
+      DECL_COMDAT (decl) = 1;
+      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
+    }
+  else
+    /* The default version is local, so the ifunc alias is made local
+       too.  */
+    TREE_PUBLIC (ifunc_alias_decl) = 0;
+
+  /* Build result decl and add to function_decl.  */
+  t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
+  DECL_CONTEXT (t) = decl;
+  DECL_ARTIFICIAL (t) = 1;
+  DECL_IGNORED_P (t) = 1;
+  DECL_RESULT (decl) = t;
+
+  /* Build parameter decls and add to function_decl.
+     NOTE(review): build_ifunc_arg_type creates a new record type on every
+     call, so the function type above, ARG2 and its DECL_ARG_TYPE each refer
+     to a distinct (identically laid out) type -- confirm this is intended.  */
+  tree arg1 = build_decl (UNKNOWN_LOCATION, PARM_DECL,
+                          get_identifier ("hwcap"),
+                          uint64_type_node);
+  tree arg2 = build_decl (UNKNOWN_LOCATION, PARM_DECL,
+                          get_identifier ("arg"),
+                          build_ifunc_arg_type());
+  DECL_CONTEXT (arg1) = decl;
+  DECL_CONTEXT (arg2) = decl;
+  DECL_ARTIFICIAL (arg1) = 1;
+  DECL_ARTIFICIAL (arg2) = 1;
+  DECL_IGNORED_P (arg1) = 1;
+  DECL_IGNORED_P (arg2) = 1;
+  DECL_ARG_TYPE (arg1) = uint64_type_node;
+  DECL_ARG_TYPE (arg2) = build_ifunc_arg_type ();
+  DECL_ARGUMENTS (decl) = arg1;
+  TREE_CHAIN (arg1) = arg2;
+
+  /* Give the resolver a lowered, empty body and hand its basic block back
+     to the caller through EMPTY_BB.  */
+  gimplify_function_tree (decl);
+  push_cfun (DECL_STRUCT_FUNCTION (decl));
+  *empty_bb = init_lowered_empty_function (decl, false,
+                                           profile_count::uninitialized ());
+
+  cgraph_node::add_new_function (decl, true);
+  symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
+
+  pop_cfun ();
+
+  /* NOTE(review): ifunc_alias_decl has already been written through above
+     (DECL_EXTERNAL, TREE_PUBLIC), so this assert comes too late to catch a
+     null argument.  */
+  gcc_assert (ifunc_alias_decl != NULL);
+  /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name.  */
+  DECL_ATTRIBUTES (ifunc_alias_decl)
+    = make_attribute ("ifunc", resolver_name,
+                      DECL_ATTRIBUTES (ifunc_alias_decl));
+
+  /* Create the alias for dispatch to resolver here.  */
+  cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
+  return decl;
+}
+
+/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
+   to return a pointer to VERSION_DECL if all feature bits specified in
+   FEATURE_MASK are not set in MASK_VAR.  This function will be called during
+   version dispatch to decide which function version to execute.  It returns
+   the basic block at the end, to which more conditions can be added.  
*/
+static basic_block
+add_condition_to_bb (tree function_decl, tree version_decl,
+                     aarch64_fmv_feature_mask feature_mask,
+                     tree mask_var, basic_block new_bb)
+{
+  gimple *return_stmt;
+  tree convert_expr, result_var;
+  gimple *convert_stmt;
+  gimple *if_else_stmt;
+
+  basic_block bb1, bb2, bb3;
+  edge e12, e23;
+
+  gimple_seq gseq;
+
+  push_cfun (DECL_STRUCT_FUNCTION (function_decl));
+
+  gcc_assert (new_bb != NULL);
+  gseq = bb_seq (new_bb);
+
+  /* Build "result_var = (void *) &version_decl; return result_var;" up
+     front; both paths below append these two statements.  */
+  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
+                         build_fold_addr_expr (version_decl));
+  result_var = create_tmp_var (ptr_type_node);
+  convert_stmt = gimple_build_assign (result_var, convert_expr);
+  return_stmt = gimple_build_return (result_var);
+
+  if (feature_mask == 0ULL)
+    {
+      /* Default version.  It is returned unconditionally, so no test is
+         emitted and NEW_BB is returned without being split.  */
+      gimple_seq_add_stmt (&gseq, convert_stmt);
+      gimple_seq_add_stmt (&gseq, return_stmt);
+      set_bb_seq (new_bb, gseq);
+      gimple_set_bb (convert_stmt, new_bb);
+      gimple_set_bb (return_stmt, new_bb);
+      pop_cfun ();
+      return new_bb;
+    }
+
+  /* and_expr_var = mask_var & feature_mask.  */
+  tree and_expr_var = create_tmp_var (long_long_unsigned_type_node);
+  tree and_expr = build2 (BIT_AND_EXPR,
+                          long_long_unsigned_type_node,
+                          mask_var,
+                          build_int_cst (long_long_unsigned_type_node,
+                                         feature_mask));
+  gimple *and_stmt = gimple_build_assign (and_expr_var, and_expr);
+  gimple_set_block (and_stmt, DECL_INITIAL (function_decl));
+  gimple_set_bb (and_stmt, new_bb);
+  gimple_seq_add_stmt (&gseq, and_stmt);
+
+  /* if (and_expr_var == 0): none of the requested bits is set in
+     MASK_VAR, so this version is selected.  */
+  tree zero_llu = build_int_cst (long_long_unsigned_type_node, 0);
+  if_else_stmt = gimple_build_cond (EQ_EXPR, and_expr_var, zero_llu,
+                                    NULL_TREE, NULL_TREE);
+  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
+  gimple_set_bb (if_else_stmt, new_bb);
+  gimple_seq_add_stmt (&gseq, if_else_stmt);
+
+  gimple_seq_add_stmt (&gseq, convert_stmt);
+  gimple_seq_add_stmt (&gseq, return_stmt);
+  set_bb_seq (new_bb, gseq);
+
+  /* Split after the condition: bb1 ends with the test and bb2 holds the
+     return.  The edge bb1 -> bb2 becomes the true edge.  */
+  bb1 = new_bb;
+  e12 = split_block (bb1, if_else_stmt);
+  bb2 = e12->dest;
+  e12->flags &= ~EDGE_FALLTHRU;
+  e12->flags |= EDGE_TRUE_VALUE;
+
+  /* Split again after the return; the block that follows (bb3) is where
+     the next condition will be added.  */
+  e23 = split_block (bb2, return_stmt);
+
+  gimple_set_bb (convert_stmt, bb2);
+  gimple_set_bb (return_stmt, bb2);
+
+  /* A failed test skips the return and continues in bb3.  */
+  bb3 = e23->dest;
+  make_edge (bb1, bb3, EDGE_FALSE_VALUE);
+
+  /* bb2 returns, so it leads to the exit block rather than to bb3.  */
+  remove_edge (e23);
+  make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
+
+  pop_cfun ();
+
+  return bb3;
+}
+
+/* This function generates the dispatch function for
+   multi-versioned functions.  DISPATCH_DECL is the function which will
+   contain the dispatch logic.  FNDECLS_P points to a vec<tree> holding the
+   function choices for dispatch.  EMPTY_BB is the basic block pointer
+   in DISPATCH_DECL in which the dispatch code is generated.  */
+
+static int
+dispatch_function_versions (tree dispatch_decl,
+                            void *fndecls_p,
+                            basic_block *empty_bb)
+{
+  gimple *ifunc_cpu_init_stmt;
+  gimple_seq gseq;
+  vec<tree> *fndecls;
+
+  gcc_assert (dispatch_decl != NULL
+              && fndecls_p != NULL
+              && empty_bb != NULL);
+
+  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
+
+  gseq = bb_seq (*empty_bb);
+  /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
+     constructors, so explicitly call __init_cpu_features_resolver here,
+     forwarding the two arguments of DISPATCH_DECL.  */
+  tree init_fn_type = build_function_type_list (void_type_node,
+                                                long_unsigned_type_node,
+                                                build_ifunc_arg_type(),
+                                                NULL);
+  tree init_fn_id = get_identifier ("__init_cpu_features_resolver");
+  tree init_fn_decl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL,
+                                  init_fn_id, init_fn_type);
+  tree arg1 = DECL_ARGUMENTS (dispatch_decl);
+  tree arg2 = TREE_CHAIN (arg1);
+  ifunc_cpu_init_stmt = gimple_build_call (init_fn_decl, 2, arg1, arg2);
+  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
+  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
+
+  /* Build the struct type for __aarch64_cpu_features.  
*/
+  /* Only a single unsigned long long field named "features" is described
+     here.  */
+  tree global_type = lang_hooks.types.make_type (RECORD_TYPE);
+  tree field1 = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+                            get_identifier ("features"),
+                            long_long_unsigned_type_node);
+  DECL_FIELD_CONTEXT (field1) = global_type;
+  TYPE_FIELDS (global_type) = field1;
+  layout_type (global_type);
+
+  /* Refer to __aarch64_cpu_features as an external variable.  */
+  tree global_var = build_decl (UNKNOWN_LOCATION, VAR_DECL,
+                                get_identifier ("__aarch64_cpu_features"),
+                                global_type);
+  DECL_EXTERNAL (global_var) = 1;
+  tree mask_var = create_tmp_var (long_long_unsigned_type_node);
+
+  /* mask_var = __aarch64_cpu_features.features;  */
+  tree component_expr = build3 (COMPONENT_REF, long_long_unsigned_type_node,
+                                global_var, field1, NULL_TREE);
+  gimple *component_stmt = gimple_build_assign (mask_var, component_expr);
+  gimple_set_block (component_stmt, DECL_INITIAL (dispatch_decl));
+  gimple_set_bb (component_stmt, *empty_bb);
+  gimple_seq_add_stmt (&gseq, component_stmt);
+
+  /* mask_var = ~mask_var;  the value handed to add_condition_to_bb is the
+     complement of the features word.  */
+  tree not_expr = build1 (BIT_NOT_EXPR, long_long_unsigned_type_node, mask_var);
+  gimple *not_stmt = gimple_build_assign (mask_var, not_expr);
+  gimple_set_block (not_stmt, DECL_INITIAL (dispatch_decl));
+  gimple_set_bb (not_stmt, *empty_bb);
+  gimple_seq_add_stmt (&gseq, not_stmt);
+
+  set_bb_seq (*empty_bb, gseq);
+
+  pop_cfun ();
+
+  /* fndecls_p is actually a vector.  */
+  fndecls = static_cast<vec<tree> *> (fndecls_p);
+
+  /* At least one more version other than the default.  */
+  unsigned int num_versions = fndecls->length ();
+  gcc_assert (num_versions >= 2);
+
+  /* Pairs each version decl with its feature mask so the two can be sorted
+     together.  */
+  struct function_version_info
+    {
+      tree version_decl;
+      aarch64_fmv_feature_mask feature_mask;
+    } *function_versions;
+
+  /* Released with free () at the end of this function.  */
+  function_versions = (struct function_version_info *)
+    XNEWVEC (struct function_version_info, (num_versions));
+
+  unsigned int actual_versions = 0;
+
+  for (tree version_decl : *fndecls)
+    {
+      aarch64_fmv_feature_mask feature_mask;
+      /* Get attribute string, parse it and find the right features.  */
+      feature_mask = get_feature_mask_for_version (version_decl);
+      function_versions [actual_versions].version_decl = version_decl;
+      function_versions [actual_versions].feature_mask = feature_mask;
+      actual_versions++;
+    }
+
+  /* The result of compare_feature_masks is negated so that the
+     higher-priority version sorts first.  */
+  auto compare_feature_version_info = [](const void *p1, const void *p2) {
+    const function_version_info v1 = *(const function_version_info *)p1;
+    const function_version_info v2 = *(const function_version_info *)p2;
+    return - compare_feature_masks (v1.feature_mask, v2.feature_mask);
+  };
+
+  /* Sort the versions according to descending order of dispatch priority.  */
+  qsort (function_versions, actual_versions,
+         sizeof (struct function_version_info), compare_feature_version_info);
+
+  /* Emit one test per version, in sorted order.  Each call returns the
+     block in which the next test is placed.  */
+  for (unsigned int i = 0; i < actual_versions; ++i)
+    *empty_bb = add_condition_to_bb (dispatch_decl,
+                                     function_versions[i].version_decl,
+                                     function_versions[i].feature_mask,
+                                     mask_var,
+                                     *empty_bb);
+
+  free (function_versions);
+  return 0;
+}
+
+/* Implement TARGET_GENERATE_VERSION_DISPATCHER_BODY.  NODE_P is the
+   cgraph_node of the dispatcher function.  Return the decl of its resolver,
+   creating the resolver and its body on the first call.  */
+
+tree
+aarch64_generate_version_dispatcher_body (void *node_p)
+{
+  tree resolver_decl;
+  basic_block empty_bb;
+  tree default_ver_decl;
+  struct cgraph_node *versn;
+  struct cgraph_node *node;
+
+  struct cgraph_function_version_info *node_version_info = NULL;
+  struct cgraph_function_version_info *versn_info = NULL;
+
+  node = (cgraph_node *)node_p;
+
+  node_version_info = node->function_version ();
+  gcc_assert (node->dispatcher_function
+              && node_version_info != NULL);
+
+  /* Reuse the resolver if one has already been generated.  */
+  if (node_version_info->dispatcher_resolver)
+    return node_version_info->dispatcher_resolver;
+
+  /* The first version in the chain corresponds to the default version.  */
+  default_ver_decl = node_version_info->next->this_node->decl;
+
+  /* node is going to be an alias, so remove the finalized bit.  
*/ + node->definition = false; + + resolver_decl = make_resolver_func (default_ver_decl, + node->decl, &empty_bb); + + node_version_info->dispatcher_resolver = resolver_decl; + + push_cfun (DECL_STRUCT_FUNCTION (resolver_decl)); + + auto_vec<tree, 2> fn_ver_vec; + + for (versn_info = node_version_info->next; versn_info; + versn_info = versn_info->next) + { + versn = versn_info->this_node; + /* Check for virtual functions here again, as by this time it should + have been determined if this function needs a vtable index or + not. This happens for methods in derived classes that override + virtual methods in base classes but are not explicitly marked as + virtual. */ + if (DECL_VINDEX (versn->decl)) + sorry ("virtual function multiversioning not supported"); + + fn_ver_vec.safe_push (versn->decl); + } + + dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb); + cgraph_edge::rebuild_edges (); + pop_cfun (); + return resolver_decl; +} + +/* Make a dispatcher declaration for the multi-versioned function DECL. + Calls to DECL function will be replaced with calls to the dispatcher + by the front-end. Returns the decl of the dispatcher function. */ + +tree +aarch64_get_function_versions_dispatcher (void *decl) +{ + tree fn = (tree) decl; + struct cgraph_node *node = NULL; + struct cgraph_node *default_node = NULL; + struct cgraph_function_version_info *node_v = NULL; + struct cgraph_function_version_info *first_v = NULL; + + tree dispatch_decl = NULL; + + struct cgraph_function_version_info *default_version_info = NULL; + + gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn)); + + node = cgraph_node::get (fn); + gcc_assert (node != NULL); + + node_v = node->function_version (); + gcc_assert (node_v != NULL); + + if (node_v->dispatcher_resolver != NULL) + return node_v->dispatcher_resolver; + + /* Find the default version and make it the first node. */ + first_v = node_v; + /* Go to the beginning of the chain. 
*/ + while (first_v->prev != NULL) + first_v = first_v->prev; + default_version_info = first_v; + while (default_version_info != NULL) + { + if (get_feature_mask_for_version + (default_version_info->this_node->decl) == 0ULL) + break; + default_version_info = default_version_info->next; + } + + /* If there is no default node, just return NULL. */ + if (default_version_info == NULL) + return NULL; + + /* Make default info the first node. */ + if (first_v != default_version_info) + { + default_version_info->prev->next = default_version_info->next; + if (default_version_info->next) + default_version_info->next->prev = default_version_info->prev; + first_v->prev = default_version_info; + default_version_info->next = first_v; + default_version_info->prev = NULL; + } + + default_node = default_version_info->this_node; + + if (targetm.has_ifunc_p ()) + { + struct cgraph_function_version_info *it_v = NULL; + struct cgraph_node *dispatcher_node = NULL; + struct cgraph_function_version_info *dispatcher_version_info = NULL; + + /* Right now, the dispatching is done via ifunc. */ + dispatch_decl = make_dispatcher_decl (default_node->decl); + TREE_NOTHROW (dispatch_decl) = TREE_NOTHROW (fn); + + dispatcher_node = cgraph_node::get_create (dispatch_decl); + gcc_assert (dispatcher_node != NULL); + dispatcher_node->dispatcher_function = 1; + dispatcher_version_info + = dispatcher_node->insert_new_function_version (); + dispatcher_version_info->next = default_version_info; + dispatcher_node->definition = 1; + + /* Set the dispatcher for all the versions. 
*/ + it_v = default_version_info; + while (it_v != NULL) + { + it_v->dispatcher_resolver = dispatch_decl; + it_v = it_v->next; + } + } + else + { + error_at (DECL_SOURCE_LOCATION (default_node->decl), + "multiversioning needs %<ifunc%> which is not supported " + "on this target"); + } + + return dispatch_decl; +} + +/* This function returns true if FN1 and FN2 are versions of the same function, + that is, the target_version attributes of the function decls are different. + This assumes that FN1 and FN2 have the same signature. */ + +bool +aarch64_common_function_versions (tree fn1, tree fn2) +{ + if (TREE_CODE (fn1) != FUNCTION_DECL + || TREE_CODE (fn2) != FUNCTION_DECL) + return false; + + return (aarch64_compare_version_priority (fn1, fn2) != 0); +} + +/* Implement TARGET_MANGLE_DECL_ASSEMBLER_NAME, to add function multiversioning + suffixes. */ + +tree +aarch64_mangle_decl_assembler_name (tree decl, tree id) +{ + /* For function version, add the target suffix to the assembler name. */ + if (TREE_CODE (decl) == FUNCTION_DECL + && DECL_FUNCTION_VERSIONED (decl)) + { + aarch64_fmv_feature_mask feature_mask = get_feature_mask_for_version (decl); + + /* No suffix for the default version. */ + if (feature_mask == 0ULL) + return id; + + std::string name = IDENTIFIER_POINTER (id); + name += "._"; + + for (int i = 0; i < FEAT_MAX; i++) + { + if (feature_mask & aarch64_fmv_feature_data[i].feature_mask) + { + name += "M"; + name += aarch64_fmv_feature_data[i].name; + } + } + + if (DECL_ASSEMBLER_NAME_SET_P (decl)) + SET_DECL_RTL (decl, NULL); + + id = get_identifier (name.c_str()); + } + return id; +} + /* Implement TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P. Use an opt-out rather than an opt-in list. 
*/ @@ -29621,6 +30461,10 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_OPTION_VALID_ATTRIBUTE_P #define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p +#undef TARGET_OPTION_VALID_VERSION_ATTRIBUTE_P +#define TARGET_OPTION_VALID_VERSION_ATTRIBUTE_P \ + aarch64_option_valid_version_attribute_p + #undef TARGET_SET_CURRENT_FUNCTION #define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function @@ -29990,6 +30834,23 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_EMIT_EPILOGUE_FOR_SIBCALL #define TARGET_EMIT_EPILOGUE_FOR_SIBCALL aarch64_expand_epilogue +#undef TARGET_OPTION_FUNCTION_VERSIONS +#define TARGET_OPTION_FUNCTION_VERSIONS aarch64_common_function_versions + +#undef TARGET_COMPARE_VERSION_PRIORITY +#define TARGET_COMPARE_VERSION_PRIORITY aarch64_compare_version_priority + +#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY +#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \ + aarch64_generate_version_dispatcher_body + +#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER +#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \ + aarch64_get_function_versions_dispatcher + +#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME +#define TARGET_MANGLE_DECL_ASSEMBLER_NAME aarch64_mangle_decl_assembler_name + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-aarch64.h" diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 501bb7478a0755fa76c488ec03dcfab6c272851c..3ae42be770400da96ea3d9d25d6e1b2d393d034d 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -1362,6 +1362,8 @@ extern enum aarch64_code_model aarch64_cmodel; (aarch64_cmodel == AARCH64_CMODEL_TINY \ || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC) +#define TARGET_HAS_FMV_TARGET_ATTRIBUTE 0 + #define TARGET_SUPPORTS_WIDE_INT 1 /* Modes valid for AdvSIMD D registers, i.e. that fit in half a Q register. 
*/ diff --git a/gcc/config/arm/aarch-common.h b/gcc/config/arm/aarch-common.h index f72e21127fc898f5ffa10e274820fa4316fd41cf..c384291991c26b6de2c821dc4eea95b1888db398 100644 --- a/gcc/config/arm/aarch-common.h +++ b/gcc/config/arm/aarch-common.h @@ -23,7 +23,7 @@ #define GCC_AARCH_COMMON_H /* Enum describing the various ways that the - aarch*_parse_{arch,tune,cpu,extension} functions can fail. + aarch*_parse_{arch,tune,cpu,extension,fmv_extension} functions can fail. This way their callers can choose what kind of error to give. */ enum aarch_parse_opt_result @@ -31,7 +31,8 @@ enum aarch_parse_opt_result AARCH_PARSE_OK, /* Parsing was successful. */ AARCH_PARSE_MISSING_ARG, /* Missing argument. */ AARCH_PARSE_INVALID_FEATURE, /* Invalid feature modifier. */ - AARCH_PARSE_INVALID_ARG /* Invalid arch, tune, cpu arg. */ + AARCH_PARSE_INVALID_ARG, /* Invalid arch, tune, cpu arg. */ + AARCH_PARSE_DUPLICATE_FEATURE /* Duplicate feature modifier. */ }; /* Function types -msign-return-address should sign. */ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c index fb5a7a18ad1a2d09ac4b231150a1bd9e72d6fab6..c175e22f88fd5165d42866eb989de8af3ee5f6c6 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c @@ -7,6 +7,6 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\n} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+dotprod\+crc\+crypto\n} } } */ /* Test a normal looking procinfo. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c index b29d50e1f79f92e45add1627904a695c511aec75..0264a4737103ce093741d4f0b161f78cd18a0d6a 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c @@ -7,6 +7,6 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\n} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+dotprod\+crc\+crypto\n} } } */ /* Test one with mixed order of feature bits. */ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c index b3613165a05bf2fd1e93e6d89361ae9969ed62ba..649e48e7e0b727cef573ff12f1e3efb90bbb5e7f 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c @@ -7,6 +7,6 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\+sve2\n} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+dotprod\+crc\+crypto\+sve2\n} } } */ /* Test a normal looking procinfo. */ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c index a9dde5ffab1405488d15d58c6420890a1b16e16a..078d7bc899ccd01c70369fe985268949c351da0b 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c @@ -7,6 +7,6 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\+sve2\n} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+dotprod\+crc\+crypto\+sve2\n} } } */ /* Test a normal looking procinfo. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_18.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_18.c index 10325df4497227a80297a140c9e1d689fccf96ef..57eedb463091f051c19533c054204f9352b6446e 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_18.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_18.c @@ -7,7 +7,7 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8.6-a\+crc\+fp16\+aes\+sha3\+rng\+nopauth\n} } } */ +/* { dg-final { scan-assembler {\.arch armv8.6-a\+rng\+crc\+aes\+sha3\+fp16\+nopauth\n} } } */ /* Test one where the boundary of buffer size would overwrite the last character read when stitching the fgets-calls together. With the diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_19.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_19.c index 980d3f79dfb03b0d8eb68f691bf2dedf80aed87d..a5b4b4d3442c6522a8cdadf4eebd3b5460e37213 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_19.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_19.c @@ -7,7 +7,7 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv9-a\+crc\+profile\+memtag\+sve2-sm4\+sve2-aes\+sve2-sha3\+sve2-bitperm\+i8mm\+bf16\+nopauth\n} } } */ +/* { dg-final { scan-assembler {\.arch armv9-a\+crc\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+memtag\+profile\+nopauth\n} } } */ /* Test one that if the kernel doesn't report the availability of a mandatory feature that it has turned it off for whatever reason. 
As such compilers diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_20.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_20.c index 117df2b0b6cd5751d9f5175b4343aad9825a6c43..e12aa543d02924f268729f96fe1f17181287f097 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_20.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_20.c @@ -7,7 +7,7 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv9-a\+crc\+profile\+memtag\+sve2-sm4\+sve2-aes\+sve2-sha3\+sve2-bitperm\+i8mm\+bf16\n} } } */ +/* { dg-final { scan-assembler {\.arch armv9-a\+crc\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+memtag\+profile\n} } } */ /* Check whether features that don't have a midr name during detection are correctly ignored. These features shouldn't affect the native detection. diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c index efbd02cbdc0638db85e776f1e79043709c11df21..920e1d65711cbcb77b07441597180c0159ccabf9 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c @@ -7,7 +7,7 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+lse\+rcpc\+rdma\+dotprod\+fp16fml\+sb\+ssbs\+sve2-sm4\+sve2-aes\+sve2-sha3\+sve2-bitperm\+i8mm\+bf16\+flagm\n} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+flagm\+dotprod\+rdma\+lse\+crc\+fp16fml\+rcpc\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+sb\+ssbs\n} } } */ /* Check that an Armv8-A core doesn't fall apart on extensions without midr values. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c index d431d4938265d024891b464ac3d069607b21d8e7..416a29b514ab7599a7092e26e3716ec8a50cc895 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c @@ -7,7 +7,7 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+lse\+rcpc\+rdma\+dotprod\+fp16fml\+sb\+ssbs\+sve2-sm4\+sve2-aes\+sve2-sha3\+sve2-bitperm\+i8mm\+bf16\+flagm\+pauth\n} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+flagm\+dotprod\+rdma\+lse\+crc\+fp16fml\+rcpc\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+sb\+ssbs\+pauth\n} } } */ /* Check that an Armv8-A core doesn't fall apart on extensions without midr values and that it enables optional features. */ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c index 20012beff7b85c14817e38437650d412ab7bb137..5d39a2d8c4da5de6ed7ec832db90ae7f625d997d 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c @@ -7,7 +7,7 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+fp16\+crypto\n} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+fp16\n} } } */ -/* Test one where the feature bits for crypto and fp16 are given in - same order as declared in options file. */ +/* Test one where the crypto and fp16 options are specified in different + order from what is in the options file. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c index 70a7e62fdffc4a908df083505c03a5fde70ce883..67c4bc2b9ea446201666c9810c779d1823a5cb9b 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c @@ -7,7 +7,7 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+fp16\+crypto\n} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+fp16\n} } } */ -/* Test one where the crypto and fp16 options are specified in different - order from what is in the options file. */ +/* Test one where the feature bits for crypto and fp16 are given in + same order as declared in options file. */ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_17.c b/gcc/testsuite/gcc.target/aarch64/options_set_17.c index 8b21e2e1a0a0d4c7daa13fc6c3e4968786474a74..b1603fbcf2a1b27c00fe2b47a8a65af551a3d726 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_17.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_17.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler {\.arch armv8\.2-a\+crc\+dotprod\n} } } */ +/* { dg-final { scan-assembler {\.arch armv8\.2-a\+dotprod\+crc\n} } } */ /* dotprod needs to be emitted pre armv8.4. */ diff --git a/libgcc/config/aarch64/cpuinfo.c b/libgcc/config/aarch64/cpuinfo.c index 634f591c194bc70048f714d7eb0ace1f2f4137ea..72185bee7cdd062e20a3e9f6f627f4120d1b2fb5 100644 --- a/libgcc/config/aarch64/cpuinfo.c +++ b/libgcc/config/aarch64/cpuinfo.c @@ -22,6 +22,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. 
*/ +#include "common/config/aarch64/cpuinfo.h" + #if __has_include(<sys/auxv.h>) #include <sys/auxv.h> @@ -38,73 +40,6 @@ typedef struct __ifunc_arg_t { #if __has_include(<asm/hwcap.h>) #include <asm/hwcap.h> -/* CPUFeatures must correspond to the same AArch64 features in aarch64.cc */ -enum CPUFeatures { - FEAT_RNG, - FEAT_FLAGM, - FEAT_FLAGM2, - FEAT_FP16FML, - FEAT_DOTPROD, - FEAT_SM4, - FEAT_RDM, - FEAT_LSE, - FEAT_FP, - FEAT_SIMD, - FEAT_CRC, - FEAT_SHA1, - FEAT_SHA2, - FEAT_SHA3, - FEAT_AES, - FEAT_PMULL, - FEAT_FP16, - FEAT_DIT, - FEAT_DPB, - FEAT_DPB2, - FEAT_JSCVT, - FEAT_FCMA, - FEAT_RCPC, - FEAT_RCPC2, - FEAT_FRINTTS, - FEAT_DGH, - FEAT_I8MM, - FEAT_BF16, - FEAT_EBF16, - FEAT_RPRES, - FEAT_SVE, - FEAT_SVE_BF16, - FEAT_SVE_EBF16, - FEAT_SVE_I8MM, - FEAT_SVE_F32MM, - FEAT_SVE_F64MM, - FEAT_SVE2, - FEAT_SVE_AES, - FEAT_SVE_PMULL128, - FEAT_SVE_BITPERM, - FEAT_SVE_SHA3, - FEAT_SVE_SM4, - FEAT_SME, - FEAT_MEMTAG, - FEAT_MEMTAG2, - FEAT_MEMTAG3, - FEAT_SB, - FEAT_PREDRES, - FEAT_SSBS, - FEAT_SSBS2, - FEAT_BTI, - FEAT_LS64, - FEAT_LS64_V, - FEAT_LS64_ACCDATA, - FEAT_WFXT, - FEAT_SME_F64, - FEAT_SME_I64, - FEAT_SME2, - FEAT_RCPC3, - FEAT_MAX, - FEAT_EXT = 62, /* Reserved to indicate presence of additional features field - in __aarch64_cpu_features. */ - FEAT_INIT /* Used as flag of features initialization completion. */ -}; - /* Architecture features used in Function Multi Versioning. */ struct { unsigned long long features;