# libatomic (x86): add a second 16-byte IFUNC alternative.
#
# What the hunks below change:
#  * Makefile.am / Makefile.in: for ARCH_X86_64 (with IFUNC enabled),
#    IFUNC_OPTIONS becomes "-mcx16 -mcx16" and the _16_2_.lo objects are
#    added next to the existing _16_1_.lo ones.
#    NOTE(review): presumably IFUNC_OPTIONS carries one option word per
#    alternative, which would explain the repeated flag -- confirm against
#    the rest of Makefile.am.
#  * config/x86/host-config.h: IFUNC_COND_1 now requires both bit_AVX and
#    bit_CMPXCHG16B; the new IFUNC_COND_2 keeps the former CMPXCHG16B-only
#    test and is what the MAYBE_HAVE_ATOMIC_*_16 macros use.
#    IFUNC_NCOND(16) evaluates to 2.  HAVE_ATOMIC_CAS_16 is defined for
#    every non-zero IFUNC_ALT; IFUNC_ALT == 1 additionally defines
#    HAVE_ATOMIC_LDST_16 and redirects 16-byte __atomic_load_n /
#    __atomic_store_n to inline vmovdqa helpers (the store is followed by
#    mfence).
#  * config/x86/init.c: on x86_64, when both feature bits are set, CPUID
#    leaf 0 is queried and bit_AVX is cleared unless ecx equals
#    signature_INTEL_ecx.
#
# NOTE(review): this text was recovered from a whitespace-collapsed copy.
# Line breaks and blank context lines were re-derived from the hunk
# headers' line counts; the original tabs/indentation could not be
# recovered.  Apply with whitespace-insensitive matching (e.g. `patch -l`)
# and compare against the upstream commit before relying on it.

diff --git a/libatomic/Makefile.am b/libatomic/Makefile.am
index 389f3dd1c6030cd27441200d073765a4a129e16c..d88515e4a03bd812334ae0b7bf4c0bba119455dc 100644
--- a/libatomic/Makefile.am
+++ b/libatomic/Makefile.am
@@ -138,8 +138,9 @@ IFUNC_OPTIONS = -march=i586
 libatomic_la_LIBADD += $(addsuffix _8_1_.lo,$(SIZEOBJS))
 endif
 if ARCH_X86_64
-IFUNC_OPTIONS = -mcx16
-libatomic_la_LIBADD += $(addsuffix _16_1_.lo,$(SIZEOBJS))
+IFUNC_OPTIONS = -mcx16 -mcx16
+libatomic_la_LIBADD += $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
+                       $(addsuffix _16_2_.lo,$(SIZEOBJS))
 endif
 endif
 
diff --git a/libatomic/Makefile.in b/libatomic/Makefile.in
index 0a51bd55f01fee1419722ace0d24d17b90a39fbf..80d25653dc75cca995c8b0b2107a55f1234a6d52 100644
--- a/libatomic/Makefile.in
+++ b/libatomic/Makefile.in
@@ -96,7 +96,9 @@ target_triplet = @target@
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ $(addsuffix \
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ _8_2_.lo,$(SIZEOBJS))
 @ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(addsuffix _8_1_.lo,$(SIZEOBJS))
-@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _16_1_.lo,$(SIZEOBJS))
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@ $(addsuffix _16_2_.lo,$(SIZEOBJS))
+
 subdir = .
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
@@ -435,7 +437,7 @@ libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix \
 @ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv8-a+lse
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv7-a+fp -DHAVE_KERNEL64
 @ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=i586
-@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -mcx16
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -mcx16 -mcx16
 libatomic_convenience_la_SOURCES = $(libatomic_la_SOURCES)
 libatomic_convenience_la_LIBADD = $(libatomic_la_LIBADD)
 MULTISRCTOP =
diff --git a/libatomic/config/x86/host-config.h b/libatomic/config/x86/host-config.h
index f20ce0941a171c01b9533b3937e155567466073f..007b7e14718891e6c4ad3063a14ce654af090b24 100644
--- a/libatomic/config/x86/host-config.h
+++ b/libatomic/config/x86/host-config.h
@@ -55,31 +55,37 @@ load_feat1 (void)
 }
 
 #ifdef __x86_64__
-# define IFUNC_COND_1 (load_feat1 () & bit_CMPXCHG16B)
+# define IFUNC_COND_1 ((load_feat1 () & (bit_AVX | bit_CMPXCHG16B)) \
+                       == (bit_AVX | bit_CMPXCHG16B))
+# define IFUNC_COND_2 (load_feat1 () & bit_CMPXCHG16B)
 #else
 # define IFUNC_COND_1 (load_feat1 () & bit_CMPXCHG8B)
 #endif
 
 #ifdef __x86_64__
-# define IFUNC_NCOND(N) (N == 16)
+# define IFUNC_NCOND(N) (2 * (N == 16))
 #else
 # define IFUNC_NCOND(N) (N == 8)
 #endif
 
 #ifdef __x86_64__
 # undef MAYBE_HAVE_ATOMIC_CAS_16
-# define MAYBE_HAVE_ATOMIC_CAS_16 IFUNC_COND_1
+# define MAYBE_HAVE_ATOMIC_CAS_16 IFUNC_COND_2
 # undef MAYBE_HAVE_ATOMIC_EXCHANGE_16
-# define MAYBE_HAVE_ATOMIC_EXCHANGE_16 IFUNC_COND_1
+# define MAYBE_HAVE_ATOMIC_EXCHANGE_16 IFUNC_COND_2
 # undef MAYBE_HAVE_ATOMIC_LDST_16
-# define MAYBE_HAVE_ATOMIC_LDST_16 IFUNC_COND_1
+# define MAYBE_HAVE_ATOMIC_LDST_16 IFUNC_COND_2
 /* Since load and store are implemented with CAS, they are not fast. */
 # undef FAST_ATOMIC_LDST_16
 # define FAST_ATOMIC_LDST_16 0
-# if IFUNC_ALT == 1
+# if IFUNC_ALT != 0
 #  undef HAVE_ATOMIC_CAS_16
 #  define HAVE_ATOMIC_CAS_16 1
 # endif
+# if IFUNC_ALT == 1
+#  undef HAVE_ATOMIC_LDST_16
+#  define HAVE_ATOMIC_LDST_16 1
+# endif
 #else
 # undef MAYBE_HAVE_ATOMIC_CAS_8
 # define MAYBE_HAVE_ATOMIC_CAS_8 IFUNC_COND_1
@@ -93,7 +99,7 @@ load_feat1 (void)
 # endif
 #endif
 
-#if defined(__x86_64__) && N == 16 && IFUNC_ALT == 1
+#if defined(__x86_64__) && N == 16 && IFUNC_ALT != 0
 static inline bool
 atomic_compare_exchange_n (UTYPE *mptr, UTYPE *eptr, UTYPE newval,
                            bool weak_p UNUSED, int sm UNUSED, int fm UNUSED)
@@ -108,6 +114,29 @@ atomic_compare_exchange_n (UTYPE *mptr, UTYPE *eptr, UTYPE newval,
 # define atomic_compare_exchange_n atomic_compare_exchange_n
 #endif /* Have CAS 16 */
 
+#if defined(__x86_64__) && N == 16 && IFUNC_ALT == 1
+#define __atomic_load_n(ptr, model) \
+  (sizeof (*ptr) == 16 ? atomic_load_n (ptr, model) \
+                       : (__atomic_load_n) (ptr, model))
+#define __atomic_store_n(ptr, val, model) \
+  (sizeof (*ptr) == 16 ? atomic_store_n (ptr, val, model) \
+                       : (__atomic_store_n) (ptr, val, model))
+
+static inline UTYPE
+atomic_load_n (UTYPE *ptr, int model UNUSED)
+{
+  UTYPE ret;
+  __asm__ ("vmovdqa\t{%1, %0|%0, %1}" : "=x" (ret) : "m" (*ptr));
+  return ret;
+}
+
+static inline void
+atomic_store_n (UTYPE *ptr, UTYPE val, int model UNUSED)
+{
+  __asm__ ("vmovdqa\t{%1, %0|%0, %1}\n\tmfence" : "=m" (*ptr) : "x" (val));
+}
+#endif
+
 #endif /* HAVE_IFUNC */
 
 #include_next <host-config.h>
diff --git a/libatomic/config/x86/init.c b/libatomic/config/x86/init.c
index 7bdec7227250c7e8a3a42b354ed5a996b65d9f40..6f6499c58c3c1830e6a1c0f57d99adde7d544e1a 100644
--- a/libatomic/config/x86/init.c
+++ b/libatomic/config/x86/init.c
@@ -34,6 +34,18 @@ __libat_feat1_init (void)
   unsigned int eax, ebx, ecx, edx;
   FEAT1_REGISTER = 0;
   __get_cpuid (1, &eax, &ebx, &ecx, &edx);
+#ifdef __x86_64__
+  if ((FEAT1_REGISTER & (bit_AVX | bit_CMPXCHG16B))
+      == (bit_AVX | bit_CMPXCHG16B))
+    {
+      /* Intel SDM guarantees that 16-byte VMOVDQA on 16-byte aligned address
+         is atomic, but so far we don't have this guarantee from AMD. */
+      unsigned int ecx2 = 0;
+      __get_cpuid (0, &eax, &ebx, &ecx2, &edx);
+      if (ecx2 != signature_INTEL_ecx)
+        FEAT1_REGISTER &= ~bit_AVX;
+    }
+#endif
   /* See the load in load_feat1. */
   __atomic_store_n (&__libat_feat1, FEAT1_REGISTER, __ATOMIC_RELAXED);
   return FEAT1_REGISTER;