diff --git a/libatomic/Makefile.am b/libatomic/Makefile.am
index 389f3dd1c6030cd27441200d073765a4a129e16c..d88515e4a03bd812334ae0b7bf4c0bba119455dc 100644
--- a/libatomic/Makefile.am
+++ b/libatomic/Makefile.am
@@ -138,8 +138,9 @@ IFUNC_OPTIONS	     = -march=i586
 libatomic_la_LIBADD += $(addsuffix _8_1_.lo,$(SIZEOBJS))
 endif
 if ARCH_X86_64
-IFUNC_OPTIONS	     = -mcx16
-libatomic_la_LIBADD += $(addsuffix _16_1_.lo,$(SIZEOBJS))
+IFUNC_OPTIONS	     = -mcx16 -mcx16
+libatomic_la_LIBADD += $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
+		       $(addsuffix _16_2_.lo,$(SIZEOBJS))
 endif
 endif
 
diff --git a/libatomic/Makefile.in b/libatomic/Makefile.in
index 0a51bd55f01fee1419722ace0d24d17b90a39fbf..80d25653dc75cca995c8b0b2107a55f1234a6d52 100644
--- a/libatomic/Makefile.in
+++ b/libatomic/Makefile.in
@@ -96,7 +96,9 @@ target_triplet = @target@
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@	$(addsuffix \
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@	_8_2_.lo,$(SIZEOBJS))
 @ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(addsuffix _8_1_.lo,$(SIZEOBJS))
-@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _16_1_.lo,$(SIZEOBJS))
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@		       $(addsuffix _16_2_.lo,$(SIZEOBJS))
+
 subdir = .
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
@@ -435,7 +437,7 @@ libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix \
 @ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv8-a+lse
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv7-a+fp -DHAVE_KERNEL64
 @ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=i586
-@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -mcx16
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -mcx16 -mcx16
 libatomic_convenience_la_SOURCES = $(libatomic_la_SOURCES)
 libatomic_convenience_la_LIBADD = $(libatomic_la_LIBADD)
 MULTISRCTOP = 
diff --git a/libatomic/config/x86/host-config.h b/libatomic/config/x86/host-config.h
index f20ce0941a171c01b9533b3937e155567466073f..007b7e14718891e6c4ad3063a14ce654af090b24 100644
--- a/libatomic/config/x86/host-config.h
+++ b/libatomic/config/x86/host-config.h
@@ -55,31 +55,37 @@ load_feat1 (void)
 }
 
 #ifdef __x86_64__
-# define IFUNC_COND_1	(load_feat1 () & bit_CMPXCHG16B)
+# define IFUNC_COND_1	((load_feat1 () & (bit_AVX | bit_CMPXCHG16B)) \
+			 == (bit_AVX | bit_CMPXCHG16B))
+# define IFUNC_COND_2	(load_feat1 () & bit_CMPXCHG16B)
 #else
 # define IFUNC_COND_1	(load_feat1 () & bit_CMPXCHG8B)
 #endif
 
 #ifdef __x86_64__
-# define IFUNC_NCOND(N) (N == 16)
+# define IFUNC_NCOND(N) (2 * (N == 16))
 #else
 # define IFUNC_NCOND(N) (N == 8)
 #endif
 
 #ifdef __x86_64__
 # undef MAYBE_HAVE_ATOMIC_CAS_16
-# define MAYBE_HAVE_ATOMIC_CAS_16	IFUNC_COND_1
+# define MAYBE_HAVE_ATOMIC_CAS_16	IFUNC_COND_2
 # undef MAYBE_HAVE_ATOMIC_EXCHANGE_16
-# define MAYBE_HAVE_ATOMIC_EXCHANGE_16	IFUNC_COND_1
+# define MAYBE_HAVE_ATOMIC_EXCHANGE_16	IFUNC_COND_2
 # undef MAYBE_HAVE_ATOMIC_LDST_16
-# define MAYBE_HAVE_ATOMIC_LDST_16	IFUNC_COND_1
+# define MAYBE_HAVE_ATOMIC_LDST_16	IFUNC_COND_2
 /* Since load and store are implemented with CAS, they are not fast.  */
 # undef FAST_ATOMIC_LDST_16
 # define FAST_ATOMIC_LDST_16		0
-# if IFUNC_ALT == 1
+# if IFUNC_ALT != 0
 #  undef HAVE_ATOMIC_CAS_16
 #  define HAVE_ATOMIC_CAS_16 1
 # endif
+# if IFUNC_ALT == 1
+#  undef HAVE_ATOMIC_LDST_16
+#  define HAVE_ATOMIC_LDST_16 1
+# endif
 #else
 # undef MAYBE_HAVE_ATOMIC_CAS_8
 # define MAYBE_HAVE_ATOMIC_CAS_8	IFUNC_COND_1
@@ -93,7 +99,7 @@ load_feat1 (void)
 # endif
 #endif
 
-#if defined(__x86_64__) && N == 16 && IFUNC_ALT == 1
+#if defined(__x86_64__) && N == 16 && IFUNC_ALT != 0
 static inline bool
 atomic_compare_exchange_n (UTYPE *mptr, UTYPE *eptr, UTYPE newval,
                            bool weak_p UNUSED, int sm UNUSED, int fm UNUSED)
@@ -108,6 +114,29 @@ atomic_compare_exchange_n (UTYPE *mptr, UTYPE *eptr, UTYPE newval,
 # define atomic_compare_exchange_n atomic_compare_exchange_n
 #endif /* Have CAS 16 */
 
+#if defined(__x86_64__) && N == 16 && IFUNC_ALT == 1
+#define __atomic_load_n(ptr, model) \
+  (sizeof (*ptr) == 16 ? atomic_load_n (ptr, model) \
+		       : (__atomic_load_n) (ptr, model))
+#define __atomic_store_n(ptr, val, model) \
+  (sizeof (*ptr) == 16 ? atomic_store_n (ptr, val, model) \
+		       : (__atomic_store_n) (ptr, val, model))
+
+static inline UTYPE
+atomic_load_n (UTYPE *ptr, int model UNUSED)
+{
+  UTYPE ret;
+  __asm__ ("vmovdqa\t{%1, %0|%0, %1}" : "=x" (ret) : "m" (*ptr));
+  return ret;
+}
+
+static inline void
+atomic_store_n (UTYPE *ptr, UTYPE val, int model UNUSED)
+{
+  __asm__ ("vmovdqa\t{%1, %0|%0, %1}\n\tmfence" : "=m" (*ptr) : "x" (val));
+}
+#endif
+
 #endif /* HAVE_IFUNC */
 
 #include_next <host-config.h>
diff --git a/libatomic/config/x86/init.c b/libatomic/config/x86/init.c
index 7bdec7227250c7e8a3a42b354ed5a996b65d9f40..6f6499c58c3c1830e6a1c0f57d99adde7d544e1a 100644
--- a/libatomic/config/x86/init.c
+++ b/libatomic/config/x86/init.c
@@ -34,6 +34,18 @@ __libat_feat1_init (void)
   unsigned int eax, ebx, ecx, edx;
   FEAT1_REGISTER = 0;
   __get_cpuid (1, &eax, &ebx, &ecx, &edx);
+#ifdef __x86_64__
+  if ((FEAT1_REGISTER & (bit_AVX | bit_CMPXCHG16B))
+      == (bit_AVX | bit_CMPXCHG16B))
+    {
+      /* Intel SDM guarantees that 16-byte VMOVDQA on 16-byte aligned address
+	 is atomic, but so far we don't have this guarantee from AMD.  */
+      unsigned int ecx2 = 0;
+      __get_cpuid (0, &eax, &ebx, &ecx2, &edx);
+      if (ecx2 != signature_INTEL_ecx)
+	FEAT1_REGISTER &= ~bit_AVX;
+    }
+#endif
   /* See the load in load_feat1.  */
   __atomic_store_n (&__libat_feat1, FEAT1_REGISTER, __ATOMIC_RELAXED);
   return FEAT1_REGISTER;