diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h index 00569cee55459e6de4b6a7a3d15a00d317b19350..d837d0efb21e9c52bce437078046af273b14cc50 100644 --- a/libstdc++-v3/include/experimental/bits/simd.h +++ b/libstdc++-v3/include/experimental/bits/simd.h @@ -35,6 +35,7 @@ #include <cstdio> // for stderr #endif #include <cstring> +#include <cmath> #include <functional> #include <iosfwd> #include <utility> @@ -206,9 +207,170 @@ template <size_t _Np> // }}} template <size_t _Xp> using _SizeConstant = integral_constant<size_t, _Xp>; +// constexpr feature detection{{{ +constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX; +constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE; +constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2; +constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3; +constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3; +constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1; +constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2; +constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP; +constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX; +constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2; +constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1; +constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2; +constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT; +constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A; +constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA; +constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4; +constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C; +constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT; +constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F; +constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ; +constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL; +constexpr inline bool __have_avx512bw = 
_GLIBCXX_SIMD_HAVE_AVX512BW; +constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl; +constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl; +constexpr inline bool __have_avx512bitalg = _GLIBCXX_SIMD_HAVE_AVX512BITALG; +constexpr inline bool __have_avx512vbmi2 = _GLIBCXX_SIMD_HAVE_AVX512VBMI2; +constexpr inline bool __have_avx512vbmi = _GLIBCXX_SIMD_HAVE_AVX512VBMI; +constexpr inline bool __have_avx512ifma = _GLIBCXX_SIMD_HAVE_AVX512IFMA; +constexpr inline bool __have_avx512cd = _GLIBCXX_SIMD_HAVE_AVX512CD; +constexpr inline bool __have_avx512vnni = _GLIBCXX_SIMD_HAVE_AVX512VNNI; +constexpr inline bool __have_avx512vpopcntdq = _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ; +constexpr inline bool __have_avx512vp2intersect = _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT; + +constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON; +constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32; +constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64; +constexpr inline bool __support_neon_float = +#if defined __GCC_IEC_559 + __GCC_IEC_559 == 0; +#elif defined __FAST_MATH__ + true; +#else + false; +#endif + +#ifdef _ARCH_PWR10 +constexpr inline bool __have_power10vec = true; +#else +constexpr inline bool __have_power10vec = false; +#endif +#ifdef __POWER9_VECTOR__ +constexpr inline bool __have_power9vec = true; +#else +constexpr inline bool __have_power9vec = false; +#endif +#if defined __POWER8_VECTOR__ +constexpr inline bool __have_power8vec = true; +#else +constexpr inline bool __have_power8vec = __have_power9vec; +#endif +#if defined __VSX__ +constexpr inline bool __have_power_vsx = true; +#else +constexpr inline bool __have_power_vsx = __have_power8vec; +#endif +#if defined __ALTIVEC__ +constexpr inline bool __have_power_vmx = true; +#else +constexpr inline bool __have_power_vmx = __have_power_vsx; +#endif + +// }}} namespace __detail { + constexpr std::uint_least64_t + __floating_point_flags() + { + 
std::uint_least64_t __flags = 0; + if constexpr (math_errhandling & MATH_ERREXCEPT) + __flags |= 1; +#ifdef __FAST_MATH__ + __flags |= 1 << 1; +#elif __FINITE_MATH_ONLY__ + __flags |= 2 << 1; +#elif __GCC_IEC_559 < 2 + __flags |= 3 << 1; +#endif + __flags |= (__FLT_EVAL_METHOD__ + 1) << 3; + return __flags; + } + + constexpr std::uint_least64_t + __machine_flags() + { + if constexpr (__have_mmx || __have_sse) + return __have_mmx + | (__have_sse << 1) + | (__have_sse2 << 2) + | (__have_sse3 << 3) + | (__have_ssse3 << 4) + | (__have_sse4_1 << 5) + | (__have_sse4_2 << 6) + | (__have_xop << 7) + | (__have_avx << 8) + | (__have_avx2 << 9) + | (__have_bmi << 10) + | (__have_bmi2 << 11) + | (__have_lzcnt << 12) + | (__have_sse4a << 13) + | (__have_fma << 14) + | (__have_fma4 << 15) + | (__have_f16c << 16) + | (__have_popcnt << 17) + | (__have_avx512f << 18) + | (__have_avx512dq << 19) + | (__have_avx512vl << 20) + | (__have_avx512bw << 21) + | (__have_avx512bitalg << 22) + | (__have_avx512vbmi2 << 23) + | (__have_avx512vbmi << 24) + | (__have_avx512ifma << 25) + | (__have_avx512cd << 26) + | (__have_avx512vnni << 27) + | (__have_avx512vpopcntdq << 28) + | (__have_avx512vp2intersect << 29); + else if constexpr (__have_neon) + return __have_neon + | (__have_neon_a32 << 1) + | (__have_neon_a64 << 2) + | (__have_neon_a64 << 2) + | (__support_neon_float << 3); + else if constexpr (__have_power_vmx) + return __have_power_vmx + | (__have_power_vsx << 1) + | (__have_power8vec << 2) + | (__have_power9vec << 3) + | (__have_power10vec << 4); + else + return 0; + } + + namespace + { + struct _OdrEnforcer {}; + } + + template <std::uint_least64_t...> + struct _MachineFlagsTemplate {}; + + /**@internal + * Use this type as default template argument to all function templates that + * are not declared always_inline. 
It ensures that a function + * specialization that the compiler decides not to inline has a unique symbol + * (_OdrEnforcer) or a symbol matching the machine/architecture flags + * (_MachineFlagsTemplate). This helps to avoid ODR violations in cases where + * users link TUs compiled with different flags. This is especially important + * when simd is used in libraries. + */ + using __odr_helper + = conditional_t<__machine_flags() == 0, _OdrEnforcer, + _MachineFlagsTemplate<__machine_flags(), __floating_point_flags()>>; + struct _Minimum { template <typename _Tp> @@ -472,71 +634,6 @@ template <int _Np> template <typename _Tp> inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value; -// }}} -// constexpr feature detection{{{ -constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX; -constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE; -constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2; -constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3; -constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3; -constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1; -constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2; -constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP; -constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX; -constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2; -constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1; -constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2; -constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT; -constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A; -constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA; -constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4; -constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C; -constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT; -constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F; -constexpr inline bool __have_avx512dq = 
_GLIBCXX_SIMD_HAVE_AVX512DQ; -constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL; -constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW; -constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl; -constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl; - -constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON; -constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32; -constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64; -constexpr inline bool __support_neon_float = -#if defined __GCC_IEC_559 - __GCC_IEC_559 == 0; -#elif defined __FAST_MATH__ - true; -#else - false; -#endif - -#ifdef _ARCH_PWR10 -constexpr inline bool __have_power10vec = true; -#else -constexpr inline bool __have_power10vec = false; -#endif -#ifdef __POWER9_VECTOR__ -constexpr inline bool __have_power9vec = true; -#else -constexpr inline bool __have_power9vec = false; -#endif -#if defined __POWER8_VECTOR__ -constexpr inline bool __have_power8vec = true; -#else -constexpr inline bool __have_power8vec = __have_power9vec; -#endif -#if defined __VSX__ -constexpr inline bool __have_power_vsx = true; -#else -constexpr inline bool __have_power_vsx = __have_power8vec; -#endif -#if defined __ALTIVEC__ -constexpr inline bool __have_power_vmx = true; -#else -constexpr inline bool __have_power_vmx = __have_power_vsx; -#endif - // }}} // __is_scalar_abi {{{ template <typename _Abi> @@ -4004,7 +4101,7 @@ template <typename _Tp, typename _A0, typename... _As> // }}} // concat(simd...) {{{ -template <typename _Tp, typename... _As> +template <typename _Tp, typename... _As, typename = __detail::__odr_helper> inline _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>> concat(const simd<_Tp, _As>&... 
__xs) @@ -4589,6 +4686,7 @@ template <typename _Tp, typename _Abi> template <typename _Up, typename _A2, typename = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>> + _GLIBCXX_SIMD_ALWAYS_INLINE operator simd_mask<_Up, _A2>() && { using namespace std::experimental::__proposed; @@ -4826,121 +4924,153 @@ find_last_set(_ExactBool) /// @cond undocumented // _SimdIntOperators{{{1 -template <typename _V, typename _Impl, bool> +template <typename _V, typename _Tp, typename _Abi, bool> class _SimdIntOperators {}; -template <typename _V, typename _Impl> - class _SimdIntOperators<_V, _Impl, true> +template <typename _V, typename _Tp, typename _Abi> + class _SimdIntOperators<_V, _Tp, _Abi, true> { + using _Impl = typename _SimdTraits<_Tp, _Abi>::_SimdImpl; + _GLIBCXX_SIMD_INTRINSIC const _V& __derived() const { return *static_cast<const _V*>(this); } - template <typename _Tp> + template <typename _Up> _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V - _S_make_derived(_Tp&& __d) - { return {__private_init, static_cast<_Tp&&>(__d)}; } + _S_make_derived(_Up&& __d) + { return {__private_init, static_cast<_Up&&>(__d)}; } public: - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator%=(_V& __lhs, const _V& __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator%=(_V& __lhs, const _V& __x) { return __lhs = __lhs % __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator&=(_V& __lhs, const _V& __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator&=(_V& __lhs, const _V& __x) { return __lhs = __lhs & __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator|=(_V& __lhs, const _V& __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator|=(_V& __lhs, const _V& __x) { return __lhs = __lhs | __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator^=(_V& __lhs, const _V& __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator^=(_V& __lhs, const _V& __x) { return __lhs = __lhs ^ __x; 
} - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator<<=(_V& __lhs, const _V& __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator<<=(_V& __lhs, const _V& __x) { return __lhs = __lhs << __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator>>=(_V& __lhs, const _V& __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator>>=(_V& __lhs, const _V& __x) { return __lhs = __lhs >> __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator<<=(_V& __lhs, int __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator<<=(_V& __lhs, int __x) { return __lhs = __lhs << __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator>>=(_V& __lhs, int __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator>>=(_V& __lhs, int __x) { return __lhs = __lhs >> __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V operator%(const _V& __x, const _V& __y) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator%(const _V& __x, const _V& __y) { return _SimdIntOperators::_S_make_derived( _Impl::_S_modulus(__data(__x), __data(__y))); } - _GLIBCXX_SIMD_CONSTEXPR friend _V operator&(const _V& __x, const _V& __y) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator&(const _V& __x, const _V& __y) { return _SimdIntOperators::_S_make_derived( _Impl::_S_bit_and(__data(__x), __data(__y))); } - _GLIBCXX_SIMD_CONSTEXPR friend _V operator|(const _V& __x, const _V& __y) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator|(const _V& __x, const _V& __y) { return _SimdIntOperators::_S_make_derived( _Impl::_S_bit_or(__data(__x), __data(__y))); } - _GLIBCXX_SIMD_CONSTEXPR friend _V operator^(const _V& __x, const _V& __y) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator^(const _V& __x, const _V& __y) { return _SimdIntOperators::_S_make_derived( _Impl::_S_bit_xor(__data(__x), __data(__y))); } - _GLIBCXX_SIMD_CONSTEXPR friend _V operator<<(const 
_V& __x, const _V& __y) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator<<(const _V& __x, const _V& __y) { return _SimdIntOperators::_S_make_derived( _Impl::_S_bit_shift_left(__data(__x), __data(__y))); } - _GLIBCXX_SIMD_CONSTEXPR friend _V operator>>(const _V& __x, const _V& __y) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator>>(const _V& __x, const _V& __y) { return _SimdIntOperators::_S_make_derived( _Impl::_S_bit_shift_right(__data(__x), __data(__y))); } - template <typename _VV = _V> - _GLIBCXX_SIMD_CONSTEXPR friend _V operator<<(const _V& __x, int __y) - { - using _Tp = typename _VV::value_type; - if (__y < 0) - __invoke_ub("The behavior is undefined if the right operand of a " - "shift operation is negative. [expr.shift]\nA shift by " - "%d was requested", - __y); - if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__) - __invoke_ub( - "The behavior is undefined if the right operand of a " - "shift operation is greater than or equal to the width of the " - "promoted left operand. [expr.shift]\nA shift by %d was requested", - __y); - return _SimdIntOperators::_S_make_derived( - _Impl::_S_bit_shift_left(__data(__x), __y)); - } + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator<<(const _V& __x, int __y) + { + if (__y < 0) + __invoke_ub("The behavior is undefined if the right operand of a " + "shift operation is negative. [expr.shift]\nA shift by " + "%d was requested", + __y); + if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__) + __invoke_ub( + "The behavior is undefined if the right operand of a " + "shift operation is greater than or equal to the width of the " + "promoted left operand. 
[expr.shift]\nA shift by %d was requested", + __y); + return _SimdIntOperators::_S_make_derived( + _Impl::_S_bit_shift_left(__data(__x), __y)); + } - template <typename _VV = _V> - _GLIBCXX_SIMD_CONSTEXPR friend _V operator>>(const _V& __x, int __y) - { - using _Tp = typename _VV::value_type; - if (__y < 0) - __invoke_ub( - "The behavior is undefined if the right operand of a shift " - "operation is negative. [expr.shift]\nA shift by %d was requested", - __y); - if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__) - __invoke_ub( - "The behavior is undefined if the right operand of a shift " - "operation is greater than or equal to the width of the promoted " - "left operand. [expr.shift]\nA shift by %d was requested", - __y); - return _SimdIntOperators::_S_make_derived( - _Impl::_S_bit_shift_right(__data(__x), __y)); - } + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator>>(const _V& __x, int __y) + { + if (__y < 0) + __invoke_ub( + "The behavior is undefined if the right operand of a shift " + "operation is negative. [expr.shift]\nA shift by %d was requested", + __y); + if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__) + __invoke_ub( + "The behavior is undefined if the right operand of a shift " + "operation is greater than or equal to the width of the promoted " + "left operand. 
[expr.shift]\nA shift by %d was requested", + __y); + return _SimdIntOperators::_S_make_derived( + _Impl::_S_bit_shift_right(__data(__x), __y)); + } // unary operators (for integral _Tp) - _GLIBCXX_SIMD_CONSTEXPR _V operator~() const + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR + _V + operator~() const { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; } }; @@ -4950,7 +5080,7 @@ template <typename _V, typename _Impl> // simd {{{ template <typename _Tp, typename _Abi> class simd : public _SimdIntOperators< - simd<_Tp, _Abi>, typename _SimdTraits<_Tp, _Abi>::_SimdImpl, + simd<_Tp, _Abi>, _Tp, _Abi, conjunction<is_integral<_Tp>, typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>, public _SimdTraits<_Tp, _Abi>::_SimdBase @@ -4964,7 +5094,7 @@ template <typename _Tp, typename _Abi> public: using _Impl = typename _Traits::_SimdImpl; friend _Impl; - friend _SimdIntOperators<simd, _Impl, true>; + friend _SimdIntOperators<simd, _Tp, _Abi, true>; using value_type = _Tp; using reference = _SmartReference<_MemberType, _Impl, value_type>; diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h index 6d3ba5b45f0e692a10fc8dd2bce72b9f810b8e4c..019b98e8c37551e6544914d54e8e3800b350d4ef 100644 --- a/libstdc++-v3/include/experimental/bits/simd_builtin.h +++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h @@ -50,7 +50,8 @@ template <typename _V, typename = _VectorTraits<_V>> //}}} // __vector_permute<Indices...>{{{ // Index == -1 requests zeroing of the output element -template <int... _Indices, typename _Tp, typename _TVT = _VectorTraits<_Tp>> +template <int... _Indices, typename _Tp, typename _TVT = _VectorTraits<_Tp>, + typename = __detail::__odr_helper> _Tp __vector_permute(_Tp __x) { @@ -62,7 +63,8 @@ template <int... 
_Indices, typename _Tp, typename _TVT = _VectorTraits<_Tp>> // }}} // __vector_shuffle<Indices...>{{{ // Index == -1 requests zeroing of the output element -template <int... _Indices, typename _Tp, typename _TVT = _VectorTraits<_Tp>> +template <int... _Indices, typename _Tp, typename _TVT = _VectorTraits<_Tp>, + typename = __detail::__odr_helper> _Tp __vector_shuffle(_Tp __x, _Tp __y) { @@ -820,10 +822,12 @@ template <typename _Tp, typename _Mp, typename _Abi, size_t _Np> // _SimdBase / base class for simd, providing extra conversions {{{ struct _SimdBase2 { + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator __intrinsic_type_t<_Tp, _Np>() const { return __to_intrin(static_cast<const simd<_Tp, _Abi>*>(this)->_M_data); } + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator __vector_type_t<_Tp, _Np>() const { return static_cast<const simd<_Tp, _Abi>*>(this)->_M_data.__builtin(); @@ -832,6 +836,7 @@ template <typename _Tp, typename _Mp, typename _Abi, size_t _Np> struct _SimdBase1 { + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator __intrinsic_type_t<_Tp, _Np>() const { return __data(*static_cast<const simd<_Tp, _Abi>*>(this)); } }; @@ -844,11 +849,13 @@ template <typename _Tp, typename _Mp, typename _Abi, size_t _Np> // _MaskBase {{{ struct _MaskBase2 { + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator __intrinsic_type_t<_Tp, _Np>() const { return static_cast<const simd_mask<_Tp, _Abi>*>(this) ->_M_data.__intrin(); } + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator __vector_type_t<_Tp, _Np>() const { return static_cast<const simd_mask<_Tp, _Abi>*>(this)->_M_data._M_data; @@ -857,6 +864,7 @@ template <typename _Tp, typename _Mp, typename _Abi, size_t _Np> struct _MaskBase1 { + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator __intrinsic_type_t<_Tp, _Np>() const { return __data(*static_cast<const simd_mask<_Tp, _Abi>*>(this)); } }; @@ -874,7 +882,9 @@ template <typename _Tp, typename _Mp, typename _Abi, size_t _Np> _Up _M_data; public: + _GLIBCXX_SIMD_ALWAYS_INLINE _MaskCastType(_Up 
__x) : _M_data(__x) {} + _GLIBCXX_SIMD_ALWAYS_INLINE operator _MaskMember() const { return _M_data; } }; @@ -887,7 +897,9 @@ template <typename _Tp, typename _Mp, typename _Abi, size_t _Np> _SimdMember _M_data; public: + _GLIBCXX_SIMD_ALWAYS_INLINE _SimdCastType1(_Ap __a) : _M_data(__vector_bitcast<_Tp>(__a)) {} + _GLIBCXX_SIMD_ALWAYS_INLINE operator _SimdMember() const { return _M_data; } }; @@ -898,8 +910,11 @@ template <typename _Tp, typename _Mp, typename _Abi, size_t _Np> _SimdMember _M_data; public: + _GLIBCXX_SIMD_ALWAYS_INLINE _SimdCastType2(_Ap __a) : _M_data(__vector_bitcast<_Tp>(__a)) {} + _GLIBCXX_SIMD_ALWAYS_INLINE _SimdCastType2(_Bp __b) : _M_data(__b) {} + _GLIBCXX_SIMD_ALWAYS_INLINE operator _SimdMember() const { return _M_data; } }; @@ -913,14 +928,14 @@ template <typename _Tp, typename _Mp, typename _Abi, size_t _Np> struct _CommonImplX86; struct _CommonImplNeon; struct _CommonImplBuiltin; -template <typename _Abi> struct _SimdImplBuiltin; -template <typename _Abi> struct _MaskImplBuiltin; -template <typename _Abi> struct _SimdImplX86; -template <typename _Abi> struct _MaskImplX86; -template <typename _Abi> struct _SimdImplNeon; -template <typename _Abi> struct _MaskImplNeon; -template <typename _Abi> struct _SimdImplPpc; -template <typename _Abi> struct _MaskImplPpc; +template <typename _Abi, typename = __detail::__odr_helper> struct _SimdImplBuiltin; +template <typename _Abi, typename = __detail::__odr_helper> struct _MaskImplBuiltin; +template <typename _Abi, typename = __detail::__odr_helper> struct _SimdImplX86; +template <typename _Abi, typename = __detail::__odr_helper> struct _MaskImplX86; +template <typename _Abi, typename = __detail::__odr_helper> struct _SimdImplNeon; +template <typename _Abi, typename = __detail::__odr_helper> struct _MaskImplNeon; +template <typename _Abi, typename = __detail::__odr_helper> struct _SimdImplPpc; +template <typename _Abi, typename = __detail::__odr_helper> struct _MaskImplPpc; // simd_abi::_VecBuiltin 
{{{ template <int _UsedBytes> @@ -1369,7 +1384,7 @@ struct _CommonImplBuiltin // }}} // _SimdImplBuiltin {{{1 -template <typename _Abi> +template <typename _Abi, typename> struct _SimdImplBuiltin { // member types {{{2 @@ -2618,7 +2633,7 @@ struct _MaskImplBuiltinMixin }; // _MaskImplBuiltin {{{1 -template <typename _Abi> +template <typename _Abi, typename> struct _MaskImplBuiltin : _MaskImplBuiltinMixin { using _MaskImplBuiltinMixin::_S_to_bits; @@ -2953,4 +2968,4 @@ _GLIBCXX_SIMD_END_NAMESPACE #endif // __cplusplus >= 201703L #endif // _GLIBCXX_EXPERIMENTAL_SIMD_ABIS_H_ -// vim: foldmethod=marker foldmarker={{{,}}} sw=2 noet ts=8 sts=2 tw=80 +// vim: foldmethod=marker foldmarker={{{,}}} sw=2 noet ts=8 sts=2 tw=100 diff --git a/libstdc++-v3/include/experimental/bits/simd_detail.h b/libstdc++-v3/include/experimental/bits/simd_detail.h index 638a5b9233d522da0a7e2233ceb9a4c25000c3fc..9135d3e75d346f3365e45218e59c4f1a228eff16 100644 --- a/libstdc++-v3/include/experimental/bits/simd_detail.h +++ b/libstdc++-v3/include/experimental/bits/simd_detail.h @@ -173,6 +173,46 @@ #else #define _GLIBCXX_SIMD_HAVE_AVX512BW 0 #endif +#ifdef __AVX512BITALG__ +#define _GLIBCXX_SIMD_HAVE_AVX512BITALG 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512BITALG 0 +#endif +#ifdef __AVX512VBMI2__ +#define _GLIBCXX_SIMD_HAVE_AVX512VBMI2 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512VBMI2 0 +#endif +#ifdef __AVX512VBMI__ +#define _GLIBCXX_SIMD_HAVE_AVX512VBMI 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512VBMI 0 +#endif +#ifdef __AVX512IFMA__ +#define _GLIBCXX_SIMD_HAVE_AVX512IFMA 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512IFMA 0 +#endif +#ifdef __AVX512CD__ +#define _GLIBCXX_SIMD_HAVE_AVX512CD 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512CD 0 +#endif +#ifdef __AVX512VNNI__ +#define _GLIBCXX_SIMD_HAVE_AVX512VNNI 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512VNNI 0 +#endif +#ifdef __AVX512VPOPCNTDQ__ +#define _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ 0 +#endif +#ifdef 
__AVX512VP2INTERSECT__ +#define _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT 0 +#endif #if _GLIBCXX_SIMD_HAVE_SSE #define _GLIBCXX_SIMD_HAVE_SSE_ABI 1 diff --git a/libstdc++-v3/include/experimental/bits/simd_fixed_size.h b/libstdc++-v3/include/experimental/bits/simd_fixed_size.h index f2e77d0d1214cc3401f20d79a44b7333c81bb065..7bb248cf9d0a4acbbf43e5111db0ec32730ba3bd 100644 --- a/libstdc++-v3/include/experimental/bits/simd_fixed_size.h +++ b/libstdc++-v3/include/experimental/bits/simd_fixed_size.h @@ -201,6 +201,7 @@ template <typename _Tp, typename _Abi, size_t _Offset> }; template <size_t _Offset, typename _Tp, typename _Abi, typename... _As> + _GLIBCXX_SIMD_INTRINSIC __tuple_element_meta<_Tp, _Abi, _Offset> __make_meta(const _SimdTuple<_Tp, _Abi, _As...>&) { return {}; } @@ -230,11 +231,13 @@ template <size_t _O0, size_t _O1, typename _Base> struct _WithOffset<_O0, _WithOffset<_O1, _Base>> {}; template <size_t _Offset, typename _Tp> + _GLIBCXX_SIMD_INTRINSIC decltype(auto) __add_offset(_Tp& __base) { return static_cast<_WithOffset<_Offset, __remove_cvref_t<_Tp>>&>(__base); } template <size_t _Offset, typename _Tp> + _GLIBCXX_SIMD_INTRINSIC decltype(auto) __add_offset(const _Tp& __base) { @@ -243,6 +246,7 @@ template <size_t _Offset, typename _Tp> } template <size_t _Offset, size_t _ExistingOffset, typename _Tp> + _GLIBCXX_SIMD_INTRINSIC decltype(auto) __add_offset(_WithOffset<_ExistingOffset, _Tp>& __base) { @@ -251,6 +255,7 @@ template <size_t _Offset, size_t _ExistingOffset, typename _Tp> } template <size_t _Offset, size_t _ExistingOffset, typename _Tp> + _GLIBCXX_SIMD_INTRINSIC decltype(auto) __add_offset(const _WithOffset<_ExistingOffset, _Tp>& __base) { @@ -586,6 +591,7 @@ template <typename _Tp, typename _Abi0, typename... 
_Abis> return second[integral_constant<_Up, _I - simd_size_v<_Tp, _Abi0>>()]; } + _GLIBCXX_SIMD_INTRINSIC _Tp operator[](size_t __i) const noexcept { if constexpr (_S_tuple_size == 1) @@ -608,6 +614,7 @@ template <typename _Tp, typename _Abi0, typename... _Abis> } } + _GLIBCXX_SIMD_INTRINSIC void _M_set(size_t __i, _Tp __val) noexcept { if constexpr (_S_tuple_size == 1) @@ -627,6 +634,7 @@ template <typename _Tp, typename _Abi0, typename... _Abis> private: // _M_subscript_read/_write {{{ + _GLIBCXX_SIMD_INTRINSIC _Tp _M_subscript_read([[maybe_unused]] size_t __i) const noexcept { if constexpr (__is_vectorizable_v<_FirstType>) @@ -635,6 +643,7 @@ template <typename _Tp, typename _Abi0, typename... _Abis> return first[__i]; } + _GLIBCXX_SIMD_INTRINSIC void _M_subscript_write([[maybe_unused]] size_t __i, _Tp __y) noexcept { if constexpr (__is_vectorizable_v<_FirstType>) @@ -1033,9 +1042,11 @@ template <typename _Tp, bool = is_arithmetic_v<__remove_cvref_t<_Tp>>> _Tp _M_data; using _TT = __remove_cvref_t<_Tp>; + _GLIBCXX_SIMD_INTRINSIC operator _TT() { return _M_data; } + _GLIBCXX_SIMD_INTRINSIC operator _TT&() { static_assert(is_lvalue_reference<_Tp>::value, ""); @@ -1043,6 +1054,7 @@ template <typename _Tp, bool = is_arithmetic_v<__remove_cvref_t<_Tp>>> return _M_data; } + _GLIBCXX_SIMD_INTRINSIC operator _TT*() { static_assert(is_lvalue_reference<_Tp>::value, ""); @@ -1050,13 +1062,16 @@ template <typename _Tp, bool = is_arithmetic_v<__remove_cvref_t<_Tp>>> return &_M_data; } - constexpr inline __autocvt_to_simd(_Tp dd) : _M_data(dd) {} + _GLIBCXX_SIMD_INTRINSIC + constexpr __autocvt_to_simd(_Tp dd) : _M_data(dd) {} template <typename _Abi> + _GLIBCXX_SIMD_INTRINSIC operator simd<typename _TT::value_type, _Abi>() { return {__private_init, _M_data}; } template <typename _Abi> + _GLIBCXX_SIMD_INTRINSIC operator simd<typename _TT::value_type, _Abi>&() { return *reinterpret_cast<simd<typename _TT::value_type, _Abi>*>( @@ -1064,6 +1079,7 @@ template <typename _Tp, bool = 
is_arithmetic_v<__remove_cvref_t<_Tp>>> } template <typename _Abi> + _GLIBCXX_SIMD_INTRINSIC operator simd<typename _TT::value_type, _Abi>*() { return reinterpret_cast<simd<typename _TT::value_type, _Abi>*>( @@ -1081,14 +1097,18 @@ template <typename _Tp> _Tp _M_data; fixed_size_simd<_TT, 1> _M_fd; - constexpr inline __autocvt_to_simd(_Tp dd) : _M_data(dd), _M_fd(_M_data) {} + _GLIBCXX_SIMD_INTRINSIC + constexpr __autocvt_to_simd(_Tp dd) : _M_data(dd), _M_fd(_M_data) {} + _GLIBCXX_SIMD_INTRINSIC ~__autocvt_to_simd() { _M_data = __data(_M_fd).first; } + _GLIBCXX_SIMD_INTRINSIC operator fixed_size_simd<_TT, 1>() { return _M_fd; } + _GLIBCXX_SIMD_INTRINSIC operator fixed_size_simd<_TT, 1> &() { static_assert(is_lvalue_reference<_Tp>::value, ""); @@ -1096,6 +1116,7 @@ template <typename _Tp> return _M_fd; } + _GLIBCXX_SIMD_INTRINSIC operator fixed_size_simd<_TT, 1> *() { static_assert(is_lvalue_reference<_Tp>::value, ""); @@ -1107,8 +1128,8 @@ template <typename _Tp> // }}} struct _CommonImplFixedSize; -template <int _Np> struct _SimdImplFixedSize; -template <int _Np> struct _MaskImplFixedSize; +template <int _Np, typename = __detail::__odr_helper> struct _SimdImplFixedSize; +template <int _Np, typename = __detail::__odr_helper> struct _MaskImplFixedSize; // simd_abi::_Fixed {{{ template <int _Np> struct simd_abi::_Fixed @@ -1172,12 +1193,15 @@ template <int _Np> { // The following ensures, function arguments are passed via the stack. 
// This is important for ABI compatibility across TU boundaries + _GLIBCXX_SIMD_ALWAYS_INLINE _SimdBase(const _SimdBase&) {} _SimdBase() = default; + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator const _SimdMember &() const { return static_cast<const simd<_Tp, _Fixed>*>(this)->_M_data; } + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator array<_Tp, _Np>() const { array<_Tp, _Np> __r; @@ -1198,8 +1222,11 @@ template <int _Np> // _SimdCastType {{{ struct _SimdCastType { + _GLIBCXX_SIMD_ALWAYS_INLINE _SimdCastType(const array<_Tp, _Np>&); + _GLIBCXX_SIMD_ALWAYS_INLINE _SimdCastType(const _SimdMember& dd) : _M_data(dd) {} + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator const _SimdMember &() const { return _M_data; } private: @@ -1237,7 +1264,7 @@ struct _CommonImplFixedSize // _SimdImplFixedSize {{{1 // fixed_size should not inherit from _SimdMathFallback in order for // specializations in the used _SimdTuple Abis to get used -template <int _Np> +template <int _Np, typename> struct _SimdImplFixedSize { // member types {{{2 @@ -1794,7 +1821,7 @@ template <int _Np> }; // _MaskImplFixedSize {{{1 -template <int _Np> +template <int _Np, typename> struct _MaskImplFixedSize { static_assert( diff --git a/libstdc++-v3/include/experimental/bits/simd_math.h b/libstdc++-v3/include/experimental/bits/simd_math.h index 6f2ee3941b16648e0e0894fd27778fe8924d2763..5fe44adceb52e3f694dd7775a63dac2dc772fd85 100644 --- a/libstdc++-v3/include/experimental/bits/simd_math.h +++ b/libstdc++-v3/include/experimental/bits/simd_math.h @@ -60,6 +60,7 @@ template <typename _DoubleR, typename _Tp, typename _Abi> template <typename _Tp, typename _Abi, typename..., \ typename _R = _Math_return_type_t< \ decltype(std::__name(declval<double>())), _Tp, _Abi>> \ + _GLIBCXX_SIMD_ALWAYS_INLINE \ enable_if_t<is_floating_point_v<_Tp>, _R> \ __name(simd<_Tp, _Abi> __x) \ { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; } @@ -125,6 +126,7 @@ template < \ typename _Arg2 = 
_Extra_argument_type<__arg2, _Tp, _Abi>, \ typename _R = _Math_return_type_t< \ decltype(std::__name(declval<double>(), _Arg2::declval())), _Tp, _Abi>> \ + _GLIBCXX_SIMD_ALWAYS_INLINE \ enable_if_t<is_floating_point_v<_Tp>, _R> \ __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y) \ { \ @@ -155,6 +157,7 @@ template <typename _Tp, typename _Abi, typename..., \ decltype(std::__name(declval<double>(), _Arg2::declval(), \ _Arg3::declval())), \ _Tp, _Abi>> \ + _GLIBCXX_SIMD_ALWAYS_INLINE \ enable_if_t<is_floating_point_v<_Tp>, _R> \ __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y, \ const typename _Arg3::type& __z) \ @@ -399,6 +402,7 @@ template <typename _Abi> // }}} // __extract_exponent_as_int {{{ template <typename _Tp, typename _Abi> + _GLIBCXX_SIMD_INTRINSIC rebind_simd_t<int, simd<_Tp, _Abi>> __extract_exponent_as_int(const simd<_Tp, _Abi>& __v) { @@ -422,7 +426,8 @@ template <typename ImplFun, typename FallbackFun, typename... _Args> -> decltype(__impl_fun(static_cast<_Args&&>(__args)...)) { return __impl_fun(static_cast<_Args&&>(__args)...); } -template <typename ImplFun, typename FallbackFun, typename... _Args> +template <typename ImplFun, typename FallbackFun, typename... _Args, + typename = __detail::__odr_helper> inline auto __impl_or_fallback_dispatch(float, ImplFun&&, FallbackFun&& __fallback_fun, _Args&&... __args) @@ -458,7 +463,7 @@ _GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp) * Fix sign. 
*/ // cos{{{ -template <typename _Tp, typename _Abi> +template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> cos(const simd<_Tp, _Abi>& __x) { @@ -504,7 +509,7 @@ template <typename _Tp> //}}} // sin{{{ -template <typename _Tp, typename _Abi> +template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> sin(const simd<_Tp, _Abi>& __x) { @@ -566,6 +571,7 @@ _GLIBCXX_SIMD_MATH_CALL_(expm1) // frexp {{{ #if _GLIBCXX_SIMD_X86INTRIN template <typename _Tp, size_t _Np> + _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np> __getexp(_SimdWrapper<_Tp, _Np> __x) { @@ -594,6 +600,7 @@ template <typename _Tp, size_t _Np> } template <typename _Tp, size_t _Np> + _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np> __getmant_avx512(_SimdWrapper<_Tp, _Np> __x) { @@ -634,7 +641,7 @@ template <typename _Tp, size_t _Np> * The return value will be in the range [0.5, 1.0[ * The @p __e value will be an integer defining the power-of-two exponent */ -template <typename _Tp, typename _Abi> +template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp) { @@ -739,7 +746,7 @@ _GLIBCXX_SIMD_MATH_CALL_(log2) //}}} // logb{{{ -template <typename _Tp, typename _Abi> +template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, _Abi>> logb(const simd<_Tp, _Abi>& __x) { @@ -814,7 +821,7 @@ template <typename _Tp, typename _Abi> } //}}} -template <typename _Tp, typename _Abi> +template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr) { @@ -848,6 +855,7 @@ _GLIBCXX_SIMD_MATH_CALL_(fabs) // [parallel.simd.math] only asks for 
is_floating_point_v<_Tp> and forgot to // allow signed integral _Tp template <typename _Tp, typename _Abi> + _GLIBCXX_SIMD_ALWAYS_INLINE enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>> abs(const simd<_Tp, _Abi>& __x) { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; } @@ -930,7 +938,7 @@ template <typename _R, typename _ToApply, typename _Tp, typename... _Tps> __data(__args)...)}; } -template <typename _VV> +template <typename _VV, typename = __detail::__odr_helper> __remove_cvref_t<_VV> __hypot(_VV __x, _VV __y) { @@ -1069,7 +1077,7 @@ template <typename _Tp, typename _Abi> _GLIBCXX_SIMD_CVTING2(hypot) - template <typename _VV> + template <typename _VV, typename = __detail::__odr_helper> __remove_cvref_t<_VV> __hypot(_VV __x, _VV __y, _VV __z) { @@ -1271,7 +1279,7 @@ _GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp) _GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp) _GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*) -template <typename _Tp, typename _Abi> +template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y) { @@ -1303,12 +1311,14 @@ _GLIBCXX_SIMD_MATH_CALL_(isfinite) // `int isinf(double)`. 
template <typename _Tp, typename _Abi, typename..., typename _R = _Math_return_type_t<bool, _Tp, _Abi>> + _GLIBCXX_SIMD_ALWAYS_INLINE enable_if_t<is_floating_point_v<_Tp>, _R> isinf(simd<_Tp, _Abi> __x) { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; } template <typename _Tp, typename _Abi, typename..., typename _R = _Math_return_type_t<bool, _Tp, _Abi>> + _GLIBCXX_SIMD_ALWAYS_INLINE enable_if_t<is_floating_point_v<_Tp>, _R> isnan(simd<_Tp, _Abi> __x) { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; } @@ -1316,6 +1326,7 @@ template <typename _Tp, typename _Abi, typename..., _GLIBCXX_SIMD_MATH_CALL_(isnormal) template <typename..., typename _Tp, typename _Abi> + _GLIBCXX_SIMD_ALWAYS_INLINE simd_mask<_Tp, _Abi> signbit(simd<_Tp, _Abi> __x) { @@ -1363,7 +1374,7 @@ simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer, */ // special math {{{ -template <typename _Tp, typename _Abi> +template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> assoc_laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m, @@ -1374,7 +1385,7 @@ template <typename _Tp, typename _Abi> }); } -template <typename _Tp, typename _Abi> +template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> assoc_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m, @@ -1398,7 +1409,7 @@ _GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp) _GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp) _GLIBCXX_SIMD_MATH_CALL_(expint) -template <typename _Tp, typename _Abi> +template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> hermite(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, const simd<_Tp, _Abi>& __x) @@ -1407,7 +1418,7 
@@ template <typename _Tp, typename _Abi> [&](auto __i) { return std::hermite(__n[__i], __x[__i]); }); } -template <typename _Tp, typename _Abi> +template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, const simd<_Tp, _Abi>& __x) @@ -1416,7 +1427,7 @@ template <typename _Tp, typename _Abi> [&](auto __i) { return std::laguerre(__n[__i], __x[__i]); }); } -template <typename _Tp, typename _Abi> +template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, const simd<_Tp, _Abi>& __x) @@ -1427,7 +1438,7 @@ template <typename _Tp, typename _Abi> _GLIBCXX_SIMD_MATH_CALL_(riemann_zeta) -template <typename _Tp, typename _Abi> +template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> sph_bessel(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, const simd<_Tp, _Abi>& __x) @@ -1436,7 +1447,7 @@ template <typename _Tp, typename _Abi> [&](auto __i) { return std::sph_bessel(__n[__i], __x[__i]); }); } -template <typename _Tp, typename _Abi> +template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> sph_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __l, const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m, @@ -1447,7 +1458,7 @@ template <typename _Tp, typename _Abi> }); } -template <typename _Tp, typename _Abi> +template <typename _Tp, typename _Abi, typename = __detail::__odr_helper> enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> sph_neumann(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, const simd<_Tp, _Abi>& __x) diff --git a/libstdc++-v3/include/experimental/bits/simd_neon.h 
b/libstdc++-v3/include/experimental/bits/simd_neon.h index 5fa9fea108efbbbe618794308390a44839003f91..0945092940a9c5a024d48f98f38b5294c0856810 100644 --- a/libstdc++-v3/include/experimental/bits/simd_neon.h +++ b/libstdc++-v3/include/experimental/bits/simd_neon.h @@ -44,7 +44,7 @@ struct _CommonImplNeon : _CommonImplBuiltin // }}} // _SimdImplNeon {{{ -template <typename _Abi> +template <typename _Abi, typename> struct _SimdImplNeon : _SimdImplBuiltin<_Abi> { using _Base = _SimdImplBuiltin<_Abi>; @@ -390,7 +390,7 @@ struct _MaskImplNeonMixin // }}} // _MaskImplNeon {{{ -template <typename _Abi> +template <typename _Abi, typename> struct _MaskImplNeon : _MaskImplNeonMixin, _MaskImplBuiltin<_Abi> { using _MaskImplBuiltinMixin::_S_to_maskvector; diff --git a/libstdc++-v3/include/experimental/bits/simd_ppc.h b/libstdc++-v3/include/experimental/bits/simd_ppc.h index 4be3c2dde7cfb1558aece1ce80255488d486a604..a83d970a4575aa7f331bed06f1e19998e7eda22f 100644 --- a/libstdc++-v3/include/experimental/bits/simd_ppc.h +++ b/libstdc++-v3/include/experimental/bits/simd_ppc.h @@ -35,7 +35,7 @@ _GLIBCXX_SIMD_BEGIN_NAMESPACE // _SimdImplPpc {{{ -template <typename _Abi> +template <typename _Abi, typename> struct _SimdImplPpc : _SimdImplBuiltin<_Abi> { using _Base = _SimdImplBuiltin<_Abi>; @@ -117,7 +117,7 @@ template <typename _Abi> // }}} // _MaskImplPpc {{{ -template <typename _Abi> +template <typename _Abi, typename> struct _MaskImplPpc : _MaskImplBuiltin<_Abi> { using _Base = _MaskImplBuiltin<_Abi>; diff --git a/libstdc++-v3/include/experimental/bits/simd_scalar.h b/libstdc++-v3/include/experimental/bits/simd_scalar.h index 555143e6ffd861edf057ff3902378a8589118018..33c3cefc88dec9f0cdf0a0807b4318838137a17f 100644 --- a/libstdc++-v3/include/experimental/bits/simd_scalar.h +++ b/libstdc++-v3/include/experimental/bits/simd_scalar.h @@ -155,7 +155,8 @@ struct _SimdImplScalar // _S_masked_load {{{2 template <typename _Tp, typename _Up> - static inline _Tp _S_masked_load(_Tp __merge, 
bool __k, + _GLIBCXX_SIMD_INTRINSIC + static _Tp _S_masked_load(_Tp __merge, bool __k, const _Up* __mem) noexcept { if (__k) @@ -165,83 +166,97 @@ struct _SimdImplScalar // _S_store {{{2 template <typename _Tp, typename _Up> - static inline void _S_store(_Tp __v, _Up* __mem, _TypeTag<_Tp>) noexcept + _GLIBCXX_SIMD_INTRINSIC + static void _S_store(_Tp __v, _Up* __mem, _TypeTag<_Tp>) noexcept { __mem[0] = static_cast<_Up>(__v); } // _S_masked_store {{{2 template <typename _Tp, typename _Up> - static inline void _S_masked_store(const _Tp __v, _Up* __mem, + _GLIBCXX_SIMD_INTRINSIC + static void _S_masked_store(const _Tp __v, _Up* __mem, const bool __k) noexcept { if (__k) __mem[0] = __v; } // _S_negate {{{2 template <typename _Tp> - static constexpr inline bool _S_negate(_Tp __x) noexcept + _GLIBCXX_SIMD_INTRINSIC + static constexpr bool _S_negate(_Tp __x) noexcept { return !__x; } // _S_reduce {{{2 template <typename _Tp, typename _BinaryOperation> - static constexpr inline _Tp + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_reduce(const simd<_Tp, simd_abi::scalar>& __x, const _BinaryOperation&) { return __x._M_data; } // _S_min, _S_max {{{2 template <typename _Tp> - static constexpr inline _Tp _S_min(const _Tp __a, const _Tp __b) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_min(const _Tp __a, const _Tp __b) { return std::min(__a, __b); } template <typename _Tp> - static constexpr inline _Tp _S_max(const _Tp __a, const _Tp __b) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_max(const _Tp __a, const _Tp __b) { return std::max(__a, __b); } // _S_complement {{{2 template <typename _Tp> - static constexpr inline _Tp _S_complement(_Tp __x) noexcept + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_complement(_Tp __x) noexcept { return static_cast<_Tp>(~__x); } // _S_unary_minus {{{2 template <typename _Tp> - static constexpr inline _Tp _S_unary_minus(_Tp __x) noexcept + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_unary_minus(_Tp __x) noexcept { 
return static_cast<_Tp>(-__x); } // arithmetic operators {{{2 template <typename _Tp> - static constexpr inline _Tp _S_plus(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_plus(_Tp __x, _Tp __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) + __promote_preserving_unsigned(__y)); } template <typename _Tp> - static constexpr inline _Tp _S_minus(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_minus(_Tp __x, _Tp __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) - __promote_preserving_unsigned(__y)); } template <typename _Tp> - static constexpr inline _Tp _S_multiplies(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_multiplies(_Tp __x, _Tp __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) * __promote_preserving_unsigned(__y)); } template <typename _Tp> - static constexpr inline _Tp _S_divides(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_divides(_Tp __x, _Tp __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) / __promote_preserving_unsigned(__y)); } template <typename _Tp> - static constexpr inline _Tp _S_modulus(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_modulus(_Tp __x, _Tp __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) % __promote_preserving_unsigned(__y)); } template <typename _Tp> - static constexpr inline _Tp _S_bit_and(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_bit_and(_Tp __x, _Tp __y) { if constexpr (is_floating_point_v<_Tp>) { @@ -254,7 +269,8 @@ struct _SimdImplScalar } template <typename _Tp> - static constexpr inline _Tp _S_bit_or(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_bit_or(_Tp __x, _Tp __y) { if constexpr (is_floating_point_v<_Tp>) { @@ -267,7 +283,8 @@ struct _SimdImplScalar } template <typename _Tp> - static constexpr inline _Tp _S_bit_xor(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr 
_Tp _S_bit_xor(_Tp __x, _Tp __y) { if constexpr (is_floating_point_v<_Tp>) { @@ -280,11 +297,13 @@ struct _SimdImplScalar } template <typename _Tp> - static constexpr inline _Tp _S_bit_shift_left(_Tp __x, int __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_bit_shift_left(_Tp __x, int __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) << __y); } template <typename _Tp> - static constexpr inline _Tp _S_bit_shift_right(_Tp __x, int __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_bit_shift_right(_Tp __x, int __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) >> __y); } // math {{{2 @@ -553,11 +572,13 @@ struct _SimdImplScalar // _S_increment & _S_decrement{{{2 template <typename _Tp> - constexpr static inline void _S_increment(_Tp& __x) + _GLIBCXX_SIMD_INTRINSIC + constexpr static void _S_increment(_Tp& __x) { ++__x; } template <typename _Tp> - constexpr static inline void _S_decrement(_Tp& __x) + _GLIBCXX_SIMD_INTRINSIC + constexpr static void _S_decrement(_Tp& __x) { --__x; } @@ -582,6 +603,7 @@ struct _SimdImplScalar // smart_reference access {{{2 template <typename _Tp, typename _Up> + _GLIBCXX_SIMD_INTRINSIC constexpr static void _S_set(_Tp& __v, [[maybe_unused]] int __i, _Up&& __x) noexcept { @@ -677,25 +699,32 @@ struct _MaskImplScalar } // logical and bitwise operators {{{2 + _GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_logical_and(bool __x, bool __y) { return __x && __y; } + _GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_logical_or(bool __x, bool __y) { return __x || __y; } + _GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_bit_not(bool __x) { return !__x; } + _GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_bit_and(bool __x, bool __y) { return __x && __y; } + _GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_bit_or(bool __x, bool __y) { return __x || __y; } + _GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_bit_xor(bool __x, bool __y) { return __x != __y; } // smart_reference access {{{2 + 
_GLIBCXX_SIMD_INTRINSIC constexpr static void _S_set(bool& __k, [[maybe_unused]] int __i, bool __x) noexcept { diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h index 8d493d87aa3b545879eb9e696eb2c5d74aa57888..baa978bffdafc2d378365da979d177ef1a71825c 100644 --- a/libstdc++-v3/include/experimental/bits/simd_x86.h +++ b/libstdc++-v3/include/experimental/bits/simd_x86.h @@ -822,7 +822,7 @@ struct _CommonImplX86 : _CommonImplBuiltin // }}} // _SimdImplX86 {{{ -template <typename _Abi> +template <typename _Abi, typename> struct _SimdImplX86 : _SimdImplBuiltin<_Abi> { using _Base = _SimdImplBuiltin<_Abi>; @@ -4241,7 +4241,7 @@ struct _MaskImplX86Mixin // }}} // _MaskImplX86 {{{ -template <typename _Abi> +template <typename _Abi, typename> struct _MaskImplX86 : _MaskImplX86Mixin, _MaskImplBuiltin<_Abi> { using _MaskImplX86Mixin::_S_to_bits;