From 400453c334a97cd14e331b2a2d8a28c57a68b1a8 Mon Sep 17 00:00:00 2001
From: Christophe Lyon <christophe.lyon@arm.com>
Date: Mon, 13 Feb 2023 21:05:56 +0000
Subject: [PATCH] arm: [MVE intrinsics] rework vmaxnmavq vmaxnmvq vminnmavq vminnmvq

Implement vmaxnmavq vmaxnmvq vminnmavq vminnmvq using the new MVE
builtins framework.

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (FUNCTION_PRED_P_F): New.
	(vmaxnmavq, vmaxnmvq, vminnmavq, vminnmvq): New.
	* config/arm/arm-mve-builtins-base.def (vmaxnmavq, vmaxnmvq)
	(vminnmavq, vminnmvq): New.
	* config/arm/arm-mve-builtins-base.h (vmaxnmavq, vmaxnmvq)
	(vminnmavq, vminnmvq): New.
	* config/arm/arm_mve.h (vminnmvq): Remove.
	(vminnmavq): Remove.
	(vmaxnmvq): Remove.
	(vmaxnmavq): Remove.
	(vmaxnmavq_p): Remove.
	(vmaxnmvq_p): Remove.
	(vminnmavq_p): Remove.
	(vminnmvq_p): Remove.
	(vminnmvq_f16): Remove.
	(vminnmavq_f16): Remove.
	(vmaxnmvq_f16): Remove.
	(vmaxnmavq_f16): Remove.
	(vminnmvq_f32): Remove.
	(vminnmavq_f32): Remove.
	(vmaxnmvq_f32): Remove.
	(vmaxnmavq_f32): Remove.
	(vmaxnmavq_p_f16): Remove.
	(vmaxnmvq_p_f16): Remove.
	(vminnmavq_p_f16): Remove.
	(vminnmvq_p_f16): Remove.
	(vmaxnmavq_p_f32): Remove.
	(vmaxnmvq_p_f32): Remove.
	(vminnmavq_p_f32): Remove.
	(vminnmvq_p_f32): Remove.
	(__arm_vminnmvq_f16): Remove.
	(__arm_vminnmavq_f16): Remove.
	(__arm_vmaxnmvq_f16): Remove.
	(__arm_vmaxnmavq_f16): Remove.
	(__arm_vminnmvq_f32): Remove.
	(__arm_vminnmavq_f32): Remove.
	(__arm_vmaxnmvq_f32): Remove.
	(__arm_vmaxnmavq_f32): Remove.
	(__arm_vmaxnmavq_p_f16): Remove.
	(__arm_vmaxnmvq_p_f16): Remove.
	(__arm_vminnmavq_p_f16): Remove.
	(__arm_vminnmvq_p_f16): Remove.
	(__arm_vmaxnmavq_p_f32): Remove.
	(__arm_vmaxnmvq_p_f32): Remove.
	(__arm_vminnmavq_p_f32): Remove.
	(__arm_vminnmvq_p_f32): Remove.
	(__arm_vminnmvq): Remove.
	(__arm_vminnmavq): Remove.
	(__arm_vmaxnmvq): Remove.
	(__arm_vmaxnmavq): Remove.
	(__arm_vmaxnmavq_p): Remove.
	(__arm_vmaxnmvq_p): Remove.
	(__arm_vminnmavq_p): Remove.
(__arm_vminnmvq_p): Remove. (__arm_vmaxnmavq_m): Remove. (__arm_vmaxnmvq_m): Remove. --- gcc/config/arm/arm-mve-builtins-base.cc | 10 + gcc/config/arm/arm-mve-builtins-base.def | 4 + gcc/config/arm/arm-mve-builtins-base.h | 4 + gcc/config/arm/arm_mve.h | 314 ----------------------- 4 files changed, 18 insertions(+), 314 deletions(-) diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc index dcbd1906563d..af00d0707392 100644 --- a/gcc/config/arm/arm-mve-builtins-base.cc +++ b/gcc/config/arm/arm-mve-builtins-base.cc @@ -225,6 +225,12 @@ namespace arm_mve { (UNSPEC##_S, UNSPEC##_U, -1, \ UNSPEC##_P_S, UNSPEC##_P_U, -1)) + /* Helper for builtins without RTX codes, _F mode, _p predicated. */ +#define FUNCTION_PRED_P_F(NAME, UNSPEC) FUNCTION \ + (NAME, unspec_mve_function_exact_insn_pred_p, \ + (-1, -1, UNSPEC##_F, \ + -1, -1, UNSPEC##_P_F)) + FUNCTION_WITHOUT_N (vabdq, VABDQ) FUNCTION (vabsq, unspec_based_mve_function_exact_insn, (ABS, ABS, ABS, -1, -1, -1, VABSQ_M_S, -1, VABSQ_M_F, -1, -1, -1)) FUNCTION_WITH_RTX_M_N (vaddq, PLUS, VADDQ) @@ -236,11 +242,15 @@ FUNCTION_WITH_RTX_M (veorq, XOR, VEORQ) FUNCTION_WITH_M_N_NO_F (vhaddq, VHADDQ) FUNCTION_WITH_M_N_NO_F (vhsubq, VHSUBQ) FUNCTION_PRED_P_S (vmaxavq, VMAXAVQ) +FUNCTION_PRED_P_F (vmaxnmavq, VMAXNMAVQ) FUNCTION (vmaxnmq, unspec_based_mve_function_exact_insn, (UNKNOWN, UNKNOWN, SMAX, -1, -1, -1, -1, -1, VMAXNMQ_M_F, -1, -1, -1)) +FUNCTION_PRED_P_F (vmaxnmvq, VMAXNMVQ) FUNCTION_WITH_RTX_M_NO_F (vmaxq, SMAX, UMAX, VMAXQ) FUNCTION_PRED_P_S_U (vmaxvq, VMAXVQ) FUNCTION_PRED_P_S (vminavq, VMINAVQ) +FUNCTION_PRED_P_F (vminnmavq, VMINNMAVQ) FUNCTION (vminnmq, unspec_based_mve_function_exact_insn, (UNKNOWN, UNKNOWN, SMIN, -1, -1, -1, -1, -1, VMINNMQ_M_F, -1, -1, -1)) +FUNCTION_PRED_P_F (vminnmvq, VMINNMVQ) FUNCTION_WITH_RTX_M_NO_F (vminq, SMIN, UMIN, VMINQ) FUNCTION_PRED_P_S_U (vminvq, VMINVQ) FUNCTION_WITHOUT_N_NO_F (vmovnbq, VMOVNBQ) diff --git 
a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def index c2155bafeb33..19ac75c8f2e1 100644 --- a/gcc/config/arm/arm-mve-builtins-base.def +++ b/gcc/config/arm/arm-mve-builtins-base.def @@ -86,8 +86,12 @@ DEF_MVE_FUNCTION (vaddq, binary_opt_n, all_float, mx_or_none) DEF_MVE_FUNCTION (vandq, binary, all_float, mx_or_none) DEF_MVE_FUNCTION (vcreateq, create, all_float, none) DEF_MVE_FUNCTION (veorq, binary, all_float, mx_or_none) +DEF_MVE_FUNCTION (vmaxnmavq, binary_maxvminv, all_float, p_or_none) DEF_MVE_FUNCTION (vmaxnmq, binary, all_float, mx_or_none) +DEF_MVE_FUNCTION (vmaxnmvq, binary_maxvminv, all_float, p_or_none) +DEF_MVE_FUNCTION (vminnmavq, binary_maxvminv, all_float, p_or_none) DEF_MVE_FUNCTION (vminnmq, binary, all_float, mx_or_none) +DEF_MVE_FUNCTION (vminnmvq, binary_maxvminv, all_float, p_or_none) DEF_MVE_FUNCTION (vmulq, binary_opt_n, all_float, mx_or_none) DEF_MVE_FUNCTION (vnegq, unary, all_float, mx_or_none) DEF_MVE_FUNCTION (vorrq, binary_orrq, all_float, mx_or_none) diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h index 0290ee72b4c7..dc413fc63df5 100644 --- a/gcc/config/arm/arm-mve-builtins-base.h +++ b/gcc/config/arm/arm-mve-builtins-base.h @@ -34,11 +34,15 @@ extern const function_base *const veorq; extern const function_base *const vhaddq; extern const function_base *const vhsubq; extern const function_base *const vmaxavq; +extern const function_base *const vmaxnmavq; extern const function_base *const vmaxnmq; +extern const function_base *const vmaxnmvq; extern const function_base *const vmaxq; extern const function_base *const vmaxvq; extern const function_base *const vminavq; +extern const function_base *const vminnmavq; extern const function_base *const vminnmq; +extern const function_base *const vminnmvq; extern const function_base *const vminq; extern const function_base *const vminvq; extern const function_base *const vmovnbq; diff --git 
a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 12e77eee11e1..ac0e5801d0c3 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -308,11 +308,7 @@ #define vcvtbq_f32(__a) __arm_vcvtbq_f32(__a) #define vcvtq(__a) __arm_vcvtq(__a) #define vcvtq_n(__a, __imm6) __arm_vcvtq_n(__a, __imm6) -#define vminnmvq(__a, __b) __arm_vminnmvq(__a, __b) -#define vminnmavq(__a, __b) __arm_vminnmavq(__a, __b) #define vminnmaq(__a, __b) __arm_vminnmaq(__a, __b) -#define vmaxnmvq(__a, __b) __arm_vmaxnmvq(__a, __b) -#define vmaxnmavq(__a, __b) __arm_vmaxnmavq(__a, __b) #define vmaxnmaq(__a, __b) __arm_vmaxnmaq(__a, __b) #define vcmulq_rot90(__a, __b) __arm_vcmulq_rot90(__a, __b) #define vcmulq_rot270(__a, __b) __arm_vcmulq_rot270(__a, __b) @@ -333,11 +329,7 @@ #define vcvtnq_m(__inactive, __a, __p) __arm_vcvtnq_m(__inactive, __a, __p) #define vcvtpq_m(__inactive, __a, __p) __arm_vcvtpq_m(__inactive, __a, __p) #define vmaxnmaq_m(__a, __b, __p) __arm_vmaxnmaq_m(__a, __b, __p) -#define vmaxnmavq_p(__a, __b, __p) __arm_vmaxnmavq_p(__a, __b, __p) -#define vmaxnmvq_p(__a, __b, __p) __arm_vmaxnmvq_p(__a, __b, __p) #define vminnmaq_m(__a, __b, __p) __arm_vminnmaq_m(__a, __b, __p) -#define vminnmavq_p(__a, __b, __p) __arm_vminnmavq_p(__a, __b, __p) -#define vminnmvq_p(__a, __b, __p) __arm_vminnmvq_p(__a, __b, __p) #define vcvtq_m_n(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n(__inactive, __a, __imm6, __p) #define vcmlaq_m(__a, __b, __c, __p) __arm_vcmlaq_m(__a, __b, __c, __p) #define vcmlaq_rot180_m(__a, __b, __c, __p) __arm_vcmlaq_rot180_m(__a, __b, __c, __p) @@ -627,11 +619,7 @@ #define vmlsldavq_s16(__a, __b) __arm_vmlsldavq_s16(__a, __b) #define vmlaldavxq_s16(__a, __b) __arm_vmlaldavxq_s16(__a, __b) #define vmlaldavq_s16(__a, __b) __arm_vmlaldavq_s16(__a, __b) -#define vminnmvq_f16(__a, __b) __arm_vminnmvq_f16(__a, __b) -#define vminnmavq_f16(__a, __b) __arm_vminnmavq_f16(__a, __b) #define vminnmaq_f16(__a, __b) __arm_vminnmaq_f16(__a, __b) -#define 
vmaxnmvq_f16(__a, __b) __arm_vmaxnmvq_f16(__a, __b) -#define vmaxnmavq_f16(__a, __b) __arm_vmaxnmavq_f16(__a, __b) #define vmaxnmaq_f16(__a, __b) __arm_vmaxnmaq_f16(__a, __b) #define vcmulq_rot90_f16(__a, __b) __arm_vcmulq_rot90_f16(__a, __b) #define vcmulq_rot270_f16(__a, __b) __arm_vcmulq_rot270_f16(__a, __b) @@ -666,11 +654,7 @@ #define vmlsldavq_s32(__a, __b) __arm_vmlsldavq_s32(__a, __b) #define vmlaldavxq_s32(__a, __b) __arm_vmlaldavxq_s32(__a, __b) #define vmlaldavq_s32(__a, __b) __arm_vmlaldavq_s32(__a, __b) -#define vminnmvq_f32(__a, __b) __arm_vminnmvq_f32(__a, __b) -#define vminnmavq_f32(__a, __b) __arm_vminnmavq_f32(__a, __b) #define vminnmaq_f32(__a, __b) __arm_vminnmaq_f32(__a, __b) -#define vmaxnmvq_f32(__a, __b) __arm_vmaxnmvq_f32(__a, __b) -#define vmaxnmavq_f32(__a, __b) __arm_vmaxnmavq_f32(__a, __b) #define vmaxnmaq_f32(__a, __b) __arm_vmaxnmaq_f32(__a, __b) #define vcmulq_rot90_f32(__a, __b) __arm_vcmulq_rot90_f32(__a, __b) #define vcmulq_rot270_f32(__a, __b) __arm_vcmulq_rot270_f32(__a, __b) @@ -945,11 +929,7 @@ #define vcvtq_m_s16_f16(__inactive, __a, __p) __arm_vcvtq_m_s16_f16(__inactive, __a, __p) #define vdupq_m_n_f16(__inactive, __a, __p) __arm_vdupq_m_n_f16(__inactive, __a, __p) #define vmaxnmaq_m_f16(__a, __b, __p) __arm_vmaxnmaq_m_f16(__a, __b, __p) -#define vmaxnmavq_p_f16(__a, __b, __p) __arm_vmaxnmavq_p_f16(__a, __b, __p) -#define vmaxnmvq_p_f16(__a, __b, __p) __arm_vmaxnmvq_p_f16(__a, __b, __p) #define vminnmaq_m_f16(__a, __b, __p) __arm_vminnmaq_m_f16(__a, __b, __p) -#define vminnmavq_p_f16(__a, __b, __p) __arm_vminnmavq_p_f16(__a, __b, __p) -#define vminnmvq_p_f16(__a, __b, __p) __arm_vminnmvq_p_f16(__a, __b, __p) #define vmlaldavq_p_s16(__a, __b, __p) __arm_vmlaldavq_p_s16(__a, __b, __p) #define vmlaldavxq_p_s16(__a, __b, __p) __arm_vmlaldavxq_p_s16(__a, __b, __p) #define vmlsldavq_p_s16(__a, __b, __p) __arm_vmlsldavq_p_s16(__a, __b, __p) @@ -999,11 +979,7 @@ #define vcvtq_m_s32_f32(__inactive, __a, __p) 
__arm_vcvtq_m_s32_f32(__inactive, __a, __p) #define vdupq_m_n_f32(__inactive, __a, __p) __arm_vdupq_m_n_f32(__inactive, __a, __p) #define vmaxnmaq_m_f32(__a, __b, __p) __arm_vmaxnmaq_m_f32(__a, __b, __p) -#define vmaxnmavq_p_f32(__a, __b, __p) __arm_vmaxnmavq_p_f32(__a, __b, __p) -#define vmaxnmvq_p_f32(__a, __b, __p) __arm_vmaxnmvq_p_f32(__a, __b, __p) #define vminnmaq_m_f32(__a, __b, __p) __arm_vminnmaq_m_f32(__a, __b, __p) -#define vminnmavq_p_f32(__a, __b, __p) __arm_vminnmavq_p_f32(__a, __b, __p) -#define vminnmvq_p_f32(__a, __b, __p) __arm_vminnmvq_p_f32(__a, __b, __p) #define vmlaldavq_p_s32(__a, __b, __p) __arm_vmlaldavq_p_s32(__a, __b, __p) #define vmlaldavxq_p_s32(__a, __b, __p) __arm_vmlaldavxq_p_s32(__a, __b, __p) #define vmlsldavq_p_s32(__a, __b, __p) __arm_vmlsldavq_p_s32(__a, __b, __p) @@ -10002,20 +9978,6 @@ __arm_vornq_f16 (float16x8_t __a, float16x8_t __b) return __builtin_mve_vornq_fv8hf (__a, __b); } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vminnmvq_f16 (float16_t __a, float16x8_t __b) -{ - return __builtin_mve_vminnmvq_fv8hf (__a, __b); -} - -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vminnmavq_f16 (float16_t __a, float16x8_t __b) -{ - return __builtin_mve_vminnmavq_fv8hf (__a, __b); -} - __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vminnmaq_f16 (float16x8_t __a, float16x8_t __b) @@ -10023,20 +9985,6 @@ __arm_vminnmaq_f16 (float16x8_t __a, float16x8_t __b) return __builtin_mve_vminnmaq_fv8hf (__a, __b); } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmaxnmvq_f16 (float16_t __a, float16x8_t __b) -{ - return __builtin_mve_vmaxnmvq_fv8hf (__a, __b); -} - -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -__arm_vmaxnmavq_f16 (float16_t __a, float16x8_t __b) -{ - return __builtin_mve_vmaxnmavq_fv8hf (__a, __b); -} - __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vmaxnmaq_f16 (float16x8_t __a, float16x8_t __b) @@ -10184,20 +10132,6 @@ __arm_vornq_f32 (float32x4_t __a, float32x4_t __b) return __builtin_mve_vornq_fv4sf (__a, __b); } -__extension__ extern __inline float32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vminnmvq_f32 (float32_t __a, float32x4_t __b) -{ - return __builtin_mve_vminnmvq_fv4sf (__a, __b); -} - -__extension__ extern __inline float32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vminnmavq_f32 (float32_t __a, float32x4_t __b) -{ - return __builtin_mve_vminnmavq_fv4sf (__a, __b); -} - __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vminnmaq_f32 (float32x4_t __a, float32x4_t __b) @@ -10205,20 +10139,6 @@ __arm_vminnmaq_f32 (float32x4_t __a, float32x4_t __b) return __builtin_mve_vminnmaq_fv4sf (__a, __b); } -__extension__ extern __inline float32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmaxnmvq_f32 (float32_t __a, float32x4_t __b) -{ - return __builtin_mve_vmaxnmvq_fv4sf (__a, __b); -} - -__extension__ extern __inline float32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmaxnmavq_f32 (float32_t __a, float32x4_t __b) -{ - return __builtin_mve_vmaxnmavq_fv4sf (__a, __b); -} - __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vmaxnmaq_f32 (float32x4_t __a, float32x4_t __b) @@ -10493,20 +10413,6 @@ __arm_vmaxnmaq_m_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p) return __builtin_mve_vmaxnmaq_m_fv8hf (__a, __b, __p); } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -__arm_vmaxnmavq_p_f16 (float16_t __a, float16x8_t __b, mve_pred16_t __p) -{ - return __builtin_mve_vmaxnmavq_p_fv8hf (__a, __b, __p); -} - -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmaxnmvq_p_f16 (float16_t __a, float16x8_t __b, mve_pred16_t __p) -{ - return __builtin_mve_vmaxnmvq_p_fv8hf (__a, __b, __p); -} - __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vminnmaq_m_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p) @@ -10514,20 +10420,6 @@ __arm_vminnmaq_m_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p) return __builtin_mve_vminnmaq_m_fv8hf (__a, __b, __p); } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vminnmavq_p_f16 (float16_t __a, float16x8_t __b, mve_pred16_t __p) -{ - return __builtin_mve_vminnmavq_p_fv8hf (__a, __b, __p); -} - -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vminnmvq_p_f16 (float16_t __a, float16x8_t __b, mve_pred16_t __p) -{ - return __builtin_mve_vminnmvq_p_fv8hf (__a, __b, __p); -} - __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vpselq_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p) @@ -10745,20 +10637,6 @@ __arm_vmaxnmaq_m_f32 (float32x4_t __a, float32x4_t __b, mve_pred16_t __p) return __builtin_mve_vmaxnmaq_m_fv4sf (__a, __b, __p); } -__extension__ extern __inline float32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmaxnmavq_p_f32 (float32_t __a, float32x4_t __b, mve_pred16_t __p) -{ - return __builtin_mve_vmaxnmavq_p_fv4sf (__a, __b, __p); -} - -__extension__ extern __inline float32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmaxnmvq_p_f32 (float32_t __a, 
float32x4_t __b, mve_pred16_t __p) -{ - return __builtin_mve_vmaxnmvq_p_fv4sf (__a, __b, __p); -} - __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vminnmaq_m_f32 (float32x4_t __a, float32x4_t __b, mve_pred16_t __p) @@ -10766,20 +10644,6 @@ __arm_vminnmaq_m_f32 (float32x4_t __a, float32x4_t __b, mve_pred16_t __p) return __builtin_mve_vminnmaq_m_fv4sf (__a, __b, __p); } -__extension__ extern __inline float32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vminnmavq_p_f32 (float32_t __a, float32x4_t __b, mve_pred16_t __p) -{ - return __builtin_mve_vminnmavq_p_fv4sf (__a, __b, __p); -} - -__extension__ extern __inline float32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vminnmvq_p_f32 (float32_t __a, float32x4_t __b, mve_pred16_t __p) -{ - return __builtin_mve_vminnmvq_p_fv4sf (__a, __b, __p); -} - __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vpselq_f32 (float32x4_t __a, float32x4_t __b, mve_pred16_t __p) @@ -19166,20 +19030,6 @@ __arm_vornq (float16x8_t __a, float16x8_t __b) return __arm_vornq_f16 (__a, __b); } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vminnmvq (float16_t __a, float16x8_t __b) -{ - return __arm_vminnmvq_f16 (__a, __b); -} - -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vminnmavq (float16_t __a, float16x8_t __b) -{ - return __arm_vminnmavq_f16 (__a, __b); -} - __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vminnmaq (float16x8_t __a, float16x8_t __b) @@ -19187,20 +19037,6 @@ __arm_vminnmaq (float16x8_t __a, float16x8_t __b) return __arm_vminnmaq_f16 (__a, __b); } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -__arm_vmaxnmvq (float16_t __a, float16x8_t __b) -{ - return __arm_vmaxnmvq_f16 (__a, __b); -} - -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmaxnmavq (float16_t __a, float16x8_t __b) -{ - return __arm_vmaxnmavq_f16 (__a, __b); -} - __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vmaxnmaq (float16x8_t __a, float16x8_t __b) @@ -19348,20 +19184,6 @@ __arm_vornq (float32x4_t __a, float32x4_t __b) return __arm_vornq_f32 (__a, __b); } -__extension__ extern __inline float32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vminnmvq (float32_t __a, float32x4_t __b) -{ - return __arm_vminnmvq_f32 (__a, __b); -} - -__extension__ extern __inline float32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vminnmavq (float32_t __a, float32x4_t __b) -{ - return __arm_vminnmavq_f32 (__a, __b); -} - __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vminnmaq (float32x4_t __a, float32x4_t __b) @@ -19369,20 +19191,6 @@ __arm_vminnmaq (float32x4_t __a, float32x4_t __b) return __arm_vminnmaq_f32 (__a, __b); } -__extension__ extern __inline float32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmaxnmvq (float32_t __a, float32x4_t __b) -{ - return __arm_vmaxnmvq_f32 (__a, __b); -} - -__extension__ extern __inline float32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmaxnmavq (float32_t __a, float32x4_t __b) -{ - return __arm_vmaxnmavq_f32 (__a, __b); -} - __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vmaxnmaq (float32x4_t __a, float32x4_t __b) @@ -19642,20 +19450,6 @@ __arm_vmaxnmaq_m (float16x8_t __a, float16x8_t __b, mve_pred16_t __p) return __arm_vmaxnmaq_m_f16 
(__a, __b, __p); } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmaxnmavq_p (float16_t __a, float16x8_t __b, mve_pred16_t __p) -{ - return __arm_vmaxnmavq_p_f16 (__a, __b, __p); -} - -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmaxnmvq_p (float16_t __a, float16x8_t __b, mve_pred16_t __p) -{ - return __arm_vmaxnmvq_p_f16 (__a, __b, __p); -} - __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vminnmaq_m (float16x8_t __a, float16x8_t __b, mve_pred16_t __p) @@ -19663,20 +19457,6 @@ __arm_vminnmaq_m (float16x8_t __a, float16x8_t __b, mve_pred16_t __p) return __arm_vminnmaq_m_f16 (__a, __b, __p); } -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vminnmavq_p (float16_t __a, float16x8_t __b, mve_pred16_t __p) -{ - return __arm_vminnmavq_p_f16 (__a, __b, __p); -} - -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vminnmvq_p (float16_t __a, float16x8_t __b, mve_pred16_t __p) -{ - return __arm_vminnmvq_p_f16 (__a, __b, __p); -} - __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vpselq (float16x8_t __a, float16x8_t __b, mve_pred16_t __p) @@ -19894,20 +19674,6 @@ __arm_vmaxnmaq_m (float32x4_t __a, float32x4_t __b, mve_pred16_t __p) return __arm_vmaxnmaq_m_f32 (__a, __b, __p); } -__extension__ extern __inline float32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmaxnmavq_p (float32_t __a, float32x4_t __b, mve_pred16_t __p) -{ - return __arm_vmaxnmavq_p_f32 (__a, __b, __p); -} - -__extension__ extern __inline float32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmaxnmvq_p (float32_t __a, float32x4_t __b, 
mve_pred16_t __p) -{ - return __arm_vmaxnmvq_p_f32 (__a, __b, __p); -} - __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vminnmaq_m (float32x4_t __a, float32x4_t __b, mve_pred16_t __p) @@ -19915,20 +19681,6 @@ __arm_vminnmaq_m (float32x4_t __a, float32x4_t __b, mve_pred16_t __p) return __arm_vminnmaq_m_f32 (__a, __b, __p); } -__extension__ extern __inline float32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vminnmavq_p (float32_t __a, float32x4_t __b, mve_pred16_t __p) -{ - return __arm_vminnmavq_p_f32 (__a, __b, __p); -} - -__extension__ extern __inline float32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vminnmvq_p (float32_t __a, float32x4_t __b, mve_pred16_t __p) -{ - return __arm_vminnmvq_p_f32 (__a, __b, __p); -} - __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vpselq (float32x4_t __a, float32x4_t __b, mve_pred16_t __p) @@ -21372,36 +21124,12 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vmaxnmaq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vmaxnmaq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) -#define __arm_vmaxnmavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmavq_f16 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmavq_f32 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float32x4_t)));}) - -#define __arm_vmaxnmvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( 
(int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_f16 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_f32 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float32x4_t)));}) - -#define __arm_vmaxnmvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_f16 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_f32 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float32x4_t)));}) - #define __arm_vminnmaq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vminnmaq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vminnmaq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));}) -#define __arm_vminnmavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmavq_f16 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmavq_f32 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float32x4_t)));}) - #define __arm_vbrsrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ int (*)[__ARM_mve_type_int8x16_t]: __arm_vbrsrq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), 
\ @@ -21413,12 +21141,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_float16x8_t]: __arm_vbrsrq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), p1), \ int (*)[__ARM_mve_type_float32x4_t]: __arm_vbrsrq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), p1));}) -#define __arm_vminnmvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmvq_f16 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmvq_f32 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float32x4_t)));}) - #define __arm_vqshluq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshluq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \ @@ -21890,48 +21612,12 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vmaxnmaq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vmaxnmaq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) -#define __arm_vmaxnmavq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vmaxnmavq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vmaxnmavq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) - -#define __arm_vmaxnmvq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int 
(*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) - -#define __arm_vmaxnmavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmavq_p_f16 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float16x8_t), p2), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmavq_p_f32 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float32x4_t), p2));}) - -#define __arm_vmaxnmvq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_p_f16 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float16x8_t), p2), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_p_f32 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float32x4_t), p2));}) - #define __arm_vminnmaq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vminnmaq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vminnmaq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) -#define __arm_vminnmavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int 
(*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmavq_p_f16 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float16x8_t), p2), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmavq_p_f32 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float32x4_t), p2));}) - -#define __arm_vminnmvq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmvq_p_f16 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float16x8_t), p2), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmvq_p_f32 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float32x4_t), p2));}) - #define __arm_vrev64q_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ -- GitLab