From 218881ac83c9e0cb80a21a34a829c44a51d1a34a Mon Sep 17 00:00:00 2001 From: Christophe Lyon <christophe.lyon@linaro.org> Date: Tue, 29 Oct 2024 10:34:23 +0000 Subject: [PATCH] arm: [MVE intrinsics] rework vldr gather_offset Implement vldr?q_gather_offset using the new MVE builtins framework. The patch introduces a new attribute iterator (MVE_u_elem) to accommodate the fact that ACLE's expected output description uses "uNN" for all modes, except V8HF where it expects ".f16". Using "V_sz_elem" would work, but would require to update several testcases. gcc/ChangeLog: * config/arm/arm-mve-builtins-base.cc (class vldrq_gather_impl): New. (vldrbq_gather, vldrdq_gather, vldrhq_gather, vldrwq_gather): New. * config/arm/arm-mve-builtins-base.def (vldrbq_gather) (vldrdq_gather, vldrhq_gather, vldrwq_gather): New. * config/arm/arm-mve-builtins-base.h (vldrbq_gather) (vldrdq_gather, vldrhq_gather, vldrwq_gather): New. * config/arm/arm_mve.h (vldrbq_gather_offset): Delete. (vldrbq_gather_offset_z): Delete. (vldrhq_gather_offset): Delete. (vldrhq_gather_offset_z): Delete. (vldrdq_gather_offset): Delete. (vldrdq_gather_offset_z): Delete. (vldrwq_gather_offset): Delete. (vldrwq_gather_offset_z): Delete. (vldrbq_gather_offset_u8): Delete. (vldrbq_gather_offset_s8): Delete. (vldrbq_gather_offset_u16): Delete. (vldrbq_gather_offset_s16): Delete. (vldrbq_gather_offset_u32): Delete. (vldrbq_gather_offset_s32): Delete. (vldrbq_gather_offset_z_s16): Delete. (vldrbq_gather_offset_z_u8): Delete. (vldrbq_gather_offset_z_s32): Delete. (vldrbq_gather_offset_z_u16): Delete. (vldrbq_gather_offset_z_u32): Delete. (vldrbq_gather_offset_z_s8): Delete. (vldrhq_gather_offset_s32): Delete. (vldrhq_gather_offset_s16): Delete. (vldrhq_gather_offset_u32): Delete. (vldrhq_gather_offset_u16): Delete. (vldrhq_gather_offset_z_s32): Delete. (vldrhq_gather_offset_z_s16): Delete. (vldrhq_gather_offset_z_u32): Delete. (vldrhq_gather_offset_z_u16): Delete. (vldrdq_gather_offset_s64): Delete. 
(vldrdq_gather_offset_u64): Delete. (vldrdq_gather_offset_z_s64): Delete. (vldrdq_gather_offset_z_u64): Delete. (vldrhq_gather_offset_f16): Delete. (vldrhq_gather_offset_z_f16): Delete. (vldrwq_gather_offset_f32): Delete. (vldrwq_gather_offset_s32): Delete. (vldrwq_gather_offset_u32): Delete. (vldrwq_gather_offset_z_f32): Delete. (vldrwq_gather_offset_z_s32): Delete. (vldrwq_gather_offset_z_u32): Delete. (__arm_vldrbq_gather_offset_u8): Delete. (__arm_vldrbq_gather_offset_s8): Delete. (__arm_vldrbq_gather_offset_u16): Delete. (__arm_vldrbq_gather_offset_s16): Delete. (__arm_vldrbq_gather_offset_u32): Delete. (__arm_vldrbq_gather_offset_s32): Delete. (__arm_vldrbq_gather_offset_z_s8): Delete. (__arm_vldrbq_gather_offset_z_s32): Delete. (__arm_vldrbq_gather_offset_z_s16): Delete. (__arm_vldrbq_gather_offset_z_u8): Delete. (__arm_vldrbq_gather_offset_z_u32): Delete. (__arm_vldrbq_gather_offset_z_u16): Delete. (__arm_vldrhq_gather_offset_s32): Delete. (__arm_vldrhq_gather_offset_s16): Delete. (__arm_vldrhq_gather_offset_u32): Delete. (__arm_vldrhq_gather_offset_u16): Delete. (__arm_vldrhq_gather_offset_z_s32): Delete. (__arm_vldrhq_gather_offset_z_s16): Delete. (__arm_vldrhq_gather_offset_z_u32): Delete. (__arm_vldrhq_gather_offset_z_u16): Delete. (__arm_vldrdq_gather_offset_s64): Delete. (__arm_vldrdq_gather_offset_u64): Delete. (__arm_vldrdq_gather_offset_z_s64): Delete. (__arm_vldrdq_gather_offset_z_u64): Delete. (__arm_vldrwq_gather_offset_s32): Delete. (__arm_vldrwq_gather_offset_u32): Delete. (__arm_vldrwq_gather_offset_z_s32): Delete. (__arm_vldrwq_gather_offset_z_u32): Delete. (__arm_vldrhq_gather_offset_f16): Delete. (__arm_vldrhq_gather_offset_z_f16): Delete. (__arm_vldrwq_gather_offset_f32): Delete. (__arm_vldrwq_gather_offset_z_f32): Delete. (__arm_vldrbq_gather_offset): Delete. (__arm_vldrbq_gather_offset_z): Delete. (__arm_vldrhq_gather_offset): Delete. (__arm_vldrhq_gather_offset_z): Delete. (__arm_vldrdq_gather_offset): Delete. 
(__arm_vldrdq_gather_offset_z): Delete. (__arm_vldrwq_gather_offset): Delete. (__arm_vldrwq_gather_offset_z): Delete. * config/arm/arm_mve_builtins.def (vldrbq_gather_offset_u) (vldrbq_gather_offset_s, vldrbq_gather_offset_z_s) (vldrbq_gather_offset_z_u, vldrhq_gather_offset_z_u) (vldrhq_gather_offset_u, vldrhq_gather_offset_z_s) (vldrhq_gather_offset_s, vldrdq_gather_offset_s) (vldrhq_gather_offset_f, vldrwq_gather_offset_f) (vldrwq_gather_offset_s, vldrdq_gather_offset_z_s) (vldrhq_gather_offset_z_f, vldrwq_gather_offset_z_f) (vldrwq_gather_offset_z_s, vldrdq_gather_offset_u) (vldrwq_gather_offset_u, vldrdq_gather_offset_z_u) (vldrwq_gather_offset_z_u): Delete. * config/arm/iterators.md (MVE_u_elem): New. (supf): Remove VLDRBQGO_S, VLDRBQGO_U, VLDRHQGO_S, VLDRHQGO_U, VLDRDQGO_S, VLDRDQGO_U, VLDRWQGO_S, VLDRWQGO_U. (VLDRBGOQ, VLDRHGOQ, VLDRDGOQ, VLDRWGOQ): Delete. * config/arm/mve.md (mve_vldrbq_gather_offset_<supf><mode>): Delete. (mve_vldrbq_gather_offset_z_<supf><mode>): Delete. (mve_vldrhq_gather_offset_<supf><mode>): Delete. (mve_vldrhq_gather_offset_z_<supf><mode>): Delete. (mve_vldrdq_gather_offset_<supf>v2di): Delete. (mve_vldrdq_gather_offset_z_<supf>v2di): Delete. (mve_vldrhq_gather_offset_fv8hf): Delete. (mve_vldrhq_gather_offset_z_fv8hf): Delete. (mve_vldrwq_gather_offset_fv4sf): Delete. (mve_vldrwq_gather_offset_<supf>v4si): Delete. (mve_vldrwq_gather_offset_z_fv4sf): Delete. (mve_vldrwq_gather_offset_z_<supf>v4si): Delete. (@mve_vldrq_gather_offset_<mode>): New. (@mve_vldrq_gather_offset_extend_<mode><US>): New. (@mve_vldrq_gather_offset_z_<mode>): New. (@mve_vldrq_gather_offset_z_extend_<mode><US>): New. * config/arm/unspecs.md (VLDRBQGO_S, VLDRBQGO_U, VLDRHQGO_S) (VLDRHQGO_U, VLDRDQGO_S, VLDRDQGO_U, VLDRHQGO_F, VLDRWQGO_F) (VLDRWQGO_S, VLDRWQGO_U): Delete. (VLDRGOQ, VLDRGOQ_Z, VLDRGOQ_EXT, VLDRGOQ_EXT_Z): New. 
--- gcc/config/arm/arm-mve-builtins-base.cc | 47 ++ gcc/config/arm/arm-mve-builtins-base.def | 6 + gcc/config/arm/arm-mve-builtins-base.h | 4 + gcc/config/arm/arm_mve.h | 576 ----------------------- gcc/config/arm/arm_mve_builtins.def | 20 - gcc/config/arm/iterators.md | 24 +- gcc/config/arm/mve.md | 349 ++++---------- gcc/config/arm/unspecs.md | 14 +- 8 files changed, 156 insertions(+), 884 deletions(-) diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc index 27e31d6c8cdf..a4d498d534bc 100644 --- a/gcc/config/arm/arm-mve-builtins-base.cc +++ b/gcc/config/arm/arm-mve-builtins-base.cc @@ -411,6 +411,49 @@ public: } }; +/* Builds the vldrq_gather*offset intrinsics. */ +class vldrq_gather_impl : public load_extending +{ +public: + using load_extending::load_extending; + + rtx expand (function_expander &e) const override + { + insn_code icode; + switch (e.pred) + { + case PRED_none: + if (e.vector_mode (0) == e.memory_vector_mode ()) + /* Non-extending load case. */ + icode = code_for_mve_vldrq_gather_offset (e.vector_mode (0)); + else + /* Extending load case. */ + icode = code_for_mve_vldrq_gather_offset_extend + (e.memory_vector_mode (), + e.type_suffix (0).unsigned_p + ? ZERO_EXTEND + : SIGN_EXTEND); + break; + + case PRED_z: + if (e.vector_mode (0) == e.memory_vector_mode ()) + icode = code_for_mve_vldrq_gather_offset_z (e.vector_mode (0)); + else + icode = code_for_mve_vldrq_gather_offset_z_extend + (e.memory_vector_mode (), + e.type_suffix (0).unsigned_p + ? ZERO_EXTEND + : SIGN_EXTEND); + break; + + default: + gcc_unreachable (); + } + + return e.use_exact_insn (icode); + } +}; + /* Implements vctp8q, vctp16q, vctp32q and vctp64q intrinsics. 
*/ class vctpq_impl : public function_base { @@ -1208,8 +1251,12 @@ FUNCTION_WITH_M_N_NO_F (vhaddq, VHADDQ) FUNCTION_WITH_M_N_NO_F (vhsubq, VHSUBQ) FUNCTION (vld1q, vld1_impl,) FUNCTION (vldrbq, vldrq_impl, (TYPE_SUFFIX_s8, TYPE_SUFFIX_u8)) +FUNCTION (vldrbq_gather, vldrq_gather_impl, (TYPE_SUFFIX_s8, TYPE_SUFFIX_u8)) +FUNCTION (vldrdq_gather, vldrq_gather_impl, (TYPE_SUFFIX_s64, TYPE_SUFFIX_u64, NUM_TYPE_SUFFIXES)) FUNCTION (vldrhq, vldrq_impl, (TYPE_SUFFIX_s16, TYPE_SUFFIX_u16, TYPE_SUFFIX_f16)) +FUNCTION (vldrhq_gather, vldrq_gather_impl, (TYPE_SUFFIX_s16, TYPE_SUFFIX_u16, TYPE_SUFFIX_f16)) FUNCTION (vldrwq, vldrq_impl, (TYPE_SUFFIX_s32, TYPE_SUFFIX_u32, TYPE_SUFFIX_f32)) +FUNCTION (vldrwq_gather, vldrq_gather_impl, (TYPE_SUFFIX_s32, TYPE_SUFFIX_u32, TYPE_SUFFIX_f32)) FUNCTION_PRED_P_S (vmaxavq, VMAXAVQ) FUNCTION_WITHOUT_N_NO_U_F (vmaxaq, VMAXAQ) FUNCTION_ONLY_F (vmaxnmaq, VMAXNMAQ) diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def index a56fae7414ee..5e30a27ae74d 100644 --- a/gcc/config/arm/arm-mve-builtins-base.def +++ b/gcc/config/arm/arm-mve-builtins-base.def @@ -60,8 +60,12 @@ DEF_MVE_FUNCTION (vidupq, viddup, all_unsigned, mx_or_none) DEF_MVE_FUNCTION (viwdupq, vidwdup, all_unsigned, mx_or_none) DEF_MVE_FUNCTION (vld1q, load, all_integer, z_or_none) DEF_MVE_FUNCTION (vldrbq, load_ext, all_integer, z_or_none) +DEF_MVE_FUNCTION (vldrbq_gather, load_ext_gather_offset, all_integer, z_or_none) +DEF_MVE_FUNCTION (vldrdq_gather, load_ext_gather_offset, integer_64, z_or_none) DEF_MVE_FUNCTION (vldrhq, load_ext, integer_16_32, z_or_none) +DEF_MVE_FUNCTION (vldrhq_gather, load_ext_gather_offset, integer_16_32, z_or_none) DEF_MVE_FUNCTION (vldrwq, load_ext, integer_32, z_or_none) +DEF_MVE_FUNCTION (vldrwq_gather, load_ext_gather_offset, integer_32, z_or_none) DEF_MVE_FUNCTION (vmaxaq, binary_maxamina, all_signed, m_or_none) DEF_MVE_FUNCTION (vmaxavq, binary_maxavminav, all_signed, p_or_none) DEF_MVE_FUNCTION (vmaxq, 
binary, all_integer, mx_or_none) @@ -226,7 +230,9 @@ DEF_MVE_FUNCTION (vfmasq, ternary_n, all_float, m_or_none) DEF_MVE_FUNCTION (vfmsq, ternary, all_float, m_or_none) DEF_MVE_FUNCTION (vld1q, load, all_float, z_or_none) DEF_MVE_FUNCTION (vldrhq, load_ext, float_16, z_or_none) +DEF_MVE_FUNCTION (vldrhq_gather, load_ext_gather_offset, float_16, z_or_none) DEF_MVE_FUNCTION (vldrwq, load_ext, float_32, z_or_none) +DEF_MVE_FUNCTION (vldrwq_gather, load_ext_gather_offset, float_32, z_or_none) DEF_MVE_FUNCTION (vmaxnmaq, binary, all_float, m_or_none) DEF_MVE_FUNCTION (vmaxnmavq, binary_maxvminv, all_float, p_or_none) DEF_MVE_FUNCTION (vmaxnmq, binary, all_float, mx_or_none) diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h index 261248086dc3..88fcff3d577b 100644 --- a/gcc/config/arm/arm-mve-builtins-base.h +++ b/gcc/config/arm/arm-mve-builtins-base.h @@ -83,8 +83,12 @@ extern const function_base *const vidupq; extern const function_base *const viwdupq; extern const function_base *const vld1q; extern const function_base *const vldrbq; +extern const function_base *const vldrbq_gather; +extern const function_base *const vldrdq_gather; extern const function_base *const vldrhq; +extern const function_base *const vldrhq_gather; extern const function_base *const vldrwq; +extern const function_base *const vldrwq_gather; extern const function_base *const vmaxaq; extern const function_base *const vmaxavq; extern const function_base *const vmaxnmaq; diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 26846d1d20b9..b9df10d8d561 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -46,18 +46,10 @@ #ifndef __ARM_MVE_PRESERVE_USER_NAMESPACE #define vst4q(__addr, __value) __arm_vst4q(__addr, __value) -#define vldrbq_gather_offset(__base, __offset) __arm_vldrbq_gather_offset(__base, __offset) -#define vldrbq_gather_offset_z(__base, __offset, __p) __arm_vldrbq_gather_offset_z(__base, __offset, __p) -#define 
vldrhq_gather_offset(__base, __offset) __arm_vldrhq_gather_offset(__base, __offset) -#define vldrhq_gather_offset_z(__base, __offset, __p) __arm_vldrhq_gather_offset_z(__base, __offset, __p) #define vldrhq_gather_shifted_offset(__base, __offset) __arm_vldrhq_gather_shifted_offset(__base, __offset) #define vldrhq_gather_shifted_offset_z(__base, __offset, __p) __arm_vldrhq_gather_shifted_offset_z(__base, __offset, __p) -#define vldrdq_gather_offset(__base, __offset) __arm_vldrdq_gather_offset(__base, __offset) -#define vldrdq_gather_offset_z(__base, __offset, __p) __arm_vldrdq_gather_offset_z(__base, __offset, __p) #define vldrdq_gather_shifted_offset(__base, __offset) __arm_vldrdq_gather_shifted_offset(__base, __offset) #define vldrdq_gather_shifted_offset_z(__base, __offset, __p) __arm_vldrdq_gather_shifted_offset_z(__base, __offset, __p) -#define vldrwq_gather_offset(__base, __offset) __arm_vldrwq_gather_offset(__base, __offset) -#define vldrwq_gather_offset_z(__base, __offset, __p) __arm_vldrwq_gather_offset_z(__base, __offset, __p) #define vldrwq_gather_shifted_offset(__base, __offset) __arm_vldrwq_gather_shifted_offset(__base, __offset) #define vldrwq_gather_shifted_offset_z(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z(__base, __offset, __p) #define vuninitializedq(__v) __arm_vuninitializedq(__v) @@ -77,30 +69,10 @@ #define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value) #define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value) #define vpnot(__a) __arm_vpnot(__a) -#define vldrbq_gather_offset_u8(__base, __offset) __arm_vldrbq_gather_offset_u8(__base, __offset) -#define vldrbq_gather_offset_s8(__base, __offset) __arm_vldrbq_gather_offset_s8(__base, __offset) -#define vldrbq_gather_offset_u16(__base, __offset) __arm_vldrbq_gather_offset_u16(__base, __offset) -#define vldrbq_gather_offset_s16(__base, __offset) __arm_vldrbq_gather_offset_s16(__base, __offset) -#define vldrbq_gather_offset_u32(__base, __offset) 
__arm_vldrbq_gather_offset_u32(__base, __offset) -#define vldrbq_gather_offset_s32(__base, __offset) __arm_vldrbq_gather_offset_s32(__base, __offset) #define vldrwq_gather_base_s32(__addr, __offset) __arm_vldrwq_gather_base_s32(__addr, __offset) #define vldrwq_gather_base_u32(__addr, __offset) __arm_vldrwq_gather_base_u32(__addr, __offset) -#define vldrbq_gather_offset_z_s16(__base, __offset, __p) __arm_vldrbq_gather_offset_z_s16(__base, __offset, __p) -#define vldrbq_gather_offset_z_u8(__base, __offset, __p) __arm_vldrbq_gather_offset_z_u8(__base, __offset, __p) -#define vldrbq_gather_offset_z_s32(__base, __offset, __p) __arm_vldrbq_gather_offset_z_s32(__base, __offset, __p) -#define vldrbq_gather_offset_z_u16(__base, __offset, __p) __arm_vldrbq_gather_offset_z_u16(__base, __offset, __p) -#define vldrbq_gather_offset_z_u32(__base, __offset, __p) __arm_vldrbq_gather_offset_z_u32(__base, __offset, __p) -#define vldrbq_gather_offset_z_s8(__base, __offset, __p) __arm_vldrbq_gather_offset_z_s8(__base, __offset, __p) #define vldrwq_gather_base_z_u32(__addr, __offset, __p) __arm_vldrwq_gather_base_z_u32(__addr, __offset, __p) #define vldrwq_gather_base_z_s32(__addr, __offset, __p) __arm_vldrwq_gather_base_z_s32(__addr, __offset, __p) -#define vldrhq_gather_offset_s32(__base, __offset) __arm_vldrhq_gather_offset_s32(__base, __offset) -#define vldrhq_gather_offset_s16(__base, __offset) __arm_vldrhq_gather_offset_s16(__base, __offset) -#define vldrhq_gather_offset_u32(__base, __offset) __arm_vldrhq_gather_offset_u32(__base, __offset) -#define vldrhq_gather_offset_u16(__base, __offset) __arm_vldrhq_gather_offset_u16(__base, __offset) -#define vldrhq_gather_offset_z_s32(__base, __offset, __p) __arm_vldrhq_gather_offset_z_s32(__base, __offset, __p) -#define vldrhq_gather_offset_z_s16(__base, __offset, __p) __arm_vldrhq_gather_offset_z_s16(__base, __offset, __p) -#define vldrhq_gather_offset_z_u32(__base, __offset, __p) __arm_vldrhq_gather_offset_z_u32(__base, __offset, __p) 
-#define vldrhq_gather_offset_z_u16(__base, __offset, __p) __arm_vldrhq_gather_offset_z_u16(__base, __offset, __p) #define vldrhq_gather_shifted_offset_s32(__base, __offset) __arm_vldrhq_gather_shifted_offset_s32(__base, __offset) #define vldrhq_gather_shifted_offset_s16(__base, __offset) __arm_vldrhq_gather_shifted_offset_s16(__base, __offset) #define vldrhq_gather_shifted_offset_u32(__base, __offset) __arm_vldrhq_gather_shifted_offset_u32(__base, __offset) @@ -113,26 +85,14 @@ #define vldrdq_gather_base_u64(__addr, __offset) __arm_vldrdq_gather_base_u64(__addr, __offset) #define vldrdq_gather_base_z_s64(__addr, __offset, __p) __arm_vldrdq_gather_base_z_s64(__addr, __offset, __p) #define vldrdq_gather_base_z_u64(__addr, __offset, __p) __arm_vldrdq_gather_base_z_u64(__addr, __offset, __p) -#define vldrdq_gather_offset_s64(__base, __offset) __arm_vldrdq_gather_offset_s64(__base, __offset) -#define vldrdq_gather_offset_u64(__base, __offset) __arm_vldrdq_gather_offset_u64(__base, __offset) -#define vldrdq_gather_offset_z_s64(__base, __offset, __p) __arm_vldrdq_gather_offset_z_s64(__base, __offset, __p) -#define vldrdq_gather_offset_z_u64(__base, __offset, __p) __arm_vldrdq_gather_offset_z_u64(__base, __offset, __p) #define vldrdq_gather_shifted_offset_s64(__base, __offset) __arm_vldrdq_gather_shifted_offset_s64(__base, __offset) #define vldrdq_gather_shifted_offset_u64(__base, __offset) __arm_vldrdq_gather_shifted_offset_u64(__base, __offset) #define vldrdq_gather_shifted_offset_z_s64(__base, __offset, __p) __arm_vldrdq_gather_shifted_offset_z_s64(__base, __offset, __p) #define vldrdq_gather_shifted_offset_z_u64(__base, __offset, __p) __arm_vldrdq_gather_shifted_offset_z_u64(__base, __offset, __p) -#define vldrhq_gather_offset_f16(__base, __offset) __arm_vldrhq_gather_offset_f16(__base, __offset) -#define vldrhq_gather_offset_z_f16(__base, __offset, __p) __arm_vldrhq_gather_offset_z_f16(__base, __offset, __p) #define vldrhq_gather_shifted_offset_f16(__base, __offset) 
__arm_vldrhq_gather_shifted_offset_f16(__base, __offset) #define vldrhq_gather_shifted_offset_z_f16(__base, __offset, __p) __arm_vldrhq_gather_shifted_offset_z_f16(__base, __offset, __p) #define vldrwq_gather_base_f32(__addr, __offset) __arm_vldrwq_gather_base_f32(__addr, __offset) #define vldrwq_gather_base_z_f32(__addr, __offset, __p) __arm_vldrwq_gather_base_z_f32(__addr, __offset, __p) -#define vldrwq_gather_offset_f32(__base, __offset) __arm_vldrwq_gather_offset_f32(__base, __offset) -#define vldrwq_gather_offset_s32(__base, __offset) __arm_vldrwq_gather_offset_s32(__base, __offset) -#define vldrwq_gather_offset_u32(__base, __offset) __arm_vldrwq_gather_offset_u32(__base, __offset) -#define vldrwq_gather_offset_z_f32(__base, __offset, __p) __arm_vldrwq_gather_offset_z_f32(__base, __offset, __p) -#define vldrwq_gather_offset_z_s32(__base, __offset, __p) __arm_vldrwq_gather_offset_z_s32(__base, __offset, __p) -#define vldrwq_gather_offset_z_u32(__base, __offset, __p) __arm_vldrwq_gather_offset_z_u32(__base, __offset, __p) #define vldrwq_gather_shifted_offset_f32(__base, __offset) __arm_vldrwq_gather_shifted_offset_f32(__base, __offset) #define vldrwq_gather_shifted_offset_s32(__base, __offset) __arm_vldrwq_gather_shifted_offset_s32(__base, __offset) #define vldrwq_gather_shifted_offset_u32(__base, __offset) __arm_vldrwq_gather_shifted_offset_u32(__base, __offset) @@ -294,48 +254,6 @@ __arm_vpnot (mve_pred16_t __a) return __builtin_mve_vpnotv16bi (__a); } -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset_u8 (uint8_t const * __base, uint8x16_t __offset) -{ - return __builtin_mve_vldrbq_gather_offset_uv16qi ((__builtin_neon_qi *) __base, __offset); -} - -__extension__ extern __inline int8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset_s8 (int8_t const * __base, uint8x16_t __offset) -{ - return 
__builtin_mve_vldrbq_gather_offset_sv16qi ((__builtin_neon_qi *) __base, __offset); -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset_u16 (uint8_t const * __base, uint16x8_t __offset) -{ - return __builtin_mve_vldrbq_gather_offset_uv8hi ((__builtin_neon_qi *) __base, __offset); -} - -__extension__ extern __inline int16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset_s16 (int8_t const * __base, uint16x8_t __offset) -{ - return __builtin_mve_vldrbq_gather_offset_sv8hi ((__builtin_neon_qi *) __base, __offset); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset_u32 (uint8_t const * __base, uint32x4_t __offset) -{ - return __builtin_mve_vldrbq_gather_offset_uv4si ((__builtin_neon_qi *) __base, __offset); -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset_s32 (int8_t const * __base, uint32x4_t __offset) -{ - return __builtin_mve_vldrbq_gather_offset_sv4si ((__builtin_neon_qi *) __base, __offset); -} - __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrwq_gather_base_s32 (uint32x4_t __addr, const int __offset) @@ -350,48 +268,6 @@ __arm_vldrwq_gather_base_u32 (uint32x4_t __addr, const int __offset) return __builtin_mve_vldrwq_gather_base_uv4si (__addr, __offset); } -__extension__ extern __inline int8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset_z_s8 (int8_t const * __base, uint8x16_t __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrbq_gather_offset_z_sv16qi ((__builtin_neon_qi *) __base, __offset, __p); -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-__arm_vldrbq_gather_offset_z_s32 (int8_t const * __base, uint32x4_t __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrbq_gather_offset_z_sv4si ((__builtin_neon_qi *) __base, __offset, __p); -} - -__extension__ extern __inline int16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset_z_s16 (int8_t const * __base, uint16x8_t __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrbq_gather_offset_z_sv8hi ((__builtin_neon_qi *) __base, __offset, __p); -} - -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset_z_u8 (uint8_t const * __base, uint8x16_t __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrbq_gather_offset_z_uv16qi ((__builtin_neon_qi *) __base, __offset, __p); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset_z_u32 (uint8_t const * __base, uint32x4_t __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrbq_gather_offset_z_uv4si ((__builtin_neon_qi *) __base, __offset, __p); -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset_z_u16 (uint8_t const * __base, uint16x8_t __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrbq_gather_offset_z_uv8hi ((__builtin_neon_qi *) __base, __offset, __p); -} - __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrwq_gather_base_z_s32 (uint32x4_t __addr, const int __offset, mve_pred16_t __p) @@ -406,62 +282,6 @@ __arm_vldrwq_gather_base_z_u32 (uint32x4_t __addr, const int __offset, mve_pred1 return __builtin_mve_vldrwq_gather_base_z_uv4si (__addr, __offset, __p); } -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrhq_gather_offset_s32 (int16_t const * 
__base, uint32x4_t __offset) -{ - return __builtin_mve_vldrhq_gather_offset_sv4si ((__builtin_neon_hi *) __base, __offset); -} - -__extension__ extern __inline int16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrhq_gather_offset_s16 (int16_t const * __base, uint16x8_t __offset) -{ - return __builtin_mve_vldrhq_gather_offset_sv8hi ((__builtin_neon_hi *) __base, __offset); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrhq_gather_offset_u32 (uint16_t const * __base, uint32x4_t __offset) -{ - return __builtin_mve_vldrhq_gather_offset_uv4si ((__builtin_neon_hi *) __base, __offset); -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrhq_gather_offset_u16 (uint16_t const * __base, uint16x8_t __offset) -{ - return __builtin_mve_vldrhq_gather_offset_uv8hi ((__builtin_neon_hi *) __base, __offset); -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrhq_gather_offset_z_s32 (int16_t const * __base, uint32x4_t __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrhq_gather_offset_z_sv4si ((__builtin_neon_hi *) __base, __offset, __p); -} - -__extension__ extern __inline int16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrhq_gather_offset_z_s16 (int16_t const * __base, uint16x8_t __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrhq_gather_offset_z_sv8hi ((__builtin_neon_hi *) __base, __offset, __p); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrhq_gather_offset_z_u32 (uint16_t const * __base, uint32x4_t __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrhq_gather_offset_z_uv4si ((__builtin_neon_hi *) __base, __offset, __p); -} - -__extension__ extern __inline uint16x8_t 
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrhq_gather_offset_z_u16 (uint16_t const * __base, uint16x8_t __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrhq_gather_offset_z_uv8hi ((__builtin_neon_hi *) __base, __offset, __p); -} - __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrhq_gather_shifted_offset_s32 (int16_t const * __base, uint32x4_t __offset) @@ -546,35 +366,6 @@ __arm_vldrdq_gather_base_z_u64 (uint64x2_t __addr, const int __offset, mve_pred1 return __builtin_mve_vldrdq_gather_base_z_uv2di (__addr, __offset, __p); } -__extension__ extern __inline int64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrdq_gather_offset_s64 (int64_t const * __base, uint64x2_t __offset) -{ - return __builtin_mve_vldrdq_gather_offset_sv2di ((__builtin_neon_di *) __base, __offset); -} - -__extension__ extern __inline uint64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrdq_gather_offset_u64 (uint64_t const * __base, uint64x2_t __offset) -{ - return __builtin_mve_vldrdq_gather_offset_uv2di ((__builtin_neon_di *) __base, __offset); -} - -__extension__ extern __inline int64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrdq_gather_offset_z_s64 (int64_t const * __base, uint64x2_t __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrdq_gather_offset_z_sv2di ((__builtin_neon_di *) __base, __offset, __p); -} - - -__extension__ extern __inline uint64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrdq_gather_offset_z_u64 (uint64_t const * __base, uint64x2_t __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrdq_gather_offset_z_uv2di ((__builtin_neon_di *) __base, __offset, __p); -} - __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
__arm_vldrdq_gather_shifted_offset_s64 (int64_t const * __base, uint64x2_t __offset) @@ -603,34 +394,6 @@ __arm_vldrdq_gather_shifted_offset_z_u64 (uint64_t const * __base, uint64x2_t __ return __builtin_mve_vldrdq_gather_shifted_offset_z_uv2di ((__builtin_neon_di *) __base, __offset, __p); } -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_offset_s32 (int32_t const * __base, uint32x4_t __offset) -{ - return __builtin_mve_vldrwq_gather_offset_sv4si ((__builtin_neon_si *) __base, __offset); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_offset_u32 (uint32_t const * __base, uint32x4_t __offset) -{ - return __builtin_mve_vldrwq_gather_offset_uv4si ((__builtin_neon_si *) __base, __offset); -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_offset_z_s32 (int32_t const * __base, uint32x4_t __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrwq_gather_offset_z_sv4si ((__builtin_neon_si *) __base, __offset, __p); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_offset_z_u32 (uint32_t const * __base, uint32x4_t __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrwq_gather_offset_z_uv4si ((__builtin_neon_si *) __base, __offset, __p); -} - __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrwq_gather_shifted_offset_s32 (int32_t const * __base, uint32x4_t __offset) @@ -1169,20 +932,6 @@ __arm_vst4q_f32 (float32_t * __addr, float32x4x4_t __value) __builtin_mve_vst4qv4sf (__addr, __rv.__o); } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrhq_gather_offset_f16 (float16_t const * __base, 
uint16x8_t __offset) -{ - return __builtin_mve_vldrhq_gather_offset_fv8hf((__builtin_neon_hi *) __base, __offset); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrhq_gather_offset_z_f16 (float16_t const * __base, uint16x8_t __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrhq_gather_offset_z_fv8hf((__builtin_neon_hi *) __base, __offset, __p); -} - __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrhq_gather_shifted_offset_f16 (float16_t const * __base, uint16x8_t __offset) @@ -1211,20 +960,6 @@ __arm_vldrwq_gather_base_z_f32 (uint32x4_t __addr, const int __offset, mve_pred1 return __builtin_mve_vldrwq_gather_base_z_fv4sf (__addr, __offset, __p); } -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_offset_f32 (float32_t const * __base, uint32x4_t __offset) -{ - return __builtin_mve_vldrwq_gather_offset_fv4sf((__builtin_neon_si *) __base, __offset); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_offset_z_f32 (float32_t const * __base, uint32x4_t __offset, mve_pred16_t __p) -{ - return __builtin_mve_vldrwq_gather_offset_z_fv4sf((__builtin_neon_si *) __base, __offset, __p); -} - __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrwq_gather_shifted_offset_f32 (float32_t const * __base, uint32x4_t __offset) @@ -1391,146 +1126,6 @@ __arm_vst4q (uint32_t * __addr, uint32x4x4_t __value) __arm_vst4q_u32 (__addr, __value); } -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset (uint8_t const * __base, uint8x16_t __offset) -{ - return __arm_vldrbq_gather_offset_u8 (__base, __offset); -} - 
-__extension__ extern __inline int8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset (int8_t const * __base, uint8x16_t __offset) -{ - return __arm_vldrbq_gather_offset_s8 (__base, __offset); -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset (uint8_t const * __base, uint16x8_t __offset) -{ - return __arm_vldrbq_gather_offset_u16 (__base, __offset); -} - -__extension__ extern __inline int16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset (int8_t const * __base, uint16x8_t __offset) -{ - return __arm_vldrbq_gather_offset_s16 (__base, __offset); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset (uint8_t const * __base, uint32x4_t __offset) -{ - return __arm_vldrbq_gather_offset_u32 (__base, __offset); -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset (int8_t const * __base, uint32x4_t __offset) -{ - return __arm_vldrbq_gather_offset_s32 (__base, __offset); -} - -__extension__ extern __inline int8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset_z (int8_t const * __base, uint8x16_t __offset, mve_pred16_t __p) -{ - return __arm_vldrbq_gather_offset_z_s8 (__base, __offset, __p); -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset_z (int8_t const * __base, uint32x4_t __offset, mve_pred16_t __p) -{ - return __arm_vldrbq_gather_offset_z_s32 (__base, __offset, __p); -} - -__extension__ extern __inline int16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset_z (int8_t const * __base, uint16x8_t __offset, 
mve_pred16_t __p) -{ - return __arm_vldrbq_gather_offset_z_s16 (__base, __offset, __p); -} - -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset_z (uint8_t const * __base, uint8x16_t __offset, mve_pred16_t __p) -{ - return __arm_vldrbq_gather_offset_z_u8 (__base, __offset, __p); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset_z (uint8_t const * __base, uint32x4_t __offset, mve_pred16_t __p) -{ - return __arm_vldrbq_gather_offset_z_u32 (__base, __offset, __p); -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrbq_gather_offset_z (uint8_t const * __base, uint16x8_t __offset, mve_pred16_t __p) -{ - return __arm_vldrbq_gather_offset_z_u16 (__base, __offset, __p); -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrhq_gather_offset (int16_t const * __base, uint32x4_t __offset) -{ - return __arm_vldrhq_gather_offset_s32 (__base, __offset); -} - -__extension__ extern __inline int16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrhq_gather_offset (int16_t const * __base, uint16x8_t __offset) -{ - return __arm_vldrhq_gather_offset_s16 (__base, __offset); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrhq_gather_offset (uint16_t const * __base, uint32x4_t __offset) -{ - return __arm_vldrhq_gather_offset_u32 (__base, __offset); -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrhq_gather_offset (uint16_t const * __base, uint16x8_t __offset) -{ - return __arm_vldrhq_gather_offset_u16 (__base, __offset); -} - -__extension__ extern __inline int32x4_t -__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrhq_gather_offset_z (int16_t const * __base, uint32x4_t __offset, mve_pred16_t __p) -{ - return __arm_vldrhq_gather_offset_z_s32 (__base, __offset, __p); -} - -__extension__ extern __inline int16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrhq_gather_offset_z (int16_t const * __base, uint16x8_t __offset, mve_pred16_t __p) -{ - return __arm_vldrhq_gather_offset_z_s16 (__base, __offset, __p); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrhq_gather_offset_z (uint16_t const * __base, uint32x4_t __offset, mve_pred16_t __p) -{ - return __arm_vldrhq_gather_offset_z_u32 (__base, __offset, __p); -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrhq_gather_offset_z (uint16_t const * __base, uint16x8_t __offset, mve_pred16_t __p) -{ - return __arm_vldrhq_gather_offset_z_u16 (__base, __offset, __p); -} - __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrhq_gather_shifted_offset (int16_t const * __base, uint32x4_t __offset) @@ -1587,34 +1182,6 @@ __arm_vldrhq_gather_shifted_offset_z (uint16_t const * __base, uint16x8_t __offs return __arm_vldrhq_gather_shifted_offset_z_u16 (__base, __offset, __p); } -__extension__ extern __inline int64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrdq_gather_offset (int64_t const * __base, uint64x2_t __offset) -{ - return __arm_vldrdq_gather_offset_s64 (__base, __offset); -} - -__extension__ extern __inline uint64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrdq_gather_offset (uint64_t const * __base, uint64x2_t __offset) -{ - return __arm_vldrdq_gather_offset_u64 (__base, __offset); -} - -__extension__ extern __inline int64x2_t -__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrdq_gather_offset_z (int64_t const * __base, uint64x2_t __offset, mve_pred16_t __p) -{ - return __arm_vldrdq_gather_offset_z_s64 (__base, __offset, __p); -} - -__extension__ extern __inline uint64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrdq_gather_offset_z (uint64_t const * __base, uint64x2_t __offset, mve_pred16_t __p) -{ - return __arm_vldrdq_gather_offset_z_u64 (__base, __offset, __p); -} - __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrdq_gather_shifted_offset (int64_t const * __base, uint64x2_t __offset) @@ -1643,34 +1210,6 @@ __arm_vldrdq_gather_shifted_offset_z (uint64_t const * __base, uint64x2_t __offs return __arm_vldrdq_gather_shifted_offset_z_u64 (__base, __offset, __p); } -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_offset (int32_t const * __base, uint32x4_t __offset) -{ - return __arm_vldrwq_gather_offset_s32 (__base, __offset); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_offset (uint32_t const * __base, uint32x4_t __offset) -{ - return __arm_vldrwq_gather_offset_u32 (__base, __offset); -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_offset_z (int32_t const * __base, uint32x4_t __offset, mve_pred16_t __p) -{ - return __arm_vldrwq_gather_offset_z_s32 (__base, __offset, __p); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_offset_z (uint32_t const * __base, uint32x4_t __offset, mve_pred16_t __p) -{ - return __arm_vldrwq_gather_offset_z_u32 (__base, __offset, __p); -} - __extension__ extern __inline int32x4_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrwq_gather_shifted_offset (int32_t const * __base, uint32x4_t __offset) @@ -1953,20 +1492,6 @@ __arm_vst4q (float32_t * __addr, float32x4x4_t __value) __arm_vst4q_f32 (__addr, __value); } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrhq_gather_offset (float16_t const * __base, uint16x8_t __offset) -{ - return __arm_vldrhq_gather_offset_f16 (__base, __offset); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrhq_gather_offset_z (float16_t const * __base, uint16x8_t __offset, mve_pred16_t __p) -{ - return __arm_vldrhq_gather_offset_z_f16 (__base, __offset, __p); -} - __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrhq_gather_shifted_offset (float16_t const * __base, uint16x8_t __offset) @@ -1981,20 +1506,6 @@ __arm_vldrhq_gather_shifted_offset_z (float16_t const * __base, uint16x8_t __off return __arm_vldrhq_gather_shifted_offset_z_f16 (__base, __offset, __p); } -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_offset (float32_t const * __base, uint32x4_t __offset) -{ - return __arm_vldrwq_gather_offset_f32 (__base, __offset); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_offset_z (float32_t const * __base, uint32x4_t __offset, mve_pred16_t __p) -{ - return __arm_vldrwq_gather_offset_z_f32 (__base, __offset, __p); -} - __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrwq_gather_shifted_offset (float32_t const * __base, uint32x4_t __offset) @@ -2424,22 +1935,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld4q_f16 
(__ARM_mve_coerce_f16_ptr(p0, float16_t *)), \ int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld4q_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *)))) -#define __arm_vldrhq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_offset_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint32x4_t)), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_offset_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t)), \ - int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *), __ARM_mve_coerce(__p1, uint16x8_t)));}) - -#define __arm_vldrhq_gather_offset_z(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_z_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_offset_z_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_z_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_offset_z_u32 
(__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \ - int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_z_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2));}) - #define __arm_vldrhq_gather_shifted_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_shifted_offset_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \ @@ -2456,18 +1951,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_shifted_offset_z_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \ int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_shifted_offset_z_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2));}) -#define __arm_vldrwq_gather_offset(p0,p1) ( \ - _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vldrwq_gather_offset_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), p1), \ - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vldrwq_gather_offset_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), p1), \ - int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vldrwq_gather_offset_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *), p1))) - -#define __arm_vldrwq_gather_offset_z(p0,p1,p2) ( \ - _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vldrwq_gather_offset_z_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), p1, p2), \ - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vldrwq_gather_offset_z_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), p1, p2), \ - int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vldrwq_gather_offset_z_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *), p1, p2))) - #define 
__arm_vldrwq_gather_shifted_offset(p0,p1) ( \ _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vldrwq_gather_shifted_offset_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), p1), \ @@ -2542,29 +2025,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8x4_t]: __arm_vst4q_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8x4_t)), \ int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x4_t]: __arm_vst4q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4x4_t)));}) -#define __arm_vldrbq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \ - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_s16 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_s32 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t)), \ - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_u8 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \ - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));}) - -#define __arm_vldrhq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ - int 
(*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_offset_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint32x4_t)), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_offset_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t)));}) - -#define __arm_vldrhq_gather_offset_z(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_z_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_offset_z_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_z_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_offset_z_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));}) - #define __arm_vldrhq_gather_shifted_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_shifted_offset_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \ @@ -2579,16 +2039,6 @@ extern void *__ARM_undef; int 
(*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_shifted_offset_z_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_shifted_offset_z_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));}) -#define __arm_vldrwq_gather_offset(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vldrwq_gather_offset_s32 (__ARM_mve_coerce_s32_ptr(__p0, int32_t *), p1), \ - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vldrwq_gather_offset_u32 (__ARM_mve_coerce_u32_ptr(__p0, uint32_t *), p1));}) - -#define __arm_vldrwq_gather_offset_z(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vldrwq_gather_offset_z_s32 (__ARM_mve_coerce_s32_ptr(__p0, int32_t *), p1, p2), \ - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vldrwq_gather_offset_z_u32 (__ARM_mve_coerce_u32_ptr(__p0, uint32_t *), p1, p2));}) - #define __arm_vldrwq_gather_shifted_offset(p0,p1) ({ __typeof(p0) __p0 = (p0); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vldrwq_gather_shifted_offset_s32 (__ARM_mve_coerce_s32_ptr(__p0, int32_t *), p1), \ @@ -2662,14 +2112,6 @@ extern void *__ARM_undef; #endif /* MVE Integer. 
*/ -#define __arm_vldrdq_gather_offset(p0,p1) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ - int (*)[__ARM_mve_type_int64_t_ptr]: __arm_vldrdq_gather_offset_s64 (__ARM_mve_coerce_s64_ptr(p0, int64_t *), p1), \ - int (*)[__ARM_mve_type_uint64_t_ptr]: __arm_vldrdq_gather_offset_u64 (__ARM_mve_coerce_u64_ptr(p0, uint64_t *), p1))) - -#define __arm_vldrdq_gather_offset_z(p0,p1,p2) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ - int (*)[__ARM_mve_type_int64_t_ptr]: __arm_vldrdq_gather_offset_z_s64 (__ARM_mve_coerce_s64_ptr(p0, int64_t *), p1, p2), \ - int (*)[__ARM_mve_type_uint64_t_ptr]: __arm_vldrdq_gather_offset_z_u64 (__ARM_mve_coerce_u64_ptr(p0, uint64_t *), p1, p2))) - #define __arm_vldrdq_gather_shifted_offset(p0,p1) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ int (*)[__ARM_mve_type_int64_t_ptr]: __arm_vldrdq_gather_shifted_offset_s64 (__ARM_mve_coerce_s64_ptr(p0, int64_t *), p1), \ int (*)[__ARM_mve_type_uint64_t_ptr]: __arm_vldrdq_gather_shifted_offset_u64 (__ARM_mve_coerce_u64_ptr(p0, uint64_t *), p1))) @@ -2678,24 +2120,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int64_t_ptr]: __arm_vldrdq_gather_shifted_offset_z_s64 (__ARM_mve_coerce_s64_ptr(p0, int64_t *), p1, p2), \ int (*)[__ARM_mve_type_uint64_t_ptr]: __arm_vldrdq_gather_shifted_offset_z_u64 (__ARM_mve_coerce_u64_ptr(p0, uint64_t *), p1, p2))) -#define __arm_vldrbq_gather_offset_z(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_z_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \ - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_s16 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_s32 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), 
__ARM_mve_coerce(__p1, uint32x4_t), p2), \ - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_z_u8 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \ - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_u16 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_u32 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));}) - -#define __arm_vldrbq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_s8(__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \ - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_s16(__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_s32(__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t)), \ - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_u8(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \ - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));}) - #endif /* __cplusplus */ #endif /* __ARM_FEATURE_MVE */ #endif /* _GCC_ARM_MVE_H. 
*/ diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index 07f5a59b2481..5f328f5e6306 100644 --- a/gcc/config/arm/arm_mve_builtins.def +++ b/gcc/config/arm/arm_mve_builtins.def @@ -663,51 +663,31 @@ VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vandq_m_f, v8hf, v4sf) VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vaddq_m_n_f, v8hf, v4sf) VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vaddq_m_f, v8hf, v4sf) VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vabdq_m_f, v8hf, v4sf) -VAR3 (LDRGU, vldrbq_gather_offset_u, v16qi, v8hi, v4si) -VAR3 (LDRGS, vldrbq_gather_offset_s, v16qi, v8hi, v4si) VAR1 (LDRGBS, vldrwq_gather_base_s, v4si) VAR1 (LDRGBU, vldrwq_gather_base_u, v4si) VAR1 (LDRGBS_Z, vldrwq_gather_base_z_s, v4si) VAR1 (LDRGBU_Z, vldrwq_gather_base_z_u, v4si) -VAR3 (LDRGS_Z, vldrbq_gather_offset_z_s, v16qi, v8hi, v4si) -VAR3 (LDRGU_Z, vldrbq_gather_offset_z_u, v16qi, v8hi, v4si) VAR2 (LDRGU_Z, vldrhq_gather_shifted_offset_z_u, v8hi, v4si) -VAR2 (LDRGU_Z, vldrhq_gather_offset_z_u, v8hi, v4si) VAR2 (LDRGU, vldrhq_gather_shifted_offset_u, v8hi, v4si) -VAR2 (LDRGU, vldrhq_gather_offset_u, v8hi, v4si) VAR2 (LDRGS_Z, vldrhq_gather_shifted_offset_z_s, v8hi, v4si) -VAR2 (LDRGS_Z, vldrhq_gather_offset_z_s, v8hi, v4si) VAR2 (LDRGS, vldrhq_gather_shifted_offset_s, v8hi, v4si) -VAR2 (LDRGS, vldrhq_gather_offset_s, v8hi, v4si) VAR1 (LDRGBS, vldrdq_gather_base_s, v2di) VAR1 (LDRGBS, vldrwq_gather_base_f, v4sf) VAR1 (LDRGBS_Z, vldrdq_gather_base_z_s, v2di) VAR1 (LDRGBS_Z, vldrwq_gather_base_z_f, v4sf) VAR1 (LDRGBU, vldrdq_gather_base_u, v2di) VAR1 (LDRGBU_Z, vldrdq_gather_base_z_u, v2di) -VAR1 (LDRGS, vldrdq_gather_offset_s, v2di) VAR1 (LDRGS, vldrdq_gather_shifted_offset_s, v2di) -VAR1 (LDRGS, vldrhq_gather_offset_f, v8hf) VAR1 (LDRGS, vldrhq_gather_shifted_offset_f, v8hf) -VAR1 (LDRGS, vldrwq_gather_offset_f, v4sf) -VAR1 (LDRGS, vldrwq_gather_offset_s, v4si) VAR1 (LDRGS, vldrwq_gather_shifted_offset_f, v4sf) VAR1 (LDRGS, vldrwq_gather_shifted_offset_s, v4si) -VAR1 
(LDRGS_Z, vldrdq_gather_offset_z_s, v2di) VAR1 (LDRGS_Z, vldrdq_gather_shifted_offset_z_s, v2di) -VAR1 (LDRGS_Z, vldrhq_gather_offset_z_f, v8hf) VAR1 (LDRGS_Z, vldrhq_gather_shifted_offset_z_f, v8hf) -VAR1 (LDRGS_Z, vldrwq_gather_offset_z_f, v4sf) -VAR1 (LDRGS_Z, vldrwq_gather_offset_z_s, v4si) VAR1 (LDRGS_Z, vldrwq_gather_shifted_offset_z_f, v4sf) VAR1 (LDRGS_Z, vldrwq_gather_shifted_offset_z_s, v4si) -VAR1 (LDRGU, vldrdq_gather_offset_u, v2di) VAR1 (LDRGU, vldrdq_gather_shifted_offset_u, v2di) -VAR1 (LDRGU, vldrwq_gather_offset_u, v4si) VAR1 (LDRGU, vldrwq_gather_shifted_offset_u, v4si) -VAR1 (LDRGU_Z, vldrdq_gather_offset_z_u, v2di) VAR1 (LDRGU_Z, vldrdq_gather_shifted_offset_z_u, v2di) -VAR1 (LDRGU_Z, vldrwq_gather_offset_z_u, v4si) VAR1 (LDRGU_Z, vldrwq_gather_shifted_offset_z_u, v4si) VAR1 (LDRGBWBU_Z, vldrwq_gather_base_nowb_z_u, v4si) VAR1 (LDRGBWBU_Z, vldrdq_gather_base_nowb_z_u, v2di) diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 155e9ef6368e..4c0aac53c0dc 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -1797,6 +1797,16 @@ (DI "u64") (V2DI "u64") (V2SF "f32") (V4SF "f32")]) +;; Same, but for MVE gather loads. +;; Note that using "uNN" or "NN" everywhere would work too. +;; We use this to match the expected output described in ACLE. +(define_mode_attr MVE_u_elem [(V16QI "u8") + (V8HI "u16") + (V4SI "u32") + (V2DI "u64") + (V8HF "f16") + (V4SF "u32")]) + ;; Element types for extraction of unsigned scalars. 
(define_mode_attr V_uf_sclr [(V8QI "u8") (V16QI "u8") (V4HI "u16") (V8HI "u16") @@ -2527,12 +2537,12 @@ (VQRSHRNBQ_M_N_S "s") (VQRSHRNBQ_M_N_U "u") (VMLALDAVAXQ_P_S "s") (VMLALDAVAQ_P_S "s") (VMLALDAVAQ_P_U "u") - (VLDRBQGO_S "s") (VLDRBQGO_U "u") (VLDRWQGB_S "s") - (VLDRWQGB_U "u") (VLDRHQGO_S "s") - (VLDRHQGO_U "u") (VLDRHQGSO_S "s") (VLDRHQGSO_U "u") + (VLDRWQGB_S "s") + (VLDRWQGB_U "u") + (VLDRHQGSO_S "s") (VLDRHQGSO_U "u") (VLDRDQGB_S "s") (VLDRDQGB_U "u") - (VLDRDQGO_S "s") (VLDRDQGO_U "u") (VLDRDQGSO_S "s") - (VLDRDQGSO_U "u") (VLDRWQGO_S "s") (VLDRWQGO_U "u") + (VLDRDQGSO_S "s") + (VLDRDQGSO_U "u") (VLDRWQGSO_S "s") (VLDRWQGSO_U "u") (VSTRDQSB_S "s") (VSTRDQSB_U "u") (VLDRWQGBWB_S "s") (VLDRWQGBWB_U "u") (VLDRDQGBWB_S "s") @@ -2935,14 +2945,10 @@ (define_int_iterator VSHLLxQ_M_N [VSHLLBQ_M_N_U VSHLLBQ_M_N_S VSHLLTQ_M_N_U VSHLLTQ_M_N_S]) (define_int_iterator VSHRNBQ_M_N [VSHRNBQ_M_N_S VSHRNBQ_M_N_U]) (define_int_iterator VSHRNTQ_M_N [VSHRNTQ_M_N_S VSHRNTQ_M_N_U]) -(define_int_iterator VLDRBGOQ [VLDRBQGO_S VLDRBQGO_U]) (define_int_iterator VLDRWGBQ [VLDRWQGB_S VLDRWQGB_U]) -(define_int_iterator VLDRHGOQ [VLDRHQGO_S VLDRHQGO_U]) (define_int_iterator VLDRHGSOQ [VLDRHQGSO_S VLDRHQGSO_U]) (define_int_iterator VLDRDGBQ [VLDRDQGB_S VLDRDQGB_U]) -(define_int_iterator VLDRDGOQ [VLDRDQGO_S VLDRDQGO_U]) (define_int_iterator VLDRDGSOQ [VLDRDQGSO_S VLDRDQGSO_U]) -(define_int_iterator VLDRWGOQ [VLDRWQGO_S VLDRWQGO_U]) (define_int_iterator VLDRWGSOQ [VLDRWQGSO_S VLDRWQGSO_U]) (define_int_iterator VLDRWGBWBQ [VLDRWQGBWB_S VLDRWQGBWB_U]) (define_int_iterator VLDRDGBWBQ [VLDRDQGBWB_S VLDRDQGBWB_U]) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 1963a1ec4f60..b437fc9883fb 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -3416,30 +3416,92 @@ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_scatter_base_<mode>")) (set_attr "length" "4")]) +;; Vector gather loads with offset ;; ;; [vldrbq_gather_offset_s 
vldrbq_gather_offset_u] +;; [vldrhq_gather_offset_s vldrhq_gather_offset_u] +;; [vldrhq_gather_offset_f] +;; [vldrwq_gather_offset_s vldrwq_gather_offset_u] +;; [vldrwq_gather_offset_f] +;; [vldrdq_gather_offset_s vldrdq_gather_offset_u] ;; -(define_insn "mve_vldrbq_gather_offset_<supf><mode>" - [(set (match_operand:MVE_2 0 "s_register_operand" "=&w") - (unspec:MVE_2 [(match_operand:<MVE_B_ELEM> 1 "memory_operand" "Us") - (match_operand:MVE_2 2 "s_register_operand" "w")] - VLDRBGOQ)) +(define_insn "@mve_vldrq_gather_offset_<mode>" + [(set (match_operand:MVE_VLD_ST_scatter 0 "s_register_operand" "=&w") + (unspec:MVE_VLD_ST_scatter + [(match_operand:SI 1 "register_operand" "r") + (match_operand:<MVE_scatter_offset> 2 "s_register_operand" "w") + (mem:BLK (scratch))] + VLDRGOQ)) ] - "TARGET_HAVE_MVE" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - if (!strcmp ("<supf>","s") && <V_sz_elem> == 8) - output_asm_insn ("vldrb.u8\t%q0, [%m1, %q2]",ops); - else - output_asm_insn ("vldrb.<supf><V_sz_elem>\t%q0, [%m1, %q2]",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrbq_gather_offset_<supf><mode>")) + "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode)) + || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))" + "vldr<MVE_elem_ch>.<MVE_u_elem>\t%q0, [%1, %q2]" + [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrq_gather_offset_<mode>")) + (set_attr "length" "4")]) + +;; Extending vector gather loads with offset +;; +;; [vldrbq_gather_offset_s vldrbq_gather_offset_u] +;; [vldrhq_gather_offset_s vldrhq_gather_offset_u] +;; +(define_insn "@mve_vldrq_gather_offset_extend_<mode><US>" + [(set (match_operand:<MVE_wide_n_TYPE> 0 "s_register_operand" "=&w") + (SE:<MVE_wide_n_TYPE> + (unspec:MVE_w_narrow_TYPE + [(match_operand:SI 1 "register_operand" "r") + (match_operand:<MVE_wide_n_TYPE> 2 "s_register_operand" "w") + (mem:BLK (scratch))] + VLDRGOQ_EXT))) + ] + 
"(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MVE_wide_n_TYPE>mode))" + "vldr<MVE_elem_ch>.<US><MVE_wide_n_sz_elem>\t%q0, [%1, %q2]" + [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrq_gather_offset_extend_<mode><US>")) (set_attr "length" "4")]) +;; Predicated gather loads with offset +;; +;; [vldrbq_gather_offset_z_s vldrbq_gather_offset_z_u] +;; [vldrhq_gather_offset_z_s vldrhq_gather_offset_z_u] +;; [vldrhq_gather_offset_z_f] +;; [vldrwq_gather_offset_z_s vldrwq_gather_offset_z_u] +;; [vldrwq_gather_offset_z_f] +;; [vldrdq_gather_offset_z_s vldrdq_gather_offset_z_u] +;; +(define_insn "@mve_vldrq_gather_offset_z_<mode>" + [(set (match_operand:MVE_VLD_ST_scatter 0 "s_register_operand" "=&w") + (unspec:MVE_VLD_ST_scatter + [(match_operand:SI 1 "register_operand" "r") + (match_operand:<MVE_scatter_offset> 2 "s_register_operand" "w") + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up") + (mem:BLK (scratch))] + VLDRGOQ_Z)) + ] + "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode)) + || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))" + "vpst\n\tvldr<MVE_elem_ch>t.<MVE_u_elem>\t%q0, [%1, %q2]" + [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrq_gather_offset_<mode>")) + (set_attr "length" "8")]) + +;; Predicated extending gather loads with offset +;; +;; [vldrbq_gather_offset_z_s vldrbq_gather_offset_z_u] +;; [vldrhq_gather_offset_z_s vldrhq_gather_offset_z_u] +;; +(define_insn "@mve_vldrq_gather_offset_z_extend_<mode><US>" + [(set (match_operand:<MVE_wide_n_TYPE> 0 "s_register_operand" "=&w") + (SE:<MVE_wide_n_TYPE> + (unspec:MVE_w_narrow_TYPE + [(match_operand:SI 1 "register_operand" "r") + (match_operand:<MVE_wide_n_TYPE> 2 "s_register_operand" "w") + (match_operand:<MVE_wide_n_VPRED> 3 "vpr_register_operand" "Up") + (mem:BLK (scratch))] + VLDRGOQ_EXT_Z))) + ] + "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MVE_wide_n_TYPE>mode))" + "vpst\n\tvldr<MVE_elem_ch>t.<US><MVE_wide_n_sz_elem>\t%q0, [%1, %q2]" + [(set (attr 
"mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrq_gather_offset_extend_<mode><US>")) + (set_attr "length" "8")]) + ;; ;; [vldrwq_gather_base_s vldrwq_gather_base_u] ;; @@ -3482,32 +3544,6 @@ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_scatter_base_<mode>")) (set_attr "length" "8")]) -;; -;; [vldrbq_gather_offset_z_s vldrbq_gather_offset_z_u] -;; -(define_insn "mve_vldrbq_gather_offset_z_<supf><mode>" - [(set (match_operand:MVE_2 0 "s_register_operand" "=&w") - (unspec:MVE_2 [(match_operand:<MVE_B_ELEM> 1 "memory_operand" "Us") - (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] - VLDRBGOQ)) - ] - "TARGET_HAVE_MVE" -{ - rtx ops[4]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - ops[3] = operands[3]; - if (!strcmp ("<supf>","s") && <V_sz_elem> == 8) - output_asm_insn ("vpst\n\tvldrbt.u8\t%q0, [%m1, %q2]",ops); - else - output_asm_insn ("vpst\n\tvldrbt.<supf><V_sz_elem>\t%q0, [%m1, %q2]",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrbq_gather_offset_<supf><mode>")) - (set_attr "length" "8")]) - ;; ;; [vldrwq_gather_base_z_s vldrwq_gather_base_z_u] ;; @@ -3530,56 +3566,6 @@ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_<supf>v4si")) (set_attr "length" "8")]) -;; -;; [vldrhq_gather_offset_s vldrhq_gather_offset_u] -;; -(define_insn "mve_vldrhq_gather_offset_<supf><mode>" - [(set (match_operand:MVE_5 0 "s_register_operand" "=&w") - (unspec:MVE_5 [(match_operand:<MVE_H_ELEM> 1 "memory_operand" "Us") - (match_operand:MVE_5 2 "s_register_operand" "w")] - VLDRHGOQ)) - ] - "TARGET_HAVE_MVE" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - if (!strcmp ("<supf>","s") && <V_sz_elem> == 16) - output_asm_insn ("vldrh.u16\t%q0, [%m1, %q2]",ops); - else - output_asm_insn ("vldrh.<supf><V_sz_elem>\t%q0, [%m1, %q2]",ops); - return ""; -} - [(set 
(attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_offset_<supf><mode>")) - (set_attr "length" "4")]) - -;; -;; [vldrhq_gather_offset_z_s vldrhq_gather_offset_z_u] -;; -(define_insn "mve_vldrhq_gather_offset_z_<supf><mode>" - [(set (match_operand:MVE_5 0 "s_register_operand" "=&w") - (unspec:MVE_5 [(match_operand:<MVE_H_ELEM> 1 "memory_operand" "Us") - (match_operand:MVE_5 2 "s_register_operand" "w") - (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up") - ]VLDRHGOQ)) - ] - "TARGET_HAVE_MVE" -{ - rtx ops[4]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - ops[3] = operands[3]; - if (!strcmp ("<supf>","s") && <V_sz_elem> == 16) - output_asm_insn ("vpst\n\tvldrht.u16\t%q0, [%m1, %q2]",ops); - else - output_asm_insn ("vpst\n\tvldrht.<supf><V_sz_elem>\t%q0, [%m1, %q2]",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_offset_<supf><mode>")) - (set_attr "length" "8")]) - ;; ;; [vldrhq_gather_shifted_offset_s vldrhq_gather_shifted_offset_u] ;; @@ -3673,49 +3659,6 @@ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_base_<supf>v2di")) (set_attr "length" "8")]) -;; -;; [vldrdq_gather_offset_s vldrdq_gather_offset_u] -;; -(define_insn "mve_vldrdq_gather_offset_<supf>v2di" - [(set (match_operand:V2DI 0 "s_register_operand" "=&w") - (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "Us") - (match_operand:V2DI 2 "s_register_operand" "w")] - VLDRDGOQ)) - ] - "TARGET_HAVE_MVE" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vldrd.u64\t%q0, [%m1, %q2]",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_offset_<supf>v2di")) - (set_attr "length" "4")]) - -;; -;; [vldrdq_gather_offset_z_s vldrdq_gather_offset_z_u] -;; -(define_insn "mve_vldrdq_gather_offset_z_<supf>v2di" - [(set (match_operand:V2DI 0 "s_register_operand" "=&w") - (unspec:V2DI 
[(match_operand:V2DI 1 "memory_operand" "Us") - (match_operand:V2DI 2 "s_register_operand" "w") - (match_operand:V2QI 3 "vpr_register_operand" "Up")] - VLDRDGOQ)) - ] - "TARGET_HAVE_MVE" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\n\tvldrdt.u64\t%q0, [%m1, %q2]",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_offset_<supf>v2di")) - (set_attr "length" "8")]) - ;; ;; [vldrdq_gather_shifted_offset_s vldrdq_gather_shifted_offset_u] ;; @@ -3759,50 +3702,6 @@ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_shifted_offset_<supf>v2di")) (set_attr "length" "8")]) -;; -;; [vldrhq_gather_offset_f] -;; -(define_insn "mve_vldrhq_gather_offset_fv8hf" - [(set (match_operand:V8HF 0 "s_register_operand" "=&w") - (unspec:V8HF [(match_operand:V8HI 1 "memory_operand" "Us") - (match_operand:V8HI 2 "s_register_operand" "w")] - VLDRHQGO_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vldrh.f16\t%q0, [%m1, %q2]",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_offset_fv8hf")) - (set_attr "length" "4")]) - -;; -;; [vldrhq_gather_offset_z_f] -;; -(define_insn "mve_vldrhq_gather_offset_z_fv8hf" - [(set (match_operand:V8HF 0 "s_register_operand" "=&w") - (unspec:V8HF [(match_operand:V8HI 1 "memory_operand" "Us") - (match_operand:V8HI 2 "s_register_operand" "w") - (match_operand:V8BI 3 "vpr_register_operand" "Up")] - VLDRHQGO_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ops[4]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - ops[3] = operands[3]; - output_asm_insn ("vpst\n\tvldrht.f16\t%q0, [%m1, %q2]",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_offset_fv8hf")) - (set_attr "length" 
"8")]) - ;; ;; [vldrhq_gather_shifted_offset_f] ;; @@ -3890,94 +3789,6 @@ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_fv4sf")) (set_attr "length" "8")]) -;; -;; [vldrwq_gather_offset_f] -;; -(define_insn "mve_vldrwq_gather_offset_fv4sf" - [(set (match_operand:V4SF 0 "s_register_operand" "=&w") - (unspec:V4SF [(match_operand:V4SI 1 "memory_operand" "Us") - (match_operand:V4SI 2 "s_register_operand" "w")] - VLDRWQGO_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vldrw.u32\t%q0, [%m1, %q2]",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_offset_fv4sf")) - (set_attr "length" "4")]) - -;; -;; [vldrwq_gather_offset_s vldrwq_gather_offset_u] -;; -(define_insn "mve_vldrwq_gather_offset_<supf>v4si" - [(set (match_operand:V4SI 0 "s_register_operand" "=&w") - (unspec:V4SI [(match_operand:V4SI 1 "memory_operand" "Us") - (match_operand:V4SI 2 "s_register_operand" "w")] - VLDRWGOQ)) - ] - "TARGET_HAVE_MVE" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vldrw.u32\t%q0, [%m1, %q2]",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_offset_<supf>v4si")) - (set_attr "length" "4")]) - -;; -;; [vldrwq_gather_offset_z_f] -;; -(define_insn "mve_vldrwq_gather_offset_z_fv4sf" - [(set (match_operand:V4SF 0 "s_register_operand" "=&w") - (unspec:V4SF [(match_operand:V4SI 1 "memory_operand" "Us") - (match_operand:V4SI 2 "s_register_operand" "w") - (match_operand:V4BI 3 "vpr_register_operand" "Up")] - VLDRWQGO_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ops[4]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - ops[3] = operands[3]; - output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%m1, %q2]",ops); - return ""; -} - [(set (attr 
"mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_offset_fv4sf")) - (set_attr "length" "8")]) - -;; -;; [vldrwq_gather_offset_z_s vldrwq_gather_offset_z_u] -;; -(define_insn "mve_vldrwq_gather_offset_z_<supf>v4si" - [(set (match_operand:V4SI 0 "s_register_operand" "=&w") - (unspec:V4SI [(match_operand:V4SI 1 "memory_operand" "Us") - (match_operand:V4SI 2 "s_register_operand" "w") - (match_operand:V4BI 3 "vpr_register_operand" "Up")] - VLDRWGOQ)) - ] - "TARGET_HAVE_MVE" -{ - rtx ops[4]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - ops[3] = operands[3]; - output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%m1, %q2]",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_offset_<supf>v4si")) - (set_attr "length" "8")]) - ;; ;; [vldrwq_gather_shifted_offset_f] ;; diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index 182908909ab5..cdad4ed8a7a2 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -1156,26 +1156,20 @@ VLDRQ_Z VLDRQ_EXT VLDRQ_EXT_Z - VLDRBQGO_S - VLDRBQGO_U + VLDRGOQ + VLDRGOQ_Z + VLDRGOQ_EXT + VLDRGOQ_EXT_Z VLDRWQGB_S VLDRWQGB_U - VLDRHQGO_S - VLDRHQGO_U VLDRHQGSO_S VLDRHQGSO_U VLDRDQGB_S VLDRDQGB_U - VLDRDQGO_S - VLDRDQGO_U VLDRDQGSO_S VLDRDQGSO_U - VLDRHQGO_F VLDRHQGSO_F VLDRWQGB_F - VLDRWQGO_F - VLDRWQGO_S - VLDRWQGO_U VLDRWQGSO_F VLDRWQGSO_S VLDRWQGSO_U -- GitLab