diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc index 017380b7563b33a9d710ef3475cc5dc6eb25bca8..dd4b32546c2fae259cb65e8d07c28701cbd12c18 100644 --- a/gcc/config/aarch64/aarch64-c.cc +++ b/gcc/config/aarch64/aarch64-c.cc @@ -258,6 +258,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) aarch64_def_or_undef (TARGET_SME, "__ARM_FEATURE_SME", pfile); aarch64_def_or_undef (TARGET_SME_I16I64, "__ARM_FEATURE_SME_I16I64", pfile); aarch64_def_or_undef (TARGET_SME_F64F64, "__ARM_FEATURE_SME_F64F64", pfile); + aarch64_def_or_undef (TARGET_SME2, "__ARM_FEATURE_SME2", pfile); /* Not for ACLE, but required to keep "float.h" correct if we switch target between implementations that do or do not support ARMv8.2-A diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md index 505805e2ecf6eca3bb57baee0c3fbb710d749341..6cba6ab5f74c57be30e3703a1f13936a6adba612 100644 --- a/gcc/config/aarch64/aarch64-sme.md +++ b/gcc/config/aarch64/aarch64-sme.md @@ -31,14 +31,25 @@ ;; ---- Single-vector stores ;; ---- Table stores ;; ---- Single-vector moves +;; ---- Multi-vector moves ;; ---- Zeroing ;; ;; == Binary arithmetic ;; ---- Binary arithmetic on ZA tile +;; ---- Binary arithmetic on ZA slice +;; ---- Binary arithmetic, writing to ZA slice ;; ;; == Ternary arithmetic +;; ---- [INT] Dot product +;; ---- [INT] Ternary widening arithmetic on ZA slice ;; ---- [INT] Sum of outer products +;; ---- [FP] Dot product +;; ---- [FP] Ternary arithmetic on ZA slice +;; ---- [FP] Ternary widening arithmetic on ZA slice ;; ---- [FP] Sum of outer products +;; +;; == Table lookup +;; ---- Table lookup ;; ========================================================================= ;; == State management @@ -772,6 +783,131 @@ "mova\tza%0<hv>.q[%w1, 0], %2/m, %3.q" ) +;; ------------------------------------------------------------------------- +;; ---- Multi-vector moves +;; ------------------------------------------------------------------------- +;; Includes: +;; - MOVA +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sme_<optab><mode><mode>" + [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>") + (unspec:SVE_FULLx24 + [(reg:SVE_FULLx24 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:DI 1 "const_int_operand") + (match_operand:SI 2 "register_operand" "Ucj")] + SME_READ))] + "TARGET_STREAMING_SME2" + { + operands[3] = GEN_INT (<vector_count> - 1); + return "mova\t%0, za%1<hv>.<Vetype>[%w2, 0:%3]"; + } +) + +(define_insn "*aarch64_sme_<optab><mode><mode>_plus" + [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>") + (unspec:SVE_FULLx24 + [(reg:SVE_FULLx24 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:DI 1 "const_int_operand") + (plus:SI + (match_operand:SI 2 "register_operand" "Ucj") + (match_operand:SI 3 "const_int_operand"))] + SME_READ))] + "TARGET_STREAMING_SME2 + && UINTVAL (operands[3]) % <vector_count> == 0 + && UINTVAL (operands[3]) < 128 / <elem_bits>" + { + operands[4] = GEN_INT (INTVAL (operands[3]) + <vector_count> - 1); + return "mova\t%0, za%1<hv>.<Vetype>[%w2, %3:%4]"; + } +) + +(define_insn "@aarch64_sme_read<mode>" + [(set (match_operand:SVE_DIx24 0 "aligned_register_operand" "=Uw<vector_count>") + (unspec:SVE_DIx24 + [(reg:SVE_DIx24 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 1 "register_operand" "Uci")] + UNSPEC_SME_READ))] + "TARGET_STREAMING_SME2" + "mova\t%0, za.d[%w1, 0, vgx<vector_count>]" +) + +(define_insn 
"*aarch64_sme_read<mode>_plus" + [(set (match_operand:SVE_DIx24 0 "aligned_register_operand" "=Uw<vector_count>") + (unspec:SVE_DIx24 + [(reg:SVE_DIx24 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 1 "register_operand" "Uci") + (match_operand:SI 2 "const_0_to_7_operand"))] + UNSPEC_SME_READ))] + "TARGET_STREAMING_SME2" + "mova\t%0, za.d[%w1, %2, vgx<vector_count>]" +) + +(define_insn "@aarch64_sme_<optab><mode><mode>" + [(set (reg:SVE_FULLx24 ZA_REGNUM) + (unspec:SVE_FULLx24 + [(reg:SVE_FULLx24 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:DI 0 "const_int_operand") + (match_operand:SI 1 "register_operand" "Ucj") + (match_operand:SVE_FULLx24 2 "aligned_register_operand" "Uw<vector_count>")] + SME_WRITE))] + "TARGET_STREAMING_SME2" + { + operands[3] = GEN_INT (<vector_count> - 1); + return "mova\tza%0<hv>.<Vetype>[%w1, 0:%3], %2"; + } +) + +(define_insn "*aarch64_sme_<optab><mode><mode>_plus" + [(set (reg:SVE_FULLx24 ZA_REGNUM) + (unspec:SVE_FULLx24 + [(reg:SVE_FULLx24 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:DI 0 "const_int_operand") + (plus:SI + (match_operand:SI 1 "register_operand" "Ucj") + (match_operand:SI 2 "const_int_operand")) + (match_operand:SVE_FULLx24 3 "aligned_register_operand" "Uw<vector_count>")] + SME_WRITE))] + "TARGET_STREAMING_SME2 + && UINTVAL (operands[2]) % <vector_count> == 0 + && UINTVAL (operands[2]) < 128 / <elem_bits>" + { + operands[4] = GEN_INT (INTVAL (operands[2]) + <vector_count> - 1); + return "mova\tza%0<hv>.<Vetype>[%w1, %2:%4], %3"; + } +) + +(define_insn "@aarch64_sme_write<mode>" + [(set (reg:SVE_DIx24 ZA_REGNUM) + (unspec:SVE_DIx24 + [(reg:SVE_DIx24 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SVE_DIx24 1 "aligned_register_operand" "Uw<vector_count>")] + UNSPEC_SME_READ))] + "TARGET_STREAMING_SME2" + "mova\tza.d[%w0, 0, vgx<vector_count>], %1" +) + +(define_insn "*aarch64_sme_write<mode>_plus" + [(set (reg:SVE_DIx24 ZA_REGNUM) + (unspec:SVE_DIx24 + [(reg:SVE_DIx24 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_0_to_7_operand")) + (match_operand:SVE_DIx24 2 "aligned_register_operand" "Uw<vector_count>")] + UNSPEC_SME_READ))] + "TARGET_STREAMING_SME2" + "mova\tza.d[%w0, %1, vgx<vector_count>], %2" +) + ;; ------------------------------------------------------------------------- ;; ---- Zeroing ;; ------------------------------------------------------------------------- @@ -793,6 +929,14 @@ } ) +(define_insn "aarch64_sme_zero_zt0" + [(set (reg:V8DI ZT0_REGNUM) + (const_int 0)) + (use (reg:DI SME_STATE_REGNUM))] + "TARGET_SME2" + "zero\t{ zt0 }" +) + ;; ========================================================================= ;; == Binary arithmetic ;; ========================================================================= @@ -819,14 +963,543 @@ "<optab>\tza%0.<Vetype>, %1/m, %2/m, %3.<Vetype>" ) +;; ------------------------------------------------------------------------- +;; ---- Binary arithmetic on ZA slice +;; ------------------------------------------------------------------------- +;; Includes: +;; - ADD +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sme_<optab><mode>" + [(set (reg:SME_ZA_SDIx24 ZA_REGNUM) + (unspec:SME_ZA_SDIx24 + [(reg:SME_ZA_SDIx24 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_SDIx24 1 "aligned_register_operand" 
"Uw<vector_count>")] + SME_BINARY_SLICE_SDI))] + "TARGET_STREAMING_SME2" + "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1" +) + +(define_insn "*aarch64_sme_<optab><mode>_plus" + [(set (reg:SME_ZA_SDIx24 ZA_REGNUM) + (unspec:SME_ZA_SDIx24 + [(reg:SME_ZA_SDIx24 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_0_to_7_operand")) + (match_operand:SME_ZA_SDIx24 2 "aligned_register_operand" "Uw<vector_count>")] + SME_BINARY_SLICE_SDI))] + "TARGET_STREAMING_SME2" + "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2" +) + +(define_insn "@aarch64_sme_<optab><mode>" + [(set (reg:SME_ZA_SDFx24 ZA_REGNUM) + (unspec:SME_ZA_SDFx24 + [(reg:SME_ZA_SDFx24 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")] + SME_BINARY_SLICE_SDF))] + "TARGET_STREAMING_SME2" + "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1" +) + +(define_insn "*aarch64_sme_<optab><mode>_plus" + [(set (reg:SME_ZA_SDFx24 ZA_REGNUM) + (unspec:SME_ZA_SDFx24 + [(reg:SME_ZA_SDFx24 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_0_to_7_operand")) + (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")] + SME_BINARY_SLICE_SDF))] + "TARGET_STREAMING_SME2" + "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2" +) + +;; ------------------------------------------------------------------------- +;; ---- Binary arithmetic, writing to ZA slice +;; ------------------------------------------------------------------------- +;; Includes: +;; - ADD +;; - SUB +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sme_<optab><mode>" + [(set (reg:SME_ZA_SDIx24 ZA_REGNUM) + (unspec:SME_ZA_SDIx24 + [(reg:SME_ZA_SDIx24 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_SDIx24 1 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SME_ZA_SDIx24 2 "aligned_register_operand" "Uw<vector_count>")] + SME_BINARY_WRITE_SLICE_SDI))] + "TARGET_STREAMING_SME2" + "<sme_int_op>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2" +) + +(define_insn "*aarch64_sme_<optab><mode>_plus" + [(set (reg:SME_ZA_SDIx24 ZA_REGNUM) + (unspec:SME_ZA_SDIx24 + [(reg:SME_ZA_SDIx24 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_0_to_7_operand")) + (match_operand:SME_ZA_SDIx24 2 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SME_ZA_SDIx24 3 "aligned_register_operand" "Uw<vector_count>")] + SME_BINARY_WRITE_SLICE_SDI))] + "TARGET_STREAMING_SME2" + "<sme_int_op>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3" +) + +(define_insn "@aarch64_sme_single_<optab><mode>" + [(set (reg:SME_ZA_SDIx24 ZA_REGNUM) + (unspec:SME_ZA_SDIx24 + [(reg:SME_ZA_SDIx24 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_SDIx24 1 "register_operand" "w") + (vec_duplicate:SME_ZA_SDIx24 + (match_operand:<VSINGLE> 2 "register_operand" "x"))] + SME_BINARY_WRITE_SLICE_SDI))] + "TARGET_STREAMING_SME2" + "<sme_int_op>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>" +) + +(define_insn "*aarch64_sme_single_<optab><mode>_plus" + [(set (reg:SME_ZA_SDIx24 ZA_REGNUM) + (unspec:SME_ZA_SDIx24 + [(reg:SME_ZA_SDIx24 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + 
(plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_0_to_7_operand")) + (match_operand:SME_ZA_SDIx24 2 "register_operand" "w") + (vec_duplicate:SME_ZA_SDIx24 + (match_operand:<VSINGLE> 3 "register_operand" "x"))] + SME_BINARY_WRITE_SLICE_SDI))] + "TARGET_STREAMING_SME2" + "<sme_int_op>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>" +) + ;; ========================================================================= ;; == Ternary arithmetic ;; ========================================================================= ;; ------------------------------------------------------------------------- -;; ---- [INT] Sum of outer products +;; ---- [INT] Dot product +;; ------------------------------------------------------------------------- +;; Includes: +;; - SDOT +;; - SUDOT +;; - UDOT +;; - USDOT +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sme_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>" + [(set (reg:SME_ZA_SDI ZA_REGNUM) + (unspec:SME_ZA_SDI + [(reg:SME_ZA_SDI ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_BHIx24 1 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")] + SME_INT_DOTPROD))] + "TARGET_STREAMING_SME2 + && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16) + && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)" + "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, 0, vgx<vector_count>], %1, %2" +) + +(define_insn "*aarch64_sme_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus" + [(set (reg:SME_ZA_SDI ZA_REGNUM) + (unspec:SME_ZA_SDI + [(reg:SME_ZA_SDI ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_0_to_7_operand")) + (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SME_ZA_BHIx24 3 "aligned_register_operand" "Uw<vector_count>")] + SME_INT_DOTPROD))] + "TARGET_STREAMING_SME2 + && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16) + && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)" + "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, %1, vgx<vector_count>], %2, %3" +) + +(define_insn "@aarch64_sme_single_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>" + [(set (reg:SME_ZA_SDI ZA_REGNUM) + (unspec:SME_ZA_SDI + [(reg:SME_ZA_SDI ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_BHIx24 1 "register_operand" "w") + (vec_duplicate:SME_ZA_BHIx24 + (match_operand:<VSINGLE> 2 "register_operand" "x"))] + SME_INT_DOTPROD))] + "TARGET_STREAMING_SME2 + && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16) + && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)" + "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_BHIx24:Vetype>" +) + +(define_insn "*aarch64_sme_single_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus" + [(set (reg:SME_ZA_SDI ZA_REGNUM) + (unspec:SME_ZA_SDI + [(reg:SME_ZA_SDI ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_0_to_7_operand")) + (match_operand:SME_ZA_BHIx24 2 "register_operand" "w") + (vec_duplicate:SME_ZA_BHIx24 + (match_operand:<VSINGLE> 3 "register_operand" "x"))] + SME_INT_DOTPROD))] + "TARGET_STREAMING_SME2 + && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16) + && (<SME_ZA_BHIx24:elem_bits> == 8 || 
<has_16bit_form>)" + "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_BHIx24:Vetype>" +) + +;; SUDOT is USDOT with the operands swapped. +(define_insn "@aarch64_sme_single_sudot<VNx4SI_ONLY:mode><SME_ZA_BIx24:mode>" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (vec_duplicate:SME_ZA_BIx24 + (match_operand:<VSINGLE> 2 "register_operand" "x")) + (match_operand:SME_ZA_BIx24 1 "register_operand" "w")] + UNSPEC_SME_USDOT))] + "TARGET_STREAMING_SME2" + "sudot\tza.s[%w0, 0, vgx<vector_count>], %1, %2.b" +) + +(define_insn "*aarch64_sme_single_sudot<VNx4SI_ONLY:mode><SME_ZA_BIx24:mode>_plus" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_0_to_7_operand")) + (vec_duplicate:SME_ZA_BIx24 + (match_operand:<VSINGLE> 3 "register_operand" "x")) + (match_operand:SME_ZA_BIx24 2 "register_operand" "w")] + UNSPEC_SME_USDOT))] + "TARGET_STREAMING_SME2" + "sudot\tza.s[%w0, %1, vgx<vector_count>], %2, %3.b" +) + +(define_insn "@aarch64_sme_lane_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>" + [(set (reg:SME_ZA_SDI ZA_REGNUM) + (unspec:SME_ZA_SDI + [(reg:SME_ZA_SDI ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_BHIx24 1 "aligned_register_operand" "Uw<vector_count>") + (unspec:SME_ZA_BHIx24 + [(match_operand:<VSINGLE> 2 "register_operand" "x") + (match_operand:SI 3 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + SME_INT_DOTPROD_LANE))] + "TARGET_STREAMING_SME2 + && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16) + && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)" + "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_BHIx24:Vetype>[%3]" +) + +(define_insn "*aarch64_sme_lane_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus" + [(set (reg:SME_ZA_SDI ZA_REGNUM) + (unspec:SME_ZA_SDI + [(reg:SME_ZA_SDI ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_0_to_7_operand")) + (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>") + (unspec:SME_ZA_BHIx24 + [(match_operand:<VSINGLE> 3 "register_operand" "x") + (match_operand:SI 4 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + SME_INT_DOTPROD_LANE))] + "TARGET_STREAMING_SME2 + && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16) + && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)" + "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_BHIx24:Vetype>[%4]" +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Ternary widening arithmetic on ZA slice ;; ------------------------------------------------------------------------- ;; Includes: +;; - SMLA +;; - SMLS +;; - UMLA +;; - UMLS +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_BHI:mode>" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SVE_FULL_BHI 1 "register_operand" "w") + (match_operand:SVE_FULL_BHI 2 "register_operand" "x")] + SME_INT_TERNARY_SLICE))] + "TARGET_STREAMING_SME2" + 
"<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset>], %1.<SVE_FULL_BHI:Vetype>, %2.<SVE_FULL_BHI:Vetype>" +) + +(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_BHI:mode>_plus" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_<za32_offset_range>_operand")) + (match_operand:SVE_FULL_BHI 2 "register_operand" "w") + (match_operand:SVE_FULL_BHI 3 "register_operand" "x")] + SME_INT_TERNARY_SLICE))] + "TARGET_STREAMING_SME2" + { + operands[4] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>); + return "<optab><za32_long>\tza.s[%w0, %1:%4], %2.<SVE_FULL_BHI:Vetype>, %3.<SVE_FULL_BHI:Vetype>"; + } +) + +(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_BHIx24 1 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")] + SME_INT_TERNARY_SLICE))] + "TARGET_STREAMING_SME2" + "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset>, vgx<vector_count>], %1, %2" +) + +(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>_plus" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_<za32_offset_range>_operand")) + (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SME_ZA_BHIx24 3 "aligned_register_operand" "Uw<vector_count>")] + SME_INT_TERNARY_SLICE))] + "TARGET_STREAMING_SME2" + { + operands[4] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>); + return "<optab><za32_long>\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3"; + } +) + +(define_insn "@aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_BHIx24 1 "register_operand" "w") + (vec_duplicate:SME_ZA_BHIx24 + (match_operand:<SME_ZA_BHIx24:VSINGLE> 2 "register_operand" "x"))] + SME_INT_TERNARY_SLICE))] + "TARGET_STREAMING_SME2" + "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset>, vgx<vector_count>], %1, %2.<SME_ZA_BHIx24:Vetype>" +) + +(define_insn "*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>_plus" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_<za32_offset_range>_operand")) + (match_operand:SME_ZA_BHIx24 2 "register_operand" "w") + (vec_duplicate:SME_ZA_BHIx24 + (match_operand:<SME_ZA_BHIx24:VSINGLE> 3 "register_operand" "x"))] + SME_INT_TERNARY_SLICE))] + "TARGET_STREAMING_SME2" + { + operands[4] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>); + return "<optab><za32_long>\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3.<SME_ZA_BHIx24:Vetype>"; + } +) + +(define_insn "@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx124:mode>" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + 
(match_operand:SME_ZA_BHIx124 1 "<aligned_operand>" "<aligned_fpr>") + (unspec:SME_ZA_BHIx124 + [(match_operand:<VSINGLE> 2 "register_operand" "x") + (match_operand:SI 3 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + SME_INT_TERNARY_SLICE))] + "TARGET_STREAMING_SME2" + "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset><vg_modifier>], %1<z_suffix>, %2.<SME_ZA_BHIx124:Vetype>[%3]" +) + +(define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx124:mode>" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_<za32_offset_range>_operand")) + (match_operand:SME_ZA_BHIx124 2 "<aligned_operand>" "<aligned_fpr>") + (unspec:SME_ZA_BHIx124 + [(match_operand:<VSINGLE> 3 "register_operand" "x") + (match_operand:SI 4 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + SME_INT_TERNARY_SLICE))] + "TARGET_STREAMING_SME2" + { + operands[5] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>); + return "<optab><za32_long>\tza.s[%w0, %1:%5<vg_modifier>], %2<z_suffix>, %3.<SME_ZA_BHIx124:Vetype>[%4]"; + } +) + +(define_insn "@aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>" + [(set (reg:VNx2DI_ONLY ZA_REGNUM) + (unspec:VNx2DI_ONLY + [(reg:VNx2DI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:VNx8HI_ONLY 1 "register_operand" "w") + (match_operand:VNx8HI_ONLY 2 "register_operand" "x")] + SME_INT_TERNARY_SLICE))] + "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" + "<optab>ll\tza.d[%w0, 0:3], %1.h, %2.h" +) + +(define_insn "*aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>_plus" + [(set (reg:VNx2DI_ONLY ZA_REGNUM) + (unspec:VNx2DI_ONLY + [(reg:VNx2DI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_<za64_offset_range>_operand")) + (match_operand:VNx8HI_ONLY 2 "register_operand" "w") + (match_operand:VNx8HI_ONLY 3 "register_operand" "x")] + SME_INT_TERNARY_SLICE))] + "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" + { + operands[4] = GEN_INT (INTVAL (operands[1]) + 3); + return "<optab>ll\tza.d[%w0, %1:%4], %2.h, %3.h"; + } +) + +(define_insn "@aarch64_sme_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>" + [(set (reg:VNx2DI_ONLY ZA_REGNUM) + (unspec:VNx2DI_ONLY + [(reg:VNx2DI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_HIx24 1 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SME_ZA_HIx24 2 "aligned_register_operand" "Uw<vector_count>")] + SME_INT_TERNARY_SLICE))] + "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" + "<optab>ll\tza.d[%w0, 0:3, vgx<vector_count>], %1, %2" +) + +(define_insn "*aarch64_sme_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>_plus" + [(set (reg:VNx2DI_ONLY ZA_REGNUM) + (unspec:VNx2DI_ONLY + [(reg:VNx2DI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_<za64_offset_range>_operand")) + (match_operand:SME_ZA_HIx24 2 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SME_ZA_HIx24 3 "aligned_register_operand" "Uw<vector_count>")] + SME_INT_TERNARY_SLICE))] + "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" + { + operands[4] = GEN_INT (INTVAL (operands[1]) + 3); + return "<optab>ll\tza.d[%w0, %1:%4, vgx<vector_count>], %2, %3"; + } +) + +(define_insn 
"@aarch64_sme_single_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>" + [(set (reg:VNx2DI_ONLY ZA_REGNUM) + (unspec:VNx2DI_ONLY + [(reg:VNx2DI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_HIx24 1 "register_operand" "w") + (vec_duplicate:SME_ZA_HIx24 + (match_operand:<SME_ZA_HIx24:VSINGLE> 2 "register_operand" "x"))] + SME_INT_TERNARY_SLICE))] + "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" + "<optab>ll\tza.d[%w0, 0:3, vgx<vector_count>], %1, %2.h" +) + +(define_insn "*aarch64_sme_single_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>_plus" + [(set (reg:VNx2DI_ONLY ZA_REGNUM) + (unspec:VNx2DI_ONLY + [(reg:VNx2DI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_<za64_offset_range>_operand")) + (match_operand:SME_ZA_HIx24 2 "register_operand" "w") + (vec_duplicate:SME_ZA_HIx24 + (match_operand:<SME_ZA_HIx24:VSINGLE> 3 "register_operand" "x"))] + SME_INT_TERNARY_SLICE))] + "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" + { + operands[4] = GEN_INT (INTVAL (operands[1]) + 3); + return "<optab>ll\tza.d[%w0, %1:%4, vgx<vector_count>], %2, %3.h"; + } +) + +(define_insn "@aarch64_sme_lane_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx124:mode>" + [(set (reg:VNx2DI_ONLY ZA_REGNUM) + (unspec:VNx2DI_ONLY + [(reg:VNx2DI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_HIx124 1 "<aligned_operand>" "<aligned_fpr>") + (unspec:SME_ZA_HIx124 + [(match_operand:<VSINGLE> 2 "register_operand" "x") + (match_operand:SI 3 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + SME_INT_TERNARY_SLICE))] + "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" + "<optab>ll\tza.d[%w0, 0:3<vg_modifier>], %1<z_suffix>, %2.h[%3]" +) + +(define_insn "*aarch64_sme_lane_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx124:mode>" + [(set (reg:VNx2DI_ONLY ZA_REGNUM) + (unspec:VNx2DI_ONLY + [(reg:VNx2DI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_<za64_offset_range>_operand")) + (match_operand:SME_ZA_HIx124 2 "<aligned_operand>" "<aligned_fpr>") + (unspec:SME_ZA_HIx124 + [(match_operand:<VSINGLE> 3 "register_operand" "x") + (match_operand:SI 4 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + SME_INT_TERNARY_SLICE))] + "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" + { + operands[5] = GEN_INT (INTVAL (operands[1]) + 3); + return "<optab>ll\tza.d[%w0, %1:%5<vg_modifier>], %2<z_suffix>, %3.h[%4]"; + } +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Sum of outer products +;; ------------------------------------------------------------------------- +;; - BMOPA +;; - BMOPS ;; - SMOPA ;; - SMOPS ;; - SUMOPA @@ -867,6 +1540,380 @@ "<optab>\tza%0.d, %1/m, %2/m, %3.h, %4.h" ) +(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx8HI_ONLY:mode>" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:DI 0 "const_int_operand") + (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl") + (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl") + (match_operand:VNx8HI_ONLY 3 "register_operand" "w") + (match_operand:VNx8HI_ONLY 4 "register_operand" "w")] + SME2_INT_MOP))] + "TARGET_STREAMING_SME2" + "<optab>\tza%0.s, %1/m, %2/m, %3.h, %4.h" +) + +(define_insn 
"@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx4SI_ONLY:mode>" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:DI 0 "const_int_operand") + (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl") + (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl") + (match_operand:VNx4SI_ONLY 3 "register_operand" "w") + (match_operand:VNx4SI_ONLY 4 "register_operand" "w")] + SME2_BMOP))] + "TARGET_STREAMING_SME2" + "<optab>\tza%0.s, %1/m, %2/m, %3.s, %4.s" +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Dot product +;; ------------------------------------------------------------------------- +;; Includes: +;; - BFDOT +;; - FDOT +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_HFx24 1 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")] + SME_FP_DOTPROD))] + "TARGET_STREAMING_SME2" + "<b><optab>\tza.s[%w0, 0, vgx<vector_count>], %1, %2" +) + +(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_0_to_7_operand")) + (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SME_ZA_HFx24 3 "aligned_register_operand" "Uw<vector_count>")] + SME_FP_DOTPROD))] + "TARGET_STREAMING_SME2" + "<b><optab>\tza.s[%w0, %1, vgx<vector_count>], %2, %3" +) + +(define_insn "@aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_HFx24 1 "register_operand" "w") + (vec_duplicate:SME_ZA_HFx24 + (match_operand:<VSINGLE> 2 "register_operand" "x"))] + SME_FP_DOTPROD))] + "TARGET_STREAMING_SME2" + "<b><optab>\tza.s[%w0, 0, vgx<vector_count>], %1, %2.h" +) + +(define_insn "*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_0_to_7_operand")) + (match_operand:SME_ZA_HFx24 2 "register_operand" "w") + (vec_duplicate:SME_ZA_HFx24 + (match_operand:<VSINGLE> 3 "register_operand" "x"))] + SME_FP_DOTPROD))] + "TARGET_STREAMING_SME2" + "<b><optab>\tza.s[%w0, %1, vgx<vector_count>], %2, %3.h" +) + +(define_insn "@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_HFx24 1 "aligned_register_operand" "Uw<vector_count>") + (unspec:SME_ZA_HFx24 + [(match_operand:<VSINGLE> 2 "register_operand" "x") + (match_operand:SI 3 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + SME_FP_DOTPROD_LANE))] + "TARGET_STREAMING_SME2" + "<b><optab>\tza.s[%w0, 0, vgx<vector_count>], %1, 
%2.h[%3]" +) + +(define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_0_to_7_operand")) + (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>") + (unspec:SME_ZA_HFx24 + [(match_operand:<VSINGLE> 3 "register_operand" "x") + (match_operand:SI 4 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + SME_FP_DOTPROD_LANE))] + "TARGET_STREAMING_SME2" + "<b><optab>\tza.s[%w0, %1, vgx<vector_count>], %2, %3.h[%4]" +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Ternary arithmetic on ZA slice +;; ------------------------------------------------------------------------- +;; Includes: +;; - FMLA +;; - FMLS +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>" + [(set (reg:SME_ZA_SDF_I ZA_REGNUM) + (unspec:SME_ZA_SDF_I + [(reg:SME_ZA_SDF_I ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")] + SME_FP_TERNARY_SLICE))] + "TARGET_SME2 + && TARGET_STREAMING_SME + && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" + "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2" +) + +(define_insn "*aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus" + [(set (reg:SME_ZA_SDF_I ZA_REGNUM) + (unspec:SME_ZA_SDF_I + [(reg:SME_ZA_SDF_I ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_0_to_7_operand")) + (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SME_ZA_SDFx24 3 "aligned_register_operand" "Uw<vector_count>")] + SME_FP_TERNARY_SLICE))] + "TARGET_SME2 + && TARGET_STREAMING_SME + && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" + "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3" +) + +(define_insn "@aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>" + [(set (reg:SME_ZA_SDF_I ZA_REGNUM) + (unspec:SME_ZA_SDF_I + [(reg:SME_ZA_SDF_I ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_SDFx24 1 "register_operand" "w") + (vec_duplicate:SME_ZA_SDFx24 + (match_operand:<VSINGLE> 2 "register_operand" "x"))] + SME_FP_TERNARY_SLICE))] + "TARGET_SME2 + && TARGET_STREAMING_SME + && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" + "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_SDFx24:Vetype>" +) + +(define_insn "*aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus" + [(set (reg:SME_ZA_SDF_I ZA_REGNUM) + (unspec:SME_ZA_SDF_I + [(reg:SME_ZA_SDF_I ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_0_to_7_operand")) + (match_operand:SME_ZA_SDFx24 2 "register_operand" "w") + (vec_duplicate:SME_ZA_SDFx24 + (match_operand:<VSINGLE> 3 "register_operand" "x"))] + SME_FP_TERNARY_SLICE))] + "TARGET_SME2 + && TARGET_STREAMING_SME + && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" + "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, 
%3.<SME_ZA_SDFx24:Vetype>" +) + +(define_insn "@aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>" + [(set (reg:SME_ZA_SDF_I ZA_REGNUM) + (unspec:SME_ZA_SDF_I + [(reg:SME_ZA_SDF_I ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>") + (unspec:SME_ZA_SDFx24 + [(match_operand:<VSINGLE> 2 "register_operand" "x") + (match_operand:SI 3 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + SME_FP_TERNARY_SLICE))] + "TARGET_SME2 + && TARGET_STREAMING_SME + && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" + "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_SDFx24:Vetype>[%3]" +) + +(define_insn "*aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>" + [(set (reg:SME_ZA_SDF_I ZA_REGNUM) + (unspec:SME_ZA_SDF_I + [(reg:SME_ZA_SDF_I ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_0_to_7_operand")) + (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>") + (unspec:SME_ZA_SDFx24 + [(match_operand:<VSINGLE> 3 "register_operand" "x") + (match_operand:SI 4 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + SME_FP_TERNARY_SLICE))] + "TARGET_SME2 + && TARGET_STREAMING_SME + && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" + "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_SDFx24:Vetype>[%4]" +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Ternary widening arithmetic on ZA slice +;; ------------------------------------------------------------------------- +;; Includes: +;; - BFMLAL +;; - BFMLSL +;; - FMLAL +;; - FMLSL +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_HF:mode>" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SVE_FULL_HF 1 "register_operand" "w") + (match_operand:SVE_FULL_HF 2 "register_operand" "x")] + SME_FP_TERNARY_SLICE))] + "TARGET_STREAMING_SME2" + "<b><optab>l\tza.s[%w0, 0:1], %1.h, %2.h" +) + +(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_HF:mode>_plus" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_<za32_offset_range>_operand")) + (match_operand:SVE_FULL_HF 2 "register_operand" "w") + (match_operand:SVE_FULL_HF 3 "register_operand" "x")] + SME_FP_TERNARY_SLICE))] + "TARGET_STREAMING_SME2" + { + operands[4] = GEN_INT (INTVAL (operands[1]) + 1); + return "<b><optab>l\tza.s[%w0, %1:%4], %2.h, %3.h"; + } +) + +(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_HFx24 1 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")] + SME_FP_TERNARY_SLICE))] + "TARGET_STREAMING_SME2" + "<b><optab>l\tza.s[%w0, 0:1, vgx<vector_count>], %1, %2" +) + +(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + 
(unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_<za32_offset_range>_operand")) + (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SME_ZA_HFx24 3 "aligned_register_operand" "Uw<vector_count>")] + SME_FP_TERNARY_SLICE))] + "TARGET_STREAMING_SME2" + { + operands[4] = GEN_INT (INTVAL (operands[1]) + 1); + return "<b><optab>l\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3"; + } +) + +(define_insn "@aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_HFx24 1 "register_operand" "w") + (vec_duplicate:SME_ZA_HFx24 + (match_operand:<SME_ZA_HFx24:VSINGLE> 2 "register_operand" "x"))] + SME_FP_TERNARY_SLICE))] + "TARGET_STREAMING_SME2" + "<b><optab>l\tza.s[%w0, 0:1, vgx<vector_count>], %1, %2.h" +) + +(define_insn "*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_<za32_offset_range>_operand")) + (match_operand:SME_ZA_HFx24 2 "register_operand" "w") + (vec_duplicate:SME_ZA_HFx24 + (match_operand:<SME_ZA_HFx24:VSINGLE> 3 "register_operand" "x"))] + SME_FP_TERNARY_SLICE))] + "TARGET_STREAMING_SME2" + { + operands[4] = GEN_INT (INTVAL (operands[1]) + 1); + return "<b><optab>l\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3.h"; + } +) + +(define_insn "@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx124:mode>" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_HFx124 1 "<aligned_operand>" "<aligned_fpr>") + (unspec:SME_ZA_HFx124 + [(match_operand:<VSINGLE> 2 "register_operand" "x") + (match_operand:SI 3 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + SME_FP_TERNARY_SLICE))] + "TARGET_STREAMING_SME2" + "<b><optab>l\tza.s[%w0, 0:1<vg_modifier>], %1<z_suffix>, %2.h[%3]" +) + +(define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx124:mode>" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_<za32_offset_range>_operand")) + (match_operand:SME_ZA_HFx124 2 "<aligned_operand>" "<aligned_fpr>") + (unspec:SME_ZA_HFx124 + [(match_operand:<VSINGLE> 3 "register_operand" "x") + (match_operand:SI 4 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + SME_FP_TERNARY_SLICE))] + "TARGET_STREAMING_SME2" + { + operands[5] = GEN_INT (INTVAL (operands[1]) + 1); + return "<b><optab>l\tza.s[%w0, %1:%5<vg_modifier>], %2<z_suffix>, %3.h[%4]"; + } +) + ;; ------------------------------------------------------------------------- ;; ---- [FP] Sum of outer products ;; ------------------------------------------------------------------------- @@ -892,3 +1939,46 @@ && (<SME_ZA_SDF_I:elem_bits> == 32) == (<SME_MOP_HSDF:elem_bits> <= 32)" "<b><optab>\tza%0.<SME_ZA_SDF_I:Vetype>, %1/m, %2/m, %3.<SME_MOP_HSDF:Vetype>, %4.<SME_MOP_HSDF:Vetype>" ) + +;; ========================================================================= +;; == Table lookup +;; 
========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- Table lookup +;; ------------------------------------------------------------------------- +;; Includes: +;; - LUTI2 +;; - LUTI4 +;; ------------------------------------------------------------------------- + +(define_c_enum "unspec" [ + UNSPEC_SME_LUTI +]) + +(define_insn "@aarch64_sme_lut<LUTI_BITS><mode>" + [(set (match_operand:SVE_FULL_BHS 0 "register_operand" "=w") + (unspec:SVE_FULL_BHS + [(reg:V8DI ZT0_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:VNx16QI 1 "register_operand" "w") + (match_operand:DI 2 "const_int_operand") + (const_int LUTI_BITS)] + UNSPEC_SME_LUTI))] + "TARGET_STREAMING_SME2" + "luti<LUTI_BITS>\t%0.<Vetype>, zt0, %1[%2]" +) + +(define_insn "@aarch64_sme_lut<LUTI_BITS><mode>" + [(set (match_operand:SVE_BHSx24 0 "aligned_register_operand" "=Uw<vector_count>") + (unspec:SVE_BHSx24 + [(reg:V8DI ZT0_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:VNx16QI 1 "register_operand" "w") + (match_operand:DI 2 "const_int_operand") + (const_int LUTI_BITS)] + UNSPEC_SME_LUTI))] + "TARGET_STREAMING_SME2 + && !(<LUTI_BITS> == 4 && <vector_count> == 4 && <elem_bits> == 8)" + "luti<LUTI_BITS>\t%0, zt0, %1[%2]" +) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index 89035135a38d287ae404d996f26e27fa93492ee1..6492da0b383a0ae14d0c20cd9fb11ceb352c30c4 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -240,7 +240,7 @@ public: { machine_mode mode = GET_MODE_INNER (e.vector_mode (0)); e.args[2] = simplify_unary_operation (NOT, mode, e.args[2], mode); - return e.map_to_rtx_codes (AND, AND, -1); + return e.map_to_rtx_codes (AND, AND, -1, -1); } if (e.type_suffix_ids[0] == TYPE_SUFFIX_b) @@ -573,6 +573,12 @@ public: rtx expand (function_expander &e) const override { + if (e.type_suffix (0).tclass == TYPE_count) + { + unsigned int bits = e.type_suffix (0).element_bits; + return e.use_exact_insn (code_for_aarch64_sve_cntp_c (bits)); + } + machine_mode mode = e.vector_mode (0); e.add_ptrue_hint (0, mode); return e.use_exact_insn (code_for_aarch64_pred_cntp (mode)); @@ -640,9 +646,24 @@ public: rtx expand (function_expander &e) const override { + insn_code icode; + if (e.pred == PRED_none) + { + machine_mode mode0 = e.result_mode (); + machine_mode mode1 = GET_MODE (e.args[0]); + convert_optab optab; + if (e.type_suffix (0).integer_p) + optab = e.type_suffix (0).unsigned_p ? ufix_optab : sfix_optab; + else if (e.type_suffix (1).integer_p) + optab = e.type_suffix (1).unsigned_p ? ufloat_optab : sfloat_optab; + else + optab = trunc_optab; + icode = convert_optab_handler (optab, mode0, mode1); + gcc_assert (icode != CODE_FOR_nothing); + return e.use_exact_insn (icode); + } machine_mode mode0 = e.vector_mode (0); machine_mode mode1 = e.vector_mode (1); - insn_code icode; /* All this complication comes from the need to select four things simultaneously: @@ -706,9 +727,17 @@ public: /* In the optab, the multiplication operands come before the accumulator operand. The optab is keyed off the multiplication mode. 
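For example, the arguments of svdot (acc, x, y) are rotated below to (x, y, acc), and the [su]dot_prod handler is then looked up using the mode of x.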
*/ e.rotate_inputs_left (0, 3); - insn_code icode - = e.direct_optab_handler_for_sign (sdot_prod_optab, udot_prod_optab, - 0, GET_MODE (e.args[0])); + insn_code icode; + if (e.type_suffix_ids[1] == NUM_TYPE_SUFFIXES) + icode = e.direct_optab_handler_for_sign (sdot_prod_optab, + udot_prod_optab, + 0, GET_MODE (e.args[0])); + else + icode = (e.type_suffix (0).float_p + ? CODE_FOR_aarch64_sve_fdotvnx4sfvnx8hf + : e.type_suffix (0).unsigned_p + ? CODE_FOR_aarch64_sve_udotvnx4sivnx8hi + : CODE_FOR_aarch64_sve_sdotvnx4sivnx8hi); return e.use_unpred_insn (icode); } }; @@ -721,12 +750,18 @@ public: rtx expand (function_expander &e) const override { + machine_mode mode0 = GET_MODE (e.args[0]); + machine_mode mode1 = GET_MODE (e.args[1]); /* Use the same ordering as the dot_prod_optab, with the accumulator last. */ e.rotate_inputs_left (0, 4); int unspec = unspec_for (e); - machine_mode mode = e.vector_mode (0); - return e.use_exact_insn (code_for_aarch64_dot_prod_lane (unspec, mode)); + insn_code icode; + if (unspec == UNSPEC_FDOT) + icode = CODE_FOR_aarch64_fdot_prod_lanevnx4sfvnx8hf; + else + icode = code_for_aarch64_dot_prod_lane (unspec, mode0, mode1); + return e.use_exact_insn (icode); } }; @@ -1013,7 +1048,7 @@ public: with an extra argument on the end. Take the inactive elements from this extra argument. */ e.rotate_inputs_left (0, 4); - return e.map_to_rtx_codes (AND, AND, -1, 3); + return e.map_to_rtx_codes (AND, AND, -1, -1, 3); } machine_mode wide_mode = e.vector_mode (0); @@ -1244,6 +1279,9 @@ public: gimple * fold (gimple_folder &f) const override { + if (f.vectors_per_tuple () != 1) + return nullptr; + tree vectype = f.vector_type (0); /* Get the predicate and base pointer. */ @@ -1262,8 +1300,12 @@ public: rtx expand (function_expander &e) const override { - insn_code icode = convert_optab_handler (maskload_optab, - e.vector_mode (0), e.gp_mode (0)); + insn_code icode; + if (e.vectors_per_tuple () == 1) + icode = convert_optab_handler (maskload_optab, + e.vector_mode (0), e.gp_mode (0)); + else + icode = code_for_aarch64_ld1 (e.tuple_mode (0)); return e.use_contiguous_load_insn (icode); } }; @@ -1563,7 +1605,7 @@ public: rtx expand (function_expander &e) const override { - insn_code icode = code_for_aarch64_ldnt1 (e.vector_mode (0)); + insn_code icode = code_for_aarch64_ldnt1 (e.tuple_mode (0)); return e.use_contiguous_load_insn (icode); } }; @@ -1823,7 +1865,10 @@ public: gimple * fold (gimple_folder &f) const override { - return f.fold_to_pfalse (); + if (f.type_suffix (0).tclass == TYPE_bool) + return f.fold_to_pfalse (); + + return nullptr; } rtx @@ -1968,13 +2013,20 @@ public: gimple * fold (gimple_folder &f) const override { - return f.fold_to_ptrue (); + if (f.type_suffix (0).tclass == TYPE_bool) + return f.fold_to_ptrue (); + + return nullptr; } rtx expand (function_expander &e) const override { - return aarch64_ptrue_all (e.type_suffix (0).element_bytes); + if (e.type_suffix (0).tclass == TYPE_bool) + return aarch64_ptrue_all (e.type_suffix (0).element_bytes); + + auto bits = e.type_suffix (0).element_bits; + return e.use_exact_insn (code_for_aarch64_sve_ptrue_c (bits)); } }; @@ -2202,12 +2254,37 @@ public: } }; +class svrint_impl : public function_base +{ +public: + CONSTEXPR svrint_impl (optab_tag optab, int cond_unspec) + : m_optab (optab), m_cond_unspec (cond_unspec) + {} + + rtx + expand (function_expander &e) const override + { + if (e.pred == PRED_none) + { + auto icode = direct_optab_handler (m_optab, e.tuple_mode (0)); + return e.use_exact_insn (icode); + } + return 
e.map_to_unspecs (-1, -1, m_cond_unspec); + } + + optab_tag m_optab; + int m_cond_unspec; +}; + class svsel_impl : public quiet<function_base> { public: gimple * fold (gimple_folder &f) const override { + if (f.vectors_per_tuple () > 1) + return nullptr; + /* svsel corresponds exactly to VEC_COND_EXPR. */ gimple_seq stmts = NULL; tree pred = f.convert_pred (stmts, f.vector_type (0), 0); @@ -2222,9 +2299,11 @@ public: { /* svsel (cond, truev, falsev) is vcond_mask (truev, falsev, cond). */ e.rotate_inputs_left (0, 3); - insn_code icode = convert_optab_handler (vcond_mask_optab, - e.vector_mode (0), - e.gp_mode (0)); + insn_code icode = (e.vectors_per_tuple () > 1 + ? code_for_aarch64_sve_sel (e.tuple_mode (0)) + : convert_optab_handler (vcond_mask_optab, + e.vector_mode (0), + e.gp_mode (0))); return e.use_exact_insn (icode); } }; @@ -2311,6 +2390,9 @@ public: gimple * fold (gimple_folder &f) const override { + if (f.vectors_per_tuple () != 1) + return nullptr; + tree vectype = f.vector_type (0); /* Get the predicate and base pointer. */ @@ -2328,8 +2410,12 @@ public: rtx expand (function_expander &e) const override { - insn_code icode = convert_optab_handler (maskstore_optab, - e.vector_mode (0), e.gp_mode (0)); + insn_code icode; + if (e.vectors_per_tuple () == 1) + icode = convert_optab_handler (maskstore_optab, + e.vector_mode (0), e.gp_mode (0)); + else + icode = code_for_aarch64_st1 (e.tuple_mode (0)); return e.use_contiguous_store_insn (icode); } }; @@ -2447,7 +2533,7 @@ public: rtx expand (function_expander &e) const override { - insn_code icode = code_for_aarch64_stnt1 (e.vector_mode (0)); + insn_code icode = code_for_aarch64_stnt1 (e.tuple_mode (0)); return e.use_contiguous_store_insn (icode); } }; @@ -2464,7 +2550,7 @@ public: /* Canonicalize subtractions of constants to additions. 
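For example, svsub (pg, x, 1) becomes an addition of -1, so that the PLUS-based expansion below can be reused.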
*/ machine_mode mode = e.vector_mode (0); if (e.try_negating_argument (2, mode)) - return e.map_to_rtx_codes (PLUS, PLUS, UNSPEC_COND_FADD); + return e.map_to_rtx_codes (PLUS, PLUS, UNSPEC_COND_FADD, -1); return rtx_code_function::expand (e); } @@ -2675,6 +2761,9 @@ public: gimple * fold (gimple_folder &f) const override { + if (f.vectors_per_tuple () > 1) + return nullptr; + if (f.type_suffix (1).unsigned_p) return fold_type<poly_uint64> (f); else @@ -2812,7 +2901,8 @@ FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),) FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV)) FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV)) FUNCTION (svdot, svdot_impl,) -FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT, -1)) +FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT, + UNSPEC_FDOT)) FUNCTION (svdup, svdup_impl,) FUNCTION (svdup_lane, svdup_lane_impl,) FUNCTION (svdupq, svdupq_impl,) @@ -2878,12 +2968,16 @@ FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE)) FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT)) FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE)) FUNCTION (svmad, svmad_impl,) -FUNCTION (svmax, rtx_code_function, (SMAX, UMAX, UNSPEC_COND_FMAX)) -FUNCTION (svmaxnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMAXNM)) +FUNCTION (svmax, rtx_code_function, (SMAX, UMAX, UNSPEC_COND_FMAX, + UNSPEC_FMAX)) +FUNCTION (svmaxnm, cond_or_uncond_unspec_function, (UNSPEC_COND_FMAXNM, + UNSPEC_FMAXNM)) FUNCTION (svmaxnmv, reduction, (UNSPEC_FMAXNMV)) FUNCTION (svmaxv, reduction, (UNSPEC_SMAXV, UNSPEC_UMAXV, UNSPEC_FMAXV)) -FUNCTION (svmin, rtx_code_function, (SMIN, UMIN, UNSPEC_COND_FMIN)) -FUNCTION (svminnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMINNM)) +FUNCTION (svmin, rtx_code_function, (SMIN, UMIN, UNSPEC_COND_FMIN, + UNSPEC_FMIN)) +FUNCTION (svminnm, cond_or_uncond_unspec_function, (UNSPEC_COND_FMINNM, + UNSPEC_FMINNM)) FUNCTION (svminnmv, reduction, (UNSPEC_FMINNMV)) FUNCTION (svminv, reduction, (UNSPEC_SMINV, UNSPEC_UMINV, UNSPEC_FMINV)) FUNCTION (svmla, svmla_impl,) @@ -2955,13 +3049,13 @@ FUNCTION (svrev, svrev_impl,) FUNCTION (svrevb, unspec_based_function, (UNSPEC_REVB, UNSPEC_REVB, -1)) FUNCTION (svrevh, unspec_based_function, (UNSPEC_REVH, UNSPEC_REVH, -1)) FUNCTION (svrevw, unspec_based_function, (UNSPEC_REVW, UNSPEC_REVW, -1)) -FUNCTION (svrinta, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTA)) -FUNCTION (svrinti, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTI)) -FUNCTION (svrintm, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTM)) -FUNCTION (svrintn, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTN)) -FUNCTION (svrintp, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTP)) -FUNCTION (svrintx, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTX)) -FUNCTION (svrintz, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTZ)) +FUNCTION (svrinta, svrint_impl, (round_optab, UNSPEC_COND_FRINTA)) +FUNCTION (svrinti, svrint_impl, (nearbyint_optab, UNSPEC_COND_FRINTI)) +FUNCTION (svrintm, svrint_impl, (floor_optab, UNSPEC_COND_FRINTM)) +FUNCTION (svrintn, svrint_impl, (roundeven_optab, UNSPEC_COND_FRINTN)) +FUNCTION (svrintp, svrint_impl, (ceil_optab, UNSPEC_COND_FRINTP)) +FUNCTION (svrintx, svrint_impl, (rint_optab, UNSPEC_COND_FRINTX)) +FUNCTION (svrintz, svrint_impl, (btrunc_optab, UNSPEC_COND_FRINTZ)) FUNCTION (svrsqrte, unspec_based_function, (-1, UNSPEC_RSQRTE, UNSPEC_RSQRTE)) FUNCTION (svrsqrts, unspec_based_function, (-1, -1, UNSPEC_RSQRTS)) FUNCTION (svscale, 
unspec_based_function, (-1, -1, UNSPEC_COND_FSCALE)) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def index a742c7bbc56db2b299c52e02c1f8e57970c4b512..ddeeaea20285d8bbf616e19926d1691ca9450b3c 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def @@ -71,13 +71,14 @@ DEF_SVE_FUNCTION (svcntp, count_pred, all_pred, implicit) DEF_SVE_FUNCTION (svcntw, count_inherent, none, none) DEF_SVE_FUNCTION (svcntw_pat, count_pat, none, none) DEF_SVE_FUNCTION (svcreate2, create, all_data, none) +DEF_SVE_FUNCTION (svcreate2, create, b, none) DEF_SVE_FUNCTION (svcreate3, create, all_data, none) DEF_SVE_FUNCTION (svcreate4, create, all_data, none) -DEF_SVE_FUNCTION (svcvt, unary_convert, cvt, mxz) +DEF_SVE_FUNCTION (svcvt, unary_convertxn, cvt, mxz) DEF_SVE_FUNCTION (svdiv, binary_opt_n, all_float_and_sd_integer, mxz) DEF_SVE_FUNCTION (svdivr, binary_opt_n, all_float_and_sd_integer, mxz) -DEF_SVE_FUNCTION (svdot, ternary_qq_opt_n, sd_integer, none) -DEF_SVE_FUNCTION (svdot_lane, ternary_qq_lane, sd_integer, none) +DEF_SVE_FUNCTION (svdot, ternary_qq_opt_n_or_011, sd_integer, none) +DEF_SVE_FUNCTION (svdot_lane, ternary_qq_or_011_lane, sd_integer, none) DEF_SVE_FUNCTION (svdup, unary_n, all_data, mxz_or_none) DEF_SVE_FUNCTION (svdup, unary_n, all_pred, none) DEF_SVE_FUNCTION (svdup_lane, binary_uint_n, all_data, none) @@ -92,6 +93,7 @@ DEF_SVE_FUNCTION (svextb, unary, hsd_integer, mxz) DEF_SVE_FUNCTION (svexth, unary, sd_integer, mxz) DEF_SVE_FUNCTION (svextw, unary, d_integer, mxz) DEF_SVE_FUNCTION (svget2, get, all_data, none) +DEF_SVE_FUNCTION (svget2, get, b, none) DEF_SVE_FUNCTION (svget3, get, all_data, none) DEF_SVE_FUNCTION (svget4, get, all_data, none) DEF_SVE_FUNCTION (svindex, binary_scalar, all_integer, none) @@ -116,12 +118,12 @@ DEF_SVE_FUNCTION (svlsl_wide, binary_uint64_opt_n, bhs_integer, mxz) DEF_SVE_FUNCTION (svlsr, binary_uint_opt_n, all_unsigned, mxz) DEF_SVE_FUNCTION (svlsr_wide, binary_uint64_opt_n, bhs_unsigned, mxz) DEF_SVE_FUNCTION (svmad, ternary_opt_n, all_arith, mxz) -DEF_SVE_FUNCTION (svmax, binary_opt_n, all_arith, mxz) -DEF_SVE_FUNCTION (svmaxnm, binary_opt_n, all_float, mxz) +DEF_SVE_FUNCTION (svmax, binary_opt_single_n, all_arith, mxz) +DEF_SVE_FUNCTION (svmaxnm, binary_opt_single_n, all_float, mxz) DEF_SVE_FUNCTION (svmaxnmv, reduction, all_float, implicit) DEF_SVE_FUNCTION (svmaxv, reduction, all_arith, implicit) -DEF_SVE_FUNCTION (svmin, binary_opt_n, all_arith, mxz) -DEF_SVE_FUNCTION (svminnm, binary_opt_n, all_float, mxz) +DEF_SVE_FUNCTION (svmin, binary_opt_single_n, all_arith, mxz) +DEF_SVE_FUNCTION (svminnm, binary_opt_single_n, all_float, mxz) DEF_SVE_FUNCTION (svminnmv, reduction, all_float, implicit) DEF_SVE_FUNCTION (svminv, reduction, all_arith, implicit) DEF_SVE_FUNCTION (svmla, ternary_opt_n, all_arith, mxz) @@ -148,6 +150,7 @@ DEF_SVE_FUNCTION (svorr, binary_opt_n, all_integer, mxz) DEF_SVE_FUNCTION (svorr, binary_opt_n, b, z) DEF_SVE_FUNCTION (svorv, reduction, all_integer, implicit) DEF_SVE_FUNCTION (svpfalse, inherent_b, b, none) +DEF_SVE_FUNCTION (svpfalse, inherent, c, none) DEF_SVE_FUNCTION (svpfirst, unary, b, implicit) DEF_SVE_FUNCTION (svpnext, unary_pred, all_pred, implicit) DEF_SVE_FUNCTION (svprfb, prefetch, none, implicit) @@ -204,31 +207,32 @@ DEF_SVE_FUNCTION (svrev, unary_pred, all_pred, none) DEF_SVE_FUNCTION (svrevb, unary, hsd_integer, mxz) DEF_SVE_FUNCTION (svrevh, unary, sd_integer, mxz) DEF_SVE_FUNCTION (svrevw, 
unary, d_integer, mxz) -DEF_SVE_FUNCTION (svrinta, unary, all_float, mxz) +DEF_SVE_FUNCTION (svrinta, unaryxn, all_float, mxz) DEF_SVE_FUNCTION (svrinti, unary, all_float, mxz) -DEF_SVE_FUNCTION (svrintm, unary, all_float, mxz) -DEF_SVE_FUNCTION (svrintn, unary, all_float, mxz) -DEF_SVE_FUNCTION (svrintp, unary, all_float, mxz) +DEF_SVE_FUNCTION (svrintm, unaryxn, all_float, mxz) +DEF_SVE_FUNCTION (svrintn, unaryxn, all_float, mxz) +DEF_SVE_FUNCTION (svrintp, unaryxn, all_float, mxz) DEF_SVE_FUNCTION (svrintx, unary, all_float, mxz) DEF_SVE_FUNCTION (svrintz, unary, all_float, mxz) DEF_SVE_FUNCTION (svrsqrte, unary, all_float, none) DEF_SVE_FUNCTION (svrsqrts, binary, all_float, none) DEF_SVE_FUNCTION (svscale, binary_int_opt_n, all_float, mxz) -DEF_SVE_FUNCTION (svsel, binary, all_data, implicit) -DEF_SVE_FUNCTION (svsel, binary, b, implicit) +DEF_SVE_FUNCTION (svsel, binaryxn, all_data, implicit) +DEF_SVE_FUNCTION (svsel, binaryxn, b, implicit) DEF_SVE_FUNCTION (svset2, set, all_data, none) +DEF_SVE_FUNCTION (svset2, set, b, none) DEF_SVE_FUNCTION (svset3, set, all_data, none) DEF_SVE_FUNCTION (svset4, set, all_data, none) DEF_SVE_FUNCTION (svsplice, binary, all_data, implicit) DEF_SVE_FUNCTION (svsqrt, unary, all_float, mxz) -DEF_SVE_FUNCTION (svst1, store, all_data, implicit) +DEF_SVE_FUNCTION (svst1, storexn, all_data, implicit) DEF_SVE_FUNCTION (svst1b, store, hsd_integer, implicit) DEF_SVE_FUNCTION (svst1h, store, sd_integer, implicit) DEF_SVE_FUNCTION (svst1w, store, d_integer, implicit) DEF_SVE_FUNCTION (svst2, store, all_data, implicit) DEF_SVE_FUNCTION (svst3, store, all_data, implicit) DEF_SVE_FUNCTION (svst4, store, all_data, implicit) -DEF_SVE_FUNCTION (svstnt1, store, all_data, implicit) +DEF_SVE_FUNCTION (svstnt1, storexn, all_data, implicit) DEF_SVE_FUNCTION (svsub, binary_opt_n, all_arith, mxz) DEF_SVE_FUNCTION (svsubr, binary_opt_n, all_arith, mxz) DEF_SVE_FUNCTION (svtbl, binary_uint, all_data, none) @@ -238,6 +242,7 @@ DEF_SVE_FUNCTION (svtrn2, binary, all_data, none) DEF_SVE_FUNCTION (svtrn2, binary_pred, all_pred, none) DEF_SVE_FUNCTION (svundef, inherent, all_data, none) DEF_SVE_FUNCTION (svundef2, inherent, all_data, none) +DEF_SVE_FUNCTION (svundef2, inherent, b, none) DEF_SVE_FUNCTION (svundef3, inherent, all_data, none) DEF_SVE_FUNCTION (svundef4, inherent, all_data, none) DEF_SVE_FUNCTION (svunpkhi, unary_widen, hsd_integer, none) @@ -329,7 +334,7 @@ DEF_SVE_FUNCTION (svbfmlalb, ternary_bfloat_opt_n, s_float, none) DEF_SVE_FUNCTION (svbfmlalb_lane, ternary_bfloat_lane, s_float, none) DEF_SVE_FUNCTION (svbfmlalt, ternary_bfloat_opt_n, s_float, none) DEF_SVE_FUNCTION (svbfmlalt_lane, ternary_bfloat_lane, s_float, none) -DEF_SVE_FUNCTION (svcvt, unary_convert, cvt_bfloat, mxz) +DEF_SVE_FUNCTION (svcvt, unary_convertxn, cvt_bfloat, mxz) DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, mx) #undef REQUIRED_EXTENSIONS diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h index 5bd200d9c0a0d4eb967c6aac65d3cbaa589fd9ee..b40640b076348052bec617e158eff2babb595f6e 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h @@ -60,6 +60,12 @@ using read_write_za = add_call_properties<T, CP_READ_ZA | CP_WRITE_ZA>; template<typename T> using write_za = add_call_properties<T, CP_WRITE_ZA>; +template<typename T> +using read_zt0 = add_call_properties<T, CP_READ_ZT0>; + +template<typename T> +using write_zt0 = add_call_properties<T, 
CP_WRITE_ZT0>; + /* A function_base that sometimes or always operates on tuples of vectors. */ class multi_vector_function : public function_base @@ -102,8 +108,9 @@ public: memory_vector_mode (const function_instance &fi) const override { machine_mode mode = fi.vector_mode (0); - if (m_vectors_per_tuple != 1) - mode = targetm.array_mode (mode, m_vectors_per_tuple).require (); + auto vectors_per_tuple = fi.vectors_per_tuple (); + if (vectors_per_tuple != 1) + mode = targetm.array_mode (mode, vectors_per_tuple).require (); return mode; } }; @@ -196,9 +203,11 @@ class rtx_code_function_base : public function_base public: CONSTEXPR rtx_code_function_base (rtx_code code_for_sint, rtx_code code_for_uint, - int unspec_for_fp = -1) + int unspec_for_cond_fp = -1, + int unspec_for_uncond_fp = -1) : m_code_for_sint (code_for_sint), m_code_for_uint (code_for_uint), - m_unspec_for_fp (unspec_for_fp) {} + m_unspec_for_cond_fp (unspec_for_cond_fp), + m_unspec_for_uncond_fp (unspec_for_uncond_fp) {} /* The rtx code to use for signed and unsigned integers respectively. Can be UNKNOWN for functions that don't have integer forms. */ @@ -207,7 +216,11 @@ public: /* The UNSPEC_COND_* to use for floating-point operations. Can be -1 for functions that only operate on integers. */ - int m_unspec_for_fp; + int m_unspec_for_cond_fp; + + /* The UNSPEC_* to use for unpredicated floating-point operations. + Can be -1 if there is no such operation. */ + int m_unspec_for_uncond_fp; }; /* A function_base for functions that have an associated rtx code. @@ -221,7 +234,7 @@ public: expand (function_expander &e) const override { return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint, - m_unspec_for_fp); + m_unspec_for_cond_fp, m_unspec_for_uncond_fp); } }; @@ -242,7 +255,8 @@ public: unsigned int nargs = e.args.length (); e.rotate_inputs_left (e.pred != PRED_none ? 1 : 0, nargs); return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint, - m_unspec_for_fp, nargs - 1); + m_unspec_for_cond_fp, m_unspec_for_uncond_fp, + nargs - 1); } }; @@ -334,10 +348,13 @@ public: expand (function_expander &e) const override { return e.use_exact_insn (CODE (unspec_for (e), - e.vector_mode (m_suffix_index))); + e.tuple_mode (m_suffix_index))); } }; +typedef unspec_based_function_exact_insn<code_for_aarch64_sve> + unspec_based_uncond_function; + /* A function that performs an unspec and then adds it to another value. */ typedef unspec_based_function_exact_insn<code_for_aarch64_sve_add> unspec_based_add_function; @@ -374,6 +391,34 @@ typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub> typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub_lane> unspec_based_sub_lane_function; +/* A function that has conditional and unconditional forms, with both + forms being associated with a single unspec each. */ +class cond_or_uncond_unspec_function : public function_base +{ +public: + CONSTEXPR cond_or_uncond_unspec_function (int cond_unspec, int uncond_unspec) + : m_cond_unspec (cond_unspec), m_uncond_unspec (uncond_unspec) {} + + rtx + expand (function_expander &e) const override + { + if (e.pred == PRED_none) + { + auto mode = e.tuple_mode (0); + auto icode = (e.mode_suffix_id == MODE_single + ? code_for_aarch64_sve_single (m_uncond_unspec, mode) + : code_for_aarch64_sve (m_uncond_unspec, mode)); + return e.use_exact_insn (icode); + } + return e.map_to_unspecs (m_cond_unspec, m_cond_unspec, m_cond_unspec); + } + + /* The unspecs for the conditional and unconditional instructions, + respectively. 
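+ For svmaxnm, for example, the pair is UNSPEC_COND_FMAXNM and UNSPEC_FMAXNM; see its FUNCTION entry in aarch64-sve-builtins-base.cc.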
*/ + int m_cond_unspec; + int m_uncond_unspec; +}; + /* General SME unspec-based functions, parameterized on the vector mode. */ class sme_1mode_function : public read_write_za<unspec_based_function_base> { @@ -388,14 +433,19 @@ public: rtx expand (function_expander &e) const override { - auto icode = code_for_aarch64_sme (unspec_for (e), e.tuple_mode (1)); + insn_code icode; + if (e.mode_suffix_id == MODE_single) + icode = code_for_aarch64_sme_single (unspec_for (e), e.tuple_mode (1)); + else + icode = code_for_aarch64_sme (unspec_for (e), e.tuple_mode (1)); return e.use_exact_insn (icode); } }; /* General SME unspec-based functions, parameterized on both the ZA mode and the vector mode. */ -template<insn_code (*CODE) (int, machine_mode, machine_mode)> +template<insn_code (*CODE) (int, machine_mode, machine_mode), + insn_code (*CODE_SINGLE) (int, machine_mode, machine_mode)> class sme_2mode_function_t : public read_write_za<unspec_based_function_base> { public: @@ -409,13 +459,21 @@ public: rtx expand (function_expander &e) const override { - insn_code icode = CODE (unspec_for (e), e.vector_mode (0), - e.tuple_mode (1)); + insn_code icode; + if (e.mode_suffix_id == MODE_single) + icode = CODE_SINGLE (unspec_for (e), e.vector_mode (0), + e.tuple_mode (1)); + else + icode = CODE (unspec_for (e), e.vector_mode (0), e.tuple_mode (1)); return e.use_exact_insn (icode); } }; -using sme_2mode_function = sme_2mode_function_t<code_for_aarch64_sme>; +using sme_2mode_function + = sme_2mode_function_t<code_for_aarch64_sme, code_for_aarch64_sme_single>; + +using sme_2mode_lane_function + = sme_2mode_function_t<code_for_aarch64_sme_lane, nullptr>; /* A function that acts like unspec_based_function_exact_insn<INT_CODE> when operating on integers, but that expands to an (fma ...)-style @@ -565,6 +623,77 @@ public: int m_unspec; }; +/* A function that implements an x2 or x4 permute instruction. Both forms + of the intrinsic have a single x2 or x4 tuple argument, but the underlying + x2 instruction takes two separate input operands. */ +class multireg_permute : public function_base +{ +public: + CONSTEXPR multireg_permute (int unspec) : m_unspec (unspec) {} + + rtx + expand (function_expander &e) const override + { + insn_code icode = code_for_aarch64_sve (m_unspec, e.tuple_mode (0)); + if (e.group_suffix ().vectors_per_tuple == 2) + { + machine_mode elt_mode = e.vector_mode (0); + rtx arg = e.args[0]; + e.args[0] = simplify_gen_subreg (elt_mode, arg, GET_MODE (arg), 0); + e.args.safe_push (simplify_gen_subreg (elt_mode, arg, GET_MODE (arg), + GET_MODE_SIZE (elt_mode))); + } + return e.use_exact_insn (icode); + } + + /* The unspec associated with the permutation. */ + int m_unspec; +}; + +/* A function that has two integer type suffixes, which might agree + or disagree on signedness. There are separate instructions for each + signed/unsigned combination. */ +class integer_conversion : public function_base +{ +public: + CONSTEXPR integer_conversion (int unspec_for_sint, int unspec_for_sintu, + int unspec_for_uint, int unspec_for_uints) + : m_unspec_for_sint (unspec_for_sint), + m_unspec_for_sintu (unspec_for_sintu), + m_unspec_for_uint (unspec_for_uint), + m_unspec_for_uints (unspec_for_uints) + {} + + rtx + expand (function_expander &e) const override + { + machine_mode mode0 = e.vector_mode (0); + machine_mode mode1 = GET_MODE (e.args[0]); + int unspec; + if (e.type_suffix (0).unsigned_p == e.type_suffix (1).unsigned_p) + unspec = (e.type_suffix (0).unsigned_p + ? 
m_unspec_for_uint + : m_unspec_for_sint); + else + unspec = (e.type_suffix (0).unsigned_p + ? m_unspec_for_sintu + : m_unspec_for_uints); + return e.use_exact_insn (code_for_aarch64_sve (unspec, mode0, mode1)); + } + + /* The unspec for signed -> signed. */ + int m_unspec_for_sint; + + /* The unspec for signed -> unsigned. */ + int m_unspec_for_sintu; + + /* The unspec for unsigned -> unsigned. */ + int m_unspec_for_uint; + + /* The unspec for unsigned -> signed. */ + int m_unspec_for_uints; +}; + /* A function_base for functions that reduce a vector to a scalar. */ class reduction : public function_base { @@ -623,7 +752,7 @@ public: if (aarch64_simd_shift_imm_p (shift, elem_mode, m_code == ASHIFT)) { e.args.last () = shift; - return e.map_to_rtx_codes (m_code, m_code, -1); + return e.map_to_rtx_codes (m_code, m_code, -1, -1); } if (e.pred == PRED_x) @@ -679,6 +808,19 @@ public: int unspec = (e.type_suffix (1).unsigned_p ? m_unspec_for_uint : m_unspec_for_sint); + if (e.vectors_per_tuple () > 1) + { + auto bits = e.type_suffix (0).element_bits; + auto icode = code_for_aarch64_sve_while_b_x2 (unspec, bits); + return e.use_exact_insn (icode); + } + if (e.type_suffix (0).tclass == TYPE_count) + { + auto bits = e.type_suffix (0).element_bits; + auto icode = code_for_aarch64_sve_while_c (unspec, bits); + return e.use_exact_insn (icode); + } + machine_mode pred_mode = e.vector_mode (0); scalar_mode reg_mode = GET_MODE_INNER (e.vector_mode (1)); return e.use_exact_insn (code_for_while (unspec, reg_mode, pred_mode)); diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc index 36c3c5005c4b41933832133e27c83ff18671af0b..9380cc7db205b1c22185eb5174b01d3fc9c5c2de 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc @@ -52,6 +52,17 @@ build_const_pointer (tree t) return build_pointer_type (build_qualified_type (t, TYPE_QUAL_CONST)); } +/* GROUP's first type suffix is a ZA-related one. Return true if the + group exists only for the purpose of defining C overloads. This is + useful if some forms of an instruction require one feature and other + forms require another feature, and neither feature implies the other. */ +static bool +za_group_is_pure_overload (const function_group_info &group) +{ + gcc_checking_assert (type_suffixes[group.types[0][0]].za_p); + return group.types[0][1] == NUM_TYPE_SUFFIXES; +} + /* If INSTANCE has a governing predicate, add it to the list of argument types in ARGUMENT_TYPES. RETURN_TYPE is the type returned by the function. */ @@ -64,7 +75,7 @@ apply_predication (const function_instance &instance, tree return_type, in the original format string. */ if (instance.pred != PRED_none && instance.pred != PRED_za_m) { - argument_types.quick_insert (0, get_svbool_t ()); + argument_types.quick_insert (0, instance.gp_type ()); /* For unary merge operations, the first argument is a vector with the same type as the result. 
For unary_convert_narrowt it also provides the "bottom" half of active elements, and is present @@ -82,6 +93,7 @@ apply_predication (const function_instance &instance, tree return_type, f<bits> - a floating-point type with the given number of bits f[01] - a floating-point type with the same width as type suffix 0 or 1 B - bfloat16_t + c - a predicate-as-counter h<elt> - a half-sized version of <elt> p - a predicate (represented as TYPE_SUFFIX_b) q<elt> - a quarter-sized version of <elt> @@ -118,6 +130,9 @@ parse_element_type (const function_instance &instance, const char *&format) return suffix; } + if (ch == 'c') + return TYPE_SUFFIX_c; + if (ch == 'p') return TYPE_SUFFIX_b; @@ -156,6 +171,8 @@ parse_element_type (const function_instance &instance, const char *&format) ap - array pointer for prefetches as - array pointer for stores b - base vector type (from a _<m0>base suffix) + c0 - the result of a conversion, based on type and group suffixes + c1 - the source of a conversion, based on type and group suffixes d - displacement vector type (from a _<m1>index or _<m1>offset suffix) e<name> - an enum with the given name s<elt> - a scalar type with the given element suffix @@ -189,6 +206,23 @@ parse_type (const function_instance &instance, const char *&format) if (ch == 'b') return instance.base_vector_type (); + if (ch == 'c') + { + int ch = *format++; + gcc_assert (ch == '0' || ch == '1'); + unsigned int id = (ch == '0' ? 0 : 1); + auto vector_type = instance.type_suffix (id).vector_type; + unsigned int num_vectors = instance.group_suffix ().vectors_per_tuple; + if (num_vectors != 1) + { + unsigned int bits = instance.type_suffix (id).element_bits; + unsigned int other_bits = instance.type_suffix (1 - id).element_bits; + if (other_bits > bits) + num_vectors /= other_bits / bits; + } + return acle_vector_types[num_vectors - 1][vector_type]; + } + if (ch == 'd') return instance.displacement_vector_type (); @@ -619,6 +653,63 @@ struct binary_za_m_base : public overloaded_base<1> } }; +/* Base class for shapes like binary_za_slice_lane. TCLASS is the type + class of the final vector argument. */ +template<type_class_index TCLASS = function_resolver::SAME_TYPE_CLASS> +struct binary_za_slice_lane_base : public overloaded_base<1> +{ + constexpr binary_za_slice_lane_base (unsigned int lane_type_suffix) + : m_lane_type_suffix (lane_type_suffix) {} + + void + build (function_builder &b, const function_group_info &group) const override + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "_,su32,t1,v1,su64", group, MODE_none); + } + + tree + resolve (function_resolver &r) const override + { + sve_type type; + if (!r.check_num_arguments (4) + || !r.require_scalar_type (0, "uint32_t") + || !(type = r.infer_tuple_type (1)) + || !r.require_derived_vector_type (2, 1, type, TCLASS) + || !r.require_integer_immediate (3)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } + + bool + check (function_checker &c) const override + { + unsigned int bytes = c.type_suffix (m_lane_type_suffix).element_bytes; + return c.require_immediate_range (3, 0, 16 / bytes - 1); + } + + unsigned int m_lane_type_suffix; +}; + +/* Base class for shapes like binary_za_slice_opt_single. TCLASS is the + type class of the final argument. 
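+ For example, with TCLASS == TYPE_signed this base covers prototypes such as svusdot_za32[_u8]_vg1x2(uint32_t, svuint8x2_t, svint8x2_t), where the final argument's type class differs from that of the tuple.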
*/ +template<type_class_index TCLASS = function_resolver::SAME_TYPE_CLASS> +struct binary_za_slice_opt_single_base : public overloaded_base<1> +{ + tree + resolve (function_resolver &r) const override + { + sve_type type; + if (!r.check_num_arguments (3) + || !r.require_scalar_type (0, "uint32_t") + || !(type = r.infer_tuple_type (1))) + return error_mark_node; + + return r.finish_opt_single_resolution (2, 1, type, TCLASS); + } +}; + /* Base class for inc_dec and inc_dec_pat. */ struct inc_dec_base : public overloaded_base<0> { @@ -684,7 +775,8 @@ struct load_contiguous_base : public overloaded_base<0> || (vnum_p && !r.require_scalar_type (i + 1, "int64_t"))) return error_mark_node; - return r.resolve_to (r.mode_suffix_id, type); + return r.resolve_to (r.mode_suffix_id, type, NUM_TYPE_SUFFIXES, + r.group_suffix_id); } }; @@ -739,6 +831,29 @@ struct load_ext_gather_base : public overloaded_base<1> } }; +/* sv<t0>x<g>_t svfoo_t0_g(uint64_t, svuint8_t, uint64_t) + + where the first argument is the ZT register number (currently always 0) + and the final argument is a constant index. The instruction divides + the vector argument in BITS-bit quantities. */ +template<unsigned int BITS> +struct luti_lane_zt_base : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const override + { + build_all (b, "t0,su64,vu8,su64", group, MODE_none); + } + + bool + check (function_checker &c) const override + { + auto nvectors = c.vectors_per_tuple (); + return (c.require_immediate_range (0, 0, 0) + && c.require_immediate_range (2, 0, 32 / BITS / nvectors - 1)); + } +}; + /* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t) (for integer t0) sv<t0>_t svmmla[_t0](sv<t0>_t, sv<t0>_t, sv<t0>_t) (for floating-point t0) @@ -1136,6 +1251,41 @@ struct binary_int_opt_n_def : public overloaded_base<0> }; SHAPE (binary_int_opt_n) +/* Like binary_int_opt_n for single vectors. For tuples: + + sv<t0>x<g>_t svfoo[_t0_g](sv<t0>x<g>_t, sv<t0:int>x<g>_t) + sv<t0>x<g>_t svfoo[_single_t0_g](sv<t0>x<g>_t, sv<t0:int>_t). */ +struct binary_int_opt_single_n_def : public overloaded_base<0> +{ + bool explicit_group_suffix_p () const override { return false; } + + void + build (function_builder &b, const function_group_info &group) const override + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "t0,t0,ts0", group, MODE_none); + if (group.groups[0] == GROUP_none) + build_all (b, "v0,v0,ss0", group, MODE_n); + else + build_all (b, "t0,t0,vs0", group, MODE_single); + } + + tree + resolve (function_resolver &r) const override + { + unsigned int i, nargs; + sve_type type; + if (!r.check_gp_argument (2, i, nargs) + || !(type = r.infer_sve_type (i))) + return error_mark_node; + + return (type.num_vectors == 1 && r.scalar_argument_p (i + 1) + ? r.finish_opt_n_resolution (i + 1, i, type.type, TYPE_signed) + : r.finish_opt_single_resolution (i + 1, i, type, TYPE_signed)); + } +}; +SHAPE (binary_int_opt_single_n) + /* sv<t0>_t svfoo_<t0>(sv<t0>_t, sv<t0>_t, uint64_t) where the final argument is an integer constant expression in the @@ -1340,6 +1490,41 @@ struct binary_opt_n_def : public overloaded_base<0> }; SHAPE (binary_opt_n) +/* Like binary_opt_n for single vectors. For tuples: + + sv<t0>x<g>_t svfoo[_t0_g](sv<t0>x<g>_t, sv<t0>x<g>_t) + sv<t0>x<g>_t svfoo[_single_t0_g](sv<t0>x<g>_t, sv<t0>_t). 
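+ For example, the SME2 tuple forms of svmax are svmax[_f32_x2](svfloat32x2_t, svfloat32x2_t) and svmax[_single_f32_x2](svfloat32x2_t, svfloat32_t), alongside the existing svmax[_f32] and svmax[_n_f32].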
*/ +struct binary_opt_single_n_def : public overloaded_base<0> +{ + bool explicit_group_suffix_p () const override { return false; } + + void + build (function_builder &b, const function_group_info &group) const override + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "t0,t0,t0", group, MODE_none); + if (group.groups[0] == GROUP_none) + build_all (b, "v0,v0,s0", group, MODE_n); + else + build_all (b, "t0,t0,v0", group, MODE_single); + } + + tree + resolve (function_resolver &r) const override + { + unsigned int i, nargs; + sve_type type; + if (!r.check_gp_argument (2, i, nargs) + || !(type = r.infer_sve_type (i))) + return error_mark_node; + + return (type.num_vectors == 1 && r.scalar_argument_p (i + 1) + ? r.finish_opt_n_resolution (i + 1, i, type.type) + : r.finish_opt_single_resolution (i + 1, i, type)); + } +}; +SHAPE (binary_opt_single_n) + /* svbool_t svfoo(svbool_t, svbool_t). */ struct binary_pred_def : public nonoverloaded_base { @@ -1391,6 +1576,33 @@ struct binary_scalar_def : public nonoverloaded_base }; SHAPE (binary_scalar) +/* sv<t0>x<g>_t svfoo[_single_t0_g](sv<t0>x<g>_t, sv<t0>_t). */ +struct binary_single_def : public overloaded_base<0> +{ + bool explicit_group_suffix_p () const override { return false; } + + void + build (function_builder &b, const function_group_info &group) const override + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "t0,t0,v0", group, MODE_single); + } + + tree + resolve (function_resolver &r) const override + { + sve_type type; + if (!r.check_num_arguments (2) + || !(type = r.infer_sve_type (0)) + || !r.require_derived_vector_type (1, 0, type, r.SAME_TYPE_CLASS, + r.SAME_SIZE, 1)) + return error_mark_node; + + return r.resolve_to (MODE_single, type); + } +}; +SHAPE (binary_single) + /* sv<t0:uint>_t svfoo[_t0](sv<t0>_t, sv<t0>_t). i.e. a version of "binary" that returns unsigned integers. */ @@ -1642,6 +1854,67 @@ struct binary_za_m_def : public binary_za_m_base<> }; SHAPE (binary_za_m) +/* void svfoo_lane_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1>_t, uint64_t) + + where the first argument is a variable ZA slice and the final argument + indexes a single element in the preceding vector argument. */ +struct binary_za_slice_lane_def : public binary_za_slice_lane_base<> +{ + constexpr binary_za_slice_lane_def () : binary_za_slice_lane_base<> (1) {} +}; +SHAPE (binary_za_slice_lane) + +/* void svfoo_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1:int>x<g>_t) + void svfoo[_single]_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1:int>_t). + + where the first argument is a variable ZA slice. */ +struct binary_za_slice_int_opt_single_def + : public binary_za_slice_opt_single_base<TYPE_signed> +{ + void + build (function_builder &b, const function_group_info &group) const override + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "_,su32,t1,ts1", group, MODE_none); + build_all (b, "_,su32,t1,vs1", group, MODE_single); + } +}; +SHAPE (binary_za_slice_int_opt_single) + +/* void svfoo_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1>x<g>_t) + void svfoo[_single]_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1>_t) + + where the first argument is a variable ZA slice. 
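+ Representative instances are svadd_write_za32[_s32]_vg1x2(uint32_t, svint32x2_t, svint32x2_t) and svadd_write[_single]_za32[_s32]_vg1x2(uint32_t, svint32x2_t, svint32_t).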
*/ +struct binary_za_slice_opt_single_def + : public binary_za_slice_opt_single_base<> +{ + void + build (function_builder &b, const function_group_info &group) const override + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "_,su32,t1,t1", group, MODE_none); + build_all (b, "_,su32,t1,v1", group, MODE_single); + } +}; +SHAPE (binary_za_slice_opt_single) + +/* void svfoo_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1:uint>x<g>_t) + void svfoo[_single]_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1:uint>_t) + + where the first argument is a variable ZA slice. */ +struct binary_za_slice_uint_opt_single_def + : public binary_za_slice_opt_single_base<TYPE_unsigned> +{ + void + build (function_builder &b, const function_group_info &group) const override + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "_,su32,t1,tu1", group, MODE_none); + build_all (b, "_,su32,t1,vu1", group, MODE_single); + } +}; +SHAPE (binary_za_slice_uint_opt_single) + /* void svfoo_t0[_t1]_g(uint64_t, svbool_t, svbool_t, sv<t1>x<g>_t, sv<t1:uint>x<g>_t) @@ -1657,6 +1930,35 @@ struct binary_za_uint_m_def : public binary_za_m_base<TYPE_unsigned> }; SHAPE (binary_za_uint_m) +/* sv<t0>x<g>_t svfoo[_t0_t1_g](sv<t0>x<g>_t, sv<t0>x<g>_t). */ +struct binaryxn_def : public overloaded_base<0> +{ + bool explicit_group_suffix_p () const override { return false; } + + void + build (function_builder &b, const function_group_info &group) const override + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "t0,t0,t0", group, MODE_none); + } + + tree + resolve (function_resolver &r) const override + { + vector_type_index pred_type; + sve_type type; + if (!r.check_num_arguments (3) + || (pred_type = r.infer_predicate_type (0)) == NUM_VECTOR_TYPES + || !(type = r.infer_sve_type (1)) + || !r.require_matching_predicate_type (pred_type, type) + || !r.require_matching_vector_type (2, 1, type)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; +SHAPE (binaryxn) + /* bool svfoo(). */ struct bool_inherent_def : public nonoverloaded_base { @@ -1668,6 +1970,45 @@ struct bool_inherent_def : public nonoverloaded_base }; SHAPE (bool_inherent) +/* Either: + + sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0>_t, sv<t0>_t) + + for single vectors or: + + sv<t0>x<g>_t svfoo[_single_t0_g](sv<t0>x<g>_t, sv<t0>_t, sv<t0>_t) + + for tuples. */ +struct clamp_def : public overloaded_base<0> +{ + bool explicit_group_suffix_p () const override { return false; } + + void + build (function_builder &b, const function_group_info &group) const override + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "t0,t0,v0,v0", group, + group.groups[0] == GROUP_none ? MODE_none : MODE_single); + } + + tree + resolve (function_resolver &r) const override + { + sve_type type; + if (!r.check_num_arguments (3) + || !(type = r.infer_sve_type (0)) + || !r.require_derived_vector_type (1, 0, type, r.SAME_TYPE_CLASS, + r.SAME_SIZE, 1) + || !r.require_derived_vector_type (2, 0, type, r.SAME_TYPE_CLASS, + r.SAME_SIZE, 1)) + return error_mark_node; + + auto mode = type.num_vectors == 1 ? MODE_none : MODE_single; + return r.resolve_to (mode, type); + } +}; +SHAPE (clamp) + /* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0>_t) <t0>_t svfoo[_n_t0](<t0>_t, sv<t0>_t). 
*/ struct clast_def : public overloaded_base<0> @@ -1773,7 +2114,7 @@ struct compare_ptr_def : public overloaded_base<0> }; SHAPE (compare_ptr) -/* svbool_t svfoo_t0[_t1](<t1>_t, <t1>_t) +/* svboolx<g>_t svfoo_t0[_t1]_g(<t1>_t, <t1>_t) where _t0 is a _b<bits> suffix that describes the predicate result. There is no direct relationship between the element sizes of _t0 @@ -1784,7 +2125,7 @@ struct compare_scalar_def : public overloaded_base<1> build (function_builder &b, const function_group_info &group) const override { b.add_overloaded_functions (group, MODE_none); - build_all (b, "vp,s1,s1", group, MODE_none); + build_all (b, "tp,s1,s1", group, MODE_none); } tree @@ -1797,11 +2138,47 @@ struct compare_scalar_def : public overloaded_base<1> || !r.require_matching_integer_scalar_type (i + 1, i, type)) return error_mark_node; - return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type); + return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type, + r.group_suffix_id); } }; SHAPE (compare_scalar) +/* svcount_t svfoo_t0[_t1](<t1>_t, <t1>_t, uint64_t) + + where _t0 is a _c<bits> suffix that describes the predicate-as-counter + result. The final argument is an integer constant that specifies the + number of vectors (2 or 4). */ +struct compare_scalar_count_def : public overloaded_base<1> +{ + void + build (function_builder &b, const function_group_info &group) const override + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,s1,s1,su64", group, MODE_none); + } + + tree + resolve (function_resolver &r) const override + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (3, i, nargs) + || (type = r.infer_64bit_scalar_integer_pair (i)) == NUM_TYPE_SUFFIXES + || !r.require_integer_immediate (i + 2)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type); + } + + bool + check (function_checker &c) const override + { + return c.require_immediate_either_or (2, 2, 4); + } +}; +SHAPE (compare_scalar_count) + /* svbool_t svfoo[_t0](sv<t0>_t, svint64_t) (for signed t0) svbool_t svfoo[_n_t0](sv<t0>_t, int64_t) (for signed t0) svbool_t svfoo[_t0](sv<t0>_t, svuint64_t) (for unsigned t0) @@ -1865,6 +2242,25 @@ struct count_pred_def : public nonoverloaded_base }; SHAPE (count_pred) +/* uint64_t svfoo_t0(sv<t0>_t, uint64_t) + + where the final argument must be 2 or 4. */ +struct count_pred_c_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const override + { + build_all (b, "su64,v0,su64", group, MODE_none); + } + + bool + check (function_checker &c) const override + { + return c.require_immediate_either_or (1, 2, 4); + } +}; +SHAPE (count_pred_c) + /* uint64_t svfoo[_t0](sv<t0>_t). */ struct count_vector_def : public overloaded_base<0> { @@ -1903,32 +2299,80 @@ struct create_def : public overloaded_base<0> }; SHAPE (create) -/* sv<t0>_t svfoo[_n]_t0(<t0>_t, ..., <t0>_t) +/* void svfoo_lane_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1:int>_t, uint64_t) - where there are enough arguments to fill 128 bits of data (or to - control 128 bits of data in the case of predicates). */ -struct dupq_def : public overloaded_base<1> + where the final argument indexes a <t0>-sized group of elements in the + preceding vector argument. 
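+ For example, svusdot_lane_za32[_u8]_vg1x2 has the form (uint32_t, svuint8x2_t, svint8_t, uint64_t); its index selects one 32-bit group of four 8-bit elements.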
*/ +struct dot_za_slice_int_lane_def + : public binary_za_slice_lane_base<TYPE_signed> { + constexpr dot_za_slice_int_lane_def () + : binary_za_slice_lane_base<TYPE_signed> (0) {} + void build (function_builder &b, const function_group_info &group) const override { - /* The "_n" suffix is optional; the full name has it, but the short - name doesn't. */ - build_all (b, "v0,s0*q", group, MODE_n, true); - } - - tree - resolve (function_resolver &) const override - { - /* The short forms just make "_n" implicit, so no resolution is needed. */ - gcc_unreachable (); + b.add_overloaded_functions (group, MODE_none); + build_all (b, "_,su32,t1,vs1,su64", group, MODE_none); } }; -SHAPE (dupq) +SHAPE (dot_za_slice_int_lane) -/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0>_t, uint64_t) +/* void svfoo_lane_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1>_t, uint64_t) - where the final argument is an integer constant expression that when + where the final argument indexes a <t0>-sized group of elements in the + preceding vector argument. */ +struct dot_za_slice_lane_def : public binary_za_slice_lane_base<> +{ + constexpr dot_za_slice_lane_def () : binary_za_slice_lane_base<> (0) {} +}; +SHAPE (dot_za_slice_lane) + +/* void svfoo_lane_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1:uint>_t, uint64_t) + + where the final argument indexes a <t0>-sized group of elements in the + preceding vector argument. */ +struct dot_za_slice_uint_lane_def + : public binary_za_slice_lane_base<TYPE_unsigned> +{ + constexpr dot_za_slice_uint_lane_def () + : binary_za_slice_lane_base<TYPE_unsigned> (0) {} + + void + build (function_builder &b, const function_group_info &group) const override + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "_,su32,t1,vu1,su64", group, MODE_none); + } +}; +SHAPE (dot_za_slice_uint_lane) + +/* sv<t0>_t svfoo[_n]_t0(<t0>_t, ..., <t0>_t) + + where there are enough arguments to fill 128 bits of data (or to + control 128 bits of data in the case of predicates). */ +struct dupq_def : public overloaded_base<1> +{ + void + build (function_builder &b, const function_group_info &group) const override + { + /* The "_n" suffix is optional; the full name has it, but the short + name doesn't. */ + build_all (b, "v0,s0*q", group, MODE_n, true); + } + + tree + resolve (function_resolver &) const override + { + /* The short forms just make "_n" implicit, so no resolution is needed. */ + gcc_unreachable (); + } +}; +SHAPE (dupq) + +/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0>_t, uint64_t) + + where the final argument is an integer constant expression that when multiplied by the number of bytes in t0 is in the range [0, 255]. */ struct ext_def : public overloaded_base<0> { @@ -1954,6 +2398,24 @@ }; SHAPE (ext) +/* svboolx<g>_t svfoo_t0_g(svcount_t, uint64_t). */ +struct extract_pred_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const override + { + build_all (b, "tp,vc,su64", group, MODE_none); + } + + bool + check (function_checker &c) const override + { + unsigned int size = c.vectors_per_tuple (); + return c.require_immediate_range (1, 0, 4 / size - 1); + } +}; +SHAPE (extract_pred) + /* <t0>_t svfoo[_t0](<t0>_t, sv<t0>_t). */ struct fold_left_def : public overloaded_base<0> { @@ -2158,6 +2620,25 @@ }; SHAPE (inherent_za) +/* void svfoo_zt(uint64_t) + + where the argument must be zero. 
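+ The one current instance is svzero_zt(0); the argument selects the ZT register, of which only zt0 exists.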
*/ +struct inherent_zt_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const override + { + build_all (b, "_,su64", group, MODE_none); + } + + bool + check (function_checker &c) const override + { + return c.require_immediate_range (0, 0, 0); + } +}; +SHAPE (inherent_zt) + /* void svfoo_t0(uint64_t) where the argument is an integer constant that specifies an 8-bit mask. */ @@ -2192,8 +2673,27 @@ }; SHAPE (ldr_za) -/* sv<t0>[xN]_t svfoo[_t0](const <t0>_t *) - sv<t0>[xN]_t svfoo_vnum[_t0](const <t0>_t *, int64_t). */ +/* void svfoo_zt(uint64_t, const void *) + + where the first argument must be zero. */ +struct ldr_zt_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const override + { + build_all (b, "_,su64,al", group, MODE_none); + } + + bool + check (function_checker &c) const override + { + return c.require_immediate_range (0, 0, 0); + } +}; +SHAPE (ldr_zt) + +/* sv<t0>[xN]_t svfoo[_t0]_g(const <t0>_t *) + sv<t0>[xN]_t svfoo_vnum[_t0]_g(const <t0>_t *, int64_t). */ struct load_def : public load_contiguous_base { void @@ -2423,6 +2923,12 @@ }; SHAPE (load_za) +using luti2_lane_zt_def = luti_lane_zt_base<2>; +SHAPE (luti2_lane_zt) + +using luti4_lane_zt_def = luti_lane_zt_base<4>; +SHAPE (luti4_lane_zt) + /* svbool_t svfoo(enum svpattern). */ struct pattern_pred_def : public nonoverloaded_base { @@ -2517,6 +3023,23 @@ }; SHAPE (rdffr) +/* sv<t1>x<g>_t svfoo_t0_t1_g(uint64_t, uint32_t). */ +struct read_za_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const override + { + build_all (b, "t1,su64,su32", group, MODE_none); + } + + bool + check (function_checker &c) const override + { + return c.require_immediate_range (0, 0, c.num_za_tiles () - 1); + } +}; +SHAPE (read_za) + /* sv<t1>_t svfoo_t0[_t1](uint64_t, uint32_t) where the first two fields form a (ZA tile, slice) pair. */ @@ -2559,6 +3082,17 @@ }; SHAPE (read_za_m) +/* sv<t1>x<g>_t svfoo_t0_t1_g(uint32_t). */ +struct read_za_slice_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const override + { + build_all (b, "t1,su32", group, MODE_none); + } +}; +SHAPE (read_za_slice) + /* <t0>_t svfoo[_t0](sv<t0>_t). */ struct reduction_def : public overloaded_base<0> { @@ -2628,6 +3162,17 @@ }; SHAPE (reinterpret) +/* sv<t0>_t svfoo_t0(sv<t0>_t, svbool_t, uint32_t). */ +struct select_pred_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const override + { + build_all (b, "v0,v0,vp,su32", group, MODE_none); + } +}; +SHAPE (select_pred) + /* sv<t0>xN_t svfoo[_t0](sv<t0>xN_t, uint64_t, sv<t0>_t) where the second argument is an integer constant expression in the @@ -2797,6 +3342,42 @@ typedef shift_right_imm_narrow_wrapper<binary_imm_narrowt_base_unsigned, 2> shift_right_imm_narrowt_to_uint_def; SHAPE (shift_right_imm_narrowt_to_uint) +/* sv<t0>_t svfoo[_n_t0](sv<t0>_t, uint64_t) + + where the final argument must be an integer constant expression in the + range [1, sizeof (<t0>_t) * 8]. 
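+ An example instance would be svqrshr[_n]_u8[_u32_x4](svuint32x4_t, uint64_t), which narrows four 32-bit input vectors to a single vector of 8-bit elements.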
*/ +struct shift_right_imm_narrowxn_def : public overloaded_base<1> +{ + bool explicit_group_suffix_p () const override { return false; } + + void + build (function_builder &b, const function_group_info &group) const override + { + b.add_overloaded_functions (group, MODE_n); + build_all (b, "c0,c1,su64", group, MODE_n); + } + + tree + resolve (function_resolver &r) const override + { + sve_type type; + if (!r.check_num_arguments (2) + || !(type = r.infer_sve_type (0)) + || !r.require_integer_immediate (1)) + return error_mark_node; + return r.resolve_to (r.mode_suffix_id, type); + } + + bool + check (function_checker &c) const override + { + unsigned int suffix = c.group_suffix_id == GROUP_x4 ? 1 : 0; + unsigned int bits = c.type_suffix (suffix).element_bits; + return c.require_immediate_range (1, 1, bits); + } +}; +SHAPE (shift_right_imm_narrowxn) + /* void svfoo[_t0](<X>_t *, sv<t0>[xN]_t) void svfoo_vnum[_t0](<X>_t *, int64_t, sv<t0>[xN]_t) @@ -2948,6 +3529,37 @@ struct store_za_def : public nonoverloaded_base }; SHAPE (store_za) +/* void svfoo[_t0_g](<X>_t *, sv<t0>x<g>_t) + void svfoo_vnum[_t0_g](<X>_t *, int64_t, sv<t0>x<g>_t) + + where <X> might be tied to <t0> (for non-truncating stores) or might + depend on the function base name (for truncating stores). */ +struct storexn_def : public store_def +{ + bool explicit_group_suffix_p () const override { return false; } + + tree + resolve (function_resolver &r) const override + { + bool vnum_p = r.mode_suffix_id == MODE_vnum; + gcc_assert (r.mode_suffix_id == MODE_none || vnum_p); + + unsigned int nargs = vnum_p ? 4 : 3; + vector_type_index pred_type; + sve_type type; + if (!r.check_num_arguments (nargs) + || (pred_type = r.infer_predicate_type (0)) == NUM_VECTOR_TYPES + || !r.require_pointer_type (1) + || (vnum_p && !r.require_scalar_type (2, "int64_t")) + || !(type = r.infer_sve_type (nargs - 1)) + || !r.require_matching_predicate_type (pred_type, type)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; +SHAPE (storexn) + /* void svfoo_t0(uint32_t, void *) void svfoo_vnum_t0(uint32_t, void *, int64_t) @@ -2963,6 +3575,25 @@ struct str_za_def : public nonoverloaded_base }; SHAPE (str_za) +/* void svfoo_zt(uint64_t, void *) + + where the first argument must be zero. */ +struct str_zt_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const override + { + build_all (b, "_,su64,as", group, MODE_none); + } + + bool + check (function_checker &c) const override + { + return c.require_immediate_range (0, 0, 0); + } +}; +SHAPE (str_zt) + /* sv<t0>_t svfoo[_t0](sv<t0>xN_t, sv<t0:uint>_t). */ struct tbl_tuple_def : public overloaded_base<0> { @@ -3184,20 +3815,49 @@ struct ternary_opt_n_def : public overloaded_base<0> }; SHAPE (ternary_opt_n) -/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t, uint64_t) +/* A choice between: + + (1) sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t, + uint64_t) + + (2) sv<t0>_t svfoo[_t0_t1](sv<t0>_t, sv<t1>_t, sv<t1>_t, uint64_t) where the final argument is an integer constant expression in the range [0, 16 / sizeof (<t0>_t) - 1]. 
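+ Form (2) covers the SME2 two-way dot products, e.g. svdot_lane[_f32_f16_f16](svfloat32_t, svfloat16_t, svfloat16_t, uint64_t).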
*/ -struct ternary_qq_lane_def : public ternary_qq_lane_base<> +struct ternary_qq_or_011_lane_def : public ternary_qq_lane_base<> { void build (function_builder &b, const function_group_info &group) const override { b.add_overloaded_functions (group, MODE_none); - build_all (b, "v0,v0,vq0,vq0,su64", group, MODE_none); + if (group.types[0][1] == NUM_TYPE_SUFFIXES) + build_all (b, "v0,v0,vq0,vq0,su64", group, MODE_none); + else + build_all (b, "v0,v0,v1,v1,su64", group, MODE_none); + } + + tree + resolve (function_resolver &r) const override + { + unsigned int i, nargs; + type_suffix_index type0, type1; + if (!r.check_gp_argument (4, i, nargs) + || (type0 = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES + || (type1 = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES + || !r.require_matching_vector_type (i + 2, i + 1, type1) + || !r.require_integer_immediate (i + 3)) + return error_mark_node; + + if ((type_suffixes[type0].element_bits + == 4 * type_suffixes[type1].element_bits) + && type_suffixes[type0].tclass == type_suffixes[type1].tclass) + if (tree res = r.lookup_form (MODE_none, type0)) + return res; + + return r.resolve_to (r.mode_suffix_id, type0, type1); } }; -SHAPE (ternary_qq_lane) +SHAPE (ternary_qq_or_011_lane) /* svbool_t svfoo[_<t0>](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t, uint64_t) @@ -3240,24 +3900,64 @@ struct ternary_qq_lane_rotate_def : public overloaded_base<0> }; SHAPE (ternary_qq_lane_rotate) -/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t) - sv<t0>_t svfoo[_n_t0](sv<t0>_t, sv<t0:quarter>_t, <t0:quarter>_t) +/* A choice between: - i.e. a version of the standard ternary shape ternary_opt_n in which - the element type of the last two arguments is the quarter-sized - equivalent of <t0>. */ -struct ternary_qq_opt_n_def + (1) sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t) + sv<t0>_t svfoo[_n_t0](sv<t0>_t, sv<t0:quarter>_t, <t0:quarter>_t) + + i.e. a version of the standard ternary shape ternary_opt_n in which + the element type of the last two arguments is the quarter-sized + equivalent of <t0>. + + (2) sv<t0>_t svfoo[_t0_t1](sv<t0>_t, sv<t1>_t, sv<t1>_t) + + where the element type of the last two arguments is specified + explicitly. */ +struct ternary_qq_opt_n_or_011_def : public ternary_resize2_opt_n_base<function_resolver::QUARTER_SIZE> { void build (function_builder &b, const function_group_info &group) const override { b.add_overloaded_functions (group, MODE_none); - build_all (b, "v0,v0,vq0,vq0", group, MODE_none); - build_all (b, "v0,v0,vq0,sq0", group, MODE_n); + if (group.types[0][1] == NUM_TYPE_SUFFIXES) + { + build_all (b, "v0,v0,vq0,vq0", group, MODE_none); + build_all (b, "v0,v0,vq0,sq0", group, MODE_n); + } + else + build_all (b, "v0,v0,v1,v1", group, MODE_none); + } + + tree + resolve (function_resolver &r) const override + { + unsigned int i, nargs; + type_suffix_index type0, type1; + if (!r.check_gp_argument (3, i, nargs) + || (type0 = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES + || (type1 = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES + || !r.require_vector_or_scalar_type (i + 2)) + return error_mark_node; + + auto mode = r.scalar_argument_p (i + 2) ? 
MODE_n : MODE_none; + if (mode == MODE_none + && !r.require_matching_vector_type (i + 2, i + 1, type1)) + return error_mark_node; + + if ((type_suffixes[type0].element_bits + == 4 * type_suffixes[type1].element_bits) + && type_suffixes[type0].tclass == type_suffixes[type1].tclass) + if (tree res = r.lookup_form (mode, type0)) + return res; + + if (!r.require_nonscalar_type (i + 2)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type0, type1); } }; -SHAPE (ternary_qq_opt_n) +SHAPE (ternary_qq_opt_n_or_011) /* svbool_t svfoo[_<t0>](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t, uint64_t) @@ -3467,7 +4167,7 @@ struct unary_def : public overloaded_base<0> build (function_builder &b, const function_group_info &group) const override { b.add_overloaded_functions (group, MODE_none); - build_all (b, "v0,v0", group, MODE_none); + build_all (b, "t0,t0", group, MODE_none); } tree @@ -3488,7 +4188,7 @@ struct unary_convert_def : public overloaded_base<1> build (function_builder &b, const function_group_info &group) const override { b.add_overloaded_functions (group, MODE_none); - build_all (b, "v0,v1", group, MODE_none); + build_all (b, "c0,c1", group, MODE_none); } tree @@ -3529,6 +4229,38 @@ struct unary_convert_narrowt_def : public overloaded_base<1> }; SHAPE (unary_convert_narrowt) +/* sv<t0>x<g0>_t svfoo_t0[_t1_g](sv<t1>x<g1>_t) + + where the target type <t0> must be specified explicitly but the + source type <t1> can be inferred. + + Functions with a group suffix are unpredicated. For them: + + - If <t0> is N times wider than <t1>, the return value has N times + more vectors than the argument. + + - If <t1> is N times wider than <t0>, the argument has N times + more vectors than the return type. */ +struct unary_convertxn_def : public unary_convert_def +{ + bool explicit_group_suffix_p () const override { return false; } + + tree + resolve (function_resolver &r) const override + { + if (r.pred != PRED_none) + return unary_convert_def::resolve (r); + + sve_type type; + if (!r.check_num_arguments (1) + || !(type = r.infer_sve_type (0))) + return error_mark_node; + + return r.resolve_conversion (r.mode_suffix_id, type); + } +}; +SHAPE (unary_convertxn) + /* sv<t0>_t svfoo[_t0](sv<t0:half>_t). */ struct unary_long_def : public overloaded_base<0> { @@ -3757,6 +4489,83 @@ struct unary_za_m_def : public overloaded_base<1> }; SHAPE (unary_za_m) +/* void svfoo_t0[_t1]_g(uint32_t, sv<t1>x<g>_t). */ +struct unary_za_slice_def : public overloaded_base<1> +{ + void + build (function_builder &b, const function_group_info &group) const override + { + b.add_overloaded_functions (group, MODE_none); + if (!za_group_is_pure_overload (group)) + build_all (b, "_,su32,t1", group, MODE_none); + } + + tree + resolve (function_resolver &r) const override + { + sve_type type; + if (!r.check_num_arguments (2) + || !r.require_scalar_type (0, "uint32_t") + || !(type = r.infer_tuple_type (1))) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; +SHAPE (unary_za_slice) + +/* sv<t0>x<g>_t svfoo[_t0_g](sv<t0>x<g>_t). 
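+ For example, svrinta[_f32_x2](svfloat32x2_t) is the unpredicated tuple form; the predicated single-vector forms still resolve as plain unary.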
*/ +struct unaryxn_def : public unary_def +{ + bool explicit_group_suffix_p () const override { return false; } + + tree + resolve (function_resolver &r) const override + { + if (r.pred != PRED_none) + return unary_def::resolve (r); + + sve_type type; + if (!r.check_num_arguments (1) + || !(type = r.infer_sve_type (0))) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; +SHAPE (unaryxn) + +/* void svfoo_t0[_t1_g](uint64_t, uint32_t, sv<t1>x<g>_t). */ +struct write_za_def : public overloaded_base<1> +{ + void + build (function_builder &b, const function_group_info &group) const override + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "_,su64,su32,t1", group, MODE_none); + } + + tree + resolve (function_resolver &r) const override + { + sve_type type; + if (!r.check_num_arguments (3) + || !r.require_integer_immediate (0) + || !r.require_scalar_type (1, "uint32_t") + || !(type = r.infer_tuple_type (2))) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } + + bool + check (function_checker &c) const override + { + return c.require_immediate_range (0, 0, c.num_za_tiles () - 1); + } +}; +SHAPE (write_za) + /* void svfoo_t0[_t1](uint64_t, uint32_t, svbool_t, sv<t1>_t) where the first two fields form a (ZA tile, slice) pair. */ @@ -3791,4 +4600,28 @@ struct write_za_m_def : public overloaded_base<1> }; SHAPE (write_za_m) +/* void svfoo_t0[_t1_g](uint32_t, sv<t1>x<g>_t). */ +struct write_za_slice_def : public overloaded_base<1> +{ + void + build (function_builder &b, const function_group_info &group) const override + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "_,su32,t1", group, MODE_none); + } + + tree + resolve (function_resolver &r) const override + { + sve_type type; + if (!r.check_num_arguments (2) + || !r.require_scalar_type (0, "uint32_t") + || !(type = r.infer_tuple_type (1))) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; +SHAPE (write_za_slice) + } diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h index d64ddca73588510e775459f42e3ba1a0799abe4d..88af62df48ba5e60a311c9f9b6eef7a4cc1ac40a 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h @@ -40,6 +40,9 @@ namespace aarch64_sve one in which the argument is the usual vector, and one in which it is replaced by a scalar. + - an "_opt_single" suffix similarly says that the function can take + a vector or tuple argument, with the former having a "_single" suffix. + - "_int" and "_uint" replace the argument's element type with a signed or unsigned integer of the same width. The suffixes above then indicate whether this final argument is or might be a scalar. 
@@ -75,6 +78,7 @@ namespace aarch64_sve extern const function_shape *const adr_offset; extern const function_shape *const binary; extern const function_shape *const binary_int_opt_n; + extern const function_shape *const binary_int_opt_single_n; extern const function_shape *const binary_lane; extern const function_shape *const binary_long_lane; extern const function_shape *const binary_long_opt_n; @@ -82,9 +86,11 @@ namespace aarch64_sve extern const function_shape *const binary_narrowb_opt_n; extern const function_shape *const binary_narrowt_opt_n; extern const function_shape *const binary_opt_n; + extern const function_shape *const binary_opt_single_n; extern const function_shape *const binary_pred; extern const function_shape *const binary_rotate; extern const function_shape *const binary_scalar; + extern const function_shape *const binary_single; extern const function_shape *const binary_to_uint; extern const function_shape *const binary_uint; extern const function_shape *const binary_uint_n; @@ -95,21 +101,33 @@ namespace aarch64_sve extern const function_shape *const binary_wide_opt_n; extern const function_shape *const binary_za_int_m; extern const function_shape *const binary_za_m; + extern const function_shape *const binary_za_slice_lane; + extern const function_shape *const binary_za_slice_int_opt_single; + extern const function_shape *const binary_za_slice_opt_single; + extern const function_shape *const binary_za_slice_uint_opt_single; extern const function_shape *const binary_za_uint_m; + extern const function_shape *const binaryxn; extern const function_shape *const bool_inherent; + extern const function_shape *const clamp; extern const function_shape *const clast; extern const function_shape *const compare; extern const function_shape *const compare_opt_n; extern const function_shape *const compare_ptr; extern const function_shape *const compare_scalar; + extern const function_shape *const compare_scalar_count; extern const function_shape *const compare_wide_opt_n; extern const function_shape *const count_inherent; extern const function_shape *const count_pat; extern const function_shape *const count_pred; + extern const function_shape *const count_pred_c; extern const function_shape *const count_vector; extern const function_shape *const create; + extern const function_shape *const dot_za_slice_int_lane; + extern const function_shape *const dot_za_slice_lane; + extern const function_shape *const dot_za_slice_uint_lane; extern const function_shape *const dupq; extern const function_shape *const ext; + extern const function_shape *const extract_pred; extern const function_shape *const fold_left; extern const function_shape *const get; extern const function_shape *const inc_dec; @@ -119,7 +137,9 @@ namespace aarch64_sve extern const function_shape *const inherent; extern const function_shape *const inherent_b; extern const function_shape *const inherent_za; + extern const function_shape *const inherent_zt; extern const function_shape *const inherent_mask_za; + extern const function_shape *const ldr_zt; extern const function_shape *const ldr_za; extern const function_shape *const load; extern const function_shape *const load_ext; @@ -132,6 +152,8 @@ namespace aarch64_sve extern const function_shape *const load_gather_vs; extern const function_shape *const load_replicate; extern const function_shape *const load_za; + extern const function_shape *const luti2_lane_zt; + extern const function_shape *const luti4_lane_zt; extern const function_shape *const mmla; extern const 
function_shape *const pattern_pred; extern const function_shape *const prefetch; @@ -139,10 +161,13 @@ namespace aarch64_sve extern const function_shape *const prefetch_gather_offset; extern const function_shape *const ptest; extern const function_shape *const rdffr; + extern const function_shape *const read_za; extern const function_shape *const read_za_m; + extern const function_shape *const read_za_slice; extern const function_shape *const reduction; extern const function_shape *const reduction_wide; extern const function_shape *const reinterpret; + extern const function_shape *const select_pred; extern const function_shape *const set; extern const function_shape *const setffr; extern const function_shape *const shift_left_imm_long; @@ -150,6 +175,7 @@ namespace aarch64_sve extern const function_shape *const shift_right_imm; extern const function_shape *const shift_right_imm_narrowb; extern const function_shape *const shift_right_imm_narrowt; + extern const function_shape *const shift_right_imm_narrowxn; extern const function_shape *const shift_right_imm_narrowb_to_uint; extern const function_shape *const shift_right_imm_narrowt_to_uint; extern const function_shape *const store; @@ -158,7 +184,9 @@ namespace aarch64_sve extern const function_shape *const store_scatter_offset; extern const function_shape *const store_scatter_offset_restricted; extern const function_shape *const store_za; + extern const function_shape *const storexn; extern const function_shape *const str_za; + extern const function_shape *const str_zt; extern const function_shape *const tbl_tuple; extern const function_shape *const ternary_bfloat; extern const function_shape *const ternary_bfloat_lane; @@ -171,9 +199,9 @@ namespace aarch64_sve extern const function_shape *const ternary_long_lane; extern const function_shape *const ternary_long_opt_n; extern const function_shape *const ternary_opt_n; - extern const function_shape *const ternary_qq_lane; + extern const function_shape *const ternary_qq_or_011_lane; extern const function_shape *const ternary_qq_lane_rotate; - extern const function_shape *const ternary_qq_opt_n; + extern const function_shape *const ternary_qq_opt_n_or_011; extern const function_shape *const ternary_qq_rotate; extern const function_shape *const ternary_rotate; extern const function_shape *const ternary_shift_left_imm; @@ -186,6 +214,7 @@ namespace aarch64_sve extern const function_shape *const unary; extern const function_shape *const unary_convert; extern const function_shape *const unary_convert_narrowt; + extern const function_shape *const unary_convertxn; extern const function_shape *const unary_long; extern const function_shape *const unary_n; extern const function_shape *const unary_narrowb; @@ -198,7 +227,11 @@ namespace aarch64_sve extern const function_shape *const unary_uint; extern const function_shape *const unary_widen; extern const function_shape *const unary_za_m; + extern const function_shape *const unary_za_slice; + extern const function_shape *const unaryxn; + extern const function_shape *const write_za; extern const function_shape *const write_za_m; + extern const function_shape *const write_za_slice; } } diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc index e1df6ce0d30c05df2fa0e06964ace84d73ab22bc..8d06a72f38495892ec5426d61b35762599e0934f 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc @@ -47,7 +47,7 @@ using namespace aarch64_sve; namespace { -class 
load_store_za_base : public function_base +class load_store_za_zt0_base : public function_base { public: tree @@ -66,7 +66,7 @@ public: expand (function_expander &e) const override { auto za_mode = e.vector_mode (0); - auto z_mode = e.vector_mode (1); + auto z_mode = e.tuple_mode (1); auto icode = (za_mode == VNx1TImode ? code_for_aarch64_sme (m_unspec, za_mode, z_mode) : code_for_aarch64_sme (m_unspec, z_mode, z_mode)); @@ -76,11 +76,11 @@ public: int m_unspec; }; -using load_za_base = add_call_properties<load_store_za_base, +using load_za_base = add_call_properties<load_store_za_zt0_base, CP_READ_MEMORY | CP_READ_ZA | CP_WRITE_ZA>; -using store_za_base = add_call_properties<load_store_za_base, +using store_za_base = add_call_properties<load_store_za_zt0_base, CP_WRITE_MEMORY | CP_READ_ZA>; /* E is a load or store intrinsic that accesses a ZA slice of mode MEM_MODE. @@ -161,6 +161,17 @@ expand_ldr_str_za (function_expander &e, insn_code base_code, } } +/* Use instruction ICODE to expand ZT0 load or store E. */ + +static rtx +expand_ldr_str_zt0 (function_expander &e, insn_code icode) +{ + rtx base = e.convert_to_pmode (e.args[1]); + rtx mem = gen_rtx_MEM (V8DImode, force_reg (Pmode, base)); + e.add_fixed_operand (mem); + return e.generate_insn (icode); +} + /* Expand ZA LD1 or ST1 intrinsic E. UNSPEC is the load or store unspec. IS_LOAD is true if E is a load, false if it is a store. */ @@ -309,6 +320,55 @@ public: } }; +class svldr_zt_impl : public load_store_za_zt0_base +{ +public: + unsigned int + call_properties (const function_instance &) const override + { + return CP_READ_MEMORY | CP_WRITE_ZT0; + } + + rtx + expand (function_expander &e) const override + { + return expand_ldr_str_zt0 (e, CODE_FOR_aarch64_sme_ldr_zt0); + } +}; + +class svluti_lane_zt_impl : public read_zt0<function_base> +{ +public: + CONSTEXPR svluti_lane_zt_impl (unsigned int bits) : m_bits (bits) {} + + rtx + expand (function_expander &e) const override + { + auto mode = e.tuple_mode (0); + e.args.ordered_remove (0); + return e.use_exact_insn (code_for_aarch64_sme_lut (m_bits, mode)); + } + + unsigned int m_bits; +}; + +class svread_za_impl : public function_base +{ +public: + unsigned int + call_properties (const function_instance &) const override + { + return CP_READ_ZA; + } + + rtx + expand (function_expander &e) const override + { + machine_mode mode = e.vectors_per_tuple () == 4 ? 
VNx8DImode : VNx4DImode; + return e.use_exact_insn (code_for_aarch64_sme_read (mode)); + } +}; + using svread_za_tile_impl = add_call_properties<read_write_za_base, CP_READ_ZA>; @@ -337,6 +397,41 @@ public: } }; +class svstr_zt_impl : public load_store_za_zt0_base +{ +public: + unsigned int + call_properties (const function_instance &) const override + { + return CP_WRITE_MEMORY | CP_READ_ZT0; + } + + rtx + expand (function_expander &e) const override + { + return expand_ldr_str_zt0 (e, CODE_FOR_aarch64_sme_str_zt0); + } +}; + +class svsudot_za_impl : public read_write_za<function_base> +{ +public: + rtx + expand (function_expander &e) const override + { + if (e.mode_suffix_id == MODE_single) + { + auto icode = code_for_aarch64_sme_single_sudot (e.vector_mode (0), + e.tuple_mode (1)); + return e.use_exact_insn (icode); + } + std::swap (e.args[1], e.args[2]); + return e.use_exact_insn (code_for_aarch64_sme (UNSPEC_SME_USDOT, + e.vector_mode (0), + e.tuple_mode (1))); + } +}; + class svundef_za_impl : public write_za<function_base> { public: @@ -349,6 +444,24 @@ public: } }; +class svwrite_za_impl : public function_base +{ +public: + unsigned int + call_properties (const function_instance &) const override + { + return CP_WRITE_ZA; + } + + rtx + expand (function_expander &e) const override + { + machine_mode mode = e.vectors_per_tuple () == 4 ? VNx8DImode : VNx4DImode; + e.args[1] = lowpart_subreg (mode, e.args[1], e.tuple_mode (1)); + return e.use_exact_insn (code_for_aarch64_sme_write (mode)); + } +}; + using svwrite_za_tile_impl = add_call_properties<read_write_za_base, CP_READ_ZA | CP_WRITE_ZA>; @@ -373,40 +486,94 @@ public: } }; +class svzero_zt_impl : public write_zt0<function_base> +{ +public: + rtx + expand (function_expander &) const override + { + emit_insn (gen_aarch64_sme_zero_zt0 ()); + return const0_rtx; + } +}; + } /* end anonymous namespace */ namespace aarch64_sve { FUNCTION (arm_has_sme, arm_has_sme_impl, ) FUNCTION (arm_in_streaming_mode, arm_in_streaming_mode_impl, ) +FUNCTION (svadd_za, sme_1mode_function, (UNSPEC_SME_ADD, UNSPEC_SME_ADD, + UNSPEC_SME_FADD)) +FUNCTION (svadd_write_za, sme_1mode_function, (UNSPEC_SME_ADD_WRITE, + UNSPEC_SME_ADD_WRITE, -1)) FUNCTION (svaddha_za, sme_1mode_function, (UNSPEC_SME_ADDHA, UNSPEC_SME_ADDHA, -1)) FUNCTION (svaddva_za, sme_1mode_function, (UNSPEC_SME_ADDVA, UNSPEC_SME_ADDVA, -1)) +FUNCTION (svbmopa_za, sme_2mode_function, (-1, UNSPEC_SME_BMOPA, -1)) +FUNCTION (svbmops_za, sme_2mode_function, (-1, UNSPEC_SME_BMOPS, -1)) FUNCTION (svcntsb, svcnts_bhwd_impl, (VNx16QImode)) FUNCTION (svcntsd, svcnts_bhwd_impl, (VNx2DImode)) FUNCTION (svcntsh, svcnts_bhwd_impl, (VNx8HImode)) FUNCTION (svcntsw, svcnts_bhwd_impl, (VNx4SImode)) +FUNCTION (svdot_za, sme_2mode_function, (UNSPEC_SME_SDOT, UNSPEC_SME_UDOT, + UNSPEC_SME_FDOT)) +FUNCTION (svdot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SDOT, + UNSPEC_SME_UDOT, + UNSPEC_SME_FDOT)) FUNCTION (svld1_hor_za, svld1_za_impl, (UNSPEC_SME_LD1_HOR)) FUNCTION (svld1_ver_za, svld1_za_impl, (UNSPEC_SME_LD1_VER)) FUNCTION (svldr_za, svldr_za_impl, ) +FUNCTION (svldr_zt, svldr_zt_impl, ) +FUNCTION (svluti2_lane_zt, svluti_lane_zt_impl, (2)) +FUNCTION (svluti4_lane_zt, svluti_lane_zt_impl, (4)) +FUNCTION (svmla_za, sme_2mode_function, (UNSPEC_SME_SMLA, UNSPEC_SME_UMLA, + UNSPEC_SME_FMLA)) +FUNCTION (svmla_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SMLA, + UNSPEC_SME_UMLA, + UNSPEC_SME_FMLA)) +FUNCTION (svmls_za, sme_2mode_function, (UNSPEC_SME_SMLS, UNSPEC_SME_UMLS, + UNSPEC_SME_FMLS)) +FUNCTION 
(svmls_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SMLS, + UNSPEC_SME_UMLS, + UNSPEC_SME_FMLS)) FUNCTION (svmopa_za, sme_2mode_function, (UNSPEC_SME_SMOPA, UNSPEC_SME_UMOPA, UNSPEC_SME_FMOPA)) FUNCTION (svmops_za, sme_2mode_function, (UNSPEC_SME_SMOPS, UNSPEC_SME_UMOPS, UNSPEC_SME_FMOPS)) +FUNCTION (svread_za, svread_za_impl,) FUNCTION (svread_hor_za, svread_za_tile_impl, (UNSPEC_SME_READ_HOR)) FUNCTION (svread_ver_za, svread_za_tile_impl, (UNSPEC_SME_READ_VER)) FUNCTION (svst1_hor_za, svst1_za_impl, (UNSPEC_SME_ST1_HOR)) FUNCTION (svst1_ver_za, svst1_za_impl, (UNSPEC_SME_ST1_VER)) FUNCTION (svstr_za, svstr_za_impl, ) +FUNCTION (svstr_zt, svstr_zt_impl, ) +FUNCTION (svsub_za, sme_1mode_function, (UNSPEC_SME_SUB, UNSPEC_SME_SUB, + UNSPEC_SME_FSUB)) +FUNCTION (svsub_write_za, sme_1mode_function, (UNSPEC_SME_SUB_WRITE, + UNSPEC_SME_SUB_WRITE, -1)) +FUNCTION (svsudot_za, svsudot_za_impl,) +FUNCTION (svsudot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SUDOT, -1, -1)) +FUNCTION (svsuvdot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SUVDOT, + -1, -1)) FUNCTION (svsumopa_za, sme_2mode_function, (UNSPEC_SME_SUMOPA, -1, -1)) FUNCTION (svsumops_za, sme_2mode_function, (UNSPEC_SME_SUMOPS, -1, -1)) FUNCTION (svundef_za, svundef_za_impl, ) +FUNCTION (svusdot_za, sme_2mode_function, (-1, UNSPEC_SME_USDOT, -1)) +FUNCTION (svusdot_lane_za, sme_2mode_lane_function, (-1, UNSPEC_SME_USDOT, -1)) +FUNCTION (svusvdot_lane_za, sme_2mode_lane_function, (-1, UNSPEC_SME_USVDOT, + -1)) FUNCTION (svusmopa_za, sme_2mode_function, (-1, UNSPEC_SME_USMOPA, -1)) FUNCTION (svusmops_za, sme_2mode_function, (-1, UNSPEC_SME_USMOPS, -1)) +FUNCTION (svvdot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SVDOT, + UNSPEC_SME_UVDOT, + UNSPEC_SME_FVDOT)) +FUNCTION (svwrite_za, svwrite_za_impl,) FUNCTION (svwrite_hor_za, svwrite_za_tile_impl, (UNSPEC_SME_WRITE_HOR)) FUNCTION (svwrite_ver_za, svwrite_za_tile_impl, (UNSPEC_SME_WRITE_VER)) FUNCTION (svzero_mask_za, svzero_mask_za_impl, ) FUNCTION (svzero_za, svzero_za_impl, ) +FUNCTION (svzero_zt, svzero_zt_impl, ) } /* end namespace aarch64_sve */ diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def b/gcc/config/aarch64/aarch64-sve-builtins-sme.def index 5bdcc93f40f59a357614a34c8da863c65ccbe259..5f76d0014809435ec3e217a95c02aa5d4261cb0e 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sme.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def @@ -74,3 +74,125 @@ DEF_SME_ZA_FUNCTION (svusmops, binary_za_int_m, mop_i16i64_unsigned, za_m) DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, za_d_float, za_m) DEF_SME_ZA_FUNCTION (svmops, binary_za_m, za_d_float, za_m) #undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS AARCH64_FL_SME2 +DEF_SVE_FUNCTION (svldr_zt, ldr_zt, none, none) +DEF_SVE_FUNCTION (svstr_zt, str_zt, none, none) +DEF_SVE_FUNCTION (svzero_zt, inherent_zt, none, none) +#undef REQUIRED_EXTENSIONS + +/* The d_za entries in this section just declare C _za64 overloads, + which will then be resolved to either an integer function or a + floating-point function. They are needed because the integer and + floating-point functions have different architecture requirements. 
*/ +#define REQUIRED_EXTENSIONS AARCH64_FL_SME2 | AARCH64_FL_SM_ON +DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_s_data, vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, d_za, vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svadd_write, binary_za_slice_opt_single, za_s_integer, + vg1x24, none) +DEF_SME_ZA_FUNCTION (svbmopa, binary_za_m, za_s_unsigned, za_m) +DEF_SME_ZA_FUNCTION (svbmops, binary_za_m, za_s_unsigned, za_m) +DEF_SME_ZA_FUNCTION_GS (svdot, binary_za_slice_opt_single, za_s_h_data, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svdot, binary_za_slice_opt_single, za_s_b_integer, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svdot_lane, dot_za_slice_lane, za_s_h_data, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svdot_lane, dot_za_slice_lane, za_s_b_integer, + vg1x24, none) +DEF_SVE_FUNCTION_GS (svluti2_lane_zt, luti2_lane_zt, bhs_data, x124, none) +DEF_SVE_FUNCTION_GS (svluti4_lane_zt, luti4_lane_zt, bhs_data, x12, none) +DEF_SVE_FUNCTION_GS (svluti4_lane_zt, luti4_lane_zt, hs_data, x4, none) +DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_s_float, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_s_h_data, + vg2, none) +DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_s_b_integer, + vg4, none) +DEF_SME_ZA_FUNCTION_GS (svmla_lane, binary_za_slice_lane, za_s_float, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svmla_lane, binary_za_slice_lane, za_s_h_data, + vg2, none) +DEF_SME_ZA_FUNCTION_GS (svmla_lane, binary_za_slice_lane, za_s_b_integer, + vg4, none) +DEF_SME_ZA_FUNCTION_GS (svmls, binary_za_slice_opt_single, za_s_float, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svmls, binary_za_slice_opt_single, za_s_h_data, + vg2, none) +DEF_SME_ZA_FUNCTION_GS (svmls, binary_za_slice_opt_single, za_s_b_integer, + vg4, none) +DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_s_float, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_s_h_data, + vg2, none) +DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_s_b_integer, + vg4, none) +DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, za_s_h_integer, za_m) +DEF_SME_ZA_FUNCTION (svmops, binary_za_m, za_s_h_integer, za_m) +DEF_SME_ZA_FUNCTION_GS (svread, read_za_slice, za_bhsd_data, vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svread_hor, read_za, za_bhsd_data, vg24, none) +DEF_SME_ZA_FUNCTION_GS (svread_ver, read_za, za_bhsd_data, vg24, none) +DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_s_data, vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, d_za, vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svsub_write, binary_za_slice_opt_single, za_s_integer, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svsudot, binary_za_slice_uint_opt_single, + za_s_b_signed, vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svsudot_lane, dot_za_slice_uint_lane, + za_s_b_signed, vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svsuvdot_lane, dot_za_slice_uint_lane, + za_s_b_signed, vg1x4, none) +DEF_SME_ZA_FUNCTION_GS (svusdot, binary_za_slice_int_opt_single, + za_s_b_unsigned, vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svusdot_lane, dot_za_slice_int_lane, + za_s_b_unsigned, vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svusvdot_lane, dot_za_slice_int_lane, + za_s_b_unsigned, vg1x4, none) +DEF_SME_ZA_FUNCTION_GS (svvdot_lane, dot_za_slice_lane, za_s_h_data, + vg1x2, none) +DEF_SME_ZA_FUNCTION_GS (svvdot_lane, dot_za_slice_lane, za_s_b_integer, + vg1x4, none) +DEF_SME_ZA_FUNCTION_GS (svwrite, write_za_slice, za_bhsd_data, vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svwrite_hor, write_za, za_bhsd_data, vg24, none) 
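As a concrete, hedged illustration of how the vg1x24 entries above surface in user code (the prototype spellings below come from the SME2 ACLE specification, not from this patch), a _vg1x2 dot product and addition into ZA look like this:

  #include <arm_sme.h>

  /* Accumulate two-vector dot products into consecutive ZA.S slices
     (svdot_za32[_s8]_vg1x2) and add a pair of 32-bit vectors to the
     same slices (svadd_za32[_s32]_vg1x2).  */
  void
  use_vg1x2 (uint32_t slice, svint8x2_t zn, svint8x2_t zm, svint32x2_t zs)
    __arm_streaming __arm_inout ("za")
  {
    svdot_za32_s8_vg1x2 (slice, zn, zm);
    svadd_za32_s32_vg1x2 (slice, zs);
  }

Each vg1x24 group expands to both a _vg1x2 and a _vg1x4 overload (see the groups_vg1x24 table later in the patch); the declaration list continues below.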
+DEF_SME_ZA_FUNCTION_GS (svwrite_ver, write_za, za_bhsd_data, vg24, none) +#undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS (AARCH64_FL_SME2 \ + | AARCH64_FL_SME_I16I64 \ + | AARCH64_FL_SM_ON) +DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_d_integer, vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svadd_write, binary_za_slice_opt_single, za_d_integer, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svdot, binary_za_slice_opt_single, za_d_h_integer, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svdot_lane, dot_za_slice_lane, za_d_h_integer, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_d_h_integer, + vg4, none) +DEF_SME_ZA_FUNCTION_GS (svmla_lane, binary_za_slice_lane, za_d_h_integer, + vg4, none) +DEF_SME_ZA_FUNCTION_GS (svmls, binary_za_slice_opt_single, za_d_h_integer, + vg4, none) +DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_d_h_integer, + vg4, none) +DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_d_integer, vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svsub_write, binary_za_slice_opt_single, za_d_integer, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svvdot_lane, dot_za_slice_lane, za_d_h_integer, + vg1x4, none) +#undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS (AARCH64_FL_SME2 \ + | AARCH64_FL_SME_F64F64 \ + | AARCH64_FL_SM_ON) +DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_d_float, vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_d_float, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svmla_lane, binary_za_slice_lane, za_d_float, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svmls, binary_za_slice_opt_single, za_d_float, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_d_float, + vg1x24, none) +DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_d_float, vg1x24, none) +#undef REQUIRED_EXTENSIONS diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.h b/gcc/config/aarch64/aarch64-sve-builtins-sme.h index acfed77006b5ee7eb59e60094ee2d258ea8679d6..69aca0f9a759b9fda3fa2ba4b13466d7f5fac61d 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sme.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.h @@ -26,31 +26,57 @@ namespace aarch64_sve { extern const function_base *const arm_has_sme; extern const function_base *const arm_in_streaming_mode; + extern const function_base *const svadd_za; + extern const function_base *const svadd_write_za; extern const function_base *const svaddha_za; extern const function_base *const svaddva_za; + extern const function_base *const svbmopa_za; + extern const function_base *const svbmops_za; extern const function_base *const svcntsb; extern const function_base *const svcntsd; extern const function_base *const svcntsh; extern const function_base *const svcntsw; + extern const function_base *const svdot_za; + extern const function_base *const svdot_lane_za; extern const function_base *const svld1_hor_za; extern const function_base *const svld1_ver_za; extern const function_base *const svldr_za; + extern const function_base *const svldr_zt; + extern const function_base *const svluti2_lane_zt; + extern const function_base *const svluti4_lane_zt; + extern const function_base *const svmla_za; + extern const function_base *const svmla_lane_za; + extern const function_base *const svmls_za; + extern const function_base *const svmls_lane_za; extern const function_base *const svmopa_za; extern const function_base *const svmops_za; + extern const function_base *const svread_za; extern const function_base *const svread_hor_za; extern const function_base *const svread_ver_za; 
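For reference, a hedged sketch of the new ZT0 intrinsics declared above (svldr_zt, svstr_zt, svzero_zt); per the SME2 ACLE, the uint64_t first argument selects the ZT register and must be 0. Matching their .def entries, which require only AARCH64_FL_SME2 and not AARCH64_FL_SM_ON, they need ZT0 state but not streaming mode:

  #include <arm_sme.h>

  /* Spill the 512-bit ZT0 register, clear it, then reload it.  */
  void
  save_clear_restore_zt0 (void *buf) __arm_inout ("zt0")
  {
    svstr_zt (0, buf);	/* CP_WRITE_MEMORY | CP_READ_ZT0 */
    svzero_zt (0);	/* CP_WRITE_ZT0 */
    svldr_zt (0, buf);	/* CP_READ_MEMORY | CP_WRITE_ZT0 */
  }

The comments show the call properties that the corresponding *_impl classes report, which is what keeps these calls ordered with respect to other reads and writes of ZT0.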
extern const function_base *const svst1_hor_za; extern const function_base *const svst1_ver_za; extern const function_base *const svstr_za; + extern const function_base *const svstr_zt; + extern const function_base *const svsub_za; + extern const function_base *const svsub_write_za; + extern const function_base *const svsudot_za; + extern const function_base *const svsudot_lane_za; + extern const function_base *const svsuvdot_lane_za; extern const function_base *const svsumopa_za; extern const function_base *const svsumops_za; + extern const function_base *const svusdot_za; + extern const function_base *const svusdot_lane_za; + extern const function_base *const svusvdot_lane_za; extern const function_base *const svusmopa_za; extern const function_base *const svusmops_za; + extern const function_base *const svwrite_za; extern const function_base *const svwrite_hor_za; extern const function_base *const svwrite_ver_za; extern const function_base *const svundef_za; - extern const function_base *const svzero_za; + extern const function_base *const svvdot_lane_za; extern const function_base *const svzero_mask_za; + extern const function_base *const svzero_za; + extern const function_base *const svzero_zt; } } diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc index 73f9e5a899c823008ec14966d7576881c31679cd..045e0d0d28df3ed6d8a79774195f656f3a292816 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc @@ -116,6 +116,39 @@ public: } }; +class svclamp_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const override + { + auto mode = e.tuple_mode (0); + insn_code icode; + if (e.type_suffix (0).float_p) + icode = (e.vectors_per_tuple () > 1 + ? code_for_aarch64_sve_fclamp_single (mode) + : code_for_aarch64_sve_fclamp (mode)); + else + { + auto max = e.type_suffix (0).unsigned_p ? UMAX : SMAX; + icode = (e.vectors_per_tuple () > 1 + ? code_for_aarch64_sve_clamp_single (max, mode) + : code_for_aarch64_sve_clamp (max, mode)); + } + return e.use_exact_insn (icode); + } +}; + +class svcvtn_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const override + { + return e.use_exact_insn (code_for_aarch64_sve_cvtn (e.result_mode ())); + } +}; + class svldnt1_gather_impl : public full_width_access { public: @@ -188,6 +221,30 @@ public: } }; +class svpext_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const override + { + unsigned int bits = e.type_suffix (0).element_bits; + return e.use_exact_insn (e.vectors_per_tuple () == 2 + ? code_for_aarch64_sve_pextx2 (bits) + : code_for_aarch64_sve_pext (bits)); + } +}; + +class svpsel_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const override + { + unsigned int bits = e.type_suffix (0).element_bits; + return e.use_exact_insn (code_for_aarch64_sve_psel (bits)); + } +}; + class svqcadd_impl : public function_base { public: @@ -255,8 +312,9 @@ public: /* The saturation has no effect, and [SU]RSHL has immediate forms that we can use for sensible shift amounts. 
*/ function_instance instance ("svrshl", functions::svrshl, - shapes::binary_int_opt_n, MODE_n, - f.type_suffix_ids, GROUP_none, f.pred); + shapes::binary_int_opt_single_n, + MODE_n, f.type_suffix_ids, GROUP_none, + f.pred); return f.redirect_call (instance); } } @@ -309,6 +367,9 @@ public: gimple * fold (gimple_folder &f) const override { + if (f.vectors_per_tuple () > 1) + return nullptr; + if (tree amount = uniform_integer_cst_p (gimple_call_arg (f.call, 2))) { if (wi::to_widest (amount) >= 0) @@ -349,7 +410,7 @@ public: machine_mode mode = e.vector_mode (0); if (e.pred == PRED_x && aarch64_sve_sqadd_sqsub_immediate_p (mode, e.args[2], false)) - return e.map_to_rtx_codes (UNKNOWN, US_PLUS, -1); + return e.map_to_rtx_codes (UNKNOWN, US_PLUS, -1, -1); return e.map_to_unspecs (-1, UNSPEC_USQADD, -1); } }; @@ -412,6 +473,19 @@ public: } }; +class svunpk_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const override + { + optab op = (e.type_suffix (0).unsigned_p ? zext_optab : sext_optab); + insn_code icode = convert_optab_handler (op, e.result_mode (), + GET_MODE (e.args[0])); + return e.use_exact_insn (icode); + } +}; + class svuqadd_impl : public function_base { public: @@ -474,13 +548,21 @@ FUNCTION (svaesmc, fixed_insn_function, (CODE_FOR_aarch64_sve2_aesmc)) FUNCTION (svbcax, CODE_FOR_MODE0 (aarch64_sve2_bcax),) FUNCTION (svbdep, unspec_based_function, (UNSPEC_BDEP, UNSPEC_BDEP, -1)) FUNCTION (svbext, unspec_based_function, (UNSPEC_BEXT, UNSPEC_BEXT, -1)) +FUNCTION (svbfmlslb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlslbvnx4sf)) +FUNCTION (svbfmlslb_lane, fixed_insn_function, + (CODE_FOR_aarch64_sve_bfmlslb_lanevnx4sf)) +FUNCTION (svbfmlslt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlsltvnx4sf)) +FUNCTION (svbfmlslt_lane, fixed_insn_function, + (CODE_FOR_aarch64_sve_bfmlslt_lanevnx4sf)) FUNCTION (svbgrp, unspec_based_function, (UNSPEC_BGRP, UNSPEC_BGRP, -1)) FUNCTION (svbsl, CODE_FOR_MODE0 (aarch64_sve2_bsl),) FUNCTION (svbsl1n, CODE_FOR_MODE0 (aarch64_sve2_bsl1n),) FUNCTION (svbsl2n, CODE_FOR_MODE0 (aarch64_sve2_bsl2n),) FUNCTION (svcdot, svcdot_impl,) FUNCTION (svcdot_lane, svcdot_lane_impl,) +FUNCTION (svclamp, svclamp_impl,) FUNCTION (svcvtlt, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTLT)) +FUNCTION (svcvtn, svcvtn_impl,) FUNCTION (svcvtx, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTX)) FUNCTION (svcvtxnt, CODE_FOR_MODE1 (aarch64_sve2_cvtxnt),) FUNCTION (sveor3, CODE_FOR_MODE0 (aarch64_sve2_eor3),) @@ -537,13 +619,19 @@ FUNCTION (svmullt_lane, unspec_based_lane_function, (UNSPEC_SMULLT, UNSPEC_UMULLT, -1)) FUNCTION (svnbsl, CODE_FOR_MODE0 (aarch64_sve2_nbsl),) FUNCTION (svnmatch, svmatch_svnmatch_impl, (UNSPEC_NMATCH)) +FUNCTION (svpext, svpext_impl,) FUNCTION (svpmul, CODE_FOR_MODE0 (aarch64_sve2_pmul),) FUNCTION (svpmullb, unspec_based_function, (-1, UNSPEC_PMULLB, -1)) FUNCTION (svpmullb_pair, unspec_based_function, (-1, UNSPEC_PMULLB_PAIR, -1)) FUNCTION (svpmullt, unspec_based_function, (-1, UNSPEC_PMULLT, -1)) FUNCTION (svpmullt_pair, unspec_based_function, (-1, UNSPEC_PMULLT_PAIR, -1)) +FUNCTION (svpsel, svpsel_impl,) FUNCTION (svqabs, rtx_code_function, (SS_ABS, UNKNOWN, UNKNOWN)) FUNCTION (svqcadd, svqcadd_impl,) +FUNCTION (svqcvt, integer_conversion, (UNSPEC_SQCVT, UNSPEC_SQCVTU, + UNSPEC_UQCVT, -1)) +FUNCTION (svqcvtn, integer_conversion, (UNSPEC_SQCVTN, UNSPEC_SQCVTUN, + UNSPEC_UQCVTN, -1)) FUNCTION (svqdmlalb, unspec_based_qadd_function, (UNSPEC_SQDMULLB, -1, -1)) FUNCTION (svqdmlalb_lane, 
unspec_based_qadd_lane_function, (UNSPEC_SQDMULLB, -1, -1)) @@ -579,10 +667,16 @@ FUNCTION (svqrdmlsh, unspec_based_function, (UNSPEC_SQRDMLSH, -1, -1)) FUNCTION (svqrdmlsh_lane, unspec_based_lane_function, (UNSPEC_SQRDMLSH, -1, -1)) FUNCTION (svqrshl, svqrshl_impl,) +FUNCTION (svqrshr, unspec_based_uncond_function, (UNSPEC_SQRSHR, + UNSPEC_UQRSHR, -1, 1)) +FUNCTION (svqrshrn, unspec_based_uncond_function, (UNSPEC_SQRSHRN, + UNSPEC_UQRSHRN, -1, 1)) FUNCTION (svqrshrnb, unspec_based_function, (UNSPEC_SQRSHRNB, UNSPEC_UQRSHRNB, -1)) FUNCTION (svqrshrnt, unspec_based_function, (UNSPEC_SQRSHRNT, UNSPEC_UQRSHRNT, -1)) +FUNCTION (svqrshru, unspec_based_uncond_function, (UNSPEC_SQRSHRU, -1, -1, 1)) +FUNCTION (svqrshrun, unspec_based_uncond_function, (UNSPEC_SQRSHRUN, -1, -1, 1)) FUNCTION (svqrshrunb, unspec_based_function, (UNSPEC_SQRSHRUNB, -1, -1)) FUNCTION (svqrshrunt, unspec_based_function, (UNSPEC_SQRSHRUNT, -1, -1)) FUNCTION (svqshl, svqshl_impl,) @@ -603,6 +697,8 @@ FUNCTION (svraddhnb, unspec_based_function, (UNSPEC_RADDHNB, FUNCTION (svraddhnt, unspec_based_function, (UNSPEC_RADDHNT, UNSPEC_RADDHNT, -1)) FUNCTION (svrax1, fixed_insn_function, (CODE_FOR_aarch64_sve2_rax1)) +FUNCTION (svrevd, unspec_based_function, (UNSPEC_REVD, UNSPEC_REVD, + UNSPEC_REVD)) FUNCTION (svrhadd, unspec_based_function, (UNSPEC_SRHADD, UNSPEC_URHADD, -1)) FUNCTION (svrshl, svrshl_impl,) FUNCTION (svrshr, unspec_based_function, (UNSPEC_SRSHR, UNSPEC_URSHR, -1)) @@ -639,7 +735,12 @@ FUNCTION (svsubwb, unspec_based_function, (UNSPEC_SSUBWB, UNSPEC_USUBWB, -1)) FUNCTION (svsubwt, unspec_based_function, (UNSPEC_SSUBWT, UNSPEC_USUBWT, -1)) FUNCTION (svtbl2, svtbl2_impl,) FUNCTION (svtbx, CODE_FOR_MODE0 (aarch64_sve2_tbx),) +FUNCTION (svunpk, svunpk_impl,) FUNCTION (svuqadd, svuqadd_impl,) +FUNCTION (svuzp, multireg_permute, (UNSPEC_UZP)) +FUNCTION (svuzpq, multireg_permute, (UNSPEC_UZPQ)) +FUNCTION (svzip, multireg_permute, (UNSPEC_ZIP)) +FUNCTION (svzipq, multireg_permute, (UNSPEC_ZIPQ)) FUNCTION (svwhilege, while_comparison, (UNSPEC_WHILEGE, UNSPEC_WHILEHS)) FUNCTION (svwhilegt, while_comparison, (UNSPEC_WHILEGT, UNSPEC_WHILEHI)) FUNCTION (svwhilerw, svwhilerw_svwhilewr_impl, (UNSPEC_WHILERW)) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def index 4aac1ac942abab208aecff66196f80d746968c1b..f37a5cc6b684b5ed52ee932e5c5bdf4ba92d7df4 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def @@ -94,7 +94,7 @@ DEF_SVE_FUNCTION (svqdmlslb_lane, ternary_long_lane, sd_signed, none) DEF_SVE_FUNCTION (svqdmlslbt, ternary_long_opt_n, hsd_signed, none) DEF_SVE_FUNCTION (svqdmlslt, ternary_long_opt_n, hsd_signed, none) DEF_SVE_FUNCTION (svqdmlslt_lane, ternary_long_lane, sd_signed, none) -DEF_SVE_FUNCTION (svqdmulh, binary_opt_n, all_signed, none) +DEF_SVE_FUNCTION (svqdmulh, binary_opt_single_n, all_signed, none) DEF_SVE_FUNCTION (svqdmulh_lane, binary_lane, hsd_signed, none) DEF_SVE_FUNCTION (svqdmullb, binary_long_opt_n, hsd_signed, none) DEF_SVE_FUNCTION (svqdmullb_lane, binary_long_lane, sd_signed, none) @@ -131,7 +131,7 @@ DEF_SVE_FUNCTION (svraddhnt, binary_narrowt_opt_n, hsd_integer, none) DEF_SVE_FUNCTION (svrecpe, unary, s_unsigned, mxz) DEF_SVE_FUNCTION (svrhadd, binary_opt_n, all_integer, mxz) DEF_SVE_FUNCTION (svrsqrte, unary, s_unsigned, mxz) -DEF_SVE_FUNCTION (svrshl, binary_int_opt_n, all_integer, mxz) +DEF_SVE_FUNCTION (svrshl, binary_int_opt_single_n, all_integer, mxz) DEF_SVE_FUNCTION 
(svrshr, shift_right_imm, all_integer, mxz) DEF_SVE_FUNCTION (svrshrnb, shift_right_imm_narrowb, hsd_integer, none) DEF_SVE_FUNCTION (svrshrnt, shift_right_imm_narrowt, hsd_integer, none) @@ -229,3 +229,73 @@ DEF_SVE_FUNCTION (svrax1, binary, d_integer, none) DEF_SVE_FUNCTION (svsm4e, binary, s_unsigned, none) DEF_SVE_FUNCTION (svsm4ekey, binary, s_unsigned, none) #undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ + | AARCH64_FL_SVE2 \ + | AARCH64_FL_SME \ + | AARCH64_FL_SM_ON) +DEF_SVE_FUNCTION (svclamp, clamp, all_integer, none) +DEF_SVE_FUNCTION (svpsel, select_pred, all_pred_count, none) +DEF_SVE_FUNCTION (svrevd, unary, all_data, mxz) +#undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ + | AARCH64_FL_SVE2 \ + | AARCH64_FL_SME2 \ + | AARCH64_FL_SM_ON) +DEF_SVE_FUNCTION_GS (svadd, binary_single, all_integer, x24, none) +DEF_SVE_FUNCTION (svbfmlslb, ternary_bfloat_opt_n, s_float, none) +DEF_SVE_FUNCTION (svbfmlslb_lane, ternary_bfloat_lane, s_float, none) +DEF_SVE_FUNCTION (svbfmlslt, ternary_bfloat_opt_n, s_float, none) +DEF_SVE_FUNCTION (svbfmlslt_lane, ternary_bfloat_lane, s_float, none) +DEF_SVE_FUNCTION (svclamp, clamp, all_float, none) +DEF_SVE_FUNCTION_GS (svclamp, clamp, all_arith, x24, none) +DEF_SVE_FUNCTION (svcntp, count_pred_c, all_count, none) +DEF_SVE_FUNCTION_GS (svcvt, unary_convertxn, cvt_h_s_float, x2, none) +DEF_SVE_FUNCTION_GS (svcvt, unary_convertxn, cvt_s_s, x24, none) +DEF_SVE_FUNCTION_GS (svcvtn, unary_convertxn, cvt_h_s_float, x2, none) +DEF_SVE_FUNCTION (svdot, ternary_qq_opt_n_or_011, s_narrow_fsu, none) +DEF_SVE_FUNCTION (svdot_lane, ternary_qq_or_011_lane, s_narrow_fsu, none) +DEF_SVE_FUNCTION_GS (svld1, load, all_data, x24, implicit) +DEF_SVE_FUNCTION_GS (svldnt1, load, all_data, x24, implicit) +DEF_SVE_FUNCTION_GS (svmax, binary_opt_single_n, all_arith, x24, none) +DEF_SVE_FUNCTION_GS (svmaxnm, binary_opt_single_n, all_float, x24, none) +DEF_SVE_FUNCTION_GS (svmin, binary_opt_single_n, all_arith, x24, none) +DEF_SVE_FUNCTION_GS (svminnm, binary_opt_single_n, all_float, x24, none) +DEF_SVE_FUNCTION_GS (svpext, extract_pred, all_count, x12, none) +DEF_SVE_FUNCTION (svptrue, inherent, all_count, none) +DEF_SVE_FUNCTION_GS (svqcvt, unary_convertxn, qcvt_x2, x2, none) +DEF_SVE_FUNCTION_GS (svqcvt, unary_convertxn, qcvt_x4, x4, none) +DEF_SVE_FUNCTION_GS (svqcvtn, unary_convertxn, qcvt_x2, x2, none) +DEF_SVE_FUNCTION_GS (svqcvtn, unary_convertxn, qcvt_x4, x4, none) +DEF_SVE_FUNCTION_GS (svqdmulh, binary_opt_single_n, all_signed, x24, none) +DEF_SVE_FUNCTION_GS (svqrshr, shift_right_imm_narrowxn, qrshr_x2, x2, none) +DEF_SVE_FUNCTION_GS (svqrshr, shift_right_imm_narrowxn, qrshr_x4, x4, none) +DEF_SVE_FUNCTION_GS (svqrshrn, shift_right_imm_narrowxn, qrshr_x2, x2, none) +DEF_SVE_FUNCTION_GS (svqrshrn, shift_right_imm_narrowxn, qrshr_x4, x4, none) +DEF_SVE_FUNCTION_GS (svqrshru, shift_right_imm_narrowxn, qrshru_x2, x2, none) +DEF_SVE_FUNCTION_GS (svqrshru, shift_right_imm_narrowxn, qrshru_x4, x4, none) +DEF_SVE_FUNCTION_GS (svqrshrun, shift_right_imm_narrowxn, qrshru_x2, x2, none) +DEF_SVE_FUNCTION_GS (svqrshrun, shift_right_imm_narrowxn, qrshru_x4, x4, none) +DEF_SVE_FUNCTION_GS (svrinta, unaryxn, s_float, x24, none) +DEF_SVE_FUNCTION_GS (svrintm, unaryxn, s_float, x24, none) +DEF_SVE_FUNCTION_GS (svrintn, unaryxn, s_float, x24, none) +DEF_SVE_FUNCTION_GS (svrintp, unaryxn, s_float, x24, none) +DEF_SVE_FUNCTION_GS (svrshl, binary_int_opt_single_n, all_integer, x24, none) +DEF_SVE_FUNCTION_GS (svsel, binaryxn, 
all_data, x24, implicit) +DEF_SVE_FUNCTION_GS (svst1, storexn, all_data, x24, implicit) +DEF_SVE_FUNCTION_GS (svstnt1, storexn, all_data, x24, implicit) +DEF_SVE_FUNCTION_GS (svunpk, unary_convertxn, bhs_widen, x24, none) +DEF_SVE_FUNCTION_GS (svuzp, unaryxn, all_data, x24, none) +DEF_SVE_FUNCTION_GS (svuzpq, unaryxn, all_data, x24, none) +DEF_SVE_FUNCTION_GS (svwhilege, compare_scalar, while_x, x2, none) +DEF_SVE_FUNCTION (svwhilege, compare_scalar_count, while_x_c, none) +DEF_SVE_FUNCTION_GS (svwhilegt, compare_scalar, while_x, x2, none) +DEF_SVE_FUNCTION (svwhilegt, compare_scalar_count, while_x_c, none) +DEF_SVE_FUNCTION_GS (svwhilele, compare_scalar, while_x, x2, none) +DEF_SVE_FUNCTION (svwhilele, compare_scalar_count, while_x_c, none) +DEF_SVE_FUNCTION_GS (svwhilelt, compare_scalar, while_x, x2, none) +DEF_SVE_FUNCTION (svwhilelt, compare_scalar_count, while_x_c, none) +DEF_SVE_FUNCTION_GS (svzip, unaryxn, all_data, x24, none) +DEF_SVE_FUNCTION_GS (svzipq, unaryxn, all_data, x24, none) +#undef REQUIRED_EXTENSIONS diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h index 1cd4477acb3c7faa2ce18a4e184b7ec8c0d034d1..24ee612536912aaaa5bab8b3a0b48d68fd1e9ed8 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h @@ -47,13 +47,20 @@ namespace aarch64_sve extern const function_base *const svbcax; extern const function_base *const svbdep; extern const function_base *const svbext; + extern const function_base *const svbfmlslb; + extern const function_base *const svbfmlslb_lane; + extern const function_base *const svbfmlslt; + extern const function_base *const svbfmlslt_lane; extern const function_base *const svbgrp; extern const function_base *const svbsl; extern const function_base *const svbsl1n; extern const function_base *const svbsl2n; extern const function_base *const svcdot; extern const function_base *const svcdot_lane; + extern const function_base *const svclamp; + extern const function_base *const svcntp; extern const function_base *const svcvtlt; + extern const function_base *const svcvtn; extern const function_base *const svcvtx; extern const function_base *const svcvtxnt; extern const function_base *const sveor3; @@ -93,13 +100,17 @@ namespace aarch64_sve extern const function_base *const svmullt_lane; extern const function_base *const svnbsl; extern const function_base *const svnmatch; + extern const function_base *const svpext; extern const function_base *const svpmul; extern const function_base *const svpmullb; extern const function_base *const svpmullb_pair; extern const function_base *const svpmullt; extern const function_base *const svpmullt_pair; + extern const function_base *const svpsel; extern const function_base *const svqabs; extern const function_base *const svqcadd; + extern const function_base *const svqcvt; + extern const function_base *const svqcvtn; extern const function_base *const svqdmlalb; extern const function_base *const svqdmlalb_lane; extern const function_base *const svqdmlalbt; @@ -126,8 +137,12 @@ namespace aarch64_sve extern const function_base *const svqrdmlsh; extern const function_base *const svqrdmlsh_lane; extern const function_base *const svqrshl; + extern const function_base *const svqrshr; + extern const function_base *const svqrshrn; extern const function_base *const svqrshrnb; extern const function_base *const svqrshrnt; + extern const function_base *const svqrshru; + extern const function_base *const svqrshrun; extern const 
function_base *const svqrshrunb; extern const function_base *const svqrshrunt; extern const function_base *const svqshl; @@ -144,6 +159,7 @@ namespace aarch64_sve extern const function_base *const svraddhnb; extern const function_base *const svraddhnt; extern const function_base *const svrax1; + extern const function_base *const svrevd; extern const function_base *const svrhadd; extern const function_base *const svrshl; extern const function_base *const svrshr; @@ -178,7 +194,12 @@ namespace aarch64_sve extern const function_base *const svsubwt; extern const function_base *const svtbl2; extern const function_base *const svtbx; + extern const function_base *const svunpk; extern const function_base *const svuqadd; + extern const function_base *const svuzp; + extern const function_base *const svuzpq; + extern const function_base *const svzip; + extern const function_base *const svzipq; extern const function_base *const svwhilege; extern const function_base *const svwhilegt; extern const function_base *const svwhilerw; diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index 7e4b9e67ed800b3e3861b44e22f6f79f87b0397e..15fa5907de5fee96602ee4d526d1e129cf8b8115 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -184,6 +184,16 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { #define TYPES_all_pred(S, D) \ S (b8), S (b16), S (b32), S (b64) +/* _c8 _c16 _c32 _c64. */ +#define TYPES_all_count(S, D) \ + S (c8), S (c16), S (c32), S (c64) + +/* _b8 _b16 _b32 _b64 + _c8 _c16 _c32 _c64. */ +#define TYPES_all_pred_count(S, D) \ + TYPES_all_pred (S, D), \ + TYPES_all_count (S, D) + /* _f16 _f32 _f64. */ #define TYPES_all_float(S, D) \ S (f16), S (f32), S (f64) @@ -223,6 +233,10 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { #define TYPES_b(S, D) \ S (b) +/* _c only. */ +#define TYPES_c(S, D) \ + S (c) + /* _u8. */ #define TYPES_b_unsigned(S, D) \ S (u8) @@ -254,6 +268,19 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { #define TYPES_bhs_integer(S, D) \ TYPES_bhs_signed (S, D), TYPES_bhs_unsigned (S, D) +/* _bf16 + _f16 _f32 + _s8 _s16 _s32 + _u8 _u16 _u32. */ +#define TYPES_bhs_data(S, D) \ + S (bf16), S (f16), S (f32), TYPES_bhs_integer (S, D) + +/* _s16_s8 _s32_s16 _s64_s32 + _u16_u8 _u32_u16 _u64_u32. */ +#define TYPES_bhs_widen(S, D) \ + D (s16, s8), D (s32, s16), D (s64, s32), \ + D (u16, u8), D (u32, u16), D (u64, u32) + /* _s16 _u16. */ #define TYPES_h_integer(S, D) \ @@ -272,6 +299,13 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { #define TYPES_hs_float(S, D) \ S (f16), S (f32) +/* _bf16 + _f16 _f32 + _s16 _s32 + _u16 _u32. */ +#define TYPES_hs_data(S, D) \ + S (bf16), S (f16), S (f32), TYPES_hs_integer (S, D) + /* _u16 _u64. */ #define TYPES_hd_unsigned(S, D) \ S (u16), S (u64) @@ -383,6 +417,10 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { #define TYPES_cvt_bfloat(S, D) \ D (bf16, f32) +/* { _bf16 _f16 } x _f32. */ +#define TYPES_cvt_h_s_float(S, D) \ + D (bf16, f32), D (f16, f32) + /* _f32_f16 _f64_f32. */ #define TYPES_cvt_long(S, D) \ @@ -397,6 +435,15 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { #define TYPES_cvt_narrow(S, D) \ D (f16, f32), TYPES_cvt_narrow_s (S, D) +/* { _s32 _u32 } x _f32 + + _f32 x { _s32 _u32 }. */ +#define TYPES_cvt_s_s(S, D) \ + D (s32, f32), \ + D (u32, f32), \ + D (f32, s32), \ + D (f32, u32) + /* { _s32 _s64 } x { _b8 _b16 _b32 _b64 } { _u32 _u64 }. 
*/ #define TYPES_inc_dec_n1(D, A) \ @@ -407,6 +454,55 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { TYPES_inc_dec_n1 (D, u32), \ TYPES_inc_dec_n1 (D, u64) +/* { _s16 _u16 } x _s32 + + { _u16 } x _u32. */ +#define TYPES_qcvt_x2(S, D) \ + D (s16, s32), \ + D (u16, u32), \ + D (u16, s32) + +/* { _s8 _u8 } x _s32 + + { _u8 } x _u32 + + { _s16 _u16 } x _s64 + + { _u16 } x _u64. */ +#define TYPES_qcvt_x4(S, D) \ + D (s8, s32), \ + D (u8, u32), \ + D (u8, s32), \ + D (s16, s64), \ + D (u16, u64), \ + D (u16, s64) + +/* _s16_s32 + _u16_u32. */ +#define TYPES_qrshr_x2(S, D) \ + D (s16, s32), \ + D (u16, u32) + +/* _u16_s32. */ +#define TYPES_qrshru_x2(S, D) \ + D (u16, s32) + +/* _s8_s32 + _s16_s64 + _u8_u32 + _u16_u64. */ +#define TYPES_qrshr_x4(S, D) \ + D (s8, s32), \ + D (s16, s64), \ + D (u8, u32), \ + D (u16, u64) + +/* _u8_s32 + _u16_s64. */ +#define TYPES_qrshru_x4(S, D) \ + D (u8, s32), \ + D (u16, s64) + /* { _bf16 } { _bf16 } { _f16 _f32 _f64 } { _f16 _f32 _f64 } { _s8 _s16 _s32 _s64 } x { _s8 _s16 _s32 _s64 } @@ -446,6 +542,28 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { TYPES_while1 (D, b32), \ TYPES_while1 (D, b64) +/* { _b8 _b16 _b32 _b64 } x { _s64 } + { _u64 } */ +#define TYPES_while_x(S, D) \ + D (b8, s64), D (b8, u64), \ + D (b16, s64), D (b16, u64), \ + D (b32, s64), D (b32, u64), \ + D (b64, s64), D (b64, u64) + +/* { _c8 _c16 _c32 _c64 } x { _s64 } + { _u64 } */ +#define TYPES_while_x_c(S, D) \ + D (c8, s64), D (c8, u64), \ + D (c16, s64), D (c16, u64), \ + D (c32, s64), D (c32, u64), \ + D (c64, s64), D (c64, u64) + +/* _f32_f16 + _s32_s16 + _u32_u16. */ +#define TYPES_s_narrow_fsu(S, D) \ + D (f32, f16), D (s32, s16), D (u32, u16) + /* _za8 _za16 _za32 _za64 _za128. */ #define TYPES_all_za(S, D) \ S (za8), S (za16), S (za32), S (za64), S (za128) @@ -478,10 +596,45 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { TYPES_za_bhsd_data (S, D), \ TYPES_reinterpret1 (D, za128) +/* _za32_s8. */ +#define TYPES_za_s_b_signed(S, D) \ + D (za32, s8) + +/* _za32_u8. */ +#define TYPES_za_s_b_unsigned(S, D) \ + D (za32, u8) + +/* _za32 x { _s8 _u8 }. */ +#define TYPES_za_s_b_integer(S, D) \ + D (za32, s8), D (za32, u8) + +/* _za32 x { _s16 _u16 }. */ +#define TYPES_za_s_h_integer(S, D) \ + D (za32, s16), D (za32, u16) + +/* _za32 x { _bf16 _f16 _s16 _u16 }. */ +#define TYPES_za_s_h_data(S, D) \ + D (za32, bf16), D (za32, f16), D (za32, s16), D (za32, u16) + +/* _za32_u32. */ +#define TYPES_za_s_unsigned(S, D) \ + D (za32, u32) + /* _za32 x { _s32 _u32 }. */ #define TYPES_za_s_integer(S, D) \ D (za32, s32), D (za32, u32) +/* _za32_f32. */ +#define TYPES_za_s_float(S, D) \ + D (za32, f32) + +/* _za32 x { _f32 _s32 _u32 }. */ +#define TYPES_za_s_data(S, D) \ + D (za32, f32), D (za32, s32), D (za32, u32) + +/* _za64 x { _s16 _u16 }. */ +#define TYPES_za_d_h_integer(S, D) \ + D (za64, s16), D (za64, u16) /* _za64_f64. */ #define TYPES_za_d_float(S, D) \ @@ -541,6 +694,8 @@ static const type_suffix_pair types_none[] = { /* Create an array for each TYPES_<combination> macro above. 
*/ DEF_SVE_TYPES_ARRAY (all_pred); +DEF_SVE_TYPES_ARRAY (all_count); +DEF_SVE_TYPES_ARRAY (all_pred_count); DEF_SVE_TYPES_ARRAY (all_float); DEF_SVE_TYPES_ARRAY (all_signed); DEF_SVE_TYPES_ARRAY (all_float_and_signed); @@ -556,10 +711,14 @@ DEF_SVE_TYPES_ARRAY (bs_unsigned); DEF_SVE_TYPES_ARRAY (bhs_signed); DEF_SVE_TYPES_ARRAY (bhs_unsigned); DEF_SVE_TYPES_ARRAY (bhs_integer); +DEF_SVE_TYPES_ARRAY (bhs_data); +DEF_SVE_TYPES_ARRAY (bhs_widen); +DEF_SVE_TYPES_ARRAY (c); DEF_SVE_TYPES_ARRAY (h_integer); DEF_SVE_TYPES_ARRAY (hs_signed); DEF_SVE_TYPES_ARRAY (hs_integer); DEF_SVE_TYPES_ARRAY (hs_float); +DEF_SVE_TYPES_ARRAY (hs_data); DEF_SVE_TYPES_ARRAY (hd_unsigned); DEF_SVE_TYPES_ARRAY (hsd_signed); DEF_SVE_TYPES_ARRAY (hsd_integer); @@ -580,17 +739,38 @@ DEF_SVE_TYPES_ARRAY (d_integer); DEF_SVE_TYPES_ARRAY (d_data); DEF_SVE_TYPES_ARRAY (cvt); DEF_SVE_TYPES_ARRAY (cvt_bfloat); +DEF_SVE_TYPES_ARRAY (cvt_h_s_float); DEF_SVE_TYPES_ARRAY (cvt_long); DEF_SVE_TYPES_ARRAY (cvt_narrow_s); DEF_SVE_TYPES_ARRAY (cvt_narrow); +DEF_SVE_TYPES_ARRAY (cvt_s_s); DEF_SVE_TYPES_ARRAY (inc_dec_n); +DEF_SVE_TYPES_ARRAY (qcvt_x2); +DEF_SVE_TYPES_ARRAY (qcvt_x4); +DEF_SVE_TYPES_ARRAY (qrshr_x2); +DEF_SVE_TYPES_ARRAY (qrshr_x4); +DEF_SVE_TYPES_ARRAY (qrshru_x2); +DEF_SVE_TYPES_ARRAY (qrshru_x4); DEF_SVE_TYPES_ARRAY (reinterpret); DEF_SVE_TYPES_ARRAY (reinterpret_b); DEF_SVE_TYPES_ARRAY (while); +DEF_SVE_TYPES_ARRAY (while_x); +DEF_SVE_TYPES_ARRAY (while_x_c); +DEF_SVE_TYPES_ARRAY (s_narrow_fsu); DEF_SVE_TYPES_ARRAY (all_za); DEF_SVE_TYPES_ARRAY (d_za); +DEF_SVE_TYPES_ARRAY (za_bhsd_data); DEF_SVE_TYPES_ARRAY (za_all_data); +DEF_SVE_TYPES_ARRAY (za_s_b_signed); +DEF_SVE_TYPES_ARRAY (za_s_b_unsigned); +DEF_SVE_TYPES_ARRAY (za_s_b_integer); +DEF_SVE_TYPES_ARRAY (za_s_h_integer); +DEF_SVE_TYPES_ARRAY (za_s_h_data); +DEF_SVE_TYPES_ARRAY (za_s_unsigned); DEF_SVE_TYPES_ARRAY (za_s_integer); +DEF_SVE_TYPES_ARRAY (za_s_float); +DEF_SVE_TYPES_ARRAY (za_s_data); +DEF_SVE_TYPES_ARRAY (za_d_h_integer); DEF_SVE_TYPES_ARRAY (za_d_float); DEF_SVE_TYPES_ARRAY (za_d_integer); DEF_SVE_TYPES_ARRAY (mop_base); @@ -605,10 +785,50 @@ static const group_suffix_index groups_none[] = { GROUP_none, NUM_GROUP_SUFFIXES }; +static const group_suffix_index groups_x2[] = { GROUP_x2, NUM_GROUP_SUFFIXES }; + +static const group_suffix_index groups_x12[] = { + GROUP_none, GROUP_x2, NUM_GROUP_SUFFIXES +}; + +static const group_suffix_index groups_x4[] = { GROUP_x4, NUM_GROUP_SUFFIXES }; + +static const group_suffix_index groups_x24[] = { + GROUP_x2, GROUP_x4, NUM_GROUP_SUFFIXES +}; + +static const group_suffix_index groups_x124[] = { + GROUP_none, GROUP_x2, GROUP_x4, NUM_GROUP_SUFFIXES +}; + static const group_suffix_index groups_x1234[] = { GROUP_none, GROUP_x2, GROUP_x3, GROUP_x4, NUM_GROUP_SUFFIXES }; +static const group_suffix_index groups_vg1x2[] = { + GROUP_vg1x2, NUM_GROUP_SUFFIXES +}; + +static const group_suffix_index groups_vg1x4[] = { + GROUP_vg1x4, NUM_GROUP_SUFFIXES +}; + +static const group_suffix_index groups_vg1x24[] = { + GROUP_vg1x2, GROUP_vg1x4, NUM_GROUP_SUFFIXES +}; + +static const group_suffix_index groups_vg2[] = { + GROUP_vg2x1, GROUP_vg2x2, GROUP_vg2x4, NUM_GROUP_SUFFIXES +}; + +static const group_suffix_index groups_vg4[] = { + GROUP_vg4x1, GROUP_vg4x2, GROUP_vg4x4, NUM_GROUP_SUFFIXES +}; + +static const group_suffix_index groups_vg24[] = { + GROUP_vg2, GROUP_vg4, NUM_GROUP_SUFFIXES +}; + /* Used by functions that have no governing predicate. 
*/ static const predication_index preds_none[] = { PRED_none, NUM_PREDS }; @@ -1007,7 +1227,7 @@ function_instance::reads_global_state_p () const return true; /* Handle direct reads of global state. */ - return flags & (CP_READ_MEMORY | CP_READ_FFR | CP_READ_ZA); + return flags & (CP_READ_MEMORY | CP_READ_FFR | CP_READ_ZA | CP_READ_ZT0); } /* Return true if calls to the function could modify some form of @@ -1028,7 +1248,7 @@ function_instance::modifies_global_state_p () const return true; /* Handle direct modifications of global state. */ - return flags & (CP_WRITE_MEMORY | CP_WRITE_FFR | CP_WRITE_ZA); + return flags & (CP_WRITE_MEMORY | CP_WRITE_FFR | CP_WRITE_ZA | CP_WRITE_ZT0); } /* Return true if calls to the function could raise a signal. */ @@ -1183,7 +1403,8 @@ add_shared_state_attribute (const char *name, bool is_in, bool is_out, }; static state_flag_info state_flags[] = { - { "za", CP_READ_ZA, CP_WRITE_ZA } + { "za", CP_READ_ZA, CP_WRITE_ZA }, + { "zt0", CP_READ_ZT0, CP_WRITE_ZT0 } }; tree args = NULL_TREE; @@ -1379,6 +1600,10 @@ function_builder::add_overloaded_functions (const function_group_info &group, auto add_group_suffix = [&](group_suffix_index group_suffix_id, unsigned int pi) { + if (mode == MODE_single + && group_suffixes[group_suffix_id].vectors_per_tuple == 1) + return; + if (!explicit_type0 && !explicit_type1) /* Deal with the common case in which there is one overloaded function for all type combinations. */ @@ -1482,6 +1707,48 @@ function_resolver::report_incorrect_num_vectors (unsigned int argno, argno + 1, fndecl, num_vectors); } +/* Report that arguments FIRST_ARGNO and ARGNO have different numbers + of vectors, but are required to have the same number of vectors. + FIRST_TYPE and TYPE are the types that arguments FIRST_ARGNO and + ARGNO actually have. */ +void +function_resolver::report_mismatched_num_vectors (unsigned int first_argno, + sve_type first_type, + unsigned int argno, + sve_type type) +{ + /* If the tuple size is implied by the group suffix, and if the first + type had the right number of vectors, treat argument ARGNO as being + individually wrong, rather than wrong in relation to FIRST_ARGNO. */ + if (group_suffix_id != GROUP_none + && first_type.num_vectors == vectors_per_tuple ()) + { + report_incorrect_num_vectors (argno, type, first_type.num_vectors); + return; + } + + /* Make sure that FIRST_TYPE itself is sensible before using it + as a basis for an error message. */ + if (resolve_to (mode_suffix_id, first_type) == error_mark_node) + return; + + if (type.num_vectors != 1 && first_type.num_vectors == 1) + error_at (location, "passing tuple %qT to argument %d of %qE after" + " passing single vector %qT to argument %d", + get_vector_type (type), argno + 1, fndecl, + get_vector_type (first_type), first_argno + 1); + else if (type.num_vectors == 1 && first_type.num_vectors != 1) + error_at (location, "passing single vector %qT to argument %d" + " of %qE after passing tuple %qT to argument %d", + get_vector_type (type), argno + 1, fndecl, + get_vector_type (first_type), first_argno + 1); + else + error_at (location, "passing mismatched tuple types %qT and %qT" + " to arguments %d and %d of %qE", + get_vector_type (first_type), get_vector_type (type), + first_argno + 1, argno + 1, fndecl); +} + /* Report that the function has no form that takes type TYPE. Return error_mark_node. 
*/
 tree
@@ -1548,8 +1815,9 @@ function_resolver::resolve_to (mode_suffix_index mode,
 	return report_no_such_form (type0);
       if (type0 == type_suffix_ids[0])
 	return report_no_such_form (type1);
-      /* To be filled in when we have other cases.  */
-      gcc_unreachable ();
+      error_at (location, "%qE has no form that takes %qT and %qT arguments",
+		fndecl, get_vector_type (type0), get_vector_type (type1));
+      return error_mark_node;
     }
   return res;
 }
@@ -1567,6 +1835,54 @@ function_resolver::resolve_to (mode_suffix_index mode, sve_type type)
   return report_no_such_form (type);
 }
 
+/* Like resolve_to, but used for a conversion function with the following
+   properties:
+
+   - The function has an explicit first type suffix.
+   - The elements of the argument (which has type TYPE) might be narrower
+     or wider than the elements of the return type.
+   - The return type has enough vectors to represent the converted value
+     of every element.
+   - The group suffix describes the wider of the argument type and the
+     return type.  */
+tree
+function_resolver::resolve_conversion (mode_suffix_index mode, sve_type type)
+{
+  auto ret_type = type_suffix_ids[0];
+  unsigned int num_ret_vectors = (type.num_vectors
+				  * type_suffixes[ret_type].element_bits
+				  / type_suffixes[type.type].element_bits);
+  if (num_ret_vectors == 1
+      || num_ret_vectors == 2
+      || num_ret_vectors == 4)
+    {
+      unsigned int num_vectors = MAX (num_ret_vectors, type.num_vectors);
+      if (tree res = lookup_form (mode, { type.type, num_vectors }))
+	return res;
+    }
+  return report_no_such_form (type);
+}
+
+/* Require argument ARGNO to be an svbool_t or svcount_t predicate.
+   Return its type on success, otherwise report an error and return
+   NUM_VECTOR_TYPES.  */
+vector_type_index
+function_resolver::infer_predicate_type (unsigned int argno)
+{
+  tree actual = get_argument_type (argno);
+  if (actual == error_mark_node)
+    return NUM_VECTOR_TYPES;
+
+  for (auto index : { VECTOR_TYPE_svbool_t, VECTOR_TYPE_svcount_t })
+    if (matches_type_p (acle_vector_types[0][index], actual))
+      return index;
+
+  error_at (location, "passing %qT to argument %d of %qE, which expects"
+	    " an %qs or %qs", actual, argno + 1, fndecl, "svbool_t",
+	    "svcount_t");
+  return NUM_VECTOR_TYPES;
+}
+
 /* Require argument ARGNO to be a 32-bit or 64-bit scalar integer type.
    Return the associated type suffix on success, otherwise report an
    error and return NUM_TYPE_SUFFIXES.  */
@@ -1597,6 +1913,50 @@ function_resolver::infer_integer_scalar_type (unsigned int argno)
   return NUM_TYPE_SUFFIXES;
 }
 
+/* Require arguments ARGNO and ARGNO + 1 to be 64-bit scalar integers
+   of the same signedness, or be a combination that converts unambiguously
+   to such a pair.  Return the associated type suffix on success,
+   otherwise report an error and return NUM_TYPE_SUFFIXES.  */
+type_suffix_index
+function_resolver::infer_64bit_scalar_integer_pair (unsigned int argno)
+{
+  /* Require two scalar integers, with one having 64 bits and the other
+     one being no bigger.  */
+  tree types[] = { get_argument_type (argno), get_argument_type (argno + 1) };
+  if (!INTEGRAL_TYPE_P (types[0])
+      || !INTEGRAL_TYPE_P (types[1])
+      || MAX (TYPE_PRECISION (types[0]), TYPE_PRECISION (types[1])) != 64)
+    {
+      error_at (location, "passing %qT and %qT to arguments %d and %d of %qE,"
+		" which expects a pair of 64-bit integers", types[0], types[1],
+		argno + 1, argno + 2, fndecl);
+      return NUM_TYPE_SUFFIXES;
+    }
+
+  /* Allow signed integers smaller than int64_t to be paired with an int64_t.
+     Allow unsigned integers smaller than uint64_t to be paired with any
+     64-bit integer.  */
+  for (int i = 0; i < 2; ++i)
+    {
+      if (TYPE_PRECISION (types[i]) != 64)
+	continue;
+
+      if (TYPE_UNSIGNED (types[1 - i]) != TYPE_UNSIGNED (types[i]))
+	{
+	  if (TYPE_PRECISION (types[1 - i]) == 64)
+	    continue;
+	  if (!TYPE_UNSIGNED (types[1 - i]))
+	    continue;
+	}
+      return TYPE_UNSIGNED (types[i]) ? TYPE_SUFFIX_u64 : TYPE_SUFFIX_s64;
+    }
+
+  error_at (location, "passing mismatched integer types %qT and %qT"
+	    " to arguments %d and %d of %qE", types[0], types[1],
+	    argno + 1, argno + 2, fndecl);
+  return NUM_TYPE_SUFFIXES;
+}
+
 /* Require argument ARGNO to be a pointer to a scalar type that has
    a corresponding type suffix.  Return that type suffix on success,
    otherwise report an error and return NUM_TYPE_SUFFIXES.
@@ -1783,6 +2143,37 @@ function_resolver::infer_tuple_type (unsigned int argno)
   return infer_vector_or_tuple_type (argno, vectors_per_tuple ());
 }
 
+/* PRED_TYPE is the type of a governing predicate argument and DATA_TYPE
+   is the type of an argument that it predicates.  Require the two types
+   to "agree": svcount_t must be used for multiple vectors and svbool_t
+   for single vectors.
+
+   Return true if they do agree, otherwise report an error and
+   return false.  */
+bool function_resolver::
+require_matching_predicate_type (vector_type_index pred_type,
+				 sve_type data_type)
+{
+  if (pred_type == VECTOR_TYPE_svbool_t && data_type.num_vectors == 1)
+    return true;
+
+  if (pred_type == VECTOR_TYPE_svcount_t && data_type.num_vectors != 1)
+    return true;
+
+  /* Make sure that DATA_TYPE itself is sensible before using it
+     as a basis for an error message.  */
+  if (resolve_to (mode_suffix_id, data_type) == error_mark_node)
+    return false;
+
+  if (data_type.num_vectors > 1)
+    error_at (location, "operations on multiple vectors must be predicated"
+	      " by %qs rather than %qs", "svcount_t", "svbool_t");
+  else
+    error_at (location, "operations on single vectors must be predicated"
+	      " by %qs rather than %qs", "svbool_t", "svcount_t");
+  return false;
+}
+
 /* Require argument ARGNO to be a vector or scalar argument.  Return true
    if it is, otherwise report an appropriate error.  */
 bool
@@ -1835,6 +2226,12 @@ function_resolver::require_matching_vector_type (unsigned int argno,
   if (!new_type)
     return false;
 
+  if (type.num_vectors != new_type.num_vectors)
+    {
+      report_mismatched_num_vectors (first_argno, type, argno, new_type);
+      return false;
+    }
+
   if (type != new_type)
     {
      error_at (location, "passing %qT to argument %d of %qE, but"
@@ -1846,7 +2243,8 @@
   return true;
 }
 
-/* Require argument ARGNO to be a vector type with the following properties:
+/* Require argument ARGNO to be a vector or tuple type with the following
+   properties:
 
   - the type class must be the same as FIRST_TYPE's if EXPECTED_TCLASS
     is SAME_TYPE_CLASS, otherwise it must be EXPECTED_TCLASS itself.
@@ -1858,6 +2256,9 @@
   - a quarter of FIRST_TYPE's if EXPECTED_BITS == QUARTER_SIZE
   - EXPECTED_BITS itself otherwise
 
+  - the number of vectors must be the same as FIRST_TYPE's if
+    EXPECTED_NUM_VECTORS is zero, otherwise it must be EXPECTED_NUM_VECTORS.
+
   Return true if the argument has the required type, otherwise report
   an appropriate error.
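From the user's side, the pairing that require_matching_predicate_type enforces looks like this; a hedged sketch, assuming the ACLE spellings of the SME2 multi-vector forms declared in the .def files above:

  #include <arm_sme.h>

  svint8x2_t
  load_pair (const int8_t *ptr) __arm_streaming
  {
    svcount_t pn = svptrue_c8 ();  /* predicate-as-counter, for tuples */
    return svld1_s8_x2 (pn, ptr);  /* multi-vector op: needs svcount_t */
  }

Passing an svbool_t predicate to the tuple form would be rejected with the new "operations on multiple vectors must be predicated by 'svcount_t' rather than 'svbool_t'" diagnostic, and vice versa for single-vector operations.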
@@ -1877,13 +2278,14 @@ require_derived_vector_type (unsigned int argno, unsigned int first_argno, sve_type first_type, type_class_index expected_tclass, - unsigned int expected_bits) + unsigned int expected_bits, + unsigned int expected_num_vectors) { /* If the type needs to match FIRST_ARGNO exactly, use the preferred error message for that case. */ - if (first_type.num_vectors == 1 - && expected_tclass == SAME_TYPE_CLASS - && expected_bits == SAME_SIZE) + if (expected_tclass == SAME_TYPE_CLASS + && expected_bits == SAME_SIZE + && expected_num_vectors == 0) { /* There's no need to resolve this case out of order. */ gcc_assert (argno > first_argno); @@ -1904,10 +2306,15 @@ require_derived_vector_type (unsigned int argno, else if (expected_bits == QUARTER_SIZE) expected_bits = first_type_suffix.element_bits / 4; + unsigned int orig_expected_num_vectors = expected_num_vectors; + if (expected_num_vectors == 0) + expected_num_vectors = first_type.num_vectors; + /* If the expected type doesn't depend on FIRST_TYPE at all, just check for the fixed choice of vector type. */ if (expected_tclass == orig_expected_tclass - && expected_bits == orig_expected_bits) + && expected_bits == orig_expected_bits + && orig_expected_num_vectors == 1) { const type_suffix_info &expected_suffix = type_suffixes[find_type_suffix (expected_tclass, expected_bits)]; @@ -1916,20 +2323,37 @@ require_derived_vector_type (unsigned int argno, /* Require the argument to be some form of SVE vector type, without being specific about the type of vector we want. */ - sve_type actual_type = infer_vector_type (argno); + sve_type actual_type = infer_sve_type (argno); if (!actual_type) return false; + if (actual_type.num_vectors != expected_num_vectors) + { + if (orig_expected_num_vectors == 0) + report_mismatched_num_vectors (first_argno, first_type, + argno, actual_type); + else + report_incorrect_num_vectors (argno, actual_type, + expected_num_vectors); + return false; + } + if (orig_expected_tclass == SAME_TYPE_CLASS && orig_expected_bits == SAME_SIZE) { if (actual_type.type == first_type.type) return true; - error_at (location, "passing %qT to argument %d of %qE, but" - " argument %d was a tuple of %qT", - get_vector_type (actual_type), argno + 1, fndecl, - first_argno + 1, get_vector_type (first_type.type)); + if (first_type.num_vectors > 1) + error_at (location, "passing %qT to argument %d of %qE, but" + " argument %d was a tuple of %qT", + get_vector_type (actual_type), argno + 1, fndecl, + first_argno + 1, get_vector_type (first_type.type)); + else + error_at (location, "passing %qT to argument %d of %qE, but" + " argument %d had type %qT", + get_vector_type (actual_type), argno + 1, fndecl, + first_argno + 1, get_vector_type (first_type)); return false; } @@ -1944,10 +2368,16 @@ require_derived_vector_type (unsigned int argno, size requirement, without having to refer to FIRST_TYPE. 
*/ if (!size_ok_p && expected_bits == orig_expected_bits) { - error_at (location, "passing %qT to argument %d of %qE, which" - " expects a vector of %d-bit elements", - get_vector_type (actual_type), argno + 1, fndecl, - expected_bits); + if (expected_num_vectors == 1) + error_at (location, "passing %qT to argument %d of %qE, which" + " expects a vector of %d-bit elements", + get_vector_type (actual_type), argno + 1, fndecl, + expected_bits); + else + error_at (location, "passing %qT to argument %d of %qE, which" + " expects vectors of %d-bit elements", + get_vector_type (actual_type), argno + 1, fndecl, + expected_bits); return false; } @@ -1956,16 +2386,30 @@ require_derived_vector_type (unsigned int argno, translation work for other type classes. */ if (!tclass_ok_p && orig_expected_tclass == TYPE_signed) { - error_at (location, "passing %qT to argument %d of %qE, which" - " expects a vector of signed integers", - get_vector_type (actual_type), argno + 1, fndecl); + if (expected_num_vectors == 1) + error_at (location, "passing %qT to argument %d of %qE, which" + " expects a vector of signed integers", + get_vector_type (actual_type), argno + 1, fndecl); + else + /* Translation note: could also be written "expects a tuple of + signed integer vectors". */ + error_at (location, "passing %qT to argument %d of %qE, which" + " expects vectors of signed integers", + get_vector_type (actual_type), argno + 1, fndecl); return false; } if (!tclass_ok_p && orig_expected_tclass == TYPE_unsigned) { - error_at (location, "passing %qT to argument %d of %qE, which" - " expects a vector of unsigned integers", - get_vector_type (actual_type), argno + 1, fndecl); + if (expected_num_vectors == 1) + error_at (location, "passing %qT to argument %d of %qE, which" + " expects a vector of unsigned integers", + get_vector_type (actual_type), argno + 1, fndecl); + else + /* Translation note: could also be written "expects a tuple of + unsigned integer vectors". */ + error_at (location, "passing %qT to argument %d of %qE, which" + " expects vectors of unsigned integers", + get_vector_type (actual_type), argno + 1, fndecl); return false; } @@ -1976,9 +2420,7 @@ require_derived_vector_type (unsigned int argno, /* If the arguments have consistent type classes, but a link between the sizes has been broken, try to describe the error in those terms. */ - if (first_type.num_vectors == 1 - && tclass_ok_p - && orig_expected_bits == SAME_SIZE) + if (tclass_ok_p && orig_expected_bits == SAME_SIZE) { if (argno < first_argno) { @@ -1995,8 +2437,7 @@ require_derived_vector_type (unsigned int argno, /* Likewise in reverse: look for cases in which the sizes are consistent but a link between the type classes has been broken. */ - if (first_type.num_vectors == 1 - && size_ok_p + if (size_ok_p && orig_expected_tclass == SAME_TYPE_CLASS && first_type_suffix.integer_p && actual_type_suffix.integer_p) @@ -2055,10 +2496,29 @@ function_resolver::require_scalar_type (unsigned int argno, const char *expected) { if (!scalar_argument_p (argno)) + { + if (expected) + error_at (location, "passing %qT to argument %d of %qE, which" + " expects %qs", get_argument_type (argno), argno + 1, + fndecl, expected); + return false; + } + return true; +} + +/* Require argument ARGNO to be a nonscalar type, given that it has already + passed require_vector_or_scalar_type. Return true if it is, otherwise + report an error. This is used when two sets of instructions share the + same overloaded function and one accepts scalars while the other + doesn't. 
*/ +bool +function_resolver::require_nonscalar_type (unsigned int argno) +{ + if (scalar_argument_p (argno)) { error_at (location, "passing %qT to argument %d of %qE, which" - " expects %qs", get_argument_type (argno), argno + 1, - fndecl, expected); + " does not accept scalars for this combination of arguments", + get_argument_type (argno), argno + 1, fndecl); return false; } return true; @@ -2493,7 +2953,7 @@ function_resolver::check_gp_argument (unsigned int nops, gcc_assert (!shape->has_merge_argument_p (*this, nops)); nargs = nops + 1; if (!check_num_arguments (nargs) - || !require_vector_type (i, VECTOR_TYPE_svbool_t)) + || !require_vector_type (i, gp_type_index ())) return false; i += 1; } @@ -2563,6 +3023,58 @@ finish_opt_n_resolution (unsigned int argno, unsigned int first_argno, return resolve_to (mode_suffix_id, inferred_type); } +/* Finish resolving a function whose final argument can be a tuple + or a vector, with the function having an implicit "_single" suffix + in the latter case. This "_single" form might only exist for certain + type suffixes. + + ARGNO is the index of the final argument. The inferred type suffix + was obtained from argument FIRST_ARGNO, which has type FIRST_TYPE. + EXPECTED_TCLASS gives the expected type class for the final tuple + or vector. + + Return the function decl of the resolved function on success, + otherwise report a suitable error and return error_mark_node. */ +tree function_resolver:: +finish_opt_single_resolution (unsigned int argno, unsigned int first_argno, + sve_type first_type, + type_class_index expected_tclass) +{ + sve_type new_type = infer_sve_type (argno); + if (!new_type) + return error_mark_node; + + /* If the type is a tuple, require it to match the group suffix. */ + unsigned int num_vectors = vectors_per_tuple (); + if (num_vectors != 1 + && new_type.num_vectors != 1 + && new_type.num_vectors != num_vectors) + { + report_incorrect_num_vectors (argno, new_type, num_vectors); + return error_mark_node; + } + + auto expected_num_vectors = (new_type.num_vectors == 1 ? 1 : 0); + if (!require_derived_vector_type (argno, first_argno, first_type, + expected_tclass, SAME_SIZE, + expected_num_vectors)) + return error_mark_node; + + if (new_type.num_vectors == 1 && first_type.num_vectors > 1) + { + if (tree single_form = lookup_form (MODE_single, first_type)) + return single_form; + + if (resolve_to (mode_suffix_id, first_type) != error_mark_node) + error_at (location, "passing %qT to argument %d of %qE, but its" + " %qT form does not accept single vectors", + get_vector_type (new_type), argno + 1, fndecl, + get_vector_type (first_type)); + return error_mark_node; + } + return resolve_to (mode_suffix_id, first_type); +} + /* Resolve a (possibly predicated) unary function. 
If the function uses merge predication or if TREAT_AS_MERGE_P is true, there is an extra vector argument before the governing predicate that specifies the @@ -2747,7 +3259,7 @@ function_checker::require_immediate_either_or (unsigned int rel_argno, if (actual != value0 && actual != value1) { - report_neither_nor (location, fndecl, argno, actual, 90, 270); + report_neither_nor (location, fndecl, argno, actual, value0, value1); return false; } @@ -3117,7 +3629,7 @@ function_expander::function_expander (const function_instance &instance, insn_code function_expander::direct_optab_handler (optab op, unsigned int suffix_i) { - return ::direct_optab_handler (op, vector_mode (suffix_i)); + return ::direct_optab_handler (op, tuple_mode (suffix_i)); } /* Choose between signed and unsigned direct optabs SIGNED_OP and @@ -3552,7 +4064,8 @@ function_expander::use_pred_x_insn (insn_code icode) has_float_operand_p = true; } - if (has_float_operand_p) + if (has_float_operand_p + && insn_data[icode].n_operands > (int) nops + 2) { /* Add a flag that indicates whether unpredicated instructions are allowed. */ @@ -3685,7 +4198,8 @@ function_expander::use_contiguous_store_insn (insn_code icode) - CODE_FOR_SINT for signed integers - CODE_FOR_UINT for unsigned integers - - UNSPEC_FOR_FP for floating-point values + - UNSPEC_FOR_COND_FP for predicated floating-point + - UNSPEC_FOR_UNCOND_FP for unpredicated floating-point and where <code_optab> is like <optab>, but uses CODE_FOR_SINT instead of UNSPEC_FOR_FP for floating-point values. @@ -3695,13 +4209,24 @@ function_expander::use_contiguous_store_insn (insn_code icode) rtx function_expander::map_to_rtx_codes (rtx_code code_for_sint, rtx_code code_for_uint, - int unspec_for_fp, + int unspec_for_cond_fp, + int unspec_for_uncond_fp, unsigned int merge_argno) { - machine_mode mode = vector_mode (0); + machine_mode mode = tuple_mode (0); rtx_code code = (type_suffix (0).unsigned_p ? code_for_uint : code_for_sint); insn_code icode; + if (mode_suffix_id == MODE_single) + { + gcc_assert (pred == PRED_none); + if (type_suffix (0).integer_p) + icode = code_for_aarch64_sve_single (code, mode); + else + icode = code_for_aarch64_sve_single (unspec_for_uncond_fp, mode); + return use_exact_insn (icode); + } + /* Handle predicate logic operations, which always use _z predication. */ if (type_suffix (0).tclass == TYPE_bool) { @@ -3716,7 +4241,7 @@ function_expander::map_to_rtx_codes (rtx_code code_for_sint, if (type_suffix (0).integer_p) icode = maybe_code_for_aarch64_pred (code, mode); else - icode = maybe_code_for_aarch64_pred (unspec_for_fp, mode); + icode = maybe_code_for_aarch64_pred (unspec_for_cond_fp, mode); if (icode != CODE_FOR_nothing) return use_pred_x_insn (icode); } @@ -3725,7 +4250,10 @@ function_expander::map_to_rtx_codes (rtx_code code_for_sint, Floating-point operations conventionally use the signed rtx code. 
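   (For example, a PLUS rtx describes svadd for signed, unsigned and
   floating-point element types alike.)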
*/ if (pred == PRED_none || pred == PRED_x) { - icode = direct_optab_handler (code_to_optab (code), 0); + if (type_suffix (0).float_p && unspec_for_uncond_fp >= 0) + icode = maybe_code_for_aarch64_sve (unspec_for_uncond_fp, mode); + else + icode = direct_optab_handler (code_to_optab (code), 0); if (icode == CODE_FOR_nothing) icode = code_for_aarch64_sve (code, mode); return use_unpred_insn (icode); @@ -3735,7 +4263,7 @@ function_expander::map_to_rtx_codes (rtx_code code_for_sint, if (type_suffix (0).integer_p) icode = code_for_cond (code, mode); else - icode = code_for_cond (unspec_for_fp, mode); + icode = code_for_cond (unspec_for_cond_fp, mode); return use_cond_insn (icode, merge_argno); } @@ -3761,11 +4289,17 @@ rtx function_expander::map_to_unspecs (int unspec_for_sint, int unspec_for_uint, int unspec_for_fp, unsigned int merge_argno) { - machine_mode mode = vector_mode (0); + machine_mode mode = tuple_mode (0); int unspec = (!type_suffix (0).integer_p ? unspec_for_fp : type_suffix (0).unsigned_p ? unspec_for_uint : unspec_for_sint); + if (mode_suffix_id == MODE_single) + { + gcc_assert (pred == PRED_none); + return use_exact_insn (code_for_aarch64_sve_single (unspec, mode)); + } + if (pred == PRED_x) { insn_code icode = maybe_code_for_aarch64_pred (unspec, mode); diff --git a/gcc/config/aarch64/aarch64-sve-builtins.def b/gcc/config/aarch64/aarch64-sve-builtins.def index 297904f3e47bcf93f751029a52bac3fd6add15e0..23ef7889c513ed32512c8f3dfe7fe2124b71fa6d 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.def +++ b/gcc/config/aarch64/aarch64-sve-builtins.def @@ -57,6 +57,7 @@ #endif DEF_SVE_MODE (n, none, none, none) +DEF_SVE_MODE (single, none, none, none) DEF_SVE_MODE (index, none, none, elements) DEF_SVE_MODE (offset, none, none, bytes) DEF_SVE_MODE (s32index, none, svint32_t, elements) @@ -108,6 +109,10 @@ DEF_SVE_TYPE_SUFFIX (b32, svbool_t, bool, 32, VNx4BImode) DEF_SVE_TYPE_SUFFIX (b64, svbool_t, bool, 64, VNx2BImode) DEF_SVE_TYPE_SUFFIX (bf16, svbfloat16_t, bfloat, 16, VNx8BFmode) DEF_SVE_TYPE_SUFFIX (c, svcount_t, count, 8, VNx16BImode) +DEF_SVE_TYPE_SUFFIX (c8, svcount_t, count, 8, VNx16BImode) +DEF_SVE_TYPE_SUFFIX (c16, svcount_t, count, 16, VNx16BImode) +DEF_SVE_TYPE_SUFFIX (c32, svcount_t, count, 32, VNx16BImode) +DEF_SVE_TYPE_SUFFIX (c64, svcount_t, count, 64, VNx16BImode) DEF_SVE_TYPE_SUFFIX (f16, svfloat16_t, float, 16, VNx8HFmode) DEF_SVE_TYPE_SUFFIX (f32, svfloat32_t, float, 32, VNx4SFmode) DEF_SVE_TYPE_SUFFIX (f64, svfloat64_t, float, 64, VNx2DFmode) @@ -133,6 +138,16 @@ DEF_SME_ZA_SUFFIX (za128, 128, VNx1TImode) DEF_SVE_GROUP_SUFFIX (x2, 0, 2) DEF_SVE_GROUP_SUFFIX (x3, 0, 3) DEF_SVE_GROUP_SUFFIX (x4, 0, 4) +DEF_SVE_GROUP_SUFFIX (vg1x2, 1, 2) +DEF_SVE_GROUP_SUFFIX (vg1x4, 1, 4) +DEF_SVE_GROUP_SUFFIX (vg2, 2, 2) +DEF_SVE_GROUP_SUFFIX (vg2x1, 2, 1) +DEF_SVE_GROUP_SUFFIX (vg2x2, 2, 2) +DEF_SVE_GROUP_SUFFIX (vg2x4, 2, 4) +DEF_SVE_GROUP_SUFFIX (vg4, 4, 4) +DEF_SVE_GROUP_SUFFIX (vg4x1, 4, 1) +DEF_SVE_GROUP_SUFFIX (vg4x2, 4, 2) +DEF_SVE_GROUP_SUFFIX (vg4x4, 4, 4) #include "aarch64-sve-builtins-base.def" #include "aarch64-sve-builtins-sve2.def" diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h index 51774825c23792ad4f6d1a8b5f4bdcb00312dd23..e67c46581f3708951f659d5c90850577515af579 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.h +++ b/gcc/config/aarch64/aarch64-sve-builtins.h @@ -99,6 +99,8 @@ const unsigned int CP_READ_FFR = 1U << 5; const unsigned int CP_WRITE_FFR = 1U << 6; const unsigned int CP_READ_ZA = 1U << 7; const 
unsigned int CP_WRITE_ZA = 1U << 8; +const unsigned int CP_READ_ZT0 = 1U << 9; +const unsigned int CP_WRITE_ZT0 = 1U << 10; /* Enumerates the SVE predicate and (data) vector types, together called "vector types" for brevity. */ @@ -361,6 +363,9 @@ public: bool modifies_global_state_p () const; bool could_trap_p () const; + vector_type_index gp_type_index () const; + tree gp_type () const; + unsigned int vectors_per_tuple () const; tree memory_scalar_type () const; machine_mode memory_vector_mode () const; @@ -469,6 +474,8 @@ public: bool scalar_argument_p (unsigned int); void report_incorrect_num_vectors (unsigned int, sve_type, unsigned int); + void report_mismatched_num_vectors (unsigned int, sve_type, + unsigned int, sve_type); tree report_no_such_form (sve_type); tree lookup_form (mode_suffix_index, @@ -481,8 +488,11 @@ public: type_suffix_index = NUM_TYPE_SUFFIXES, group_suffix_index = GROUP_none); tree resolve_to (mode_suffix_index, sve_type); + tree resolve_conversion (mode_suffix_index, sve_type); + vector_type_index infer_predicate_type (unsigned int); type_suffix_index infer_integer_scalar_type (unsigned int); + type_suffix_index infer_64bit_scalar_integer_pair (unsigned int); type_suffix_index infer_pointer_type (unsigned int, bool = false); sve_type infer_sve_type (unsigned int); sve_type infer_vector_or_tuple_type (unsigned int, unsigned int); @@ -494,13 +504,16 @@ public: bool require_vector_or_scalar_type (unsigned int); + bool require_matching_predicate_type (vector_type_index, sve_type); bool require_vector_type (unsigned int, vector_type_index); bool require_matching_vector_type (unsigned int, unsigned int, sve_type); bool require_derived_vector_type (unsigned int, unsigned int, sve_type, type_class_index = SAME_TYPE_CLASS, - unsigned int = SAME_SIZE); + unsigned int = SAME_SIZE, + unsigned int = 1); bool require_scalar_type (unsigned int, const char *); + bool require_nonscalar_type (unsigned int); bool require_pointer_type (unsigned int); bool require_matching_integer_scalar_type (unsigned int, unsigned int, type_suffix_index); @@ -529,6 +542,8 @@ public: type_class_index = SAME_TYPE_CLASS, unsigned int = SAME_SIZE, type_suffix_index = NUM_TYPE_SUFFIXES); + tree finish_opt_single_resolution (unsigned int, unsigned int, sve_type, + type_class_index = SAME_TYPE_CLASS); tree resolve (); @@ -653,7 +668,7 @@ public: rtx use_contiguous_prefetch_insn (insn_code); rtx use_contiguous_store_insn (insn_code); - rtx map_to_rtx_codes (rtx_code, rtx_code, int, + rtx map_to_rtx_codes (rtx_code, rtx_code, int, int, unsigned int = DEFAULT_MERGE_ARGNO); rtx map_to_unspecs (int, int, int, unsigned int = DEFAULT_MERGE_ARGNO); @@ -784,13 +799,6 @@ extern tree acle_svprfop; bool vector_cst_all_same (tree, unsigned int); bool is_ptrue (tree, unsigned int); -/* Return the ACLE type svbool_t. */ -inline tree -get_svbool_t (void) -{ - return acle_vector_types[0][VECTOR_TYPE_svbool_t]; -} - /* Try to find a mode with the given mode_suffix_info fields. Return the mode on success or MODE_none on failure. */ inline mode_suffix_index @@ -864,6 +872,24 @@ function_instance::operator!= (const function_instance &other) const return !operator== (other); } +/* Return the index of the type that should be used as the governing + predicate of this function. 
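+   Multi-vector (x2/x4) operations are governed by an svcount_t
+   predicate-as-counter; single-vector operations keep svbool_t.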
*/
+inline vector_type_index
+function_instance::gp_type_index () const
+{
+  if (group_suffix ().vectors_per_tuple > 1)
+    return VECTOR_TYPE_svcount_t;
+  return VECTOR_TYPE_svbool_t;
+}
+
+/* Return the type that should be used as the governing predicate of
+   this function.  */
+inline tree
+function_instance::gp_type () const
+{
+  return acle_vector_types[0][gp_type_index ()];
+}
+
 /* If the function operates on tuples of vectors, return the number
    of vectors in the tuples, otherwise return 1.  */
 inline unsigned int
@@ -997,6 +1023,10 @@ function_instance::tuple_mode (unsigned int i) const
 inline machine_mode
 function_instance::gp_mode (unsigned int i) const
 {
+  /* Multi-vector operations are predicated on an svcount_t, which has
+     mode VNx16BI.  */
+  if (group_suffix ().vectors_per_tuple > 1)
+    return VNx16BImode;
   return aarch64_sve_pred_mode (type_suffix (i).element_bytes).require ();
 }
 
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 3729c67eb69754ff0b4194ec447beb6c87b35fb7..d911f657871fac9d673f139c0b91c4d9044f2a0a 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1266,7 +1266,7 @@
 ;; - LD4W
 ;; -------------------------------------------------------------------------
 
-;; Predicated LD1.
+;; Predicated LD1 (single).
 (define_insn "maskload<mode><vpred>"
   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
	(unspec:SVE_ALL
@@ -1277,6 +1277,17 @@
   "ld1<Vesize>\t%0.<Vctype>, %2/z, %1"
 )
 
+;; Predicated LD1 (multi), with a count as predicate.
+(define_insn "@aarch64_ld1<mode>"
+  [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(unspec:SVE_FULLx24
+	  [(match_operand:VNx16BI 2 "register_operand" "Uph")
+	   (match_operand:SVE_FULLx24 1 "memory_operand" "m")]
+	  UNSPEC_LD1_SVE_COUNT))]
+  "TARGET_SME2 && TARGET_STREAMING"
+  "ld1<Vesize>\t%0, %K2/z, %1"
+)
+
 ;; Unpredicated LD[234].
 (define_expand "vec_load_lanes<mode><vsingle>"
   [(set (match_operand:SVE_STRUCT 0 "register_operand")
@@ -1408,7 +1419,7 @@
 ;; - LDNT1W
 ;; -------------------------------------------------------------------------
 
-;; Predicated contiguous non-temporal load.
+;; Predicated contiguous non-temporal load (single).
 (define_insn "@aarch64_ldnt1<mode>"
   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
@@ -1419,6 +1430,17 @@
   "ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1"
 )
 
+;; Predicated contiguous non-temporal load (multi).
+(define_insn "@aarch64_ldnt1<mode>"
+  [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(unspec:SVE_FULLx24
+	  [(match_operand:VNx16BI 2 "register_operand" "Uph")
+	   (match_operand:SVE_FULLx24 1 "memory_operand" "m")]
+	  UNSPEC_LDNT1_SVE_COUNT))]
+  "TARGET_SME2 && TARGET_STREAMING"
+  "ldnt1<Vesize>\t%0, %K2/z, %1"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- Normal gather loads
 ;; -------------------------------------------------------------------------
@@ -2229,7 +2251,7 @@
 ;; - ST4W
 ;; -------------------------------------------------------------------------
 
-;; Predicated ST1.
+;; Predicated ST1 (single).
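+;;
+;; As a usage sketch, the count-predicated (multi) load/store patterns in
+;; this file are reached from SME2 ACLE intrinsics along these lines
+;; (streaming code; intrinsic names as in the ACLE spec, src/dst are
+;; illustrative pointers):
+;;
+;;   svcount_t pg = svptrue_c8 ();               // PTRUE  pn.b
+;;   svuint8x2_t data = svld1_u8_x2 (pg, src);   // LD1B (two registers)
+;;   svst1_u8_x2 (pg, dst, data);                // ST1B (two registers)
+;;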
(define_insn "maskstore<mode><vpred>" [(set (match_operand:SVE_ALL 0 "memory_operand" "+m") (unspec:SVE_ALL @@ -2241,6 +2263,17 @@ "st1<Vesize>\t%1.<Vctype>, %2, %0" ) +(define_insn "@aarch64_st1<mode>" + [(set (match_operand:SVE_FULLx24 0 "memory_operand" "+m") + (unspec:SVE_FULLx24 + [(match_operand:VNx16BI 2 "register_operand" "Uph") + (match_operand:SVE_FULLx24 1 "aligned_register_operand" "Uw<vector_count>") + (match_dup 0)] + UNSPEC_ST1_SVE_COUNT))] + "TARGET_SME2 && TARGET_STREAMING" + "st1<Vesize>\t%1, %K2, %0" +) + ;; Unpredicated ST[234]. This is always a full update, so the dependence ;; on the old value of the memory location (via (match_dup 0)) is redundant. ;; There doesn't seem to be any obvious benefit to treating the all-true @@ -2340,6 +2373,17 @@ "stnt1<Vesize>\t%1.<Vetype>, %2, %0" ) +(define_insn "@aarch64_stnt1<mode>" + [(set (match_operand:SVE_FULLx24 0 "memory_operand" "+m") + (unspec:SVE_FULLx24 + [(match_operand:VNx16BI 2 "register_operand" "Uph") + (match_operand:SVE_FULLx24 1 "aligned_register_operand" "Uw<vector_count>") + (match_dup 0)] + UNSPEC_STNT1_SVE_COUNT))] + "TARGET_SME2 && TARGET_STREAMING" + "stnt1<Vesize>\t%1, %K2, %0" +) + ;; ------------------------------------------------------------------------- ;; ---- Normal scatter stores ;; ------------------------------------------------------------------------- @@ -7133,21 +7177,25 @@ ) ;; Four-element integer dot-product by selected lanes with accumulation. -(define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>" +(define_insn "@aarch64_<sur>dot_prod_lane<SVE_FULL_SDI:mode><SVE_FULL_BHI:mode>" [(set (match_operand:SVE_FULL_SDI 0 "register_operand") (plus:SVE_FULL_SDI (unspec:SVE_FULL_SDI - [(match_operand:<VSI2QI> 1 "register_operand") - (unspec:<VSI2QI> - [(match_operand:<VSI2QI> 2 "register_operand") + [(match_operand:SVE_FULL_BHI 1 "register_operand") + (unspec:SVE_FULL_BHI + [(match_operand:SVE_FULL_BHI 2 "register_operand") (match_operand:SI 3 "const_int_operand")] UNSPEC_SVE_LANE_SELECT)] DOTPROD) (match_operand:SVE_FULL_SDI 4 "register_operand")))] - "TARGET_SVE" - {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ] - [ w , w , <sve_lane_con> , 0 ; * ] <sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3] - [ ?&w , w , <sve_lane_con> , w ; yes ] movprfx\t%0, %4\;<sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3] + "TARGET_SVE + && (<SVE_FULL_SDI:elem_bits> == <SVE_FULL_BHI:elem_bits> * 4 + || (TARGET_STREAMING_SME2 + && <SVE_FULL_SDI:elem_bits> == 32 + && <SVE_FULL_BHI:elem_bits> == 16))" + {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ] + [ w , w , <SVE_FULL_SDI:sve_lane_con> , 0 ; * ] <sur>dot\t%0.<SVE_FULL_SDI:Vetype>, %1.<SVE_FULL_BHI:Vetype>, %2.<SVE_FULL_BHI:Vetype>[%3] + [ ?&w , w , <SVE_FULL_SDI:sve_lane_con> , w ; yes ] movprfx\t%0, %4\;<sur>dot\t%0.<SVE_FULL_SDI:Vetype>, %1.<SVE_FULL_BHI:Vetype>, %2.<SVE_FULL_BHI:Vetype>[%3] } ) @@ -7166,13 +7214,13 @@ } ) -(define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>" +(define_insn "@aarch64_<sur>dot_prod_lane<VNx4SI_ONLY:mode><VNx16QI_ONLY:mode>" [(set (match_operand:VNx4SI_ONLY 0 "register_operand") (plus:VNx4SI_ONLY (unspec:VNx4SI_ONLY - [(match_operand:<VSI2QI> 1 "register_operand") - (unspec:<VSI2QI> - [(match_operand:<VSI2QI> 2 "register_operand") + [(match_operand:VNx16QI_ONLY 1 "register_operand") + (unspec:VNx16QI_ONLY + [(match_operand:VNx16QI_ONLY 2 "register_operand") (match_operand:SI 3 "const_int_operand")] UNSPEC_SVE_LANE_SELECT)] DOTPROD_I8MM) @@ -7758,6 +7806,8 @@ ;; - BFDOT (BF16) ;; - BFMLALB (BF16) ;; - BFMLALT (BF16) 
+;; - BFMLSLB (SME2)
+;; - BFMLSLT (SME2)
 ;; - BFMMLA (BF16)
 ;; -------------------------------------------------------------------------
 
@@ -8239,11 +8289,18 @@
 ;; - WHILEWR (SVE2)
 ;; -------------------------------------------------------------------------
 
+(define_constants [
+  (SVE_WHILE_B 0)
+  (SVE_WHILE_B_X2 1)
+  (SVE_WHILE_C 2)
+])
+
 ;; Set element I of the result if (cmp (plus operand1 J) operand2) is
 ;; true for all J in [0, I].
 (define_insn "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>"
   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
-	(unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+	(unspec:PRED_ALL [(const_int SVE_WHILE_B)
+			  (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
 			  (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
 			 SVE_WHILE))
    (clobber (reg:CC_NZC CC_REGNUM))]
@@ -8261,12 +8318,14 @@
 	   (match_operand 4)
 	   (const_int SVE_KNOWN_PTRUE)
 	   (unspec:PRED_ALL
-	     [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+	     [(const_int SVE_WHILE_B)
+	      (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
 	      (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
 	     SVE_WHILE)]
 	  UNSPEC_PTEST))
    (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
-	(unspec:PRED_ALL [(match_dup 1)
+	(unspec:PRED_ALL [(const_int SVE_WHILE_B)
+			  (match_dup 1)
 			  (match_dup 2)]
 			 SVE_WHILE))]
   "TARGET_SVE"
@@ -8288,7 +8347,8 @@
 	   (match_operand 4)
 	   (const_int SVE_KNOWN_PTRUE)
 	   (unspec:PRED_ALL
-	     [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+	     [(const_int SVE_WHILE_B)
+	      (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
 	      (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
 	     SVE_WHILE)]
 	  UNSPEC_PTEST))
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 79e19699bc4559e2e5a43210611b12fb4529e8e8..29c41ca3c931c5249a70b552ed9aa86db54a5854 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -25,12 +25,24 @@
 ;; ---- Non-temporal gather loads
 ;; ---- Non-temporal scatter stores
 ;;
+;; == Predicate manipulation
+;; ---- [PRED] Predicate-as-counter PTRUE
+;; ---- [PRED] Predicate extraction
+;; ---- [PRED] Predicate selection
+;; ---- [PRED] Predicate count
+;;
+;; == Uniform unary arithmetic
+;; ---- [FP] Multi-register unary operations
+;;
 ;; == Uniform binary arithmnetic
+;; ---- [INT] Multi-register operations
+;; ---- [INT] Clamp to minimum/maximum
 ;; ---- [INT] Multiplication
 ;; ---- [INT] Scaled high-part multiplication
 ;; ---- [INT] General binary arithmetic that maps to unspecs
 ;; ---- [INT] Saturating binary arithmetic
 ;; ---- [INT] Saturating left shifts
+;; ---- [FP] Clamp to minimum/maximum
 ;;
 ;; == Uniform ternary arithmnetic
 ;; ---- [INT] General ternary arithmetic that maps to unspecs
@@ -42,16 +54,20 @@
 ;; ---- [INT] Sum of absolute differences
 ;;
 ;; == Extending arithmetic
+;; ---- [INT] Multi-register widening conversions
 ;; ---- [INT] Wide binary arithmetic
 ;; ---- [INT] Long binary arithmetic
 ;; ---- [INT] Long left shifts
 ;; ---- [INT] Long binary arithmetic with accumulation
+;; ---- [FP] Multi-register operations
 ;; ---- [FP] Long multiplication with accumulation
 ;;
 ;; == Narrowing arithnetic
 ;; ---- [INT] Narrowing unary arithmetic
+;; ---- [INT] Multi-vector narrowing unary arithmetic
 ;; ---- [INT] Narrowing binary arithmetic
 ;; ---- [INT] Narrowing right shifts
+;; ---- [INT] Multi-vector narrowing right shifts
 ;;
 ;; == Pairwise arithmetic
 ;; ---- [INT] Pairwise arithmetic
@@ -66,14 +82,23 @@
 ;; == Conversions
 ;; ---- [FP<-FP] Widening conversions
 ;; ---- [FP<-FP] Narrowing conversions
+;; ---- [FP<-FP] Multi-vector 
narrowing conversions +;; ---- [FP<-INT] Multi-vector conversions +;; ---- [INT<-FP] Multi-vector conversions ;; ;; == Other arithmetic ;; ---- [INT] Reciprocal approximation ;; ---- [INT<-FP] Base-2 logarithm ;; ---- [INT] Polynomial multiplication ;; +;; == Comparisons and selects +;; ---- [INT,FP] Select based on predicates as counters +;; ---- [INT] While tests +;; ;; == Permutation +;; ---- [INT,FP] Reversal ;; ---- [INT,FP] General permutes +;; ---- [INT,FP] Multi-register permutes ;; ---- [INT] Optional bit-permute extensions ;; ;; == General @@ -192,10 +217,256 @@ } ) +;; ========================================================================= +;; == Predicate manipulation +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [PRED] Predicate-as-counter PTRUE +;; ------------------------------------------------------------------------- +;; - PTRUE (predicate-as-counter form) +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_ptrue_c<BHSD_BITS>" + [(set (match_operand:VNx16BI 0 "register_operand" "=Uph") + (unspec:VNx16BI [(const_int BHSD_BITS)] UNSPEC_PTRUE_C))] + "TARGET_STREAMING_SME2" + "ptrue\t%K0.<bits_etype>" +) + +;; ------------------------------------------------------------------------- +;; ---- [PRED] Predicate extraction +;; ------------------------------------------------------------------------- +;; Includes +;; - PEXT +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_pext<BHSD_BITS>" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (unspec:VNx16BI + [(match_operand:VNx16BI 1 "register_operand" "Uph") + (match_operand:DI 2 "const_int_operand") + (const_int BHSD_BITS)] + UNSPEC_PEXT))] + "TARGET_STREAMING_SME2" + "pext\t%0.<bits_etype>, %K1[%2]" +) + +(define_insn "@aarch64_sve_pext<BHSD_BITS>x2" + [(set (match_operand:VNx32BI 0 "register_operand" "=Up2") + (unspec:VNx32BI + [(match_operand:VNx16BI 1 "register_operand" "Uph") + (match_operand:DI 2 "const_int_operand") + (const_int BHSD_BITS)] + UNSPEC_PEXTx2))] + "TARGET_STREAMING_SME2" + "pext\t{%S0.<bits_etype>, %T0.<bits_etype>}, %K1[%2]" +) + +;; ------------------------------------------------------------------------- +;; ---- [PRED] Predicate selection +;; ------------------------------------------------------------------------- +;; Includes +;; - PSEL +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_psel<BHSD_BITS>" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (unspec:VNx16BI + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_operand:VNx16BI 2 "register_operand" "Upa") + (match_operand:SI 3 "register_operand" "Ucj") + (const_int BHSD_BITS)] + UNSPEC_PSEL))] + "TARGET_STREAMING_SME2" + "psel\t%0, %1, %2.<bits_etype>[%w3, 0]" +) + +(define_insn "*aarch64_sve_psel<BHSD_BITS>_plus" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (unspec:VNx16BI + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_operand:VNx16BI 2 "register_operand" "Upa") + (plus:SI + (match_operand:SI 3 "register_operand" "Ucj") + (match_operand:SI 4 "const_int_operand")) + (const_int BHSD_BITS)] + UNSPEC_PSEL))] + "TARGET_STREAMING_SME2 + && UINTVAL (operands[4]) < 128 / <BHSD_BITS>" + "psel\t%0, %1, %2.<bits_etype>[%w3, %4]" +) + +;; 
-------------------------------------------------------------------------
+;; ---- [PRED] Predicate count
+;; -------------------------------------------------------------------------
+;; Includes
+;; - CNTP (predicate as counter)
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_cntp_c<BHSD_BITS>"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI
+	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
+	   (match_operand:DI 2 "const_int_operand")
+	   (const_int BHSD_BITS)]
+	  UNSPEC_CNTP_C))]
+  "TARGET_STREAMING_SME2"
+  "cntp\t%x0, %K1.<bits_etype>, vlx%2"
+)
+
+;; =========================================================================
+;; == Uniform unary arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Multi-register unary operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FRINTA
+;; - FRINTM
+;; - FRINTN
+;; - FRINTP
+;; -------------------------------------------------------------------------
+
+(define_insn "<frint_pattern><mode>2"
+  [(set (match_operand:SVE_SFx24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(unspec:SVE_SFx24
+	  [(match_operand:SVE_SFx24 1 "aligned_register_operand" "Uw<vector_count>")]
+	  SVE2_SFx24_UNARY))]
+  "TARGET_STREAMING_SME2"
+  "frint<frint_suffix>\t%0, %1"
+)
+
 ;; =========================================================================
 ;; == Uniform binary arithmnetic
 ;; =========================================================================
 
+;; -------------------------------------------------------------------------
+;; ---- [INT] Multi-register operations
+;; -------------------------------------------------------------------------
+;; Includes the multi-register forms of:
+;; - ADD
+;; - SMAX
+;; - SMIN
+;; - SQMULH
+;; - SRSHL
+;; - UMAX
+;; - UMIN
+;; - URSHL
+;; -------------------------------------------------------------------------
+
+(define_expand "<optab><mode>3"
+  [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(SVE_INT_BINARY_MULTI:SVE_Ix24
+	  (match_operand:SVE_Ix24 1 "aligned_register_operand" "Uw<vector_count>")
+	  (match_operand:SVE_Ix24 2 "aligned_register_operand" "Uw<vector_count>")))]
+  "TARGET_STREAMING_SME2"
+)
+
+(define_insn "*<optab><mode>3"
+  [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(SVE_INT_BINARY_MULTI:SVE_Ix24
+	  (match_operand:SVE_Ix24 1 "aligned_register_operand" "%0")
+	  (match_operand:SVE_Ix24 2 "aligned_register_operand" "Uw<vector_count>")))]
+  "TARGET_STREAMING_SME2"
+  "<sve_int_op>\t%0, %0, %2"
+)
+
+(define_insn "@aarch64_sve_single_<optab><mode>"
+  [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(SVE_INT_BINARY_SINGLE:SVE_Ix24
+	  (match_operand:SVE_Ix24 1 "aligned_register_operand" "0")
+	  (vec_duplicate:SVE_Ix24
+	    (match_operand:<VSINGLE> 2 "register_operand" "x"))))]
+  "TARGET_STREAMING_SME2"
+  "<sve_int_op>\t%0, %0, %2.<Vetype>"
+)
+
+(define_insn "@aarch64_sve_<sve_int_op><mode>"
+  [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
+	(unspec:SVE_Ix24
+	  [(match_operand:SVE_Ix24 1 "aligned_register_operand" "%0")
+	   (match_operand:SVE_Ix24 2 "aligned_register_operand" "Uw<vector_count>")]
+	  SVE_INT_BINARY_MULTI))]
+  "TARGET_STREAMING_SME2"
+  "<sve_int_op>\t%0, %0, %2"
+)
+
+(define_insn 
"@aarch64_sve_single_<sve_int_op><mode>" + [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>") + (unspec:SVE_Ix24 + [(match_operand:SVE_Ix24 1 "aligned_register_operand" "0") + (vec_duplicate:SVE_Ix24 + (match_operand:<VSINGLE> 2 "register_operand" "x"))] + SVE_INT_BINARY_MULTI))] + "TARGET_STREAMING_SME2" + "<sve_int_op>\t%0, %0, %2.<Vetype>" +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Clamp to minimum/maximum +;; ------------------------------------------------------------------------- +;; - SCLAMP +;; - UCLAMP +;; ------------------------------------------------------------------------- + +;; The minimum is applied after the maximum, which matters if the maximum +;; bound is (unexpectedly) less than the minimum bound. +(define_insn "@aarch64_sve_<su>clamp<mode>" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (<max_opp>:SVE_FULL_I + (USMAX:SVE_FULL_I + (match_operand:SVE_FULL_I 1 "register_operand") + (match_operand:SVE_FULL_I 2 "register_operand")) + (match_operand:SVE_FULL_I 3 "register_operand")))] + "TARGET_STREAMING_SME" + {@ [cons: =0, 1, 2, 3; attrs: movprfx] + [ w, %0, w, w; * ] <su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype> + [ ?&w, w, w, w; yes ] movprfx\t%0, %1\;<su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype> + } +) + +(define_insn_and_split "*aarch64_sve_<su>clamp<mode>_x" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (unspec:SVE_FULL_I + [(match_operand 4) + (<max_opp>:SVE_FULL_I + (unspec:SVE_FULL_I + [(match_operand 5) + (USMAX:SVE_FULL_I + (match_operand:SVE_FULL_I 1 "register_operand") + (match_operand:SVE_FULL_I 2 "register_operand"))] + UNSPEC_PRED_X) + (match_operand:SVE_FULL_I 3 "register_operand"))] + UNSPEC_PRED_X))] + "TARGET_STREAMING_SME" + {@ [cons: =0, 1, 2, 3; attrs: movprfx] + [ w, %0, w, w; * ] # + [ ?&w, w, w, w; yes ] # + } + "&& true" + [(set (match_dup 0) + (<max_opp>:SVE_FULL_I + (USMAX:SVE_FULL_I + (match_dup 1) + (match_dup 2)) + (match_dup 3)))] +) + +(define_insn "@aarch64_sve_<su>clamp_single<mode>" + [(set (match_operand:SVE_Ix24 0 "register_operand" "=Uw<vector_count>") + (<max_opp>:SVE_Ix24 + (USMAX:SVE_Ix24 + (match_operand:SVE_Ix24 1 "register_operand" "0") + (vec_duplicate:SVE_Ix24 + (match_operand:<VSINGLE> 2 "register_operand" "w"))) + (vec_duplicate:SVE_Ix24 + (match_operand:<VSINGLE> 3 "register_operand" "w"))))] + "TARGET_STREAMING_SME2" + "<su>clamp\t%0, %2.<Vetype>, %3.<Vetype>" +) + ;; ------------------------------------------------------------------------- ;; ---- [INT] Multiplication ;; ------------------------------------------------------------------------- @@ -689,6 +960,74 @@ [(set_attr "movprfx" "yes")] ) +;; ------------------------------------------------------------------------- +;; ---- [FP] Clamp to minimum/maximum +;; ------------------------------------------------------------------------- +;; - FCLAMP +;; ------------------------------------------------------------------------- + +;; The minimum is applied after the maximum, which matters if the maximum +;; bound is (unexpectedly) less than the minimum bound. 
+(define_insn "@aarch64_sve_fclamp<mode>"
+  [(set (match_operand:SVE_FULL_F 0 "register_operand")
+	(unspec:SVE_FULL_F
+	  [(unspec:SVE_FULL_F
+	     [(match_operand:SVE_FULL_F 1 "register_operand")
+	      (match_operand:SVE_FULL_F 2 "register_operand")]
+	     UNSPEC_FMAXNM)
+	   (match_operand:SVE_FULL_F 3 "register_operand")]
+	  UNSPEC_FMINNM))]
+  "TARGET_STREAMING_SME2"
+  {@ [cons: =0, 1, 2, 3; attrs: movprfx]
+     [  w, %0, w, w; *   ] fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
+     [ ?&w, w, w, w; yes ] movprfx\t%0, %1\;fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
+  }
+)
+
+(define_insn_and_split "*aarch64_sve_fclamp<mode>_x"
+  [(set (match_operand:SVE_FULL_F 0 "register_operand")
+	(unspec:SVE_FULL_F
+	  [(match_operand 4)
+	   (const_int SVE_RELAXED_GP)
+	   (unspec:SVE_FULL_F
+	     [(match_operand 5)
+	      (const_int SVE_RELAXED_GP)
+	      (match_operand:SVE_FULL_F 1 "register_operand")
+	      (match_operand:SVE_FULL_F 2 "register_operand")]
+	     UNSPEC_COND_FMAXNM)
+	   (match_operand:SVE_FULL_F 3 "register_operand")]
+	  UNSPEC_COND_FMINNM))]
+  "TARGET_STREAMING_SME2"
+  {@ [cons: =0, 1, 2, 3; attrs: movprfx]
+     [  w, %0, w, w; *   ] #
+     [ ?&w, w, w, w; yes ] #
+  }
+  "&& true"
+  [(set (match_dup 0)
+	(unspec:SVE_FULL_F
+	  [(unspec:SVE_FULL_F
+	     [(match_dup 1)
+	      (match_dup 2)]
+	     UNSPEC_FMAXNM)
+	   (match_dup 3)]
+	  UNSPEC_FMINNM))]
+)
+
+(define_insn "@aarch64_sve_fclamp_single<mode>"
+  [(set (match_operand:SVE_Fx24 0 "register_operand" "=Uw<vector_count>")
+	(unspec:SVE_Fx24
+	  [(unspec:SVE_Fx24
+	     [(match_operand:SVE_Fx24 1 "register_operand" "0")
+	      (vec_duplicate:SVE_Fx24
+		(match_operand:<VSINGLE> 2 "register_operand" "w"))]
+	     UNSPEC_FMAXNM)
+	   (vec_duplicate:SVE_Fx24
+	     (match_operand:<VSINGLE> 3 "register_operand" "w"))]
+	  UNSPEC_FMINNM))]
+  "TARGET_STREAMING_SME2"
+  "fclamp\t%0, %2.<Vetype>, %3.<Vetype>"
+)
+
 ;; =========================================================================
 ;; == Uniform ternary arithmnetic
 ;; =========================================================================
 
@@ -1256,6 +1595,30 @@
 ;; == Extending arithmetic
 ;; =========================================================================
 
+;; -------------------------------------------------------------------------
+;; ---- [INT] Multi-register widening conversions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SUNPK
+;; - UUNPK
+;; -------------------------------------------------------------------------
+
+(define_insn "<optab><mode><v2xwide>2"
+  [(set (match_operand:<V2XWIDE> 0 "aligned_register_operand" "=Uw2")
+	(ANY_EXTEND:<V2XWIDE>
+	  (match_operand:SVE_FULL_BHSI 1 "register_operand" "w")))]
+  "TARGET_STREAMING_SME2"
+  "<su>unpk\t%0, %1.<Vetype>"
+)
+
+(define_insn "<optab><mode><v2xwide>2"
+  [(set (match_operand:<V2XWIDE> 0 "aligned_register_operand" "=Uw4")
+	(ANY_EXTEND:<V2XWIDE>
+	  (match_operand:SVE_FULL_BHSIx2 1 "aligned_register_operand" "Uw2")))]
+  "TARGET_STREAMING_SME2"
+  "<su>unpk\t%0, %1"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [INT] Wide binary arithmetic
 ;; -------------------------------------------------------------------------
@@ -1357,6 +1720,7 @@
 ;; Includes:
 ;; - SABALB
 ;; - SABALT
+;; - SDOT (SME2 or SVE2p1)
 ;; - SMLALB
 ;; - SMLALT
 ;; - SMLSLB
@@ -1369,6 +1733,7 @@
 ;; - SQDMLSLT
 ;; - UABALB
 ;; - UABALT
+;; - UDOT (SME2 or SVE2p1)
 ;; - UMLALB
 ;; - UMLALT
 ;; - UMLSLB
@@ -1514,10 +1879,68 @@
      [ ?&w , w , w , <sve_lane_con> ; yes ] movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   }
 )
+
+;; Two-way dot-product.
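+;; Each 32-bit element of the accumulator gains the sum of two adjacent
+;; 16-bit products, i.e. in pseudocode:
+;;   acc.s[i] += op1.h[2*i] * op2.h[2*i] + op1.h[2*i + 1] * op2.h[2*i + 1]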
+(define_insn "@aarch64_sve_<sur>dotvnx4sivnx8hi" + [(set (match_operand:VNx4SI 0 "register_operand") + (plus:VNx4SI + (unspec:VNx4SI + [(match_operand:VNx8HI 1 "register_operand") + (match_operand:VNx8HI 2 "register_operand")] + DOTPROD) + (match_operand:VNx4SI 3 "register_operand")))] + "TARGET_STREAMING_SME2" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ w , w , w , 0 ; * ] <sur>dot\t%0.s, %1.h, %2.h + [ ?&w , w , w , w ; yes ] movprfx\t%0, %3\;<sur>dot\t%0.s, %1.h, %2.h + } +) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Multi-register operations +;; ------------------------------------------------------------------------- +;; Includes the multi-register forms of: +;; - FMAX +;; - FMAXNM +;; - FMIN +;; - FMINNM +;; ------------------------------------------------------------------------- + +(define_expand "@aarch64_sve_<maxmin_uns_op><mode>" + [(set (match_operand:SVE_Fx24 0 "aligned_register_operand" "=Uw<vector_count>") + (unspec:SVE_Fx24 + [(match_operand:SVE_Fx24 1 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SVE_Fx24 2 "aligned_register_operand" "Uw<vector_count>")] + SVE_FP_BINARY_MULTI))] + "TARGET_STREAMING_SME2" +) + +(define_insn "*aarch64_sve_<maxmin_uns_op><mode>" + [(set (match_operand:SVE_Fx24 0 "aligned_register_operand" "=Uw<vector_count>") + (unspec:SVE_Fx24 + [(match_operand:SVE_Fx24 1 "aligned_register_operand" "%0") + (match_operand:SVE_Fx24 2 "aligned_register_operand" "Uw<vector_count>")] + SVE_FP_BINARY_MULTI))] + "TARGET_STREAMING_SME2" + "<maxmin_uns_op>\t%0, %0, %2" +) + +(define_insn "@aarch64_sve_single_<maxmin_uns_op><mode>" + [(set (match_operand:SVE_Fx24 0 "aligned_register_operand" "=Uw<vector_count>") + (unspec:SVE_Fx24 + [(match_operand:SVE_Fx24 1 "aligned_register_operand" "0") + (vec_duplicate:SVE_Fx24 + (match_operand:<VSINGLE> 2 "register_operand" "x"))] + SVE_FP_BINARY_MULTI))] + "TARGET_STREAMING_SME2" + "<maxmin_uns_op>\t%0, %0, %2.<Vetype>" +) + ;; ------------------------------------------------------------------------- ;; ---- [FP] Long multiplication with accumulation ;; ------------------------------------------------------------------------- ;; Includes: +;; - FDOT (SME2 or SVE2p1) ;; - FMLALB ;; - FMLALT ;; - FMLSLB @@ -1555,6 +1978,40 @@ } ) +;; Two-way dot-product. 
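+;; As with the integer form above: each single-precision lane accumulates
+;; the products of the corresponding pair of half-precision elements.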
+(define_insn "aarch64_sve_fdotvnx4sfvnx8hf" + [(set (match_operand:VNx4SF 0 "register_operand") + (plus:VNx4SF + (unspec:VNx4SF + [(match_operand:VNx8HF 1 "register_operand") + (match_operand:VNx8HF 2 "register_operand")] + UNSPEC_FDOT) + (match_operand:VNx4SF 3 "register_operand")))] + "TARGET_STREAMING_SME2" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ w , w , w , 0 ; * ] fdot\t%0.s, %1.h, %2.h + [ ?&w , w , w , w ; yes ] movprfx\t%0, %3\;fdot\t%0.s, %1.h, %2.h + } +) + +(define_insn "aarch64_fdot_prod_lanevnx4sfvnx8hf" + [(set (match_operand:VNx4SF 0 "register_operand") + (plus:VNx4SF + (unspec:VNx4SF + [(match_operand:VNx8HF 1 "register_operand") + (unspec:VNx8HF + [(match_operand:VNx8HF 2 "register_operand") + (match_operand:SI 3 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT)] + UNSPEC_FDOT) + (match_operand:VNx4SF 4 "register_operand")))] + "TARGET_STREAMING_SME2" + {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ] + [ w , w , y , 0 ; * ] fdot\t%0.s, %1.h, %2.h[%3] + [ ?&w , w , y , w ; yes ] movprfx\t%0, %4\;fdot\t%0.s, %1.h, %2.h[%3] + } +) + ;; ========================================================================= ;; == Narrowing arithnetic ;; ========================================================================= @@ -1591,6 +2048,43 @@ "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>" ) +;; ------------------------------------------------------------------------- +;; ---- [INT] Multi-vector narrowing unary arithmetic +;; ------------------------------------------------------------------------- +;; Includes: +;; - SQCVT +;; - SQCVTN +;; - UQCVT +;; - UQCVTN +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_<optab><VNx16QI_ONLY:mode><VNx16SI_ONLY:mode>" + [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w") + (unspec:VNx16QI_ONLY + [(match_operand:VNx16SI_ONLY 1 "aligned_register_operand" "Uw<vector_count>")] + SVE_QCVTxN))] + "TARGET_SME2 && TARGET_STREAMING" + "<optab>\t%0.b, %1" +) + +(define_insn "@aarch64_sve_<optab><VNx8HI_ONLY:mode><VNx8SI_ONLY:mode>" + [(set (match_operand:VNx8HI_ONLY 0 "register_operand" "=w") + (unspec:VNx8HI_ONLY + [(match_operand:VNx8SI_ONLY 1 "aligned_register_operand" "Uw<vector_count>")] + SVE_QCVTxN))] + "TARGET_SME2 && TARGET_STREAMING" + "<optab>\t%0.h, %1" +) + +(define_insn "@aarch64_sve_<optab><VNx8HI_ONLY:mode><VNx8DI_ONLY:mode>" + [(set (match_operand:VNx8HI_ONLY 0 "register_operand" "=w") + (unspec:VNx8HI_ONLY + [(match_operand:VNx8DI_ONLY 1 "aligned_register_operand" "Uw<vector_count>")] + SVE_QCVTxN))] + "TARGET_SME2 && TARGET_STREAMING" + "<optab>\t%0.h, %1" +) + ;; ------------------------------------------------------------------------- ;; ---- [INT] Narrowing binary arithmetic ;; ------------------------------------------------------------------------- @@ -1689,6 +2183,20 @@ "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, #%3" ) +;; ------------------------------------------------------------------------- +;; ---- [INT] Multi-vector narrowing right shifts +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_<sve_int_op><mode>" + [(set (match_operand:<VNARROW> 0 "register_operand" "=w") + (unspec:<VNARROW> + [(match_operand:SVE_FULL_SIx2_SDIx4 1 "register_operand" "Uw<vector_count>") + (match_operand:DI 2 "const_int_operand")] + SVE2_INT_SHIFT_IMM_NARROWxN))] + "TARGET_STREAMING_SME2" + "<sve_int_op>\t%0.<Ventype>, %1, #%2" +) + ;; ========================================================================= ;; == Pairwise 
arithmetic ;; ========================================================================= @@ -2162,6 +2670,57 @@ "fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>" ) +;; ------------------------------------------------------------------------- +;; ---- [FP<-FP] Multi-vector narrowing conversions +;; ------------------------------------------------------------------------- +;; Includes the multi-register forms of: +;; - BFCVT +;; - BFCVTN +;; - FCVT +;; - FCVTN +;; ------------------------------------------------------------------------- + +(define_insn "truncvnx8sf<mode>2" + [(set (match_operand:SVE_FULL_HF 0 "register_operand" "=w") + (float_truncate:SVE_FULL_HF + (match_operand:VNx8SF 1 "aligned_register_operand" "Uw2")))] + "TARGET_STREAMING_SME2" + "<b>fcvt\t%0.h, %1" +) + +(define_insn "@aarch64_sve_cvtn<mode>" + [(set (match_operand:SVE_FULL_HF 0 "register_operand" "=w") + (unspec:SVE_FULL_HF + [(match_operand:VNx8SF 1 "aligned_register_operand" "Uw2")] + UNSPEC_FCVTN))] + "TARGET_STREAMING_SME2" + "<b>fcvtn\t%0.h, %1" +) + +;; ------------------------------------------------------------------------- +;; ---- [FP<-INT] Multi-vector conversions +;; ------------------------------------------------------------------------- + +(define_insn "<optab><v_int_equiv><mode>2" + [(set (match_operand:SVE_SFx24 0 "aligned_register_operand" "=Uw<vector_count>") + (FLOATUORS:SVE_SFx24 + (match_operand:<V_INT_EQUIV> 1 "aligned_register_operand" "Uw<vector_count>")))] + "TARGET_STREAMING_SME2" + "<su_optab>cvtf\t%0, %1" +) + +;; ------------------------------------------------------------------------- +;; ---- [INT<-FP] Multi-vector conversions +;; ------------------------------------------------------------------------- + +(define_insn "<optab><mode><v_int_equiv>2" + [(set (match_operand:<V_INT_EQUIV> 0 "aligned_register_operand" "=Uw<vector_count>") + (FIXUORS:<V_INT_EQUIV> + (match_operand:SVE_SFx24 1 "aligned_register_operand" "Uw<vector_count>")))] + "TARGET_STREAMING_SME2" + "fcvtz<su>\t%0, %1" +) + ;; ========================================================================= ;; == Other arithmetic ;; ========================================================================= @@ -2357,10 +2916,108 @@ "<sve_int_op>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>" ) +;; ========================================================================= +;; == Comparisons and selects +;; ========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- [INT,FP] Select based on predicates as counters +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_sel<mode>" + [(set (match_operand:SVE_FULLx24 0 "register_operand" "=Uw<vector_count>") + (unspec:SVE_FULLx24 + [(match_operand:<VPRED> 3 "register_operand" "Uph") + (match_operand:SVE_FULLx24 1 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SVE_FULLx24 2 "aligned_register_operand" "Uw<vector_count>")] + UNSPEC_SEL))] + "TARGET_STREAMING_SME2" + "sel\t%0, %K3, %1, %2" +) + +;; ------------------------------------------------------------------------- +;; ---- [INT] While tests +;; ------------------------------------------------------------------------- +;; Includes the x2 and count versions of: +;; - WHILEGE +;; - WHILEGT +;; - WHILEHI +;; - WHILEHS +;; - WHILELE +;; - WHILELO +;; - WHILELS +;; - WHILELT +;; ------------------------------------------------------------------------- + +(define_insn 
"@aarch64_sve_while<while_optab_cmp>_b<BHSD_BITS>_x2" + [(set (match_operand:VNx32BI 0 "register_operand" "=Up2") + (unspec:VNx32BI + [(const_int SVE_WHILE_B_X2) + (match_operand:DI 1 "aarch64_reg_or_zero" "rZ") + (match_operand:DI 2 "aarch64_reg_or_zero" "rZ") + (const_int BHSD_BITS)] + SVE_WHILE_ORDER)) + (clobber (reg:CC_NZC CC_REGNUM))] + "TARGET_STREAMING_SME2" + "while<cmp_op>\t{%S0.<bits_etype>, %T0.<bits_etype>}, %x1, %x2" +) + +(define_insn "@aarch64_sve_while<while_optab_cmp>_c<BHSD_BITS>" + [(set (match_operand:VNx16BI 0 "register_operand" "=Uph") + (unspec:VNx16BI + [(const_int SVE_WHILE_C) + (match_operand:DI 1 "aarch64_reg_or_zero" "rZ") + (match_operand:DI 2 "aarch64_reg_or_zero" "rZ") + (const_int BHSD_BITS) + (match_operand:DI 3 "const_int_operand")] + SVE_WHILE_ORDER)) + (clobber (reg:CC_NZC CC_REGNUM))] + "TARGET_STREAMING_SME2" + "while<cmp_op>\t%K0.<bits_etype>, %x1, %x2, vlx%3" +) + ;; ========================================================================= ;; == Permutation ;; ========================================================================= +;; ------------------------------------------------------------------------- +;; ---- [INT,FP] Reversal +;; ------------------------------------------------------------------------- +;; Includes: +;; - REVD +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_pred_<optab><mode>" + [(set (match_operand:SVE_FULL 0 "register_operand") + (unspec:SVE_FULL + [(match_operand:VNx2BI 1 "register_operand") + (unspec:SVE_FULL + [(match_operand:SVE_FULL 2 "register_operand")] + UNSPEC_REVD_ONLY)] + UNSPEC_PRED_X))] + "TARGET_STREAMING_SME" + {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ] + [ w , Upl , 0 ; * ] revd\t%0.q, %1/m, %2.q + [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;revd\t%0.q, %1/m, %2.q + } +) + +(define_insn "@cond_<optab><mode>" + [(set (match_operand:SVE_FULL 0 "register_operand") + (unspec:SVE_FULL + [(match_operand:VNx2BI 1 "register_operand") + (unspec:SVE_FULL + [(match_operand:SVE_FULL 2 "register_operand")] + UNSPEC_REVD_ONLY) + (match_operand:SVE_FULL 3 "register_operand")] + UNSPEC_SEL))] + "TARGET_STREAMING_SME" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ w , Upl , w , 0 ; * ] revd\t%0.q, %1/m, %2.q + [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;revd\t%0.q, %1/m, %2.q + } +) + ;; ------------------------------------------------------------------------- ;; ---- [INT,FP] General permutes ;; ------------------------------------------------------------------------- @@ -2392,6 +3049,52 @@ "tbx\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>" ) +;; ------------------------------------------------------------------------- +;; ---- [INT,FP] Multi-register permutes +;; ------------------------------------------------------------------------- +;; Includes: +;; - ZIP +;; - UZP +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_<optab><mode>" + [(set (match_operand:SVE_FULLx2 0 "aligned_register_operand" "=Uw2") + (unspec:SVE_FULLx2 + [(match_operand:<VSINGLE> 1 "register_operand" "w") + (match_operand:<VSINGLE> 2 "register_operand" "w")] + SVE2_x24_PERMUTE))] + "TARGET_STREAMING_SME2" + "<perm_insn>\t%0, %1.<Vetype>, %2.<Vetype>" +) + +(define_insn "@aarch64_sve_<optab><mode>" + [(set (match_operand:SVE_FULLx2 0 "aligned_register_operand" "=Uw2") + (unspec:SVE_FULLx2 + [(match_operand:<VSINGLE> 1 "register_operand" "w") + (match_operand:<VSINGLE> 2 "register_operand" "w")] + SVE2_x24_PERMUTEQ))] + "TARGET_STREAMING_SME2" + 
"<perm_insn>\t{%S0.q - %T0.q}, %1.q, %2.q" +) + +(define_insn "@aarch64_sve_<optab><mode>" + [(set (match_operand:SVE_FULLx4 0 "aligned_register_operand" "=Uw4") + (unspec:SVE_FULLx4 + [(match_operand:SVE_FULLx4 1 "aligned_register_operand" "Uw4")] + SVE2_x24_PERMUTE))] + "TARGET_STREAMING_SME2" + "<perm_insn>\t%0, %1" +) + +(define_insn "@aarch64_sve_<optab><mode>" + [(set (match_operand:SVE_FULLx4 0 "aligned_register_operand" "=Uw4") + (unspec:SVE_FULLx4 + [(match_operand:SVE_FULLx4 1 "aligned_register_operand" "Uw4")] + SVE2_x24_PERMUTEQ))] + "TARGET_STREAMING_SME2" + "<perm_insn>\t{%S0.q - %V0.q}, {%S1.q - %V1.q}" +) + ;; ------------------------------------------------------------------------- ;; ---- [INT] Optional bit-permute extensions ;; ------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 8f34ca1463536cb037490eb47257081f6e9ed6b2..0ea5950ddca1df1375c20621b8d70658bde5d3c5 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -11689,6 +11689,7 @@ sizetochar (int size) '0': Print a normal operand, if it's a general register, then we assume DImode. 'k': Print NZCV for conditional compare instructions. + 'K': Print a predicate register as pn<N> rather than p<N> 'A': Output address constant representing the first argument of X, specifying a relocation offset if appropriate. @@ -11865,14 +11866,17 @@ aarch64_print_operand (FILE *f, rtx x, int code) case 'T': case 'U': case 'V': - if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) + if (!REG_P (x) || (!FP_REGNUM_P (REGNO (x)) && !PR_REGNUM_P (REGNO (x)))) { - output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); + output_operand_lossage ("incompatible operand for '%%%c'", code); return; } - asm_fprintf (f, "%c%d", - aarch64_sve_data_mode_p (GET_MODE (x)) ? 'z' : 'v', - REGNO (x) - V0_REGNUM + (code - 'S')); + if (PR_REGNUM_P (REGNO (x))) + asm_fprintf (f, "p%d", REGNO (x) - P0_REGNUM + (code - 'S')); + else + asm_fprintf (f, "%c%d", + aarch64_sve_data_mode_p (GET_MODE (x)) ? 'z' : 'v', + REGNO (x) - V0_REGNUM + (code - 'S')); break; case 'R': @@ -12153,6 +12157,15 @@ aarch64_print_operand (FILE *f, rtx x, int code) } break; + case 'K': + if (!REG_P (x) || !PR_REGNUM_P (REGNO (x))) + { + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } + asm_fprintf (f, "pn%d", REGNO (x) - P0_REGNUM); + break; + case 'y': case 'z': { @@ -12355,6 +12368,9 @@ aarch64_label_mentioned_p (rtx x) enum reg_class aarch64_regno_regclass (unsigned regno) { + if (W8_W11_REGNUM_P (regno)) + return W8_W11_REGS; + if (W12_W15_REGNUM_P (regno)) return W12_W15_REGS; @@ -12722,6 +12738,7 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) unsigned int nregs, vec_flags; switch (regclass) { + case W8_W11_REGS: case W12_W15_REGS: case STUB_REGS: case TAILCALL_ADDR_REGS: diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 5599c98ee94c29c7cfc294e71000b120397649a1..bcdd13b015a284bce81930ac02f6ba21c19d66bd 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -324,7 +324,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; imply anything about the state of PSTATE.SM. */ #define TARGET_SME (AARCH64_ISA_SME) -/* Streaming-mode SME instructions. */ +/* Same with streaming mode enabled. 
*/ #define TARGET_STREAMING_SME (TARGET_STREAMING && TARGET_SME) /* The FEAT_SME_I16I64 extension to SME, enabled through +sme-i16i64. */ @@ -336,6 +336,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; /* SME2 instructions, enabled through +sme2. */ #define TARGET_SME2 (AARCH64_ISA_SME2) +/* Same with streaming mode enabled. */ +#define TARGET_STREAMING_SME2 (TARGET_STREAMING && TARGET_SME2) + /* ARMv8.3-A features. */ #define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A) @@ -541,6 +544,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; {"b" # N, V0_REGNUM + (N)}, \ {"z" # N, V0_REGNUM + (N)} +#define P_ALIASES(N) {"pn" # N, P0_REGNUM + (N)} + /* Provide aliases for all of the ISA defined register name forms. These aliases are convenient for use in the clobber lists of inline asm statements. */ @@ -561,7 +566,11 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; V_ALIASES(16), V_ALIASES(17), V_ALIASES(18), V_ALIASES(19), \ V_ALIASES(20), V_ALIASES(21), V_ALIASES(22), V_ALIASES(23), \ V_ALIASES(24), V_ALIASES(25), V_ALIASES(26), V_ALIASES(27), \ - V_ALIASES(28), V_ALIASES(29), V_ALIASES(30), V_ALIASES(31) \ + V_ALIASES(28), V_ALIASES(29), V_ALIASES(30), V_ALIASES(31), \ + P_ALIASES(0), P_ALIASES(1), P_ALIASES(2), P_ALIASES(3), \ + P_ALIASES(4), P_ALIASES(5), P_ALIASES(6), P_ALIASES(7), \ + P_ALIASES(8), P_ALIASES(9), P_ALIASES(10), P_ALIASES(11), \ + P_ALIASES(12), P_ALIASES(13), P_ALIASES(14), P_ALIASES(15) \ } #define EPILOGUE_USES(REGNO) (aarch64_epilogue_uses (REGNO)) @@ -682,6 +691,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; && (REGNO) != R17_REGNUM \ && (REGNO) != R30_REGNUM) \ +#define W8_W11_REGNUM_P(REGNO) \ + IN_RANGE (REGNO, R8_REGNUM, R11_REGNUM) + #define W12_W15_REGNUM_P(REGNO) \ IN_RANGE (REGNO, R12_REGNUM, R15_REGNUM) @@ -711,6 +723,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; enum reg_class { NO_REGS, + W8_W11_REGS, W12_W15_REGS, TAILCALL_ADDR_REGS, STUB_REGS, @@ -736,6 +749,7 @@ enum reg_class #define REG_CLASS_NAMES \ { \ "NO_REGS", \ + "W8_W11_REGS", \ "W12_W15_REGS", \ "TAILCALL_ADDR_REGS", \ "STUB_REGS", \ @@ -758,6 +772,7 @@ enum reg_class #define REG_CLASS_CONTENTS \ { \ { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \ + { 0x00000f00, 0x00000000, 0x00000000 }, /* W8_W11_REGS */ \ { 0x0000f000, 0x00000000, 0x00000000 }, /* W12_W15_REGS */ \ { 0x00030000, 0x00000000, 0x00000000 }, /* TAILCALL_ADDR_REGS */\ { 0x3ffcffff, 0x00000000, 0x00000000 }, /* STUB_REGS */ \ diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 9e9ccefbfed470c22eeab94acf58fca940eee4bb..b8e12fc1d4ba7a21c80d10f2163420001ae8271e 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -290,9 +290,13 @@ UNSPEC_NZCV UNSPEC_XPACLRI UNSPEC_LD1_SVE + UNSPEC_LD1_SVE_COUNT UNSPEC_ST1_SVE + UNSPEC_ST1_SVE_COUNT UNSPEC_LDNT1_SVE + UNSPEC_LDNT1_SVE_COUNT UNSPEC_STNT1_SVE + UNSPEC_STNT1_SVE_COUNT UNSPEC_LD1RQ UNSPEC_LD1_GATHER UNSPEC_LDFF1_GATHER diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md index 055a87320caaf2ec47667906f7b1e9fc60b0b3d3..3ca7f23554fca9cfe11cce2370649e469dc3eb4c 100644 --- a/gcc/config/aarch64/atomics.md +++ b/gcc/config/aarch64/atomics.md @@ -39,7 +39,7 @@ (define_mode_attr cas_short_expected_pred [(QI "aarch64_reg_or_imm") (HI "aarch64_plushi_operand")]) (define_mode_attr cas_short_expected_imm - [(QI "n") (HI "Uph")]) + [(QI "n") (HI "Uih")]) (define_insn_and_split "@aarch64_compare_and_swap<mode>" [(set (reg:CC 
CC_REGNUM) ;; bool out diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md index 78a62af1abff24bf5a2f437e05b6c62cda7eb468..8b65cab29fb801557fe13b90805d7a3cbe619690 100644 --- a/gcc/config/aarch64/constraints.md +++ b/gcc/config/aarch64/constraints.md @@ -21,6 +21,9 @@ (define_register_constraint "k" "STACK_REG" "@internal The stack register.") +(define_register_constraint "Uci" "W8_W11_REGS" + "@internal r8-r11, which can be used to index ZA.") + (define_register_constraint "Ucj" "W12_W15_REGS" "@internal r12-r15, which can be used to index ZA.") @@ -39,6 +42,20 @@ (define_register_constraint "w" "FP_REGS" "Floating point and SIMD vector registers.") +(define_register_constraint "x" "FP_LO_REGS" + "Floating point and SIMD vector registers V0 - V15.") + +(define_register_constraint "y" "FP_LO8_REGS" + "Floating point and SIMD vector registers V0 - V7.") + +(define_register_constraint "Uw2" "FP_REGS" + "Even floating point and SIMD vector registers." + "regno % 2 == 0") + +(define_register_constraint "Uw4" "FP_REGS" + "4-tuple-aligned floating point and SIMD vector registers." + "regno % 4 == 0") + (define_register_constraint "Upa" "PR_REGS" "SVE predicate registers p0 - p15.") @@ -49,11 +66,8 @@ (define_register_constraint "Upl" "PR_LO_REGS" "SVE predicate registers p0 - p7.") -(define_register_constraint "x" "FP_LO_REGS" - "Floating point and SIMD vector registers V0 - V15.") - -(define_register_constraint "y" "FP_LO8_REGS" - "Floating point and SIMD vector registers V0 - V7.") +(define_register_constraint "Uph" "PR_HI_REGS" + "SVE predicate registers p8 - p15.") (define_constraint "c" "@internal The condition code register." @@ -285,7 +299,7 @@ (and (match_code "const_int") (match_test "(unsigned) exact_log2 (ival) <= 4"))) -(define_constraint "Uph" +(define_constraint "Uih" "@internal A constraint that matches HImode integers zero extendable to SImode plus_operand." diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 1a14069485d2f51eaec2f87bc3846364bce0c2b8..f204850850c48b6ec304343d290eba09da211e1d 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -423,8 +423,11 @@ ;; Iterators for single modes, for "@" patterns. (define_mode_iterator VNx16QI_ONLY [VNx16QI]) +(define_mode_iterator VNx16SI_ONLY [VNx16SI]) (define_mode_iterator VNx8HI_ONLY [VNx8HI]) (define_mode_iterator VNx8BF_ONLY [VNx8BF]) +(define_mode_iterator VNx8SI_ONLY [VNx8SI]) +(define_mode_iterator VNx8DI_ONLY [VNx8DI]) (define_mode_iterator VNx4SI_ONLY [VNx4SI]) (define_mode_iterator VNx4SF_ONLY [VNx4SF]) (define_mode_iterator VNx2DI_ONLY [VNx2DI]) @@ -448,6 +451,12 @@ ;; elements. (define_mode_iterator SVE_FULL_BHSI [VNx16QI VNx8HI VNx4SI]) +;; Pairs of the above. +(define_mode_iterator SVE_FULL_BHSIx2 [VNx32QI VNx16HI VNx8SI]) + +;; Fully-packed SVE vector modes that have 16-bit float elements. +(define_mode_iterator SVE_FULL_HF [VNx8BF VNx8HF]) + ;; Fully-packed SVE vector modes that have 16-bit, 32-bit or 64-bit elements. (define_mode_iterator SVE_FULL_HSD [VNx8HI VNx4SI VNx2DI VNx8BF VNx8HF VNx4SF VNx2DF]) @@ -473,6 +482,9 @@ ;; Fully-packed SVE integer vector modes that have 32-bit or 64-bit elements. (define_mode_iterator SVE_FULL_SDI [VNx4SI VNx2DI]) +;; 2x and 4x tuples of the above, excluding 2x DI. +(define_mode_iterator SVE_FULL_SIx2_SDIx4 [VNx8SI VNx16SI VNx8DI]) + ;; Fully-packed SVE floating-point vector modes that have 32-bit or 64-bit ;; elements. 
(define_mode_iterator SVE_FULL_SDF [VNx4SF VNx2DF]) @@ -481,6 +493,10 @@ (define_mode_iterator SVE_MATMULF [(VNx4SF "TARGET_SVE_F32MM") (VNx2DF "TARGET_SVE_F64MM")]) +;; Fully-packed SVE vector modes that have 32-bit or smaller elements. +(define_mode_iterator SVE_FULL_BHS [VNx16QI VNx8HI VNx4SI + VNx8BF VNx8HF VNx4SF]) + ;; Fully-packed SVE vector modes that have 32-bit elements. (define_mode_iterator SVE_FULL_S [VNx4SI VNx4SF]) @@ -514,6 +530,8 @@ (define_mode_iterator SVE_FULLx4 [VNx64QI VNx32HI VNx16SI VNx8DI VNx32BF VNx32HF VNx16SF VNx8DF]) +(define_mode_iterator SVE_FULLx24 [SVE_FULLx2 SVE_FULLx4]) + ;; All SVE vector structure modes. (define_mode_iterator SVE_STRUCT [SVE_FULLx2 SVE_FULLx3 SVE_FULLx4]) @@ -531,6 +549,8 @@ VNx4SI VNx2SI VNx2DI]) +(define_mode_iterator SVE_DIx24 [VNx4DI VNx8DI]) + ;; SVE modes with 2 or 4 elements. (define_mode_iterator SVE_24 [VNx2QI VNx2HI VNx2HF VNx2BF VNx2SI VNx2SF VNx2DI VNx2DF @@ -588,12 +608,47 @@ ;; Bfloat16 modes to which V4SF can be converted (define_mode_iterator V4SF_TO_BF [V4BF V8BF]) +(define_mode_iterator SVE_BHSx24 [VNx32QI VNx16HI VNx8SI + VNx16BF VNx16HF VNx8SF + VNx64QI VNx32HI VNx16SI + VNx32BF VNx32HF VNx16SF]) + +(define_mode_iterator SVE_Ix24 [VNx32QI VNx16HI VNx8SI VNx4DI + VNx64QI VNx32HI VNx16SI VNx8DI]) + +(define_mode_iterator SVE_Fx24 [VNx16HF VNx8SF VNx4DF + VNx32HF VNx16SF VNx8DF]) + +(define_mode_iterator SVE_SFx24 [VNx8SF VNx16SF]) + ;; The modes used to represent different ZA access sizes. (define_mode_iterator SME_ZA_I [VNx16QI VNx8HI VNx4SI VNx2DI VNx1TI]) (define_mode_iterator SME_ZA_SDI [VNx4SI (VNx2DI "TARGET_SME_I16I64")]) (define_mode_iterator SME_ZA_SDF_I [VNx4SI (VNx2DI "TARGET_SME_F64F64")]) +(define_mode_iterator SME_ZA_BIx24 [VNx32QI VNx64QI]) + +(define_mode_iterator SME_ZA_BHIx124 [VNx16QI VNx32QI VNx64QI + VNx8HI VNx16HI VNx32HI]) + +(define_mode_iterator SME_ZA_BHIx24 [VNx32QI VNx64QI VNx16HI VNx32HI]) + +(define_mode_iterator SME_ZA_HFx124 [VNx8BF VNx16BF VNx32BF + VNx8HF VNx16HF VNx32HF]) + +(define_mode_iterator SME_ZA_HFx24 [VNx16BF VNx32BF VNx16HF VNx32HF]) + +(define_mode_iterator SME_ZA_HIx124 [VNx8HI VNx16HI VNx32HI]) + +(define_mode_iterator SME_ZA_HIx24 [VNx16HI VNx32HI]) + +(define_mode_iterator SME_ZA_SDIx24 [VNx8SI (VNx4DI "TARGET_SME_I16I64") + VNx16SI (VNx8DI "TARGET_SME_I16I64")]) + +(define_mode_iterator SME_ZA_SDFx24 [VNx8SF (VNx4DF "TARGET_SME_F64F64") + VNx16SF (VNx8DF "TARGET_SME_F64F64")]) + ;; The modes for which outer product instructions are supported. (define_mode_iterator SME_MOP_BHI [VNx16QI (VNx8HI "TARGET_SME_I16I64")]) (define_mode_iterator SME_MOP_HSDF [VNx8BF VNx8HF VNx4SF @@ -731,6 +786,7 @@ UNSPEC_IORF ; Used in aarch64-sve.md. UNSPEC_XORF ; Used in aarch64-sve.md. UNSPEC_REVB ; Used in aarch64-sve.md. + UNSPEC_REVD ; Used in aarch64-sve2.md. UNSPEC_REVH ; Used in aarch64-sve.md. UNSPEC_REVW ; Used in aarch64-sve.md. UNSPEC_REVBHW ; Used in aarch64-sve.md. @@ -845,6 +901,7 @@ UNSPEC_CMLA180_CONJ ; Used in aarch64-sve2.md. UNSPEC_CMUL ; Used in aarch64-sve2.md. UNSPEC_CMUL_CONJ ; Used in aarch64-sve2.md. + UNSPEC_CNTP_C ; Used in aarch64-sve2.md. UNSPEC_COND_FCVTLT ; Used in aarch64-sve2.md. UNSPEC_COND_FCVTNT ; Used in aarch64-sve2.md. UNSPEC_COND_FCVTX ; Used in aarch64-sve2.md. @@ -865,10 +922,14 @@ UNSPEC_HISTSEG ; Used in aarch64-sve2.md. UNSPEC_MATCH ; Used in aarch64-sve2.md. UNSPEC_NMATCH ; Used in aarch64-sve2.md. + UNSPEC_PEXT ; Used in aarch64-sve2.md. + UNSPEC_PEXTx2 ; Used in aarch64-sve2.md. UNSPEC_PMULLB ; Used in aarch64-sve2.md. 
UNSPEC_PMULLB_PAIR ; Used in aarch64-sve2.md. UNSPEC_PMULLT ; Used in aarch64-sve2.md. UNSPEC_PMULLT_PAIR ; Used in aarch64-sve2.md. + UNSPEC_PSEL ; Used in aarch64-sve2.md. + UNSPEC_PTRUE_C ; Used in aarch64-sve2.md. UNSPEC_RADDHNB ; Used in aarch64-sve2.md. UNSPEC_RADDHNT ; Used in aarch64-sve2.md. UNSPEC_RSHRNB ; Used in aarch64-sve2.md. @@ -902,8 +963,12 @@ UNSPEC_SQRDCMLAH180 ; Used in aarch64-sve2.md. UNSPEC_SQRDCMLAH270 ; Used in aarch64-sve2.md. UNSPEC_SQRDCMLAH90 ; Used in aarch64-sve2.md. + UNSPEC_SQRSHR ; Used in aarch64-sve2.md. + UNSPEC_SQRSHRN ; Used in aarch64-sve2.md. UNSPEC_SQRSHRNB ; Used in aarch64-sve2.md. UNSPEC_SQRSHRNT ; Used in aarch64-sve2.md. + UNSPEC_SQRSHRU ; Used in aarch64-sve2.md. + UNSPEC_SQRSHRUN ; Used in aarch64-sve2.md. UNSPEC_SQRSHRUNB ; Used in aarch64-sve2.md. UNSPEC_SQRSHRUNT ; Used in aarch64-sve2.md. UNSPEC_SQSHRNB ; Used in aarch64-sve2.md. @@ -938,6 +1003,8 @@ UNSPEC_UMULHS ; Used in aarch64-sve2.md. UNSPEC_UMULLB ; Used in aarch64-sve2.md. UNSPEC_UMULLT ; Used in aarch64-sve2.md. + UNSPEC_UQRSHR ; Used in aarch64-sve2.md. + UNSPEC_UQRSHRN ; Used in aarch64-sve2.md. UNSPEC_UQRSHRNB ; Used in aarch64-sve2.md. UNSPEC_UQRSHRNT ; Used in aarch64-sve2.md. UNSPEC_UQSHRNB ; Used in aarch64-sve2.md. @@ -951,35 +1018,77 @@ UNSPEC_USUBWB ; Used in aarch64-sve2.md. UNSPEC_USUBWT ; Used in aarch64-sve2.md. UNSPEC_USDOT ; Used in aarch64-simd.md. + UNSPEC_UZP ; Used in aarch64-sve2.md. + UNSPEC_UZPQ ; Used in aarch64-sve2.md. + UNSPEC_ZIP ; Used in aarch64-sve2.md. + UNSPEC_ZIPQ ; Used in aarch64-sve2.md. UNSPEC_SUDOT ; Used in aarch64-simd.md. UNSPEC_BFDOT ; Used in aarch64-simd.md. UNSPEC_BFMLALB ; Used in aarch64-sve.md. UNSPEC_BFMLALT ; Used in aarch64-sve.md. + UNSPEC_BFMLSLB ; Used in aarch64-sve.md. + UNSPEC_BFMLSLT ; Used in aarch64-sve.md. UNSPEC_BFMMLA ; Used in aarch64-sve.md. UNSPEC_BFCVTN ; Used in aarch64-simd.md. UNSPEC_BFCVTN2 ; Used in aarch64-simd.md. UNSPEC_BFCVT ; Used in aarch64-simd.md. UNSPEC_FCVTXN ; Used in aarch64-simd.md. + ;; All used in aarch64-sve2.md + UNSPEC_FCVTN + UNSPEC_FDOT + UNSPEC_SQCVT + UNSPEC_SQCVTN + UNSPEC_SQCVTU + UNSPEC_SQCVTUN + UNSPEC_UQCVT + UNSPEC_UQCVTN + ;; All used in aarch64-sme.md + UNSPEC_SME_ADD + UNSPEC_SME_ADD_WRITE UNSPEC_SME_ADDHA UNSPEC_SME_ADDVA + UNSPEC_SME_BMOPA + UNSPEC_SME_BMOPS + UNSPEC_SME_FADD + UNSPEC_SME_FDOT + UNSPEC_SME_FVDOT + UNSPEC_SME_FMLA + UNSPEC_SME_FMLS UNSPEC_SME_FMOPA UNSPEC_SME_FMOPS + UNSPEC_SME_FSUB UNSPEC_SME_LD1_HOR UNSPEC_SME_LD1_VER + UNSPEC_SME_READ UNSPEC_SME_READ_HOR UNSPEC_SME_READ_VER + UNSPEC_SME_SDOT + UNSPEC_SME_SVDOT + UNSPEC_SME_SMLA + UNSPEC_SME_SMLS UNSPEC_SME_SMOPA UNSPEC_SME_SMOPS UNSPEC_SME_ST1_HOR UNSPEC_SME_ST1_VER + UNSPEC_SME_SUB + UNSPEC_SME_SUB_WRITE + UNSPEC_SME_SUDOT + UNSPEC_SME_SUVDOT UNSPEC_SME_SUMOPA UNSPEC_SME_SUMOPS + UNSPEC_SME_UDOT + UNSPEC_SME_UVDOT + UNSPEC_SME_UMLA + UNSPEC_SME_UMLS UNSPEC_SME_UMOPA UNSPEC_SME_UMOPS + UNSPEC_SME_USDOT + UNSPEC_SME_USVDOT UNSPEC_SME_USMOPA UNSPEC_SME_USMOPS + UNSPEC_SME_WRITE UNSPEC_SME_WRITE_HOR UNSPEC_SME_WRITE_VER ]) @@ -1253,6 +1362,14 @@ (VNx2DI "d") (VNx2DF "d") (VNx1TI "q") + (VNx32QI "b") (VNx64QI "b") + (VNx16HI "h") (VNx32HI "h") + (VNx16HF "h") (VNx32HF "h") + (VNx16BF "h") (VNx32BF "h") + (VNx8SI "s") (VNx16SI "s") + (VNx8SF "s") (VNx16SF "s") + (VNx4DI "d") (VNx8DI "d") + (VNx4DF "d") (VNx8DF "d") (BF "h") (V4BF "h") (V8BF "h") (HF "h") (SF "s") (DF "d") @@ -1526,7 +1643,9 @@ ;; Narrowed modes of vector modes. 
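
The UNSPEC_SQCVT/UNSPEC_UQCVT group added above backs the SME2 saturating narrowing conversions, and the VNARROW entries just below supply their destination modes. A hedged sketch of the intended source-level use, with the intrinsic name assumed from the SME2 ACLE:

    #include <arm_sme.h>

    /* Sketch only: four .s inputs narrow to one .b result.  Expected
       assembly: sqcvt z0.b, { z0.s - z3.s }.  */
    svint8_t
    narrow_x4 (svint32x4_t x) __arm_streaming
    {
      return svqcvt_s8_s32_x4 (x);
    }
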
(define_mode_attr VNARROW [(VNx8HI "VNx16QI") (VNx4SI "VNx8HI") (VNx4SF "VNx8HF") - (VNx2DI "VNx4SI") (VNx2DF "VNx4SF")]) + (VNx2DI "VNx4SI") (VNx2DF "VNx4SF") + (VNx8SI "VNx8HI") (VNx16SI "VNx16QI") + (VNx8DI "VNx8HI")]) ;; Register suffix narrowed modes for VQN. (define_mode_attr Vntype [(V8HI "8b") (V4SI "4h") @@ -1554,7 +1673,25 @@ (V16QI "V16HI") (V8HI "V8SI") (V2SI "V2DI") (V4SI "V4DI") (V2DI "V2TI") (DI "TI") - (HI "SI") (SI "DI")]) + (HI "SI") (SI "DI") + (VNx16QI "VNx16HI") + (VNx8HI "VNx8SI") + (VNx4SI "VNx4DI") + (VNx32QI "VNx32HI") + (VNx16HI "VNx16SI") + (VNx8SI "VNx8DI")]) + +(define_mode_attr v2xwide [(V8QI "v8hi") (V4HI "v4si") + (V16QI "v16hi") (V8HI "v8si") + (V2SI "v2di") (V4SI "v4di") + (V2DI "v2ti") (DI "ti") + (HI "si") (SI "di") + (VNx16QI "vnx16hi") + (VNx8HI "vnx8si") + (VNx4SI "vnx4di") + (VNx32QI "vnx32hi") + (VNx16HI "vnx16si") + (VNx8SI "vnx8di")]) ;; Predicate mode associated with VWIDE. (define_mode_attr VWIDE_PRED [(VNx8HF "VNx4BI") (VNx4SF "VNx2BI")]) @@ -1598,7 +1735,9 @@ ;; SVE vector after narrowing. (define_mode_attr Ventype [(VNx8HI "b") (VNx4SI "h") (VNx4SF "h") - (VNx2DI "s") (VNx2DF "s")]) + (VNx2DI "s") (VNx2DF "s") + (VNx8SI "h") (VNx16SI "b") + (VNx8DI "h")]) ;; SVE vector after widening. (define_mode_attr Vewtype [(VNx16QI "h") @@ -1694,6 +1833,7 @@ (VNx8BF "VNx8HI") (VNx4SI "VNx4SI") (VNx4SF "VNx4SI") (VNx2DI "VNx2DI") (VNx2DF "VNx2DI") + (VNx8SF "VNx8SI") (VNx16SF "VNx16SI") ]) ;; Lower case mode with floating-point values replaced by like-sized integers. @@ -1711,6 +1851,7 @@ (VNx8BF "vnx8hi") (VNx4SI "vnx4si") (VNx4SF "vnx4si") (VNx2DI "vnx2di") (VNx2DF "vnx2di") + (VNx8SF "vnx8si") (VNx16SF "vnx16si") ]) ;; Floating-point equivalent of selected modes. @@ -2044,7 +2185,11 @@ (VNx32HF "16") (VNx16SF "16") (VNx8DF "16")]) ;; The type of a subvector in an SVE_STRUCT. -(define_mode_attr VSINGLE [(VNx32QI "VNx16QI") +(define_mode_attr VSINGLE [(VNx16QI "VNx16QI") + (VNx8BF "VNx8BF") + (VNx8HF "VNx8HF") + (VNx8HI "VNx8HI") + (VNx32QI "VNx16QI") (VNx16HI "VNx8HI") (VNx16HF "VNx8HF") (VNx16BF "VNx8BF") (VNx8SI "VNx4SI") (VNx8SF "VNx4SF") @@ -2061,7 +2206,8 @@ (VNx8DI "VNx2DI") (VNx8DF "VNx2DF")]) ;; ...and again in lower case. 
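
VSINGLE (and its lower-case twin just below) records the per-vector mode of each tuple mode. At the source level this is the relationship between a tuple type and the vectors it contains; a small sketch using the long-standing SVE ACLE accessors:

    #include <arm_sve.h>

    /* svint32x2_t is a pair mode (VNx8SI) whose VSINGLE mode is VNx4SI,
       i.e. a plain svint32_t.  */
    svint32_t
    second_vector (svint32x2_t t)
    {
      return svget2 (t, 1);
    }
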
-(define_mode_attr vsingle [(VNx32QI "vnx16qi") +(define_mode_attr vsingle [(VNx8HI "vnx8hi") + (VNx32QI "vnx16qi") (VNx16HI "vnx8hi") (VNx16HF "vnx8hf") (VNx16BF "vnx8bf") (VNx8SI "vnx4si") (VNx8SF "vnx4sf") @@ -2144,6 +2290,47 @@ (V4HF "<Vetype>[%4]") (V8HF "<Vetype>[%4]") ]) +(define_mode_attr za32_offset_range [(VNx16QI "0_to_12_step_4") + (VNx8BF "0_to_14_step_2") + (VNx8HF "0_to_14_step_2") + (VNx8HI "0_to_14_step_2") + (VNx32QI "0_to_4_step_4") + (VNx16BF "0_to_6_step_2") + (VNx16HF "0_to_6_step_2") + (VNx16HI "0_to_6_step_2") + (VNx64QI "0_to_4_step_4") + (VNx32BF "0_to_6_step_2") + (VNx32HF "0_to_6_step_2") + (VNx32HI "0_to_6_step_2")]) + +(define_mode_attr za64_offset_range [(VNx8HI "0_to_12_step_4") + (VNx16HI "0_to_4_step_4") + (VNx32HI "0_to_4_step_4")]) + +(define_mode_attr za32_long [(VNx16QI "ll") (VNx32QI "ll") (VNx64QI "ll") + (VNx8HI "l") (VNx16HI "l") (VNx32HI "l")]) + +(define_mode_attr za32_last_offset [(VNx16QI "3") (VNx32QI "3") (VNx64QI "3") + (VNx8HI "1") (VNx16HI "1") (VNx32HI "1")]) + +(define_mode_attr vg_modifier [(VNx16QI "") + (VNx32QI ", vgx2") + (VNx64QI ", vgx4") + (VNx8BF "") + (VNx16BF ", vgx2") + (VNx32BF ", vgx4") + (VNx8HF "") + (VNx16HF ", vgx2") + (VNx32HF ", vgx4") + (VNx8HI "") + (VNx16HI ", vgx2") + (VNx32HI ", vgx4")]) + +(define_mode_attr z_suffix [(VNx16QI ".b") (VNx32QI "") (VNx64QI "") + (VNx8BF ".h") (VNx16BF "") (VNx32BF "") + (VNx8HF ".h") (VNx16HF "") (VNx32HF "") + (VNx8HI ".h") (VNx16HI "") (VNx32HI "")]) + ;; The number of bytes controlled by a predicate (define_mode_attr data_bytes [(VNx16BI "1") (VNx8BI "2") (VNx4BI "4") (VNx2BI "8")]) @@ -2173,7 +2360,29 @@ (V8HI "vec") (V2SI "vec") (V4SI "vec") (V2DI "vec") (DI "offset")]) -(define_mode_attr b [(VNx8BF "b") (VNx8HF "") (VNx4SF "") (VNx2DF "")]) +(define_mode_attr b [(VNx8BF "b") (VNx8HF "") (VNx4SF "") (VNx2DF "") + (VNx16BF "b") (VNx16HF "") + (VNx32BF "b") (VNx32HF "")]) + +(define_mode_attr aligned_operand [(VNx16QI "register_operand") + (VNx8HI "register_operand") + (VNx8BF "register_operand") + (VNx8HF "register_operand") + (VNx32QI "aligned_register_operand") + (VNx16HI "aligned_register_operand") + (VNx16BF "aligned_register_operand") + (VNx16HF "aligned_register_operand") + (VNx64QI "aligned_register_operand") + (VNx32HI "aligned_register_operand") + (VNx32BF "aligned_register_operand") + (VNx32HF "aligned_register_operand")]) + +(define_mode_attr aligned_fpr [(VNx16QI "w") (VNx8HI "w") + (VNx8BF "w") (VNx8HF "w") + (VNx32QI "Uw2") (VNx16HI "Uw2") + (VNx16BF "Uw2") (VNx16HF "Uw2") + (VNx64QI "Uw4") (VNx32HI "Uw4") + (VNx32BF "Uw4") (VNx32HF "Uw4")]) ;; ------------------------------------------------------------------- ;; Code Iterators @@ -2304,6 +2513,10 @@ ;; SVE integer binary operations that have an immediate form. (define_code_iterator SVE_INT_BINARY_IMM [mult smax smin umax umin]) +(define_code_iterator SVE_INT_BINARY_MULTI [smax smin umax umin]) + +(define_code_iterator SVE_INT_BINARY_SINGLE [plus smax smin umax umin]) + ;; SVE floating-point operations with an unpredicated all-register form. 
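
The vg_modifier and za32_offset_range attributes above describe the ", vgx2"/", vgx4" suffixes and the legal immediate offsets of instructions that update a whole group of ZA slices at once. A hedged example of the source-level operation they implement, with the intrinsic name and state attributes assumed from the SME2 ACLE:

    #include <arm_sme.h>
    #include <stdint.h>

    /* Sketch only: a vgx2 update of a group of ZA slices.  Expected
       assembly: add za.s[w8, 0, vgx2], { z0.s - z1.s }.  */
    void
    accumulate (uint32_t slice, svint32x2_t x)
      __arm_streaming __arm_inout("za")
    {
      svadd_za32_s32_vg1x2 (slice, x);
    }
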
(define_code_iterator SVE_UNPRED_FP_BINARY [plus minus mult]) @@ -2759,18 +2972,30 @@ (UNSPEC_SRSHR "TARGET_SVE2") (UNSPEC_URSHR "TARGET_SVE2")]) +(define_int_iterator SVE_INT_BINARY_MULTI [UNSPEC_SQDMULH + UNSPEC_SRSHL UNSPEC_URSHL]) + (define_int_iterator SVE_FP_BINARY [UNSPEC_FRECPS UNSPEC_RSQRTS]) (define_int_iterator SVE_FP_BINARY_INT [UNSPEC_FTSMUL UNSPEC_FTSSEL]) -(define_int_iterator SVE_BFLOAT_TERNARY_LONG [UNSPEC_BFDOT - UNSPEC_BFMLALB - UNSPEC_BFMLALT - (UNSPEC_BFMMLA "TARGET_NON_STREAMING")]) +(define_int_iterator SVE_FP_BINARY_MULTI [UNSPEC_FMAX UNSPEC_FMAXNM + UNSPEC_FMIN UNSPEC_FMINNM]) + +(define_int_iterator SVE_BFLOAT_TERNARY_LONG + [UNSPEC_BFDOT + UNSPEC_BFMLALB + UNSPEC_BFMLALT + (UNSPEC_BFMLSLB "TARGET_SME2 && TARGET_STREAMING_SME") + (UNSPEC_BFMLSLT "TARGET_SME2 && TARGET_STREAMING_SME") + (UNSPEC_BFMMLA "TARGET_NON_STREAMING")]) -(define_int_iterator SVE_BFLOAT_TERNARY_LONG_LANE [UNSPEC_BFDOT - UNSPEC_BFMLALB - UNSPEC_BFMLALT]) +(define_int_iterator SVE_BFLOAT_TERNARY_LONG_LANE + [UNSPEC_BFDOT + UNSPEC_BFMLALB + UNSPEC_BFMLALT + (UNSPEC_BFMLSLB "TARGET_SME2 && TARGET_STREAMING_SME") + (UNSPEC_BFMLSLT "TARGET_SME2 && TARGET_STREAMING_SME")]) (define_int_iterator SVE_INT_REDUCTION [UNSPEC_ANDV UNSPEC_IORV @@ -2914,6 +3139,11 @@ (define_int_iterator SVE2_WHILE_PTR [UNSPEC_WHILERW UNSPEC_WHILEWR]) +(define_int_iterator SVE_WHILE_ORDER [UNSPEC_WHILEGE UNSPEC_WHILEGT + UNSPEC_WHILEHI UNSPEC_WHILEHS + UNSPEC_WHILELE UNSPEC_WHILELO + UNSPEC_WHILELS UNSPEC_WHILELT]) + (define_int_iterator SVE_SHIFT_WIDE [UNSPEC_ASHIFT_WIDE UNSPEC_ASHIFTRT_WIDE UNSPEC_LSHIFTRT_WIDE]) @@ -3025,6 +3255,13 @@ UNSPEC_UQRSHRNT UNSPEC_UQSHRNT]) +(define_int_iterator SVE2_INT_SHIFT_IMM_NARROWxN [UNSPEC_SQRSHR + UNSPEC_SQRSHRN + UNSPEC_SQRSHRU + UNSPEC_SQRSHRUN + UNSPEC_UQRSHR + UNSPEC_UQRSHRN]) + (define_int_iterator SVE2_INT_SHIFT_INSERT [UNSPEC_SLI UNSPEC_SRI]) (define_int_iterator SVE2_INT_CADD [UNSPEC_CADD90 @@ -3168,6 +3405,16 @@ (define_int_iterator SVE2_PMULL_PAIR [UNSPEC_PMULLB_PAIR UNSPEC_PMULLT_PAIR]) +(define_int_iterator SVE_QCVTxN [UNSPEC_SQCVT UNSPEC_SQCVTN + UNSPEC_SQCVTU UNSPEC_SQCVTUN + UNSPEC_UQCVT UNSPEC_UQCVTN]) + +(define_int_iterator SVE2_SFx24_UNARY [UNSPEC_FRINTA UNSPEC_FRINTM + UNSPEC_FRINTN UNSPEC_FRINTP]) + +(define_int_iterator SVE2_x24_PERMUTE [UNSPEC_ZIP UNSPEC_UZP]) +(define_int_iterator SVE2_x24_PERMUTEQ [UNSPEC_ZIPQ UNSPEC_UZPQ]) + (define_int_iterator FCADD [UNSPEC_FCADD90 UNSPEC_FCADD270]) @@ -3203,6 +3450,8 @@ (define_int_iterator FCMUL_OP [UNSPEC_FCMUL UNSPEC_FCMUL_CONJ]) +(define_int_iterator UNSPEC_REVD_ONLY [UNSPEC_REVD]) + (define_int_iterator SME_LD1 [UNSPEC_SME_LD1_HOR UNSPEC_SME_LD1_VER]) (define_int_iterator SME_READ [UNSPEC_SME_READ_HOR UNSPEC_SME_READ_VER]) (define_int_iterator SME_ST1 [UNSPEC_SME_ST1_HOR UNSPEC_SME_ST1_VER]) @@ -3215,8 +3464,37 @@ UNSPEC_SME_UMOPA UNSPEC_SME_UMOPS UNSPEC_SME_USMOPA UNSPEC_SME_USMOPS]) +(define_int_iterator SME2_INT_MOP [UNSPEC_SME_SMOPA UNSPEC_SME_SMOPS + UNSPEC_SME_UMOPA UNSPEC_SME_UMOPS]) + (define_int_iterator SME_FP_MOP [UNSPEC_SME_FMOPA UNSPEC_SME_FMOPS]) +(define_int_iterator SME2_BMOP [UNSPEC_SME_BMOPA UNSPEC_SME_BMOPS]) + +(define_int_iterator SME_BINARY_SLICE_SDI [UNSPEC_SME_ADD UNSPEC_SME_SUB]) + +(define_int_iterator SME_BINARY_SLICE_SDF [UNSPEC_SME_FADD UNSPEC_SME_FSUB]) + +(define_int_iterator SME_BINARY_WRITE_SLICE_SDI [UNSPEC_SME_ADD_WRITE + UNSPEC_SME_SUB_WRITE]) + +(define_int_iterator SME_INT_DOTPROD [UNSPEC_SME_SDOT UNSPEC_SME_UDOT + UNSPEC_SME_USDOT]) + +(define_int_iterator SME_INT_DOTPROD_LANE 
[UNSPEC_SME_SDOT UNSPEC_SME_SVDOT + UNSPEC_SME_UDOT UNSPEC_SME_UVDOT + UNSPEC_SME_SUDOT UNSPEC_SME_SUVDOT + UNSPEC_SME_USDOT UNSPEC_SME_USVDOT]) + +(define_int_iterator SME_FP_DOTPROD [UNSPEC_SME_FDOT]) + +(define_int_iterator SME_FP_DOTPROD_LANE [UNSPEC_SME_FDOT UNSPEC_SME_FVDOT]) + +(define_int_iterator SME_INT_TERNARY_SLICE [UNSPEC_SME_SMLA UNSPEC_SME_SMLS + UNSPEC_SME_UMLA UNSPEC_SME_UMLS]) + +(define_int_iterator SME_FP_TERNARY_SLICE [UNSPEC_SME_FMLA UNSPEC_SME_FMLS]) + ;; Iterators for atomic operations. (define_int_iterator ATOMIC_LDOP @@ -3233,6 +3511,10 @@ (define_int_iterator SUBDI_BITS [8 16 32]) +(define_int_iterator BHSD_BITS [8 16 32 64]) + +(define_int_iterator LUTI_BITS [2 4]) + ;; ------------------------------------------------------------------- ;; Int Iterators Attributes. ;; ------------------------------------------------------------------- @@ -3254,6 +3536,7 @@ (UNSPEC_RSQRTS "frsqrts") (UNSPEC_RBIT "rbit") (UNSPEC_REVB "revb") + (UNSPEC_REVD "revd") (UNSPEC_REVH "revh") (UNSPEC_REVW "revw") (UNSPEC_UMAXV "umax") @@ -3291,28 +3574,60 @@ (UNSPEC_PMULLT "pmullt") (UNSPEC_PMULLT_PAIR "pmullt_pair") (UNSPEC_SMATMUL "smatmul") + (UNSPEC_UZP "uzp") + (UNSPEC_UZPQ "uzpq") + (UNSPEC_ZIP "zip") + (UNSPEC_ZIPQ "zipq") + (UNSPEC_SME_ADD "add") + (UNSPEC_SME_ADD_WRITE "add_write") (UNSPEC_SME_ADDHA "addha") (UNSPEC_SME_ADDVA "addva") + (UNSPEC_SME_BMOPA "bmopa") + (UNSPEC_SME_BMOPS "bmops") + (UNSPEC_SME_FADD "fadd") + (UNSPEC_SME_FDOT "fdot") + (UNSPEC_SME_FVDOT "fvdot") + (UNSPEC_SME_FMLA "fmla") + (UNSPEC_SME_FMLS "fmls") (UNSPEC_SME_FMOPA "fmopa") (UNSPEC_SME_FMOPS "fmops") + (UNSPEC_SME_FSUB "fsub") (UNSPEC_SME_LD1_HOR "ld1_hor") (UNSPEC_SME_LD1_VER "ld1_ver") (UNSPEC_SME_READ_HOR "read_hor") (UNSPEC_SME_READ_VER "read_ver") + (UNSPEC_SME_SDOT "sdot") + (UNSPEC_SME_SVDOT "svdot") + (UNSPEC_SME_SMLA "smla") + (UNSPEC_SME_SMLS "smls") (UNSPEC_SME_SMOPA "smopa") (UNSPEC_SME_SMOPS "smops") (UNSPEC_SME_ST1_HOR "st1_hor") (UNSPEC_SME_ST1_VER "st1_ver") + (UNSPEC_SME_SUB "sub") + (UNSPEC_SME_SUB_WRITE "sub_write") + (UNSPEC_SME_SUDOT "sudot") + (UNSPEC_SME_SUVDOT "suvdot") (UNSPEC_SME_SUMOPA "sumopa") (UNSPEC_SME_SUMOPS "sumops") + (UNSPEC_SME_UDOT "udot") + (UNSPEC_SME_UVDOT "uvdot") + (UNSPEC_SME_UMLA "umla") + (UNSPEC_SME_UMLS "umls") (UNSPEC_SME_UMOPA "umopa") (UNSPEC_SME_UMOPS "umops") + (UNSPEC_SME_USDOT "usdot") + (UNSPEC_SME_USVDOT "usvdot") (UNSPEC_SME_USMOPA "usmopa") (UNSPEC_SME_USMOPS "usmops") (UNSPEC_SME_WRITE_HOR "write_hor") (UNSPEC_SME_WRITE_VER "write_ver") (UNSPEC_SQCADD90 "sqcadd90") (UNSPEC_SQCADD270 "sqcadd270") + (UNSPEC_SQCVT "sqcvt") + (UNSPEC_SQCVTN "sqcvtn") + (UNSPEC_SQCVTU "sqcvtu") + (UNSPEC_SQCVTUN "sqcvtun") (UNSPEC_SQRDCMLAH "sqrdcmlah") (UNSPEC_SQRDCMLAH90 "sqrdcmlah90") (UNSPEC_SQRDCMLAH180 "sqrdcmlah180") @@ -3320,6 +3635,8 @@ (UNSPEC_TRN1Q "trn1q") (UNSPEC_TRN2Q "trn2q") (UNSPEC_UMATMUL "umatmul") + (UNSPEC_UQCVT "uqcvt") + (UNSPEC_UQCVTN "uqcvtn") (UNSPEC_USMATMUL "usmatmul") (UNSPEC_UZP1Q "uzp1q") (UNSPEC_UZP2Q "uzp2q") @@ -3549,7 +3866,9 @@ (UNSPEC_TRN1 "trn1") (UNSPEC_TRN2 "trn2") (UNSPEC_TRN1Q "trn1") (UNSPEC_TRN2Q "trn2") (UNSPEC_UZP1 "uzp1") (UNSPEC_UZP2 "uzp2") - (UNSPEC_UZP1Q "uzp1") (UNSPEC_UZP2Q "uzp2")]) + (UNSPEC_UZP1Q "uzp1") (UNSPEC_UZP2Q "uzp2") + (UNSPEC_UZP "uzp") (UNSPEC_UZPQ "uzp") + (UNSPEC_ZIP "zip") (UNSPEC_ZIPQ "zip")]) ; op code for REV instructions (size within which elements are reversed). 
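
The LUTI_BITS iterator [2 4] above covers the two index widths of the new ZT0 table lookups. A sketch of the corresponding source-level operation, with the intrinsic name and the "zt0" state attribute assumed from the SME2 ACLE:

    #include <arm_sme.h>

    /* Sketch only: 2-bit indices select entries from the lookup table
       held in ZT0.  Expected assembly: luti2 z0.b, zt0, z1[1].  */
    svuint8_t
    table_lookup (svuint8_t indices)
      __arm_streaming __arm_in("zt0")
    {
      return svluti2_lane_zt_u8 (0, indices, 1);
    }
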
(define_int_attr rev_op [(UNSPEC_REV64 "64") (UNSPEC_REV32 "32") @@ -3727,8 +4046,12 @@ (UNSPEC_SQRDMLSH "sqrdmlsh") (UNSPEC_SQRDMULH "sqrdmulh") (UNSPEC_SQRSHL "sqrshl") + (UNSPEC_SQRSHR "sqrshr") + (UNSPEC_SQRSHRN "sqrshrn") (UNSPEC_SQRSHRNB "sqrshrnb") (UNSPEC_SQRSHRNT "sqrshrnt") + (UNSPEC_SQRSHRU "sqrshru") + (UNSPEC_SQRSHRUN "sqrshrun") (UNSPEC_SQRSHRUNB "sqrshrunb") (UNSPEC_SQRSHRUNT "sqrshrunt") (UNSPEC_SQSHL "sqshl") @@ -3773,6 +4096,8 @@ (UNSPEC_UMULLB "umullb") (UNSPEC_UMULLT "umullt") (UNSPEC_UQRSHL "uqrshl") + (UNSPEC_UQRSHR "uqrshr") + (UNSPEC_UQRSHRN "uqrshrn") (UNSPEC_UQRSHRNB "uqrshrnb") (UNSPEC_UQRSHRNT "uqrshrnt") (UNSPEC_UQSHL "uqshl") @@ -3829,6 +4154,8 @@ (define_int_attr sve_fp_op [(UNSPEC_BFDOT "bfdot") (UNSPEC_BFMLALB "bfmlalb") (UNSPEC_BFMLALT "bfmlalt") + (UNSPEC_BFMLSLB "bfmlslb") + (UNSPEC_BFMLSLT "bfmlslt") (UNSPEC_BFMMLA "bfmmla") (UNSPEC_FRECPE "frecpe") (UNSPEC_FRECPS "frecps") @@ -3889,6 +4216,9 @@ (UNSPEC_COND_FMULX "fmulx") (UNSPEC_COND_FSUB "fsubr")]) +(define_int_attr sme_int_op [(UNSPEC_SME_ADD_WRITE "add") + (UNSPEC_SME_SUB_WRITE "sub")]) + (define_int_attr rot [(UNSPEC_CADD90 "90") (UNSPEC_CADD270 "270") (UNSPEC_CDOT "0") @@ -4065,6 +4395,15 @@ (UNSPEC_SME_WRITE_HOR "h") (UNSPEC_SME_WRITE_VER "v")]) +(define_int_attr has_16bit_form [(UNSPEC_SME_SDOT "true") + (UNSPEC_SME_SVDOT "true") + (UNSPEC_SME_UDOT "true") + (UNSPEC_SME_UVDOT "true") + (UNSPEC_SME_SUDOT "false") + (UNSPEC_SME_SUVDOT "false") + (UNSPEC_SME_USDOT "false") + (UNSPEC_SME_USVDOT "false")]) + ;; Iterators and attributes for fpcr fpsr getter setters (define_int_iterator GET_FPSCR @@ -4079,4 +4418,4 @@ (UNSPECV_GET_FPCR "fpcr") (UNSPECV_SET_FPCR "fpcr")]) -(define_int_attr bits_etype [(8 "b") (16 "h") (32 "s")]) +(define_int_attr bits_etype [(8 "b") (16 "h") (32 "s") (64 "d")]) diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 5f304898a8c49ffccfb74e3b03e8f683201536e0..c60a9e19c7014185e570d14325c5d9b4693cd3b0 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -42,6 +42,30 @@ (and (match_code "const_int") (match_test "op == CONST0_RTX (mode)"))) +(define_predicate "const_0_to_7_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) + +(define_predicate "const_0_to_4_step_4_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 4)") + (match_test "(INTVAL (op) & 3) == 0"))) + +(define_predicate "const_0_to_6_step_2_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 6)") + (match_test "(INTVAL (op) & 1) == 0"))) + +(define_predicate "const_0_to_12_step_4_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 12)") + (match_test "(INTVAL (op) & 3) == 0"))) + +(define_predicate "const_0_to_14_step_2_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 14)") + (match_test "(INTVAL (op) & 1) == 0"))) + (define_predicate "const_1_to_3_operand" (match_code "const_int,const_vector") { @@ -564,8 +588,7 @@ ;; Shifts with a range 1-bit_size (aarch64_simd_shift_imm_offset) ;; Shifts with a range 0-bit_size (aarch64_simd_shift_imm_bitsize) (define_predicate "aarch64_simd_shift_imm_qi" - (and (match_code "const_int") - (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) + (match_operand 0 "const_0_to_7_operand")) (define_predicate "aarch64_simd_shift_imm_hi" (and (match_code "const_int") diff --git a/gcc/testsuite/g++.target/aarch64/sme2/aarch64-sme2-acle-asm.exp 
b/gcc/testsuite/g++.target/aarch64/sme2/aarch64-sme2-acle-asm.exp new file mode 100644 index 0000000000000000000000000000000000000000..46c88367dd00db2d32fa3ef55fee735f962e6edd --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sme2/aarch64-sme2-acle-asm.exp @@ -0,0 +1,82 @@ +# Assembly-based regression-test driver for the SME2 ACLE. +# Copyright (C) 2009-2023 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. */ + +# GCC testsuite that uses the `dg.exp' driver. + +# Exit immediately if this isn't an AArch64 target. +if {![istarget aarch64*-*-*] } { + return +} + +# Load support procs. +load_lib g++-dg.exp + +# Initialize `dg'. +dg-init + +# Force SME2 if we're not testing it already. +if { [check_effective_target_aarch64_sme2] } { + set sme2_flags "" +} else { + set sme2_flags "-march=armv9-a+sme2" +} + +# Turn off any codegen tweaks by default that may affect expected assembly. +# Tests relying on those should turn them on explicitly. +set sme2_flags "$sme2_flags -mtune=generic -moverride=tune=none" + +global gcc_runtest_parallelize_limit_minor +if { [info exists gcc_runtest_parallelize_limit_minor] } { + set old_limit_minor $gcc_runtest_parallelize_limit_minor + set gcc_runtest_parallelize_limit_minor 1 +} + +torture-init +set-torture-options { + "-std=c++11 -O0 -g" + "-std=c++14 -O1 -g" + "-std=c++17 -Og -g" + "-std=c++23 -Os -g" + "-std=gnu++11 -O2 -fno-schedule-insns -fno-schedule-insns2 -DCHECK_ASM --save-temps" + "-std=gnu++23 -Ofast -g" +} { + "-DTEST_FULL" + "-DTEST_OVERLOADS" +} + +# Main loop. +set gcc_subdir [string replace $subdir 0 2 gcc] +set files [glob -nocomplain $srcdir/$gcc_subdir/acle-asm/*.c] +set save-dg-do-what-default ${dg-do-what-default} +if { [check_effective_target_aarch64_asm_sme2_ok] } { + set dg-do-what-default assemble +} else { + set dg-do-what-default compile +} +gcc-dg-runtest [lsort $files] "" "$sme2_flags -fno-ipa-icf" +set dg-do-what-default ${save-dg-do-what-default} + +torture-finish + +if { [info exists gcc_runtest_parallelize_limit_minor] } { + set gcc_runtest_parallelize_limit_minor $old_limit_minor +} + +# All done. 
+dg-finish diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_s16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_s16.c new file mode 100644 index 0000000000000000000000000000000000000000..8609b13b44d7286ce0b220b78f63959dc2956906 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_s16.c @@ -0,0 +1,42 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** clamp_s16_tied1: +** sclamp z0\.h, z1\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (clamp_s16_tied1, svint16_t, + z0 = svclamp_s16 (z0, z1, z2), + z0 = svclamp (z0, z1, z2)) + +/* +** clamp_s16_tied2: +** sclamp z0\.h, z1\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (clamp_s16_tied2, svint16_t, + z0 = svclamp_s16 (z1, z0, z2), + z0 = svclamp (z1, z0, z2)) + +/* +** clamp_s16_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** sclamp z0\.h, z2\.h, \1\.h +** ret +*/ +TEST_UNIFORM_Z (clamp_s16_tied3, svint16_t, + z0 = svclamp_s16 (z1, z2, z0), + z0 = svclamp (z1, z2, z0)) + +/* +** clamp_s16_untied: +** movprfx z0, z1 +** sclamp z0\.h, z2\.h, z3\.h +** ret +*/ +TEST_UNIFORM_Z (clamp_s16_untied, svint16_t, + z0 = svclamp_s16 (z1, z2, z3), + z0 = svclamp (z1, z2, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_s32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_s32.c new file mode 100644 index 0000000000000000000000000000000000000000..a8d4305933b48a6f77dcccfbcbd75597d99bb186 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_s32.c @@ -0,0 +1,42 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** clamp_s32_tied1: +** sclamp z0\.s, z1\.s, z2\.s +** ret +*/ +TEST_UNIFORM_Z (clamp_s32_tied1, svint32_t, + z0 = svclamp_s32 (z0, z1, z2), + z0 = svclamp (z0, z1, z2)) + +/* +** clamp_s32_tied2: +** sclamp z0\.s, z1\.s, z2\.s +** ret +*/ +TEST_UNIFORM_Z (clamp_s32_tied2, svint32_t, + z0 = svclamp_s32 (z1, z0, z2), + z0 = svclamp (z1, z0, z2)) + +/* +** clamp_s32_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** sclamp z0\.s, z2\.s, \1\.s +** ret +*/ +TEST_UNIFORM_Z (clamp_s32_tied3, svint32_t, + z0 = svclamp_s32 (z1, z2, z0), + z0 = svclamp (z1, z2, z0)) + +/* +** clamp_s32_untied: +** movprfx z0, z1 +** sclamp z0\.s, z2\.s, z3\.s +** ret +*/ +TEST_UNIFORM_Z (clamp_s32_untied, svint32_t, + z0 = svclamp_s32 (z1, z2, z3), + z0 = svclamp (z1, z2, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_s64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_s64.c new file mode 100644 index 0000000000000000000000000000000000000000..364d185906150b4e7be37f2154679b86484cae6e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_s64.c @@ -0,0 +1,42 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** clamp_s64_tied1: +** sclamp z0\.d, z1\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (clamp_s64_tied1, svint64_t, + z0 = svclamp_s64 (z0, z1, z2), + z0 = svclamp (z0, z1, z2)) + +/* +** clamp_s64_tied2: +** sclamp z0\.d, z1\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (clamp_s64_tied2, svint64_t, + z0 = svclamp_s64 (z1, z0, z2), + z0 = svclamp (z1, z0, z2)) + +/* +** clamp_s64_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** sclamp z0\.d, z2\.d, \1\.d +** ret +*/ +TEST_UNIFORM_Z (clamp_s64_tied3, svint64_t, + z0 = svclamp_s64 (z1, z2, z0), + z0 = svclamp (z1, z2, z0)) + +/* +** clamp_s64_untied: +** movprfx z0, z1 +** sclamp z0\.d, z2\.d, z3\.d +** ret +*/ +TEST_UNIFORM_Z 
(clamp_s64_untied, svint64_t, + z0 = svclamp_s64 (z1, z2, z3), + z0 = svclamp (z1, z2, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_s8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_s8.c new file mode 100644 index 0000000000000000000000000000000000000000..cabb40bb181e588c104301664dbdfad8efb7232b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_s8.c @@ -0,0 +1,42 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** clamp_s8_tied1: +** sclamp z0\.b, z1\.b, z2\.b +** ret +*/ +TEST_UNIFORM_Z (clamp_s8_tied1, svint8_t, + z0 = svclamp_s8 (z0, z1, z2), + z0 = svclamp (z0, z1, z2)) + +/* +** clamp_s8_tied2: +** sclamp z0\.b, z1\.b, z2\.b +** ret +*/ +TEST_UNIFORM_Z (clamp_s8_tied2, svint8_t, + z0 = svclamp_s8 (z1, z0, z2), + z0 = svclamp (z1, z0, z2)) + +/* +** clamp_s8_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** sclamp z0\.b, z2\.b, \1\.b +** ret +*/ +TEST_UNIFORM_Z (clamp_s8_tied3, svint8_t, + z0 = svclamp_s8 (z1, z2, z0), + z0 = svclamp (z1, z2, z0)) + +/* +** clamp_s8_untied: +** movprfx z0, z1 +** sclamp z0\.b, z2\.b, z3\.b +** ret +*/ +TEST_UNIFORM_Z (clamp_s8_untied, svint8_t, + z0 = svclamp_s8 (z1, z2, z3), + z0 = svclamp (z1, z2, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_u16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_u16.c new file mode 100644 index 0000000000000000000000000000000000000000..af8c7128e933c563ec9be102b074fd1c9fb98393 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_u16.c @@ -0,0 +1,42 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** clamp_u16_tied1: +** uclamp z0\.h, z1\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (clamp_u16_tied1, svuint16_t, + z0 = svclamp_u16 (z0, z1, z2), + z0 = svclamp (z0, z1, z2)) + +/* +** clamp_u16_tied2: +** uclamp z0\.h, z1\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (clamp_u16_tied2, svuint16_t, + z0 = svclamp_u16 (z1, z0, z2), + z0 = svclamp (z1, z0, z2)) + +/* +** clamp_u16_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** uclamp z0\.h, z2\.h, \1\.h +** ret +*/ +TEST_UNIFORM_Z (clamp_u16_tied3, svuint16_t, + z0 = svclamp_u16 (z1, z2, z0), + z0 = svclamp (z1, z2, z0)) + +/* +** clamp_u16_untied: +** movprfx z0, z1 +** uclamp z0\.h, z2\.h, z3\.h +** ret +*/ +TEST_UNIFORM_Z (clamp_u16_untied, svuint16_t, + z0 = svclamp_u16 (z1, z2, z3), + z0 = svclamp (z1, z2, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_u32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_u32.c new file mode 100644 index 0000000000000000000000000000000000000000..cca413a9c347340fa6a2ed503b8b59cda7a8cb2e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_u32.c @@ -0,0 +1,42 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** clamp_u32_tied1: +** uclamp z0\.s, z1\.s, z2\.s +** ret +*/ +TEST_UNIFORM_Z (clamp_u32_tied1, svuint32_t, + z0 = svclamp_u32 (z0, z1, z2), + z0 = svclamp (z0, z1, z2)) + +/* +** clamp_u32_tied2: +** uclamp z0\.s, z1\.s, z2\.s +** ret +*/ +TEST_UNIFORM_Z (clamp_u32_tied2, svuint32_t, + z0 = svclamp_u32 (z1, z0, z2), + z0 = svclamp (z1, z0, z2)) + +/* +** clamp_u32_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** uclamp z0\.s, z2\.s, \1\.s +** ret +*/ +TEST_UNIFORM_Z (clamp_u32_tied3, svuint32_t, + z0 = svclamp_u32 (z1, z2, z0), + z0 = svclamp (z1, z2, z0)) + +/* +** clamp_u32_untied: +** movprfx 
z0, z1 +** uclamp z0\.s, z2\.s, z3\.s +** ret +*/ +TEST_UNIFORM_Z (clamp_u32_untied, svuint32_t, + z0 = svclamp_u32 (z1, z2, z3), + z0 = svclamp (z1, z2, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_u64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_u64.c new file mode 100644 index 0000000000000000000000000000000000000000..93d3757952b31c28036d831241bd83ee5a1c9ee1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_u64.c @@ -0,0 +1,42 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** clamp_u64_tied1: +** uclamp z0\.d, z1\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (clamp_u64_tied1, svuint64_t, + z0 = svclamp_u64 (z0, z1, z2), + z0 = svclamp (z0, z1, z2)) + +/* +** clamp_u64_tied2: +** uclamp z0\.d, z1\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (clamp_u64_tied2, svuint64_t, + z0 = svclamp_u64 (z1, z0, z2), + z0 = svclamp (z1, z0, z2)) + +/* +** clamp_u64_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** uclamp z0\.d, z2\.d, \1\.d +** ret +*/ +TEST_UNIFORM_Z (clamp_u64_tied3, svuint64_t, + z0 = svclamp_u64 (z1, z2, z0), + z0 = svclamp (z1, z2, z0)) + +/* +** clamp_u64_untied: +** movprfx z0, z1 +** uclamp z0\.d, z2\.d, z3\.d +** ret +*/ +TEST_UNIFORM_Z (clamp_u64_untied, svuint64_t, + z0 = svclamp_u64 (z1, z2, z3), + z0 = svclamp (z1, z2, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_u8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_u8.c new file mode 100644 index 0000000000000000000000000000000000000000..092b33698ed444ba35ef8b21bc4f4bc918bc7e78 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/clamp_u8.c @@ -0,0 +1,42 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** clamp_u8_tied1: +** uclamp z0\.b, z1\.b, z2\.b +** ret +*/ +TEST_UNIFORM_Z (clamp_u8_tied1, svuint8_t, + z0 = svclamp_u8 (z0, z1, z2), + z0 = svclamp (z0, z1, z2)) + +/* +** clamp_u8_tied2: +** uclamp z0\.b, z1\.b, z2\.b +** ret +*/ +TEST_UNIFORM_Z (clamp_u8_tied2, svuint8_t, + z0 = svclamp_u8 (z1, z0, z2), + z0 = svclamp (z1, z0, z2)) + +/* +** clamp_u8_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** uclamp z0\.b, z2\.b, \1\.b +** ret +*/ +TEST_UNIFORM_Z (clamp_u8_tied3, svuint8_t, + z0 = svclamp_u8 (z1, z2, z0), + z0 = svclamp (z1, z2, z0)) + +/* +** clamp_u8_untied: +** movprfx z0, z1 +** uclamp z0\.b, z2\.b, z3\.b +** ret +*/ +TEST_UNIFORM_Z (clamp_u8_untied, svuint8_t, + z0 = svclamp_u8 (z1, z2, z3), + z0 = svclamp (z1, z2, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_bf16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_bf16.c new file mode 100644 index 0000000000000000000000000000000000000000..6507c5a9c15477de473e077d3a33a866f899fba5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_bf16.c @@ -0,0 +1,76 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** revd_bf16_m_tied12: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_bf16_m_tied12, svbfloat16_t, + z0 = svrevd_bf16_m (z0, p0, z0), + z0 = svrevd_m (z0, p0, z0)) + +/* +** revd_bf16_m_tied1: +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_bf16_m_tied1, svbfloat16_t, + z0 = svrevd_bf16_m (z0, p0, z1), + z0 = svrevd_m (z0, p0, z1)) + +/* +** revd_bf16_m_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** revd z0\.q, p0/m, \1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_bf16_m_tied2, svbfloat16_t, + z0 = 
svrevd_bf16_m (z1, p0, z0), + z0 = svrevd_m (z1, p0, z0)) + +/* +** revd_bf16_m_untied: +** movprfx z0, z2 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_bf16_m_untied, svbfloat16_t, + z0 = svrevd_bf16_m (z2, p0, z1), + z0 = svrevd_m (z2, p0, z1)) + +/* Awkward register allocation. Don't require specific output. */ +TEST_UNIFORM_Z (revd_bf16_z_tied1, svbfloat16_t, + z0 = svrevd_bf16_z (p0, z0), + z0 = svrevd_z (p0, z0)) + +/* +** revd_bf16_z_untied: +** mov z0\.[bhsd], #0 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_bf16_z_untied, svbfloat16_t, + z0 = svrevd_bf16_z (p0, z1), + z0 = svrevd_z (p0, z1)) + +/* +** revd_bf16_x_tied1: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_bf16_x_tied1, svbfloat16_t, + z0 = svrevd_bf16_x (p0, z0), + z0 = svrevd_x (p0, z0)) + +/* +** revd_bf16_x_untied: +** movprfx z0, z1 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_bf16_x_untied, svbfloat16_t, + z0 = svrevd_bf16_x (p0, z1), + z0 = svrevd_x (p0, z1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_f16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_f16.c new file mode 100644 index 0000000000000000000000000000000000000000..1a2f893d686139b0b396c576ebfc615c42cbf388 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_f16.c @@ -0,0 +1,76 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** revd_f16_m_tied12: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f16_m_tied12, svfloat16_t, + z0 = svrevd_f16_m (z0, p0, z0), + z0 = svrevd_m (z0, p0, z0)) + +/* +** revd_f16_m_tied1: +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f16_m_tied1, svfloat16_t, + z0 = svrevd_f16_m (z0, p0, z1), + z0 = svrevd_m (z0, p0, z1)) + +/* +** revd_f16_m_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** revd z0\.q, p0/m, \1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f16_m_tied2, svfloat16_t, + z0 = svrevd_f16_m (z1, p0, z0), + z0 = svrevd_m (z1, p0, z0)) + +/* +** revd_f16_m_untied: +** movprfx z0, z2 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f16_m_untied, svfloat16_t, + z0 = svrevd_f16_m (z2, p0, z1), + z0 = svrevd_m (z2, p0, z1)) + +/* Awkward register allocation. Don't require specific output. 
*/ +TEST_UNIFORM_Z (revd_f16_z_tied1, svfloat16_t, + z0 = svrevd_f16_z (p0, z0), + z0 = svrevd_z (p0, z0)) + +/* +** revd_f16_z_untied: +** mov z0\.[bhsd], #0 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f16_z_untied, svfloat16_t, + z0 = svrevd_f16_z (p0, z1), + z0 = svrevd_z (p0, z1)) + +/* +** revd_f16_x_tied1: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f16_x_tied1, svfloat16_t, + z0 = svrevd_f16_x (p0, z0), + z0 = svrevd_x (p0, z0)) + +/* +** revd_f16_x_untied: +** movprfx z0, z1 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f16_x_untied, svfloat16_t, + z0 = svrevd_f16_x (p0, z1), + z0 = svrevd_x (p0, z1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_f32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_f32.c new file mode 100644 index 0000000000000000000000000000000000000000..81c77d52460c5734779deb9e1cfe6689cb259600 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_f32.c @@ -0,0 +1,76 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** revd_f32_m_tied12: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f32_m_tied12, svfloat32_t, + z0 = svrevd_f32_m (z0, p0, z0), + z0 = svrevd_m (z0, p0, z0)) + +/* +** revd_f32_m_tied1: +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f32_m_tied1, svfloat32_t, + z0 = svrevd_f32_m (z0, p0, z1), + z0 = svrevd_m (z0, p0, z1)) + +/* +** revd_f32_m_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** revd z0\.q, p0/m, \1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f32_m_tied2, svfloat32_t, + z0 = svrevd_f32_m (z1, p0, z0), + z0 = svrevd_m (z1, p0, z0)) + +/* +** revd_f32_m_untied: +** movprfx z0, z2 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f32_m_untied, svfloat32_t, + z0 = svrevd_f32_m (z2, p0, z1), + z0 = svrevd_m (z2, p0, z1)) + +/* Awkward register allocation. Don't require specific output. 
*/ +TEST_UNIFORM_Z (revd_f32_z_tied1, svfloat32_t, + z0 = svrevd_f32_z (p0, z0), + z0 = svrevd_z (p0, z0)) + +/* +** revd_f32_z_untied: +** mov z0\.[bhsd], #0 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f32_z_untied, svfloat32_t, + z0 = svrevd_f32_z (p0, z1), + z0 = svrevd_z (p0, z1)) + +/* +** revd_f32_x_tied1: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f32_x_tied1, svfloat32_t, + z0 = svrevd_f32_x (p0, z0), + z0 = svrevd_x (p0, z0)) + +/* +** revd_f32_x_untied: +** movprfx z0, z1 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f32_x_untied, svfloat32_t, + z0 = svrevd_f32_x (p0, z1), + z0 = svrevd_x (p0, z1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_f64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_f64.c new file mode 100644 index 0000000000000000000000000000000000000000..fce6d6514c73a708f56a5f42d77daa9915631e4c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_f64.c @@ -0,0 +1,76 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** revd_f64_m_tied12: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f64_m_tied12, svfloat64_t, + z0 = svrevd_f64_m (z0, p0, z0), + z0 = svrevd_m (z0, p0, z0)) + +/* +** revd_f64_m_tied1: +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f64_m_tied1, svfloat64_t, + z0 = svrevd_f64_m (z0, p0, z1), + z0 = svrevd_m (z0, p0, z1)) + +/* +** revd_f64_m_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** revd z0\.q, p0/m, \1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f64_m_tied2, svfloat64_t, + z0 = svrevd_f64_m (z1, p0, z0), + z0 = svrevd_m (z1, p0, z0)) + +/* +** revd_f64_m_untied: +** movprfx z0, z2 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f64_m_untied, svfloat64_t, + z0 = svrevd_f64_m (z2, p0, z1), + z0 = svrevd_m (z2, p0, z1)) + +/* Awkward register allocation. Don't require specific output. 
*/ +TEST_UNIFORM_Z (revd_f64_z_tied1, svfloat64_t, + z0 = svrevd_f64_z (p0, z0), + z0 = svrevd_z (p0, z0)) + +/* +** revd_f64_z_untied: +** mov z0\.[bhsd], #0 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f64_z_untied, svfloat64_t, + z0 = svrevd_f64_z (p0, z1), + z0 = svrevd_z (p0, z1)) + +/* +** revd_f64_x_tied1: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f64_x_tied1, svfloat64_t, + z0 = svrevd_f64_x (p0, z0), + z0 = svrevd_x (p0, z0)) + +/* +** revd_f64_x_untied: +** movprfx z0, z1 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_f64_x_untied, svfloat64_t, + z0 = svrevd_f64_x (p0, z1), + z0 = svrevd_x (p0, z1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_s16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_s16.c new file mode 100644 index 0000000000000000000000000000000000000000..a2eba6a609fb23da1bdf7f78cfbcd7d4379bfac3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_s16.c @@ -0,0 +1,76 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** revd_s16_m_tied12: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s16_m_tied12, svint16_t, + z0 = svrevd_s16_m (z0, p0, z0), + z0 = svrevd_m (z0, p0, z0)) + +/* +** revd_s16_m_tied1: +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s16_m_tied1, svint16_t, + z0 = svrevd_s16_m (z0, p0, z1), + z0 = svrevd_m (z0, p0, z1)) + +/* +** revd_s16_m_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** revd z0\.q, p0/m, \1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s16_m_tied2, svint16_t, + z0 = svrevd_s16_m (z1, p0, z0), + z0 = svrevd_m (z1, p0, z0)) + +/* +** revd_s16_m_untied: +** movprfx z0, z2 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s16_m_untied, svint16_t, + z0 = svrevd_s16_m (z2, p0, z1), + z0 = svrevd_m (z2, p0, z1)) + +/* Awkward register allocation. Don't require specific output. 
*/ +TEST_UNIFORM_Z (revd_s16_z_tied1, svint16_t, + z0 = svrevd_s16_z (p0, z0), + z0 = svrevd_z (p0, z0)) + +/* +** revd_s16_z_untied: +** mov z0\.[bhsd], #0 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s16_z_untied, svint16_t, + z0 = svrevd_s16_z (p0, z1), + z0 = svrevd_z (p0, z1)) + +/* +** revd_s16_x_tied1: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s16_x_tied1, svint16_t, + z0 = svrevd_s16_x (p0, z0), + z0 = svrevd_x (p0, z0)) + +/* +** revd_s16_x_untied: +** movprfx z0, z1 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s16_x_untied, svint16_t, + z0 = svrevd_s16_x (p0, z1), + z0 = svrevd_x (p0, z1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_s32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_s32.c new file mode 100644 index 0000000000000000000000000000000000000000..cbc0dc0a0b669dbb0bfdf860fd22cb291dc89540 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_s32.c @@ -0,0 +1,76 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** revd_s32_m_tied12: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s32_m_tied12, svint32_t, + z0 = svrevd_s32_m (z0, p0, z0), + z0 = svrevd_m (z0, p0, z0)) + +/* +** revd_s32_m_tied1: +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s32_m_tied1, svint32_t, + z0 = svrevd_s32_m (z0, p0, z1), + z0 = svrevd_m (z0, p0, z1)) + +/* +** revd_s32_m_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** revd z0\.q, p0/m, \1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s32_m_tied2, svint32_t, + z0 = svrevd_s32_m (z1, p0, z0), + z0 = svrevd_m (z1, p0, z0)) + +/* +** revd_s32_m_untied: +** movprfx z0, z2 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s32_m_untied, svint32_t, + z0 = svrevd_s32_m (z2, p0, z1), + z0 = svrevd_m (z2, p0, z1)) + +/* Awkward register allocation. Don't require specific output. 
*/ +TEST_UNIFORM_Z (revd_s32_z_tied1, svint32_t, + z0 = svrevd_s32_z (p0, z0), + z0 = svrevd_z (p0, z0)) + +/* +** revd_s32_z_untied: +** mov z0\.[bhsd], #0 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s32_z_untied, svint32_t, + z0 = svrevd_s32_z (p0, z1), + z0 = svrevd_z (p0, z1)) + +/* +** revd_s32_x_tied1: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s32_x_tied1, svint32_t, + z0 = svrevd_s32_x (p0, z0), + z0 = svrevd_x (p0, z0)) + +/* +** revd_s32_x_untied: +** movprfx z0, z1 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s32_x_untied, svint32_t, + z0 = svrevd_s32_x (p0, z1), + z0 = svrevd_x (p0, z1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_s64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_s64.c new file mode 100644 index 0000000000000000000000000000000000000000..aa963d388e005d1df80524343a9ef8895f84f017 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_s64.c @@ -0,0 +1,76 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** revd_s64_m_tied12: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s64_m_tied12, svint64_t, + z0 = svrevd_s64_m (z0, p0, z0), + z0 = svrevd_m (z0, p0, z0)) + +/* +** revd_s64_m_tied1: +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s64_m_tied1, svint64_t, + z0 = svrevd_s64_m (z0, p0, z1), + z0 = svrevd_m (z0, p0, z1)) + +/* +** revd_s64_m_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** revd z0\.q, p0/m, \1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s64_m_tied2, svint64_t, + z0 = svrevd_s64_m (z1, p0, z0), + z0 = svrevd_m (z1, p0, z0)) + +/* +** revd_s64_m_untied: +** movprfx z0, z2 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s64_m_untied, svint64_t, + z0 = svrevd_s64_m (z2, p0, z1), + z0 = svrevd_m (z2, p0, z1)) + +/* Awkward register allocation. Don't require specific output. 
*/ +TEST_UNIFORM_Z (revd_s64_z_tied1, svint64_t, + z0 = svrevd_s64_z (p0, z0), + z0 = svrevd_z (p0, z0)) + +/* +** revd_s64_z_untied: +** mov z0\.[bhsd], #0 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s64_z_untied, svint64_t, + z0 = svrevd_s64_z (p0, z1), + z0 = svrevd_z (p0, z1)) + +/* +** revd_s64_x_tied1: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s64_x_tied1, svint64_t, + z0 = svrevd_s64_x (p0, z0), + z0 = svrevd_x (p0, z0)) + +/* +** revd_s64_x_untied: +** movprfx z0, z1 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s64_x_untied, svint64_t, + z0 = svrevd_s64_x (p0, z1), + z0 = svrevd_x (p0, z1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_s8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_s8.c new file mode 100644 index 0000000000000000000000000000000000000000..4291b7197c6491fd6ebe475e4ce83a20105f5622 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_s8.c @@ -0,0 +1,76 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** revd_s8_m_tied12: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s8_m_tied12, svint8_t, + z0 = svrevd_s8_m (z0, p0, z0), + z0 = svrevd_m (z0, p0, z0)) + +/* +** revd_s8_m_tied1: +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s8_m_tied1, svint8_t, + z0 = svrevd_s8_m (z0, p0, z1), + z0 = svrevd_m (z0, p0, z1)) + +/* +** revd_s8_m_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** revd z0\.q, p0/m, \1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s8_m_tied2, svint8_t, + z0 = svrevd_s8_m (z1, p0, z0), + z0 = svrevd_m (z1, p0, z0)) + +/* +** revd_s8_m_untied: +** movprfx z0, z2 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s8_m_untied, svint8_t, + z0 = svrevd_s8_m (z2, p0, z1), + z0 = svrevd_m (z2, p0, z1)) + +/* Awkward register allocation. Don't require specific output. 
*/ +TEST_UNIFORM_Z (revd_s8_z_tied1, svint8_t, + z0 = svrevd_s8_z (p0, z0), + z0 = svrevd_z (p0, z0)) + +/* +** revd_s8_z_untied: +** mov z0\.[bhsd], #0 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s8_z_untied, svint8_t, + z0 = svrevd_s8_z (p0, z1), + z0 = svrevd_z (p0, z1)) + +/* +** revd_s8_x_tied1: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s8_x_tied1, svint8_t, + z0 = svrevd_s8_x (p0, z0), + z0 = svrevd_x (p0, z0)) + +/* +** revd_s8_x_untied: +** movprfx z0, z1 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_s8_x_untied, svint8_t, + z0 = svrevd_s8_x (p0, z1), + z0 = svrevd_x (p0, z1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_u16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_u16.c new file mode 100644 index 0000000000000000000000000000000000000000..eaed0d13259e9e2641de9c99a704d22bd3a77f36 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_u16.c @@ -0,0 +1,76 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** revd_u16_m_tied12: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u16_m_tied12, svuint16_t, + z0 = svrevd_u16_m (z0, p0, z0), + z0 = svrevd_m (z0, p0, z0)) + +/* +** revd_u16_m_tied1: +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u16_m_tied1, svuint16_t, + z0 = svrevd_u16_m (z0, p0, z1), + z0 = svrevd_m (z0, p0, z1)) + +/* +** revd_u16_m_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** revd z0\.q, p0/m, \1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u16_m_tied2, svuint16_t, + z0 = svrevd_u16_m (z1, p0, z0), + z0 = svrevd_m (z1, p0, z0)) + +/* +** revd_u16_m_untied: +** movprfx z0, z2 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u16_m_untied, svuint16_t, + z0 = svrevd_u16_m (z2, p0, z1), + z0 = svrevd_m (z2, p0, z1)) + +/* Awkward register allocation. Don't require specific output. 
*/ +TEST_UNIFORM_Z (revd_u16_z_tied1, svuint16_t, + z0 = svrevd_u16_z (p0, z0), + z0 = svrevd_z (p0, z0)) + +/* +** revd_u16_z_untied: +** mov z0\.[bhsd], #0 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u16_z_untied, svuint16_t, + z0 = svrevd_u16_z (p0, z1), + z0 = svrevd_z (p0, z1)) + +/* +** revd_u16_x_tied1: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u16_x_tied1, svuint16_t, + z0 = svrevd_u16_x (p0, z0), + z0 = svrevd_x (p0, z0)) + +/* +** revd_u16_x_untied: +** movprfx z0, z1 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u16_x_untied, svuint16_t, + z0 = svrevd_u16_x (p0, z1), + z0 = svrevd_x (p0, z1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_u32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_u32.c new file mode 100644 index 0000000000000000000000000000000000000000..3b76c7000efb99924bf2a50d7d6d43d94342cf19 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_u32.c @@ -0,0 +1,76 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** revd_u32_m_tied12: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u32_m_tied12, svuint32_t, + z0 = svrevd_u32_m (z0, p0, z0), + z0 = svrevd_m (z0, p0, z0)) + +/* +** revd_u32_m_tied1: +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u32_m_tied1, svuint32_t, + z0 = svrevd_u32_m (z0, p0, z1), + z0 = svrevd_m (z0, p0, z1)) + +/* +** revd_u32_m_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** revd z0\.q, p0/m, \1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u32_m_tied2, svuint32_t, + z0 = svrevd_u32_m (z1, p0, z0), + z0 = svrevd_m (z1, p0, z0)) + +/* +** revd_u32_m_untied: +** movprfx z0, z2 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u32_m_untied, svuint32_t, + z0 = svrevd_u32_m (z2, p0, z1), + z0 = svrevd_m (z2, p0, z1)) + +/* Awkward register allocation. Don't require specific output. 
*/ +TEST_UNIFORM_Z (revd_u32_z_tied1, svuint32_t, + z0 = svrevd_u32_z (p0, z0), + z0 = svrevd_z (p0, z0)) + +/* +** revd_u32_z_untied: +** mov z0\.[bhsd], #0 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u32_z_untied, svuint32_t, + z0 = svrevd_u32_z (p0, z1), + z0 = svrevd_z (p0, z1)) + +/* +** revd_u32_x_tied1: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u32_x_tied1, svuint32_t, + z0 = svrevd_u32_x (p0, z0), + z0 = svrevd_x (p0, z0)) + +/* +** revd_u32_x_untied: +** movprfx z0, z1 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u32_x_untied, svuint32_t, + z0 = svrevd_u32_x (p0, z1), + z0 = svrevd_x (p0, z1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_u64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_u64.c new file mode 100644 index 0000000000000000000000000000000000000000..4589c4635e7b91536bd2c86f0b5af05a3d28049a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_u64.c @@ -0,0 +1,76 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** revd_u64_m_tied12: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u64_m_tied12, svuint64_t, + z0 = svrevd_u64_m (z0, p0, z0), + z0 = svrevd_m (z0, p0, z0)) + +/* +** revd_u64_m_tied1: +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u64_m_tied1, svuint64_t, + z0 = svrevd_u64_m (z0, p0, z1), + z0 = svrevd_m (z0, p0, z1)) + +/* +** revd_u64_m_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** revd z0\.q, p0/m, \1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u64_m_tied2, svuint64_t, + z0 = svrevd_u64_m (z1, p0, z0), + z0 = svrevd_m (z1, p0, z0)) + +/* +** revd_u64_m_untied: +** movprfx z0, z2 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u64_m_untied, svuint64_t, + z0 = svrevd_u64_m (z2, p0, z1), + z0 = svrevd_m (z2, p0, z1)) + +/* Awkward register allocation. Don't require specific output. 
*/ +TEST_UNIFORM_Z (revd_u64_z_tied1, svuint64_t, + z0 = svrevd_u64_z (p0, z0), + z0 = svrevd_z (p0, z0)) + +/* +** revd_u64_z_untied: +** mov z0\.[bhsd], #0 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u64_z_untied, svuint64_t, + z0 = svrevd_u64_z (p0, z1), + z0 = svrevd_z (p0, z1)) + +/* +** revd_u64_x_tied1: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u64_x_tied1, svuint64_t, + z0 = svrevd_u64_x (p0, z0), + z0 = svrevd_x (p0, z0)) + +/* +** revd_u64_x_untied: +** movprfx z0, z1 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u64_x_untied, svuint64_t, + z0 = svrevd_u64_x (p0, z1), + z0 = svrevd_x (p0, z1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_u8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_u8.c new file mode 100644 index 0000000000000000000000000000000000000000..ac5d749818eeaeda173536b30e9262e3c84a62dd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_u8.c @@ -0,0 +1,76 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme_acle.h" + +/* +** revd_u8_m_tied12: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u8_m_tied12, svuint8_t, + z0 = svrevd_u8_m (z0, p0, z0), + z0 = svrevd_m (z0, p0, z0)) + +/* +** revd_u8_m_tied1: +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u8_m_tied1, svuint8_t, + z0 = svrevd_u8_m (z0, p0, z1), + z0 = svrevd_m (z0, p0, z1)) + +/* +** revd_u8_m_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** revd z0\.q, p0/m, \1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u8_m_tied2, svuint8_t, + z0 = svrevd_u8_m (z1, p0, z0), + z0 = svrevd_m (z1, p0, z0)) + +/* +** revd_u8_m_untied: +** movprfx z0, z2 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u8_m_untied, svuint8_t, + z0 = svrevd_u8_m (z2, p0, z1), + z0 = svrevd_m (z2, p0, z1)) + +/* Awkward register allocation. Don't require specific output. 
*/ +TEST_UNIFORM_Z (revd_u8_z_tied1, svuint8_t, + z0 = svrevd_u8_z (p0, z0), + z0 = svrevd_z (p0, z0)) + +/* +** revd_u8_z_untied: +** mov z0\.[bhsd], #0 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u8_z_untied, svuint8_t, + z0 = svrevd_u8_z (p0, z1), + z0 = svrevd_z (p0, z1)) + +/* +** revd_u8_x_tied1: +** revd z0\.q, p0/m, z0\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u8_x_tied1, svuint8_t, + z0 = svrevd_u8_x (p0, z0), + z0 = svrevd_x (p0, z0)) + +/* +** revd_u8_x_untied: +** movprfx z0, z1 +** revd z0\.q, p0/m, z1\.q +** ret +*/ +TEST_UNIFORM_Z (revd_u8_x_untied, svuint8_t, + z0 = svrevd_u8_x (p0, z1), + z0 = svrevd_x (p0, z1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/clamp_1.c b/gcc/testsuite/gcc.target/aarch64/sme/clamp_1.c new file mode 100644 index 0000000000000000000000000000000000000000..fc9d70bc95e983aff34a2bad50be4cbc81c6212a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/clamp_1.c @@ -0,0 +1,38 @@ +// { dg-options "-O" } + +#include <arm_sme.h> + +#define TEST(TYPE) \ + TYPE \ + tied1_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming \ + { \ + return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), a, b), c); \ + } \ + \ + TYPE \ + tied2_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming \ + { \ + return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), b, a), c); \ + } + +TEST(svint8_t) +TEST(svint16_t) +TEST(svint32_t) +TEST(svint64_t) + +TEST(svuint8_t) +TEST(svuint16_t) +TEST(svuint32_t) +TEST(svuint64_t) + +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.b, z1\.b, z2\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.h, z1\.h, z2\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.s, z1\.s, z2\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.d, z1\.d, z2\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.b, z1\.b, z2\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.h, z1\.h, z2\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.s, z1\.s, z2\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.d, z1\.d, z2\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme/clamp_2.c b/gcc/testsuite/gcc.target/aarch64/sme/clamp_2.c new file mode 100644 index 0000000000000000000000000000000000000000..67d3816bde24e09d43dfb00e861968762d96dd0c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/clamp_2.c @@ -0,0 +1,32 @@ +// { dg-options "-O" } + +#include <arm_sme.h> + +#define TEST(TYPE) \ + TYPE \ + untied_##TYPE(TYPE a, TYPE b, TYPE c, TYPE d) __arm_streaming \ + { \ + return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), b, c), d); \ + } + +TEST(svint8_t) +TEST(svint16_t) +TEST(svint32_t) +TEST(svint64_t) + +TEST(svuint8_t) +TEST(svuint16_t) +TEST(svuint32_t) +TEST(svuint64_t) + +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.b, z2\.b, z3\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.h, z2\.h, z3\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.s, z2\.s, z3\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.d, z2\.d, z3\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.b, z2\.b, z3\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.h, z2\.h, z3\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.s, z2\.s, z3\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.d, z2\.d, z3\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz0, z1\n} 8 } 
} */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c b/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c new file mode 100644 index 0000000000000000000000000000000000000000..44959f7949092c43ae7a25e7cdfd346629907be1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c @@ -0,0 +1,26 @@ +// { dg-options "-O" } + +#include <arm_sme.h> + +#define TEST(TYPE) \ + TYPE \ + tied1_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming \ + { \ + return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), a, b), c); \ + } \ + \ + TYPE \ + tied2_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming \ + { \ + return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), b, a), c); \ + } + +TEST(svfloat16_t) +TEST(svfloat32_t) +TEST(svfloat64_t) + +/* { dg-final { scan-assembler-times {\tfclamp\tz0\.h, z1\.h, z2\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfclamp\tz0\.s, z1\.s, z2\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfclamp\tz0\.d, z1\.d, z2\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c b/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c new file mode 100644 index 0000000000000000000000000000000000000000..643b2635b90e83e2c8a35595cb1dce92bcedd006 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c @@ -0,0 +1,20 @@ +// { dg-options "-O" } + +#include <arm_sme.h> + +#define TEST(TYPE) \ + TYPE \ + untied_##TYPE(TYPE a, TYPE b, TYPE c, TYPE d) __arm_streaming \ + { \ + return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), b, c), d); \ + } + +TEST(svfloat16_t) +TEST(svfloat32_t) +TEST(svfloat64_t) + +/* { dg-final { scan-assembler-times {\tfclamp\tz0\.h, z2\.h, z3\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfclamp\tz0\.s, z2\.s, z3\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfclamp\tz0\.d, z2\.d, z3\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz0, z1\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/aarch64-sme2-acle-asm.exp b/gcc/testsuite/gcc.target/aarch64/sme2/aarch64-sme2-acle-asm.exp new file mode 100644 index 0000000000000000000000000000000000000000..5b8cfe40d2905cf195266d620af76aa06c26134d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/aarch64-sme2-acle-asm.exp @@ -0,0 +1,81 @@ +# Assembly-based regression-test driver for the SME2 ACLE. +# Copyright (C) 2009-2023 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. */ + +# GCC testsuite that uses the `dg.exp' driver. + +# Exit immediately if this isn't an AArch64 target. +if {![istarget aarch64*-*-*] } { + return +} + +# Load support procs. +load_lib gcc-dg.exp + +# Initialize `dg'. +dg-init + +# Force SME2 if we're not testing it already. 
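+# check_effective_target_aarch64_sme2 is true if the current test flags
+# already enable SME2; in that case no extra architecture option is
+# needed, otherwise assume -march=armv9-a+sme2 is enough to enable it.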
+if { [check_effective_target_aarch64_sme2] } { + set sme2_flags "" +} else { + set sme2_flags "-march=armv9-a+sme2" +} + +# Turn off any codegen tweaks by default that may affect expected assembly. +# Tests relying on those should turn them on explicitly. +set sme2_flags "$sme2_flags -mtune=generic -moverride=tune=none" + +global gcc_runtest_parallelize_limit_minor +if { [info exists gcc_runtest_parallelize_limit_minor] } { + set old_limit_minor $gcc_runtest_parallelize_limit_minor + set gcc_runtest_parallelize_limit_minor 1 +} + +torture-init +set-torture-options { + "-std=c90 -O0 -g" + "-std=c99 -Og -g" + "-std=c11 -Os -g" + "-std=c23 -O2 -fno-schedule-insns -fno-schedule-insns2 -DCHECK_ASM --save-temps" + "-std=gnu90 -O3 -g" + "-std=gnu23 -Ofast -g" +} { + "-DTEST_FULL" + "-DTEST_OVERLOADS" +} + +# Main loop. +set files [glob -nocomplain $srcdir/$subdir/acle-asm/*.c] +set save-dg-do-what-default ${dg-do-what-default} +if { [check_effective_target_aarch64_asm_sme2_ok] } { + set dg-do-what-default assemble +} else { + set dg-do-what-default compile +} +gcc-dg-runtest [lsort $files] "" "$sme2_flags -fno-ipa-icf" +set dg-do-what-default ${save-dg-do-what-default} + +torture-finish + +if { [info exists gcc_runtest_parallelize_limit_minor] } { + set gcc_runtest_parallelize_limit_minor $old_limit_minor +} + +# All done. +dg-finish diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..d193bfd6eef241d3e40ba06bba70e98ebc813272 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s16_x2.c @@ -0,0 +1,115 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_z24_z24_z0: +** add {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z0, svint16x2_t, svint16_t, z24, + svadd_single_s16_x2 (z24, z0), + svadd (z24, z0)) + +/* +** add_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** | +** add {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (add_z24_z28_z0, svint16x2_t, svint16_t, z24, + svadd_single_s16_x2 (z28, z0), + svadd (z28, z0)) + +/* +** add_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** add {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (add_z24_z1_z0, svint16x2_t, svint16_t, z24, + svadd_single_s16_x2 (z1, z0), + svadd (z1, z0)) + +/* +** add_z1_z24_z0: +** add {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (add_z1_z24_z0, svint16x2_t, svint16_t, z1, + svadd_single_s16_x2 (z24, z0), + svadd (z24, z0)) + +/* +** add_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z1_z0, svint16x2_t, svint16_t, z1, + svadd_single_s16_x2 (z1, z0), + svadd (z1, z0)) + +/* +** add_z18_z18_z0: +** add {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (add_z18_z18_z0, svint16x2_t, svint16_t, z18, + svadd_single_s16_x2 (z18, z0), + svadd (z18, z0)) + +/* +** add_awkward: +** ... +** add ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (add_awkward, svint16x2_t, svint16_t, + z0_res = svadd_single_s16_x2 (z1, z0), + z0_res = svadd (z1, z0)) + +/* +** add_z0_z0_z15: +** ... +** add {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svint16x2_t, svint16_t, + z0 = svadd_single_s16_x2 (z0, z15), + z0 = svadd (z0, z15)) + +/* +** add_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** add {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z16, svint16x2_t, svint16_t, z24, + svadd_single_s16_x2 (z24, z16), + svadd (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..ae7fd4f413802d41a5534d2b3e6489541778ef63 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s16_x4.c @@ -0,0 +1,125 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_z24_z24_z0: +** add {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z0, svint16x4_t, svint16_t, z24, + svadd_single_s16_x4 (z24, z0), + svadd (z24, z0)) + +/* +** add_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** | +** add {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (add_z24_z28_z0, svint16x4_t, svint16_t, z24, + svadd_single_s16_x4 (z28, z0), + svadd (z28, z0)) + +/* +** add_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (add_z24_z1_z0, svint16x4_t, svint16_t, z24, + svadd_single_s16_x4 (z1, z0), + svadd (z1, z0)) + +/* +** add_z1_z24_z0: +** add {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z24_z0, svint16x4_t, svint16_t, z1, + svadd_single_s16_x4 (z24, z0), + svadd (z24, z0)) + +/* +** add_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z1_z0, svint16x4_t, svint16_t, z1, + svadd_single_s16_x4 (z1, z0), + svadd (z1, z0)) + +/* +** add_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z18_z18_z0, svint16x4_t, svint16_t, z18, + svadd_single_s16_x4 (z18, z0), + svadd (z18, z0)) + +/* +** add_awkward: +** ... +** add ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (add_awkward, svint16x4_t, svint16_t, + z0_res = svadd_single_s16_x4 (z1, z0), + z0_res = svadd (z1, z0)) + +/* +** add_z0_z0_z15: +** ... +** add {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svint16x4_t, svint16_t, + z0 = svadd_single_s16_x4 (z0, z15), + z0 = svadd (z0, z15)) + +/* +** add_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** add {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z16, svint16x4_t, svint16_t, z24, + svadd_single_s16_x4 (z24, z16), + svadd (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..86fa39c4db68f490d330e25f0e1a2d81bf52db3e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s32_x2.c @@ -0,0 +1,115 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_z24_z24_z0: +** add {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z0, svint32x2_t, svint32_t, z24, + svadd_single_s32_x2 (z24, z0), + svadd (z24, z0)) + +/* +** add_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** | +** add {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (add_z24_z28_z0, svint32x2_t, svint32_t, z24, + svadd_single_s32_x2 (z28, z0), + svadd (z28, z0)) + +/* +** add_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** add {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (add_z24_z1_z0, svint32x2_t, svint32_t, z24, + svadd_single_s32_x2 (z1, z0), + svadd (z1, z0)) + +/* +** add_z1_z24_z0: +** add {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (add_z1_z24_z0, svint32x2_t, svint32_t, z1, + svadd_single_s32_x2 (z24, z0), + svadd (z24, z0)) + +/* +** add_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z1_z0, svint32x2_t, svint32_t, z1, + svadd_single_s32_x2 (z1, z0), + svadd (z1, z0)) + +/* +** add_z18_z18_z0: +** add {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (add_z18_z18_z0, svint32x2_t, svint32_t, z18, + svadd_single_s32_x2 (z18, z0), + svadd (z18, z0)) + +/* +** add_awkward: +** ... +** add ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (add_awkward, svint32x2_t, svint32_t, + z0_res = svadd_single_s32_x2 (z1, z0), + z0_res = svadd (z1, z0)) + +/* +** add_z0_z0_z15: +** ... +** add {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svint32x2_t, svint32_t, + z0 = svadd_single_s32_x2 (z0, z15), + z0 = svadd (z0, z15)) + +/* +** add_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** add {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z16, svint32x2_t, svint32_t, z24, + svadd_single_s32_x2 (z24, z16), + svadd (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..75eadebf66bbd517f363e511b3927516d78cc244 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s32_x4.c @@ -0,0 +1,125 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_z24_z24_z0: +** add {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z0, svint32x4_t, svint32_t, z24, + svadd_single_s32_x4 (z24, z0), + svadd (z24, z0)) + +/* +** add_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** | +** add {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (add_z24_z28_z0, svint32x4_t, svint32_t, z24, + svadd_single_s32_x4 (z28, z0), + svadd (z28, z0)) + +/* +** add_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (add_z24_z1_z0, svint32x4_t, svint32_t, z24, + svadd_single_s32_x4 (z1, z0), + svadd (z1, z0)) + +/* +** add_z1_z24_z0: +** add {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z24_z0, svint32x4_t, svint32_t, z1, + svadd_single_s32_x4 (z24, z0), + svadd (z24, z0)) + +/* +** add_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z1_z0, svint32x4_t, svint32_t, z1, + svadd_single_s32_x4 (z1, z0), + svadd (z1, z0)) + +/* +** add_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z18_z18_z0, svint32x4_t, svint32_t, z18, + svadd_single_s32_x4 (z18, z0), + svadd (z18, z0)) + +/* +** add_awkward: +** ... +** add ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (add_awkward, svint32x4_t, svint32_t, + z0_res = svadd_single_s32_x4 (z1, z0), + z0_res = svadd (z1, z0)) + +/* +** add_z0_z0_z15: +** ... +** add {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svint32x4_t, svint32_t, + z0 = svadd_single_s32_x4 (z0, z15), + z0 = svadd (z0, z15)) + +/* +** add_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** add {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z16, svint32x4_t, svint32_t, z24, + svadd_single_s32_x4 (z24, z16), + svadd (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..9d51064ec2fc7fd0218d538e4b1d889df8feec65 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s64_x2.c @@ -0,0 +1,115 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_z24_z24_z0: +** add {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z0, svint64x2_t, svint64_t, z24, + svadd_single_s64_x2 (z24, z0), + svadd (z24, z0)) + +/* +** add_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** | +** add {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (add_z24_z28_z0, svint64x2_t, svint64_t, z24, + svadd_single_s64_x2 (z28, z0), + svadd (z28, z0)) + +/* +** add_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** add {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (add_z24_z1_z0, svint64x2_t, svint64_t, z24, + svadd_single_s64_x2 (z1, z0), + svadd (z1, z0)) + +/* +** add_z1_z24_z0: +** add {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (add_z1_z24_z0, svint64x2_t, svint64_t, z1, + svadd_single_s64_x2 (z24, z0), + svadd (z24, z0)) + +/* +** add_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z1_z0, svint64x2_t, svint64_t, z1, + svadd_single_s64_x2 (z1, z0), + svadd (z1, z0)) + +/* +** add_z18_z18_z0: +** add {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (add_z18_z18_z0, svint64x2_t, svint64_t, z18, + svadd_single_s64_x2 (z18, z0), + svadd (z18, z0)) + +/* +** add_awkward: +** ... +** add ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (add_awkward, svint64x2_t, svint64_t, + z0_res = svadd_single_s64_x2 (z1, z0), + z0_res = svadd (z1, z0)) + +/* +** add_z0_z0_z15: +** ... +** add {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svint64x2_t, svint64_t, + z0 = svadd_single_s64_x2 (z0, z15), + z0 = svadd (z0, z15)) + +/* +** add_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** add {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z16, svint64x2_t, svint64_t, z24, + svadd_single_s64_x2 (z24, z16), + svadd (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..ac5e959815c2fc154f65dec3ffbc96dcdbad6cb7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s64_x4.c @@ -0,0 +1,125 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_z24_z24_z0: +** add {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z0, svint64x4_t, svint64_t, z24, + svadd_single_s64_x4 (z24, z0), + svadd (z24, z0)) + +/* +** add_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** | +** add {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (add_z24_z28_z0, svint64x4_t, svint64_t, z24, + svadd_single_s64_x4 (z28, z0), + svadd (z28, z0)) + +/* +** add_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (add_z24_z1_z0, svint64x4_t, svint64_t, z24, + svadd_single_s64_x4 (z1, z0), + svadd (z1, z0)) + +/* +** add_z1_z24_z0: +** add {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z24_z0, svint64x4_t, svint64_t, z1, + svadd_single_s64_x4 (z24, z0), + svadd (z24, z0)) + +/* +** add_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z1_z0, svint64x4_t, svint64_t, z1, + svadd_single_s64_x4 (z1, z0), + svadd (z1, z0)) + +/* +** add_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z18_z18_z0, svint64x4_t, svint64_t, z18, + svadd_single_s64_x4 (z18, z0), + svadd (z18, z0)) + +/* +** add_awkward: +** ... +** add ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (add_awkward, svint64x4_t, svint64_t, + z0_res = svadd_single_s64_x4 (z1, z0), + z0_res = svadd (z1, z0)) + +/* +** add_z0_z0_z15: +** ... +** add {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svint64x4_t, svint64_t, + z0 = svadd_single_s64_x4 (z0, z15), + z0 = svadd (z0, z15)) + +/* +** add_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** add {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z16, svint64x4_t, svint64_t, z24, + svadd_single_s64_x4 (z24, z16), + svadd (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..5ac04c07378230a4a959bdb3653a998a518f20cb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s8_x2.c @@ -0,0 +1,115 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_z24_z24_z0: +** add {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z0, svint8x2_t, svint8_t, z24, + svadd_single_s8_x2 (z24, z0), + svadd (z24, z0)) + +/* +** add_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** | +** add {z28\.b - z29\.b}, {z28\.b - z29\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (add_z24_z28_z0, svint8x2_t, svint8_t, z24, + svadd_single_s8_x2 (z28, z0), + svadd (z28, z0)) + +/* +** add_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** add {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (add_z24_z1_z0, svint8x2_t, svint8_t, z24, + svadd_single_s8_x2 (z1, z0), + svadd (z1, z0)) + +/* +** add_z1_z24_z0: +** add {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (add_z1_z24_z0, svint8x2_t, svint8_t, z1, + svadd_single_s8_x2 (z24, z0), + svadd (z24, z0)) + +/* +** add_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z1_z0, svint8x2_t, svint8_t, z1, + svadd_single_s8_x2 (z1, z0), + svadd (z1, z0)) + +/* +** add_z18_z18_z0: +** add {z18\.b - z19\.b}, {z18\.b - z19\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (add_z18_z18_z0, svint8x2_t, svint8_t, z18, + svadd_single_s8_x2 (z18, z0), + svadd (z18, z0)) + +/* +** add_awkward: +** ... +** add ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (add_awkward, svint8x2_t, svint8_t, + z0_res = svadd_single_s8_x2 (z1, z0), + z0_res = svadd (z1, z0)) + +/* +** add_z0_z0_z15: +** ... +** add {z0\.b - z1\.b}, {z0\.b - z1\.b}, z15\.b +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svint8x2_t, svint8_t, + z0 = svadd_single_s8_x2 (z0, z15), + z0 = svadd (z0, z15)) + +/* +** add_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** add {z24\.b - z25\.b}, {z24\.b - z25\.b}, \1\.b +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z16, svint8x2_t, svint8_t, z24, + svadd_single_s8_x2 (z24, z16), + svadd (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..df91a6ed53d03640946a0cc223e14cc67c4c9516 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_s8_x4.c @@ -0,0 +1,125 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_z24_z24_z0: +** add {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z0, svint8x4_t, svint8_t, z24, + svadd_single_s8_x4 (z24, z0), + svadd (z24, z0)) + +/* +** add_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** | +** add {z28\.b - z31\.b}, {z28\.b - z31\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (add_z24_z28_z0, svint8x4_t, svint8_t, z24, + svadd_single_s8_x4 (z28, z0), + svadd (z28, z0)) + +/* +** add_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (add_z24_z1_z0, svint8x4_t, svint8_t, z24, + svadd_single_s8_x4 (z1, z0), + svadd (z1, z0)) + +/* +** add_z1_z24_z0: +** add {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z24_z0, svint8x4_t, svint8_t, z1, + svadd_single_s8_x4 (z24, z0), + svadd (z24, z0)) + +/* +** add_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z1_z0, svint8x4_t, svint8_t, z1, + svadd_single_s8_x4 (z1, z0), + svadd (z1, z0)) + +/* +** add_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z18_z18_z0, svint8x4_t, svint8_t, z18, + svadd_single_s8_x4 (z18, z0), + svadd (z18, z0)) + +/* +** add_awkward: +** ... +** add ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (add_awkward, svint8x4_t, svint8_t, + z0_res = svadd_single_s8_x4 (z1, z0), + z0_res = svadd (z1, z0)) + +/* +** add_z0_z0_z15: +** ... +** add {z0\.b - z3\.b}, {z0\.b - z3\.b}, z15\.b +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svint8x4_t, svint8_t, + z0 = svadd_single_s8_x4 (z0, z15), + z0 = svadd (z0, z15)) + +/* +** add_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** add {z24\.b - z27\.b}, {z24\.b - z27\.b}, \1\.b +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z16, svint8x4_t, svint8_t, z24, + svadd_single_s8_x4 (z24, z16), + svadd (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..06866f6be6ba4d42a0876374994f02ddbc34503d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u16_x2.c @@ -0,0 +1,115 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_z24_z24_z0: +** add {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z0, svuint16x2_t, svuint16_t, z24, + svadd_single_u16_x2 (z24, z0), + svadd (z24, z0)) + +/* +** add_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** | +** add {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (add_z24_z28_z0, svuint16x2_t, svuint16_t, z24, + svadd_single_u16_x2 (z28, z0), + svadd (z28, z0)) + +/* +** add_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** add {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (add_z24_z1_z0, svuint16x2_t, svuint16_t, z24, + svadd_single_u16_x2 (z1, z0), + svadd (z1, z0)) + +/* +** add_z1_z24_z0: +** add {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (add_z1_z24_z0, svuint16x2_t, svuint16_t, z1, + svadd_single_u16_x2 (z24, z0), + svadd (z24, z0)) + +/* +** add_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z1_z0, svuint16x2_t, svuint16_t, z1, + svadd_single_u16_x2 (z1, z0), + svadd (z1, z0)) + +/* +** add_z18_z18_z0: +** add {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (add_z18_z18_z0, svuint16x2_t, svuint16_t, z18, + svadd_single_u16_x2 (z18, z0), + svadd (z18, z0)) + +/* +** add_awkward: +** ... +** add ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (add_awkward, svuint16x2_t, svuint16_t, + z0_res = svadd_single_u16_x2 (z1, z0), + z0_res = svadd (z1, z0)) + +/* +** add_z0_z0_z15: +** ... +** add {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svuint16x2_t, svuint16_t, + z0 = svadd_single_u16_x2 (z0, z15), + z0 = svadd (z0, z15)) + +/* +** add_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** add {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z16, svuint16x2_t, svuint16_t, z24, + svadd_single_u16_x2 (z24, z16), + svadd (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..a00959fdc1062a12450c59f7756709c2ddc3e834 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u16_x4.c @@ -0,0 +1,125 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_z24_z24_z0: +** add {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z0, svuint16x4_t, svuint16_t, z24, + svadd_single_u16_x4 (z24, z0), + svadd (z24, z0)) + +/* +** add_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** | +** add {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (add_z24_z28_z0, svuint16x4_t, svuint16_t, z24, + svadd_single_u16_x4 (z28, z0), + svadd (z28, z0)) + +/* +** add_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (add_z24_z1_z0, svuint16x4_t, svuint16_t, z24, + svadd_single_u16_x4 (z1, z0), + svadd (z1, z0)) + +/* +** add_z1_z24_z0: +** add {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z24_z0, svuint16x4_t, svuint16_t, z1, + svadd_single_u16_x4 (z24, z0), + svadd (z24, z0)) + +/* +** add_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z1_z0, svuint16x4_t, svuint16_t, z1, + svadd_single_u16_x4 (z1, z0), + svadd (z1, z0)) + +/* +** add_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z18_z18_z0, svuint16x4_t, svuint16_t, z18, + svadd_single_u16_x4 (z18, z0), + svadd (z18, z0)) + +/* +** add_awkward: +** ... +** add ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (add_awkward, svuint16x4_t, svuint16_t, + z0_res = svadd_single_u16_x4 (z1, z0), + z0_res = svadd (z1, z0)) + +/* +** add_z0_z0_z15: +** ... +** add {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svuint16x4_t, svuint16_t, + z0 = svadd_single_u16_x4 (z0, z15), + z0 = svadd (z0, z15)) + +/* +** add_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** add {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z16, svuint16x4_t, svuint16_t, z24, + svadd_single_u16_x4 (z24, z16), + svadd (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..6672a6a09f3b5afae0e74d434e7cad5ea2117189 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u32_x2.c @@ -0,0 +1,115 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_z24_z24_z0: +** add {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z0, svuint32x2_t, svuint32_t, z24, + svadd_single_u32_x2 (z24, z0), + svadd (z24, z0)) + +/* +** add_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** | +** add {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (add_z24_z28_z0, svuint32x2_t, svuint32_t, z24, + svadd_single_u32_x2 (z28, z0), + svadd (z28, z0)) + +/* +** add_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** add {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (add_z24_z1_z0, svuint32x2_t, svuint32_t, z24, + svadd_single_u32_x2 (z1, z0), + svadd (z1, z0)) + +/* +** add_z1_z24_z0: +** add {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (add_z1_z24_z0, svuint32x2_t, svuint32_t, z1, + svadd_single_u32_x2 (z24, z0), + svadd (z24, z0)) + +/* +** add_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z1_z0, svuint32x2_t, svuint32_t, z1, + svadd_single_u32_x2 (z1, z0), + svadd (z1, z0)) + +/* +** add_z18_z18_z0: +** add {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (add_z18_z18_z0, svuint32x2_t, svuint32_t, z18, + svadd_single_u32_x2 (z18, z0), + svadd (z18, z0)) + +/* +** add_awkward: +** ... +** add ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (add_awkward, svuint32x2_t, svuint32_t, + z0_res = svadd_single_u32_x2 (z1, z0), + z0_res = svadd (z1, z0)) + +/* +** add_z0_z0_z15: +** ... +** add {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svuint32x2_t, svuint32_t, + z0 = svadd_single_u32_x2 (z0, z15), + z0 = svadd (z0, z15)) + +/* +** add_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** add {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z16, svuint32x2_t, svuint32_t, z24, + svadd_single_u32_x2 (z24, z16), + svadd (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..7e3a718e270b54bb69d4e67f52891b8498c195eb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u32_x4.c @@ -0,0 +1,125 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_z24_z24_z0: +** add {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z0, svuint32x4_t, svuint32_t, z24, + svadd_single_u32_x4 (z24, z0), + svadd (z24, z0)) + +/* +** add_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** | +** add {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (add_z24_z28_z0, svuint32x4_t, svuint32_t, z24, + svadd_single_u32_x4 (z28, z0), + svadd (z28, z0)) + +/* +** add_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (add_z24_z1_z0, svuint32x4_t, svuint32_t, z24, + svadd_single_u32_x4 (z1, z0), + svadd (z1, z0)) + +/* +** add_z1_z24_z0: +** add {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z24_z0, svuint32x4_t, svuint32_t, z1, + svadd_single_u32_x4 (z24, z0), + svadd (z24, z0)) + +/* +** add_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z1_z0, svuint32x4_t, svuint32_t, z1, + svadd_single_u32_x4 (z1, z0), + svadd (z1, z0)) + +/* +** add_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z18_z18_z0, svuint32x4_t, svuint32_t, z18, + svadd_single_u32_x4 (z18, z0), + svadd (z18, z0)) + +/* +** add_awkward: +** ... +** add ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (add_awkward, svuint32x4_t, svuint32_t, + z0_res = svadd_single_u32_x4 (z1, z0), + z0_res = svadd (z1, z0)) + +/* +** add_z0_z0_z15: +** ... +** add {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svuint32x4_t, svuint32_t, + z0 = svadd_single_u32_x4 (z0, z15), + z0 = svadd (z0, z15)) + +/* +** add_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** add {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z16, svuint32x4_t, svuint32_t, z24, + svadd_single_u32_x4 (z24, z16), + svadd (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..6800d14ebedd3cc1b2a3bd9c5cf6034bb64eae7a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u64_x2.c @@ -0,0 +1,115 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_z24_z24_z0: +** add {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z0, svuint64x2_t, svuint64_t, z24, + svadd_single_u64_x2 (z24, z0), + svadd (z24, z0)) + +/* +** add_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** | +** add {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (add_z24_z28_z0, svuint64x2_t, svuint64_t, z24, + svadd_single_u64_x2 (z28, z0), + svadd (z28, z0)) + +/* +** add_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** add {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (add_z24_z1_z0, svuint64x2_t, svuint64_t, z24, + svadd_single_u64_x2 (z1, z0), + svadd (z1, z0)) + +/* +** add_z1_z24_z0: +** add {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (add_z1_z24_z0, svuint64x2_t, svuint64_t, z1, + svadd_single_u64_x2 (z24, z0), + svadd (z24, z0)) + +/* +** add_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z1_z0, svuint64x2_t, svuint64_t, z1, + svadd_single_u64_x2 (z1, z0), + svadd (z1, z0)) + +/* +** add_z18_z18_z0: +** add {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (add_z18_z18_z0, svuint64x2_t, svuint64_t, z18, + svadd_single_u64_x2 (z18, z0), + svadd (z18, z0)) + +/* +** add_awkward: +** ... +** add ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (add_awkward, svuint64x2_t, svuint64_t, + z0_res = svadd_single_u64_x2 (z1, z0), + z0_res = svadd (z1, z0)) + +/* +** add_z0_z0_z15: +** ... +** add {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svuint64x2_t, svuint64_t, + z0 = svadd_single_u64_x2 (z0, z15), + z0 = svadd (z0, z15)) + +/* +** add_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** add {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z16, svuint64x2_t, svuint64_t, z24, + svadd_single_u64_x2 (z24, z16), + svadd (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..91ced4cc0ac4dcca5156e7bb3e2eea22c19ef1b3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u64_x4.c @@ -0,0 +1,125 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_z24_z24_z0: +** add {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z0, svuint64x4_t, svuint64_t, z24, + svadd_single_u64_x4 (z24, z0), + svadd (z24, z0)) + +/* +** add_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** | +** add {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (add_z24_z28_z0, svuint64x4_t, svuint64_t, z24, + svadd_single_u64_x4 (z28, z0), + svadd (z28, z0)) + +/* +** add_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (add_z24_z1_z0, svuint64x4_t, svuint64_t, z24, + svadd_single_u64_x4 (z1, z0), + svadd (z1, z0)) + +/* +** add_z1_z24_z0: +** add {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z24_z0, svuint64x4_t, svuint64_t, z1, + svadd_single_u64_x4 (z24, z0), + svadd (z24, z0)) + +/* +** add_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z1_z0, svuint64x4_t, svuint64_t, z1, + svadd_single_u64_x4 (z1, z0), + svadd (z1, z0)) + +/* +** add_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z18_z18_z0, svuint64x4_t, svuint64_t, z18, + svadd_single_u64_x4 (z18, z0), + svadd (z18, z0)) + +/* +** add_awkward: +** ... +** add ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (add_awkward, svuint64x4_t, svuint64_t, + z0_res = svadd_single_u64_x4 (z1, z0), + z0_res = svadd (z1, z0)) + +/* +** add_z0_z0_z15: +** ... +** add {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svuint64x4_t, svuint64_t, + z0 = svadd_single_u64_x4 (z0, z15), + z0 = svadd (z0, z15)) + +/* +** add_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** add {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z16, svuint64x4_t, svuint64_t, z24, + svadd_single_u64_x4 (z24, z16), + svadd (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..d72600997504baf191ae5b54069932cbaef70e38 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u8_x2.c @@ -0,0 +1,115 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_z24_z24_z0: +** add {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z0, svuint8x2_t, svuint8_t, z24, + svadd_single_u8_x2 (z24, z0), + svadd (z24, z0)) + +/* +** add_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** | +** add {z28\.b - z29\.b}, {z28\.b - z29\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (add_z24_z28_z0, svuint8x2_t, svuint8_t, z24, + svadd_single_u8_x2 (z28, z0), + svadd (z28, z0)) + +/* +** add_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** add {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (add_z24_z1_z0, svuint8x2_t, svuint8_t, z24, + svadd_single_u8_x2 (z1, z0), + svadd (z1, z0)) + +/* +** add_z1_z24_z0: +** add {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (add_z1_z24_z0, svuint8x2_t, svuint8_t, z1, + svadd_single_u8_x2 (z24, z0), + svadd (z24, z0)) + +/* +** add_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z1_z0, svuint8x2_t, svuint8_t, z1, + svadd_single_u8_x2 (z1, z0), + svadd (z1, z0)) + +/* +** add_z18_z18_z0: +** add {z18\.b - z19\.b}, {z18\.b - z19\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (add_z18_z18_z0, svuint8x2_t, svuint8_t, z18, + svadd_single_u8_x2 (z18, z0), + svadd (z18, z0)) + +/* +** add_awkward: +** ... +** add ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (add_awkward, svuint8x2_t, svuint8_t, + z0_res = svadd_single_u8_x2 (z1, z0), + z0_res = svadd (z1, z0)) + +/* +** add_z0_z0_z15: +** ... +** add {z0\.b - z1\.b}, {z0\.b - z1\.b}, z15\.b +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svuint8x2_t, svuint8_t, + z0 = svadd_single_u8_x2 (z0, z15), + z0 = svadd (z0, z15)) + +/* +** add_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** add {z24\.b - z25\.b}, {z24\.b - z25\.b}, \1\.b +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z16, svuint8x2_t, svuint8_t, z24, + svadd_single_u8_x2 (z24, z16), + svadd (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..11fa76606b0f2a85dec5e8e791a58a4c20b135f5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_u8_x4.c @@ -0,0 +1,125 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_z24_z24_z0: +** add {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z0, svuint8x4_t, svuint8_t, z24, + svadd_single_u8_x4 (z24, z0), + svadd (z24, z0)) + +/* +** add_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** | +** add {z28\.b - z31\.b}, {z28\.b - z31\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (add_z24_z28_z0, svuint8x4_t, svuint8_t, z24, + svadd_single_u8_x4 (z28, z0), + svadd (z28, z0)) + +/* +** add_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (add_z24_z1_z0, svuint8x4_t, svuint8_t, z24, + svadd_single_u8_x4 (z1, z0), + svadd (z1, z0)) + +/* +** add_z1_z24_z0: +** add {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z24_z0, svuint8x4_t, svuint8_t, z1, + svadd_single_u8_x4 (z24, z0), + svadd (z24, z0)) + +/* +** add_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z1_z1_z0, svuint8x4_t, svuint8_t, z1, + svadd_single_u8_x4 (z1, z0), + svadd (z1, z0)) + +/* +** add_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (add_z18_z18_z0, svuint8x4_t, svuint8_t, z18, + svadd_single_u8_x4 (z18, z0), + svadd (z18, z0)) + +/* +** add_awkward: +** ... +** add ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (add_awkward, svuint8x4_t, svuint8_t, + z0_res = svadd_single_u8_x4 (z1, z0), + z0_res = svadd (z1, z0)) + +/* +** add_z0_z0_z15: +** ... +** add {z0\.b - z3\.b}, {z0\.b - z3\.b}, z15\.b +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (add_z0_z0_z15, svuint8x4_t, svuint8_t, + z0 = svadd_single_u8_x4 (z0, z15), + z0 = svadd (z0, z15)) + +/* +** add_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** add {z24\.b - z27\.b}, {z24\.b - z27\.b}, \1\.b +** ret +*/ +TEST_XN_SINGLE (add_z24_z24_z16, svuint8x4_t, svuint8_t, z24, + svadd_single_u8_x4 (z24, z16), + svadd (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za32_s32_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za32_s32_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..19db69d2e83bc346463af1937a4822a5daa41041 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za32_s32_vg1x2.c @@ -0,0 +1,180 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_write_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_write_0_z0_z0, svint32x2_t, + svadd_write_za32_s32_vg1x2 (0, z0, z0), + svadd_write_za32_vg1x2 (0, z0, z0)) + +/* +** add_write_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_write_w0_z0_z0, svint32x2_t, + svadd_write_za32_s32_vg1x2 (w0, z0, z0), + svadd_write_za32_vg1x2 (w0, z0, z0)) + +/* +** add_write_w8_z0_z4: +** add za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8_z0_z4, svint32x2_t, + svadd_write_za32_s32_vg1x2 (w8, z0, z4), + svadd_write_za32_vg1x2 (w8, z0, z4)) + +/* +** add_write_w8_z4_z18: +** add za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z18\.s - z19\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8_z4_z18, svint32x2_t, + svadd_write_za32_s32_vg1x2 (w8, z4, z18), + svadd_write_za32_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** add_write_w8_z23_z0: +** ... +** add za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8_z23_z0, svint32x2_t, + svadd_write_za32_s32_vg1x2 (w8, z23, z0), + svadd_write_za32_vg1x2 (w8, z23, z0)) + +/* +** add_write_w8_z18_z23: +** ... 
+** add za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s}, [^\n]+ +** ret +*/ +TEST_ZA_XN (add_write_w8_z18_z23, svint32x2_t, + svadd_write_za32_s32_vg1x2 (w8, z18, z23), + svadd_write_za32_vg1x2 (w8, z18, z23)) + +/* +** add_write_w8_z4_z28: +** add za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z28\.s - z29\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8_z4_z28, svint32x2_t, + svadd_write_za32_s32_vg1x2 (w8, z4, z28), + svadd_write_za32_vg1x2 (w8, z4, z28)) + +/* +** add_write_w8p7_z4_z0: +** add za\.s\[w8, 7, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8p7_z4_z0, svint32x2_t, + svadd_write_za32_s32_vg1x2 (w8 + 7, z4, z0), + svadd_write_za32_vg1x2 (w8 + 7, z4, z0)) + +/* +** add_write_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8p8_z4_z4, svint32x2_t, + svadd_write_za32_s32_vg1x2 (w8 + 8, z4, z4), + svadd_write_za32_vg1x2 (w8 + 8, z4, z4)) + +/* +** add_write_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** add za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8m1_z4_z0, svint32x2_t, + svadd_write_za32_s32_vg1x2 (w8 - 1, z4, z0), + svadd_write_za32_vg1x2 (w8 - 1, z4, z0)) + +/* +** add_write_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_0_z1_z0, svint32x2_t, svint32_t, + svadd_write_single_za32_s32_vg1x2 (0, z1, z0), + svadd_write_za32_vg1x2 (0, z1, z0)) + +/* +** add_write_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w0_z1_z0, svint32x2_t, svint32_t, + svadd_write_single_za32_s32_vg1x2 (w0, z1, z0), + svadd_write_za32_vg1x2 (w0, z1, z0)) + +/* +** add_write_single_w8_z1_z0: +** add za\.s\[w8, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8_z1_z0, svint32x2_t, svint32_t, + svadd_write_single_za32_s32_vg1x2 (w8, z1, z0), + svadd_write_za32_vg1x2 (w8, z1, z0)) + +/* +** add_write_single_w8p7_z1_z0: +** add za\.s\[w8, 7, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8p7_z1_z0, svint32x2_t, svint32_t, + svadd_write_single_za32_s32_vg1x2 (w8 + 7, z1, z0), + svadd_write_za32_vg1x2 (w8 + 7, z1, z0)) + +/* +** add_write_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8p8_z1_z0, svint32x2_t, svint32_t, + svadd_write_single_za32_s32_vg1x2 (w8 + 8, z1, z0), + svadd_write_za32_vg1x2 (w8 + 8, z1, z0)) + +/* +** add_write_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** add za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w0m1_z1_z0, svint32x2_t, svint32_t, + svadd_write_single_za32_s32_vg1x2 (w0 - 1, z1, z0), + svadd_write_za32_vg1x2 (w0 - 1, z1, z0)) + +/* +** add_write_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** add za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, z15\.s +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (add_write_single_w8_z0_z15, svint32x2_t, svint32_t, + svadd_write_single_za32_s32_vg1x2 (w8, z0, z15), + svadd_write_za32_vg1x2 (w8, z0, z15)) + +/* +** add_write_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** add za\.s\[w8, 0, vgx2\], {z20\.s - z21\.s}, \1\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8_z20_z16, svint32x2_t, svint32_t, + svadd_write_single_za32_s32_vg1x2 (w8, z20, z16), + svadd_write_za32_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za32_s32_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za32_s32_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..40d0153e8817ffea1084e6a95d7fe617cdc4cc68 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za32_s32_vg1x4.c @@ -0,0 +1,172 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_write_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_write_0_z0_z0, svint32x4_t, + svadd_write_za32_s32_vg1x4 (0, z0, z0), + svadd_write_za32_vg1x4 (0, z0, z0)) + +/* +** add_write_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_write_w0_z0_z0, svint32x4_t, + svadd_write_za32_s32_vg1x4 (w0, z0, z0), + svadd_write_za32_vg1x4 (w0, z0, z0)) + +/* +** add_write_w8_z0_z4: +** add za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8_z0_z4, svint32x4_t, + svadd_write_za32_s32_vg1x4 (w8, z0, z4), + svadd_write_za32_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** add_write_w8_z0_z18: +** ... +** add za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, [^\n]+ +** ret +*/ +TEST_ZA_XN (add_write_w8_z0_z18, svint32x4_t, + svadd_write_za32_s32_vg1x4 (w8, z0, z18), + svadd_write_za32_vg1x4 (w8, z0, z18)) + +/* +** add_write_w8_z18_z28: +** ... +** add za\.s\[w8, 0, vgx4\], [^\n]+, {z28\.s - z31\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8_z18_z28, svint32x4_t, + svadd_write_za32_s32_vg1x4 (w8, z18, z28), + svadd_write_za32_vg1x4 (w8, z18, z28)) + +/* +** add_write_w8_z28_z23: +** ... 
+** add za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}, [^\n]+ +** ret +*/ +TEST_ZA_XN (add_write_w8_z28_z23, svint32x4_t, + svadd_write_za32_s32_vg1x4 (w8, z28, z23), + svadd_write_za32_vg1x4 (w8, z28, z23)) + +/* +** add_write_w8p7_z4_z0: +** add za\.s\[w8, 7, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8p7_z4_z0, svint32x4_t, + svadd_write_za32_s32_vg1x4 (w8 + 7, z4, z0), + svadd_write_za32_vg1x4 (w8 + 7, z4, z0)) + +/* +** add_write_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8p8_z4_z4, svint32x4_t, + svadd_write_za32_s32_vg1x4 (w8 + 8, z4, z4), + svadd_write_za32_vg1x4 (w8 + 8, z4, z4)) + +/* +** add_write_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** add za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8m1_z4_z0, svint32x4_t, + svadd_write_za32_s32_vg1x4 (w8 - 1, z4, z0), + svadd_write_za32_vg1x4 (w8 - 1, z4, z0)) + +/* +** add_write_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_0_z1_z0, svint32x4_t, svint32_t, + svadd_write_single_za32_s32_vg1x4 (0, z1, z0), + svadd_write_za32_vg1x4 (0, z1, z0)) + +/* +** add_write_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w0_z1_z0, svint32x4_t, svint32_t, + svadd_write_single_za32_s32_vg1x4 (w0, z1, z0), + svadd_write_za32_vg1x4 (w0, z1, z0)) + +/* +** add_write_single_w8_z1_z0: +** add za\.s\[w8, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8_z1_z0, svint32x4_t, svint32_t, + svadd_write_single_za32_s32_vg1x4 (w8, z1, z0), + svadd_write_za32_vg1x4 (w8, z1, z0)) + +/* +** add_write_single_w8p7_z1_z0: +** add za\.s\[w8, 7, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8p7_z1_z0, svint32x4_t, svint32_t, + svadd_write_single_za32_s32_vg1x4 (w8 + 7, z1, z0), + svadd_write_za32_vg1x4 (w8 + 7, z1, z0)) + +/* +** add_write_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8p8_z1_z0, svint32x4_t, svint32_t, + svadd_write_single_za32_s32_vg1x4 (w8 + 8, z1, z0), + svadd_write_za32_vg1x4 (w8 + 8, z1, z0)) + +/* +** add_write_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** add za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w0m1_z1_z0, svint32x4_t, svint32_t, + svadd_write_single_za32_s32_vg1x4 (w0 - 1, z1, z0), + svadd_write_za32_vg1x4 (w0 - 1, z1, z0)) + +/* +** add_write_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** add za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, z15\.s +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (add_write_single_w8_z0_z15, svint32x4_t, svint32_t, + svadd_write_single_za32_s32_vg1x4 (w8, z0, z15), + svadd_write_za32_vg1x4 (w8, z0, z15)) + +/* +** add_write_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** add za\.s\[w8, 0, vgx4\], {z20\.s - z23\.s}, \1\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8_z20_z16, svint32x4_t, svint32_t, + svadd_write_single_za32_s32_vg1x4 (w8, z20, z16), + svadd_write_za32_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za32_u32_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za32_u32_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..65851f1076eaa1a4f6a24daba2e01ff2e81e9c65 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za32_u32_vg1x2.c @@ -0,0 +1,180 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_write_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_write_0_z0_z0, svuint32x2_t, + svadd_write_za32_u32_vg1x2 (0, z0, z0), + svadd_write_za32_vg1x2 (0, z0, z0)) + +/* +** add_write_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_write_w0_z0_z0, svuint32x2_t, + svadd_write_za32_u32_vg1x2 (w0, z0, z0), + svadd_write_za32_vg1x2 (w0, z0, z0)) + +/* +** add_write_w8_z0_z4: +** add za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8_z0_z4, svuint32x2_t, + svadd_write_za32_u32_vg1x2 (w8, z0, z4), + svadd_write_za32_vg1x2 (w8, z0, z4)) + +/* +** add_write_w8_z4_z18: +** add za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z18\.s - z19\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8_z4_z18, svuint32x2_t, + svadd_write_za32_u32_vg1x2 (w8, z4, z18), + svadd_write_za32_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** add_write_w8_z23_z0: +** ... +** add za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8_z23_z0, svuint32x2_t, + svadd_write_za32_u32_vg1x2 (w8, z23, z0), + svadd_write_za32_vg1x2 (w8, z23, z0)) + +/* +** add_write_w8_z18_z23: +** ... 
+** add za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s}, [^\n]+ +** ret +*/ +TEST_ZA_XN (add_write_w8_z18_z23, svuint32x2_t, + svadd_write_za32_u32_vg1x2 (w8, z18, z23), + svadd_write_za32_vg1x2 (w8, z18, z23)) + +/* +** add_write_w8_z4_z28: +** add za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z28\.s - z29\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8_z4_z28, svuint32x2_t, + svadd_write_za32_u32_vg1x2 (w8, z4, z28), + svadd_write_za32_vg1x2 (w8, z4, z28)) + +/* +** add_write_w8p7_z4_z0: +** add za\.s\[w8, 7, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8p7_z4_z0, svuint32x2_t, + svadd_write_za32_u32_vg1x2 (w8 + 7, z4, z0), + svadd_write_za32_vg1x2 (w8 + 7, z4, z0)) + +/* +** add_write_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8p8_z4_z4, svuint32x2_t, + svadd_write_za32_u32_vg1x2 (w8 + 8, z4, z4), + svadd_write_za32_vg1x2 (w8 + 8, z4, z4)) + +/* +** add_write_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** add za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8m1_z4_z0, svuint32x2_t, + svadd_write_za32_u32_vg1x2 (w8 - 1, z4, z0), + svadd_write_za32_vg1x2 (w8 - 1, z4, z0)) + +/* +** add_write_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_0_z1_z0, svuint32x2_t, svuint32_t, + svadd_write_single_za32_u32_vg1x2 (0, z1, z0), + svadd_write_za32_vg1x2 (0, z1, z0)) + +/* +** add_write_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w0_z1_z0, svuint32x2_t, svuint32_t, + svadd_write_single_za32_u32_vg1x2 (w0, z1, z0), + svadd_write_za32_vg1x2 (w0, z1, z0)) + +/* +** add_write_single_w8_z1_z0: +** add za\.s\[w8, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8_z1_z0, svuint32x2_t, svuint32_t, + svadd_write_single_za32_u32_vg1x2 (w8, z1, z0), + svadd_write_za32_vg1x2 (w8, z1, z0)) + +/* +** add_write_single_w8p7_z1_z0: +** add za\.s\[w8, 7, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8p7_z1_z0, svuint32x2_t, svuint32_t, + svadd_write_single_za32_u32_vg1x2 (w8 + 7, z1, z0), + svadd_write_za32_vg1x2 (w8 + 7, z1, z0)) + +/* +** add_write_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8p8_z1_z0, svuint32x2_t, svuint32_t, + svadd_write_single_za32_u32_vg1x2 (w8 + 8, z1, z0), + svadd_write_za32_vg1x2 (w8 + 8, z1, z0)) + +/* +** add_write_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** add za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w0m1_z1_z0, svuint32x2_t, svuint32_t, + svadd_write_single_za32_u32_vg1x2 (w0 - 1, z1, z0), + svadd_write_za32_vg1x2 (w0 - 1, z1, z0)) + +/* +** add_write_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** add za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, z15\.s +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (add_write_single_w8_z0_z15, svuint32x2_t, svuint32_t, + svadd_write_single_za32_u32_vg1x2 (w8, z0, z15), + svadd_write_za32_vg1x2 (w8, z0, z15)) + +/* +** add_write_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** add za\.s\[w8, 0, vgx2\], {z20\.s - z21\.s}, \1\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8_z20_z16, svuint32x2_t, svuint32_t, + svadd_write_single_za32_u32_vg1x2 (w8, z20, z16), + svadd_write_za32_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za32_u32_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za32_u32_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..747d9c15e9f34a974b1a9cb4e6822f4eecdd41b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za32_u32_vg1x4.c @@ -0,0 +1,172 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_write_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_write_0_z0_z0, svuint32x4_t, + svadd_write_za32_u32_vg1x4 (0, z0, z0), + svadd_write_za32_vg1x4 (0, z0, z0)) + +/* +** add_write_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_write_w0_z0_z0, svuint32x4_t, + svadd_write_za32_u32_vg1x4 (w0, z0, z0), + svadd_write_za32_vg1x4 (w0, z0, z0)) + +/* +** add_write_w8_z0_z4: +** add za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8_z0_z4, svuint32x4_t, + svadd_write_za32_u32_vg1x4 (w8, z0, z4), + svadd_write_za32_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** add_write_w8_z0_z18: +** ... +** add za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, [^\n]+ +** ret +*/ +TEST_ZA_XN (add_write_w8_z0_z18, svuint32x4_t, + svadd_write_za32_u32_vg1x4 (w8, z0, z18), + svadd_write_za32_vg1x4 (w8, z0, z18)) + +/* +** add_write_w8_z18_z28: +** ... +** add za\.s\[w8, 0, vgx4\], [^\n]+, {z28\.s - z31\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8_z18_z28, svuint32x4_t, + svadd_write_za32_u32_vg1x4 (w8, z18, z28), + svadd_write_za32_vg1x4 (w8, z18, z28)) + +/* +** add_write_w8_z28_z23: +** ... 
+** add za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}, [^\n]+ +** ret +*/ +TEST_ZA_XN (add_write_w8_z28_z23, svuint32x4_t, + svadd_write_za32_u32_vg1x4 (w8, z28, z23), + svadd_write_za32_vg1x4 (w8, z28, z23)) + +/* +** add_write_w8p7_z4_z0: +** add za\.s\[w8, 7, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8p7_z4_z0, svuint32x4_t, + svadd_write_za32_u32_vg1x4 (w8 + 7, z4, z0), + svadd_write_za32_vg1x4 (w8 + 7, z4, z0)) + +/* +** add_write_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8p8_z4_z4, svuint32x4_t, + svadd_write_za32_u32_vg1x4 (w8 + 8, z4, z4), + svadd_write_za32_vg1x4 (w8 + 8, z4, z4)) + +/* +** add_write_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** add za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_write_w8m1_z4_z0, svuint32x4_t, + svadd_write_za32_u32_vg1x4 (w8 - 1, z4, z0), + svadd_write_za32_vg1x4 (w8 - 1, z4, z0)) + +/* +** add_write_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_0_z1_z0, svuint32x4_t, svuint32_t, + svadd_write_single_za32_u32_vg1x4 (0, z1, z0), + svadd_write_za32_vg1x4 (0, z1, z0)) + +/* +** add_write_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w0_z1_z0, svuint32x4_t, svuint32_t, + svadd_write_single_za32_u32_vg1x4 (w0, z1, z0), + svadd_write_za32_vg1x4 (w0, z1, z0)) + +/* +** add_write_single_w8_z1_z0: +** add za\.s\[w8, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8_z1_z0, svuint32x4_t, svuint32_t, + svadd_write_single_za32_u32_vg1x4 (w8, z1, z0), + svadd_write_za32_vg1x4 (w8, z1, z0)) + +/* +** add_write_single_w8p7_z1_z0: +** add za\.s\[w8, 7, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8p7_z1_z0, svuint32x4_t, svuint32_t, + svadd_write_single_za32_u32_vg1x4 (w8 + 7, z1, z0), + svadd_write_za32_vg1x4 (w8 + 7, z1, z0)) + +/* +** add_write_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8p8_z1_z0, svuint32x4_t, svuint32_t, + svadd_write_single_za32_u32_vg1x4 (w8 + 8, z1, z0), + svadd_write_za32_vg1x4 (w8 + 8, z1, z0)) + +/* +** add_write_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** add za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w0m1_z1_z0, svuint32x4_t, svuint32_t, + svadd_write_single_za32_u32_vg1x4 (w0 - 1, z1, z0), + svadd_write_za32_vg1x4 (w0 - 1, z1, z0)) + +/* +** add_write_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** add za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, z15\.s +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (add_write_single_w8_z0_z15, svuint32x4_t, svuint32_t, + svadd_write_single_za32_u32_vg1x4 (w8, z0, z15), + svadd_write_za32_vg1x4 (w8, z0, z15)) + +/* +** add_write_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** add za\.s\[w8, 0, vgx4\], {z20\.s - z23\.s}, \1\.s +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8_z20_z16, svuint32x4_t, svuint32_t, + svadd_write_single_za32_u32_vg1x4 (w8, z20, z16), + svadd_write_za32_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za64_s64_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za64_s64_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..05e6d300f1972978392b774eb4cc7b7c04e03f65 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za64_s64_vg1x2.c @@ -0,0 +1,182 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** add_write_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_write_0_z0_z0, svint64x2_t, + svadd_write_za64_s64_vg1x2 (0, z0, z0), + svadd_write_za64_vg1x2 (0, z0, z0)) + +/* +** add_write_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_write_w0_z0_z0, svint64x2_t, + svadd_write_za64_s64_vg1x2 (w0, z0, z0), + svadd_write_za64_vg1x2 (w0, z0, z0)) + +/* +** add_write_w8_z0_z4: +** add za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8_z0_z4, svint64x2_t, + svadd_write_za64_s64_vg1x2 (w8, z0, z4), + svadd_write_za64_vg1x2 (w8, z0, z4)) + +/* +** add_write_w8_z4_z18: +** add za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z18\.d - z19\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8_z4_z18, svint64x2_t, + svadd_write_za64_s64_vg1x2 (w8, z4, z18), + svadd_write_za64_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** add_write_w8_z23_z0: +** ... +** add za\.d\[w8, 0, vgx2\], [^\n]+, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8_z23_z0, svint64x2_t, + svadd_write_za64_s64_vg1x2 (w8, z23, z0), + svadd_write_za64_vg1x2 (w8, z23, z0)) + +/* +** add_write_w8_z18_z23: +** ... 
+** add za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}, [^\n]+ +** ret +*/ +TEST_ZA_XN (add_write_w8_z18_z23, svint64x2_t, + svadd_write_za64_s64_vg1x2 (w8, z18, z23), + svadd_write_za64_vg1x2 (w8, z18, z23)) + +/* +** add_write_w8_z4_z28: +** add za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z28\.d - z29\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8_z4_z28, svint64x2_t, + svadd_write_za64_s64_vg1x2 (w8, z4, z28), + svadd_write_za64_vg1x2 (w8, z4, z28)) + +/* +** add_write_w8p7_z4_z0: +** add za\.d\[w8, 7, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8p7_z4_z0, svint64x2_t, + svadd_write_za64_s64_vg1x2 (w8 + 7, z4, z0), + svadd_write_za64_vg1x2 (w8 + 7, z4, z0)) + +/* +** add_write_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8p8_z4_z4, svint64x2_t, + svadd_write_za64_s64_vg1x2 (w8 + 8, z4, z4), + svadd_write_za64_vg1x2 (w8 + 8, z4, z4)) + +/* +** add_write_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** add za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8m1_z4_z0, svint64x2_t, + svadd_write_za64_s64_vg1x2 (w8 - 1, z4, z0), + svadd_write_za64_vg1x2 (w8 - 1, z4, z0)) + +/* +** add_write_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_0_z1_z0, svint64x2_t, svint64_t, + svadd_write_single_za64_s64_vg1x2 (0, z1, z0), + svadd_write_za64_vg1x2 (0, z1, z0)) + +/* +** add_write_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w0_z1_z0, svint64x2_t, svint64_t, + svadd_write_single_za64_s64_vg1x2 (w0, z1, z0), + svadd_write_za64_vg1x2 (w0, z1, z0)) + +/* +** add_write_single_w8_z1_z0: +** add za\.d\[w8, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8_z1_z0, svint64x2_t, svint64_t, + svadd_write_single_za64_s64_vg1x2 (w8, z1, z0), + svadd_write_za64_vg1x2 (w8, z1, z0)) + +/* +** add_write_single_w8p7_z1_z0: +** add za\.d\[w8, 7, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8p7_z1_z0, svint64x2_t, svint64_t, + svadd_write_single_za64_s64_vg1x2 (w8 + 7, z1, z0), + svadd_write_za64_vg1x2 (w8 + 7, z1, z0)) + +/* +** add_write_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8p8_z1_z0, svint64x2_t, svint64_t, + svadd_write_single_za64_s64_vg1x2 (w8 + 8, z1, z0), + svadd_write_za64_vg1x2 (w8 + 8, z1, z0)) + +/* +** add_write_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** add za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w0m1_z1_z0, svint64x2_t, svint64_t, + svadd_write_single_za64_s64_vg1x2 (w0 - 1, z1, z0), + svadd_write_za64_vg1x2 (w0 - 1, z1, z0)) + +/* +** add_write_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** add za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, z15\.d +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (add_write_single_w8_z0_z15, svint64x2_t, svint64_t, + svadd_write_single_za64_s64_vg1x2 (w8, z0, z15), + svadd_write_za64_vg1x2 (w8, z0, z15)) + +/* +** add_write_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** add za\.d\[w8, 0, vgx2\], {z20\.d - z21\.d}, \1\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8_z20_z16, svint64x2_t, svint64_t, + svadd_write_single_za64_s64_vg1x2 (w8, z20, z16), + svadd_write_za64_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za64_s64_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za64_s64_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..1c694901902e7fbbd52d002eec5009e592e78ad0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za64_s64_vg1x4.c @@ -0,0 +1,174 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** add_write_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_write_0_z0_z0, svint64x4_t, + svadd_write_za64_s64_vg1x4 (0, z0, z0), + svadd_write_za64_vg1x4 (0, z0, z0)) + +/* +** add_write_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_write_w0_z0_z0, svint64x4_t, + svadd_write_za64_s64_vg1x4 (w0, z0, z0), + svadd_write_za64_vg1x4 (w0, z0, z0)) + +/* +** add_write_w8_z0_z4: +** add za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8_z0_z4, svint64x4_t, + svadd_write_za64_s64_vg1x4 (w8, z0, z4), + svadd_write_za64_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** add_write_w8_z0_z18: +** ... +** add za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, [^\n]+ +** ret +*/ +TEST_ZA_XN (add_write_w8_z0_z18, svint64x4_t, + svadd_write_za64_s64_vg1x4 (w8, z0, z18), + svadd_write_za64_vg1x4 (w8, z0, z18)) + +/* +** add_write_w8_z18_z28: +** ... +** add za\.d\[w8, 0, vgx4\], [^\n]+, {z28\.d - z31\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8_z18_z28, svint64x4_t, + svadd_write_za64_s64_vg1x4 (w8, z18, z28), + svadd_write_za64_vg1x4 (w8, z18, z28)) + +/* +** add_write_w8_z28_z23: +** ... 
+** add za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}, [^\n]+ +** ret +*/ +TEST_ZA_XN (add_write_w8_z28_z23, svint64x4_t, + svadd_write_za64_s64_vg1x4 (w8, z28, z23), + svadd_write_za64_vg1x4 (w8, z28, z23)) + +/* +** add_write_w8p7_z4_z0: +** add za\.d\[w8, 7, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8p7_z4_z0, svint64x4_t, + svadd_write_za64_s64_vg1x4 (w8 + 7, z4, z0), + svadd_write_za64_vg1x4 (w8 + 7, z4, z0)) + +/* +** add_write_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8p8_z4_z4, svint64x4_t, + svadd_write_za64_s64_vg1x4 (w8 + 8, z4, z4), + svadd_write_za64_vg1x4 (w8 + 8, z4, z4)) + +/* +** add_write_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** add za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8m1_z4_z0, svint64x4_t, + svadd_write_za64_s64_vg1x4 (w8 - 1, z4, z0), + svadd_write_za64_vg1x4 (w8 - 1, z4, z0)) + +/* +** add_write_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_0_z1_z0, svint64x4_t, svint64_t, + svadd_write_single_za64_s64_vg1x4 (0, z1, z0), + svadd_write_za64_vg1x4 (0, z1, z0)) + +/* +** add_write_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w0_z1_z0, svint64x4_t, svint64_t, + svadd_write_single_za64_s64_vg1x4 (w0, z1, z0), + svadd_write_za64_vg1x4 (w0, z1, z0)) + +/* +** add_write_single_w8_z1_z0: +** add za\.d\[w8, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8_z1_z0, svint64x4_t, svint64_t, + svadd_write_single_za64_s64_vg1x4 (w8, z1, z0), + svadd_write_za64_vg1x4 (w8, z1, z0)) + +/* +** add_write_single_w8p7_z1_z0: +** add za\.d\[w8, 7, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8p7_z1_z0, svint64x4_t, svint64_t, + svadd_write_single_za64_s64_vg1x4 (w8 + 7, z1, z0), + svadd_write_za64_vg1x4 (w8 + 7, z1, z0)) + +/* +** add_write_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8p8_z1_z0, svint64x4_t, svint64_t, + svadd_write_single_za64_s64_vg1x4 (w8 + 8, z1, z0), + svadd_write_za64_vg1x4 (w8 + 8, z1, z0)) + +/* +** add_write_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** add za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w0m1_z1_z0, svint64x4_t, svint64_t, + svadd_write_single_za64_s64_vg1x4 (w0 - 1, z1, z0), + svadd_write_za64_vg1x4 (w0 - 1, z1, z0)) + +/* +** add_write_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** add za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, z15\.d +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (add_write_single_w8_z0_z15, svint64x4_t, svint64_t, + svadd_write_single_za64_s64_vg1x4 (w8, z0, z15), + svadd_write_za64_vg1x4 (w8, z0, z15)) + +/* +** add_write_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** add za\.d\[w8, 0, vgx4\], {z20\.d - z23\.d}, \1\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8_z20_z16, svint64x4_t, svint64_t, + svadd_write_single_za64_s64_vg1x4 (w8, z20, z16), + svadd_write_za64_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za64_u64_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za64_u64_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..1c314017981870f1b4bfeda57b36d7a082dec0f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za64_u64_vg1x2.c @@ -0,0 +1,182 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** add_write_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_write_0_z0_z0, svuint64x2_t, + svadd_write_za64_u64_vg1x2 (0, z0, z0), + svadd_write_za64_vg1x2 (0, z0, z0)) + +/* +** add_write_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_write_w0_z0_z0, svuint64x2_t, + svadd_write_za64_u64_vg1x2 (w0, z0, z0), + svadd_write_za64_vg1x2 (w0, z0, z0)) + +/* +** add_write_w8_z0_z4: +** add za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8_z0_z4, svuint64x2_t, + svadd_write_za64_u64_vg1x2 (w8, z0, z4), + svadd_write_za64_vg1x2 (w8, z0, z4)) + +/* +** add_write_w8_z4_z18: +** add za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z18\.d - z19\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8_z4_z18, svuint64x2_t, + svadd_write_za64_u64_vg1x2 (w8, z4, z18), + svadd_write_za64_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** add_write_w8_z23_z0: +** ... +** add za\.d\[w8, 0, vgx2\], [^\n]+, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8_z23_z0, svuint64x2_t, + svadd_write_za64_u64_vg1x2 (w8, z23, z0), + svadd_write_za64_vg1x2 (w8, z23, z0)) + +/* +** add_write_w8_z18_z23: +** ... 
+** add za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}, [^\n]+ +** ret +*/ +TEST_ZA_XN (add_write_w8_z18_z23, svuint64x2_t, + svadd_write_za64_u64_vg1x2 (w8, z18, z23), + svadd_write_za64_vg1x2 (w8, z18, z23)) + +/* +** add_write_w8_z4_z28: +** add za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z28\.d - z29\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8_z4_z28, svuint64x2_t, + svadd_write_za64_u64_vg1x2 (w8, z4, z28), + svadd_write_za64_vg1x2 (w8, z4, z28)) + +/* +** add_write_w8p7_z4_z0: +** add za\.d\[w8, 7, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8p7_z4_z0, svuint64x2_t, + svadd_write_za64_u64_vg1x2 (w8 + 7, z4, z0), + svadd_write_za64_vg1x2 (w8 + 7, z4, z0)) + +/* +** add_write_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8p8_z4_z4, svuint64x2_t, + svadd_write_za64_u64_vg1x2 (w8 + 8, z4, z4), + svadd_write_za64_vg1x2 (w8 + 8, z4, z4)) + +/* +** add_write_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** add za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8m1_z4_z0, svuint64x2_t, + svadd_write_za64_u64_vg1x2 (w8 - 1, z4, z0), + svadd_write_za64_vg1x2 (w8 - 1, z4, z0)) + +/* +** add_write_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_0_z1_z0, svuint64x2_t, svuint64_t, + svadd_write_single_za64_u64_vg1x2 (0, z1, z0), + svadd_write_za64_vg1x2 (0, z1, z0)) + +/* +** add_write_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w0_z1_z0, svuint64x2_t, svuint64_t, + svadd_write_single_za64_u64_vg1x2 (w0, z1, z0), + svadd_write_za64_vg1x2 (w0, z1, z0)) + +/* +** add_write_single_w8_z1_z0: +** add za\.d\[w8, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8_z1_z0, svuint64x2_t, svuint64_t, + svadd_write_single_za64_u64_vg1x2 (w8, z1, z0), + svadd_write_za64_vg1x2 (w8, z1, z0)) + +/* +** add_write_single_w8p7_z1_z0: +** add za\.d\[w8, 7, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8p7_z1_z0, svuint64x2_t, svuint64_t, + svadd_write_single_za64_u64_vg1x2 (w8 + 7, z1, z0), + svadd_write_za64_vg1x2 (w8 + 7, z1, z0)) + +/* +** add_write_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8p8_z1_z0, svuint64x2_t, svuint64_t, + svadd_write_single_za64_u64_vg1x2 (w8 + 8, z1, z0), + svadd_write_za64_vg1x2 (w8 + 8, z1, z0)) + +/* +** add_write_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** add za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w0m1_z1_z0, svuint64x2_t, svuint64_t, + svadd_write_single_za64_u64_vg1x2 (w0 - 1, z1, z0), + svadd_write_za64_vg1x2 (w0 - 1, z1, z0)) + +/* +** add_write_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** add za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, z15\.d +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (add_write_single_w8_z0_z15, svuint64x2_t, svuint64_t, + svadd_write_single_za64_u64_vg1x2 (w8, z0, z15), + svadd_write_za64_vg1x2 (w8, z0, z15)) + +/* +** add_write_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** add za\.d\[w8, 0, vgx2\], {z20\.d - z21\.d}, \1\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8_z20_z16, svuint64x2_t, svuint64_t, + svadd_write_single_za64_u64_vg1x2 (w8, z20, z16), + svadd_write_za64_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za64_u64_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za64_u64_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..8574e6c82fba53d978b263408f10da1c824aa5f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_write_za64_u64_vg1x4.c @@ -0,0 +1,174 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** add_write_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_write_0_z0_z0, svuint64x4_t, + svadd_write_za64_u64_vg1x4 (0, z0, z0), + svadd_write_za64_vg1x4 (0, z0, z0)) + +/* +** add_write_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_write_w0_z0_z0, svuint64x4_t, + svadd_write_za64_u64_vg1x4 (w0, z0, z0), + svadd_write_za64_vg1x4 (w0, z0, z0)) + +/* +** add_write_w8_z0_z4: +** add za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8_z0_z4, svuint64x4_t, + svadd_write_za64_u64_vg1x4 (w8, z0, z4), + svadd_write_za64_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** add_write_w8_z0_z18: +** ... +** add za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, [^\n]+ +** ret +*/ +TEST_ZA_XN (add_write_w8_z0_z18, svuint64x4_t, + svadd_write_za64_u64_vg1x4 (w8, z0, z18), + svadd_write_za64_vg1x4 (w8, z0, z18)) + +/* +** add_write_w8_z18_z28: +** ... +** add za\.d\[w8, 0, vgx4\], [^\n]+, {z28\.d - z31\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8_z18_z28, svuint64x4_t, + svadd_write_za64_u64_vg1x4 (w8, z18, z28), + svadd_write_za64_vg1x4 (w8, z18, z28)) + +/* +** add_write_w8_z28_z23: +** ... 
+** add za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}, [^\n]+ +** ret +*/ +TEST_ZA_XN (add_write_w8_z28_z23, svuint64x4_t, + svadd_write_za64_u64_vg1x4 (w8, z28, z23), + svadd_write_za64_vg1x4 (w8, z28, z23)) + +/* +** add_write_w8p7_z4_z0: +** add za\.d\[w8, 7, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8p7_z4_z0, svuint64x4_t, + svadd_write_za64_u64_vg1x4 (w8 + 7, z4, z0), + svadd_write_za64_vg1x4 (w8 + 7, z4, z0)) + +/* +** add_write_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8p8_z4_z4, svuint64x4_t, + svadd_write_za64_u64_vg1x4 (w8 + 8, z4, z4), + svadd_write_za64_vg1x4 (w8 + 8, z4, z4)) + +/* +** add_write_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** add za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_write_w8m1_z4_z0, svuint64x4_t, + svadd_write_za64_u64_vg1x4 (w8 - 1, z4, z0), + svadd_write_za64_vg1x4 (w8 - 1, z4, z0)) + +/* +** add_write_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_0_z1_z0, svuint64x4_t, svuint64_t, + svadd_write_single_za64_u64_vg1x4 (0, z1, z0), + svadd_write_za64_vg1x4 (0, z1, z0)) + +/* +** add_write_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w0_z1_z0, svuint64x4_t, svuint64_t, + svadd_write_single_za64_u64_vg1x4 (w0, z1, z0), + svadd_write_za64_vg1x4 (w0, z1, z0)) + +/* +** add_write_single_w8_z1_z0: +** add za\.d\[w8, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8_z1_z0, svuint64x4_t, svuint64_t, + svadd_write_single_za64_u64_vg1x4 (w8, z1, z0), + svadd_write_za64_vg1x4 (w8, z1, z0)) + +/* +** add_write_single_w8p7_z1_z0: +** add za\.d\[w8, 7, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8p7_z1_z0, svuint64x4_t, svuint64_t, + svadd_write_single_za64_u64_vg1x4 (w8 + 7, z1, z0), + svadd_write_za64_vg1x4 (w8 + 7, z1, z0)) + +/* +** add_write_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8p8_z1_z0, svuint64x4_t, svuint64_t, + svadd_write_single_za64_u64_vg1x4 (w8 + 8, z1, z0), + svadd_write_za64_vg1x4 (w8 + 8, z1, z0)) + +/* +** add_write_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** add za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w0m1_z1_z0, svuint64x4_t, svuint64_t, + svadd_write_single_za64_u64_vg1x4 (w0 - 1, z1, z0), + svadd_write_za64_vg1x4 (w0 - 1, z1, z0)) + +/* +** add_write_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** add za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, z15\.d +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (add_write_single_w8_z0_z15, svuint64x4_t, svuint64_t, + svadd_write_single_za64_u64_vg1x4 (w8, z0, z15), + svadd_write_za64_vg1x4 (w8, z0, z15)) + +/* +** add_write_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** add za\.d\[w8, 0, vgx4\], {z20\.d - z23\.d}, \1\.d +** ret +*/ +TEST_ZA_SINGLE (add_write_single_w8_z20_z16, svuint64x4_t, svuint64_t, + svadd_write_single_za64_u64_vg1x4 (w8, z20, z16), + svadd_write_za64_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_f32_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_f32_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..25e7217a6a8d96403d92e0cabcdb629c30c18646 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_f32_vg1x2.c @@ -0,0 +1,122 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_0_z0: +** mov (w8|w9|w10|w11), #?0 +** fadd za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_0_z0, svfloat32x2_t, + svadd_za32_f32_vg1x2 (0, z0), + svadd_za32_vg1x2 (0, z0)) + +/* +** add_w0_z0: +** mov (w8|w9|w10|w11), w0 +** fadd za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w0_z0, svfloat32x2_t, + svadd_za32_f32_vg1x2 (w0, z0), + svadd_za32_vg1x2 (w0, z0)) + +/* +** add_w7_z0: +** mov (w8|w9|w10|w11), w7 +** fadd za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w7_z0, svfloat32x2_t, + svadd_za32_f32_vg1x2 (w7, z0), + svadd_za32_vg1x2 (w7, z0)) + +/* +** add_w8_z0: +** fadd za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w8_z0, svfloat32x2_t, + svadd_za32_f32_vg1x2 (w8, z0), + svadd_za32_vg1x2 (w8, z0)) + +/* +** add_w11_z0: +** fadd za\.s\[w11, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w11_z0, svfloat32x2_t, + svadd_za32_f32_vg1x2 (w11, z0), + svadd_za32_vg1x2 (w11, z0)) + + +/* +** add_w12_z0: +** mov (w8|w9|w10|w11), w12 +** fadd za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w12_z0, svfloat32x2_t, + svadd_za32_f32_vg1x2 (w12, z0), + svadd_za32_vg1x2 (w12, z0)) + +/* +** add_w8p7_z0: +** fadd za\.s\[w8, 7, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w8p7_z0, svfloat32x2_t, + svadd_za32_f32_vg1x2 (w8 + 7, z0), + svadd_za32_vg1x2 (w8 + 7, z0)) + +/* +** add_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fadd za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w8p8_z0, svfloat32x2_t, + svadd_za32_f32_vg1x2 (w8 + 8, z0), + svadd_za32_vg1x2 (w8 + 8, z0)) + +/* +** add_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fadd za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w8m1_z0, svfloat32x2_t, + svadd_za32_f32_vg1x2 (w8 - 1, z0), + svadd_za32_vg1x2 (w8 - 1, z0)) + +/* +** add_w8_z18: +** fadd za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s} +** ret +*/ +TEST_ZA_XN (add_w8_z18, svfloat32x2_t, + svadd_za32_f32_vg1x2 (w8, z18), + svadd_za32_vg1x2 (w8, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. 
*/ + +/* +** add_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** fadd za\.s\[w8, 0, vgx2\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z23, svfloat32x2_t, + svadd_za32_f32_vg1x2 (w8, z23), + svadd_za32_vg1x2 (w8, z23)) + +/* +** add_w8_z28: +** fadd za\.s\[w8, 0, vgx2\], {z28\.s - z29\.s} +** ret +*/ +TEST_ZA_XN (add_w8_z28, svfloat32x2_t, + svadd_za32_f32_vg1x2 (w8, z28), + svadd_za32_vg1x2 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_f32_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_f32_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..3fb9da07eed830b6b4123f41723ba82be2b46c18 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_f32_vg1x4.c @@ -0,0 +1,137 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_0_z0: +** mov (w8|w9|w10|w11), #?0 +** fadd za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_0_z0, svfloat32x4_t, + svadd_za32_f32_vg1x4 (0, z0), + svadd_za32_vg1x4 (0, z0)) + +/* +** add_w0_z0: +** mov (w8|w9|w10|w11), w0 +** fadd za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w0_z0, svfloat32x4_t, + svadd_za32_f32_vg1x4 (w0, z0), + svadd_za32_vg1x4 (w0, z0)) + +/* +** add_w7_z0: +** mov (w8|w9|w10|w11), w7 +** fadd za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w7_z0, svfloat32x4_t, + svadd_za32_f32_vg1x4 (w7, z0), + svadd_za32_vg1x4 (w7, z0)) + +/* +** add_w8_z0: +** fadd za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w8_z0, svfloat32x4_t, + svadd_za32_f32_vg1x4 (w8, z0), + svadd_za32_vg1x4 (w8, z0)) + +/* +** add_w11_z0: +** fadd za\.s\[w11, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w11_z0, svfloat32x4_t, + svadd_za32_f32_vg1x4 (w11, z0), + svadd_za32_vg1x4 (w11, z0)) + + +/* +** add_w12_z0: +** mov (w8|w9|w10|w11), w12 +** fadd za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w12_z0, svfloat32x4_t, + svadd_za32_f32_vg1x4 (w12, z0), + svadd_za32_vg1x4 (w12, z0)) + +/* +** add_w8p7_z0: +** fadd za\.s\[w8, 7, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w8p7_z0, svfloat32x4_t, + svadd_za32_f32_vg1x4 (w8 + 7, z0), + svadd_za32_vg1x4 (w8 + 7, z0)) + +/* +** add_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fadd za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w8p8_z0, svfloat32x4_t, + svadd_za32_f32_vg1x4 (w8 + 8, z0), + svadd_za32_vg1x4 (w8 + 8, z0)) + +/* +** add_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fadd za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w8m1_z0, svfloat32x4_t, + svadd_za32_f32_vg1x4 (w8 - 1, z0), + svadd_za32_vg1x4 (w8 - 1, z0)) + +/* +** add_w8_z4: +** fadd za\.s\[w8, 0, vgx4\], {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (add_w8_z4, svfloat32x4_t, + svadd_za32_f32_vg1x4 (w8, z4), + svadd_za32_vg1x4 (w8, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. 
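+
+   The vgx4 forms require the tuple to start at a register number that is
+   a multiple of four (z0, z4, ..., z28), so z18 and z23 above are first
+   copied into an aligned quadruple with plain MOVs, while z28 is used
+   directly.  A sketch of a call that should need no copies, assuming the
+   incoming tuple is allocated to z0-z3 (the usual case for the first
+   vector argument):
+
+     void acc4 (svfloat32x4_t zn) __arm_streaming __arm_inout("za")
+     {
+       svadd_za32_f32_vg1x4 (0, zn);  // fadd za.s[w8, 0, vgx4], {z0.s - z3.s}
+     }
+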
*/ + +/* +** add_w8_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fadd za\.s\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z18, svfloat32x4_t, + svadd_za32_f32_vg1x4 (w8, z18), + svadd_za32_vg1x4 (w8, z18)) + +/* +** add_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fadd za\.s\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z23, svfloat32x4_t, + svadd_za32_f32_vg1x4 (w8, z23), + svadd_za32_vg1x4 (w8, z23)) + +/* +** add_w8_z28: +** fadd za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s} +** ret +*/ +TEST_ZA_XN (add_w8_z28, svfloat32x4_t, + svadd_za32_f32_vg1x4 (w8, z28), + svadd_za32_vg1x4 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_s32_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_s32_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..424a88aaf9a509e4d6625de6bc512be5d1017874 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_s32_vg1x2.c @@ -0,0 +1,122 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_0_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_0_z0, svint32x2_t, + svadd_za32_s32_vg1x2 (0, z0), + svadd_za32_vg1x2 (0, z0)) + +/* +** add_w0_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w0_z0, svint32x2_t, + svadd_za32_s32_vg1x2 (w0, z0), + svadd_za32_vg1x2 (w0, z0)) + +/* +** add_w7_z0: +** mov (w8|w9|w10|w11), w7 +** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w7_z0, svint32x2_t, + svadd_za32_s32_vg1x2 (w7, z0), + svadd_za32_vg1x2 (w7, z0)) + +/* +** add_w8_z0: +** add za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w8_z0, svint32x2_t, + svadd_za32_s32_vg1x2 (w8, z0), + svadd_za32_vg1x2 (w8, z0)) + +/* +** add_w11_z0: +** add za\.s\[w11, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w11_z0, svint32x2_t, + svadd_za32_s32_vg1x2 (w11, z0), + svadd_za32_vg1x2 (w11, z0)) + + +/* +** add_w12_z0: +** mov (w8|w9|w10|w11), w12 +** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w12_z0, svint32x2_t, + svadd_za32_s32_vg1x2 (w12, z0), + svadd_za32_vg1x2 (w12, z0)) + +/* +** add_w8p7_z0: +** add za\.s\[w8, 7, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w8p7_z0, svint32x2_t, + svadd_za32_s32_vg1x2 (w8 + 7, z0), + svadd_za32_vg1x2 (w8 + 7, z0)) + +/* +** add_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w8p8_z0, svint32x2_t, + svadd_za32_s32_vg1x2 (w8 + 8, z0), + svadd_za32_vg1x2 (w8 + 8, z0)) + +/* +** add_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w8m1_z0, svint32x2_t, + svadd_za32_s32_vg1x2 (w8 - 1, z0), + svadd_za32_vg1x2 (w8 - 1, z0)) + +/* +** add_w8_z18: +** add za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s} +** ret +*/ +TEST_ZA_XN (add_w8_z18, svint32x2_t, + svadd_za32_s32_vg1x2 (w8, z18), + svadd_za32_vg1x2 (w8, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. 
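+
+   The slice operand is encoded as a base register in the range w8-w11
+   plus an immediate offset in the range 0-7, which is why w8 + 7 above
+   folds into the instruction while 0, w0, w12 and w8 + 8 each need a
+   preparatory MOV, ADD or SUB.  A sketch of the boundary, with
+   hypothetical callers:
+
+     void in_range (uint32_t base, svint32x2_t zn)
+       __arm_streaming __arm_inout("za")
+     {
+       svadd_za32_s32_vg1x2 (base + 7, zn);  // offset folds into the insn
+     }
+
+     void out_of_range (uint32_t base, svint32x2_t zn)
+       __arm_streaming __arm_inout("za")
+     {
+       svadd_za32_s32_vg1x2 (base + 8, zn);  // needs a separate add first
+     }
+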
*/ + +/* +** add_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** add za\.s\[w8, 0, vgx2\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z23, svint32x2_t, + svadd_za32_s32_vg1x2 (w8, z23), + svadd_za32_vg1x2 (w8, z23)) + +/* +** add_w8_z28: +** add za\.s\[w8, 0, vgx2\], {z28\.s - z29\.s} +** ret +*/ +TEST_ZA_XN (add_w8_z28, svint32x2_t, + svadd_za32_s32_vg1x2 (w8, z28), + svadd_za32_vg1x2 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_s32_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_s32_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..40b6a39f2b7089221e3678256feff8c8db279804 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_s32_vg1x4.c @@ -0,0 +1,137 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_0_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_0_z0, svint32x4_t, + svadd_za32_s32_vg1x4 (0, z0), + svadd_za32_vg1x4 (0, z0)) + +/* +** add_w0_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w0_z0, svint32x4_t, + svadd_za32_s32_vg1x4 (w0, z0), + svadd_za32_vg1x4 (w0, z0)) + +/* +** add_w7_z0: +** mov (w8|w9|w10|w11), w7 +** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w7_z0, svint32x4_t, + svadd_za32_s32_vg1x4 (w7, z0), + svadd_za32_vg1x4 (w7, z0)) + +/* +** add_w8_z0: +** add za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w8_z0, svint32x4_t, + svadd_za32_s32_vg1x4 (w8, z0), + svadd_za32_vg1x4 (w8, z0)) + +/* +** add_w11_z0: +** add za\.s\[w11, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w11_z0, svint32x4_t, + svadd_za32_s32_vg1x4 (w11, z0), + svadd_za32_vg1x4 (w11, z0)) + + +/* +** add_w12_z0: +** mov (w8|w9|w10|w11), w12 +** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w12_z0, svint32x4_t, + svadd_za32_s32_vg1x4 (w12, z0), + svadd_za32_vg1x4 (w12, z0)) + +/* +** add_w8p7_z0: +** add za\.s\[w8, 7, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w8p7_z0, svint32x4_t, + svadd_za32_s32_vg1x4 (w8 + 7, z0), + svadd_za32_vg1x4 (w8 + 7, z0)) + +/* +** add_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w8p8_z0, svint32x4_t, + svadd_za32_s32_vg1x4 (w8 + 8, z0), + svadd_za32_vg1x4 (w8 + 8, z0)) + +/* +** add_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w8m1_z0, svint32x4_t, + svadd_za32_s32_vg1x4 (w8 - 1, z0), + svadd_za32_vg1x4 (w8 - 1, z0)) + +/* +** add_w8_z4: +** add za\.s\[w8, 0, vgx4\], {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (add_w8_z4, svint32x4_t, + svadd_za32_s32_vg1x4 (w8, z4), + svadd_za32_vg1x4 (w8, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. 
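+
+   Each test also exercises the type-generic overload (the second argument
+   to TEST_ZA_XN): svadd_za32_vg1x4 resolves on the element type of its
+   tuple argument, so the spellings below should all compile to the same
+   instruction (a sketch):
+
+     void generic (svint32x4_t s, svuint32x4_t u)
+       __arm_streaming __arm_inout("za")
+     {
+       svadd_za32_s32_vg1x4 (0, s);  // explicit form
+       svadd_za32_vg1x4 (0, s);      // generic, resolves to the line above
+       svadd_za32_vg1x4 (0, u);      // generic, resolves to the u32 form
+     }
+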
*/ + +/* +** add_w8_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add za\.s\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z18, svint32x4_t, + svadd_za32_s32_vg1x4 (w8, z18), + svadd_za32_vg1x4 (w8, z18)) + +/* +** add_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add za\.s\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z23, svint32x4_t, + svadd_za32_s32_vg1x4 (w8, z23), + svadd_za32_vg1x4 (w8, z23)) + +/* +** add_w8_z28: +** add za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s} +** ret +*/ +TEST_ZA_XN (add_w8_z28, svint32x4_t, + svadd_za32_s32_vg1x4 (w8, z28), + svadd_za32_vg1x4 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_u32_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_u32_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..80db23dc6fa9dd1af1551a252e27976e2462954b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_u32_vg1x2.c @@ -0,0 +1,122 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_0_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_0_z0, svuint32x2_t, + svadd_za32_u32_vg1x2 (0, z0), + svadd_za32_vg1x2 (0, z0)) + +/* +** add_w0_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w0_z0, svuint32x2_t, + svadd_za32_u32_vg1x2 (w0, z0), + svadd_za32_vg1x2 (w0, z0)) + +/* +** add_w7_z0: +** mov (w8|w9|w10|w11), w7 +** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w7_z0, svuint32x2_t, + svadd_za32_u32_vg1x2 (w7, z0), + svadd_za32_vg1x2 (w7, z0)) + +/* +** add_w8_z0: +** add za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w8_z0, svuint32x2_t, + svadd_za32_u32_vg1x2 (w8, z0), + svadd_za32_vg1x2 (w8, z0)) + +/* +** add_w11_z0: +** add za\.s\[w11, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w11_z0, svuint32x2_t, + svadd_za32_u32_vg1x2 (w11, z0), + svadd_za32_vg1x2 (w11, z0)) + + +/* +** add_w12_z0: +** mov (w8|w9|w10|w11), w12 +** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w12_z0, svuint32x2_t, + svadd_za32_u32_vg1x2 (w12, z0), + svadd_za32_vg1x2 (w12, z0)) + +/* +** add_w8p7_z0: +** add za\.s\[w8, 7, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w8p7_z0, svuint32x2_t, + svadd_za32_u32_vg1x2 (w8 + 7, z0), + svadd_za32_vg1x2 (w8 + 7, z0)) + +/* +** add_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w8p8_z0, svuint32x2_t, + svadd_za32_u32_vg1x2 (w8 + 8, z0), + svadd_za32_vg1x2 (w8 + 8, z0)) + +/* +** add_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** add za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (add_w8m1_z0, svuint32x2_t, + svadd_za32_u32_vg1x2 (w8 - 1, z0), + svadd_za32_vg1x2 (w8 - 1, z0)) + +/* +** add_w8_z18: +** add za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s} +** ret +*/ +TEST_ZA_XN (add_w8_z18, svuint32x2_t, + svadd_za32_u32_vg1x2 (w8, z18), + svadd_za32_vg1x2 (w8, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. 
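+
+   The unsigned tests mirror the signed ones line for line: ADD on ZA
+   slices has no signedness distinction, so svadd_za32_u32_vg1x2 and
+   svadd_za32_s32_vg1x2 emit the same encoding and differ only in the C
+   types they accept (a sketch):
+
+     void same_insn (svuint32x2_t u) __arm_streaming __arm_inout("za")
+     {
+       svadd_za32_u32_vg1x2 (0, u);  // add za.s[w8, 0, vgx2], {z0.s - z1.s}
+     }
+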
*/ + +/* +** add_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** add za\.s\[w8, 0, vgx2\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z23, svuint32x2_t, + svadd_za32_u32_vg1x2 (w8, z23), + svadd_za32_vg1x2 (w8, z23)) + +/* +** add_w8_z28: +** add za\.s\[w8, 0, vgx2\], {z28\.s - z29\.s} +** ret +*/ +TEST_ZA_XN (add_w8_z28, svuint32x2_t, + svadd_za32_u32_vg1x2 (w8, z28), + svadd_za32_vg1x2 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_u32_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_u32_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..185b6b05a4263cf73eb1d72166127f840fdb0f74 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za32_u32_vg1x4.c @@ -0,0 +1,137 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** add_0_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_0_z0, svuint32x4_t, + svadd_za32_u32_vg1x4 (0, z0), + svadd_za32_vg1x4 (0, z0)) + +/* +** add_w0_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w0_z0, svuint32x4_t, + svadd_za32_u32_vg1x4 (w0, z0), + svadd_za32_vg1x4 (w0, z0)) + +/* +** add_w7_z0: +** mov (w8|w9|w10|w11), w7 +** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w7_z0, svuint32x4_t, + svadd_za32_u32_vg1x4 (w7, z0), + svadd_za32_vg1x4 (w7, z0)) + +/* +** add_w8_z0: +** add za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w8_z0, svuint32x4_t, + svadd_za32_u32_vg1x4 (w8, z0), + svadd_za32_vg1x4 (w8, z0)) + +/* +** add_w11_z0: +** add za\.s\[w11, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w11_z0, svuint32x4_t, + svadd_za32_u32_vg1x4 (w11, z0), + svadd_za32_vg1x4 (w11, z0)) + + +/* +** add_w12_z0: +** mov (w8|w9|w10|w11), w12 +** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w12_z0, svuint32x4_t, + svadd_za32_u32_vg1x4 (w12, z0), + svadd_za32_vg1x4 (w12, z0)) + +/* +** add_w8p7_z0: +** add za\.s\[w8, 7, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w8p7_z0, svuint32x4_t, + svadd_za32_u32_vg1x4 (w8 + 7, z0), + svadd_za32_vg1x4 (w8 + 7, z0)) + +/* +** add_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w8p8_z0, svuint32x4_t, + svadd_za32_u32_vg1x4 (w8 + 8, z0), + svadd_za32_vg1x4 (w8 + 8, z0)) + +/* +** add_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** add za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (add_w8m1_z0, svuint32x4_t, + svadd_za32_u32_vg1x4 (w8 - 1, z0), + svadd_za32_vg1x4 (w8 - 1, z0)) + +/* +** add_w8_z4: +** add za\.s\[w8, 0, vgx4\], {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (add_w8_z4, svuint32x4_t, + svadd_za32_u32_vg1x4 (w8, z4), + svadd_za32_vg1x4 (w8, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. 
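+
+   Tuples need not arrive as whole arguments; they can also be assembled
+   with the SVE create intrinsics.  A sketch (svcreate4 is the generic
+   ACLE spelling, used here with four copies of one vector):
+
+     void from_parts (svuint32_t x) __arm_streaming __arm_inout("za")
+     {
+       svuint32x4_t t = svcreate4 (x, x, x, x);
+       svadd_za32_u32_vg1x4 (0, t);
+     }
+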
*/
+
+/*
+** add_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add za\.s\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z18, svuint32x4_t,
+            svadd_za32_u32_vg1x4 (w8, z18),
+            svadd_za32_vg1x4 (w8, z18))
+
+/*
+** add_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** add za\.s\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z23, svuint32x4_t,
+            svadd_za32_u32_vg1x4 (w8, z23),
+            svadd_za32_vg1x4 (w8, z23))
+
+/*
+** add_w8_z28:
+** add za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (add_w8_z28, svuint32x4_t,
+            svadd_za32_u32_vg1x4 (w8, z28),
+            svadd_za32_vg1x4 (w8, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_f64_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_f64_vg1x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..5c9c228c5012f6ed91daa641319a7bbd00b0b055
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_f64_vg1x2.c
@@ -0,0 +1,124 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-f64f64"
+
+#include "test_sme2_acle.h"
+
+/*
+** add_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fadd za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_0_z0, svfloat64x2_t,
+            svadd_za64_f64_vg1x2 (0, z0),
+            svadd_za64_vg1x2 (0, z0))
+
+/*
+** add_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** fadd za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w0_z0, svfloat64x2_t,
+            svadd_za64_f64_vg1x2 (w0, z0),
+            svadd_za64_vg1x2 (w0, z0))
+
+/*
+** add_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** fadd za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w7_z0, svfloat64x2_t,
+            svadd_za64_f64_vg1x2 (w7, z0),
+            svadd_za64_vg1x2 (w7, z0))
+
+/*
+** add_w8_z0:
+** fadd za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z0, svfloat64x2_t,
+            svadd_za64_f64_vg1x2 (w8, z0),
+            svadd_za64_vg1x2 (w8, z0))
+
+/*
+** add_w11_z0:
+** fadd za\.d\[w11, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w11_z0, svfloat64x2_t,
+            svadd_za64_f64_vg1x2 (w11, z0),
+            svadd_za64_vg1x2 (w11, z0))
+
+
+/*
+** add_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** fadd za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w12_z0, svfloat64x2_t,
+            svadd_za64_f64_vg1x2 (w12, z0),
+            svadd_za64_vg1x2 (w12, z0))
+
+/*
+** add_w8p7_z0:
+** fadd za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8p7_z0, svfloat64x2_t,
+            svadd_za64_f64_vg1x2 (w8 + 7, z0),
+            svadd_za64_vg1x2 (w8 + 7, z0))
+
+/*
+** add_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fadd za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8p8_z0, svfloat64x2_t,
+            svadd_za64_f64_vg1x2 (w8 + 8, z0),
+            svadd_za64_vg1x2 (w8 + 8, z0))
+
+/*
+** add_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fadd za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8m1_z0, svfloat64x2_t,
+            svadd_za64_f64_vg1x2 (w8 - 1, z0),
+            svadd_za64_vg1x2 (w8 - 1, z0))
+
+/*
+** add_w8_z18:
+** fadd za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z18, svfloat64x2_t,
+            svadd_za64_f64_vg1x2 (w8, z18),
+            svadd_za64_vg1x2 (w8, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers.  */
+
+/*
+** add_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** fadd za\.d\[w8, 0, vgx2\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (add_w8_z23, svfloat64x2_t,
+            svadd_za64_f64_vg1x2 (w8, z23),
+            svadd_za64_vg1x2 (w8, z23))
+
+/*
+** add_w8_z28:
+** fadd za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z28, svfloat64x2_t,
+            svadd_za64_f64_vg1x2 (w8, z28),
+            svadd_za64_vg1x2 (w8, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_f64_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_f64_vg1x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..adc208612ec1d56cc6fa4bf564785eb04f1bc0db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_f64_vg1x4.c
@@ -0,0 +1,139 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-f64f64"
+
+#include "test_sme2_acle.h"
+
+/*
+** add_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fadd za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_0_z0, svfloat64x4_t,
+            svadd_za64_f64_vg1x4 (0, z0),
+            svadd_za64_vg1x4 (0, z0))
+
+/*
+** add_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** fadd za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w0_z0, svfloat64x4_t,
+            svadd_za64_f64_vg1x4 (w0, z0),
+            svadd_za64_vg1x4 (w0, z0))
+
+/*
+** add_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** fadd za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w7_z0, svfloat64x4_t,
+            svadd_za64_f64_vg1x4 (w7, z0),
+            svadd_za64_vg1x4 (w7, z0))
+
+/*
+** add_w8_z0:
+** fadd za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z0, svfloat64x4_t,
+            svadd_za64_f64_vg1x4 (w8, z0),
+            svadd_za64_vg1x4 (w8, z0))
+
+/*
+** add_w11_z0:
+** fadd za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w11_z0, svfloat64x4_t,
+            svadd_za64_f64_vg1x4 (w11, z0),
+            svadd_za64_vg1x4 (w11, z0))
+
+
+/*
+** add_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** fadd za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w12_z0, svfloat64x4_t,
+            svadd_za64_f64_vg1x4 (w12, z0),
+            svadd_za64_vg1x4 (w12, z0))
+
+/*
+** add_w8p7_z0:
+** fadd za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8p7_z0, svfloat64x4_t,
+            svadd_za64_f64_vg1x4 (w8 + 7, z0),
+            svadd_za64_vg1x4 (w8 + 7, z0))
+
+/*
+** add_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fadd za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8p8_z0, svfloat64x4_t,
+            svadd_za64_f64_vg1x4 (w8 + 8, z0),
+            svadd_za64_vg1x4 (w8 + 8, z0))
+
+/*
+** add_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fadd za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8m1_z0, svfloat64x4_t,
+            svadd_za64_f64_vg1x4 (w8 - 1, z0),
+            svadd_za64_vg1x4 (w8 - 1, z0))
+
+/*
+** add_w8_z4:
+** fadd za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (add_w8_z4, svfloat64x4_t,
+            svadd_za64_f64_vg1x4 (w8, z4),
+            svadd_za64_vg1x4 (w8, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers.
*/ + +/* +** add_w8_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fadd za\.d\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z18, svfloat64x4_t, + svadd_za64_f64_vg1x4 (w8, z18), + svadd_za64_vg1x4 (w8, z18)) + +/* +** add_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fadd za\.d\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z23, svfloat64x4_t, + svadd_za64_f64_vg1x4 (w8, z23), + svadd_za64_vg1x4 (w8, z23)) + +/* +** add_w8_z28: +** fadd za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d} +** ret +*/ +TEST_ZA_XN (add_w8_z28, svfloat64x4_t, + svadd_za64_f64_vg1x4 (w8, z28), + svadd_za64_vg1x4 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_s64_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_s64_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..13aa886a710f6db4b2240e7e7134677489c3923c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_s64_vg1x2.c @@ -0,0 +1,124 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** add_0_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_0_z0, svint64x2_t, + svadd_za64_s64_vg1x2 (0, z0), + svadd_za64_vg1x2 (0, z0)) + +/* +** add_w0_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_w0_z0, svint64x2_t, + svadd_za64_s64_vg1x2 (w0, z0), + svadd_za64_vg1x2 (w0, z0)) + +/* +** add_w7_z0: +** mov (w8|w9|w10|w11), w7 +** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_w7_z0, svint64x2_t, + svadd_za64_s64_vg1x2 (w7, z0), + svadd_za64_vg1x2 (w7, z0)) + +/* +** add_w8_z0: +** add za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_w8_z0, svint64x2_t, + svadd_za64_s64_vg1x2 (w8, z0), + svadd_za64_vg1x2 (w8, z0)) + +/* +** add_w11_z0: +** add za\.d\[w11, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_w11_z0, svint64x2_t, + svadd_za64_s64_vg1x2 (w11, z0), + svadd_za64_vg1x2 (w11, z0)) + + +/* +** add_w12_z0: +** mov (w8|w9|w10|w11), w12 +** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_w12_z0, svint64x2_t, + svadd_za64_s64_vg1x2 (w12, z0), + svadd_za64_vg1x2 (w12, z0)) + +/* +** add_w8p7_z0: +** add za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_w8p7_z0, svint64x2_t, + svadd_za64_s64_vg1x2 (w8 + 7, z0), + svadd_za64_vg1x2 (w8 + 7, z0)) + +/* +** add_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_w8p8_z0, svint64x2_t, + svadd_za64_s64_vg1x2 (w8 + 8, z0), + svadd_za64_vg1x2 (w8 + 8, z0)) + +/* +** add_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_w8m1_z0, svint64x2_t, + svadd_za64_s64_vg1x2 (w8 - 1, z0), + svadd_za64_vg1x2 (w8 - 1, z0)) + +/* +** add_w8_z18: +** add za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d} +** ret +*/ +TEST_ZA_XN (add_w8_z18, svint64x2_t, + svadd_za64_s64_vg1x2 (w8, z18), + svadd_za64_vg1x2 (w8, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. 
*/ + +/* +** add_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** add za\.d\[w8, 0, vgx2\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z23, svint64x2_t, + svadd_za64_s64_vg1x2 (w8, z23), + svadd_za64_vg1x2 (w8, z23)) + +/* +** add_w8_z28: +** add za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d} +** ret +*/ +TEST_ZA_XN (add_w8_z28, svint64x2_t, + svadd_za64_s64_vg1x2 (w8, z28), + svadd_za64_vg1x2 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_s64_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_s64_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..7b3366c94ab675e196603e69824b4c10f0e06002 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_s64_vg1x4.c @@ -0,0 +1,139 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** add_0_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_0_z0, svint64x4_t, + svadd_za64_s64_vg1x4 (0, z0), + svadd_za64_vg1x4 (0, z0)) + +/* +** add_w0_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_w0_z0, svint64x4_t, + svadd_za64_s64_vg1x4 (w0, z0), + svadd_za64_vg1x4 (w0, z0)) + +/* +** add_w7_z0: +** mov (w8|w9|w10|w11), w7 +** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_w7_z0, svint64x4_t, + svadd_za64_s64_vg1x4 (w7, z0), + svadd_za64_vg1x4 (w7, z0)) + +/* +** add_w8_z0: +** add za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_w8_z0, svint64x4_t, + svadd_za64_s64_vg1x4 (w8, z0), + svadd_za64_vg1x4 (w8, z0)) + +/* +** add_w11_z0: +** add za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_w11_z0, svint64x4_t, + svadd_za64_s64_vg1x4 (w11, z0), + svadd_za64_vg1x4 (w11, z0)) + + +/* +** add_w12_z0: +** mov (w8|w9|w10|w11), w12 +** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_w12_z0, svint64x4_t, + svadd_za64_s64_vg1x4 (w12, z0), + svadd_za64_vg1x4 (w12, z0)) + +/* +** add_w8p7_z0: +** add za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_w8p7_z0, svint64x4_t, + svadd_za64_s64_vg1x4 (w8 + 7, z0), + svadd_za64_vg1x4 (w8 + 7, z0)) + +/* +** add_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_w8p8_z0, svint64x4_t, + svadd_za64_s64_vg1x4 (w8 + 8, z0), + svadd_za64_vg1x4 (w8 + 8, z0)) + +/* +** add_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_w8m1_z0, svint64x4_t, + svadd_za64_s64_vg1x4 (w8 - 1, z0), + svadd_za64_vg1x4 (w8 - 1, z0)) + +/* +** add_w8_z4: +** add za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d} +** ret +*/ +TEST_ZA_XN (add_w8_z4, svint64x4_t, + svadd_za64_s64_vg1x4 (w8, z4), + svadd_za64_vg1x4 (w8, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. 
*/ + +/* +** add_w8_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add za\.d\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z18, svint64x4_t, + svadd_za64_s64_vg1x4 (w8, z18), + svadd_za64_vg1x4 (w8, z18)) + +/* +** add_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add za\.d\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z23, svint64x4_t, + svadd_za64_s64_vg1x4 (w8, z23), + svadd_za64_vg1x4 (w8, z23)) + +/* +** add_w8_z28: +** add za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d} +** ret +*/ +TEST_ZA_XN (add_w8_z28, svint64x4_t, + svadd_za64_s64_vg1x4 (w8, z28), + svadd_za64_vg1x4 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_u64_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_u64_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..2c68a0e02fab3ecc2b7569c057c573278a5aee0d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_u64_vg1x2.c @@ -0,0 +1,124 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** add_0_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_0_z0, svuint64x2_t, + svadd_za64_u64_vg1x2 (0, z0), + svadd_za64_vg1x2 (0, z0)) + +/* +** add_w0_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_w0_z0, svuint64x2_t, + svadd_za64_u64_vg1x2 (w0, z0), + svadd_za64_vg1x2 (w0, z0)) + +/* +** add_w7_z0: +** mov (w8|w9|w10|w11), w7 +** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_w7_z0, svuint64x2_t, + svadd_za64_u64_vg1x2 (w7, z0), + svadd_za64_vg1x2 (w7, z0)) + +/* +** add_w8_z0: +** add za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_w8_z0, svuint64x2_t, + svadd_za64_u64_vg1x2 (w8, z0), + svadd_za64_vg1x2 (w8, z0)) + +/* +** add_w11_z0: +** add za\.d\[w11, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_w11_z0, svuint64x2_t, + svadd_za64_u64_vg1x2 (w11, z0), + svadd_za64_vg1x2 (w11, z0)) + + +/* +** add_w12_z0: +** mov (w8|w9|w10|w11), w12 +** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_w12_z0, svuint64x2_t, + svadd_za64_u64_vg1x2 (w12, z0), + svadd_za64_vg1x2 (w12, z0)) + +/* +** add_w8p7_z0: +** add za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_w8p7_z0, svuint64x2_t, + svadd_za64_u64_vg1x2 (w8 + 7, z0), + svadd_za64_vg1x2 (w8 + 7, z0)) + +/* +** add_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_w8p8_z0, svuint64x2_t, + svadd_za64_u64_vg1x2 (w8 + 8, z0), + svadd_za64_vg1x2 (w8 + 8, z0)) + +/* +** add_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** add za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (add_w8m1_z0, svuint64x2_t, + svadd_za64_u64_vg1x2 (w8 - 1, z0), + svadd_za64_vg1x2 (w8 - 1, z0)) + +/* +** add_w8_z18: +** add za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d} +** ret +*/ +TEST_ZA_XN (add_w8_z18, svuint64x2_t, + svadd_za64_u64_vg1x2 (w8, z18), + svadd_za64_vg1x2 (w8, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. 
*/ + +/* +** add_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** add za\.d\[w8, 0, vgx2\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z23, svuint64x2_t, + svadd_za64_u64_vg1x2 (w8, z23), + svadd_za64_vg1x2 (w8, z23)) + +/* +** add_w8_z28: +** add za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d} +** ret +*/ +TEST_ZA_XN (add_w8_z28, svuint64x2_t, + svadd_za64_u64_vg1x2 (w8, z28), + svadd_za64_vg1x2 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_u64_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_u64_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..249e888ad58b3fddbc1eb87a9cb0e5c5875953fe --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za64_u64_vg1x4.c @@ -0,0 +1,139 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** add_0_z0: +** mov (w8|w9|w10|w11), #?0 +** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_0_z0, svuint64x4_t, + svadd_za64_u64_vg1x4 (0, z0), + svadd_za64_vg1x4 (0, z0)) + +/* +** add_w0_z0: +** mov (w8|w9|w10|w11), w0 +** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_w0_z0, svuint64x4_t, + svadd_za64_u64_vg1x4 (w0, z0), + svadd_za64_vg1x4 (w0, z0)) + +/* +** add_w7_z0: +** mov (w8|w9|w10|w11), w7 +** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_w7_z0, svuint64x4_t, + svadd_za64_u64_vg1x4 (w7, z0), + svadd_za64_vg1x4 (w7, z0)) + +/* +** add_w8_z0: +** add za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_w8_z0, svuint64x4_t, + svadd_za64_u64_vg1x4 (w8, z0), + svadd_za64_vg1x4 (w8, z0)) + +/* +** add_w11_z0: +** add za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_w11_z0, svuint64x4_t, + svadd_za64_u64_vg1x4 (w11, z0), + svadd_za64_vg1x4 (w11, z0)) + + +/* +** add_w12_z0: +** mov (w8|w9|w10|w11), w12 +** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_w12_z0, svuint64x4_t, + svadd_za64_u64_vg1x4 (w12, z0), + svadd_za64_vg1x4 (w12, z0)) + +/* +** add_w8p7_z0: +** add za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_w8p7_z0, svuint64x4_t, + svadd_za64_u64_vg1x4 (w8 + 7, z0), + svadd_za64_vg1x4 (w8 + 7, z0)) + +/* +** add_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_w8p8_z0, svuint64x4_t, + svadd_za64_u64_vg1x4 (w8 + 8, z0), + svadd_za64_vg1x4 (w8 + 8, z0)) + +/* +** add_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** add za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (add_w8m1_z0, svuint64x4_t, + svadd_za64_u64_vg1x4 (w8 - 1, z0), + svadd_za64_vg1x4 (w8 - 1, z0)) + +/* +** add_w8_z4: +** add za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d} +** ret +*/ +TEST_ZA_XN (add_w8_z4, svuint64x4_t, + svadd_za64_u64_vg1x4 (w8, z4), + svadd_za64_vg1x4 (w8, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. 
*/ + +/* +** add_w8_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add za\.d\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z18, svuint64x4_t, + svadd_za64_u64_vg1x4 (w8, z18), + svadd_za64_vg1x4 (w8, z18)) + +/* +** add_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** add za\.d\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (add_w8_z23, svuint64x4_t, + svadd_za64_u64_vg1x4 (w8, z23), + svadd_za64_vg1x4 (w8, z23)) + +/* +** add_w8_z28: +** add za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d} +** ret +*/ +TEST_ZA_XN (add_w8_z28, svuint64x4_t, + svadd_za64_u64_vg1x4 (w8, z28), + svadd_za64_vg1x4 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c new file mode 100644 index 0000000000000000000000000000000000000000..f67316cd33ce0dd14137bdd333dbf579986db1b3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** bfmlslb_f32_tied1: +** bfmlslb z0\.s, z4\.h, z5\.h +** ret +*/ +TEST_DUAL_Z (bfmlslb_f32_tied1, svfloat32_t, svbfloat16_t, + z0 = svbfmlslb_f32 (z0, z4, z5), + z0 = svbfmlslb (z0, z4, z5)) + +/* +** bfmlslb_f32_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfmlslb z0\.s, \1\.h, z1\.h +** ret +*/ +TEST_DUAL_Z_REV (bfmlslb_f32_tied2, svfloat32_t, svbfloat16_t, + z0_res = svbfmlslb_f32 (z4, z0, z1), + z0_res = svbfmlslb (z4, z0, z1)) + +/* +** bfmlslb_f32_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfmlslb z0\.s, z1\.h, \1\.h +** ret +*/ +TEST_DUAL_Z_REV (bfmlslb_f32_tied3, svfloat32_t, svbfloat16_t, + z0_res = svbfmlslb_f32 (z4, z1, z0), + z0_res = svbfmlslb (z4, z1, z0)) + +/* +** bfmlslb_f32_untied: +** movprfx z0, z1 +** bfmlslb z0\.s, z4\.h, z5\.h +** ret +*/ +TEST_DUAL_Z (bfmlslb_f32_untied, svfloat32_t, svbfloat16_t, + z0 = svbfmlslb_f32 (z1, z4, z5), + z0 = svbfmlslb (z1, z4, z5)) + +/* +** bfmlslb_h7_f32_tied1: +** mov (z[0-9]+\.h), h7 +** bfmlslb z0\.s, z4\.h, \1 +** ret +*/ +TEST_DUAL_ZD (bfmlslb_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t, + z0 = svbfmlslb_n_f32 (z0, z4, d7), + z0 = svbfmlslb (z0, z4, d7)) + +/* +** bfmlslb_h7_f32_untied: +** mov (z[0-9]+\.h), h7 +** movprfx z0, z1 +** bfmlslb z0\.s, z4\.h, \1 +** ret +*/ +TEST_DUAL_ZD (bfmlslb_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t, + z0 = svbfmlslb_n_f32 (z1, z4, d7), + z0 = svbfmlslb (z1, z4, d7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslb_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslb_lane_f32.c new file mode 100644 index 0000000000000000000000000000000000000000..91a7a18d056d6ad6a1480adfd5d64a67d66e90a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslb_lane_f32.c @@ -0,0 +1,84 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** bfmlslb_lane_0_f32_tied1: +** bfmlslb z0\.s, z4\.h, z5\.h\[0\] +** ret +*/ +TEST_DUAL_Z (bfmlslb_lane_0_f32_tied1, svfloat32_t, svbfloat16_t, + z0 = svbfmlslb_lane_f32 (z0, z4, z5, 0), + z0 = svbfmlslb_lane (z0, z4, z5, 0)) + +/* +** bfmlslb_lane_0_f32_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfmlslb z0\.s, \1\.h, z1\.h\[0\] +** ret +*/ +TEST_DUAL_Z_REV (bfmlslb_lane_0_f32_tied2, svfloat32_t, svbfloat16_t, + z0_res = svbfmlslb_lane_f32 (z4, z0, z1, 0), + z0_res = svbfmlslb_lane (z4, z0, z1, 0)) + +/* +**
bfmlslb_lane_0_f32_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfmlslb z0\.s, z1\.h, \1\.h\[0\] +** ret +*/ +TEST_DUAL_Z_REV (bfmlslb_lane_0_f32_tied3, svfloat32_t, svbfloat16_t, + z0_res = svbfmlslb_lane_f32 (z4, z1, z0, 0), + z0_res = svbfmlslb_lane (z4, z1, z0, 0)) + +/* +** bfmlslb_lane_0_f32_untied: +** movprfx z0, z1 +** bfmlslb z0\.s, z4\.h, z5\.h\[0\] +** ret +*/ +TEST_DUAL_Z (bfmlslb_lane_0_f32_untied, svfloat32_t, svbfloat16_t, + z0 = svbfmlslb_lane_f32 (z1, z4, z5, 0), + z0 = svbfmlslb_lane (z1, z4, z5, 0)) + +/* +** bfmlslb_lane_1_f32: +** bfmlslb z0\.s, z4\.h, z5\.h\[1\] +** ret +*/ +TEST_DUAL_Z (bfmlslb_lane_1_f32, svfloat32_t, svbfloat16_t, + z0 = svbfmlslb_lane_f32 (z0, z4, z5, 1), + z0 = svbfmlslb_lane (z0, z4, z5, 1)) + +/* +** bfmlslb_lane_7_f32: +** bfmlslb z0\.s, z4\.h, z5\.h\[7\] +** ret +*/ +TEST_DUAL_Z (bfmlslb_lane_7_f32, svfloat32_t, svbfloat16_t, + z0 = svbfmlslb_lane_f32 (z0, z4, z5, 7), + z0 = svbfmlslb_lane (z0, z4, z5, 7)) + +/* +** bfmlslb_lane_z8_f32: +** str d8, \[sp, -16\]! +** mov (z[0-7])\.d, z8\.d +** bfmlslb z0\.s, z1\.h, \1\.h\[1\] +** ldr d8, \[sp\], 16 +** ret +*/ +TEST_DUAL_LANE_REG (bfmlslb_lane_z8_f32, svfloat32_t, svbfloat16_t, z8, + z0 = svbfmlslb_lane_f32 (z0, z1, z8, 1), + z0 = svbfmlslb_lane (z0, z1, z8, 1)) + +/* +** bfmlslb_lane_z16_f32: +** mov (z[0-7])\.d, z16\.d +** bfmlslb z0\.s, z1\.h, \1\.h\[1\] +** ret +*/ +TEST_DUAL_LANE_REG (bfmlslb_lane_z16_f32, svfloat32_t, svbfloat16_t, z16, + z0 = svbfmlslb_lane_f32 (z0, z1, z16, 1), + z0 = svbfmlslb_lane (z0, z1, z16, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslt_f32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslt_f32.c new file mode 100644 index 0000000000000000000000000000000000000000..bc6b7a712d0f748dae08727c76de57eae317cd29 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslt_f32.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** bfmlslt_f32_tied1: +** bfmlslt z0\.s, z4\.h, z5\.h +** ret +*/ +TEST_DUAL_Z (bfmlslt_f32_tied1, svfloat32_t, svbfloat16_t, + z0 = svbfmlslt_f32 (z0, z4, z5), + z0 = svbfmlslt (z0, z4, z5)) + +/* +** bfmlslt_f32_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfmlslt z0\.s, \1\.h, z1\.h +** ret +*/ +TEST_DUAL_Z_REV (bfmlslt_f32_tied2, svfloat32_t, svbfloat16_t, + z0_res = svbfmlslt_f32 (z4, z0, z1), + z0_res = svbfmlslt (z4, z0, z1)) + +/* +** bfmlslt_f32_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfmlslt z0\.s, z1\.h, \1\.h +** ret +*/ +TEST_DUAL_Z_REV (bfmlslt_f32_tied3, svfloat32_t, svbfloat16_t, + z0_res = svbfmlslt_f32 (z4, z1, z0), + z0_res = svbfmlslt (z4, z1, z0)) + +/* +** bfmlslt_f32_untied: +** movprfx z0, z1 +** bfmlslt z0\.s, z4\.h, z5\.h +** ret +*/ +TEST_DUAL_Z (bfmlslt_f32_untied, svfloat32_t, svbfloat16_t, + z0 = svbfmlslt_f32 (z1, z4, z5), + z0 = svbfmlslt (z1, z4, z5)) + +/* +** bfmlslt_h7_f32_tied1: +** mov (z[0-9]+\.h), h7 +** bfmlslt z0\.s, z4\.h, \1 +** ret +*/ +TEST_DUAL_ZD (bfmlslt_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t, + z0 = svbfmlslt_n_f32 (z0, z4, d7), + z0 = svbfmlslt (z0, z4, d7)) + +/* +** bfmlslt_h7_f32_untied: +** mov (z[0-9]+\.h), h7 +** movprfx z0, z1 +** bfmlslt z0\.s, z4\.h, \1 +** ret +*/ +TEST_DUAL_ZD (bfmlslt_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t, + z0 = svbfmlslt_n_f32 (z1, z4, d7), + z0 = svbfmlslt (z1, z4, d7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslt_lane_f32.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslt_lane_f32.c new file mode 100644 index 0000000000000000000000000000000000000000..1c93011e257ab92e40165bad8da3f08cc8e99b84 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslt_lane_f32.c @@ -0,0 +1,84 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** bfmlslt_lane_0_f32_tied1: +** bfmlslt z0\.s, z4\.h, z5\.h\[0\] +** ret +*/ +TEST_DUAL_Z (bfmlslt_lane_0_f32_tied1, svfloat32_t, svbfloat16_t, + z0 = svbfmlslt_lane_f32 (z0, z4, z5, 0), + z0 = svbfmlslt_lane (z0, z4, z5, 0)) + +/* +** bfmlslt_lane_0_f32_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfmlslt z0\.s, \1\.h, z1\.h\[0\] +** ret +*/ +TEST_DUAL_Z_REV (bfmlslt_lane_0_f32_tied2, svfloat32_t, svbfloat16_t, + z0_res = svbfmlslt_lane_f32 (z4, z0, z1, 0), + z0_res = svbfmlslt_lane (z4, z0, z1, 0)) + +/* +** bfmlslt_lane_0_f32_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfmlslt z0\.s, z1\.h, \1\.h\[0\] +** ret +*/ +TEST_DUAL_Z_REV (bfmlslt_lane_0_f32_tied3, svfloat32_t, svbfloat16_t, + z0_res = svbfmlslt_lane_f32 (z4, z1, z0, 0), + z0_res = svbfmlslt_lane (z4, z1, z0, 0)) + +/* +** bfmlslt_lane_0_f32_untied: +** movprfx z0, z1 +** bfmlslt z0\.s, z4\.h, z5\.h\[0\] +** ret +*/ +TEST_DUAL_Z (bfmlslt_lane_0_f32_untied, svfloat32_t, svbfloat16_t, + z0 = svbfmlslt_lane_f32 (z1, z4, z5, 0), + z0 = svbfmlslt_lane (z1, z4, z5, 0)) + +/* +** bfmlslt_lane_1_f32: +** bfmlslt z0\.s, z4\.h, z5\.h\[1\] +** ret +*/ +TEST_DUAL_Z (bfmlslt_lane_1_f32, svfloat32_t, svbfloat16_t, + z0 = svbfmlslt_lane_f32 (z0, z4, z5, 1), + z0 = svbfmlslt_lane (z0, z4, z5, 1)) + +/* +** bfmlslt_lane_7_f32: +** bfmlslt z0\.s, z4\.h, z5\.h\[7\] +** ret +*/ +TEST_DUAL_Z (bfmlslt_lane_7_f32, svfloat32_t, svbfloat16_t, + z0 = svbfmlslt_lane_f32 (z0, z4, z5, 7), + z0 = svbfmlslt_lane (z0, z4, z5, 7)) + +/* +** bfmlslt_lane_z8_f32: +** str d8, \[sp, -16\]! 
+** mov (z[0-7])\.d, z8\.d +** bfmlslt z0\.s, z1\.h, \1\.h\[1\] +** ldr d8, \[sp\], 16 +** ret +*/ +TEST_DUAL_LANE_REG (bfmlslt_lane_z8_f32, svfloat32_t, svbfloat16_t, z8, + z0 = svbfmlslt_lane_f32 (z0, z1, z8, 1), + z0 = svbfmlslt_lane (z0, z1, z8, 1)) + +/* +** bfmlslt_lane_z16_f32: +** mov (z[0-7])\.d, z16\.d +** bfmlslt z0\.s, z1\.h, \1\.h\[1\] +** ret +*/ +TEST_DUAL_LANE_REG (bfmlslt_lane_z16_f32, svfloat32_t, svbfloat16_t, z16, + z0 = svbfmlslt_lane_f32 (z0, z1, z16, 1), + z0 = svbfmlslt_lane (z0, z1, z16, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bmopa_za32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bmopa_za32.c new file mode 100644 index 0000000000000000000000000000000000000000..b88a81722a31c492aafbe289a41dccb17f989ca2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bmopa_za32.c @@ -0,0 +1,30 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** bmopa_za32_u32_0_p0_p1_z0_z1: +** bmopa za0\.s, p0/m, p1/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_ZA (bmopa_za32_u32_0_p0_p1_z0_z1, svuint32_t, + svbmopa_za32_u32_m (0, p0, p1, z0, z1), + svbmopa_za32_m (0, p0, p1, z0, z1)) + +/* +** bmopa_za32_u32_0_p1_p0_z1_z0: +** bmopa za0\.s, p1/m, p0/m, z1\.s, z0\.s +** ret +*/ +TEST_UNIFORM_ZA (bmopa_za32_u32_0_p1_p0_z1_z0, svuint32_t, + svbmopa_za32_u32_m (0, p1, p0, z1, z0), + svbmopa_za32_m (0, p1, p0, z1, z0)) + +/* +** bmopa_za32_u32_3_p0_p1_z0_z1: +** bmopa za3\.s, p0/m, p1/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_ZA (bmopa_za32_u32_3_p0_p1_z0_z1, svuint32_t, + svbmopa_za32_u32_m (3, p0, p1, z0, z1), + svbmopa_za32_m (3, p0, p1, z0, z1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bmops_za32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bmops_za32.c new file mode 100644 index 0000000000000000000000000000000000000000..dda120c6099434091080b69b6725509f96af288b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bmops_za32.c @@ -0,0 +1,30 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** bmops_za32_u32_0_p0_p1_z0_z1: +** bmops za0\.s, p0/m, p1/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_ZA (bmops_za32_u32_0_p0_p1_z0_z1, svuint32_t, + svbmops_za32_u32_m (0, p0, p1, z0, z1), + svbmops_za32_m (0, p0, p1, z0, z1)) + +/* +** bmops_za32_u32_0_p1_p0_z1_z0: +** bmops za0\.s, p1/m, p0/m, z1\.s, z0\.s +** ret +*/ +TEST_UNIFORM_ZA (bmops_za32_u32_0_p1_p0_z1_z0, svuint32_t, + svbmops_za32_u32_m (0, p1, p0, z1, z0), + svbmops_za32_m (0, p1, p0, z1, z0)) + +/* +** bmops_za32_u32_3_p0_p1_z0_z1: +** bmops za3\.s, p0/m, p1/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_ZA (bmops_za32_u32_3_p0_p1_z0_z1, svuint32_t, + svbmops_za32_u32_m (3, p0, p1, z0, z1), + svbmops_za32_m (3, p0, p1, z0, z1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f16.c new file mode 100644 index 0000000000000000000000000000000000000000..7918a9c7d8eb2fd800e0d65d7354036f95d79c13 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f16.c @@ -0,0 +1,42 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_f16_tied1: +** fclamp z0\.h, z1\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (clamp_f16_tied1, svfloat16_t, + z0 = svclamp_f16 (z0, z1, z2), + z0 = svclamp (z0, z1, z2)) + +/* +** clamp_f16_tied2: +** fclamp z0\.h, z1\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (clamp_f16_tied2, svfloat16_t, + z0 = 
svclamp_f16 (z1, z0, z2), + z0 = svclamp (z1, z0, z2)) + +/* +** clamp_f16_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** fclamp z0\.h, z2\.h, \1\.h +** ret +*/ +TEST_UNIFORM_Z (clamp_f16_tied3, svfloat16_t, + z0 = svclamp_f16 (z1, z2, z0), + z0 = svclamp (z1, z2, z0)) + +/* +** clamp_f16_untied: +** movprfx z0, z1 +** fclamp z0\.h, z2\.h, z3\.h +** ret +*/ +TEST_UNIFORM_Z (clamp_f16_untied, svfloat16_t, + z0 = svclamp_f16 (z1, z2, z3), + z0 = svclamp (z1, z2, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..c63294be867025cbfe69f8cc5314503dea61e2d8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f16_x2.c @@ -0,0 +1,94 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** fclamp {z24\.h - z25\.h}, z0\.h, z5\.h +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svfloat16x2_t, svfloat16_t, z24, + svclamp_single_f16_x2 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fclamp {z24\.h - z25\.h}, z5\.h, z7\.h +** | +** fclamp {z28\.h - z29\.h}, z5\.h, z7\.h +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svfloat16x2_t, svfloat16_t, z24, + svclamp_single_f16_x2 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** fclamp {z24\.h - z25\.h}, z7\.h, z16\.h +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svfloat16x2_t, svfloat16_t, z24, + svclamp_single_f16_x2 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** fclamp {z24\.h - z25\.h}, z16\.h, z23\.h +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svfloat16x2_t, svfloat16_t, z1, + svclamp_single_f16_x2 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** fclamp {z[0-9]+\.h - z[0-9]+\.h}, z23\.h, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svfloat16x2_t, svfloat16_t, z1, + svclamp_single_f16_x2 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z0_z23: +** fclamp {z18\.h - z19\.h}, z0\.h, z23\.h +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svfloat16x2_t, svfloat16_t, z18, + svclamp_single_f16_x2 (z18, z0, z23), + svclamp (z18, z0, z23)) + +/* +** clamp_awkward: +** ... +** fclamp {z[0-9]+\.h - z[0-9]+\.h}, z[0-9]+\.h, z3\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svfloat16x2_t, svfloat16_t, + z0_res = svclamp_single_f16_x2 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..7487aa0cb71a5176c196e226f39206eeff193a5c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f16_x4.c @@ -0,0 +1,104 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** fclamp {z24\.h - z27\.h}, z0\.h, z5\.h +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svfloat16x4_t, svfloat16_t, z24, + svclamp_single_f16_x4 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fclamp {z24\.h - z27\.h}, z5\.h, z7\.h +** | +** fclamp {z28\.h - z31\.h}, z5\.h, z7\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svfloat16x4_t, svfloat16_t, z24, + svclamp_single_f16_x4 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fclamp {z24\.h - z27\.h}, z7\.h, z16\.h +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svfloat16x4_t, svfloat16_t, z24, + svclamp_single_f16_x4 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** fclamp {z24\.h - z27\.h}, z16\.h, z23\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svfloat16x4_t, svfloat16_t, z1, + svclamp_single_f16_x4 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fclamp {z[0-9]+\.h - z[0-9]+\.h}, z23\.h, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svfloat16x4_t, svfloat16_t, z1, + svclamp_single_f16_x4 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z16_z5: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fclamp {z[0-9]+\.h - z[0-9]+\.h}, z16\.h, z5\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svfloat16x4_t, svfloat16_t, z18, + svclamp_single_f16_x4 (z18, z16, z5), + svclamp (z18, z16, z5)) + +/* +** clamp_awkward: +** ... +** fclamp {z[0-9]+\.h - z[0-9]+\.h}, z[0-9]+\.h, z5\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svfloat16x4_t, svfloat16_t, + z0_res = svclamp_single_f16_x4 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f32.c new file mode 100644 index 0000000000000000000000000000000000000000..7c6cff34b456dec840a5c977198fc94c8f29797c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f32.c @@ -0,0 +1,42 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_f32_tied1: +** fclamp z0\.s, z1\.s, z2\.s +** ret +*/ +TEST_UNIFORM_Z (clamp_f32_tied1, svfloat32_t, + z0 = svclamp_f32 (z0, z1, z2), + z0 = svclamp (z0, z1, z2)) + +/* +** clamp_f32_tied2: +** fclamp z0\.s, z1\.s, z2\.s +** ret +*/ +TEST_UNIFORM_Z (clamp_f32_tied2, svfloat32_t, + z0 = svclamp_f32 (z1, z0, z2), + z0 = svclamp (z1, z0, z2)) + +/* +** clamp_f32_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** fclamp z0\.s, z2\.s, \1\.s +** ret +*/ +TEST_UNIFORM_Z (clamp_f32_tied3, svfloat32_t, + z0 = svclamp_f32 (z1, z2, z0), + z0 = svclamp (z1, z2, z0)) + +/* +** clamp_f32_untied: +** movprfx z0, z1 +** fclamp z0\.s, z2\.s, z3\.s +** ret +*/ +TEST_UNIFORM_Z (clamp_f32_untied, svfloat32_t, + z0 = svclamp_f32 (z1, z2, z3), + z0 = svclamp (z1, z2, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..dd8eb62cf3dfba2bbe40eafc1f8222c314982d44 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f32_x2.c @@ -0,0 +1,94 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** fclamp {z24\.s - z25\.s}, z0\.s, z5\.s +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svfloat32x2_t, svfloat32_t, z24, + svclamp_single_f32_x2 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fclamp {z24\.s - z25\.s}, z5\.s, z7\.s +** | +** fclamp {z28\.s - z29\.s}, z5\.s, z7\.s +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svfloat32x2_t, svfloat32_t, z24, + svclamp_single_f32_x2 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** fclamp {z24\.s - z25\.s}, z7\.s, z16\.s +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svfloat32x2_t, svfloat32_t, z24, + svclamp_single_f32_x2 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** fclamp {z24\.s - z25\.s}, z16\.s, z23\.s +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svfloat32x2_t, svfloat32_t, z1, + svclamp_single_f32_x2 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** fclamp {z[0-9]+\.s - z[0-9]+\.s}, z23\.s, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svfloat32x2_t, svfloat32_t, z1, + svclamp_single_f32_x2 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z0_z23: +** fclamp {z18\.s - z19\.s}, z0\.s, z23\.s +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svfloat32x2_t, svfloat32_t, z18, + svclamp_single_f32_x2 (z18, z0, z23), + svclamp 
(z18, z0, z23)) + +/* +** clamp_awkward: +** ... +** fclamp {z[0-9]+\.s - z[0-9]+\.s}, z[0-9]+\.s, z3\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svfloat32x2_t, svfloat32_t, + z0_res = svclamp_single_f32_x2 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..29d73f44f2a47fc42d769b79ef7b56c899790bd6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f32_x4.c @@ -0,0 +1,104 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** fclamp {z24\.s - z27\.s}, z0\.s, z5\.s +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svfloat32x4_t, svfloat32_t, z24, + svclamp_single_f32_x4 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fclamp {z24\.s - z27\.s}, z5\.s, z7\.s +** | +** fclamp {z28\.s - z31\.s}, z5\.s, z7\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svfloat32x4_t, svfloat32_t, z24, + svclamp_single_f32_x4 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fclamp {z24\.s - z27\.s}, z7\.s, z16\.s +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svfloat32x4_t, svfloat32_t, z24, + svclamp_single_f32_x4 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** fclamp {z24\.s - z27\.s}, z16\.s, z23\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svfloat32x4_t, svfloat32_t, z1, + svclamp_single_f32_x4 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fclamp {z[0-9]+\.s - z[0-9]+\.s}, z23\.s, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svfloat32x4_t, svfloat32_t, z1, + svclamp_single_f32_x4 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z16_z5: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fclamp {z[0-9]+\.s - z[0-9]+\.s}, z16\.s, z5\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svfloat32x4_t, svfloat32_t, z18, + svclamp_single_f32_x4 (z18, z16, z5), + svclamp (z18, z16, z5)) + +/* +** clamp_awkward: +** ... +** fclamp {z[0-9]+\.s - z[0-9]+\.s}, z[0-9]+\.s, z5\.s +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svfloat32x4_t, svfloat32_t, + z0_res = svclamp_single_f32_x4 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f64.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f64.c new file mode 100644 index 0000000000000000000000000000000000000000..599f21802d257c8818d25502bb89fd9ce4c8226c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f64.c @@ -0,0 +1,42 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_f64_tied1: +** fclamp z0\.d, z1\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (clamp_f64_tied1, svfloat64_t, + z0 = svclamp_f64 (z0, z1, z2), + z0 = svclamp (z0, z1, z2)) + +/* +** clamp_f64_tied2: +** fclamp z0\.d, z1\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (clamp_f64_tied2, svfloat64_t, + z0 = svclamp_f64 (z1, z0, z2), + z0 = svclamp (z1, z0, z2)) + +/* +** clamp_f64_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z1 +** fclamp z0\.d, z2\.d, \1\.d +** ret +*/ +TEST_UNIFORM_Z (clamp_f64_tied3, svfloat64_t, + z0 = svclamp_f64 (z1, z2, z0), + z0 = svclamp (z1, z2, z0)) + +/* +** clamp_f64_untied: +** movprfx z0, z1 +** fclamp z0\.d, z2\.d, z3\.d +** ret +*/ +TEST_UNIFORM_Z (clamp_f64_untied, svfloat64_t, + z0 = svclamp_f64 (z1, z2, z3), + z0 = svclamp (z1, z2, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..ca9e99609af054029557186c21dc83935258d3a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f64_x2.c @@ -0,0 +1,94 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** fclamp {z24\.d - z25\.d}, z0\.d, z5\.d +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svfloat64x2_t, svfloat64_t, z24, + svclamp_single_f64_x2 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fclamp {z24\.d - z25\.d}, z5\.d, z7\.d +** | +** fclamp {z28\.d - z29\.d}, z5\.d, z7\.d +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svfloat64x2_t, svfloat64_t, z24, + svclamp_single_f64_x2 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** fclamp {z24\.d - z25\.d}, z7\.d, z16\.d +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svfloat64x2_t, svfloat64_t, z24, + svclamp_single_f64_x2 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** fclamp {z24\.d - z25\.d}, z16\.d, z23\.d +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svfloat64x2_t, svfloat64_t, z1, + svclamp_single_f64_x2 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** fclamp {z[0-9]+\.d - z[0-9]+\.d}, z23\.d, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svfloat64x2_t, svfloat64_t, z1, + svclamp_single_f64_x2 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z0_z23: +** fclamp {z18\.d - z19\.d}, z0\.d, z23\.d +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svfloat64x2_t, svfloat64_t, z18, + svclamp_single_f64_x2 (z18, z0, z23), + svclamp 
(z18, z0, z23)) + +/* +** clamp_awkward: +** ... +** fclamp {z[0-9]+\.d - z[0-9]+\.d}, z[0-9]+\.d, z3\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svfloat64x2_t, svfloat64_t, + z0_res = svclamp_single_f64_x2 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..c2773e2c995048ad7855bd14d88a1683e57ca83c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_f64_x4.c @@ -0,0 +1,104 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** fclamp {z24\.d - z27\.d}, z0\.d, z5\.d +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svfloat64x4_t, svfloat64_t, z24, + svclamp_single_f64_x4 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fclamp {z24\.d - z27\.d}, z5\.d, z7\.d +** | +** fclamp {z28\.d - z31\.d}, z5\.d, z7\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svfloat64x4_t, svfloat64_t, z24, + svclamp_single_f64_x4 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fclamp {z24\.d - z27\.d}, z7\.d, z16\.d +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svfloat64x4_t, svfloat64_t, z24, + svclamp_single_f64_x4 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** fclamp {z24\.d - z27\.d}, z16\.d, z23\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svfloat64x4_t, svfloat64_t, z1, + svclamp_single_f64_x4 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fclamp {z[0-9]+\.d - z[0-9]+\.d}, z23\.d, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svfloat64x4_t, svfloat64_t, z1, + svclamp_single_f64_x4 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z16_z5: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fclamp {z[0-9]+\.d - z[0-9]+\.d}, z16\.d, z5\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svfloat64x4_t, svfloat64_t, z18, + svclamp_single_f64_x4 (z18, z16, z5), + svclamp (z18, z16, z5)) + +/* +** clamp_awkward: +** ... +** fclamp {z[0-9]+\.d - z[0-9]+\.d}, z[0-9]+\.d, z5\.d +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svfloat64x4_t, svfloat64_t, + z0_res = svclamp_single_f64_x4 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..401a298abebd18bf3ceaac1257791266a1404dbe --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s16_x2.c @@ -0,0 +1,94 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** sclamp {z24\.h - z25\.h}, z0\.h, z5\.h +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svint16x2_t, svint16_t, z24, + svclamp_single_s16_x2 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z24\.h - z25\.h}, z5\.h, z7\.h +** | +** sclamp {z28\.h - z29\.h}, z5\.h, z7\.h +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svint16x2_t, svint16_t, z24, + svclamp_single_s16_x2 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** sclamp {z24\.h - z25\.h}, z7\.h, z16\.h +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svint16x2_t, svint16_t, z24, + svclamp_single_s16_x2 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** sclamp {z24\.h - z25\.h}, z16\.h, z23\.h +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svint16x2_t, svint16_t, z1, + svclamp_single_s16_x2 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z[0-9]+\.h - z[0-9]+\.h}, z23\.h, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svint16x2_t, svint16_t, z1, + svclamp_single_s16_x2 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z0_z23: +** sclamp {z18\.h - z19\.h}, z0\.h, z23\.h +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svint16x2_t, svint16_t, z18, + svclamp_single_s16_x2 (z18, z0, z23), + svclamp (z18, z0, z23)) + +/* +** clamp_awkward: +** ... +** sclamp {z[0-9]+\.h - z[0-9]+\.h}, z[0-9]+\.h, z3\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svint16x2_t, svint16_t, + z0_res = svclamp_single_s16_x2 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..96c87dbf7ce1a6e8d39164f4c55d7bf5be405715 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s16_x4.c @@ -0,0 +1,104 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** sclamp {z24\.h - z27\.h}, z0\.h, z5\.h +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svint16x4_t, svint16_t, z24, + svclamp_single_s16_x4 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z24\.h - z27\.h}, z5\.h, z7\.h +** | +** sclamp {z28\.h - z31\.h}, z5\.h, z7\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svint16x4_t, svint16_t, z24, + svclamp_single_s16_x4 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z24\.h - z27\.h}, z7\.h, z16\.h +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svint16x4_t, svint16_t, z24, + svclamp_single_s16_x4 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** sclamp {z24\.h - z27\.h}, z16\.h, z23\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svint16x4_t, svint16_t, z1, + svclamp_single_s16_x4 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z[0-9]+\.h - z[0-9]+\.h}, z23\.h, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svint16x4_t, svint16_t, z1, + svclamp_single_s16_x4 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z16_z5: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z[0-9]+\.h - z[0-9]+\.h}, z16\.h, z5\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svint16x4_t, svint16_t, z18, + svclamp_single_s16_x4 (z18, z16, z5), + svclamp (z18, z16, z5)) + +/* +** clamp_awkward: +** ... +** sclamp {z[0-9]+\.h - z[0-9]+\.h}, z[0-9]+\.h, z5\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svint16x4_t, svint16_t, + z0_res = svclamp_single_s16_x4 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..1a50b851d1d42444461ce83b109b6bee68a0d4c7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s32_x2.c @@ -0,0 +1,94 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** sclamp {z24\.s - z25\.s}, z0\.s, z5\.s +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svint32x2_t, svint32_t, z24, + svclamp_single_s32_x2 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z24\.s - z25\.s}, z5\.s, z7\.s +** | +** sclamp {z28\.s - z29\.s}, z5\.s, z7\.s +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svint32x2_t, svint32_t, z24, + svclamp_single_s32_x2 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** sclamp {z24\.s - z25\.s}, z7\.s, z16\.s +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svint32x2_t, svint32_t, z24, + svclamp_single_s32_x2 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** sclamp {z24\.s - z25\.s}, z16\.s, z23\.s +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svint32x2_t, svint32_t, z1, + svclamp_single_s32_x2 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z[0-9]+\.s - z[0-9]+\.s}, z23\.s, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svint32x2_t, svint32_t, z1, + svclamp_single_s32_x2 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z0_z23: +** sclamp {z18\.s - z19\.s}, z0\.s, z23\.s +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svint32x2_t, svint32_t, z18, + svclamp_single_s32_x2 (z18, z0, z23), + svclamp (z18, z0, z23)) + +/* +** clamp_awkward: +** ... +** sclamp {z[0-9]+\.s - z[0-9]+\.s}, z[0-9]+\.s, z3\.s +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svint32x2_t, svint32_t, + z0_res = svclamp_single_s32_x2 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..8f6a0d3aff7e0d1f6f26fee7788c931b89bfd57e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s32_x4.c @@ -0,0 +1,104 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** sclamp {z24\.s - z27\.s}, z0\.s, z5\.s +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svint32x4_t, svint32_t, z24, + svclamp_single_s32_x4 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z24\.s - z27\.s}, z5\.s, z7\.s +** | +** sclamp {z28\.s - z31\.s}, z5\.s, z7\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svint32x4_t, svint32_t, z24, + svclamp_single_s32_x4 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z24\.s - z27\.s}, z7\.s, z16\.s +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svint32x4_t, svint32_t, z24, + svclamp_single_s32_x4 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** sclamp {z24\.s - z27\.s}, z16\.s, z23\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svint32x4_t, svint32_t, z1, + svclamp_single_s32_x4 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z[0-9]+\.s - z[0-9]+\.s}, z23\.s, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svint32x4_t, svint32_t, z1, + svclamp_single_s32_x4 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z16_z5: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z[0-9]+\.s - z[0-9]+\.s}, z16\.s, z5\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svint32x4_t, svint32_t, z18, + svclamp_single_s32_x4 (z18, z16, z5), + svclamp (z18, z16, z5)) + +/* +** clamp_awkward: +** ... +** sclamp {z[0-9]+\.s - z[0-9]+\.s}, z[0-9]+\.s, z5\.s +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svint32x4_t, svint32_t, + z0_res = svclamp_single_s32_x4 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..6accce502f2ce3439f21d1a58c681639693947ca --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s64_x2.c @@ -0,0 +1,94 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** sclamp {z24\.d - z25\.d}, z0\.d, z5\.d +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svint64x2_t, svint64_t, z24, + svclamp_single_s64_x2 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z24\.d - z25\.d}, z5\.d, z7\.d +** | +** sclamp {z28\.d - z29\.d}, z5\.d, z7\.d +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svint64x2_t, svint64_t, z24, + svclamp_single_s64_x2 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** sclamp {z24\.d - z25\.d}, z7\.d, z16\.d +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svint64x2_t, svint64_t, z24, + svclamp_single_s64_x2 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** sclamp {z24\.d - z25\.d}, z16\.d, z23\.d +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svint64x2_t, svint64_t, z1, + svclamp_single_s64_x2 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z[0-9]+\.d - z[0-9]+\.d}, z23\.d, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svint64x2_t, svint64_t, z1, + svclamp_single_s64_x2 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z0_z23: +** sclamp {z18\.d - z19\.d}, z0\.d, z23\.d +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svint64x2_t, svint64_t, z18, + svclamp_single_s64_x2 (z18, z0, z23), + svclamp (z18, z0, z23)) + +/* +** clamp_awkward: +** ... +** sclamp {z[0-9]+\.d - z[0-9]+\.d}, z[0-9]+\.d, z3\.d +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svint64x2_t, svint64_t, + z0_res = svclamp_single_s64_x2 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..fbc06589ca8e23bf788b2ebffe5ed585aca5fa3a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s64_x4.c @@ -0,0 +1,104 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** sclamp {z24\.d - z27\.d}, z0\.d, z5\.d +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svint64x4_t, svint64_t, z24, + svclamp_single_s64_x4 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z24\.d - z27\.d}, z5\.d, z7\.d +** | +** sclamp {z28\.d - z31\.d}, z5\.d, z7\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svint64x4_t, svint64_t, z24, + svclamp_single_s64_x4 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z24\.d - z27\.d}, z7\.d, z16\.d +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svint64x4_t, svint64_t, z24, + svclamp_single_s64_x4 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** sclamp {z24\.d - z27\.d}, z16\.d, z23\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svint64x4_t, svint64_t, z1, + svclamp_single_s64_x4 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z[0-9]+\.d - z[0-9]+\.d}, z23\.d, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svint64x4_t, svint64_t, z1, + svclamp_single_s64_x4 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z16_z5: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z[0-9]+\.d - z[0-9]+\.d}, z16\.d, z5\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svint64x4_t, svint64_t, z18, + svclamp_single_s64_x4 (z18, z16, z5), + svclamp (z18, z16, z5)) + +/* +** clamp_awkward: +** ... +** sclamp {z[0-9]+\.d - z[0-9]+\.d}, z[0-9]+\.d, z5\.d +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svint64x4_t, svint64_t, + z0_res = svclamp_single_s64_x4 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..fc9151b6d2afe6d20e66f3561fecb77e8d875834 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s8_x2.c @@ -0,0 +1,94 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** sclamp {z24\.b - z25\.b}, z0\.b, z5\.b +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svint8x2_t, svint8_t, z24, + svclamp_single_s8_x2 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z24\.b - z25\.b}, z5\.b, z7\.b +** | +** sclamp {z28\.b - z29\.b}, z5\.b, z7\.b +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svint8x2_t, svint8_t, z24, + svclamp_single_s8_x2 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** sclamp {z24\.b - z25\.b}, z7\.b, z16\.b +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svint8x2_t, svint8_t, z24, + svclamp_single_s8_x2 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** sclamp {z24\.b - z25\.b}, z16\.b, z23\.b +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svint8x2_t, svint8_t, z1, + svclamp_single_s8_x2 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z[0-9]+\.b - z[0-9]+\.b}, z23\.b, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svint8x2_t, svint8_t, z1, + svclamp_single_s8_x2 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z0_z23: +** sclamp {z18\.b - z19\.b}, z0\.b, z23\.b +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svint8x2_t, svint8_t, z18, + svclamp_single_s8_x2 (z18, z0, z23), + svclamp (z18, z0, z23)) + +/* +** clamp_awkward: +** ... +** sclamp {z[0-9]+\.b - z[0-9]+\.b}, z[0-9]+\.b, z3\.b +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svint8x2_t, svint8_t, + z0_res = svclamp_single_s8_x2 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..ce1ad0272311bf0646e063ef14bd0ba4e06112f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_s8_x4.c @@ -0,0 +1,104 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** sclamp {z24\.b - z27\.b}, z0\.b, z5\.b +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svint8x4_t, svint8_t, z24, + svclamp_single_s8_x4 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z24\.b - z27\.b}, z5\.b, z7\.b +** | +** sclamp {z28\.b - z31\.b}, z5\.b, z7\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svint8x4_t, svint8_t, z24, + svclamp_single_s8_x4 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z24\.b - z27\.b}, z7\.b, z16\.b +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svint8x4_t, svint8_t, z24, + svclamp_single_s8_x4 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** sclamp {z24\.b - z27\.b}, z16\.b, z23\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svint8x4_t, svint8_t, z1, + svclamp_single_s8_x4 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z[0-9]+\.b - z[0-9]+\.b}, z23\.b, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svint8x4_t, svint8_t, z1, + svclamp_single_s8_x4 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z16_z5: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sclamp {z[0-9]+\.b - z[0-9]+\.b}, z16\.b, z5\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svint8x4_t, svint8_t, z18, + svclamp_single_s8_x4 (z18, z16, z5), + svclamp (z18, z16, z5)) + +/* +** clamp_awkward: +** ... +** sclamp {z[0-9]+\.b - z[0-9]+\.b}, z[0-9]+\.b, z5\.b +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svint8x4_t, svint8_t, + z0_res = svclamp_single_s8_x4 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..50ed0a1389bdacb7bf4c09d183feeb8df6fd7978 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u16_x2.c @@ -0,0 +1,94 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** uclamp {z24\.h - z25\.h}, z0\.h, z5\.h +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svuint16x2_t, svuint16_t, z24, + svclamp_single_u16_x2 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z24\.h - z25\.h}, z5\.h, z7\.h +** | +** uclamp {z28\.h - z29\.h}, z5\.h, z7\.h +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svuint16x2_t, svuint16_t, z24, + svclamp_single_u16_x2 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** uclamp {z24\.h - z25\.h}, z7\.h, z16\.h +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svuint16x2_t, svuint16_t, z24, + svclamp_single_u16_x2 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** uclamp {z24\.h - z25\.h}, z16\.h, z23\.h +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svuint16x2_t, svuint16_t, z1, + svclamp_single_u16_x2 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z[0-9]+\.h - z[0-9]+\.h}, z23\.h, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svuint16x2_t, svuint16_t, z1, + svclamp_single_u16_x2 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z0_z23: +** uclamp {z18\.h - z19\.h}, z0\.h, z23\.h +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svuint16x2_t, svuint16_t, z18, + svclamp_single_u16_x2 (z18, z0, z23), + svclamp (z18, z0, z23)) + +/* +** clamp_awkward: +** ... +** uclamp {z[0-9]+\.h - z[0-9]+\.h}, z[0-9]+\.h, z3\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svuint16x2_t, svuint16_t, + z0_res = svclamp_single_u16_x2 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..ca3e65b4bae4ed3ffb1c84c401233479f3131782 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u16_x4.c @@ -0,0 +1,104 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** uclamp {z24\.h - z27\.h}, z0\.h, z5\.h +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svuint16x4_t, svuint16_t, z24, + svclamp_single_u16_x4 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z24\.h - z27\.h}, z5\.h, z7\.h +** | +** uclamp {z28\.h - z31\.h}, z5\.h, z7\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svuint16x4_t, svuint16_t, z24, + svclamp_single_u16_x4 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z24\.h - z27\.h}, z7\.h, z16\.h +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svuint16x4_t, svuint16_t, z24, + svclamp_single_u16_x4 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** uclamp {z24\.h - z27\.h}, z16\.h, z23\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svuint16x4_t, svuint16_t, z1, + svclamp_single_u16_x4 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z[0-9]+\.h - z[0-9]+\.h}, z23\.h, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svuint16x4_t, svuint16_t, z1, + svclamp_single_u16_x4 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z16_z5: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z[0-9]+\.h - z[0-9]+\.h}, z16\.h, z5\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svuint16x4_t, svuint16_t, z18, + svclamp_single_u16_x4 (z18, z16, z5), + svclamp (z18, z16, z5)) + +/* +** clamp_awkward: +** ... +** uclamp {z[0-9]+\.h - z[0-9]+\.h}, z[0-9]+\.h, z5\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svuint16x4_t, svuint16_t, + z0_res = svclamp_single_u16_x4 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..2494df25444c551c981f54f6c08848f365e1a488 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u32_x2.c @@ -0,0 +1,94 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** uclamp {z24\.s - z25\.s}, z0\.s, z5\.s +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svuint32x2_t, svuint32_t, z24, + svclamp_single_u32_x2 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z24\.s - z25\.s}, z5\.s, z7\.s +** | +** uclamp {z28\.s - z29\.s}, z5\.s, z7\.s +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svuint32x2_t, svuint32_t, z24, + svclamp_single_u32_x2 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** uclamp {z24\.s - z25\.s}, z7\.s, z16\.s +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svuint32x2_t, svuint32_t, z24, + svclamp_single_u32_x2 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** uclamp {z24\.s - z25\.s}, z16\.s, z23\.s +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svuint32x2_t, svuint32_t, z1, + svclamp_single_u32_x2 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z[0-9]+\.s - z[0-9]+\.s}, z23\.s, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svuint32x2_t, svuint32_t, z1, + svclamp_single_u32_x2 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z0_z23: +** uclamp {z18\.s - z19\.s}, z0\.s, z23\.s +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svuint32x2_t, svuint32_t, z18, + svclamp_single_u32_x2 (z18, z0, z23), + svclamp (z18, z0, z23)) + +/* +** clamp_awkward: +** ... +** uclamp {z[0-9]+\.s - z[0-9]+\.s}, z[0-9]+\.s, z3\.s +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svuint32x2_t, svuint32_t, + z0_res = svclamp_single_u32_x2 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..a02d9dd78c4507318beabe20d5f9cfeafa49746d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u32_x4.c @@ -0,0 +1,104 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** uclamp {z24\.s - z27\.s}, z0\.s, z5\.s +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svuint32x4_t, svuint32_t, z24, + svclamp_single_u32_x4 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z24\.s - z27\.s}, z5\.s, z7\.s +** | +** uclamp {z28\.s - z31\.s}, z5\.s, z7\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svuint32x4_t, svuint32_t, z24, + svclamp_single_u32_x4 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z24\.s - z27\.s}, z7\.s, z16\.s +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svuint32x4_t, svuint32_t, z24, + svclamp_single_u32_x4 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** uclamp {z24\.s - z27\.s}, z16\.s, z23\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svuint32x4_t, svuint32_t, z1, + svclamp_single_u32_x4 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z[0-9]+\.s - z[0-9]+\.s}, z23\.s, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svuint32x4_t, svuint32_t, z1, + svclamp_single_u32_x4 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z16_z5: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z[0-9]+\.s - z[0-9]+\.s}, z16\.s, z5\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svuint32x4_t, svuint32_t, z18, + svclamp_single_u32_x4 (z18, z16, z5), + svclamp (z18, z16, z5)) + +/* +** clamp_awkward: +** ... +** uclamp {z[0-9]+\.s - z[0-9]+\.s}, z[0-9]+\.s, z5\.s +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svuint32x4_t, svuint32_t, + z0_res = svclamp_single_u32_x4 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..b827ee476e123ee1c7bc14f0f39b1bd9d398d02a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u64_x2.c @@ -0,0 +1,94 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** uclamp {z24\.d - z25\.d}, z0\.d, z5\.d +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svuint64x2_t, svuint64_t, z24, + svclamp_single_u64_x2 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z24\.d - z25\.d}, z5\.d, z7\.d +** | +** uclamp {z28\.d - z29\.d}, z5\.d, z7\.d +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svuint64x2_t, svuint64_t, z24, + svclamp_single_u64_x2 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** uclamp {z24\.d - z25\.d}, z7\.d, z16\.d +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svuint64x2_t, svuint64_t, z24, + svclamp_single_u64_x2 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** uclamp {z24\.d - z25\.d}, z16\.d, z23\.d +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svuint64x2_t, svuint64_t, z1, + svclamp_single_u64_x2 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z[0-9]+\.d - z[0-9]+\.d}, z23\.d, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svuint64x2_t, svuint64_t, z1, + svclamp_single_u64_x2 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z0_z23: +** uclamp {z18\.d - z19\.d}, z0\.d, z23\.d +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svuint64x2_t, svuint64_t, z18, + svclamp_single_u64_x2 (z18, z0, z23), + svclamp (z18, z0, z23)) + +/* +** clamp_awkward: +** ... +** uclamp {z[0-9]+\.d - z[0-9]+\.d}, z[0-9]+\.d, z3\.d +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svuint64x2_t, svuint64_t, + z0_res = svclamp_single_u64_x2 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..f27c9ab8154269f1dc3e6e8ef4743e19e28dbecf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u64_x4.c @@ -0,0 +1,104 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** uclamp {z24\.d - z27\.d}, z0\.d, z5\.d +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svuint64x4_t, svuint64_t, z24, + svclamp_single_u64_x4 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z24\.d - z27\.d}, z5\.d, z7\.d +** | +** uclamp {z28\.d - z31\.d}, z5\.d, z7\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svuint64x4_t, svuint64_t, z24, + svclamp_single_u64_x4 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z24\.d - z27\.d}, z7\.d, z16\.d +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svuint64x4_t, svuint64_t, z24, + svclamp_single_u64_x4 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** uclamp {z24\.d - z27\.d}, z16\.d, z23\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svuint64x4_t, svuint64_t, z1, + svclamp_single_u64_x4 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z[0-9]+\.d - z[0-9]+\.d}, z23\.d, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svuint64x4_t, svuint64_t, z1, + svclamp_single_u64_x4 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z16_z5: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z[0-9]+\.d - z[0-9]+\.d}, z16\.d, z5\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svuint64x4_t, svuint64_t, z18, + svclamp_single_u64_x4 (z18, z16, z5), + svclamp (z18, z16, z5)) + +/* +** clamp_awkward: +** ... +** uclamp {z[0-9]+\.d - z[0-9]+\.d}, z[0-9]+\.d, z5\.d +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svuint64x4_t, svuint64_t, + z0_res = svclamp_single_u64_x4 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..27f6b1dd19247832edbdd4e7cbf4d7a5b402b16b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u8_x2.c @@ -0,0 +1,94 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** uclamp {z24\.b - z25\.b}, z0\.b, z5\.b +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svuint8x2_t, svuint8_t, z24, + svclamp_single_u8_x2 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z24\.b - z25\.b}, z5\.b, z7\.b +** | +** uclamp {z28\.b - z29\.b}, z5\.b, z7\.b +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svuint8x2_t, svuint8_t, z24, + svclamp_single_u8_x2 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** uclamp {z24\.b - z25\.b}, z7\.b, z16\.b +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svuint8x2_t, svuint8_t, z24, + svclamp_single_u8_x2 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** uclamp {z24\.b - z25\.b}, z16\.b, z23\.b +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svuint8x2_t, svuint8_t, z1, + svclamp_single_u8_x2 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z[0-9]+\.b - z[0-9]+\.b}, z23\.b, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svuint8x2_t, svuint8_t, z1, + svclamp_single_u8_x2 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z0_z23: +** uclamp {z18\.b - z19\.b}, z0\.b, z23\.b +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z0_z23, svuint8x2_t, svuint8_t, z18, + svclamp_single_u8_x2 (z18, z0, z23), + svclamp (z18, z0, z23)) + +/* +** clamp_awkward: +** ... +** uclamp {z[0-9]+\.b - z[0-9]+\.b}, z[0-9]+\.b, z3\.b +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svuint8x2_t, svuint8_t, + z0_res = svclamp_single_u8_x2 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..1e04634bd87d33bee7029e773cd42310a108695d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/clamp_u8_x4.c @@ -0,0 +1,104 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** clamp_z24_z24_z0_z5: +** uclamp {z24\.b - z27\.b}, z0\.b, z5\.b +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z24_z0_z5, svuint8x4_t, svuint8_t, z24, + svclamp_single_u8_x4 (z24, z0, z5), + svclamp (z24, z0, z5)) + +/* +** clamp_z24_z28_z5_z7: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z24\.b - z27\.b}, z5\.b, z7\.b +** | +** uclamp {z28\.b - z31\.b}, z5\.b, z7\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z28_z5_z7, svuint8x4_t, svuint8_t, z24, + svclamp_single_u8_x4 (z28, z5, z7), + svclamp (z28, z5, z7)) + +/* +** clamp_z24_z1_z7_z16: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z24\.b - z27\.b}, z7\.b, z16\.b +** ret +*/ +TEST_XN_SINGLE (clamp_z24_z1_z7_z16, svuint8x4_t, svuint8_t, z24, + svclamp_single_u8_x4 (z1, z7, z16), + svclamp (z1, z7, z16)) + +/* +** clamp_z1_z24_z16_z23: +** uclamp {z24\.b - z27\.b}, z16\.b, z23\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z24_z16_z23, svuint8x4_t, svuint8_t, z1, + svclamp_single_u8_x4 (z24, z16, z23), + svclamp (z24, z16, z23)) + +/* +** clamp_z1_z1_z23_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z[0-9]+\.b - z[0-9]+\.b}, z23\.b, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z1_z1_z23_z0, svuint8x4_t, svuint8_t, z1, + svclamp_single_u8_x4 (z1, z23, z0), + svclamp (z1, z23, z0)) + +/* +** clamp_z18_z18_z16_z5: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uclamp {z[0-9]+\.b - z[0-9]+\.b}, z16\.b, z5\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (clamp_z18_z18_z16_z5, svuint8x4_t, svuint8_t, z18, + svclamp_single_u8_x4 (z18, z16, z5), + svclamp (z18, z16, z5)) + +/* +** clamp_awkward: +** ... +** uclamp {z[0-9]+\.b - z[0-9]+\.b}, z[0-9]+\.b, z5\.b +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (clamp_awkward, svuint8x4_t, svuint8_t, + z0_res = svclamp_single_u8_x4 (z1, z0, zn), + z0_res = svclamp (z1, z0, zn)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cntp_c16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cntp_c16.c new file mode 100644 index 0000000000000000000000000000000000000000..2206206ef6463c16698673f1d662a7ace8f5593f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cntp_c16.c @@ -0,0 +1,39 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** cntp_x0_pn0_2: +** cntp x0, pn0\.h, vlx2 +** ret +*/ +TEST_COUNT_PN (cntp_x0_pn0_2, + x0 = svcntp_c16 (pn0, 2), + x0 = svcntp_c16 (pn0, 2)) + +/* +** cntp_x15_pn7_4: +** cntp x15, pn7\.h, vlx4 +** ret +*/ +TEST_COUNT_PN (cntp_x15_pn7_4, + x15 = svcntp_c16 (pn7, 4), + x15 = svcntp_c16 (pn7, 4)) + +/* +** cntp_x17_pn8_2: +** cntp x17, pn8\.h, vlx2 +** ret +*/ +TEST_COUNT_PN (cntp_x17_pn8_2, + x17 = svcntp_c16 (pn8, 2), + x17 = svcntp_c16 (pn8, 2)) + +/* +** cntp_x0_pn15_4: +** cntp x0, pn15\.h, vlx4 +** ret +*/ +TEST_COUNT_PN (cntp_x0_pn15_4, + x0 = svcntp_c16 (pn15, 4), + x0 = svcntp_c16 (pn15, 4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cntp_c32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cntp_c32.c new file mode 100644 index 0000000000000000000000000000000000000000..86d15c6207ed7d054b853b0506a8bcd85631bcdb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cntp_c32.c @@ -0,0 +1,39 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** cntp_x0_pn0_2: +** cntp x0, pn0\.s, vlx2 +** ret +*/ +TEST_COUNT_PN (cntp_x0_pn0_2, + x0 = svcntp_c32 (pn0, 2), + x0 = svcntp_c32 (pn0, 2)) + +/* +** cntp_x15_pn7_4: +** cntp x15, pn7\.s, vlx4 +** ret +*/ +TEST_COUNT_PN (cntp_x15_pn7_4, + x15 = svcntp_c32 (pn7, 4), + x15 = svcntp_c32 (pn7, 4)) + +/* +** cntp_x17_pn8_2: +** cntp x17, pn8\.s, vlx2 +** ret +*/ +TEST_COUNT_PN (cntp_x17_pn8_2, + x17 = svcntp_c32 (pn8, 2), + x17 = svcntp_c32 (pn8, 2)) + +/* +** cntp_x0_pn15_4: +** cntp x0, pn15\.s, vlx4 +** ret +*/ +TEST_COUNT_PN (cntp_x0_pn15_4, + x0 = svcntp_c32 (pn15, 4), + x0 = svcntp_c32 (pn15, 4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cntp_c64.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cntp_c64.c new file mode 100644 index 0000000000000000000000000000000000000000..d56e6761cde4e9b16b1276009485930d0943ae5a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cntp_c64.c @@ -0,0 +1,39 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** cntp_x0_pn0_2: +** cntp x0, pn0\.d, vlx2 +** ret +*/ +TEST_COUNT_PN (cntp_x0_pn0_2, + x0 = svcntp_c64 (pn0, 2), + x0 = svcntp_c64 (pn0, 2)) + +/* +** cntp_x15_pn7_4: +** cntp x15, pn7\.d, vlx4 +** ret +*/ +TEST_COUNT_PN (cntp_x15_pn7_4, + x15 = svcntp_c64 (pn7, 4), + x15 = svcntp_c64 (pn7, 4)) + +/* +** cntp_x17_pn8_2: +** cntp x17, pn8\.d, vlx2 +** ret +*/ +TEST_COUNT_PN (cntp_x17_pn8_2, + x17 = svcntp_c64 (pn8, 2), + x17 = svcntp_c64 (pn8, 2)) + +/* +** cntp_x0_pn15_4: +** cntp x0, pn15\.d, vlx4 +** ret +*/ +TEST_COUNT_PN (cntp_x0_pn15_4, + x0 = svcntp_c64 (pn15, 4), + x0 = svcntp_c64 (pn15, 4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cntp_c8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cntp_c8.c new file mode 100644 index 0000000000000000000000000000000000000000..35ce2d67e8da2cc376f05769306dcdb7dee73107 --- 
/dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cntp_c8.c @@ -0,0 +1,39 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** cntp_x0_pn0_2: +** cntp x0, pn0\.b, vlx2 +** ret +*/ +TEST_COUNT_PN (cntp_x0_pn0_2, + x0 = svcntp_c8 (pn0, 2), + x0 = svcntp_c8 (pn0, 2)) + +/* +** cntp_x15_pn7_4: +** cntp x15, pn7\.b, vlx4 +** ret +*/ +TEST_COUNT_PN (cntp_x15_pn7_4, + x15 = svcntp_c8 (pn7, 4), + x15 = svcntp_c8 (pn7, 4)) + +/* +** cntp_x17_pn8_2: +** cntp x17, pn8\.b, vlx2 +** ret +*/ +TEST_COUNT_PN (cntp_x17_pn8_2, + x17 = svcntp_c8 (pn8, 2), + x17 = svcntp_c8 (pn8, 2)) + +/* +** cntp_x0_pn15_4: +** cntp x0, pn15\.b, vlx4 +** ret +*/ +TEST_COUNT_PN (cntp_x0_pn15_4, + x0 = svcntp_c8 (pn15, 4), + x0 = svcntp_c8 (pn15, 4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_bf16_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_bf16_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..639991aacd3e3fc5eef957018a386245090f7463 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_bf16_f32_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** cvt_z0_z0: +** bfcvt z0\.h, {z0\.s - z1\.s} +** ret +*/ +TEST_X2_NARROW (cvt_z0_z0, svfloat32x2_t, svbfloat16_t, + z0_res = svcvt_bf16_f32_x2 (z0), + z0_res = svcvt_bf16 (z0)) + +/* +** cvt_z0_z6: +** bfcvt z0\.h, {z6\.s - z7\.s} +** ret +*/ +TEST_X2_NARROW (cvt_z0_z6, svfloat32x2_t, svbfloat16_t, + z0_res = svcvt_bf16_f32_x2 (z6), + z0_res = svcvt_bf16 (z6)) + +/* +** cvt_z0_z29: +** mov [^\n]+ +** mov [^\n]+ +** bfcvt z0\.h, [^\n]+ +** ret +*/ +TEST_X2_NARROW (cvt_z0_z29, svfloat32x2_t, svbfloat16_t, + z0_res = svcvt_bf16_f32_x2 (z29), + z0_res = svcvt_bf16 (z29)) + +/* +** cvt_z5_z0: +** bfcvt z5\.h, {z0\.s - z1\.s} +** ret +*/ +TEST_X2_NARROW (cvt_z5_z0, svfloat32x2_t, svbfloat16_t, + z5 = svcvt_bf16_f32_x2 (z0), + z5 = svcvt_bf16 (z0)) + +/* +** cvt_z22_z16: +** bfcvt z22\.h, {z16\.s - z17\.s} +** ret +*/ +TEST_X2_NARROW (cvt_z22_z16, svfloat32x2_t, svbfloat16_t, + z22 = svcvt_bf16_f32_x2 (z16), + z22 = svcvt_bf16 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f16_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f16_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..35f8c1c56c9990426f8fd061a43d9698d7116a4d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f16_f32_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** cvt_z0_z0: +** fcvt z0\.h, {z0\.s - z1\.s} +** ret +*/ +TEST_X2_NARROW (cvt_z0_z0, svfloat32x2_t, svfloat16_t, + z0_res = svcvt_f16_f32_x2 (z0), + z0_res = svcvt_f16 (z0)) + +/* +** cvt_z0_z6: +** fcvt z0\.h, {z6\.s - z7\.s} +** ret +*/ +TEST_X2_NARROW (cvt_z0_z6, svfloat32x2_t, svfloat16_t, + z0_res = svcvt_f16_f32_x2 (z6), + z0_res = svcvt_f16 (z6)) + +/* +** cvt_z0_z29: +** mov [^\n]+ +** mov [^\n]+ +** fcvt z0\.h, [^\n]+ +** ret +*/ +TEST_X2_NARROW (cvt_z0_z29, svfloat32x2_t, svfloat16_t, + z0_res = svcvt_f16_f32_x2 (z29), + z0_res = svcvt_f16 (z29)) + +/* +** cvt_z5_z0: +** fcvt z5\.h, {z0\.s - z1\.s} +** ret +*/ +TEST_X2_NARROW (cvt_z5_z0, svfloat32x2_t, svfloat16_t, + z5 = svcvt_f16_f32_x2 (z0), + z5 = svcvt_f16 (z0)) + +/* +** cvt_z22_z16: +** fcvt z22\.h, {z16\.s - z17\.s} +** ret +*/ +TEST_X2_NARROW (cvt_z22_z16, svfloat32x2_t, svfloat16_t, + z22 = svcvt_f16_f32_x2 
(z16), + z22 = svcvt_f16 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..3e395122d72fa11c3ba660c897d6aba5ab06d9e9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_s32_x2.c @@ -0,0 +1,43 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** cvt_z0_z4: +** scvtf {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_DUAL_XN (cvt_z0_z4, svfloat32x2_t, svint32x2_t, z0, + svcvt_f32_s32_x2 (z4), + svcvt_f32 (z4)) + +/* +** cvt_z4_z0: +** scvtf {z4\.s - z5\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_DUAL_XN (cvt_z4_z0, svint32x2_t, svfloat32x2_t, z4, + svcvt_f32_s32_x2 (z0), + svcvt_f32 (z0)) + +/* +** cvt_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** scvtf {z18\.s - z19\.s}, [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z18_z23, svfloat32x2_t, svint32x2_t, z18, + svcvt_f32_s32_x2 (z23), + svcvt_f32 (z23)) + +/* +** cvt_z23_z28: +** scvtf [^\n]+, {z28\.s - z29\.s} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z23_z28, svint32x2_t, svfloat32x2_t, z23, + svcvt_f32_s32_x2 (z28), + svcvt_f32 (z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..ae3d5822a1438043dce4625f6da8e4f9a3caccb5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_s32_x4.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** cvt_z0_z4: +** scvtf {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_DUAL_XN (cvt_z0_z4, svfloat32x4_t, svint32x4_t, z0, + svcvt_f32_s32_x4 (z4), + svcvt_f32 (z4)) + +/* +** cvt_z4_z0: +** scvtf {z4\.s - z7\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_DUAL_XN (cvt_z4_z0, svint32x4_t, svfloat32x4_t, z4, + svcvt_f32_s32_x4 (z0), + svcvt_f32 (z0)) + +/* +** cvt_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** scvtf {z4\.s - z7\.s}, [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z4_z18, svint32x4_t, svfloat32x4_t, z4, + svcvt_f32_s32_x4 (z18), + svcvt_f32 (z18)) + +/* +** cvt_z28_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** scvtf {z28\.s - z31\.s}, [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z28_z23, svfloat32x4_t, svint32x4_t, z28, + svcvt_f32_s32_x4 (z23), + svcvt_f32 (z23)) + +/* +** cvt_z23_z28: +** scvtf [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z23_z28, svint32x4_t, svfloat32x4_t, z23, + svcvt_f32_s32_x4 (z28), + svcvt_f32 (z28)) + +/* +** cvt_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** scvtf {z[^\n]+}, {z.*} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z23_z18, svint32x4_t, svfloat32x4_t, z23, + svcvt_f32_s32_x4 (z18), + svcvt_f32 (z18)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_u32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..da23d1f069de331a182c07a12b1b0cd0df6f2342 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_u32_x2.c @@ -0,0 +1,43 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** cvt_z0_z4: +** ucvtf {z0\.s - z1\.s}, 
{z4\.s - z5\.s} +** ret +*/ +TEST_DUAL_XN (cvt_z0_z4, svfloat32x2_t, svuint32x2_t, z0, + svcvt_f32_u32_x2 (z4), + svcvt_f32 (z4)) + +/* +** cvt_z4_z0: +** ucvtf {z4\.s - z5\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_DUAL_XN (cvt_z4_z0, svuint32x2_t, svfloat32x2_t, z4, + svcvt_f32_u32_x2 (z0), + svcvt_f32 (z0)) + +/* +** cvt_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** ucvtf {z18\.s - z19\.s}, [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z18_z23, svfloat32x2_t, svuint32x2_t, z18, + svcvt_f32_u32_x2 (z23), + svcvt_f32 (z23)) + +/* +** cvt_z23_z28: +** ucvtf [^\n]+, {z28\.s - z29\.s} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z23_z28, svuint32x2_t, svfloat32x2_t, z23, + svcvt_f32_u32_x2 (z28), + svcvt_f32 (z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_u32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..63029815987659791fd39533086d0cf1d4c5971e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_u32_x4.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** cvt_z0_z4: +** ucvtf {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_DUAL_XN (cvt_z0_z4, svfloat32x4_t, svuint32x4_t, z0, + svcvt_f32_u32_x4 (z4), + svcvt_f32 (z4)) + +/* +** cvt_z4_z0: +** ucvtf {z4\.s - z7\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_DUAL_XN (cvt_z4_z0, svuint32x4_t, svfloat32x4_t, z4, + svcvt_f32_u32_x4 (z0), + svcvt_f32 (z0)) + +/* +** cvt_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ucvtf {z4\.s - z7\.s}, [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z4_z18, svuint32x4_t, svfloat32x4_t, z4, + svcvt_f32_u32_x4 (z18), + svcvt_f32 (z18)) + +/* +** cvt_z28_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ucvtf {z28\.s - z31\.s}, [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z28_z23, svfloat32x4_t, svuint32x4_t, z28, + svcvt_f32_u32_x4 (z23), + svcvt_f32 (z23)) + +/* +** cvt_z23_z28: +** ucvtf [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z23_z28, svuint32x4_t, svfloat32x4_t, z23, + svcvt_f32_u32_x4 (z28), + svcvt_f32 (z28)) + +/* +** cvt_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ucvtf {z[^\n]+}, {z.*} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z23_z18, svuint32x4_t, svfloat32x4_t, z23, + svcvt_f32_u32_x4 (z18), + svcvt_f32 (z18)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_s32_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_s32_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..935d7dbaec25628b5de69ffe65fafff39a38653e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_s32_f32_x2.c @@ -0,0 +1,43 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** cvt_z0_z4: +** fcvtzs {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_DUAL_XN (cvt_z0_z4, svint32x2_t, svfloat32x2_t, z0, + svcvt_s32_f32_x2 (z4), + svcvt_s32 (z4)) + +/* +** cvt_z4_z0: +** fcvtzs {z4\.s - z5\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_DUAL_XN (cvt_z4_z0, svfloat32x2_t, svint32x2_t, z4, + svcvt_s32_f32_x2 (z0), + svcvt_s32 (z0)) + +/* +** cvt_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** fcvtzs {z18\.s - z19\.s}, [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z18_z23, svint32x2_t, svfloat32x2_t, z18, + svcvt_s32_f32_x2 (z23), + svcvt_s32 (z23)) + +/* 
+** cvt_z23_z28: +** fcvtzs [^\n]+, {z28\.s - z29\.s} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z23_z28, svfloat32x2_t, svint32x2_t, z23, + svcvt_s32_f32_x2 (z28), + svcvt_s32 (z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_s32_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_s32_f32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..45b90d5efdc3526cf8692d833de7ae20b9d62623 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_s32_f32_x4.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** cvt_z0_z4: +** fcvtzs {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_DUAL_XN (cvt_z0_z4, svint32x4_t, svfloat32x4_t, z0, + svcvt_s32_f32_x4 (z4), + svcvt_s32 (z4)) + +/* +** cvt_z4_z0: +** fcvtzs {z4\.s - z7\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_DUAL_XN (cvt_z4_z0, svfloat32x4_t, svint32x4_t, z4, + svcvt_s32_f32_x4 (z0), + svcvt_s32 (z0)) + +/* +** cvt_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fcvtzs {z4\.s - z7\.s}, [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z4_z18, svfloat32x4_t, svint32x4_t, z4, + svcvt_s32_f32_x4 (z18), + svcvt_s32 (z18)) + +/* +** cvt_z28_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fcvtzs {z28\.s - z31\.s}, [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z28_z23, svint32x4_t, svfloat32x4_t, z28, + svcvt_s32_f32_x4 (z23), + svcvt_s32 (z23)) + +/* +** cvt_z23_z28: +** fcvtzs [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z23_z28, svfloat32x4_t, svint32x4_t, z23, + svcvt_s32_f32_x4 (z28), + svcvt_s32 (z28)) + +/* +** cvt_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fcvtzs {z[^\n]+}, {z.*} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z23_z18, svfloat32x4_t, svint32x4_t, z23, + svcvt_s32_f32_x4 (z18), + svcvt_s32 (z18)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_u32_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_u32_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..ad57a78dd7093d2ce8ed0e5d14d1b945fff25f32 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_u32_f32_x2.c @@ -0,0 +1,43 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** cvt_z0_z4: +** fcvtzu {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_DUAL_XN (cvt_z0_z4, svuint32x2_t, svfloat32x2_t, z0, + svcvt_u32_f32_x2 (z4), + svcvt_u32 (z4)) + +/* +** cvt_z4_z0: +** fcvtzu {z4\.s - z5\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_DUAL_XN (cvt_z4_z0, svfloat32x2_t, svuint32x2_t, z4, + svcvt_u32_f32_x2 (z0), + svcvt_u32 (z0)) + +/* +** cvt_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** fcvtzu {z18\.s - z19\.s}, [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z18_z23, svuint32x2_t, svfloat32x2_t, z18, + svcvt_u32_f32_x2 (z23), + svcvt_u32 (z23)) + +/* +** cvt_z23_z28: +** fcvtzu [^\n]+, {z28\.s - z29\.s} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z23_z28, svfloat32x2_t, svuint32x2_t, z23, + svcvt_u32_f32_x2 (z28), + svcvt_u32 (z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_u32_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_u32_f32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..29a140ccd3fe394a96705d269a3a59ff6a811486 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_u32_f32_x4.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** cvt_z0_z4: +** fcvtzu {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_DUAL_XN (cvt_z0_z4, svuint32x4_t, svfloat32x4_t, z0, + svcvt_u32_f32_x4 (z4), + svcvt_u32 (z4)) + +/* +** cvt_z4_z0: +** fcvtzu {z4\.s - z7\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_DUAL_XN (cvt_z4_z0, svfloat32x4_t, svuint32x4_t, z4, + svcvt_u32_f32_x4 (z0), + svcvt_u32 (z0)) + +/* +** cvt_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fcvtzu {z4\.s - z7\.s}, [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z4_z18, svfloat32x4_t, svuint32x4_t, z4, + svcvt_u32_f32_x4 (z18), + svcvt_u32 (z18)) + +/* +** cvt_z28_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fcvtzu {z28\.s - z31\.s}, [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z28_z23, svuint32x4_t, svfloat32x4_t, z28, + svcvt_u32_f32_x4 (z23), + svcvt_u32 (z23)) + +/* +** cvt_z23_z28: +** fcvtzu [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z23_z28, svfloat32x4_t, svuint32x4_t, z23, + svcvt_u32_f32_x4 (z28), + svcvt_u32 (z28)) + +/* +** cvt_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fcvtzu {z[^\n]+}, {z.*} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (cvt_z23_z18, svfloat32x4_t, svuint32x4_t, z23, + svcvt_u32_f32_x4 (z18), + svcvt_u32 (z18)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtn_bf16_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtn_bf16_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..8974bedadc628ca8ac5e2fb720855cba2a42d57c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtn_bf16_f32_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** cvtn_z0_z0: +** bfcvtn z0\.h, {z0\.s - z1\.s} +** ret +*/ +TEST_X2_NARROW (cvtn_z0_z0, svfloat32x2_t, svbfloat16_t, + z0_res = svcvtn_bf16_f32_x2 (z0), + z0_res = svcvtn_bf16 (z0)) + +/* +** cvtn_z0_z6: +** bfcvtn z0\.h, {z6\.s - z7\.s} +** ret +*/ +TEST_X2_NARROW (cvtn_z0_z6, svfloat32x2_t, svbfloat16_t, + z0_res = svcvtn_bf16_f32_x2 (z6), + z0_res = svcvtn_bf16 (z6)) + +/* +** cvtn_z0_z29: +** mov [^\n]+ +** mov [^\n]+ +** bfcvtn z0\.h, [^\n]+ +** ret +*/ +TEST_X2_NARROW (cvtn_z0_z29, svfloat32x2_t, svbfloat16_t, + z0_res = svcvtn_bf16_f32_x2 (z29), + z0_res = svcvtn_bf16 (z29)) + +/* +** cvtn_z5_z0: +** bfcvtn z5\.h, {z0\.s - z1\.s} +** ret +*/ +TEST_X2_NARROW (cvtn_z5_z0, svfloat32x2_t, svbfloat16_t, + z5 = svcvtn_bf16_f32_x2 (z0), + z5 = svcvtn_bf16 (z0)) + +/* +** cvtn_z22_z16: +** bfcvtn z22\.h, {z16\.s - z17\.s} +** ret +*/ +TEST_X2_NARROW (cvtn_z22_z16, svfloat32x2_t, svbfloat16_t, + z22 = svcvtn_bf16_f32_x2 (z16), + z22 = svcvtn_bf16 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtn_f16_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtn_f16_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..6693d386e90d31f15542cdf404b73097dbec2df7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtn_f16_f32_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** cvtn_z0_z0: +** fcvtn z0\.h, {z0\.s - z1\.s} +** ret +*/ +TEST_X2_NARROW (cvtn_z0_z0, svfloat32x2_t, 
svfloat16_t, + z0_res = svcvtn_f16_f32_x2 (z0), + z0_res = svcvtn_f16 (z0)) + +/* +** cvtn_z0_z6: +** fcvtn z0\.h, {z6\.s - z7\.s} +** ret +*/ +TEST_X2_NARROW (cvtn_z0_z6, svfloat32x2_t, svfloat16_t, + z0_res = svcvtn_f16_f32_x2 (z6), + z0_res = svcvtn_f16 (z6)) + +/* +** cvtn_z0_z29: +** mov [^\n]+ +** mov [^\n]+ +** fcvtn z0\.h, [^\n]+ +** ret +*/ +TEST_X2_NARROW (cvtn_z0_z29, svfloat32x2_t, svfloat16_t, + z0_res = svcvtn_f16_f32_x2 (z29), + z0_res = svcvtn_f16 (z29)) + +/* +** cvtn_z5_z0: +** fcvtn z5\.h, {z0\.s - z1\.s} +** ret +*/ +TEST_X2_NARROW (cvtn_z5_z0, svfloat32x2_t, svfloat16_t, + z5 = svcvtn_f16_f32_x2 (z0), + z5 = svcvtn_f16 (z0)) + +/* +** cvtn_z22_z16: +** fcvtn z22\.h, {z16\.s - z17\.s} +** ret +*/ +TEST_X2_NARROW (cvtn_z22_z16, svfloat32x2_t, svfloat16_t, + z22 = svcvtn_f16_f32_x2 (z16), + z22 = svcvtn_f16 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_f32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_f32.c new file mode 100644 index 0000000000000000000000000000000000000000..815aadae1746edb3edffaa2292f38cdce92b84a6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_f32.c @@ -0,0 +1,44 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_f32_tied1: +** fdot z0\.s, z4\.h, z5\.h +** ret +*/ +TEST_DUAL_Z (dot_f32_tied1, svfloat32_t, svfloat16_t, + z0 = svdot_f32_f16 (z0, z4, z5), + z0 = svdot (z0, z4, z5)) + +/* +** dot_f32_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fdot z0\.s, \1\.h, z1\.h +** ret +*/ +TEST_DUAL_Z_REV (dot_f32_tied2, svfloat32_t, svfloat16_t, + z0_res = svdot_f32_f16 (z4, z0, z1), + z0_res = svdot (z4, z0, z1)) + +/* +** dot_f32_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fdot z0\.s, z1\.h, \1\.h +** ret +*/ +TEST_DUAL_Z_REV (dot_f32_tied3, svfloat32_t, svfloat16_t, + z0_res = svdot_f32_f16 (z4, z1, z0), + z0_res = svdot (z4, z1, z0)) + +/* +** dot_f32_untied: +** movprfx z0, z1 +** fdot z0\.s, z4\.h, z5\.h +** ret +*/ +TEST_DUAL_Z (dot_f32_untied, svfloat32_t, svfloat16_t, + z0 = svdot_f32_f16 (z1, z4, z5), + z0 = svdot (z1, z4, z5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_f32.c new file mode 100644 index 0000000000000000000000000000000000000000..263b21e65169df5b04d22b4c1293190328625c7a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_f32.c @@ -0,0 +1,93 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_lane_0_f32_tied1: +** fdot z0\.s, z4\.h, z5\.h\[0\] +** ret +*/ +TEST_DUAL_Z (dot_lane_0_f32_tied1, svfloat32_t, svfloat16_t, + z0 = svdot_lane_f32_f16 (z0, z4, z5, 0), + z0 = svdot_lane (z0, z4, z5, 0)) + +/* +** dot_lane_0_f32_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fdot z0\.s, \1\.h, z1\.h\[0\] +** ret +*/ +TEST_DUAL_Z_REV (dot_lane_0_f32_tied2, svfloat32_t, svfloat16_t, + z0_res = svdot_lane_f32_f16 (z4, z0, z1, 0), + z0_res = svdot_lane (z4, z0, z1, 0)) + +/* +** dot_lane_0_f32_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fdot z0\.s, z1\.h, \1\.h\[0\] +** ret +*/ +TEST_DUAL_Z_REV (dot_lane_0_f32_tied3, svfloat32_t, svfloat16_t, + z0_res = svdot_lane_f32_f16 (z4, z1, z0, 0), + z0_res = svdot_lane (z4, z1, z0, 0)) + +/* +** dot_lane_0_f32_untied: +** movprfx z0, z1 +** fdot z0\.s, z4\.h, z5\.h\[0\] +** ret +*/ +TEST_DUAL_Z (dot_lane_0_f32_untied, svfloat32_t, svfloat16_t, + z0 = svdot_lane_f32_f16 
(z1, z4, z5, 0), + z0 = svdot_lane (z1, z4, z5, 0)) + +/* +** dot_lane_1_f32: +** fdot z0\.s, z4\.h, z5\.h\[1\] +** ret +*/ +TEST_DUAL_Z (dot_lane_1_f32, svfloat32_t, svfloat16_t, + z0 = svdot_lane_f32_f16 (z0, z4, z5, 1), + z0 = svdot_lane (z0, z4, z5, 1)) + +/* +** dot_lane_2_f32: +** fdot z0\.s, z4\.h, z5\.h\[2\] +** ret +*/ +TEST_DUAL_Z (dot_lane_2_f32, svfloat32_t, svfloat16_t, + z0 = svdot_lane_f32_f16 (z0, z4, z5, 2), + z0 = svdot_lane (z0, z4, z5, 2)) + +/* +** dot_lane_3_f32: +** fdot z0\.s, z4\.h, z5\.h\[3\] +** ret +*/ +TEST_DUAL_Z (dot_lane_3_f32, svfloat32_t, svfloat16_t, + z0 = svdot_lane_f32_f16 (z0, z4, z5, 3), + z0 = svdot_lane (z0, z4, z5, 3)) + +/* +** dot_lane_z8_f32: +** str d8, \[sp, -16\]! +** mov (z[0-7])\.d, z8\.d +** fdot z0\.s, z1\.h, \1\.h\[1\] +** ldr d8, \[sp\], 16 +** ret +*/ +TEST_DUAL_LANE_REG (dot_lane_z8_f32, svfloat32_t, svfloat16_t, z8, + z0 = svdot_lane_f32_f16 (z0, z1, z8, 1), + z0 = svdot_lane (z0, z1, z8, 1)) + +/* +** dot_lane_z16_f32: +** mov (z[0-7])\.d, z16\.d +** fdot z0\.s, z1\.h, \1\.h\[1\] +** ret +*/ +TEST_DUAL_LANE_REG (dot_lane_z16_f32, svfloat32_t, svfloat16_t, z16, + z0 = svdot_lane_f32_f16 (z0, z1, z16, 1), + z0 = svdot_lane (z0, z1, z16, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_s32.c new file mode 100644 index 0000000000000000000000000000000000000000..58abbeaa52240f1b658e10ab26385925ad99b125 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_s32.c @@ -0,0 +1,93 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_lane_0_s32_tied1: +** sdot z0\.s, z4\.h, z5\.h\[0\] +** ret +*/ +TEST_DUAL_Z (dot_lane_0_s32_tied1, svint32_t, svint16_t, + z0 = svdot_lane_s32_s16 (z0, z4, z5, 0), + z0 = svdot_lane (z0, z4, z5, 0)) + +/* +** dot_lane_0_s32_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** sdot z0\.s, \1\.h, z1\.h\[0\] +** ret +*/ +TEST_DUAL_Z_REV (dot_lane_0_s32_tied2, svint32_t, svint16_t, + z0_res = svdot_lane_s32_s16 (z4, z0, z1, 0), + z0_res = svdot_lane (z4, z0, z1, 0)) + +/* +** dot_lane_0_s32_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** sdot z0\.s, z1\.h, \1\.h\[0\] +** ret +*/ +TEST_DUAL_Z_REV (dot_lane_0_s32_tied3, svint32_t, svint16_t, + z0_res = svdot_lane_s32_s16 (z4, z1, z0, 0), + z0_res = svdot_lane (z4, z1, z0, 0)) + +/* +** dot_lane_0_s32_untied: +** movprfx z0, z1 +** sdot z0\.s, z4\.h, z5\.h\[0\] +** ret +*/ +TEST_DUAL_Z (dot_lane_0_s32_untied, svint32_t, svint16_t, + z0 = svdot_lane_s32_s16 (z1, z4, z5, 0), + z0 = svdot_lane (z1, z4, z5, 0)) + +/* +** dot_lane_1_s32: +** sdot z0\.s, z4\.h, z5\.h\[1\] +** ret +*/ +TEST_DUAL_Z (dot_lane_1_s32, svint32_t, svint16_t, + z0 = svdot_lane_s32_s16 (z0, z4, z5, 1), + z0 = svdot_lane (z0, z4, z5, 1)) + +/* +** dot_lane_2_s32: +** sdot z0\.s, z4\.h, z5\.h\[2\] +** ret +*/ +TEST_DUAL_Z (dot_lane_2_s32, svint32_t, svint16_t, + z0 = svdot_lane_s32_s16 (z0, z4, z5, 2), + z0 = svdot_lane (z0, z4, z5, 2)) + +/* +** dot_lane_3_s32: +** sdot z0\.s, z4\.h, z5\.h\[3\] +** ret +*/ +TEST_DUAL_Z (dot_lane_3_s32, svint32_t, svint16_t, + z0 = svdot_lane_s32_s16 (z0, z4, z5, 3), + z0 = svdot_lane (z0, z4, z5, 3)) + +/* +** dot_lane_z8_s32: +** str d8, \[sp, -16\]! 
+** mov (z[0-7])\.d, z8\.d +** sdot z0\.s, z1\.h, \1\.h\[1\] +** ldr d8, \[sp\], 16 +** ret +*/ +TEST_DUAL_LANE_REG (dot_lane_z8_s32, svint32_t, svint16_t, z8, + z0 = svdot_lane_s32_s16 (z0, z1, z8, 1), + z0 = svdot_lane (z0, z1, z8, 1)) + +/* +** dot_lane_z16_s32: +** mov (z[0-7])\.d, z16\.d +** sdot z0\.s, z1\.h, \1\.h\[1\] +** ret +*/ +TEST_DUAL_LANE_REG (dot_lane_z16_s32, svint32_t, svint16_t, z16, + z0 = svdot_lane_s32_s16 (z0, z1, z16, 1), + z0 = svdot_lane (z0, z1, z16, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_u32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_u32.c new file mode 100644 index 0000000000000000000000000000000000000000..2cf9a14cfbad8f14a40b1d0d845d0aa5c3e9b368 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_u32.c @@ -0,0 +1,93 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_lane_0_u32_tied1: +** udot z0\.s, z4\.h, z5\.h\[0\] +** ret +*/ +TEST_DUAL_Z (dot_lane_0_u32_tied1, svuint32_t, svuint16_t, + z0 = svdot_lane_u32_u16 (z0, z4, z5, 0), + z0 = svdot_lane (z0, z4, z5, 0)) + +/* +** dot_lane_0_u32_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** udot z0\.s, \1\.h, z1\.h\[0\] +** ret +*/ +TEST_DUAL_Z_REV (dot_lane_0_u32_tied2, svuint32_t, svuint16_t, + z0_res = svdot_lane_u32_u16 (z4, z0, z1, 0), + z0_res = svdot_lane (z4, z0, z1, 0)) + +/* +** dot_lane_0_u32_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** udot z0\.s, z1\.h, \1\.h\[0\] +** ret +*/ +TEST_DUAL_Z_REV (dot_lane_0_u32_tied3, svuint32_t, svuint16_t, + z0_res = svdot_lane_u32_u16 (z4, z1, z0, 0), + z0_res = svdot_lane (z4, z1, z0, 0)) + +/* +** dot_lane_0_u32_untied: +** movprfx z0, z1 +** udot z0\.s, z4\.h, z5\.h\[0\] +** ret +*/ +TEST_DUAL_Z (dot_lane_0_u32_untied, svuint32_t, svuint16_t, + z0 = svdot_lane_u32_u16 (z1, z4, z5, 0), + z0 = svdot_lane (z1, z4, z5, 0)) + +/* +** dot_lane_1_u32: +** udot z0\.s, z4\.h, z5\.h\[1\] +** ret +*/ +TEST_DUAL_Z (dot_lane_1_u32, svuint32_t, svuint16_t, + z0 = svdot_lane_u32_u16 (z0, z4, z5, 1), + z0 = svdot_lane (z0, z4, z5, 1)) + +/* +** dot_lane_2_u32: +** udot z0\.s, z4\.h, z5\.h\[2\] +** ret +*/ +TEST_DUAL_Z (dot_lane_2_u32, svuint32_t, svuint16_t, + z0 = svdot_lane_u32_u16 (z0, z4, z5, 2), + z0 = svdot_lane (z0, z4, z5, 2)) + +/* +** dot_lane_3_u32: +** udot z0\.s, z4\.h, z5\.h\[3\] +** ret +*/ +TEST_DUAL_Z (dot_lane_3_u32, svuint32_t, svuint16_t, + z0 = svdot_lane_u32_u16 (z0, z4, z5, 3), + z0 = svdot_lane (z0, z4, z5, 3)) + +/* +** dot_lane_z8_u32: +** str d8, \[sp, -16\]! 
+** mov (z[0-7])\.d, z8\.d +** udot z0\.s, z1\.h, \1\.h\[1\] +** ldr d8, \[sp\], 16 +** ret +*/ +TEST_DUAL_LANE_REG (dot_lane_z8_u32, svuint32_t, svuint16_t, z8, + z0 = svdot_lane_u32_u16 (z0, z1, z8, 1), + z0 = svdot_lane (z0, z1, z8, 1)) + +/* +** dot_lane_z16_u32: +** mov (z[0-7])\.d, z16\.d +** udot z0\.s, z1\.h, \1\.h\[1\] +** ret +*/ +TEST_DUAL_LANE_REG (dot_lane_z16_u32, svuint32_t, svuint16_t, z16, + z0 = svdot_lane_u32_u16 (z0, z1, z16, 1), + z0 = svdot_lane (z0, z1, z16, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_bf16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_bf16_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..a452c48ca20d085ad453ca766bc5d148ccede350 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_bf16_vg1x2.c @@ -0,0 +1,102 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** bfdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svbfloat16x2_t, svbfloat16_t, + svdot_lane_za32_bf16_vg1x2 (0, z0, z4, 0), + svdot_lane_za32_vg1x2 (0, z0, z4, 0)) + +/* +** dot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** bfdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svbfloat16x2_t, svbfloat16_t, + svdot_lane_za32_bf16_vg1x2 (w0, z0, z7, 1), + svdot_lane_za32_vg1x2 (w0, z0, z7, 1)) + +/* +** dot_lane_w8_z28_z4_2: +** bfdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svbfloat16x2_t, svbfloat16_t, + svdot_lane_za32_bf16_vg1x2 (w8, z28, z4, 2), + svdot_lane_za32_vg1x2 (w8, z28, z4, 2)) + +/* +** dot_lane_w8p7_z0_z4_3: +** bfdot za\.s\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svbfloat16x2_t, svbfloat16_t, + svdot_lane_za32_bf16_vg1x2 (w8 + 7, z0, z4, 3), + svdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** bfdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svbfloat16x2_t, svbfloat16_t, + svdot_lane_za32_bf16_vg1x2 (w8 + 8, z0, z4, 0), + svdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0)) + +/* +** dot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** bfdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svbfloat16x2_t, svbfloat16_t, + svdot_lane_za32_bf16_vg1x2 (w0 - 1, z0, z4, 1), + svdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1)) + +/* +** dot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** bfdot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svbfloat16x2_t, svbfloat16_t, + svdot_lane_za32_bf16_vg1x2 (w8, z4, z15, 2), + svdot_lane_za32_vg1x2 (w8, z4, z15, 2)) + +/* +** dot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** bfdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svbfloat16x2_t, svbfloat16_t, + svdot_lane_za32_bf16_vg1x2 (w8, z28, z16, 3), + svdot_lane_za32_vg1x2 (w8, z28, z16, 3)) + +/* +** dot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** bfdot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svbfloat16x2_t, svbfloat16_t, + svdot_lane_za32_bf16_vg1x2 (w8, z17, z7, 0), + svdot_lane_za32_vg1x2 (w8, z17, z7, 0)) + +/* +** dot_lane_w8_z22_z4_1: +** bfdot za\.s\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svbfloat16x2_t, svbfloat16_t, + svdot_lane_za32_bf16_vg1x2 (w8, z22, z4, 1), + svdot_lane_za32_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_bf16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_bf16_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..a6632a2e7cd84ee35d186475c5ec4ec01f4cb127 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_bf16_vg1x4.c @@ -0,0 +1,108 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** bfdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svbfloat16x4_t, svbfloat16_t, + svdot_lane_za32_bf16_vg1x4 (0, z0, z4, 0), + svdot_lane_za32_vg1x4 (0, z0, z4, 0)) + +/* +** dot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** bfdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svbfloat16x4_t, svbfloat16_t, + svdot_lane_za32_bf16_vg1x4 (w0, z0, z7, 1), + svdot_lane_za32_vg1x4 (w0, z0, z7, 1)) + +/* +** dot_lane_w8_z28_z4_2: +** bfdot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svbfloat16x4_t, svbfloat16_t, + svdot_lane_za32_bf16_vg1x4 (w8, z28, z4, 2), + svdot_lane_za32_vg1x4 (w8, z28, z4, 2)) + +/* +** dot_lane_w8p7_z0_z4_3: +** bfdot za\.s\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svbfloat16x4_t, svbfloat16_t, + svdot_lane_za32_bf16_vg1x4 (w8 + 7, z0, z4, 3), + svdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** bfdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svbfloat16x4_t, svbfloat16_t, + svdot_lane_za32_bf16_vg1x4 (w8 + 8, z0, z4, 0), + svdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0)) + +/* +** dot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** bfdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svbfloat16x4_t, svbfloat16_t, + svdot_lane_za32_bf16_vg1x4 (w0 - 1, z0, z4, 1), + svdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1)) + +/* +** dot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** bfdot za\.s\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svbfloat16x4_t, svbfloat16_t, + svdot_lane_za32_bf16_vg1x4 (w8, z4, z15, 2), + svdot_lane_za32_vg1x4 (w8, z4, z15, 2)) + +/* +** dot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** bfdot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svbfloat16x4_t, svbfloat16_t, + svdot_lane_za32_bf16_vg1x4 (w8, z28, z16, 3), + svdot_lane_za32_vg1x4 (w8, z28, z16, 3)) + +/* +** dot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** bfdot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svbfloat16x4_t, svbfloat16_t, + svdot_lane_za32_bf16_vg1x4 (w8, z17, z7, 0), + svdot_lane_za32_vg1x4 (w8, z17, z7, 0)) + +/* +** dot_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** bfdot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svbfloat16x4_t, svbfloat16_t, + svdot_lane_za32_bf16_vg1x4 (w8, z22, z4, 1), + svdot_lane_za32_vg1x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_f16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_f16_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..5f27d5f5ed6e4be3c3375f6a1393b80eb330122c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_f16_vg1x2.c @@ -0,0 +1,102 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** fdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svfloat16x2_t, svfloat16_t, + svdot_lane_za32_f16_vg1x2 (0, z0, z4, 0), + svdot_lane_za32_vg1x2 (0, z0, z4, 0)) + +/* +** dot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** fdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svfloat16x2_t, svfloat16_t, + svdot_lane_za32_f16_vg1x2 (w0, z0, z7, 1), + svdot_lane_za32_vg1x2 (w0, z0, z7, 1)) + +/* +** dot_lane_w8_z28_z4_2: +** fdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svfloat16x2_t, svfloat16_t, + svdot_lane_za32_f16_vg1x2 (w8, z28, z4, 2), + svdot_lane_za32_vg1x2 (w8, z28, z4, 2)) + +/* +** dot_lane_w8p7_z0_z4_3: +** fdot za\.s\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svfloat16x2_t, svfloat16_t, + svdot_lane_za32_f16_vg1x2 (w8 + 7, z0, z4, 3), + svdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** fdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svfloat16x2_t, svfloat16_t, + svdot_lane_za32_f16_vg1x2 (w8 + 8, z0, z4, 0), + svdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0)) + +/* +** dot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** fdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svfloat16x2_t, svfloat16_t, + svdot_lane_za32_f16_vg1x2 (w0 - 1, z0, z4, 1), + svdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1)) + +/* +** dot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** fdot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svfloat16x2_t, svfloat16_t, + svdot_lane_za32_f16_vg1x2 (w8, z4, z15, 2), + svdot_lane_za32_vg1x2 (w8, z4, z15, 2)) + +/* +** dot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** fdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svfloat16x2_t, svfloat16_t, + svdot_lane_za32_f16_vg1x2 (w8, z28, z16, 3), + svdot_lane_za32_vg1x2 (w8, z28, z16, 3)) + +/* +** dot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** fdot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svfloat16x2_t, svfloat16_t, + svdot_lane_za32_f16_vg1x2 (w8, z17, z7, 0), + svdot_lane_za32_vg1x2 (w8, z17, z7, 0)) + +/* +** dot_lane_w8_z22_z4_1: +** fdot za\.s\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svfloat16x2_t, svfloat16_t, + svdot_lane_za32_f16_vg1x2 (w8, z22, z4, 1), + svdot_lane_za32_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_f16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_f16_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..fd9eb564af9348beaf48ab214811067bc46bcd21 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_f16_vg1x4.c @@ -0,0 +1,108 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** fdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svfloat16x4_t, svfloat16_t, + svdot_lane_za32_f16_vg1x4 (0, z0, z4, 0), + svdot_lane_za32_vg1x4 (0, z0, z4, 0)) + +/* +** dot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** fdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svfloat16x4_t, svfloat16_t, + svdot_lane_za32_f16_vg1x4 (w0, z0, z7, 1), + svdot_lane_za32_vg1x4 (w0, z0, z7, 1)) + +/* +** dot_lane_w8_z28_z4_2: +** fdot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svfloat16x4_t, svfloat16_t, + svdot_lane_za32_f16_vg1x4 (w8, z28, z4, 2), + svdot_lane_za32_vg1x4 (w8, z28, z4, 2)) + +/* +** dot_lane_w8p7_z0_z4_3: +** fdot za\.s\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svfloat16x4_t, svfloat16_t, + svdot_lane_za32_f16_vg1x4 (w8 + 7, z0, z4, 3), + svdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** fdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svfloat16x4_t, svfloat16_t, + svdot_lane_za32_f16_vg1x4 (w8 + 8, z0, z4, 0), + svdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0)) + +/* +** dot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** fdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svfloat16x4_t, svfloat16_t, + svdot_lane_za32_f16_vg1x4 (w0 - 1, z0, z4, 1), + svdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1)) + +/* +** dot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** fdot za\.s\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svfloat16x4_t, svfloat16_t, + svdot_lane_za32_f16_vg1x4 (w8, z4, z15, 2), + svdot_lane_za32_vg1x4 (w8, z4, z15, 2)) + +/* +** dot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** fdot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svfloat16x4_t, svfloat16_t, + svdot_lane_za32_f16_vg1x4 (w8, z28, z16, 3), + svdot_lane_za32_vg1x4 (w8, z28, z16, 3)) + +/* +** dot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fdot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svfloat16x4_t, svfloat16_t, + svdot_lane_za32_f16_vg1x4 (w8, z17, z7, 0), + svdot_lane_za32_vg1x4 (w8, z17, z7, 0)) + +/* +** dot_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fdot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svfloat16x4_t, svfloat16_t, + svdot_lane_za32_f16_vg1x4 (w8, z22, z4, 1), + svdot_lane_za32_vg1x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_s16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_s16_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..f7b08617708f3358b5f33129f3032e0b89f6f5ec --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_s16_vg1x2.c @@ -0,0 +1,102 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** sdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svint16x2_t, svint16_t, + svdot_lane_za32_s16_vg1x2 (0, z0, z4, 0), + svdot_lane_za32_vg1x2 (0, z0, z4, 0)) + +/* +** dot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** sdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svint16x2_t, svint16_t, + svdot_lane_za32_s16_vg1x2 (w0, z0, z7, 1), + svdot_lane_za32_vg1x2 (w0, z0, z7, 1)) + +/* +** dot_lane_w8_z28_z4_2: +** sdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svint16x2_t, svint16_t, + svdot_lane_za32_s16_vg1x2 (w8, z28, z4, 2), + svdot_lane_za32_vg1x2 (w8, z28, z4, 2)) + +/* +** dot_lane_w8p7_z0_z4_3: +** sdot za\.s\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svint16x2_t, svint16_t, + svdot_lane_za32_s16_vg1x2 (w8 + 7, z0, z4, 3), + svdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** sdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svint16x2_t, svint16_t, + svdot_lane_za32_s16_vg1x2 (w8 + 8, z0, z4, 0), + svdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0)) + +/* +** dot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** sdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svint16x2_t, svint16_t, + svdot_lane_za32_s16_vg1x2 (w0 - 1, z0, z4, 1), + svdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1)) + +/* +** dot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** sdot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svint16x2_t, svint16_t, + svdot_lane_za32_s16_vg1x2 (w8, z4, z15, 2), + svdot_lane_za32_vg1x2 (w8, z4, z15, 2)) + +/* +** dot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** sdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svint16x2_t, svint16_t, + svdot_lane_za32_s16_vg1x2 (w8, z28, z16, 3), + svdot_lane_za32_vg1x2 (w8, z28, z16, 3)) + +/* +** dot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** sdot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svint16x2_t, svint16_t, + svdot_lane_za32_s16_vg1x2 (w8, z17, z7, 0), + svdot_lane_za32_vg1x2 (w8, z17, z7, 0)) + +/* +** dot_lane_w8_z22_z4_1: +** sdot za\.s\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svint16x2_t, svint16_t, + svdot_lane_za32_s16_vg1x2 (w8, z22, z4, 1), + svdot_lane_za32_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_s16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_s16_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..240b6332be6f14eaaec1799f30fed1dbfa029ca9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_s16_vg1x4.c @@ -0,0 +1,108 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** sdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svint16x4_t, svint16_t, + svdot_lane_za32_s16_vg1x4 (0, z0, z4, 0), + svdot_lane_za32_vg1x4 (0, z0, z4, 0)) + +/* +** dot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** sdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svint16x4_t, svint16_t, + svdot_lane_za32_s16_vg1x4 (w0, z0, z7, 1), + svdot_lane_za32_vg1x4 (w0, z0, z7, 1)) + +/* +** dot_lane_w8_z28_z4_2: +** sdot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svint16x4_t, svint16_t, + svdot_lane_za32_s16_vg1x4 (w8, z28, z4, 2), + svdot_lane_za32_vg1x4 (w8, z28, z4, 2)) + +/* +** dot_lane_w8p7_z0_z4_3: +** sdot za\.s\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svint16x4_t, svint16_t, + svdot_lane_za32_s16_vg1x4 (w8 + 7, z0, z4, 3), + svdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** sdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svint16x4_t, svint16_t, + svdot_lane_za32_s16_vg1x4 (w8 + 8, z0, z4, 0), + svdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0)) + +/* +** dot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** sdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svint16x4_t, svint16_t, + svdot_lane_za32_s16_vg1x4 (w0 - 1, z0, z4, 1), + svdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1)) + +/* +** dot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** sdot za\.s\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svint16x4_t, svint16_t, + svdot_lane_za32_s16_vg1x4 (w8, z4, z15, 2), + svdot_lane_za32_vg1x4 (w8, z4, z15, 2)) + +/* +** dot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** sdot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svint16x4_t, svint16_t, + svdot_lane_za32_s16_vg1x4 (w8, z28, z16, 3), + svdot_lane_za32_vg1x4 (w8, z28, z16, 3)) + +/* +** dot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sdot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svint16x4_t, svint16_t, + svdot_lane_za32_s16_vg1x4 (w8, z17, z7, 0), + svdot_lane_za32_vg1x4 (w8, z17, z7, 0)) + +/* +** dot_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sdot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svint16x4_t, svint16_t, + svdot_lane_za32_s16_vg1x4 (w8, z22, z4, 1), + svdot_lane_za32_vg1x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_s8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_s8_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..623756c31b4f28256d18029c3c6c95bdcace135a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_s8_vg1x2.c @@ -0,0 +1,102 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** sdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svint8x2_t, svint8_t, + svdot_lane_za32_s8_vg1x2 (0, z0, z4, 0), + svdot_lane_za32_vg1x2 (0, z0, z4, 0)) + +/* +** dot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** sdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svint8x2_t, svint8_t, + svdot_lane_za32_s8_vg1x2 (w0, z0, z7, 1), + svdot_lane_za32_vg1x2 (w0, z0, z7, 1)) + +/* +** dot_lane_w8_z28_z4_2: +** sdot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svint8x2_t, svint8_t, + svdot_lane_za32_s8_vg1x2 (w8, z28, z4, 2), + svdot_lane_za32_vg1x2 (w8, z28, z4, 2)) + +/* +** dot_lane_w8p7_z0_z4_3: +** sdot za\.s\[w8, 7, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svint8x2_t, svint8_t, + svdot_lane_za32_s8_vg1x2 (w8 + 7, z0, z4, 3), + svdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** sdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svint8x2_t, svint8_t, + svdot_lane_za32_s8_vg1x2 (w8 + 8, z0, z4, 0), + svdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0)) + +/* +** dot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** sdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svint8x2_t, svint8_t, + svdot_lane_za32_s8_vg1x2 (w0 - 1, z0, z4, 1), + svdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1)) + +/* +** dot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** sdot za\.s\[w8, 0, vgx2\], {z4\.b - z5\.b}, z15\.b\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svint8x2_t, svint8_t, + svdot_lane_za32_s8_vg1x2 (w8, z4, z15, 2), + svdot_lane_za32_vg1x2 (w8, z4, z15, 2)) + +/* +** dot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** sdot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, \1\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svint8x2_t, svint8_t, + svdot_lane_za32_s8_vg1x2 (w8, z28, z16, 3), + svdot_lane_za32_vg1x2 (w8, z28, z16, 3)) + +/* +** dot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** sdot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svint8x2_t, svint8_t, + svdot_lane_za32_s8_vg1x2 (w8, z17, z7, 0), + svdot_lane_za32_vg1x2 (w8, z17, z7, 0)) + +/* +** dot_lane_w8_z22_z4_1: +** sdot za\.s\[w8, 0, vgx2\], {z22\.b - z23\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svint8x2_t, svint8_t, + svdot_lane_za32_s8_vg1x2 (w8, z22, z4, 1), + svdot_lane_za32_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_s8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_s8_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..b775026e381a71cfec7a56b66b2fe9ea2df986fa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_s8_vg1x4.c @@ -0,0 +1,108 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** sdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svint8x4_t, svint8_t, + svdot_lane_za32_s8_vg1x4 (0, z0, z4, 0), + svdot_lane_za32_vg1x4 (0, z0, z4, 0)) + +/* +** dot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** sdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svint8x4_t, svint8_t, + svdot_lane_za32_s8_vg1x4 (w0, z0, z7, 1), + svdot_lane_za32_vg1x4 (w0, z0, z7, 1)) + +/* +** dot_lane_w8_z28_z4_2: +** sdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svint8x4_t, svint8_t, + svdot_lane_za32_s8_vg1x4 (w8, z28, z4, 2), + svdot_lane_za32_vg1x4 (w8, z28, z4, 2)) + +/* +** dot_lane_w8p7_z0_z4_3: +** sdot za\.s\[w8, 7, vgx4\], {z0\.b - z3\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svint8x4_t, svint8_t, + svdot_lane_za32_s8_vg1x4 (w8 + 7, z0, z4, 3), + svdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** sdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svint8x4_t, svint8_t, + svdot_lane_za32_s8_vg1x4 (w8 + 8, z0, z4, 0), + svdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0)) + +/* +** dot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** sdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svint8x4_t, svint8_t, + svdot_lane_za32_s8_vg1x4 (w0 - 1, z0, z4, 1), + svdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1)) + +/* +** dot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** sdot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, z15\.b\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svint8x4_t, svint8_t, + svdot_lane_za32_s8_vg1x4 (w8, z4, z15, 2), + svdot_lane_za32_vg1x4 (w8, z4, z15, 2)) + +/* +** dot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** sdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, \1\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svint8x4_t, svint8_t, + svdot_lane_za32_s8_vg1x4 (w8, z28, z16, 3), + svdot_lane_za32_vg1x4 (w8, z28, z16, 3)) + +/* +** dot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sdot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svint8x4_t, svint8_t, + svdot_lane_za32_s8_vg1x4 (w8, z17, z7, 0), + svdot_lane_za32_vg1x4 (w8, z17, z7, 0)) + +/* +** dot_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sdot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svint8x4_t, svint8_t, + svdot_lane_za32_s8_vg1x4 (w8, z22, z4, 1), + svdot_lane_za32_vg1x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_u16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_u16_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..b0e9550dae69c28c9261ca1204c9efeb2538c839 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_u16_vg1x2.c @@ -0,0 +1,102 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** udot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svuint16x2_t, svuint16_t, + svdot_lane_za32_u16_vg1x2 (0, z0, z4, 0), + svdot_lane_za32_vg1x2 (0, z0, z4, 0)) + +/* +** dot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** udot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svuint16x2_t, svuint16_t, + svdot_lane_za32_u16_vg1x2 (w0, z0, z7, 1), + svdot_lane_za32_vg1x2 (w0, z0, z7, 1)) + +/* +** dot_lane_w8_z28_z4_2: +** udot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svuint16x2_t, svuint16_t, + svdot_lane_za32_u16_vg1x2 (w8, z28, z4, 2), + svdot_lane_za32_vg1x2 (w8, z28, z4, 2)) + +/* +** dot_lane_w8p7_z0_z4_3: +** udot za\.s\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svuint16x2_t, svuint16_t, + svdot_lane_za32_u16_vg1x2 (w8 + 7, z0, z4, 3), + svdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** udot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svuint16x2_t, svuint16_t, + svdot_lane_za32_u16_vg1x2 (w8 + 8, z0, z4, 0), + svdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0)) + +/* +** dot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** udot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svuint16x2_t, svuint16_t, + svdot_lane_za32_u16_vg1x2 (w0 - 1, z0, z4, 1), + svdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1)) + +/* +** dot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** udot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svuint16x2_t, svuint16_t, + svdot_lane_za32_u16_vg1x2 (w8, z4, z15, 2), + svdot_lane_za32_vg1x2 (w8, z4, z15, 2)) + +/* +** dot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** udot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svuint16x2_t, svuint16_t, + svdot_lane_za32_u16_vg1x2 (w8, z28, z16, 3), + svdot_lane_za32_vg1x2 (w8, z28, z16, 3)) + +/* +** dot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** udot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svuint16x2_t, svuint16_t, + svdot_lane_za32_u16_vg1x2 (w8, z17, z7, 0), + svdot_lane_za32_vg1x2 (w8, z17, z7, 0)) + +/* +** dot_lane_w8_z22_z4_1: +** udot za\.s\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svuint16x2_t, svuint16_t, + svdot_lane_za32_u16_vg1x2 (w8, z22, z4, 1), + svdot_lane_za32_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_u16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_u16_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..87a74768c2e1ed17f1e6eaa7a24ff17202dea95b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_u16_vg1x4.c @@ -0,0 +1,108 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** udot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svuint16x4_t, svuint16_t, + svdot_lane_za32_u16_vg1x4 (0, z0, z4, 0), + svdot_lane_za32_vg1x4 (0, z0, z4, 0)) + +/* +** dot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** udot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svuint16x4_t, svuint16_t, + svdot_lane_za32_u16_vg1x4 (w0, z0, z7, 1), + svdot_lane_za32_vg1x4 (w0, z0, z7, 1)) + +/* +** dot_lane_w8_z28_z4_2: +** udot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svuint16x4_t, svuint16_t, + svdot_lane_za32_u16_vg1x4 (w8, z28, z4, 2), + svdot_lane_za32_vg1x4 (w8, z28, z4, 2)) + +/* +** dot_lane_w8p7_z0_z4_3: +** udot za\.s\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svuint16x4_t, svuint16_t, + svdot_lane_za32_u16_vg1x4 (w8 + 7, z0, z4, 3), + svdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** udot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svuint16x4_t, svuint16_t, + svdot_lane_za32_u16_vg1x4 (w8 + 8, z0, z4, 0), + svdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0)) + +/* +** dot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** udot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svuint16x4_t, svuint16_t, + svdot_lane_za32_u16_vg1x4 (w0 - 1, z0, z4, 1), + svdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1)) + +/* +** dot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** udot za\.s\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svuint16x4_t, svuint16_t, + svdot_lane_za32_u16_vg1x4 (w8, z4, z15, 2), + svdot_lane_za32_vg1x4 (w8, z4, z15, 2)) + +/* +** dot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** udot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svuint16x4_t, svuint16_t, + svdot_lane_za32_u16_vg1x4 (w8, z28, z16, 3), + svdot_lane_za32_vg1x4 (w8, z28, z16, 3)) + +/* +** dot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** udot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svuint16x4_t, svuint16_t, + svdot_lane_za32_u16_vg1x4 (w8, z17, z7, 0), + svdot_lane_za32_vg1x4 (w8, z17, z7, 0)) + +/* +** dot_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** udot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svuint16x4_t, svuint16_t, + svdot_lane_za32_u16_vg1x4 (w8, z22, z4, 1), + svdot_lane_za32_vg1x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_u8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_u8_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..c3374b4eb4d70627ac96744111ac247151d8194f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_u8_vg1x2.c @@ -0,0 +1,102 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** udot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svuint8x2_t, svuint8_t, + svdot_lane_za32_u8_vg1x2 (0, z0, z4, 0), + svdot_lane_za32_vg1x2 (0, z0, z4, 0)) + +/* +** dot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** udot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svuint8x2_t, svuint8_t, + svdot_lane_za32_u8_vg1x2 (w0, z0, z7, 1), + svdot_lane_za32_vg1x2 (w0, z0, z7, 1)) + +/* +** dot_lane_w8_z28_z4_2: +** udot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svuint8x2_t, svuint8_t, + svdot_lane_za32_u8_vg1x2 (w8, z28, z4, 2), + svdot_lane_za32_vg1x2 (w8, z28, z4, 2)) + +/* +** dot_lane_w8p7_z0_z4_3: +** udot za\.s\[w8, 7, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svuint8x2_t, svuint8_t, + svdot_lane_za32_u8_vg1x2 (w8 + 7, z0, z4, 3), + svdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** udot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svuint8x2_t, svuint8_t, + svdot_lane_za32_u8_vg1x2 (w8 + 8, z0, z4, 0), + svdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0)) + +/* +** dot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** udot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svuint8x2_t, svuint8_t, + svdot_lane_za32_u8_vg1x2 (w0 - 1, z0, z4, 1), + svdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1)) + +/* +** dot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** udot za\.s\[w8, 0, vgx2\], {z4\.b - z5\.b}, z15\.b\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svuint8x2_t, svuint8_t, + svdot_lane_za32_u8_vg1x2 (w8, z4, z15, 2), + svdot_lane_za32_vg1x2 (w8, z4, z15, 2)) + +/* +** dot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** udot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, \1\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svuint8x2_t, svuint8_t, + svdot_lane_za32_u8_vg1x2 (w8, z28, z16, 3), + svdot_lane_za32_vg1x2 (w8, z28, z16, 3)) + +/* +** dot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** udot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svuint8x2_t, svuint8_t, + svdot_lane_za32_u8_vg1x2 (w8, z17, z7, 0), + svdot_lane_za32_vg1x2 (w8, z17, z7, 0)) + +/* +** dot_lane_w8_z22_z4_1: +** udot za\.s\[w8, 0, vgx2\], {z22\.b - z23\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svuint8x2_t, svuint8_t, + svdot_lane_za32_u8_vg1x2 (w8, z22, z4, 1), + svdot_lane_za32_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_u8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_u8_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..0d5181386c2435358eb488c30e07426c95118e12 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_u8_vg1x4.c @@ -0,0 +1,108 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** udot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svuint8x4_t, svuint8_t, + svdot_lane_za32_u8_vg1x4 (0, z0, z4, 0), + svdot_lane_za32_vg1x4 (0, z0, z4, 0)) + +/* +** dot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** udot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svuint8x4_t, svuint8_t, + svdot_lane_za32_u8_vg1x4 (w0, z0, z7, 1), + svdot_lane_za32_vg1x4 (w0, z0, z7, 1)) + +/* +** dot_lane_w8_z28_z4_2: +** udot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svuint8x4_t, svuint8_t, + svdot_lane_za32_u8_vg1x4 (w8, z28, z4, 2), + svdot_lane_za32_vg1x4 (w8, z28, z4, 2)) + +/* +** dot_lane_w8p7_z0_z4_3: +** udot za\.s\[w8, 7, vgx4\], {z0\.b - z3\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svuint8x4_t, svuint8_t, + svdot_lane_za32_u8_vg1x4 (w8 + 7, z0, z4, 3), + svdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** udot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svuint8x4_t, svuint8_t, + svdot_lane_za32_u8_vg1x4 (w8 + 8, z0, z4, 0), + svdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0)) + +/* +** dot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** udot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svuint8x4_t, svuint8_t, + svdot_lane_za32_u8_vg1x4 (w0 - 1, z0, z4, 1), + svdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1)) + +/* +** dot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** udot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, z15\.b\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svuint8x4_t, svuint8_t, + svdot_lane_za32_u8_vg1x4 (w8, z4, z15, 2), + svdot_lane_za32_vg1x4 (w8, z4, z15, 2)) + +/* +** dot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** udot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, \1\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svuint8x4_t, svuint8_t, + svdot_lane_za32_u8_vg1x4 (w8, z28, z16, 3), + svdot_lane_za32_vg1x4 (w8, z28, z16, 3)) + +/* +** dot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** udot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svuint8x4_t, svuint8_t, + svdot_lane_za32_u8_vg1x4 (w8, z17, z7, 0), + svdot_lane_za32_vg1x4 (w8, z17, z7, 0)) + +/* +** dot_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** udot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svuint8x4_t, svuint8_t, + svdot_lane_za32_u8_vg1x4 (w8, z22, z4, 1), + svdot_lane_za32_vg1x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za64_s16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za64_s16_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..d11466f8cc85598b59e83085ccd3ef632d6d2ef0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za64_s16_vg1x2.c @@ -0,0 +1,104 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** dot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** sdot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svint16x2_t, svint16_t, + svdot_lane_za64_s16_vg1x2 (0, z0, z4, 0), + svdot_lane_za64_vg1x2 (0, z0, z4, 0)) + +/* +** dot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** sdot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svint16x2_t, svint16_t, + svdot_lane_za64_s16_vg1x2 (w0, z0, z7, 1), + svdot_lane_za64_vg1x2 (w0, z0, z7, 1)) + +/* +** dot_lane_w8_z28_z4_0: +** sdot za\.d\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_0, svint16x2_t, svint16_t, + svdot_lane_za64_s16_vg1x2 (w8, z28, z4, 0), + svdot_lane_za64_vg1x2 (w8, z28, z4, 0)) + +/* +** dot_lane_w8p7_z0_z4_1: +** sdot za\.d\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_1, svint16x2_t, svint16_t, + svdot_lane_za64_s16_vg1x2 (w8 + 7, z0, z4, 1), + svdot_lane_za64_vg1x2 (w8 + 7, z0, z4, 1)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** sdot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svint16x2_t, svint16_t, + svdot_lane_za64_s16_vg1x2 (w8 + 8, z0, z4, 0), + svdot_lane_za64_vg1x2 (w8 + 8, z0, z4, 0)) + +/* +** dot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** sdot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svint16x2_t, svint16_t, + svdot_lane_za64_s16_vg1x2 (w0 - 1, z0, z4, 1), + svdot_lane_za64_vg1x2 (w0 - 1, z0, z4, 1)) + +/* +** dot_lane_w8_z4_z15_0: +** str d15, \[sp, #?-16\]! 
+** sdot za\.d\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[0\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_0, svint16x2_t, svint16_t, + svdot_lane_za64_s16_vg1x2 (w8, z4, z15, 0), + svdot_lane_za64_vg1x2 (w8, z4, z15, 0)) + +/* +** dot_lane_w8_z28_z16_1: +** mov (z[0-7]).d, z16.d +** sdot za\.d\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_1, svint16x2_t, svint16_t, + svdot_lane_za64_s16_vg1x2 (w8, z28, z16, 1), + svdot_lane_za64_vg1x2 (w8, z28, z16, 1)) + +/* +** dot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** sdot za\.d\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svint16x2_t, svint16_t, + svdot_lane_za64_s16_vg1x2 (w8, z17, z7, 0), + svdot_lane_za64_vg1x2 (w8, z17, z7, 0)) + +/* +** dot_lane_w8_z22_z4_1: +** sdot za\.d\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svint16x2_t, svint16_t, + svdot_lane_za64_s16_vg1x2 (w8, z22, z4, 1), + svdot_lane_za64_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za64_s16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za64_s16_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..ed48dca58687a0fb12baf7710e70748ece4740be --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za64_s16_vg1x4.c @@ -0,0 +1,110 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** dot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** sdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svint16x4_t, svint16_t, + svdot_lane_za64_s16_vg1x4 (0, z0, z4, 0), + svdot_lane_za64_vg1x4 (0, z0, z4, 0)) + +/* +** dot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** sdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svint16x4_t, svint16_t, + svdot_lane_za64_s16_vg1x4 (w0, z0, z7, 1), + svdot_lane_za64_vg1x4 (w0, z0, z7, 1)) + +/* +** dot_lane_w8_z28_z4_0: +** sdot za\.d\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_0, svint16x4_t, svint16_t, + svdot_lane_za64_s16_vg1x4 (w8, z28, z4, 0), + svdot_lane_za64_vg1x4 (w8, z28, z4, 0)) + +/* +** dot_lane_w8p7_z0_z4_1: +** sdot za\.d\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_1, svint16x4_t, svint16_t, + svdot_lane_za64_s16_vg1x4 (w8 + 7, z0, z4, 1), + svdot_lane_za64_vg1x4 (w8 + 7, z0, z4, 1)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** sdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svint16x4_t, svint16_t, + svdot_lane_za64_s16_vg1x4 (w8 + 8, z0, z4, 0), + svdot_lane_za64_vg1x4 (w8 + 8, z0, z4, 0)) + +/* +** dot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** sdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svint16x4_t, svint16_t, + svdot_lane_za64_s16_vg1x4 (w0 - 1, z0, z4, 1), + svdot_lane_za64_vg1x4 (w0 - 1, z0, z4, 1)) + +/* +** dot_lane_w8_z4_z15_0: +** str d15, \[sp, #?-16\]! 
+** sdot za\.d\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[0\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_0, svint16x4_t, svint16_t, + svdot_lane_za64_s16_vg1x4 (w8, z4, z15, 0), + svdot_lane_za64_vg1x4 (w8, z4, z15, 0)) + +/* +** dot_lane_w8_z28_z16_1: +** mov (z[0-7]).d, z16.d +** sdot za\.d\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_1, svint16x4_t, svint16_t, + svdot_lane_za64_s16_vg1x4 (w8, z28, z16, 1), + svdot_lane_za64_vg1x4 (w8, z28, z16, 1)) + +/* +** dot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sdot za\.d\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svint16x4_t, svint16_t, + svdot_lane_za64_s16_vg1x4 (w8, z17, z7, 0), + svdot_lane_za64_vg1x4 (w8, z17, z7, 0)) + +/* +** dot_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sdot za\.d\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svint16x4_t, svint16_t, + svdot_lane_za64_s16_vg1x4 (w8, z22, z4, 1), + svdot_lane_za64_vg1x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za64_u16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za64_u16_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..1dd89eaaf614451776f1260cda2c78f5d6dcac04 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za64_u16_vg1x2.c @@ -0,0 +1,104 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** dot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** udot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svuint16x2_t, svuint16_t, + svdot_lane_za64_u16_vg1x2 (0, z0, z4, 0), + svdot_lane_za64_vg1x2 (0, z0, z4, 0)) + +/* +** dot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** udot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svuint16x2_t, svuint16_t, + svdot_lane_za64_u16_vg1x2 (w0, z0, z7, 1), + svdot_lane_za64_vg1x2 (w0, z0, z7, 1)) + +/* +** dot_lane_w8_z28_z4_0: +** udot za\.d\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_0, svuint16x2_t, svuint16_t, + svdot_lane_za64_u16_vg1x2 (w8, z28, z4, 0), + svdot_lane_za64_vg1x2 (w8, z28, z4, 0)) + +/* +** dot_lane_w8p7_z0_z4_1: +** udot za\.d\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_1, svuint16x2_t, svuint16_t, + svdot_lane_za64_u16_vg1x2 (w8 + 7, z0, z4, 1), + svdot_lane_za64_vg1x2 (w8 + 7, z0, z4, 1)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** udot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svuint16x2_t, svuint16_t, + svdot_lane_za64_u16_vg1x2 (w8 + 8, z0, z4, 0), + svdot_lane_za64_vg1x2 (w8 + 8, z0, z4, 0)) + +/* +** dot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** udot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svuint16x2_t, svuint16_t, + svdot_lane_za64_u16_vg1x2 (w0 - 1, z0, z4, 1), + svdot_lane_za64_vg1x2 (w0 - 1, z0, z4, 1)) + +/* +** dot_lane_w8_z4_z15_0: +** str d15, \[sp, #?-16\]! 
+** udot za\.d\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[0\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_0, svuint16x2_t, svuint16_t, + svdot_lane_za64_u16_vg1x2 (w8, z4, z15, 0), + svdot_lane_za64_vg1x2 (w8, z4, z15, 0)) + +/* +** dot_lane_w8_z28_z16_1: +** mov (z[0-7]).d, z16.d +** udot za\.d\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_1, svuint16x2_t, svuint16_t, + svdot_lane_za64_u16_vg1x2 (w8, z28, z16, 1), + svdot_lane_za64_vg1x2 (w8, z28, z16, 1)) + +/* +** dot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** udot za\.d\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svuint16x2_t, svuint16_t, + svdot_lane_za64_u16_vg1x2 (w8, z17, z7, 0), + svdot_lane_za64_vg1x2 (w8, z17, z7, 0)) + +/* +** dot_lane_w8_z22_z4_1: +** udot za\.d\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svuint16x2_t, svuint16_t, + svdot_lane_za64_u16_vg1x2 (w8, z22, z4, 1), + svdot_lane_za64_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za64_u16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za64_u16_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..2ce269e5469f971fd20961782627529975a6a88b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za64_u16_vg1x4.c @@ -0,0 +1,110 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** dot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** udot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svuint16x4_t, svuint16_t, + svdot_lane_za64_u16_vg1x4 (0, z0, z4, 0), + svdot_lane_za64_vg1x4 (0, z0, z4, 0)) + +/* +** dot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** udot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svuint16x4_t, svuint16_t, + svdot_lane_za64_u16_vg1x4 (w0, z0, z7, 1), + svdot_lane_za64_vg1x4 (w0, z0, z7, 1)) + +/* +** dot_lane_w8_z28_z4_0: +** udot za\.d\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_0, svuint16x4_t, svuint16_t, + svdot_lane_za64_u16_vg1x4 (w8, z28, z4, 0), + svdot_lane_za64_vg1x4 (w8, z28, z4, 0)) + +/* +** dot_lane_w8p7_z0_z4_1: +** udot za\.d\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_1, svuint16x4_t, svuint16_t, + svdot_lane_za64_u16_vg1x4 (w8 + 7, z0, z4, 1), + svdot_lane_za64_vg1x4 (w8 + 7, z0, z4, 1)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** udot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svuint16x4_t, svuint16_t, + svdot_lane_za64_u16_vg1x4 (w8 + 8, z0, z4, 0), + svdot_lane_za64_vg1x4 (w8 + 8, z0, z4, 0)) + +/* +** dot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** udot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svuint16x4_t, svuint16_t, + svdot_lane_za64_u16_vg1x4 (w0 - 1, z0, z4, 1), + svdot_lane_za64_vg1x4 (w0 - 1, z0, z4, 1)) + +/* +** dot_lane_w8_z4_z15_0: +** str d15, \[sp, #?-16\]! 
+** udot za\.d\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[0\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_0, svuint16x4_t, svuint16_t, + svdot_lane_za64_u16_vg1x4 (w8, z4, z15, 0), + svdot_lane_za64_vg1x4 (w8, z4, z15, 0)) + +/* +** dot_lane_w8_z28_z16_1: +** mov (z[0-7]).d, z16.d +** udot za\.d\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_1, svuint16x4_t, svuint16_t, + svdot_lane_za64_u16_vg1x4 (w8, z28, z16, 1), + svdot_lane_za64_vg1x4 (w8, z28, z16, 1)) + +/* +** dot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** udot za\.d\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svuint16x4_t, svuint16_t, + svdot_lane_za64_u16_vg1x4 (w8, z17, z7, 0), + svdot_lane_za64_vg1x4 (w8, z17, z7, 0)) + +/* +** dot_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** udot za\.d\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svuint16x4_t, svuint16_t, + svdot_lane_za64_u16_vg1x4 (w8, z22, z4, 1), + svdot_lane_za64_vg1x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_s32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_s32.c new file mode 100644 index 0000000000000000000000000000000000000000..d1f7556bd8d0f92d1622eab42e56c7d9c27b6cf5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_s32.c @@ -0,0 +1,44 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_s32_tied1: +** sdot z0\.s, z4\.h, z5\.h +** ret +*/ +TEST_DUAL_Z (dot_s32_tied1, svint32_t, svint16_t, + z0 = svdot_s32_s16 (z0, z4, z5), + z0 = svdot (z0, z4, z5)) + +/* +** dot_s32_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** sdot z0\.s, \1\.h, z1\.h +** ret +*/ +TEST_DUAL_Z_REV (dot_s32_tied2, svint32_t, svint16_t, + z0_res = svdot_s32_s16 (z4, z0, z1), + z0_res = svdot (z4, z0, z1)) + +/* +** dot_s32_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** sdot z0\.s, z1\.h, \1\.h +** ret +*/ +TEST_DUAL_Z_REV (dot_s32_tied3, svint32_t, svint16_t, + z0_res = svdot_s32_s16 (z4, z1, z0), + z0_res = svdot (z4, z1, z0)) + +/* +** dot_s32_untied: +** movprfx z0, z1 +** sdot z0\.s, z4\.h, z5\.h +** ret +*/ +TEST_DUAL_Z (dot_s32_untied, svint32_t, svint16_t, + z0 = svdot_s32_s16 (z1, z4, z5), + z0 = svdot (z1, z4, z5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_u32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_u32.c new file mode 100644 index 0000000000000000000000000000000000000000..ce2052b0b5828fd8efc397a60d51e14f2f0b5843 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_u32.c @@ -0,0 +1,44 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_u32_tied1: +** udot z0\.s, z4\.h, z5\.h +** ret +*/ +TEST_DUAL_Z (dot_u32_tied1, svuint32_t, svuint16_t, + z0 = svdot_u32_u16 (z0, z4, z5), + z0 = svdot (z0, z4, z5)) + +/* +** dot_u32_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** udot z0\.s, \1\.h, z1\.h +** ret +*/ +TEST_DUAL_Z_REV (dot_u32_tied2, svuint32_t, svuint16_t, + z0_res = svdot_u32_u16 (z4, z0, z1), + z0_res = svdot (z4, z0, z1)) + +/* +** dot_u32_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** udot z0\.s, z1\.h, \1\.h +** ret +*/ +TEST_DUAL_Z_REV (dot_u32_tied3, svuint32_t, svuint16_t, + z0_res = svdot_u32_u16 (z4, z1, z0), + z0_res = svdot (z4, z1, z0)) + +/* +** dot_u32_untied: 
+** movprfx z0, z1 +** udot z0\.s, z4\.h, z5\.h +** ret +*/ +TEST_DUAL_Z (dot_u32_untied, svuint32_t, svuint16_t, + z0 = svdot_u32_u16 (z1, z4, z5), + z0 = svdot (z1, z4, z5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_bf16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_bf16_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..0665f8893c77962baa8c280101b22dd2805d0a11 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_bf16_vg1x2.c @@ -0,0 +1,243 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** bfdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z0, svbfloat16x2_t, + svdot_za32_bf16_vg1x2 (0, z0, z0), + svdot_za32_vg1x2 (0, z0, z0)) + +/* +** dot_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** bfdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z0, svbfloat16x2_t, + svdot_za32_bf16_vg1x2 (w0, z0, z0), + svdot_za32_vg1x2 (w0, z0, z0)) + +/* +** dot_w8_z0_z4: +** bfdot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z4, svbfloat16x2_t, + svdot_za32_bf16_vg1x2 (w8, z0, z4), + svdot_za32_vg1x2 (w8, z0, z4)) + +/* +** dot_w8_z4_z18: +** bfdot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z18, svbfloat16x2_t, + svdot_za32_bf16_vg1x2 (w8, z4, z18), + svdot_za32_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z23: +** ... +** bfdot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svbfloat16x2_t, + svdot_za32_bf16_vg1x2 (w8, z0, z23), + svdot_za32_vg1x2 (w8, z0, z23)) + +/* +** dot_w8_z23_z0: +** ... 
+** bfdot za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svbfloat16x2_t, + svdot_za32_bf16_vg1x2 (w8, z23, z0), + svdot_za32_vg1x2 (w8, z23, z0)) + +/* +** dot_w8_z18_z28: +** bfdot za\.s\[w8, 0, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z28, svbfloat16x2_t, + svdot_za32_bf16_vg1x2 (w8, z18, z28), + svdot_za32_vg1x2 (w8, z18, z28)) + +/* +** dot_w8_z28_z4: +** bfdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z4, svbfloat16x2_t, + svdot_za32_bf16_vg1x2 (w8, z28, z4), + svdot_za32_vg1x2 (w8, z28, z4)) + +/* +** dot_w8p1_z4_z0: +** bfdot za\.s\[w8, 1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svbfloat16x2_t, + svdot_za32_bf16_vg1x2 (w8 + 1, z4, z0), + svdot_za32_vg1x2 (w8 + 1, z4, z0)) + +/* +** dot_w8p2_z4_z0: +** bfdot za\.s\[w8, 2, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svbfloat16x2_t, + svdot_za32_bf16_vg1x2 (w8 + 2, z4, z0), + svdot_za32_vg1x2 (w8 + 2, z4, z0)) + +/* +** dot_w11p4_z4_z0: +** bfdot za\.s\[w11, 4, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svbfloat16x2_t, + svdot_za32_bf16_vg1x2 (w11 + 4, z4, z0), + svdot_za32_vg1x2 (w11 + 4, z4, z0)) + +/* +** dot_w8p7_z4_z0: +** bfdot za\.s\[w8, 7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svbfloat16x2_t, + svdot_za32_bf16_vg1x2 (w8 + 7, z4, z0), + svdot_za32_vg1x2 (w8 + 7, z4, z0)) + +/* +** dot_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** bfdot za\.s\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z4_z4, svbfloat16x2_t, + svdot_za32_bf16_vg1x2 (w8 + 8, z4, z4), + svdot_za32_vg1x2 (w8 + 8, z4, z4)) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** bfdot za\.s\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svbfloat16x2_t, + svdot_za32_bf16_vg1x2 (w8 - 1, z4, z0), + svdot_za32_vg1x2 (w8 - 1, z4, z0)) + +/* +** dot_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** bfdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svbfloat16x2_t, svbfloat16_t, + svdot_single_za32_bf16_vg1x2 (0, z1, z0), + svdot_za32_vg1x2 (0, z1, z0)) + +/* +** dot_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** bfdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svbfloat16x2_t, svbfloat16_t, + svdot_single_za32_bf16_vg1x2 (w0, z1, z0), + svdot_za32_vg1x2 (w0, z1, z0)) + +/* +** dot_single_w8_z1_z0: +** bfdot za\.s\[w8, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svbfloat16x2_t, svbfloat16_t, + svdot_single_za32_bf16_vg1x2 (w8, z1, z0), + svdot_za32_vg1x2 (w8, z1, z0)) + +/* +** dot_single_w8p1_z1_z0: +** bfdot za\.s\[w8, 1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svbfloat16x2_t, svbfloat16_t, + svdot_single_za32_bf16_vg1x2 (w8 + 1, z1, z0), + svdot_za32_vg1x2 (w8 + 1, z1, z0)) + +/* +** dot_single_w8p2_z20_z0: +** bfdot za\.s\[w8, 2, vgx2\], {z20\.h - z21\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svbfloat16x2_t, svbfloat16_t, + svdot_single_za32_bf16_vg1x2 (w8 + 2, z20, z0), + svdot_za32_vg1x2 (w8 + 2, z20, z0)) + +/* +** dot_single_w11p4_z27_z0: +** bfdot za\.s\[w11, 4, vgx2\], {z27\.h - z28\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svbfloat16x2_t, svbfloat16_t, + 
svdot_single_za32_bf16_vg1x2 (w11 + 4, z27, z0), + svdot_za32_vg1x2 (w11 + 4, z27, z0)) + +/* +** dot_single_w8p7_z1_z0: +** bfdot za\.s\[w8, 7, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svbfloat16x2_t, svbfloat16_t, + svdot_single_za32_bf16_vg1x2 (w8 + 7, z1, z0), + svdot_za32_vg1x2 (w8 + 7, z1, z0)) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** bfdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svbfloat16x2_t, svbfloat16_t, + svdot_single_za32_bf16_vg1x2 (w8 + 8, z1, z0), + svdot_za32_vg1x2 (w8 + 8, z1, z0)) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** bfdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svbfloat16x2_t, svbfloat16_t, + svdot_single_za32_bf16_vg1x2 (w0 - 1, z1, z0), + svdot_za32_vg1x2 (w0 - 1, z1, z0)) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** bfdot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svbfloat16x2_t, svbfloat16_t, + svdot_single_za32_bf16_vg1x2 (w8, z0, z15), + svdot_za32_vg1x2 (w8, z0, z15)) + +/* +** dot_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** bfdot za\.s\[w8, 0, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svbfloat16x2_t, svbfloat16_t, + svdot_single_za32_bf16_vg1x2 (w8, z20, z16), + svdot_za32_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_bf16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_bf16_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..acdb3cdc8efbad184224b2fb363866fd0b92fb5c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_bf16_vg1x4.c @@ -0,0 +1,254 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** bfdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z0, svbfloat16x4_t, + svdot_za32_bf16_vg1x4 (0, z0, z0), + svdot_za32_vg1x4 (0, z0, z0)) + +/* +** dot_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** bfdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z0, svbfloat16x4_t, + svdot_za32_bf16_vg1x4 (w0, z0, z0), + svdot_za32_vg1x4 (w0, z0, z0)) + +/* +** dot_w8_z0_z4: +** bfdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z4, svbfloat16x4_t, + svdot_za32_bf16_vg1x4 (w8, z0, z4), + svdot_za32_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z18: +** ... +** bfdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z18, svbfloat16x4_t, + svdot_za32_bf16_vg1x4 (w8, z0, z18), + svdot_za32_vg1x4 (w8, z0, z18)) + +/* +** dot_w8_z18_z0: +** ... +** bfdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z0, svbfloat16x4_t, + svdot_za32_bf16_vg1x4 (w8, z18, z0), + svdot_za32_vg1x4 (w8, z18, z0)) + +/* +** dot_w8_z0_z23: +** ... +** bfdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svbfloat16x4_t, + svdot_za32_bf16_vg1x4 (w8, z0, z23), + svdot_za32_vg1x4 (w8, z0, z23)) + +/* +** dot_w8_z23_z0: +** ... 
+** bfdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svbfloat16x4_t, + svdot_za32_bf16_vg1x4 (w8, z23, z0), + svdot_za32_vg1x4 (w8, z23, z0)) + +/* +** dot_w8_z4_z28: +** bfdot za\.s\[w8, 0, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z28, svbfloat16x4_t, + svdot_za32_bf16_vg1x4 (w8, z4, z28), + svdot_za32_vg1x4 (w8, z4, z28)) + +/* +** dot_w8_z28_z0: +** bfdot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z0, svbfloat16x4_t, + svdot_za32_bf16_vg1x4 (w8, z28, z0), + svdot_za32_vg1x4 (w8, z28, z0)) + +/* +** dot_w8p1_z4_z0: +** bfdot za\.s\[w8, 1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svbfloat16x4_t, + svdot_za32_bf16_vg1x4 (w8 + 1, z4, z0), + svdot_za32_vg1x4 (w8 + 1, z4, z0)) + +/* +** dot_w8p2_z4_z0: +** bfdot za\.s\[w8, 2, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svbfloat16x4_t, + svdot_za32_bf16_vg1x4 (w8 + 2, z4, z0), + svdot_za32_vg1x4 (w8 + 2, z4, z0)) + +/* +** dot_w11p4_z4_z0: +** bfdot za\.s\[w11, 4, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svbfloat16x4_t, + svdot_za32_bf16_vg1x4 (w11 + 4, z4, z0), + svdot_za32_vg1x4 (w11 + 4, z4, z0)) + +/* +** dot_w8p7_z4_z0: +** bfdot za\.s\[w8, 7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svbfloat16x4_t, + svdot_za32_bf16_vg1x4 (w8 + 7, z4, z0), + svdot_za32_vg1x4 (w8 + 7, z4, z0)) + +/* +** dot_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** bfdot za\.s\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z4_z4, svbfloat16x4_t, + svdot_za32_bf16_vg1x4 (w8 + 8, z4, z4), + svdot_za32_vg1x4 (w8 + 8, z4, z4)) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** bfdot za\.s\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svbfloat16x4_t, + svdot_za32_bf16_vg1x4 (w8 - 1, z4, z0), + svdot_za32_vg1x4 (w8 - 1, z4, z0)) + +/* +** dot_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** bfdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svbfloat16x4_t, svbfloat16_t, + svdot_single_za32_bf16_vg1x4 (0, z1, z0), + svdot_za32_vg1x4 (0, z1, z0)) + +/* +** dot_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** bfdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svbfloat16x4_t, svbfloat16_t, + svdot_single_za32_bf16_vg1x4 (w0, z1, z0), + svdot_za32_vg1x4 (w0, z1, z0)) + +/* +** dot_single_w8_z1_z0: +** bfdot za\.s\[w8, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svbfloat16x4_t, svbfloat16_t, + svdot_single_za32_bf16_vg1x4 (w8, z1, z0), + svdot_za32_vg1x4 (w8, z1, z0)) + +/* +** dot_single_w8p1_z1_z0: +** bfdot za\.s\[w8, 1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svbfloat16x4_t, svbfloat16_t, + svdot_single_za32_bf16_vg1x4 (w8 + 1, z1, z0), + svdot_za32_vg1x4 (w8 + 1, z1, z0)) + +/* +** dot_single_w8p4_z20_z0: +** bfdot za\.s\[w8, 4, vgx4\], {z20\.h - z23\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p4_z20_z0, svbfloat16x4_t, svbfloat16_t, + svdot_single_za32_bf16_vg1x4 (w8 + 4, z20, z0), + svdot_za32_vg1x4 (w8 + 4, z20, z0)) + +/* +** dot_single_w8p6_z27_z0: +** bfdot za\.s\[w8, 6, vgx4\], {z27\.h - z30\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p6_z27_z0, svbfloat16x4_t, svbfloat16_t, + 
svdot_single_za32_bf16_vg1x4 (w8 + 6, z27, z0), + svdot_za32_vg1x4 (w8 + 6, z27, z0)) + +/* +** dot_single_w8p7_z1_z0: +** bfdot za\.s\[w8, 7, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svbfloat16x4_t, svbfloat16_t, + svdot_single_za32_bf16_vg1x4 (w8 + 7, z1, z0), + svdot_za32_vg1x4 (w8 + 7, z1, z0)) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** bfdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svbfloat16x4_t, svbfloat16_t, + svdot_single_za32_bf16_vg1x4 (w8 + 8, z1, z0), + svdot_za32_vg1x4 (w8 + 8, z1, z0)) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** bfdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svbfloat16x4_t, svbfloat16_t, + svdot_single_za32_bf16_vg1x4 (w0 - 1, z1, z0), + svdot_za32_vg1x4 (w0 - 1, z1, z0)) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** bfdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svbfloat16x4_t, svbfloat16_t, + svdot_single_za32_bf16_vg1x4 (w8, z0, z15), + svdot_za32_vg1x4 (w8, z0, z15)) + +/* +** dot_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** bfdot za\.s\[w8, 0, vgx4\], {z20\.h - z23\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svbfloat16x4_t, svbfloat16_t, + svdot_single_za32_bf16_vg1x4 (w8, z20, z16), + svdot_za32_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_f16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_f16_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..8779959ffa9a1284bf456d2b74dbac7d92fd4f74 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_f16_vg1x2.c @@ -0,0 +1,243 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** fdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z0, svfloat16x2_t, + svdot_za32_f16_vg1x2 (0, z0, z0), + svdot_za32_vg1x2 (0, z0, z0)) + +/* +** dot_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** fdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z0, svfloat16x2_t, + svdot_za32_f16_vg1x2 (w0, z0, z0), + svdot_za32_vg1x2 (w0, z0, z0)) + +/* +** dot_w8_z0_z4: +** fdot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z4, svfloat16x2_t, + svdot_za32_f16_vg1x2 (w8, z0, z4), + svdot_za32_vg1x2 (w8, z0, z4)) + +/* +** dot_w8_z4_z18: +** fdot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z18, svfloat16x2_t, + svdot_za32_f16_vg1x2 (w8, z4, z18), + svdot_za32_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z23: +** ... +** fdot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svfloat16x2_t, + svdot_za32_f16_vg1x2 (w8, z0, z23), + svdot_za32_vg1x2 (w8, z0, z23)) + +/* +** dot_w8_z23_z0: +** ... 
+** fdot za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svfloat16x2_t, + svdot_za32_f16_vg1x2 (w8, z23, z0), + svdot_za32_vg1x2 (w8, z23, z0)) + +/* +** dot_w8_z18_z28: +** fdot za\.s\[w8, 0, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z28, svfloat16x2_t, + svdot_za32_f16_vg1x2 (w8, z18, z28), + svdot_za32_vg1x2 (w8, z18, z28)) + +/* +** dot_w8_z28_z4: +** fdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z4, svfloat16x2_t, + svdot_za32_f16_vg1x2 (w8, z28, z4), + svdot_za32_vg1x2 (w8, z28, z4)) + +/* +** dot_w8p1_z4_z0: +** fdot za\.s\[w8, 1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svfloat16x2_t, + svdot_za32_f16_vg1x2 (w8 + 1, z4, z0), + svdot_za32_vg1x2 (w8 + 1, z4, z0)) + +/* +** dot_w8p2_z4_z0: +** fdot za\.s\[w8, 2, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svfloat16x2_t, + svdot_za32_f16_vg1x2 (w8 + 2, z4, z0), + svdot_za32_vg1x2 (w8 + 2, z4, z0)) + +/* +** dot_w11p4_z4_z0: +** fdot za\.s\[w11, 4, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svfloat16x2_t, + svdot_za32_f16_vg1x2 (w11 + 4, z4, z0), + svdot_za32_vg1x2 (w11 + 4, z4, z0)) + +/* +** dot_w8p7_z4_z0: +** fdot za\.s\[w8, 7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svfloat16x2_t, + svdot_za32_f16_vg1x2 (w8 + 7, z4, z0), + svdot_za32_vg1x2 (w8 + 7, z4, z0)) + +/* +** dot_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** fdot za\.s\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z4_z4, svfloat16x2_t, + svdot_za32_f16_vg1x2 (w8 + 8, z4, z4), + svdot_za32_vg1x2 (w8 + 8, z4, z4)) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fdot za\.s\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svfloat16x2_t, + svdot_za32_f16_vg1x2 (w8 - 1, z4, z0), + svdot_za32_vg1x2 (w8 - 1, z4, z0)) + +/* +** dot_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** fdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svfloat16x2_t, svfloat16_t, + svdot_single_za32_f16_vg1x2 (0, z1, z0), + svdot_za32_vg1x2 (0, z1, z0)) + +/* +** dot_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** fdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svfloat16x2_t, svfloat16_t, + svdot_single_za32_f16_vg1x2 (w0, z1, z0), + svdot_za32_vg1x2 (w0, z1, z0)) + +/* +** dot_single_w8_z1_z0: +** fdot za\.s\[w8, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svfloat16x2_t, svfloat16_t, + svdot_single_za32_f16_vg1x2 (w8, z1, z0), + svdot_za32_vg1x2 (w8, z1, z0)) + +/* +** dot_single_w8p1_z1_z0: +** fdot za\.s\[w8, 1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svfloat16x2_t, svfloat16_t, + svdot_single_za32_f16_vg1x2 (w8 + 1, z1, z0), + svdot_za32_vg1x2 (w8 + 1, z1, z0)) + +/* +** dot_single_w8p2_z20_z0: +** fdot za\.s\[w8, 2, vgx2\], {z20\.h - z21\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svfloat16x2_t, svfloat16_t, + svdot_single_za32_f16_vg1x2 (w8 + 2, z20, z0), + svdot_za32_vg1x2 (w8 + 2, z20, z0)) + +/* +** dot_single_w11p4_z27_z0: +** fdot za\.s\[w11, 4, vgx2\], {z27\.h - z28\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svfloat16x2_t, svfloat16_t, + svdot_single_za32_f16_vg1x2 (w11 + 4, z27, z0), + 
svdot_za32_vg1x2 (w11 + 4, z27, z0)) + +/* +** dot_single_w8p7_z1_z0: +** fdot za\.s\[w8, 7, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svfloat16x2_t, svfloat16_t, + svdot_single_za32_f16_vg1x2 (w8 + 7, z1, z0), + svdot_za32_vg1x2 (w8 + 7, z1, z0)) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svfloat16x2_t, svfloat16_t, + svdot_single_za32_f16_vg1x2 (w8 + 8, z1, z0), + svdot_za32_vg1x2 (w8 + 8, z1, z0)) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** fdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svfloat16x2_t, svfloat16_t, + svdot_single_za32_f16_vg1x2 (w0 - 1, z1, z0), + svdot_za32_vg1x2 (w0 - 1, z1, z0)) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** fdot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svfloat16x2_t, svfloat16_t, + svdot_single_za32_f16_vg1x2 (w8, z0, z15), + svdot_za32_vg1x2 (w8, z0, z15)) + +/* +** dot_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** fdot za\.s\[w8, 0, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svfloat16x2_t, svfloat16_t, + svdot_single_za32_f16_vg1x2 (w8, z20, z16), + svdot_za32_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_f16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_f16_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..26082302e535a8a68f9fcf0c6d9a326f5892f917 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_f16_vg1x4.c @@ -0,0 +1,254 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** fdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z0, svfloat16x4_t, + svdot_za32_f16_vg1x4 (0, z0, z0), + svdot_za32_vg1x4 (0, z0, z0)) + +/* +** dot_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** fdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z0, svfloat16x4_t, + svdot_za32_f16_vg1x4 (w0, z0, z0), + svdot_za32_vg1x4 (w0, z0, z0)) + +/* +** dot_w8_z0_z4: +** fdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z4, svfloat16x4_t, + svdot_za32_f16_vg1x4 (w8, z0, z4), + svdot_za32_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z18: +** ... +** fdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z18, svfloat16x4_t, + svdot_za32_f16_vg1x4 (w8, z0, z18), + svdot_za32_vg1x4 (w8, z0, z18)) + +/* +** dot_w8_z18_z0: +** ... +** fdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z0, svfloat16x4_t, + svdot_za32_f16_vg1x4 (w8, z18, z0), + svdot_za32_vg1x4 (w8, z18, z0)) + +/* +** dot_w8_z0_z23: +** ... +** fdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svfloat16x4_t, + svdot_za32_f16_vg1x4 (w8, z0, z23), + svdot_za32_vg1x4 (w8, z0, z23)) + +/* +** dot_w8_z23_z0: +** ... 
+** fdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svfloat16x4_t, + svdot_za32_f16_vg1x4 (w8, z23, z0), + svdot_za32_vg1x4 (w8, z23, z0)) + +/* +** dot_w8_z4_z28: +** fdot za\.s\[w8, 0, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z28, svfloat16x4_t, + svdot_za32_f16_vg1x4 (w8, z4, z28), + svdot_za32_vg1x4 (w8, z4, z28)) + +/* +** dot_w8_z28_z0: +** fdot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z0, svfloat16x4_t, + svdot_za32_f16_vg1x4 (w8, z28, z0), + svdot_za32_vg1x4 (w8, z28, z0)) + +/* +** dot_w8p1_z4_z0: +** fdot za\.s\[w8, 1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svfloat16x4_t, + svdot_za32_f16_vg1x4 (w8 + 1, z4, z0), + svdot_za32_vg1x4 (w8 + 1, z4, z0)) + +/* +** dot_w8p2_z4_z0: +** fdot za\.s\[w8, 2, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svfloat16x4_t, + svdot_za32_f16_vg1x4 (w8 + 2, z4, z0), + svdot_za32_vg1x4 (w8 + 2, z4, z0)) + +/* +** dot_w11p4_z4_z0: +** fdot za\.s\[w11, 4, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svfloat16x4_t, + svdot_za32_f16_vg1x4 (w11 + 4, z4, z0), + svdot_za32_vg1x4 (w11 + 4, z4, z0)) + +/* +** dot_w8p7_z4_z0: +** fdot za\.s\[w8, 7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svfloat16x4_t, + svdot_za32_f16_vg1x4 (w8 + 7, z4, z0), + svdot_za32_vg1x4 (w8 + 7, z4, z0)) + +/* +** dot_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** fdot za\.s\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z4_z4, svfloat16x4_t, + svdot_za32_f16_vg1x4 (w8 + 8, z4, z4), + svdot_za32_vg1x4 (w8 + 8, z4, z4)) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fdot za\.s\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svfloat16x4_t, + svdot_za32_f16_vg1x4 (w8 - 1, z4, z0), + svdot_za32_vg1x4 (w8 - 1, z4, z0)) + +/* +** dot_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** fdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svfloat16x4_t, svfloat16_t, + svdot_single_za32_f16_vg1x4 (0, z1, z0), + svdot_za32_vg1x4 (0, z1, z0)) + +/* +** dot_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** fdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svfloat16x4_t, svfloat16_t, + svdot_single_za32_f16_vg1x4 (w0, z1, z0), + svdot_za32_vg1x4 (w0, z1, z0)) + +/* +** dot_single_w8_z1_z0: +** fdot za\.s\[w8, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svfloat16x4_t, svfloat16_t, + svdot_single_za32_f16_vg1x4 (w8, z1, z0), + svdot_za32_vg1x4 (w8, z1, z0)) + +/* +** dot_single_w8p1_z1_z0: +** fdot za\.s\[w8, 1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svfloat16x4_t, svfloat16_t, + svdot_single_za32_f16_vg1x4 (w8 + 1, z1, z0), + svdot_za32_vg1x4 (w8 + 1, z1, z0)) + +/* +** dot_single_w8p4_z20_z0: +** fdot za\.s\[w8, 4, vgx4\], {z20\.h - z23\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p4_z20_z0, svfloat16x4_t, svfloat16_t, + svdot_single_za32_f16_vg1x4 (w8 + 4, z20, z0), + svdot_za32_vg1x4 (w8 + 4, z20, z0)) + +/* +** dot_single_w8p6_z27_z0: +** fdot za\.s\[w8, 6, vgx4\], {z27\.h - z30\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p6_z27_z0, svfloat16x4_t, svfloat16_t, + svdot_single_za32_f16_vg1x4 (w8 + 6, z27, z0), + svdot_za32_vg1x4 
(w8 + 6, z27, z0)) + +/* +** dot_single_w8p7_z1_z0: +** fdot za\.s\[w8, 7, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svfloat16x4_t, svfloat16_t, + svdot_single_za32_f16_vg1x4 (w8 + 7, z1, z0), + svdot_za32_vg1x4 (w8 + 7, z1, z0)) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svfloat16x4_t, svfloat16_t, + svdot_single_za32_f16_vg1x4 (w8 + 8, z1, z0), + svdot_za32_vg1x4 (w8 + 8, z1, z0)) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** fdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svfloat16x4_t, svfloat16_t, + svdot_single_za32_f16_vg1x4 (w0 - 1, z1, z0), + svdot_za32_vg1x4 (w0 - 1, z1, z0)) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** fdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svfloat16x4_t, svfloat16_t, + svdot_single_za32_f16_vg1x4 (w8, z0, z15), + svdot_za32_vg1x4 (w8, z0, z15)) + +/* +** dot_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** fdot za\.s\[w8, 0, vgx4\], {z20\.h - z23\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svfloat16x4_t, svfloat16_t, + svdot_single_za32_f16_vg1x4 (w8, z20, z16), + svdot_za32_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_s16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_s16_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..9b2285b1013c620e985ac80c8bbfb91b4c981241 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_s16_vg1x2.c @@ -0,0 +1,243 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** sdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z0, svint16x2_t, + svdot_za32_s16_vg1x2 (0, z0, z0), + svdot_za32_vg1x2 (0, z0, z0)) + +/* +** dot_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** sdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z0, svint16x2_t, + svdot_za32_s16_vg1x2 (w0, z0, z0), + svdot_za32_vg1x2 (w0, z0, z0)) + +/* +** dot_w8_z0_z4: +** sdot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z4, svint16x2_t, + svdot_za32_s16_vg1x2 (w8, z0, z4), + svdot_za32_vg1x2 (w8, z0, z4)) + +/* +** dot_w8_z4_z18: +** sdot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z18, svint16x2_t, + svdot_za32_s16_vg1x2 (w8, z4, z18), + svdot_za32_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z23: +** ... +** sdot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svint16x2_t, + svdot_za32_s16_vg1x2 (w8, z0, z23), + svdot_za32_vg1x2 (w8, z0, z23)) + +/* +** dot_w8_z23_z0: +** ... 
+** sdot za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svint16x2_t, + svdot_za32_s16_vg1x2 (w8, z23, z0), + svdot_za32_vg1x2 (w8, z23, z0)) + +/* +** dot_w8_z18_z28: +** sdot za\.s\[w8, 0, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z28, svint16x2_t, + svdot_za32_s16_vg1x2 (w8, z18, z28), + svdot_za32_vg1x2 (w8, z18, z28)) + +/* +** dot_w8_z28_z4: +** sdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z4, svint16x2_t, + svdot_za32_s16_vg1x2 (w8, z28, z4), + svdot_za32_vg1x2 (w8, z28, z4)) + +/* +** dot_w8p1_z4_z0: +** sdot za\.s\[w8, 1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svint16x2_t, + svdot_za32_s16_vg1x2 (w8 + 1, z4, z0), + svdot_za32_vg1x2 (w8 + 1, z4, z0)) + +/* +** dot_w8p2_z4_z0: +** sdot za\.s\[w8, 2, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svint16x2_t, + svdot_za32_s16_vg1x2 (w8 + 2, z4, z0), + svdot_za32_vg1x2 (w8 + 2, z4, z0)) + +/* +** dot_w11p4_z4_z0: +** sdot za\.s\[w11, 4, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svint16x2_t, + svdot_za32_s16_vg1x2 (w11 + 4, z4, z0), + svdot_za32_vg1x2 (w11 + 4, z4, z0)) + +/* +** dot_w8p7_z4_z0: +** sdot za\.s\[w8, 7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svint16x2_t, + svdot_za32_s16_vg1x2 (w8 + 7, z4, z0), + svdot_za32_vg1x2 (w8 + 7, z4, z0)) + +/* +** dot_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** sdot za\.s\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z4_z4, svint16x2_t, + svdot_za32_s16_vg1x2 (w8 + 8, z4, z4), + svdot_za32_vg1x2 (w8 + 8, z4, z4)) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sdot za\.s\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svint16x2_t, + svdot_za32_s16_vg1x2 (w8 - 1, z4, z0), + svdot_za32_vg1x2 (w8 - 1, z4, z0)) + +/* +** dot_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** sdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svint16x2_t, svint16_t, + svdot_single_za32_s16_vg1x2 (0, z1, z0), + svdot_za32_vg1x2 (0, z1, z0)) + +/* +** dot_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** sdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svint16x2_t, svint16_t, + svdot_single_za32_s16_vg1x2 (w0, z1, z0), + svdot_za32_vg1x2 (w0, z1, z0)) + +/* +** dot_single_w8_z1_z0: +** sdot za\.s\[w8, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svint16x2_t, svint16_t, + svdot_single_za32_s16_vg1x2 (w8, z1, z0), + svdot_za32_vg1x2 (w8, z1, z0)) + +/* +** dot_single_w8p1_z1_z0: +** sdot za\.s\[w8, 1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svint16x2_t, svint16_t, + svdot_single_za32_s16_vg1x2 (w8 + 1, z1, z0), + svdot_za32_vg1x2 (w8 + 1, z1, z0)) + +/* +** dot_single_w8p2_z20_z0: +** sdot za\.s\[w8, 2, vgx2\], {z20\.h - z21\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svint16x2_t, svint16_t, + svdot_single_za32_s16_vg1x2 (w8 + 2, z20, z0), + svdot_za32_vg1x2 (w8 + 2, z20, z0)) + +/* +** dot_single_w11p4_z27_z0: +** sdot za\.s\[w11, 4, vgx2\], {z27\.h - z28\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svint16x2_t, svint16_t, + svdot_single_za32_s16_vg1x2 (w11 + 4, z27, z0), + svdot_za32_vg1x2 (w11 + 4, z27, z0)) + +/* +** 
dot_single_w8p7_z1_z0: +** sdot za\.s\[w8, 7, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svint16x2_t, svint16_t, + svdot_single_za32_s16_vg1x2 (w8 + 7, z1, z0), + svdot_za32_vg1x2 (w8 + 7, z1, z0)) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svint16x2_t, svint16_t, + svdot_single_za32_s16_vg1x2 (w8 + 8, z1, z0), + svdot_za32_vg1x2 (w8 + 8, z1, z0)) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** sdot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svint16x2_t, svint16_t, + svdot_single_za32_s16_vg1x2 (w0 - 1, z1, z0), + svdot_za32_vg1x2 (w0 - 1, z1, z0)) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** sdot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svint16x2_t, svint16_t, + svdot_single_za32_s16_vg1x2 (w8, z0, z15), + svdot_za32_vg1x2 (w8, z0, z15)) + +/* +** dot_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** sdot za\.s\[w8, 0, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svint16x2_t, svint16_t, + svdot_single_za32_s16_vg1x2 (w8, z20, z16), + svdot_za32_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_s16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_s16_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..78c3f420d0a644609d99876f2d26ad8620dfe45b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_s16_vg1x4.c @@ -0,0 +1,254 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** sdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z0, svint16x4_t, + svdot_za32_s16_vg1x4 (0, z0, z0), + svdot_za32_vg1x4 (0, z0, z0)) + +/* +** dot_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** sdot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z0, svint16x4_t, + svdot_za32_s16_vg1x4 (w0, z0, z0), + svdot_za32_vg1x4 (w0, z0, z0)) + +/* +** dot_w8_z0_z4: +** sdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z4, svint16x4_t, + svdot_za32_s16_vg1x4 (w8, z0, z4), + svdot_za32_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z18: +** ... +** sdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z18, svint16x4_t, + svdot_za32_s16_vg1x4 (w8, z0, z18), + svdot_za32_vg1x4 (w8, z0, z18)) + +/* +** dot_w8_z18_z0: +** ... +** sdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z0, svint16x4_t, + svdot_za32_s16_vg1x4 (w8, z18, z0), + svdot_za32_vg1x4 (w8, z18, z0)) + +/* +** dot_w8_z0_z23: +** ... +** sdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svint16x4_t, + svdot_za32_s16_vg1x4 (w8, z0, z23), + svdot_za32_vg1x4 (w8, z0, z23)) + +/* +** dot_w8_z23_z0: +** ... 
+** sdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svint16x4_t, + svdot_za32_s16_vg1x4 (w8, z23, z0), + svdot_za32_vg1x4 (w8, z23, z0)) + +/* +** dot_w8_z4_z28: +** sdot za\.s\[w8, 0, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z28, svint16x4_t, + svdot_za32_s16_vg1x4 (w8, z4, z28), + svdot_za32_vg1x4 (w8, z4, z28)) + +/* +** dot_w8_z28_z0: +** sdot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z0, svint16x4_t, + svdot_za32_s16_vg1x4 (w8, z28, z0), + svdot_za32_vg1x4 (w8, z28, z0)) + +/* +** dot_w8p1_z4_z0: +** sdot za\.s\[w8, 1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svint16x4_t, + svdot_za32_s16_vg1x4 (w8 + 1, z4, z0), + svdot_za32_vg1x4 (w8 + 1, z4, z0)) + +/* +** dot_w8p2_z4_z0: +** sdot za\.s\[w8, 2, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svint16x4_t, + svdot_za32_s16_vg1x4 (w8 + 2, z4, z0), + svdot_za32_vg1x4 (w8 + 2, z4, z0)) + +/* +** dot_w11p4_z4_z0: +** sdot za\.s\[w11, 4, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svint16x4_t, + svdot_za32_s16_vg1x4 (w11 + 4, z4, z0), + svdot_za32_vg1x4 (w11 + 4, z4, z0)) + +/* +** dot_w8p7_z4_z0: +** sdot za\.s\[w8, 7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svint16x4_t, + svdot_za32_s16_vg1x4 (w8 + 7, z4, z0), + svdot_za32_vg1x4 (w8 + 7, z4, z0)) + +/* +** dot_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** sdot za\.s\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z4_z4, svint16x4_t, + svdot_za32_s16_vg1x4 (w8 + 8, z4, z4), + svdot_za32_vg1x4 (w8 + 8, z4, z4)) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sdot za\.s\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svint16x4_t, + svdot_za32_s16_vg1x4 (w8 - 1, z4, z0), + svdot_za32_vg1x4 (w8 - 1, z4, z0)) + +/* +** dot_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** sdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svint16x4_t, svint16_t, + svdot_single_za32_s16_vg1x4 (0, z1, z0), + svdot_za32_vg1x4 (0, z1, z0)) + +/* +** dot_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** sdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svint16x4_t, svint16_t, + svdot_single_za32_s16_vg1x4 (w0, z1, z0), + svdot_za32_vg1x4 (w0, z1, z0)) + +/* +** dot_single_w8_z1_z0: +** sdot za\.s\[w8, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svint16x4_t, svint16_t, + svdot_single_za32_s16_vg1x4 (w8, z1, z0), + svdot_za32_vg1x4 (w8, z1, z0)) + +/* +** dot_single_w8p1_z1_z0: +** sdot za\.s\[w8, 1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svint16x4_t, svint16_t, + svdot_single_za32_s16_vg1x4 (w8 + 1, z1, z0), + svdot_za32_vg1x4 (w8 + 1, z1, z0)) + +/* +** dot_single_w8p4_z20_z0: +** sdot za\.s\[w8, 4, vgx4\], {z20\.h - z23\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p4_z20_z0, svint16x4_t, svint16_t, + svdot_single_za32_s16_vg1x4 (w8 + 4, z20, z0), + svdot_za32_vg1x4 (w8 + 4, z20, z0)) + +/* +** dot_single_w8p6_z27_z0: +** sdot za\.s\[w8, 6, vgx4\], {z27\.h - z30\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p6_z27_z0, svint16x4_t, svint16_t, + svdot_single_za32_s16_vg1x4 (w8 + 6, z27, z0), + svdot_za32_vg1x4 (w8 + 6, z27, z0)) + +/* +** 
dot_single_w8p7_z1_z0: +** sdot za\.s\[w8, 7, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svint16x4_t, svint16_t, + svdot_single_za32_s16_vg1x4 (w8 + 7, z1, z0), + svdot_za32_vg1x4 (w8 + 7, z1, z0)) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svint16x4_t, svint16_t, + svdot_single_za32_s16_vg1x4 (w8 + 8, z1, z0), + svdot_za32_vg1x4 (w8 + 8, z1, z0)) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** sdot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svint16x4_t, svint16_t, + svdot_single_za32_s16_vg1x4 (w0 - 1, z1, z0), + svdot_za32_vg1x4 (w0 - 1, z1, z0)) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** sdot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svint16x4_t, svint16_t, + svdot_single_za32_s16_vg1x4 (w8, z0, z15), + svdot_za32_vg1x4 (w8, z0, z15)) + +/* +** dot_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** sdot za\.s\[w8, 0, vgx4\], {z20\.h - z23\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svint16x4_t, svint16_t, + svdot_single_za32_s16_vg1x4 (w8, z20, z16), + svdot_za32_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_s8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_s8_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..3570efcc1928e52c60fd60196a4c99d577c5dc14 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_s8_vg1x2.c @@ -0,0 +1,243 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** sdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z0, svint8x2_t, + svdot_za32_s8_vg1x2 (0, z0, z0), + svdot_za32_vg1x2 (0, z0, z0)) + +/* +** dot_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** sdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z0, svint8x2_t, + svdot_za32_s8_vg1x2 (w0, z0, z0), + svdot_za32_vg1x2 (w0, z0, z0)) + +/* +** dot_w8_z0_z4: +** sdot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z4, svint8x2_t, + svdot_za32_s8_vg1x2 (w8, z0, z4), + svdot_za32_vg1x2 (w8, z0, z4)) + +/* +** dot_w8_z4_z18: +** sdot za\.s\[w8, 0, vgx2\], {z4\.b - z5\.b}, {z18\.b - z19\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z18, svint8x2_t, + svdot_za32_s8_vg1x2 (w8, z4, z18), + svdot_za32_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z23: +** ... +** sdot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svint8x2_t, + svdot_za32_s8_vg1x2 (w8, z0, z23), + svdot_za32_vg1x2 (w8, z0, z23)) + +/* +** dot_w8_z23_z0: +** ... 
+** sdot za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svint8x2_t, + svdot_za32_s8_vg1x2 (w8, z23, z0), + svdot_za32_vg1x2 (w8, z23, z0)) + +/* +** dot_w8_z18_z28: +** sdot za\.s\[w8, 0, vgx2\], {z18\.b - z19\.b}, {z28\.b - z29\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z28, svint8x2_t, + svdot_za32_s8_vg1x2 (w8, z18, z28), + svdot_za32_vg1x2 (w8, z18, z28)) + +/* +** dot_w8_z28_z4: +** sdot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z4, svint8x2_t, + svdot_za32_s8_vg1x2 (w8, z28, z4), + svdot_za32_vg1x2 (w8, z28, z4)) + +/* +** dot_w8p1_z4_z0: +** sdot za\.s\[w8, 1, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svint8x2_t, + svdot_za32_s8_vg1x2 (w8 + 1, z4, z0), + svdot_za32_vg1x2 (w8 + 1, z4, z0)) + +/* +** dot_w8p2_z4_z0: +** sdot za\.s\[w8, 2, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svint8x2_t, + svdot_za32_s8_vg1x2 (w8 + 2, z4, z0), + svdot_za32_vg1x2 (w8 + 2, z4, z0)) + +/* +** dot_w11p4_z4_z0: +** sdot za\.s\[w11, 4, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svint8x2_t, + svdot_za32_s8_vg1x2 (w11 + 4, z4, z0), + svdot_za32_vg1x2 (w11 + 4, z4, z0)) + +/* +** dot_w8p7_z4_z0: +** sdot za\.s\[w8, 7, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svint8x2_t, + svdot_za32_s8_vg1x2 (w8 + 7, z4, z0), + svdot_za32_vg1x2 (w8 + 7, z4, z0)) + +/* +** dot_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** sdot za\.s\[\1, 0, vgx2\], {z4\.b - z5\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z4_z4, svint8x2_t, + svdot_za32_s8_vg1x2 (w8 + 8, z4, z4), + svdot_za32_vg1x2 (w8 + 8, z4, z4)) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sdot za\.s\[\1, 0, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svint8x2_t, + svdot_za32_s8_vg1x2 (w8 - 1, z4, z0), + svdot_za32_vg1x2 (w8 - 1, z4, z0)) + +/* +** dot_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** sdot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svint8x2_t, svint8_t, + svdot_single_za32_s8_vg1x2 (0, z1, z0), + svdot_za32_vg1x2 (0, z1, z0)) + +/* +** dot_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** sdot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svint8x2_t, svint8_t, + svdot_single_za32_s8_vg1x2 (w0, z1, z0), + svdot_za32_vg1x2 (w0, z1, z0)) + +/* +** dot_single_w8_z1_z0: +** sdot za\.s\[w8, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svint8x2_t, svint8_t, + svdot_single_za32_s8_vg1x2 (w8, z1, z0), + svdot_za32_vg1x2 (w8, z1, z0)) + +/* +** dot_single_w8p1_z1_z0: +** sdot za\.s\[w8, 1, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svint8x2_t, svint8_t, + svdot_single_za32_s8_vg1x2 (w8 + 1, z1, z0), + svdot_za32_vg1x2 (w8 + 1, z1, z0)) + +/* +** dot_single_w8p2_z20_z0: +** sdot za\.s\[w8, 2, vgx2\], {z20\.b - z21\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svint8x2_t, svint8_t, + svdot_single_za32_s8_vg1x2 (w8 + 2, z20, z0), + svdot_za32_vg1x2 (w8 + 2, z20, z0)) + +/* +** dot_single_w11p4_z27_z0: +** sdot za\.s\[w11, 4, vgx2\], {z27\.b - z28\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svint8x2_t, svint8_t, + svdot_single_za32_s8_vg1x2 (w11 + 4, z27, z0), + svdot_za32_vg1x2 (w11 + 4, z27, z0)) + +/* +** dot_single_w8p7_z1_z0: +** sdot 
za\.s\[w8, 7, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svint8x2_t, svint8_t, + svdot_single_za32_s8_vg1x2 (w8 + 7, z1, z0), + svdot_za32_vg1x2 (w8 + 7, z1, z0)) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sdot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svint8x2_t, svint8_t, + svdot_single_za32_s8_vg1x2 (w8 + 8, z1, z0), + svdot_za32_vg1x2 (w8 + 8, z1, z0)) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** sdot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svint8x2_t, svint8_t, + svdot_single_za32_s8_vg1x2 (w0 - 1, z1, z0), + svdot_za32_vg1x2 (w0 - 1, z1, z0)) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** sdot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svint8x2_t, svint8_t, + svdot_single_za32_s8_vg1x2 (w8, z0, z15), + svdot_za32_vg1x2 (w8, z0, z15)) + +/* +** dot_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** sdot za\.s\[w8, 0, vgx2\], {z20\.b - z21\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svint8x2_t, svint8_t, + svdot_single_za32_s8_vg1x2 (w8, z20, z16), + svdot_za32_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_s8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_s8_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..d738910da130ae871dd8b4eedc2d263f12b8ef69 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_s8_vg1x4.c @@ -0,0 +1,254 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** sdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z0, svint8x4_t, + svdot_za32_s8_vg1x4 (0, z0, z0), + svdot_za32_vg1x4 (0, z0, z0)) + +/* +** dot_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** sdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z0, svint8x4_t, + svdot_za32_s8_vg1x4 (w0, z0, z0), + svdot_za32_vg1x4 (w0, z0, z0)) + +/* +** dot_w8_z0_z4: +** sdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z4, svint8x4_t, + svdot_za32_s8_vg1x4 (w8, z0, z4), + svdot_za32_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z18: +** ... +** sdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z18, svint8x4_t, + svdot_za32_s8_vg1x4 (w8, z0, z18), + svdot_za32_vg1x4 (w8, z0, z18)) + +/* +** dot_w8_z18_z0: +** ... +** sdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z0, svint8x4_t, + svdot_za32_s8_vg1x4 (w8, z18, z0), + svdot_za32_vg1x4 (w8, z18, z0)) + +/* +** dot_w8_z0_z23: +** ... +** sdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svint8x4_t, + svdot_za32_s8_vg1x4 (w8, z0, z23), + svdot_za32_vg1x4 (w8, z0, z23)) + +/* +** dot_w8_z23_z0: +** ... 
+** sdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svint8x4_t, + svdot_za32_s8_vg1x4 (w8, z23, z0), + svdot_za32_vg1x4 (w8, z23, z0)) + +/* +** dot_w8_z4_z28: +** sdot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, {z28\.b - z31\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z28, svint8x4_t, + svdot_za32_s8_vg1x4 (w8, z4, z28), + svdot_za32_vg1x4 (w8, z4, z28)) + +/* +** dot_w8_z28_z0: +** sdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z0, svint8x4_t, + svdot_za32_s8_vg1x4 (w8, z28, z0), + svdot_za32_vg1x4 (w8, z28, z0)) + +/* +** dot_w8p1_z4_z0: +** sdot za\.s\[w8, 1, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svint8x4_t, + svdot_za32_s8_vg1x4 (w8 + 1, z4, z0), + svdot_za32_vg1x4 (w8 + 1, z4, z0)) + +/* +** dot_w8p2_z4_z0: +** sdot za\.s\[w8, 2, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svint8x4_t, + svdot_za32_s8_vg1x4 (w8 + 2, z4, z0), + svdot_za32_vg1x4 (w8 + 2, z4, z0)) + +/* +** dot_w11p4_z4_z0: +** sdot za\.s\[w11, 4, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svint8x4_t, + svdot_za32_s8_vg1x4 (w11 + 4, z4, z0), + svdot_za32_vg1x4 (w11 + 4, z4, z0)) + +/* +** dot_w8p7_z4_z0: +** sdot za\.s\[w8, 7, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svint8x4_t, + svdot_za32_s8_vg1x4 (w8 + 7, z4, z0), + svdot_za32_vg1x4 (w8 + 7, z4, z0)) + +/* +** dot_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** sdot za\.s\[\1, 0, vgx4\], {z4\.b - z7\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z4_z4, svint8x4_t, + svdot_za32_s8_vg1x4 (w8 + 8, z4, z4), + svdot_za32_vg1x4 (w8 + 8, z4, z4)) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sdot za\.s\[\1, 0, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svint8x4_t, + svdot_za32_s8_vg1x4 (w8 - 1, z4, z0), + svdot_za32_vg1x4 (w8 - 1, z4, z0)) + +/* +** dot_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** sdot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svint8x4_t, svint8_t, + svdot_single_za32_s8_vg1x4 (0, z1, z0), + svdot_za32_vg1x4 (0, z1, z0)) + +/* +** dot_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** sdot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svint8x4_t, svint8_t, + svdot_single_za32_s8_vg1x4 (w0, z1, z0), + svdot_za32_vg1x4 (w0, z1, z0)) + +/* +** dot_single_w8_z1_z0: +** sdot za\.s\[w8, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svint8x4_t, svint8_t, + svdot_single_za32_s8_vg1x4 (w8, z1, z0), + svdot_za32_vg1x4 (w8, z1, z0)) + +/* +** dot_single_w8p1_z1_z0: +** sdot za\.s\[w8, 1, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svint8x4_t, svint8_t, + svdot_single_za32_s8_vg1x4 (w8 + 1, z1, z0), + svdot_za32_vg1x4 (w8 + 1, z1, z0)) + +/* +** dot_single_w8p4_z20_z0: +** sdot za\.s\[w8, 4, vgx4\], {z20\.b - z23\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p4_z20_z0, svint8x4_t, svint8_t, + svdot_single_za32_s8_vg1x4 (w8 + 4, z20, z0), + svdot_za32_vg1x4 (w8 + 4, z20, z0)) + +/* +** dot_single_w8p6_z27_z0: +** sdot za\.s\[w8, 6, vgx4\], {z27\.b - z30\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p6_z27_z0, svint8x4_t, svint8_t, + svdot_single_za32_s8_vg1x4 (w8 + 6, z27, z0), + svdot_za32_vg1x4 (w8 + 6, z27, z0)) + +/* +** dot_single_w8p7_z1_z0: +** sdot za\.s\[w8, 7, 
vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svint8x4_t, svint8_t, + svdot_single_za32_s8_vg1x4 (w8 + 7, z1, z0), + svdot_za32_vg1x4 (w8 + 7, z1, z0)) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sdot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svint8x4_t, svint8_t, + svdot_single_za32_s8_vg1x4 (w8 + 8, z1, z0), + svdot_za32_vg1x4 (w8 + 8, z1, z0)) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** sdot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svint8x4_t, svint8_t, + svdot_single_za32_s8_vg1x4 (w0 - 1, z1, z0), + svdot_za32_vg1x4 (w0 - 1, z1, z0)) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** sdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svint8x4_t, svint8_t, + svdot_single_za32_s8_vg1x4 (w8, z0, z15), + svdot_za32_vg1x4 (w8, z0, z15)) + +/* +** dot_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** sdot za\.s\[w8, 0, vgx4\], {z20\.b - z23\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svint8x4_t, svint8_t, + svdot_single_za32_s8_vg1x4 (w8, z20, z16), + svdot_za32_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_u16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_u16_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..c78fe5b6442886a43cb7e84d259fff6385a7fdae --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_u16_vg1x2.c @@ -0,0 +1,243 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** udot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z0, svuint16x2_t, + svdot_za32_u16_vg1x2 (0, z0, z0), + svdot_za32_vg1x2 (0, z0, z0)) + +/* +** dot_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** udot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z0, svuint16x2_t, + svdot_za32_u16_vg1x2 (w0, z0, z0), + svdot_za32_vg1x2 (w0, z0, z0)) + +/* +** dot_w8_z0_z4: +** udot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z4, svuint16x2_t, + svdot_za32_u16_vg1x2 (w8, z0, z4), + svdot_za32_vg1x2 (w8, z0, z4)) + +/* +** dot_w8_z4_z18: +** udot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z18, svuint16x2_t, + svdot_za32_u16_vg1x2 (w8, z4, z18), + svdot_za32_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z23: +** ... +** udot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svuint16x2_t, + svdot_za32_u16_vg1x2 (w8, z0, z23), + svdot_za32_vg1x2 (w8, z0, z23)) + +/* +** dot_w8_z23_z0: +** ... 
+** udot za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svuint16x2_t, + svdot_za32_u16_vg1x2 (w8, z23, z0), + svdot_za32_vg1x2 (w8, z23, z0)) + +/* +** dot_w8_z18_z28: +** udot za\.s\[w8, 0, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z28, svuint16x2_t, + svdot_za32_u16_vg1x2 (w8, z18, z28), + svdot_za32_vg1x2 (w8, z18, z28)) + +/* +** dot_w8_z28_z4: +** udot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z4, svuint16x2_t, + svdot_za32_u16_vg1x2 (w8, z28, z4), + svdot_za32_vg1x2 (w8, z28, z4)) + +/* +** dot_w8p1_z4_z0: +** udot za\.s\[w8, 1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svuint16x2_t, + svdot_za32_u16_vg1x2 (w8 + 1, z4, z0), + svdot_za32_vg1x2 (w8 + 1, z4, z0)) + +/* +** dot_w8p2_z4_z0: +** udot za\.s\[w8, 2, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svuint16x2_t, + svdot_za32_u16_vg1x2 (w8 + 2, z4, z0), + svdot_za32_vg1x2 (w8 + 2, z4, z0)) + +/* +** dot_w11p4_z4_z0: +** udot za\.s\[w11, 4, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svuint16x2_t, + svdot_za32_u16_vg1x2 (w11 + 4, z4, z0), + svdot_za32_vg1x2 (w11 + 4, z4, z0)) + +/* +** dot_w8p7_z4_z0: +** udot za\.s\[w8, 7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svuint16x2_t, + svdot_za32_u16_vg1x2 (w8 + 7, z4, z0), + svdot_za32_vg1x2 (w8 + 7, z4, z0)) + +/* +** dot_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** udot za\.s\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z4_z4, svuint16x2_t, + svdot_za32_u16_vg1x2 (w8 + 8, z4, z4), + svdot_za32_vg1x2 (w8 + 8, z4, z4)) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** udot za\.s\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svuint16x2_t, + svdot_za32_u16_vg1x2 (w8 - 1, z4, z0), + svdot_za32_vg1x2 (w8 - 1, z4, z0)) + +/* +** dot_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** udot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svuint16x2_t, svuint16_t, + svdot_single_za32_u16_vg1x2 (0, z1, z0), + svdot_za32_vg1x2 (0, z1, z0)) + +/* +** dot_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** udot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svuint16x2_t, svuint16_t, + svdot_single_za32_u16_vg1x2 (w0, z1, z0), + svdot_za32_vg1x2 (w0, z1, z0)) + +/* +** dot_single_w8_z1_z0: +** udot za\.s\[w8, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svuint16x2_t, svuint16_t, + svdot_single_za32_u16_vg1x2 (w8, z1, z0), + svdot_za32_vg1x2 (w8, z1, z0)) + +/* +** dot_single_w8p1_z1_z0: +** udot za\.s\[w8, 1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svuint16x2_t, svuint16_t, + svdot_single_za32_u16_vg1x2 (w8 + 1, z1, z0), + svdot_za32_vg1x2 (w8 + 1, z1, z0)) + +/* +** dot_single_w8p2_z20_z0: +** udot za\.s\[w8, 2, vgx2\], {z20\.h - z21\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svuint16x2_t, svuint16_t, + svdot_single_za32_u16_vg1x2 (w8 + 2, z20, z0), + svdot_za32_vg1x2 (w8 + 2, z20, z0)) + +/* +** dot_single_w11p4_z27_z0: +** udot za\.s\[w11, 4, vgx2\], {z27\.h - z28\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svuint16x2_t, svuint16_t, + svdot_single_za32_u16_vg1x2 (w11 + 4, z27, z0), + svdot_za32_vg1x2 (w11 + 4, 
z27, z0)) + +/* +** dot_single_w8p7_z1_z0: +** udot za\.s\[w8, 7, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svuint16x2_t, svuint16_t, + svdot_single_za32_u16_vg1x2 (w8 + 7, z1, z0), + svdot_za32_vg1x2 (w8 + 7, z1, z0)) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** udot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svuint16x2_t, svuint16_t, + svdot_single_za32_u16_vg1x2 (w8 + 8, z1, z0), + svdot_za32_vg1x2 (w8 + 8, z1, z0)) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** udot za\.s\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svuint16x2_t, svuint16_t, + svdot_single_za32_u16_vg1x2 (w0 - 1, z1, z0), + svdot_za32_vg1x2 (w0 - 1, z1, z0)) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** udot za\.s\[w8, 0, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svuint16x2_t, svuint16_t, + svdot_single_za32_u16_vg1x2 (w8, z0, z15), + svdot_za32_vg1x2 (w8, z0, z15)) + +/* +** dot_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** udot za\.s\[w8, 0, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svuint16x2_t, svuint16_t, + svdot_single_za32_u16_vg1x2 (w8, z20, z16), + svdot_za32_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_u16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_u16_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..276a7d3af781299dc69a5aea6a77c51e1c917685 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_u16_vg1x4.c @@ -0,0 +1,254 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** udot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z0, svuint16x4_t, + svdot_za32_u16_vg1x4 (0, z0, z0), + svdot_za32_vg1x4 (0, z0, z0)) + +/* +** dot_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** udot za\.s\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z0, svuint16x4_t, + svdot_za32_u16_vg1x4 (w0, z0, z0), + svdot_za32_vg1x4 (w0, z0, z0)) + +/* +** dot_w8_z0_z4: +** udot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z4, svuint16x4_t, + svdot_za32_u16_vg1x4 (w8, z0, z4), + svdot_za32_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z18: +** ... +** udot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z18, svuint16x4_t, + svdot_za32_u16_vg1x4 (w8, z0, z18), + svdot_za32_vg1x4 (w8, z0, z18)) + +/* +** dot_w8_z18_z0: +** ... +** udot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z0, svuint16x4_t, + svdot_za32_u16_vg1x4 (w8, z18, z0), + svdot_za32_vg1x4 (w8, z18, z0)) + +/* +** dot_w8_z0_z23: +** ... +** udot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svuint16x4_t, + svdot_za32_u16_vg1x4 (w8, z0, z23), + svdot_za32_vg1x4 (w8, z0, z23)) + +/* +** dot_w8_z23_z0: +** ... 
+** udot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svuint16x4_t, + svdot_za32_u16_vg1x4 (w8, z23, z0), + svdot_za32_vg1x4 (w8, z23, z0)) + +/* +** dot_w8_z4_z28: +** udot za\.s\[w8, 0, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z28, svuint16x4_t, + svdot_za32_u16_vg1x4 (w8, z4, z28), + svdot_za32_vg1x4 (w8, z4, z28)) + +/* +** dot_w8_z28_z0: +** udot za\.s\[w8, 0, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z0, svuint16x4_t, + svdot_za32_u16_vg1x4 (w8, z28, z0), + svdot_za32_vg1x4 (w8, z28, z0)) + +/* +** dot_w8p1_z4_z0: +** udot za\.s\[w8, 1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svuint16x4_t, + svdot_za32_u16_vg1x4 (w8 + 1, z4, z0), + svdot_za32_vg1x4 (w8 + 1, z4, z0)) + +/* +** dot_w8p2_z4_z0: +** udot za\.s\[w8, 2, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svuint16x4_t, + svdot_za32_u16_vg1x4 (w8 + 2, z4, z0), + svdot_za32_vg1x4 (w8 + 2, z4, z0)) + +/* +** dot_w11p4_z4_z0: +** udot za\.s\[w11, 4, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svuint16x4_t, + svdot_za32_u16_vg1x4 (w11 + 4, z4, z0), + svdot_za32_vg1x4 (w11 + 4, z4, z0)) + +/* +** dot_w8p7_z4_z0: +** udot za\.s\[w8, 7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svuint16x4_t, + svdot_za32_u16_vg1x4 (w8 + 7, z4, z0), + svdot_za32_vg1x4 (w8 + 7, z4, z0)) + +/* +** dot_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** udot za\.s\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z4_z4, svuint16x4_t, + svdot_za32_u16_vg1x4 (w8 + 8, z4, z4), + svdot_za32_vg1x4 (w8 + 8, z4, z4)) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** udot za\.s\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svuint16x4_t, + svdot_za32_u16_vg1x4 (w8 - 1, z4, z0), + svdot_za32_vg1x4 (w8 - 1, z4, z0)) + +/* +** dot_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** udot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svuint16x4_t, svuint16_t, + svdot_single_za32_u16_vg1x4 (0, z1, z0), + svdot_za32_vg1x4 (0, z1, z0)) + +/* +** dot_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** udot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svuint16x4_t, svuint16_t, + svdot_single_za32_u16_vg1x4 (w0, z1, z0), + svdot_za32_vg1x4 (w0, z1, z0)) + +/* +** dot_single_w8_z1_z0: +** udot za\.s\[w8, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svuint16x4_t, svuint16_t, + svdot_single_za32_u16_vg1x4 (w8, z1, z0), + svdot_za32_vg1x4 (w8, z1, z0)) + +/* +** dot_single_w8p1_z1_z0: +** udot za\.s\[w8, 1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svuint16x4_t, svuint16_t, + svdot_single_za32_u16_vg1x4 (w8 + 1, z1, z0), + svdot_za32_vg1x4 (w8 + 1, z1, z0)) + +/* +** dot_single_w8p4_z20_z0: +** udot za\.s\[w8, 4, vgx4\], {z20\.h - z23\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p4_z20_z0, svuint16x4_t, svuint16_t, + svdot_single_za32_u16_vg1x4 (w8 + 4, z20, z0), + svdot_za32_vg1x4 (w8 + 4, z20, z0)) + +/* +** dot_single_w8p6_z27_z0: +** udot za\.s\[w8, 6, vgx4\], {z27\.h - z30\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p6_z27_z0, svuint16x4_t, svuint16_t, + svdot_single_za32_u16_vg1x4 (w8 + 6, z27, z0), + svdot_za32_vg1x4 (w8 + 6, z27, z0)) + 
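+/* Background for the index arithmetic exercised in this file: the ZA
+   slice selector of these instructions is one of w8-w11 plus an
+   immediate offset in the range 0-7, so offsets such as w8 + 1 or
+   w11 + 4 fold into the instruction, while w8 + 8 or w0 - 1 need a
+   separate add/sub into a fresh index register first.  As a sketch of
+   how the intrinsic under test is meant to be called (assuming only
+   <arm_sme.h>; the function and parameter names are illustrative):
+
+	void
+	accumulate (uint32_t slice, svuint16x4_t a, svuint16x4_t b)
+	  __arm_streaming __arm_inout ("za")
+	{
+	  svdot_za32_u16_vg1x4 (slice, a, b);
+	}
+*/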
+/* +** dot_single_w8p7_z1_z0: +** udot za\.s\[w8, 7, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svuint16x4_t, svuint16_t, + svdot_single_za32_u16_vg1x4 (w8 + 7, z1, z0), + svdot_za32_vg1x4 (w8 + 7, z1, z0)) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** udot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svuint16x4_t, svuint16_t, + svdot_single_za32_u16_vg1x4 (w8 + 8, z1, z0), + svdot_za32_vg1x4 (w8 + 8, z1, z0)) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** udot za\.s\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svuint16x4_t, svuint16_t, + svdot_single_za32_u16_vg1x4 (w0 - 1, z1, z0), + svdot_za32_vg1x4 (w0 - 1, z1, z0)) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** udot za\.s\[w8, 0, vgx4\], {z0\.h - z3\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svuint16x4_t, svuint16_t, + svdot_single_za32_u16_vg1x4 (w8, z0, z15), + svdot_za32_vg1x4 (w8, z0, z15)) + +/* +** dot_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** udot za\.s\[w8, 0, vgx4\], {z20\.h - z23\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svuint16x4_t, svuint16_t, + svdot_single_za32_u16_vg1x4 (w8, z20, z16), + svdot_za32_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_u8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_u8_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..7f01cc1589adea7f386373e1e19b5544c848f521 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_u8_vg1x2.c @@ -0,0 +1,243 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** udot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z0, svuint8x2_t, + svdot_za32_u8_vg1x2 (0, z0, z0), + svdot_za32_vg1x2 (0, z0, z0)) + +/* +** dot_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** udot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z0, svuint8x2_t, + svdot_za32_u8_vg1x2 (w0, z0, z0), + svdot_za32_vg1x2 (w0, z0, z0)) + +/* +** dot_w8_z0_z4: +** udot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z4, svuint8x2_t, + svdot_za32_u8_vg1x2 (w8, z0, z4), + svdot_za32_vg1x2 (w8, z0, z4)) + +/* +** dot_w8_z4_z18: +** udot za\.s\[w8, 0, vgx2\], {z4\.b - z5\.b}, {z18\.b - z19\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z18, svuint8x2_t, + svdot_za32_u8_vg1x2 (w8, z4, z18), + svdot_za32_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z23: +** ... +** udot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svuint8x2_t, + svdot_za32_u8_vg1x2 (w8, z0, z23), + svdot_za32_vg1x2 (w8, z0, z23)) + +/* +** dot_w8_z23_z0: +** ... 
+** udot za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svuint8x2_t, + svdot_za32_u8_vg1x2 (w8, z23, z0), + svdot_za32_vg1x2 (w8, z23, z0)) + +/* +** dot_w8_z18_z28: +** udot za\.s\[w8, 0, vgx2\], {z18\.b - z19\.b}, {z28\.b - z29\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z28, svuint8x2_t, + svdot_za32_u8_vg1x2 (w8, z18, z28), + svdot_za32_vg1x2 (w8, z18, z28)) + +/* +** dot_w8_z28_z4: +** udot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z4, svuint8x2_t, + svdot_za32_u8_vg1x2 (w8, z28, z4), + svdot_za32_vg1x2 (w8, z28, z4)) + +/* +** dot_w8p1_z4_z0: +** udot za\.s\[w8, 1, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svuint8x2_t, + svdot_za32_u8_vg1x2 (w8 + 1, z4, z0), + svdot_za32_vg1x2 (w8 + 1, z4, z0)) + +/* +** dot_w8p2_z4_z0: +** udot za\.s\[w8, 2, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svuint8x2_t, + svdot_za32_u8_vg1x2 (w8 + 2, z4, z0), + svdot_za32_vg1x2 (w8 + 2, z4, z0)) + +/* +** dot_w11p4_z4_z0: +** udot za\.s\[w11, 4, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svuint8x2_t, + svdot_za32_u8_vg1x2 (w11 + 4, z4, z0), + svdot_za32_vg1x2 (w11 + 4, z4, z0)) + +/* +** dot_w8p7_z4_z0: +** udot za\.s\[w8, 7, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svuint8x2_t, + svdot_za32_u8_vg1x2 (w8 + 7, z4, z0), + svdot_za32_vg1x2 (w8 + 7, z4, z0)) + +/* +** dot_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** udot za\.s\[\1, 0, vgx2\], {z4\.b - z5\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z4_z4, svuint8x2_t, + svdot_za32_u8_vg1x2 (w8 + 8, z4, z4), + svdot_za32_vg1x2 (w8 + 8, z4, z4)) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** udot za\.s\[\1, 0, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svuint8x2_t, + svdot_za32_u8_vg1x2 (w8 - 1, z4, z0), + svdot_za32_vg1x2 (w8 - 1, z4, z0)) + +/* +** dot_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** udot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svuint8x2_t, svuint8_t, + svdot_single_za32_u8_vg1x2 (0, z1, z0), + svdot_za32_vg1x2 (0, z1, z0)) + +/* +** dot_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** udot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svuint8x2_t, svuint8_t, + svdot_single_za32_u8_vg1x2 (w0, z1, z0), + svdot_za32_vg1x2 (w0, z1, z0)) + +/* +** dot_single_w8_z1_z0: +** udot za\.s\[w8, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svuint8x2_t, svuint8_t, + svdot_single_za32_u8_vg1x2 (w8, z1, z0), + svdot_za32_vg1x2 (w8, z1, z0)) + +/* +** dot_single_w8p1_z1_z0: +** udot za\.s\[w8, 1, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svuint8x2_t, svuint8_t, + svdot_single_za32_u8_vg1x2 (w8 + 1, z1, z0), + svdot_za32_vg1x2 (w8 + 1, z1, z0)) + +/* +** dot_single_w8p2_z20_z0: +** udot za\.s\[w8, 2, vgx2\], {z20\.b - z21\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svuint8x2_t, svuint8_t, + svdot_single_za32_u8_vg1x2 (w8 + 2, z20, z0), + svdot_za32_vg1x2 (w8 + 2, z20, z0)) + +/* +** dot_single_w11p4_z27_z0: +** udot za\.s\[w11, 4, vgx2\], {z27\.b - z28\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svuint8x2_t, svuint8_t, + svdot_single_za32_u8_vg1x2 (w11 + 4, z27, z0), + svdot_za32_vg1x2 (w11 + 4, z27, z0)) + +/* +** 
dot_single_w8p7_z1_z0: +** udot za\.s\[w8, 7, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svuint8x2_t, svuint8_t, + svdot_single_za32_u8_vg1x2 (w8 + 7, z1, z0), + svdot_za32_vg1x2 (w8 + 7, z1, z0)) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** udot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svuint8x2_t, svuint8_t, + svdot_single_za32_u8_vg1x2 (w8 + 8, z1, z0), + svdot_za32_vg1x2 (w8 + 8, z1, z0)) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** udot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svuint8x2_t, svuint8_t, + svdot_single_za32_u8_vg1x2 (w0 - 1, z1, z0), + svdot_za32_vg1x2 (w0 - 1, z1, z0)) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** udot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svuint8x2_t, svuint8_t, + svdot_single_za32_u8_vg1x2 (w8, z0, z15), + svdot_za32_vg1x2 (w8, z0, z15)) + +/* +** dot_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** udot za\.s\[w8, 0, vgx2\], {z20\.b - z21\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svuint8x2_t, svuint8_t, + svdot_single_za32_u8_vg1x2 (w8, z20, z16), + svdot_za32_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_u8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_u8_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..6e56db4024469405fb8e35cfdf0615dfc99d915c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_u8_vg1x4.c @@ -0,0 +1,254 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** udot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z0, svuint8x4_t, + svdot_za32_u8_vg1x4 (0, z0, z0), + svdot_za32_vg1x4 (0, z0, z0)) + +/* +** dot_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** udot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z0, svuint8x4_t, + svdot_za32_u8_vg1x4 (w0, z0, z0), + svdot_za32_vg1x4 (w0, z0, z0)) + +/* +** dot_w8_z0_z4: +** udot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z4, svuint8x4_t, + svdot_za32_u8_vg1x4 (w8, z0, z4), + svdot_za32_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z18: +** ... +** udot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z18, svuint8x4_t, + svdot_za32_u8_vg1x4 (w8, z0, z18), + svdot_za32_vg1x4 (w8, z0, z18)) + +/* +** dot_w8_z18_z0: +** ... +** udot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z0, svuint8x4_t, + svdot_za32_u8_vg1x4 (w8, z18, z0), + svdot_za32_vg1x4 (w8, z18, z0)) + +/* +** dot_w8_z0_z23: +** ... +** udot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svuint8x4_t, + svdot_za32_u8_vg1x4 (w8, z0, z23), + svdot_za32_vg1x4 (w8, z0, z23)) + +/* +** dot_w8_z23_z0: +** ... 
+** udot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svuint8x4_t, + svdot_za32_u8_vg1x4 (w8, z23, z0), + svdot_za32_vg1x4 (w8, z23, z0)) + +/* +** dot_w8_z4_z28: +** udot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, {z28\.b - z31\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z28, svuint8x4_t, + svdot_za32_u8_vg1x4 (w8, z4, z28), + svdot_za32_vg1x4 (w8, z4, z28)) + +/* +** dot_w8_z28_z0: +** udot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z0, svuint8x4_t, + svdot_za32_u8_vg1x4 (w8, z28, z0), + svdot_za32_vg1x4 (w8, z28, z0)) + +/* +** dot_w8p1_z4_z0: +** udot za\.s\[w8, 1, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svuint8x4_t, + svdot_za32_u8_vg1x4 (w8 + 1, z4, z0), + svdot_za32_vg1x4 (w8 + 1, z4, z0)) + +/* +** dot_w8p2_z4_z0: +** udot za\.s\[w8, 2, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svuint8x4_t, + svdot_za32_u8_vg1x4 (w8 + 2, z4, z0), + svdot_za32_vg1x4 (w8 + 2, z4, z0)) + +/* +** dot_w11p4_z4_z0: +** udot za\.s\[w11, 4, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svuint8x4_t, + svdot_za32_u8_vg1x4 (w11 + 4, z4, z0), + svdot_za32_vg1x4 (w11 + 4, z4, z0)) + +/* +** dot_w8p7_z4_z0: +** udot za\.s\[w8, 7, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svuint8x4_t, + svdot_za32_u8_vg1x4 (w8 + 7, z4, z0), + svdot_za32_vg1x4 (w8 + 7, z4, z0)) + +/* +** dot_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** udot za\.s\[\1, 0, vgx4\], {z4\.b - z7\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z4_z4, svuint8x4_t, + svdot_za32_u8_vg1x4 (w8 + 8, z4, z4), + svdot_za32_vg1x4 (w8 + 8, z4, z4)) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** udot za\.s\[\1, 0, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svuint8x4_t, + svdot_za32_u8_vg1x4 (w8 - 1, z4, z0), + svdot_za32_vg1x4 (w8 - 1, z4, z0)) + +/* +** dot_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** udot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svuint8x4_t, svuint8_t, + svdot_single_za32_u8_vg1x4 (0, z1, z0), + svdot_za32_vg1x4 (0, z1, z0)) + +/* +** dot_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** udot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svuint8x4_t, svuint8_t, + svdot_single_za32_u8_vg1x4 (w0, z1, z0), + svdot_za32_vg1x4 (w0, z1, z0)) + +/* +** dot_single_w8_z1_z0: +** udot za\.s\[w8, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svuint8x4_t, svuint8_t, + svdot_single_za32_u8_vg1x4 (w8, z1, z0), + svdot_za32_vg1x4 (w8, z1, z0)) + +/* +** dot_single_w8p1_z1_z0: +** udot za\.s\[w8, 1, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svuint8x4_t, svuint8_t, + svdot_single_za32_u8_vg1x4 (w8 + 1, z1, z0), + svdot_za32_vg1x4 (w8 + 1, z1, z0)) + +/* +** dot_single_w8p4_z20_z0: +** udot za\.s\[w8, 4, vgx4\], {z20\.b - z23\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p4_z20_z0, svuint8x4_t, svuint8_t, + svdot_single_za32_u8_vg1x4 (w8 + 4, z20, z0), + svdot_za32_vg1x4 (w8 + 4, z20, z0)) + +/* +** dot_single_w8p6_z27_z0: +** udot za\.s\[w8, 6, vgx4\], {z27\.b - z30\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p6_z27_z0, svuint8x4_t, svuint8_t, + svdot_single_za32_u8_vg1x4 (w8 + 6, z27, z0), + svdot_za32_vg1x4 (w8 + 6, z27, z0)) + +/* +** dot_single_w8p7_z1_z0: +** 
udot za\.s\[w8, 7, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svuint8x4_t, svuint8_t, + svdot_single_za32_u8_vg1x4 (w8 + 7, z1, z0), + svdot_za32_vg1x4 (w8 + 7, z1, z0)) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** udot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svuint8x4_t, svuint8_t, + svdot_single_za32_u8_vg1x4 (w8 + 8, z1, z0), + svdot_za32_vg1x4 (w8 + 8, z1, z0)) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** udot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svuint8x4_t, svuint8_t, + svdot_single_za32_u8_vg1x4 (w0 - 1, z1, z0), + svdot_za32_vg1x4 (w0 - 1, z1, z0)) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** udot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svuint8x4_t, svuint8_t, + svdot_single_za32_u8_vg1x4 (w8, z0, z15), + svdot_za32_vg1x4 (w8, z0, z15)) + +/* +** dot_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** udot za\.s\[w8, 0, vgx4\], {z20\.b - z23\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svuint8x4_t, svuint8_t, + svdot_single_za32_u8_vg1x4 (w8, z20, z16), + svdot_za32_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za64_s16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za64_s16_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..2bbf6323112ae29b4767bfc9329e5c5042ca97c3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za64_s16_vg1x2.c @@ -0,0 +1,245 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** dot_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** sdot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z0, svint16x2_t, + svdot_za64_s16_vg1x2 (0, z0, z0), + svdot_za64_vg1x2 (0, z0, z0)) + +/* +** dot_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** sdot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z0, svint16x2_t, + svdot_za64_s16_vg1x2 (w0, z0, z0), + svdot_za64_vg1x2 (w0, z0, z0)) + +/* +** dot_w8_z0_z4: +** sdot za\.d\[w8, 0, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z4, svint16x2_t, + svdot_za64_s16_vg1x2 (w8, z0, z4), + svdot_za64_vg1x2 (w8, z0, z4)) + +/* +** dot_w8_z4_z18: +** sdot za\.d\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z18, svint16x2_t, + svdot_za64_s16_vg1x2 (w8, z4, z18), + svdot_za64_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z23: +** ... +** sdot za\.d\[w8, 0, vgx2\], {z0\.h - z1\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svint16x2_t, + svdot_za64_s16_vg1x2 (w8, z0, z23), + svdot_za64_vg1x2 (w8, z0, z23)) + +/* +** dot_w8_z23_z0: +** ... 
+** sdot za\.d\[w8, 0, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svint16x2_t, + svdot_za64_s16_vg1x2 (w8, z23, z0), + svdot_za64_vg1x2 (w8, z23, z0)) + +/* +** dot_w8_z18_z28: +** sdot za\.d\[w8, 0, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z28, svint16x2_t, + svdot_za64_s16_vg1x2 (w8, z18, z28), + svdot_za64_vg1x2 (w8, z18, z28)) + +/* +** dot_w8_z28_z4: +** sdot za\.d\[w8, 0, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z4, svint16x2_t, + svdot_za64_s16_vg1x2 (w8, z28, z4), + svdot_za64_vg1x2 (w8, z28, z4)) + +/* +** dot_w8p1_z4_z0: +** sdot za\.d\[w8, 1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svint16x2_t, + svdot_za64_s16_vg1x2 (w8 + 1, z4, z0), + svdot_za64_vg1x2 (w8 + 1, z4, z0)) + +/* +** dot_w8p2_z4_z0: +** sdot za\.d\[w8, 2, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svint16x2_t, + svdot_za64_s16_vg1x2 (w8 + 2, z4, z0), + svdot_za64_vg1x2 (w8 + 2, z4, z0)) + +/* +** dot_w11p4_z4_z0: +** sdot za\.d\[w11, 4, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svint16x2_t, + svdot_za64_s16_vg1x2 (w11 + 4, z4, z0), + svdot_za64_vg1x2 (w11 + 4, z4, z0)) + +/* +** dot_w8p7_z4_z0: +** sdot za\.d\[w8, 7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svint16x2_t, + svdot_za64_s16_vg1x2 (w8 + 7, z4, z0), + svdot_za64_vg1x2 (w8 + 7, z4, z0)) + +/* +** dot_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** sdot za\.d\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z4_z4, svint16x2_t, + svdot_za64_s16_vg1x2 (w8 + 8, z4, z4), + svdot_za64_vg1x2 (w8 + 8, z4, z4)) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sdot za\.d\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svint16x2_t, + svdot_za64_s16_vg1x2 (w8 - 1, z4, z0), + svdot_za64_vg1x2 (w8 - 1, z4, z0)) + +/* +** dot_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** sdot za\.d\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svint16x2_t, svint16_t, + svdot_single_za64_s16_vg1x2 (0, z1, z0), + svdot_za64_vg1x2 (0, z1, z0)) + +/* +** dot_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** sdot za\.d\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svint16x2_t, svint16_t, + svdot_single_za64_s16_vg1x2 (w0, z1, z0), + svdot_za64_vg1x2 (w0, z1, z0)) + +/* +** dot_single_w8_z1_z0: +** sdot za\.d\[w8, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svint16x2_t, svint16_t, + svdot_single_za64_s16_vg1x2 (w8, z1, z0), + svdot_za64_vg1x2 (w8, z1, z0)) + +/* +** dot_single_w8p1_z1_z0: +** sdot za\.d\[w8, 1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svint16x2_t, svint16_t, + svdot_single_za64_s16_vg1x2 (w8 + 1, z1, z0), + svdot_za64_vg1x2 (w8 + 1, z1, z0)) + +/* +** dot_single_w8p2_z20_z0: +** sdot za\.d\[w8, 2, vgx2\], {z20\.h - z21\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svint16x2_t, svint16_t, + svdot_single_za64_s16_vg1x2 (w8 + 2, z20, z0), + svdot_za64_vg1x2 (w8 + 2, z20, z0)) + +/* +** dot_single_w11p4_z27_z0: +** sdot za\.d\[w11, 4, vgx2\], {z27\.h - z28\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svint16x2_t, svint16_t, + svdot_single_za64_s16_vg1x2 (w11 + 4, z27, z0), + svdot_za64_vg1x2 (w11 + 4, z27, z0)) + +/* +** 
dot_single_w8p7_z1_z0: +** sdot za\.d\[w8, 7, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svint16x2_t, svint16_t, + svdot_single_za64_s16_vg1x2 (w8 + 7, z1, z0), + svdot_za64_vg1x2 (w8 + 7, z1, z0)) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sdot za\.d\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svint16x2_t, svint16_t, + svdot_single_za64_s16_vg1x2 (w8 + 8, z1, z0), + svdot_za64_vg1x2 (w8 + 8, z1, z0)) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** sdot za\.d\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svint16x2_t, svint16_t, + svdot_single_za64_s16_vg1x2 (w0 - 1, z1, z0), + svdot_za64_vg1x2 (w0 - 1, z1, z0)) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** sdot za\.d\[w8, 0, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svint16x2_t, svint16_t, + svdot_single_za64_s16_vg1x2 (w8, z0, z15), + svdot_za64_vg1x2 (w8, z0, z15)) + +/* +** dot_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** sdot za\.d\[w8, 0, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svint16x2_t, svint16_t, + svdot_single_za64_s16_vg1x2 (w8, z20, z16), + svdot_za64_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za64_s16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za64_s16_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..5c962d71041314b2c1f8f7fba3e299ca4ebe70a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za64_s16_vg1x4.c @@ -0,0 +1,256 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** dot_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** sdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z0, svint16x4_t, + svdot_za64_s16_vg1x4 (0, z0, z0), + svdot_za64_vg1x4 (0, z0, z0)) + +/* +** dot_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** sdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z0, svint16x4_t, + svdot_za64_s16_vg1x4 (w0, z0, z0), + svdot_za64_vg1x4 (w0, z0, z0)) + +/* +** dot_w8_z0_z4: +** sdot za\.d\[w8, 0, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z4, svint16x4_t, + svdot_za64_s16_vg1x4 (w8, z0, z4), + svdot_za64_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z18: +** ... +** sdot za\.d\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z18, svint16x4_t, + svdot_za64_s16_vg1x4 (w8, z0, z18), + svdot_za64_vg1x4 (w8, z0, z18)) + +/* +** dot_w8_z18_z0: +** ... +** sdot za\.d\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z0, svint16x4_t, + svdot_za64_s16_vg1x4 (w8, z18, z0), + svdot_za64_vg1x4 (w8, z18, z0)) + +/* +** dot_w8_z0_z23: +** ... +** sdot za\.d\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svint16x4_t, + svdot_za64_s16_vg1x4 (w8, z0, z23), + svdot_za64_vg1x4 (w8, z0, z23)) + +/* +** dot_w8_z23_z0: +** ... 
+** sdot za\.d\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svint16x4_t, + svdot_za64_s16_vg1x4 (w8, z23, z0), + svdot_za64_vg1x4 (w8, z23, z0)) + +/* +** dot_w8_z4_z28: +** sdot za\.d\[w8, 0, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z28, svint16x4_t, + svdot_za64_s16_vg1x4 (w8, z4, z28), + svdot_za64_vg1x4 (w8, z4, z28)) + +/* +** dot_w8_z28_z0: +** sdot za\.d\[w8, 0, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z0, svint16x4_t, + svdot_za64_s16_vg1x4 (w8, z28, z0), + svdot_za64_vg1x4 (w8, z28, z0)) + +/* +** dot_w8p1_z4_z0: +** sdot za\.d\[w8, 1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svint16x4_t, + svdot_za64_s16_vg1x4 (w8 + 1, z4, z0), + svdot_za64_vg1x4 (w8 + 1, z4, z0)) + +/* +** dot_w8p2_z4_z0: +** sdot za\.d\[w8, 2, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svint16x4_t, + svdot_za64_s16_vg1x4 (w8 + 2, z4, z0), + svdot_za64_vg1x4 (w8 + 2, z4, z0)) + +/* +** dot_w11p4_z4_z0: +** sdot za\.d\[w11, 4, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svint16x4_t, + svdot_za64_s16_vg1x4 (w11 + 4, z4, z0), + svdot_za64_vg1x4 (w11 + 4, z4, z0)) + +/* +** dot_w8p7_z4_z0: +** sdot za\.d\[w8, 7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svint16x4_t, + svdot_za64_s16_vg1x4 (w8 + 7, z4, z0), + svdot_za64_vg1x4 (w8 + 7, z4, z0)) + +/* +** dot_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** sdot za\.d\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z4_z4, svint16x4_t, + svdot_za64_s16_vg1x4 (w8 + 8, z4, z4), + svdot_za64_vg1x4 (w8 + 8, z4, z4)) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sdot za\.d\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svint16x4_t, + svdot_za64_s16_vg1x4 (w8 - 1, z4, z0), + svdot_za64_vg1x4 (w8 - 1, z4, z0)) + +/* +** dot_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** sdot za\.d\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svint16x4_t, svint16_t, + svdot_single_za64_s16_vg1x4 (0, z1, z0), + svdot_za64_vg1x4 (0, z1, z0)) + +/* +** dot_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** sdot za\.d\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svint16x4_t, svint16_t, + svdot_single_za64_s16_vg1x4 (w0, z1, z0), + svdot_za64_vg1x4 (w0, z1, z0)) + +/* +** dot_single_w8_z1_z0: +** sdot za\.d\[w8, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svint16x4_t, svint16_t, + svdot_single_za64_s16_vg1x4 (w8, z1, z0), + svdot_za64_vg1x4 (w8, z1, z0)) + +/* +** dot_single_w8p1_z1_z0: +** sdot za\.d\[w8, 1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svint16x4_t, svint16_t, + svdot_single_za64_s16_vg1x4 (w8 + 1, z1, z0), + svdot_za64_vg1x4 (w8 + 1, z1, z0)) + +/* +** dot_single_w8p4_z20_z0: +** sdot za\.d\[w8, 4, vgx4\], {z20\.h - z23\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p4_z20_z0, svint16x4_t, svint16_t, + svdot_single_za64_s16_vg1x4 (w8 + 4, z20, z0), + svdot_za64_vg1x4 (w8 + 4, z20, z0)) + +/* +** dot_single_w8p6_z27_z0: +** sdot za\.d\[w8, 6, vgx4\], {z27\.h - z30\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p6_z27_z0, svint16x4_t, svint16_t, + svdot_single_za64_s16_vg1x4 (w8 + 6, z27, z0), + svdot_za64_vg1x4 (w8 + 6, z27, z0)) + +/* +** 
dot_single_w8p7_z1_z0: +** sdot za\.d\[w8, 7, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svint16x4_t, svint16_t, + svdot_single_za64_s16_vg1x4 (w8 + 7, z1, z0), + svdot_za64_vg1x4 (w8 + 7, z1, z0)) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sdot za\.d\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svint16x4_t, svint16_t, + svdot_single_za64_s16_vg1x4 (w8 + 8, z1, z0), + svdot_za64_vg1x4 (w8 + 8, z1, z0)) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** sdot za\.d\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svint16x4_t, svint16_t, + svdot_single_za64_s16_vg1x4 (w0 - 1, z1, z0), + svdot_za64_vg1x4 (w0 - 1, z1, z0)) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** sdot za\.d\[w8, 0, vgx4\], {z0\.h - z3\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svint16x4_t, svint16_t, + svdot_single_za64_s16_vg1x4 (w8, z0, z15), + svdot_za64_vg1x4 (w8, z0, z15)) + +/* +** dot_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** sdot za\.d\[w8, 0, vgx4\], {z20\.h - z23\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svint16x4_t, svint16_t, + svdot_single_za64_s16_vg1x4 (w8, z20, z16), + svdot_za64_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za64_u16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za64_u16_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..503104c023fa135bd1f72241976a8fad3e118a0a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za64_u16_vg1x2.c @@ -0,0 +1,245 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** dot_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** udot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z0, svuint16x2_t, + svdot_za64_u16_vg1x2 (0, z0, z0), + svdot_za64_vg1x2 (0, z0, z0)) + +/* +** dot_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** udot za\.d\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z0, svuint16x2_t, + svdot_za64_u16_vg1x2 (w0, z0, z0), + svdot_za64_vg1x2 (w0, z0, z0)) + +/* +** dot_w8_z0_z4: +** udot za\.d\[w8, 0, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z4, svuint16x2_t, + svdot_za64_u16_vg1x2 (w8, z0, z4), + svdot_za64_vg1x2 (w8, z0, z4)) + +/* +** dot_w8_z4_z18: +** udot za\.d\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z18, svuint16x2_t, + svdot_za64_u16_vg1x2 (w8, z4, z18), + svdot_za64_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z23: +** ... +** udot za\.d\[w8, 0, vgx2\], {z0\.h - z1\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svuint16x2_t, + svdot_za64_u16_vg1x2 (w8, z0, z23), + svdot_za64_vg1x2 (w8, z0, z23)) + +/* +** dot_w8_z23_z0: +** ... 
+** udot za\.d\[w8, 0, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svuint16x2_t, + svdot_za64_u16_vg1x2 (w8, z23, z0), + svdot_za64_vg1x2 (w8, z23, z0)) + +/* +** dot_w8_z18_z28: +** udot za\.d\[w8, 0, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z28, svuint16x2_t, + svdot_za64_u16_vg1x2 (w8, z18, z28), + svdot_za64_vg1x2 (w8, z18, z28)) + +/* +** dot_w8_z28_z4: +** udot za\.d\[w8, 0, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z4, svuint16x2_t, + svdot_za64_u16_vg1x2 (w8, z28, z4), + svdot_za64_vg1x2 (w8, z28, z4)) + +/* +** dot_w8p1_z4_z0: +** udot za\.d\[w8, 1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svuint16x2_t, + svdot_za64_u16_vg1x2 (w8 + 1, z4, z0), + svdot_za64_vg1x2 (w8 + 1, z4, z0)) + +/* +** dot_w8p2_z4_z0: +** udot za\.d\[w8, 2, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svuint16x2_t, + svdot_za64_u16_vg1x2 (w8 + 2, z4, z0), + svdot_za64_vg1x2 (w8 + 2, z4, z0)) + +/* +** dot_w11p4_z4_z0: +** udot za\.d\[w11, 4, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svuint16x2_t, + svdot_za64_u16_vg1x2 (w11 + 4, z4, z0), + svdot_za64_vg1x2 (w11 + 4, z4, z0)) + +/* +** dot_w8p7_z4_z0: +** udot za\.d\[w8, 7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svuint16x2_t, + svdot_za64_u16_vg1x2 (w8 + 7, z4, z0), + svdot_za64_vg1x2 (w8 + 7, z4, z0)) + +/* +** dot_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** udot za\.d\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z4_z4, svuint16x2_t, + svdot_za64_u16_vg1x2 (w8 + 8, z4, z4), + svdot_za64_vg1x2 (w8 + 8, z4, z4)) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** udot za\.d\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svuint16x2_t, + svdot_za64_u16_vg1x2 (w8 - 1, z4, z0), + svdot_za64_vg1x2 (w8 - 1, z4, z0)) + +/* +** dot_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** udot za\.d\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svuint16x2_t, svuint16_t, + svdot_single_za64_u16_vg1x2 (0, z1, z0), + svdot_za64_vg1x2 (0, z1, z0)) + +/* +** dot_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** udot za\.d\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svuint16x2_t, svuint16_t, + svdot_single_za64_u16_vg1x2 (w0, z1, z0), + svdot_za64_vg1x2 (w0, z1, z0)) + +/* +** dot_single_w8_z1_z0: +** udot za\.d\[w8, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svuint16x2_t, svuint16_t, + svdot_single_za64_u16_vg1x2 (w8, z1, z0), + svdot_za64_vg1x2 (w8, z1, z0)) + +/* +** dot_single_w8p1_z1_z0: +** udot za\.d\[w8, 1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svuint16x2_t, svuint16_t, + svdot_single_za64_u16_vg1x2 (w8 + 1, z1, z0), + svdot_za64_vg1x2 (w8 + 1, z1, z0)) + +/* +** dot_single_w8p2_z20_z0: +** udot za\.d\[w8, 2, vgx2\], {z20\.h - z21\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svuint16x2_t, svuint16_t, + svdot_single_za64_u16_vg1x2 (w8 + 2, z20, z0), + svdot_za64_vg1x2 (w8 + 2, z20, z0)) + +/* +** dot_single_w11p4_z27_z0: +** udot za\.d\[w11, 4, vgx2\], {z27\.h - z28\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svuint16x2_t, svuint16_t, + svdot_single_za64_u16_vg1x2 (w11 + 4, z27, z0), + svdot_za64_vg1x2 (w11 + 4, 
z27, z0)) + +/* +** dot_single_w8p7_z1_z0: +** udot za\.d\[w8, 7, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svuint16x2_t, svuint16_t, + svdot_single_za64_u16_vg1x2 (w8 + 7, z1, z0), + svdot_za64_vg1x2 (w8 + 7, z1, z0)) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** udot za\.d\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svuint16x2_t, svuint16_t, + svdot_single_za64_u16_vg1x2 (w8 + 8, z1, z0), + svdot_za64_vg1x2 (w8 + 8, z1, z0)) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** udot za\.d\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svuint16x2_t, svuint16_t, + svdot_single_za64_u16_vg1x2 (w0 - 1, z1, z0), + svdot_za64_vg1x2 (w0 - 1, z1, z0)) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** udot za\.d\[w8, 0, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svuint16x2_t, svuint16_t, + svdot_single_za64_u16_vg1x2 (w8, z0, z15), + svdot_za64_vg1x2 (w8, z0, z15)) + +/* +** dot_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** udot za\.d\[w8, 0, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svuint16x2_t, svuint16_t, + svdot_single_za64_u16_vg1x2 (w8, z20, z16), + svdot_za64_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za64_u16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za64_u16_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..bfde05cd9c55ae12d03610b6bea4416ffb1f73da --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za64_u16_vg1x4.c @@ -0,0 +1,256 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** dot_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** udot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z0, svuint16x4_t, + svdot_za64_u16_vg1x4 (0, z0, z0), + svdot_za64_vg1x4 (0, z0, z0)) + +/* +** dot_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** udot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z0, svuint16x4_t, + svdot_za64_u16_vg1x4 (w0, z0, z0), + svdot_za64_vg1x4 (w0, z0, z0)) + +/* +** dot_w8_z0_z4: +** udot za\.d\[w8, 0, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z4, svuint16x4_t, + svdot_za64_u16_vg1x4 (w8, z0, z4), + svdot_za64_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z18: +** ... +** udot za\.d\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z18, svuint16x4_t, + svdot_za64_u16_vg1x4 (w8, z0, z18), + svdot_za64_vg1x4 (w8, z0, z18)) + +/* +** dot_w8_z18_z0: +** ... +** udot za\.d\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z0, svuint16x4_t, + svdot_za64_u16_vg1x4 (w8, z18, z0), + svdot_za64_vg1x4 (w8, z18, z0)) + +/* +** dot_w8_z0_z23: +** ... +** udot za\.d\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svuint16x4_t, + svdot_za64_u16_vg1x4 (w8, z0, z23), + svdot_za64_vg1x4 (w8, z0, z23)) + +/* +** dot_w8_z23_z0: +** ... 
+** udot za\.d\[w8, 0, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svuint16x4_t, + svdot_za64_u16_vg1x4 (w8, z23, z0), + svdot_za64_vg1x4 (w8, z23, z0)) + +/* +** dot_w8_z4_z28: +** udot za\.d\[w8, 0, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z28, svuint16x4_t, + svdot_za64_u16_vg1x4 (w8, z4, z28), + svdot_za64_vg1x4 (w8, z4, z28)) + +/* +** dot_w8_z28_z0: +** udot za\.d\[w8, 0, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z0, svuint16x4_t, + svdot_za64_u16_vg1x4 (w8, z28, z0), + svdot_za64_vg1x4 (w8, z28, z0)) + +/* +** dot_w8p1_z4_z0: +** udot za\.d\[w8, 1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svuint16x4_t, + svdot_za64_u16_vg1x4 (w8 + 1, z4, z0), + svdot_za64_vg1x4 (w8 + 1, z4, z0)) + +/* +** dot_w8p2_z4_z0: +** udot za\.d\[w8, 2, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svuint16x4_t, + svdot_za64_u16_vg1x4 (w8 + 2, z4, z0), + svdot_za64_vg1x4 (w8 + 2, z4, z0)) + +/* +** dot_w11p4_z4_z0: +** udot za\.d\[w11, 4, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svuint16x4_t, + svdot_za64_u16_vg1x4 (w11 + 4, z4, z0), + svdot_za64_vg1x4 (w11 + 4, z4, z0)) + +/* +** dot_w8p7_z4_z0: +** udot za\.d\[w8, 7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svuint16x4_t, + svdot_za64_u16_vg1x4 (w8 + 7, z4, z0), + svdot_za64_vg1x4 (w8 + 7, z4, z0)) + +/* +** dot_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** udot za\.d\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z4_z4, svuint16x4_t, + svdot_za64_u16_vg1x4 (w8 + 8, z4, z4), + svdot_za64_vg1x4 (w8 + 8, z4, z4)) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** udot za\.d\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svuint16x4_t, + svdot_za64_u16_vg1x4 (w8 - 1, z4, z0), + svdot_za64_vg1x4 (w8 - 1, z4, z0)) + +/* +** dot_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** udot za\.d\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svuint16x4_t, svuint16_t, + svdot_single_za64_u16_vg1x4 (0, z1, z0), + svdot_za64_vg1x4 (0, z1, z0)) + +/* +** dot_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** udot za\.d\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svuint16x4_t, svuint16_t, + svdot_single_za64_u16_vg1x4 (w0, z1, z0), + svdot_za64_vg1x4 (w0, z1, z0)) + +/* +** dot_single_w8_z1_z0: +** udot za\.d\[w8, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svuint16x4_t, svuint16_t, + svdot_single_za64_u16_vg1x4 (w8, z1, z0), + svdot_za64_vg1x4 (w8, z1, z0)) + +/* +** dot_single_w8p1_z1_z0: +** udot za\.d\[w8, 1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svuint16x4_t, svuint16_t, + svdot_single_za64_u16_vg1x4 (w8 + 1, z1, z0), + svdot_za64_vg1x4 (w8 + 1, z1, z0)) + +/* +** dot_single_w8p4_z20_z0: +** udot za\.d\[w8, 4, vgx4\], {z20\.h - z23\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p4_z20_z0, svuint16x4_t, svuint16_t, + svdot_single_za64_u16_vg1x4 (w8 + 4, z20, z0), + svdot_za64_vg1x4 (w8 + 4, z20, z0)) + +/* +** dot_single_w8p6_z27_z0: +** udot za\.d\[w8, 6, vgx4\], {z27\.h - z30\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p6_z27_z0, svuint16x4_t, svuint16_t, + svdot_single_za64_u16_vg1x4 (w8 + 6, z27, z0), + svdot_za64_vg1x4 (w8 + 6, z27, z0)) + 
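+/* Two conventions behind the remaining cases, stated here as background:
+   the single-vector operand of these UDOT forms must come from z0-z15,
+   so an argument arriving in z16 is first copied to a low register (the
+   mov pattern in dot_single_w8_z20_z16), and the AAPCS64 makes only the
+   low 64 bits of v8-v15 callee-saved, which is why the z15 case saves
+   and restores d15 around the body.  Each test also calls the overloaded
+   alias with the same arguments, e.g. (a sketch, assuming <arm_sme.h>):
+
+	svdot_za64_u16_vg1x4 (slice, a, b);
+	svdot_za64_vg1x4 (slice, a, b);
+
+   and check-function-bodies verifies that both resolve to the same UDOT
+   sequence.  */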
+/* +** dot_single_w8p7_z1_z0: +** udot za\.d\[w8, 7, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svuint16x4_t, svuint16_t, + svdot_single_za64_u16_vg1x4 (w8 + 7, z1, z0), + svdot_za64_vg1x4 (w8 + 7, z1, z0)) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** udot za\.d\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svuint16x4_t, svuint16_t, + svdot_single_za64_u16_vg1x4 (w8 + 8, z1, z0), + svdot_za64_vg1x4 (w8 + 8, z1, z0)) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** udot za\.d\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svuint16x4_t, svuint16_t, + svdot_single_za64_u16_vg1x4 (w0 - 1, z1, z0), + svdot_za64_vg1x4 (w0 - 1, z1, z0)) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** udot za\.d\[w8, 0, vgx4\], {z0\.h - z3\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svuint16x4_t, svuint16_t, + svdot_single_za64_u16_vg1x4 (w8, z0, z15), + svdot_za64_vg1x4 (w8, z0, z15)) + +/* +** dot_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** udot za\.d\[w8, 0, vgx4\], {z20\.h - z23\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svuint16x4_t, svuint16_t, + svdot_single_za64_u16_vg1x4 (w8, z20, z16), + svdot_za64_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_bf16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..caed35db0c3b1e935246b7044fb206f01304404e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_bf16_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_bf16_base: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_base, svbfloat16x2_t, bfloat16_t, + z0 = svld1_bf16_x2 (pn8, x0), + z0 = svld1_x2 (pn8, x0)) + +/* +** ld1_bf16_index: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_index, svbfloat16x2_t, bfloat16_t, + z0 = svld1_bf16_x2 (pn8, x0 + x1), + z0 = svld1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_bf16_1: +** incb x0 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_1, svbfloat16x2_t, bfloat16_t, + z0 = svld1_bf16_x2 (pn8, x0 + svcnth ()), + z0 = svld1_x2 (pn8, x0 + svcnth ())) + +/* +** ld1_bf16_2: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_2, svbfloat16x2_t, bfloat16_t, + z0 = svld1_bf16_x2 (pn8, x0 + svcnth () * 2), + z0 = svld1_x2 (pn8, x0 + svcnth () * 2)) + +/* +** ld1_bf16_14: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_14, svbfloat16x2_t, bfloat16_t, + z0 = svld1_bf16_x2 (pn8, x0 + svcnth () * 14), + z0 = svld1_x2 (pn8, x0 + svcnth () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_bf16_16: +** incb x0, all, mul #16 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_16, svbfloat16x2_t, bfloat16_t, + z0 = svld1_bf16_x2 (pn8, x0 + svcnth () * 16), + z0 = svld1_x2 (pn8, x0 + svcnth () * 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_bf16_m1: +** decb x0 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_m1, svbfloat16x2_t, bfloat16_t, + z0 = svld1_bf16_x2 (pn8, x0 - svcnth ()), + z0 = svld1_x2 (pn8, x0 - svcnth ())) + +/* +** ld1_bf16_m2: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_m2, svbfloat16x2_t, bfloat16_t, + z0 = svld1_bf16_x2 (pn8, x0 - svcnth () * 2), + z0 = svld1_x2 (pn8, x0 - svcnth () * 2)) + +/* +** ld1_bf16_m16: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_m16, svbfloat16x2_t, bfloat16_t, + z0 = svld1_bf16_x2 (pn8, x0 - svcnth () * 16), + z0 = svld1_x2 (pn8, x0 - svcnth () * 16)) + +/* +** ld1_bf16_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_m18, svbfloat16x2_t, bfloat16_t, + z0 = svld1_bf16_x2 (pn8, x0 - svcnth () * 18), + z0 = svld1_x2 (pn8, x0 - svcnth () * 18)) + +/* +** ld1_bf16_z17: +** ld1h {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_z17, svbfloat16x2_t, bfloat16_t, + z17 = svld1_bf16_x2 (pn8, x0), + z17 = svld1_x2 (pn8, x0)) + +/* +** ld1_bf16_z22: +** ld1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_z22, svbfloat16x2_t, bfloat16_t, + z22 = svld1_bf16_x2 (pn8, x0), + z22 = svld1_x2 (pn8, x0)) + +/* +** ld1_bf16_z28: +** ld1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_z28, svbfloat16x2_t, bfloat16_t, + z28 = svld1_bf16_x2 (pn8, x0), + z28 = svld1_x2 (pn8, x0)) + +/* +** ld1_bf16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_pn0, svbfloat16x2_t, bfloat16_t, + z0 = svld1_bf16_x2 (pn0, x0), + z0 = svld1_x2 (pn0, x0)) + +/* +** ld1_bf16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_pn7, svbfloat16x2_t, bfloat16_t, + z0 = svld1_bf16_x2 (pn7, x0), + z0 = svld1_x2 (pn7, x0)) + +/* +** ld1_bf16_pn15: +** ld1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_pn15, svbfloat16x2_t, bfloat16_t, + z0 = svld1_bf16_x2 (pn15, x0), + z0 = svld1_x2 (pn15, x0)) + +/* +** ld1_vnum_bf16_0: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_0, svbfloat16x2_t, bfloat16_t, + z0 = svld1_vnum_bf16_x2 (pn8, x0, 0), + z0 = svld1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_bf16_1: +** incb x0 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_1, svbfloat16x2_t, bfloat16_t, + z0 = svld1_vnum_bf16_x2 (pn8, x0, 1), + z0 = svld1_vnum_x2 (pn8, x0, 1)) + +/* +** ld1_vnum_bf16_2: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_2, svbfloat16x2_t, bfloat16_t, + z0 = svld1_vnum_bf16_x2 (pn8, x0, 2), + z0 = svld1_vnum_x2 (pn8, x0, 2)) + +/* +** ld1_vnum_bf16_14: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_14, svbfloat16x2_t, bfloat16_t, + z0 = svld1_vnum_bf16_x2 (pn8, x0, 14), + z0 = svld1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_vnum_bf16_16: +** incb x0, all, mul #16 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_16, svbfloat16x2_t, bfloat16_t, + z0 = svld1_vnum_bf16_x2 (pn8, x0, 16), + z0 = svld1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_bf16_m1: +** decb x0 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_m1, svbfloat16x2_t, bfloat16_t, + z0 = svld1_vnum_bf16_x2 (pn8, x0, -1), + z0 = svld1_vnum_x2 (pn8, x0, -1)) + +/* +** ld1_vnum_bf16_m2: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_m2, svbfloat16x2_t, bfloat16_t, + z0 = svld1_vnum_bf16_x2 (pn8, x0, -2), + z0 = svld1_vnum_x2 (pn8, x0, -2)) + +/* +** ld1_vnum_bf16_m16: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_m16, svbfloat16x2_t, bfloat16_t, + z0 = svld1_vnum_bf16_x2 (pn8, x0, -16), + z0 = svld1_vnum_x2 (pn8, x0, -16)) + +/* +** ld1_vnum_bf16_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_m18, svbfloat16x2_t, bfloat16_t, + z0 = svld1_vnum_bf16_x2 (pn8, x0, -18), + z0 = svld1_vnum_x2 (pn8, x0, -18)) + +/* +** ld1_vnum_bf16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_x1, svbfloat16x2_t, bfloat16_t, + z0 = svld1_vnum_bf16_x2 (pn8, x0, x1), + z0 = svld1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_bf16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_bf16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..e6a0b569704cc20e86e3be9a8c378dc6dc168476 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_bf16_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_bf16_base: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_base, svbfloat16x4_t, bfloat16_t, + z0 = svld1_bf16_x4 (pn8, x0), + z0 = svld1_x4 (pn8, x0)) + +/* +** ld1_bf16_index: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_index, svbfloat16x4_t, bfloat16_t, + z0 = svld1_bf16_x4 (pn8, x0 + x1), + z0 = svld1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_bf16_1: +** incb x0 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_1, svbfloat16x4_t, bfloat16_t, + z0 = svld1_bf16_x4 (pn8, x0 + svcnth ()), + z0 = svld1_x4 (pn8, x0 + svcnth ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_bf16_2: +** incb x0, all, mul #2 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_2, svbfloat16x4_t, bfloat16_t, + z0 = svld1_bf16_x4 (pn8, x0 + svcnth () * 2), + z0 = svld1_x4 (pn8, x0 + svcnth () * 2)) + +/* Moving the constant into a register would also be OK. 
*/
+/*
+** ld1_bf16_3:
+**	incb	x0, all, mul #3
+**	ld1h	{z0\.h - z3\.h}, pn8/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_3, svbfloat16x4_t, bfloat16_t,
+		 z0 = svld1_bf16_x4 (pn8, x0 + svcnth () * 3),
+		 z0 = svld1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ld1_bf16_4:
+**	ld1h	{z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_4, svbfloat16x4_t, bfloat16_t,
+		 z0 = svld1_bf16_x4 (pn8, x0 + svcnth () * 4),
+		 z0 = svld1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ld1_bf16_28:
+**	ld1h	{z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_28, svbfloat16x4_t, bfloat16_t,
+		 z0 = svld1_bf16_x4 (pn8, x0 + svcnth () * 28),
+		 z0 = svld1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ld1_bf16_32:
+**	[^{]*
+**	ld1h	{z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_32, svbfloat16x4_t, bfloat16_t,
+		 z0 = svld1_bf16_x4 (pn8, x0 + svcnth () * 32),
+		 z0 = svld1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_m1:
+**	decb	x0
+**	ld1h	{z0\.h - z3\.h}, pn8/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m1, svbfloat16x4_t, bfloat16_t,
+		 z0 = svld1_bf16_x4 (pn8, x0 - svcnth ()),
+		 z0 = svld1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_m2:
+**	decb	x0, all, mul #2
+**	ld1h	{z0\.h - z3\.h}, pn8/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m2, svbfloat16x4_t, bfloat16_t,
+		 z0 = svld1_bf16_x4 (pn8, x0 - svcnth () * 2),
+		 z0 = svld1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_m3:
+**	decb	x0, all, mul #3
+**	ld1h	{z0\.h - z3\.h}, pn8/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m3, svbfloat16x4_t, bfloat16_t,
+		 z0 = svld1_bf16_x4 (pn8, x0 - svcnth () * 3),
+		 z0 = svld1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ld1_bf16_m4:
+**	ld1h	{z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m4, svbfloat16x4_t, bfloat16_t,
+		 z0 = svld1_bf16_x4 (pn8, x0 - svcnth () * 4),
+		 z0 = svld1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ld1_bf16_m32:
+**	ld1h	{z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m32, svbfloat16x4_t, bfloat16_t,
+		 z0 = svld1_bf16_x4 (pn8, x0 - svcnth () * 32),
+		 z0 = svld1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ld1_bf16_m36:
+**	[^{]*
+**	ld1h	{z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m36, svbfloat16x4_t, bfloat16_t,
+		 z0 = svld1_bf16_x4 (pn8, x0 - svcnth () * 36),
+		 z0 = svld1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ld1_bf16_z17:
+**	ld1h	{z[^\n]+}, pn8/z, \[x0\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z17, svbfloat16x4_t, bfloat16_t,
+		 z17 = svld1_bf16_x4 (pn8, x0),
+		 z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_bf16_z22:
+**	ld1h	{z[^\n]+}, pn8/z, \[x0\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z22, svbfloat16x4_t, bfloat16_t,
+		 z22 = svld1_bf16_x4 (pn8, x0),
+		 z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_bf16_z28:
+**	ld1h	{z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z28, svbfloat16x4_t, bfloat16_t,
+		 z28 = svld1_bf16_x4 (pn8, x0),
+		 z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_bf16_pn0:
+**	mov	p([89]|1[0-5])\.b, p0\.b
+**	ld1h	{z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_pn0, svbfloat16x4_t, bfloat16_t,
+		 z0 = svld1_bf16_x4 (pn0, x0),
+		 z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_bf16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_pn7, svbfloat16x4_t, bfloat16_t, + z0 = svld1_bf16_x4 (pn7, x0), + z0 = svld1_x4 (pn7, x0)) + +/* +** ld1_bf16_pn15: +** ld1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_bf16_pn15, svbfloat16x4_t, bfloat16_t, + z0 = svld1_bf16_x4 (pn15, x0), + z0 = svld1_x4 (pn15, x0)) + +/* +** ld1_vnum_bf16_0: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_0, svbfloat16x4_t, bfloat16_t, + z0 = svld1_vnum_bf16_x4 (pn8, x0, 0), + z0 = svld1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_bf16_1: +** incb x0 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_1, svbfloat16x4_t, bfloat16_t, + z0 = svld1_vnum_bf16_x4 (pn8, x0, 1), + z0 = svld1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_bf16_2: +** incb x0, all, mul #2 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_2, svbfloat16x4_t, bfloat16_t, + z0 = svld1_vnum_bf16_x4 (pn8, x0, 2), + z0 = svld1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_bf16_3: +** incb x0, all, mul #3 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_3, svbfloat16x4_t, bfloat16_t, + z0 = svld1_vnum_bf16_x4 (pn8, x0, 3), + z0 = svld1_vnum_x4 (pn8, x0, 3)) + +/* +** ld1_vnum_bf16_4: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_4, svbfloat16x4_t, bfloat16_t, + z0 = svld1_vnum_bf16_x4 (pn8, x0, 4), + z0 = svld1_vnum_x4 (pn8, x0, 4)) + +/* +** ld1_vnum_bf16_28: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_28, svbfloat16x4_t, bfloat16_t, + z0 = svld1_vnum_bf16_x4 (pn8, x0, 28), + z0 = svld1_vnum_x4 (pn8, x0, 28)) + +/* +** ld1_vnum_bf16_32: +** [^{]* +** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_32, svbfloat16x4_t, bfloat16_t, + z0 = svld1_vnum_bf16_x4 (pn8, x0, 32), + z0 = svld1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_bf16_m1: +** decb x0 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_m1, svbfloat16x4_t, bfloat16_t, + z0 = svld1_vnum_bf16_x4 (pn8, x0, -1), + z0 = svld1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_bf16_m2: +** decb x0, all, mul #2 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_m2, svbfloat16x4_t, bfloat16_t, + z0 = svld1_vnum_bf16_x4 (pn8, x0, -2), + z0 = svld1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
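A vnum of -3 is not a multiple of 4, so it cannot be folded into the four-register addressing mode.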
*/ +/* +** ld1_vnum_bf16_m3: +** decb x0, all, mul #3 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_m3, svbfloat16x4_t, bfloat16_t, + z0 = svld1_vnum_bf16_x4 (pn8, x0, -3), + z0 = svld1_vnum_x4 (pn8, x0, -3)) + +/* +** ld1_vnum_bf16_m4: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_m4, svbfloat16x4_t, bfloat16_t, + z0 = svld1_vnum_bf16_x4 (pn8, x0, -4), + z0 = svld1_vnum_x4 (pn8, x0, -4)) + +/* +** ld1_vnum_bf16_m32: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_m32, svbfloat16x4_t, bfloat16_t, + z0 = svld1_vnum_bf16_x4 (pn8, x0, -32), + z0 = svld1_vnum_x4 (pn8, x0, -32)) + +/* +** ld1_vnum_bf16_m36: +** [^{]* +** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_m36, svbfloat16x4_t, bfloat16_t, + z0 = svld1_vnum_bf16_x4 (pn8, x0, -36), + z0 = svld1_vnum_x4 (pn8, x0, -36)) + +/* +** ld1_vnum_bf16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1h {z0\.h - z3\.h}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_bf16_x1, svbfloat16x4_t, bfloat16_t, + z0 = svld1_vnum_bf16_x4 (pn8, x0, x1), + z0 = svld1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..204ed557fc3580c8e3a5350075c6882ea467617d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f16_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_f16_base: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_base, svfloat16x2_t, float16_t, + z0 = svld1_f16_x2 (pn8, x0), + z0 = svld1_x2 (pn8, x0)) + +/* +** ld1_f16_index: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_index, svfloat16x2_t, float16_t, + z0 = svld1_f16_x2 (pn8, x0 + x1), + z0 = svld1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_f16_1: +** incb x0 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_1, svfloat16x2_t, float16_t, + z0 = svld1_f16_x2 (pn8, x0 + svcnth ()), + z0 = svld1_x2 (pn8, x0 + svcnth ())) + +/* +** ld1_f16_2: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_2, svfloat16x2_t, float16_t, + z0 = svld1_f16_x2 (pn8, x0 + svcnth () * 2), + z0 = svld1_x2 (pn8, x0 + svcnth () * 2)) + +/* +** ld1_f16_14: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_14, svfloat16x2_t, float16_t, + z0 = svld1_f16_x2 (pn8, x0 + svcnth () * 14), + z0 = svld1_x2 (pn8, x0 + svcnth () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_f16_16: +** incb x0, all, mul #16 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_16, svfloat16x2_t, float16_t, + z0 = svld1_f16_x2 (pn8, x0 + svcnth () * 16), + z0 = svld1_x2 (pn8, x0 + svcnth () * 16)) + +/* Moving the constant into a register would also be OK. 
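An offset of -1 vector is odd, and the two-register form only encodes even multiples of the vector length.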
*/ +/* +** ld1_f16_m1: +** decb x0 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_m1, svfloat16x2_t, float16_t, + z0 = svld1_f16_x2 (pn8, x0 - svcnth ()), + z0 = svld1_x2 (pn8, x0 - svcnth ())) + +/* +** ld1_f16_m2: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_m2, svfloat16x2_t, float16_t, + z0 = svld1_f16_x2 (pn8, x0 - svcnth () * 2), + z0 = svld1_x2 (pn8, x0 - svcnth () * 2)) + +/* +** ld1_f16_m16: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_m16, svfloat16x2_t, float16_t, + z0 = svld1_f16_x2 (pn8, x0 - svcnth () * 16), + z0 = svld1_x2 (pn8, x0 - svcnth () * 16)) + +/* +** ld1_f16_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_m18, svfloat16x2_t, float16_t, + z0 = svld1_f16_x2 (pn8, x0 - svcnth () * 18), + z0 = svld1_x2 (pn8, x0 - svcnth () * 18)) + +/* +** ld1_f16_z17: +** ld1h {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_z17, svfloat16x2_t, float16_t, + z17 = svld1_f16_x2 (pn8, x0), + z17 = svld1_x2 (pn8, x0)) + +/* +** ld1_f16_z22: +** ld1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_z22, svfloat16x2_t, float16_t, + z22 = svld1_f16_x2 (pn8, x0), + z22 = svld1_x2 (pn8, x0)) + +/* +** ld1_f16_z28: +** ld1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_z28, svfloat16x2_t, float16_t, + z28 = svld1_f16_x2 (pn8, x0), + z28 = svld1_x2 (pn8, x0)) + +/* +** ld1_f16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_pn0, svfloat16x2_t, float16_t, + z0 = svld1_f16_x2 (pn0, x0), + z0 = svld1_x2 (pn0, x0)) + +/* +** ld1_f16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_pn7, svfloat16x2_t, float16_t, + z0 = svld1_f16_x2 (pn7, x0), + z0 = svld1_x2 (pn7, x0)) + +/* +** ld1_f16_pn15: +** ld1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_pn15, svfloat16x2_t, float16_t, + z0 = svld1_f16_x2 (pn15, x0), + z0 = svld1_x2 (pn15, x0)) + +/* +** ld1_vnum_f16_0: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_0, svfloat16x2_t, float16_t, + z0 = svld1_vnum_f16_x2 (pn8, x0, 0), + z0 = svld1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f16_1: +** incb x0 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_1, svfloat16x2_t, float16_t, + z0 = svld1_vnum_f16_x2 (pn8, x0, 1), + z0 = svld1_vnum_x2 (pn8, x0, 1)) + +/* +** ld1_vnum_f16_2: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_2, svfloat16x2_t, float16_t, + z0 = svld1_vnum_f16_x2 (pn8, x0, 2), + z0 = svld1_vnum_x2 (pn8, x0, 2)) + +/* +** ld1_vnum_f16_14: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_14, svfloat16x2_t, float16_t, + z0 = svld1_vnum_f16_x2 (pn8, x0, 14), + z0 = svld1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
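16 lies just above the -16 to 14 range of even immediates that the paired load supports.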
*/ +/* +** ld1_vnum_f16_16: +** incb x0, all, mul #16 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_16, svfloat16x2_t, float16_t, + z0 = svld1_vnum_f16_x2 (pn8, x0, 16), + z0 = svld1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f16_m1: +** decb x0 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_m1, svfloat16x2_t, float16_t, + z0 = svld1_vnum_f16_x2 (pn8, x0, -1), + z0 = svld1_vnum_x2 (pn8, x0, -1)) + +/* +** ld1_vnum_f16_m2: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_m2, svfloat16x2_t, float16_t, + z0 = svld1_vnum_f16_x2 (pn8, x0, -2), + z0 = svld1_vnum_x2 (pn8, x0, -2)) + +/* +** ld1_vnum_f16_m16: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_m16, svfloat16x2_t, float16_t, + z0 = svld1_vnum_f16_x2 (pn8, x0, -16), + z0 = svld1_vnum_x2 (pn8, x0, -16)) + +/* +** ld1_vnum_f16_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_m18, svfloat16x2_t, float16_t, + z0 = svld1_vnum_f16_x2 (pn8, x0, -18), + z0 = svld1_vnum_x2 (pn8, x0, -18)) + +/* +** ld1_vnum_f16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_x1, svfloat16x2_t, float16_t, + z0 = svld1_vnum_f16_x2 (pn8, x0, x1), + z0 = svld1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..203f7c30616507792ce6eaf3f57162567d9f7377 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f16_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_f16_base: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_base, svfloat16x4_t, float16_t, + z0 = svld1_f16_x4 (pn8, x0), + z0 = svld1_x4 (pn8, x0)) + +/* +** ld1_f16_index: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_index, svfloat16x4_t, float16_t, + z0 = svld1_f16_x4 (pn8, x0 + x1), + z0 = svld1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_f16_1: +** incb x0 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_1, svfloat16x4_t, float16_t, + z0 = svld1_f16_x4 (pn8, x0 + svcnth ()), + z0 = svld1_x4 (pn8, x0 + svcnth ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_f16_2: +** incb x0, all, mul #2 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_2, svfloat16x4_t, float16_t, + z0 = svld1_f16_x4 (pn8, x0 + svcnth () * 2), + z0 = svld1_x4 (pn8, x0 + svcnth () * 2)) + +/* Moving the constant into a register would also be OK. 
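3 is not a multiple of 4, so there is no in-range immediate for the four-register form.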
*/
+/*
+** ld1_f16_3:
+**	incb	x0, all, mul #3
+**	ld1h	{z0\.h - z3\.h}, pn8/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f16_3, svfloat16x4_t, float16_t,
+		 z0 = svld1_f16_x4 (pn8, x0 + svcnth () * 3),
+		 z0 = svld1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ld1_f16_4:
+**	ld1h	{z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f16_4, svfloat16x4_t, float16_t,
+		 z0 = svld1_f16_x4 (pn8, x0 + svcnth () * 4),
+		 z0 = svld1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ld1_f16_28:
+**	ld1h	{z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f16_28, svfloat16x4_t, float16_t,
+		 z0 = svld1_f16_x4 (pn8, x0 + svcnth () * 28),
+		 z0 = svld1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ld1_f16_32:
+**	[^{]*
+**	ld1h	{z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f16_32, svfloat16x4_t, float16_t,
+		 z0 = svld1_f16_x4 (pn8, x0 + svcnth () * 32),
+		 z0 = svld1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_m1:
+**	decb	x0
+**	ld1h	{z0\.h - z3\.h}, pn8/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m1, svfloat16x4_t, float16_t,
+		 z0 = svld1_f16_x4 (pn8, x0 - svcnth ()),
+		 z0 = svld1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_m2:
+**	decb	x0, all, mul #2
+**	ld1h	{z0\.h - z3\.h}, pn8/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m2, svfloat16x4_t, float16_t,
+		 z0 = svld1_f16_x4 (pn8, x0 - svcnth () * 2),
+		 z0 = svld1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_m3:
+**	decb	x0, all, mul #3
+**	ld1h	{z0\.h - z3\.h}, pn8/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m3, svfloat16x4_t, float16_t,
+		 z0 = svld1_f16_x4 (pn8, x0 - svcnth () * 3),
+		 z0 = svld1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ld1_f16_m4:
+**	ld1h	{z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m4, svfloat16x4_t, float16_t,
+		 z0 = svld1_f16_x4 (pn8, x0 - svcnth () * 4),
+		 z0 = svld1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ld1_f16_m32:
+**	ld1h	{z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m32, svfloat16x4_t, float16_t,
+		 z0 = svld1_f16_x4 (pn8, x0 - svcnth () * 32),
+		 z0 = svld1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ld1_f16_m36:
+**	[^{]*
+**	ld1h	{z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m36, svfloat16x4_t, float16_t,
+		 z0 = svld1_f16_x4 (pn8, x0 - svcnth () * 36),
+		 z0 = svld1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ld1_f16_z17:
+**	ld1h	{z[^\n]+}, pn8/z, \[x0\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z17, svfloat16x4_t, float16_t,
+		 z17 = svld1_f16_x4 (pn8, x0),
+		 z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f16_z22:
+**	ld1h	{z[^\n]+}, pn8/z, \[x0\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z22, svfloat16x4_t, float16_t,
+		 z22 = svld1_f16_x4 (pn8, x0),
+		 z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f16_z28:
+**	ld1h	{z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z28, svfloat16x4_t, float16_t,
+		 z28 = svld1_f16_x4 (pn8, x0),
+		 z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f16_pn0:
+**	mov	p([89]|1[0-5])\.b, p0\.b
+**	ld1h	{z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f16_pn0, svfloat16x4_t, float16_t,
+		 z0 = svld1_f16_x4 (pn0, x0),
+		 z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_f16_pn7:
+**	mov	p([89]|1[0-5])\.b, p7\.b
+**	ld1h	{z0\.h(?: - 
|, )z3\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_pn7, svfloat16x4_t, float16_t, + z0 = svld1_f16_x4 (pn7, x0), + z0 = svld1_x4 (pn7, x0)) + +/* +** ld1_f16_pn15: +** ld1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f16_pn15, svfloat16x4_t, float16_t, + z0 = svld1_f16_x4 (pn15, x0), + z0 = svld1_x4 (pn15, x0)) + +/* +** ld1_vnum_f16_0: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_0, svfloat16x4_t, float16_t, + z0 = svld1_vnum_f16_x4 (pn8, x0, 0), + z0 = svld1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f16_1: +** incb x0 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_1, svfloat16x4_t, float16_t, + z0 = svld1_vnum_f16_x4 (pn8, x0, 1), + z0 = svld1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f16_2: +** incb x0, all, mul #2 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_2, svfloat16x4_t, float16_t, + z0 = svld1_vnum_f16_x4 (pn8, x0, 2), + z0 = svld1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f16_3: +** incb x0, all, mul #3 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_3, svfloat16x4_t, float16_t, + z0 = svld1_vnum_f16_x4 (pn8, x0, 3), + z0 = svld1_vnum_x4 (pn8, x0, 3)) + +/* +** ld1_vnum_f16_4: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_4, svfloat16x4_t, float16_t, + z0 = svld1_vnum_f16_x4 (pn8, x0, 4), + z0 = svld1_vnum_x4 (pn8, x0, 4)) + +/* +** ld1_vnum_f16_28: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_28, svfloat16x4_t, float16_t, + z0 = svld1_vnum_f16_x4 (pn8, x0, 28), + z0 = svld1_vnum_x4 (pn8, x0, 28)) + +/* +** ld1_vnum_f16_32: +** [^{]* +** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_32, svfloat16x4_t, float16_t, + z0 = svld1_vnum_f16_x4 (pn8, x0, 32), + z0 = svld1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f16_m1: +** decb x0 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_m1, svfloat16x4_t, float16_t, + z0 = svld1_vnum_f16_x4 (pn8, x0, -1), + z0 = svld1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f16_m2: +** decb x0, all, mul #2 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_m2, svfloat16x4_t, float16_t, + z0 = svld1_vnum_f16_x4 (pn8, x0, -2), + z0 = svld1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
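A vnum of -3 is not a multiple of the register count, so the base has to be adjusted instead.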
*/ +/* +** ld1_vnum_f16_m3: +** decb x0, all, mul #3 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_m3, svfloat16x4_t, float16_t, + z0 = svld1_vnum_f16_x4 (pn8, x0, -3), + z0 = svld1_vnum_x4 (pn8, x0, -3)) + +/* +** ld1_vnum_f16_m4: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_m4, svfloat16x4_t, float16_t, + z0 = svld1_vnum_f16_x4 (pn8, x0, -4), + z0 = svld1_vnum_x4 (pn8, x0, -4)) + +/* +** ld1_vnum_f16_m32: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_m32, svfloat16x4_t, float16_t, + z0 = svld1_vnum_f16_x4 (pn8, x0, -32), + z0 = svld1_vnum_x4 (pn8, x0, -32)) + +/* +** ld1_vnum_f16_m36: +** [^{]* +** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_m36, svfloat16x4_t, float16_t, + z0 = svld1_vnum_f16_x4 (pn8, x0, -36), + z0 = svld1_vnum_x4 (pn8, x0, -36)) + +/* +** ld1_vnum_f16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1h {z0\.h - z3\.h}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f16_x1, svfloat16x4_t, float16_t, + z0 = svld1_vnum_f16_x4 (pn8, x0, x1), + z0 = svld1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..f98f38532afc32255f531829f193aca1586aa54e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f32_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_f32_base: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_base, svfloat32x2_t, float32_t, + z0 = svld1_f32_x2 (pn8, x0), + z0 = svld1_x2 (pn8, x0)) + +/* +** ld1_f32_index: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_index, svfloat32x2_t, float32_t, + z0 = svld1_f32_x2 (pn8, x0 + x1), + z0 = svld1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_f32_1: +** incb x0 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_1, svfloat32x2_t, float32_t, + z0 = svld1_f32_x2 (pn8, x0 + svcntw ()), + z0 = svld1_x2 (pn8, x0 + svcntw ())) + +/* +** ld1_f32_2: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_2, svfloat32x2_t, float32_t, + z0 = svld1_f32_x2 (pn8, x0 + svcntw () * 2), + z0 = svld1_x2 (pn8, x0 + svcntw () * 2)) + +/* +** ld1_f32_14: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_14, svfloat32x2_t, float32_t, + z0 = svld1_f32_x2 (pn8, x0 + svcntw () * 14), + z0 = svld1_x2 (pn8, x0 + svcntw () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_f32_16: +** incb x0, all, mul #16 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_16, svfloat32x2_t, float32_t, + z0 = svld1_f32_x2 (pn8, x0 + svcntw () * 16), + z0 = svld1_x2 (pn8, x0 + svcntw () * 16)) + +/* Moving the constant into a register would also be OK. 
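-1 is not an even multiple of the vector length, so the two-register immediate form does not apply.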
*/ +/* +** ld1_f32_m1: +** decb x0 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_m1, svfloat32x2_t, float32_t, + z0 = svld1_f32_x2 (pn8, x0 - svcntw ()), + z0 = svld1_x2 (pn8, x0 - svcntw ())) + +/* +** ld1_f32_m2: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_m2, svfloat32x2_t, float32_t, + z0 = svld1_f32_x2 (pn8, x0 - svcntw () * 2), + z0 = svld1_x2 (pn8, x0 - svcntw () * 2)) + +/* +** ld1_f32_m16: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_m16, svfloat32x2_t, float32_t, + z0 = svld1_f32_x2 (pn8, x0 - svcntw () * 16), + z0 = svld1_x2 (pn8, x0 - svcntw () * 16)) + +/* +** ld1_f32_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_m18, svfloat32x2_t, float32_t, + z0 = svld1_f32_x2 (pn8, x0 - svcntw () * 18), + z0 = svld1_x2 (pn8, x0 - svcntw () * 18)) + +/* +** ld1_f32_z17: +** ld1w {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_z17, svfloat32x2_t, float32_t, + z17 = svld1_f32_x2 (pn8, x0), + z17 = svld1_x2 (pn8, x0)) + +/* +** ld1_f32_z22: +** ld1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_z22, svfloat32x2_t, float32_t, + z22 = svld1_f32_x2 (pn8, x0), + z22 = svld1_x2 (pn8, x0)) + +/* +** ld1_f32_z28: +** ld1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_z28, svfloat32x2_t, float32_t, + z28 = svld1_f32_x2 (pn8, x0), + z28 = svld1_x2 (pn8, x0)) + +/* +** ld1_f32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_pn0, svfloat32x2_t, float32_t, + z0 = svld1_f32_x2 (pn0, x0), + z0 = svld1_x2 (pn0, x0)) + +/* +** ld1_f32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_pn7, svfloat32x2_t, float32_t, + z0 = svld1_f32_x2 (pn7, x0), + z0 = svld1_x2 (pn7, x0)) + +/* +** ld1_f32_pn15: +** ld1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_pn15, svfloat32x2_t, float32_t, + z0 = svld1_f32_x2 (pn15, x0), + z0 = svld1_x2 (pn15, x0)) + +/* +** ld1_vnum_f32_0: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_0, svfloat32x2_t, float32_t, + z0 = svld1_vnum_f32_x2 (pn8, x0, 0), + z0 = svld1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f32_1: +** incb x0 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_1, svfloat32x2_t, float32_t, + z0 = svld1_vnum_f32_x2 (pn8, x0, 1), + z0 = svld1_vnum_x2 (pn8, x0, 1)) + +/* +** ld1_vnum_f32_2: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_2, svfloat32x2_t, float32_t, + z0 = svld1_vnum_f32_x2 (pn8, x0, 2), + z0 = svld1_vnum_x2 (pn8, x0, 2)) + +/* +** ld1_vnum_f32_14: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_14, svfloat32x2_t, float32_t, + z0 = svld1_vnum_f32_x2 (pn8, x0, 14), + z0 = svld1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
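A vnum of 16 exceeds the maximum in-range immediate of 14 for the two-register form.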
*/ +/* +** ld1_vnum_f32_16: +** incb x0, all, mul #16 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_16, svfloat32x2_t, float32_t, + z0 = svld1_vnum_f32_x2 (pn8, x0, 16), + z0 = svld1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f32_m1: +** decb x0 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_m1, svfloat32x2_t, float32_t, + z0 = svld1_vnum_f32_x2 (pn8, x0, -1), + z0 = svld1_vnum_x2 (pn8, x0, -1)) + +/* +** ld1_vnum_f32_m2: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_m2, svfloat32x2_t, float32_t, + z0 = svld1_vnum_f32_x2 (pn8, x0, -2), + z0 = svld1_vnum_x2 (pn8, x0, -2)) + +/* +** ld1_vnum_f32_m16: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_m16, svfloat32x2_t, float32_t, + z0 = svld1_vnum_f32_x2 (pn8, x0, -16), + z0 = svld1_vnum_x2 (pn8, x0, -16)) + +/* +** ld1_vnum_f32_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_m18, svfloat32x2_t, float32_t, + z0 = svld1_vnum_f32_x2 (pn8, x0, -18), + z0 = svld1_vnum_x2 (pn8, x0, -18)) + +/* +** ld1_vnum_f32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_x1, svfloat32x2_t, float32_t, + z0 = svld1_vnum_f32_x2 (pn8, x0, x1), + z0 = svld1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..e6dcb9f354e7b70f57d24d005c56be0bbfda3efe --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f32_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_f32_base: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_base, svfloat32x4_t, float32_t, + z0 = svld1_f32_x4 (pn8, x0), + z0 = svld1_x4 (pn8, x0)) + +/* +** ld1_f32_index: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_index, svfloat32x4_t, float32_t, + z0 = svld1_f32_x4 (pn8, x0 + x1), + z0 = svld1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_f32_1: +** incb x0 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_1, svfloat32x4_t, float32_t, + z0 = svld1_f32_x4 (pn8, x0 + svcntw ()), + z0 = svld1_x4 (pn8, x0 + svcntw ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_f32_2: +** incb x0, all, mul #2 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_2, svfloat32x4_t, float32_t, + z0 = svld1_f32_x4 (pn8, x0 + svcntw () * 2), + z0 = svld1_x4 (pn8, x0 + svcntw () * 2)) + +/* Moving the constant into a register would also be OK. 
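An offset of 3 vectors is not a multiple of 4, which the four-register immediate form requires.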
*/
+/*
+** ld1_f32_3:
+**	incb	x0, all, mul #3
+**	ld1w	{z0\.s - z3\.s}, pn8/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f32_3, svfloat32x4_t, float32_t,
+		 z0 = svld1_f32_x4 (pn8, x0 + svcntw () * 3),
+		 z0 = svld1_x4 (pn8, x0 + svcntw () * 3))
+
+/*
+** ld1_f32_4:
+**	ld1w	{z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f32_4, svfloat32x4_t, float32_t,
+		 z0 = svld1_f32_x4 (pn8, x0 + svcntw () * 4),
+		 z0 = svld1_x4 (pn8, x0 + svcntw () * 4))
+
+/*
+** ld1_f32_28:
+**	ld1w	{z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f32_28, svfloat32x4_t, float32_t,
+		 z0 = svld1_f32_x4 (pn8, x0 + svcntw () * 28),
+		 z0 = svld1_x4 (pn8, x0 + svcntw () * 28))
+
+/*
+** ld1_f32_32:
+**	[^{]*
+**	ld1w	{z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f32_32, svfloat32x4_t, float32_t,
+		 z0 = svld1_f32_x4 (pn8, x0 + svcntw () * 32),
+		 z0 = svld1_x4 (pn8, x0 + svcntw () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_m1:
+**	decb	x0
+**	ld1w	{z0\.s - z3\.s}, pn8/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m1, svfloat32x4_t, float32_t,
+		 z0 = svld1_f32_x4 (pn8, x0 - svcntw ()),
+		 z0 = svld1_x4 (pn8, x0 - svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_m2:
+**	decb	x0, all, mul #2
+**	ld1w	{z0\.s - z3\.s}, pn8/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m2, svfloat32x4_t, float32_t,
+		 z0 = svld1_f32_x4 (pn8, x0 - svcntw () * 2),
+		 z0 = svld1_x4 (pn8, x0 - svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_m3:
+**	decb	x0, all, mul #3
+**	ld1w	{z0\.s - z3\.s}, pn8/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m3, svfloat32x4_t, float32_t,
+		 z0 = svld1_f32_x4 (pn8, x0 - svcntw () * 3),
+		 z0 = svld1_x4 (pn8, x0 - svcntw () * 3))
+
+/*
+** ld1_f32_m4:
+**	ld1w	{z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m4, svfloat32x4_t, float32_t,
+		 z0 = svld1_f32_x4 (pn8, x0 - svcntw () * 4),
+		 z0 = svld1_x4 (pn8, x0 - svcntw () * 4))
+
+/*
+** ld1_f32_m32:
+**	ld1w	{z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m32, svfloat32x4_t, float32_t,
+		 z0 = svld1_f32_x4 (pn8, x0 - svcntw () * 32),
+		 z0 = svld1_x4 (pn8, x0 - svcntw () * 32))
+
+/*
+** ld1_f32_m36:
+**	[^{]*
+**	ld1w	{z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m36, svfloat32x4_t, float32_t,
+		 z0 = svld1_f32_x4 (pn8, x0 - svcntw () * 36),
+		 z0 = svld1_x4 (pn8, x0 - svcntw () * 36))
+
+/*
+** ld1_f32_z17:
+**	ld1w	{z[^\n]+}, pn8/z, \[x0\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z17, svfloat32x4_t, float32_t,
+		 z17 = svld1_f32_x4 (pn8, x0),
+		 z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f32_z22:
+**	ld1w	{z[^\n]+}, pn8/z, \[x0\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z22, svfloat32x4_t, float32_t,
+		 z22 = svld1_f32_x4 (pn8, x0),
+		 z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f32_z28:
+**	ld1w	{z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z28, svfloat32x4_t, float32_t,
+		 z28 = svld1_f32_x4 (pn8, x0),
+		 z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f32_pn0:
+**	mov	p([89]|1[0-5])\.b, p0\.b
+**	ld1w	{z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f32_pn0, svfloat32x4_t, float32_t,
+		 z0 = svld1_f32_x4 (pn0, x0),
+		 z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_f32_pn7:
+**	mov	p([89]|1[0-5])\.b, p7\.b
+**	ld1w	{z0\.s(?: - 
|, )z3\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_pn7, svfloat32x4_t, float32_t, + z0 = svld1_f32_x4 (pn7, x0), + z0 = svld1_x4 (pn7, x0)) + +/* +** ld1_f32_pn15: +** ld1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f32_pn15, svfloat32x4_t, float32_t, + z0 = svld1_f32_x4 (pn15, x0), + z0 = svld1_x4 (pn15, x0)) + +/* +** ld1_vnum_f32_0: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_0, svfloat32x4_t, float32_t, + z0 = svld1_vnum_f32_x4 (pn8, x0, 0), + z0 = svld1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f32_1: +** incb x0 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_1, svfloat32x4_t, float32_t, + z0 = svld1_vnum_f32_x4 (pn8, x0, 1), + z0 = svld1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f32_2: +** incb x0, all, mul #2 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_2, svfloat32x4_t, float32_t, + z0 = svld1_vnum_f32_x4 (pn8, x0, 2), + z0 = svld1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f32_3: +** incb x0, all, mul #3 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_3, svfloat32x4_t, float32_t, + z0 = svld1_vnum_f32_x4 (pn8, x0, 3), + z0 = svld1_vnum_x4 (pn8, x0, 3)) + +/* +** ld1_vnum_f32_4: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_4, svfloat32x4_t, float32_t, + z0 = svld1_vnum_f32_x4 (pn8, x0, 4), + z0 = svld1_vnum_x4 (pn8, x0, 4)) + +/* +** ld1_vnum_f32_28: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_28, svfloat32x4_t, float32_t, + z0 = svld1_vnum_f32_x4 (pn8, x0, 28), + z0 = svld1_vnum_x4 (pn8, x0, 28)) + +/* +** ld1_vnum_f32_32: +** [^{]* +** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_32, svfloat32x4_t, float32_t, + z0 = svld1_vnum_f32_x4 (pn8, x0, 32), + z0 = svld1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f32_m1: +** decb x0 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_m1, svfloat32x4_t, float32_t, + z0 = svld1_vnum_f32_x4 (pn8, x0, -1), + z0 = svld1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f32_m2: +** decb x0, all, mul #2 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_m2, svfloat32x4_t, float32_t, + z0 = svld1_vnum_f32_x4 (pn8, x0, -2), + z0 = svld1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
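-3 is not a multiple of 4, so no immediate encoding exists for the four-register load.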
*/ +/* +** ld1_vnum_f32_m3: +** decb x0, all, mul #3 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_m3, svfloat32x4_t, float32_t, + z0 = svld1_vnum_f32_x4 (pn8, x0, -3), + z0 = svld1_vnum_x4 (pn8, x0, -3)) + +/* +** ld1_vnum_f32_m4: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_m4, svfloat32x4_t, float32_t, + z0 = svld1_vnum_f32_x4 (pn8, x0, -4), + z0 = svld1_vnum_x4 (pn8, x0, -4)) + +/* +** ld1_vnum_f32_m32: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_m32, svfloat32x4_t, float32_t, + z0 = svld1_vnum_f32_x4 (pn8, x0, -32), + z0 = svld1_vnum_x4 (pn8, x0, -32)) + +/* +** ld1_vnum_f32_m36: +** [^{]* +** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_m36, svfloat32x4_t, float32_t, + z0 = svld1_vnum_f32_x4 (pn8, x0, -36), + z0 = svld1_vnum_x4 (pn8, x0, -36)) + +/* +** ld1_vnum_f32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1w {z0\.s - z3\.s}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f32_x1, svfloat32x4_t, float32_t, + z0 = svld1_vnum_f32_x4 (pn8, x0, x1), + z0 = svld1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..80e6bed6915abf5b0ceb8e1336e655c7a65ae17e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f64_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_f64_base: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_base, svfloat64x2_t, float64_t, + z0 = svld1_f64_x2 (pn8, x0), + z0 = svld1_x2 (pn8, x0)) + +/* +** ld1_f64_index: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_index, svfloat64x2_t, float64_t, + z0 = svld1_f64_x2 (pn8, x0 + x1), + z0 = svld1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_f64_1: +** incb x0 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_1, svfloat64x2_t, float64_t, + z0 = svld1_f64_x2 (pn8, x0 + svcntd ()), + z0 = svld1_x2 (pn8, x0 + svcntd ())) + +/* +** ld1_f64_2: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_2, svfloat64x2_t, float64_t, + z0 = svld1_f64_x2 (pn8, x0 + svcntd () * 2), + z0 = svld1_x2 (pn8, x0 + svcntd () * 2)) + +/* +** ld1_f64_14: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_14, svfloat64x2_t, float64_t, + z0 = svld1_f64_x2 (pn8, x0 + svcntd () * 14), + z0 = svld1_x2 (pn8, x0 + svcntd () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_f64_16: +** incb x0, all, mul #16 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_16, svfloat64x2_t, float64_t, + z0 = svld1_f64_x2 (pn8, x0 + svcntd () * 16), + z0 = svld1_x2 (pn8, x0 + svcntd () * 16)) + +/* Moving the constant into a register would also be OK. 
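An odd offset of -1 vector cannot be encoded, since the paired form takes even multiples only.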
*/ +/* +** ld1_f64_m1: +** decb x0 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_m1, svfloat64x2_t, float64_t, + z0 = svld1_f64_x2 (pn8, x0 - svcntd ()), + z0 = svld1_x2 (pn8, x0 - svcntd ())) + +/* +** ld1_f64_m2: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_m2, svfloat64x2_t, float64_t, + z0 = svld1_f64_x2 (pn8, x0 - svcntd () * 2), + z0 = svld1_x2 (pn8, x0 - svcntd () * 2)) + +/* +** ld1_f64_m16: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_m16, svfloat64x2_t, float64_t, + z0 = svld1_f64_x2 (pn8, x0 - svcntd () * 16), + z0 = svld1_x2 (pn8, x0 - svcntd () * 16)) + +/* +** ld1_f64_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_m18, svfloat64x2_t, float64_t, + z0 = svld1_f64_x2 (pn8, x0 - svcntd () * 18), + z0 = svld1_x2 (pn8, x0 - svcntd () * 18)) + +/* +** ld1_f64_z17: +** ld1d {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_z17, svfloat64x2_t, float64_t, + z17 = svld1_f64_x2 (pn8, x0), + z17 = svld1_x2 (pn8, x0)) + +/* +** ld1_f64_z22: +** ld1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_z22, svfloat64x2_t, float64_t, + z22 = svld1_f64_x2 (pn8, x0), + z22 = svld1_x2 (pn8, x0)) + +/* +** ld1_f64_z28: +** ld1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_z28, svfloat64x2_t, float64_t, + z28 = svld1_f64_x2 (pn8, x0), + z28 = svld1_x2 (pn8, x0)) + +/* +** ld1_f64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_pn0, svfloat64x2_t, float64_t, + z0 = svld1_f64_x2 (pn0, x0), + z0 = svld1_x2 (pn0, x0)) + +/* +** ld1_f64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_pn7, svfloat64x2_t, float64_t, + z0 = svld1_f64_x2 (pn7, x0), + z0 = svld1_x2 (pn7, x0)) + +/* +** ld1_f64_pn15: +** ld1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_pn15, svfloat64x2_t, float64_t, + z0 = svld1_f64_x2 (pn15, x0), + z0 = svld1_x2 (pn15, x0)) + +/* +** ld1_vnum_f64_0: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_0, svfloat64x2_t, float64_t, + z0 = svld1_vnum_f64_x2 (pn8, x0, 0), + z0 = svld1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f64_1: +** incb x0 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_1, svfloat64x2_t, float64_t, + z0 = svld1_vnum_f64_x2 (pn8, x0, 1), + z0 = svld1_vnum_x2 (pn8, x0, 1)) + +/* +** ld1_vnum_f64_2: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_2, svfloat64x2_t, float64_t, + z0 = svld1_vnum_f64_x2 (pn8, x0, 2), + z0 = svld1_vnum_x2 (pn8, x0, 2)) + +/* +** ld1_vnum_f64_14: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_14, svfloat64x2_t, float64_t, + z0 = svld1_vnum_f64_x2 (pn8, x0, 14), + z0 = svld1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
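A vnum of 16 falls outside the even -16 to 14 immediate range of the two-register load.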
*/ +/* +** ld1_vnum_f64_16: +** incb x0, all, mul #16 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_16, svfloat64x2_t, float64_t, + z0 = svld1_vnum_f64_x2 (pn8, x0, 16), + z0 = svld1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f64_m1: +** decb x0 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_m1, svfloat64x2_t, float64_t, + z0 = svld1_vnum_f64_x2 (pn8, x0, -1), + z0 = svld1_vnum_x2 (pn8, x0, -1)) + +/* +** ld1_vnum_f64_m2: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_m2, svfloat64x2_t, float64_t, + z0 = svld1_vnum_f64_x2 (pn8, x0, -2), + z0 = svld1_vnum_x2 (pn8, x0, -2)) + +/* +** ld1_vnum_f64_m16: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_m16, svfloat64x2_t, float64_t, + z0 = svld1_vnum_f64_x2 (pn8, x0, -16), + z0 = svld1_vnum_x2 (pn8, x0, -16)) + +/* +** ld1_vnum_f64_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_m18, svfloat64x2_t, float64_t, + z0 = svld1_vnum_f64_x2 (pn8, x0, -18), + z0 = svld1_vnum_x2 (pn8, x0, -18)) + +/* +** ld1_vnum_f64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_x1, svfloat64x2_t, float64_t, + z0 = svld1_vnum_f64_x2 (pn8, x0, x1), + z0 = svld1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..5153d6480d7574a50c5147d82859c5199c64ceee --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_f64_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_f64_base: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_base, svfloat64x4_t, float64_t, + z0 = svld1_f64_x4 (pn8, x0), + z0 = svld1_x4 (pn8, x0)) + +/* +** ld1_f64_index: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_index, svfloat64x4_t, float64_t, + z0 = svld1_f64_x4 (pn8, x0 + x1), + z0 = svld1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_f64_1: +** incb x0 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_1, svfloat64x4_t, float64_t, + z0 = svld1_f64_x4 (pn8, x0 + svcntd ()), + z0 = svld1_x4 (pn8, x0 + svcntd ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_f64_2: +** incb x0, all, mul #2 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_2, svfloat64x4_t, float64_t, + z0 = svld1_f64_x4 (pn8, x0 + svcntd () * 2), + z0 = svld1_x4 (pn8, x0 + svcntd () * 2)) + +/* Moving the constant into a register would also be OK. 
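3 vectors is not a multiple of the register count, ruling out the immediate form.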
*/
+/*
+** ld1_f64_3:
+**	incb	x0, all, mul #3
+**	ld1d	{z0\.d - z3\.d}, pn8/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f64_3, svfloat64x4_t, float64_t,
+		 z0 = svld1_f64_x4 (pn8, x0 + svcntd () * 3),
+		 z0 = svld1_x4 (pn8, x0 + svcntd () * 3))
+
+/*
+** ld1_f64_4:
+**	ld1d	{z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f64_4, svfloat64x4_t, float64_t,
+		 z0 = svld1_f64_x4 (pn8, x0 + svcntd () * 4),
+		 z0 = svld1_x4 (pn8, x0 + svcntd () * 4))
+
+/*
+** ld1_f64_28:
+**	ld1d	{z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f64_28, svfloat64x4_t, float64_t,
+		 z0 = svld1_f64_x4 (pn8, x0 + svcntd () * 28),
+		 z0 = svld1_x4 (pn8, x0 + svcntd () * 28))
+
+/*
+** ld1_f64_32:
+**	[^{]*
+**	ld1d	{z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f64_32, svfloat64x4_t, float64_t,
+		 z0 = svld1_f64_x4 (pn8, x0 + svcntd () * 32),
+		 z0 = svld1_x4 (pn8, x0 + svcntd () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_m1:
+**	decb	x0
+**	ld1d	{z0\.d - z3\.d}, pn8/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m1, svfloat64x4_t, float64_t,
+		 z0 = svld1_f64_x4 (pn8, x0 - svcntd ()),
+		 z0 = svld1_x4 (pn8, x0 - svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_m2:
+**	decb	x0, all, mul #2
+**	ld1d	{z0\.d - z3\.d}, pn8/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m2, svfloat64x4_t, float64_t,
+		 z0 = svld1_f64_x4 (pn8, x0 - svcntd () * 2),
+		 z0 = svld1_x4 (pn8, x0 - svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_m3:
+**	decb	x0, all, mul #3
+**	ld1d	{z0\.d - z3\.d}, pn8/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m3, svfloat64x4_t, float64_t,
+		 z0 = svld1_f64_x4 (pn8, x0 - svcntd () * 3),
+		 z0 = svld1_x4 (pn8, x0 - svcntd () * 3))
+
+/*
+** ld1_f64_m4:
+**	ld1d	{z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m4, svfloat64x4_t, float64_t,
+		 z0 = svld1_f64_x4 (pn8, x0 - svcntd () * 4),
+		 z0 = svld1_x4 (pn8, x0 - svcntd () * 4))
+
+/*
+** ld1_f64_m32:
+**	ld1d	{z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m32, svfloat64x4_t, float64_t,
+		 z0 = svld1_f64_x4 (pn8, x0 - svcntd () * 32),
+		 z0 = svld1_x4 (pn8, x0 - svcntd () * 32))
+
+/*
+** ld1_f64_m36:
+**	[^{]*
+**	ld1d	{z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m36, svfloat64x4_t, float64_t,
+		 z0 = svld1_f64_x4 (pn8, x0 - svcntd () * 36),
+		 z0 = svld1_x4 (pn8, x0 - svcntd () * 36))
+
+/*
+** ld1_f64_z17:
+**	ld1d	{z[^\n]+}, pn8/z, \[x0\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z17, svfloat64x4_t, float64_t,
+		 z17 = svld1_f64_x4 (pn8, x0),
+		 z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f64_z22:
+**	ld1d	{z[^\n]+}, pn8/z, \[x0\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z22, svfloat64x4_t, float64_t,
+		 z22 = svld1_f64_x4 (pn8, x0),
+		 z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f64_z28:
+**	ld1d	{z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z28, svfloat64x4_t, float64_t,
+		 z28 = svld1_f64_x4 (pn8, x0),
+		 z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f64_pn0:
+**	mov	p([89]|1[0-5])\.b, p0\.b
+**	ld1d	{z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+**	ret
+*/
+TEST_LOAD_COUNT (ld1_f64_pn0, svfloat64x4_t, float64_t,
+		 z0 = svld1_f64_x4 (pn0, x0),
+		 z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_f64_pn7:
+**	mov	p([89]|1[0-5])\.b, p7\.b
+**	ld1d	{z0\.d(?: - 
|, )z3\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_pn7, svfloat64x4_t, float64_t, + z0 = svld1_f64_x4 (pn7, x0), + z0 = svld1_x4 (pn7, x0)) + +/* +** ld1_f64_pn15: +** ld1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_f64_pn15, svfloat64x4_t, float64_t, + z0 = svld1_f64_x4 (pn15, x0), + z0 = svld1_x4 (pn15, x0)) + +/* +** ld1_vnum_f64_0: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_0, svfloat64x4_t, float64_t, + z0 = svld1_vnum_f64_x4 (pn8, x0, 0), + z0 = svld1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f64_1: +** incb x0 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_1, svfloat64x4_t, float64_t, + z0 = svld1_vnum_f64_x4 (pn8, x0, 1), + z0 = svld1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f64_2: +** incb x0, all, mul #2 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_2, svfloat64x4_t, float64_t, + z0 = svld1_vnum_f64_x4 (pn8, x0, 2), + z0 = svld1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f64_3: +** incb x0, all, mul #3 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_3, svfloat64x4_t, float64_t, + z0 = svld1_vnum_f64_x4 (pn8, x0, 3), + z0 = svld1_vnum_x4 (pn8, x0, 3)) + +/* +** ld1_vnum_f64_4: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_4, svfloat64x4_t, float64_t, + z0 = svld1_vnum_f64_x4 (pn8, x0, 4), + z0 = svld1_vnum_x4 (pn8, x0, 4)) + +/* +** ld1_vnum_f64_28: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_28, svfloat64x4_t, float64_t, + z0 = svld1_vnum_f64_x4 (pn8, x0, 28), + z0 = svld1_vnum_x4 (pn8, x0, 28)) + +/* +** ld1_vnum_f64_32: +** [^{]* +** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_32, svfloat64x4_t, float64_t, + z0 = svld1_vnum_f64_x4 (pn8, x0, 32), + z0 = svld1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f64_m1: +** decb x0 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_m1, svfloat64x4_t, float64_t, + z0 = svld1_vnum_f64_x4 (pn8, x0, -1), + z0 = svld1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_f64_m2: +** decb x0, all, mul #2 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_m2, svfloat64x4_t, float64_t, + z0 = svld1_vnum_f64_x4 (pn8, x0, -2), + z0 = svld1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
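A vnum of -3 is not a multiple of 4 and so cannot use the immediate addressing mode.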
*/ +/* +** ld1_vnum_f64_m3: +** decb x0, all, mul #3 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_m3, svfloat64x4_t, float64_t, + z0 = svld1_vnum_f64_x4 (pn8, x0, -3), + z0 = svld1_vnum_x4 (pn8, x0, -3)) + +/* +** ld1_vnum_f64_m4: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_m4, svfloat64x4_t, float64_t, + z0 = svld1_vnum_f64_x4 (pn8, x0, -4), + z0 = svld1_vnum_x4 (pn8, x0, -4)) + +/* +** ld1_vnum_f64_m32: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_m32, svfloat64x4_t, float64_t, + z0 = svld1_vnum_f64_x4 (pn8, x0, -32), + z0 = svld1_vnum_x4 (pn8, x0, -32)) + +/* +** ld1_vnum_f64_m36: +** [^{]* +** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_m36, svfloat64x4_t, float64_t, + z0 = svld1_vnum_f64_x4 (pn8, x0, -36), + z0 = svld1_vnum_x4 (pn8, x0, -36)) + +/* +** ld1_vnum_f64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1d {z0\.d - z3\.d}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_f64_x1, svfloat64x4_t, float64_t, + z0 = svld1_vnum_f64_x4 (pn8, x0, x1), + z0 = svld1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..5121bf38fd5f4716ae3d90b9ec1b7397d89cc757 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s16_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_s16_base: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_base, svint16x2_t, int16_t, + z0 = svld1_s16_x2 (pn8, x0), + z0 = svld1_x2 (pn8, x0)) + +/* +** ld1_s16_index: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_index, svint16x2_t, int16_t, + z0 = svld1_s16_x2 (pn8, x0 + x1), + z0 = svld1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s16_1: +** incb x0 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_1, svint16x2_t, int16_t, + z0 = svld1_s16_x2 (pn8, x0 + svcnth ()), + z0 = svld1_x2 (pn8, x0 + svcnth ())) + +/* +** ld1_s16_2: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_2, svint16x2_t, int16_t, + z0 = svld1_s16_x2 (pn8, x0 + svcnth () * 2), + z0 = svld1_x2 (pn8, x0 + svcnth () * 2)) + +/* +** ld1_s16_14: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_14, svint16x2_t, int16_t, + z0 = svld1_s16_x2 (pn8, x0 + svcnth () * 14), + z0 = svld1_x2 (pn8, x0 + svcnth () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s16_16: +** incb x0, all, mul #16 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_16, svint16x2_t, int16_t, + z0 = svld1_s16_x2 (pn8, x0 + svcnth () * 16), + z0 = svld1_x2 (pn8, x0 + svcnth () * 16)) + +/* Moving the constant into a register would also be OK. 
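An offset of -1 vector is odd, so the even-multiple immediate form of the two-register load is unavailable.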
*/ +/* +** ld1_s16_m1: +** decb x0 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_m1, svint16x2_t, int16_t, + z0 = svld1_s16_x2 (pn8, x0 - svcnth ()), + z0 = svld1_x2 (pn8, x0 - svcnth ())) + +/* +** ld1_s16_m2: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_m2, svint16x2_t, int16_t, + z0 = svld1_s16_x2 (pn8, x0 - svcnth () * 2), + z0 = svld1_x2 (pn8, x0 - svcnth () * 2)) + +/* +** ld1_s16_m16: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_m16, svint16x2_t, int16_t, + z0 = svld1_s16_x2 (pn8, x0 - svcnth () * 16), + z0 = svld1_x2 (pn8, x0 - svcnth () * 16)) + +/* +** ld1_s16_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_m18, svint16x2_t, int16_t, + z0 = svld1_s16_x2 (pn8, x0 - svcnth () * 18), + z0 = svld1_x2 (pn8, x0 - svcnth () * 18)) + +/* +** ld1_s16_z17: +** ld1h {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_z17, svint16x2_t, int16_t, + z17 = svld1_s16_x2 (pn8, x0), + z17 = svld1_x2 (pn8, x0)) + +/* +** ld1_s16_z22: +** ld1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_z22, svint16x2_t, int16_t, + z22 = svld1_s16_x2 (pn8, x0), + z22 = svld1_x2 (pn8, x0)) + +/* +** ld1_s16_z28: +** ld1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_z28, svint16x2_t, int16_t, + z28 = svld1_s16_x2 (pn8, x0), + z28 = svld1_x2 (pn8, x0)) + +/* +** ld1_s16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_pn0, svint16x2_t, int16_t, + z0 = svld1_s16_x2 (pn0, x0), + z0 = svld1_x2 (pn0, x0)) + +/* +** ld1_s16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_pn7, svint16x2_t, int16_t, + z0 = svld1_s16_x2 (pn7, x0), + z0 = svld1_x2 (pn7, x0)) + +/* +** ld1_s16_pn15: +** ld1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_pn15, svint16x2_t, int16_t, + z0 = svld1_s16_x2 (pn15, x0), + z0 = svld1_x2 (pn15, x0)) + +/* +** ld1_vnum_s16_0: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_0, svint16x2_t, int16_t, + z0 = svld1_vnum_s16_x2 (pn8, x0, 0), + z0 = svld1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s16_1: +** incb x0 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_1, svint16x2_t, int16_t, + z0 = svld1_vnum_s16_x2 (pn8, x0, 1), + z0 = svld1_vnum_x2 (pn8, x0, 1)) + +/* +** ld1_vnum_s16_2: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_2, svint16x2_t, int16_t, + z0 = svld1_vnum_s16_x2 (pn8, x0, 2), + z0 = svld1_vnum_x2 (pn8, x0, 2)) + +/* +** ld1_vnum_s16_14: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_14, svint16x2_t, int16_t, + z0 = svld1_vnum_s16_x2 (pn8, x0, 14), + z0 = svld1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s16_16: +** incb x0, all, mul #16 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_16, svint16x2_t, int16_t, + z0 = svld1_vnum_s16_x2 (pn8, x0, 16), + z0 = svld1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_vnum_s16_m1: +** decb x0 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_m1, svint16x2_t, int16_t, + z0 = svld1_vnum_s16_x2 (pn8, x0, -1), + z0 = svld1_vnum_x2 (pn8, x0, -1)) + +/* +** ld1_vnum_s16_m2: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_m2, svint16x2_t, int16_t, + z0 = svld1_vnum_s16_x2 (pn8, x0, -2), + z0 = svld1_vnum_x2 (pn8, x0, -2)) + +/* +** ld1_vnum_s16_m16: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_m16, svint16x2_t, int16_t, + z0 = svld1_vnum_s16_x2 (pn8, x0, -16), + z0 = svld1_vnum_x2 (pn8, x0, -16)) + +/* +** ld1_vnum_s16_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_m18, svint16x2_t, int16_t, + z0 = svld1_vnum_s16_x2 (pn8, x0, -18), + z0 = svld1_vnum_x2 (pn8, x0, -18)) + +/* +** ld1_vnum_s16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_x1, svint16x2_t, int16_t, + z0 = svld1_vnum_s16_x2 (pn8, x0, x1), + z0 = svld1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..16e1fce71179b66ec32ad829ea4fc45cf1aea964 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s16_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_s16_base: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_base, svint16x4_t, int16_t, + z0 = svld1_s16_x4 (pn8, x0), + z0 = svld1_x4 (pn8, x0)) + +/* +** ld1_s16_index: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_index, svint16x4_t, int16_t, + z0 = svld1_s16_x4 (pn8, x0 + x1), + z0 = svld1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s16_1: +** incb x0 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_1, svint16x4_t, int16_t, + z0 = svld1_s16_x4 (pn8, x0 + svcnth ()), + z0 = svld1_x4 (pn8, x0 + svcnth ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s16_2: +** incb x0, all, mul #2 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_2, svint16x4_t, int16_t, + z0 = svld1_s16_x4 (pn8, x0 + svcnth () * 2), + z0 = svld1_x4 (pn8, x0 + svcnth () * 2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_s16_3: +** incb x0, all, mul #3 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_3, svint16x4_t, int16_t, + z0 = svld1_s16_x4 (pn8, x0 + svcnth () * 3), + z0 = svld1_x4 (pn8, x0 + svcnth () * 3)) + +/* +** ld1_s16_4: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_4, svint16x4_t, int16_t, + z0 = svld1_s16_x4 (pn8, x0 + svcnth () * 4), + z0 = svld1_x4 (pn8, x0 + svcnth () * 4)) + +/* +** ld1_s16_28: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_28, svint16x4_t, int16_t, + z0 = svld1_s16_x4 (pn8, x0 + svcnth () * 28), + z0 = svld1_x4 (pn8, x0 + svcnth () * 28)) + +/* +** ld1_s16_32: +** [^{]* +** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_32, svint16x4_t, int16_t, + z0 = svld1_s16_x4 (pn8, x0 + svcnth () * 32), + z0 = svld1_x4 (pn8, x0 + svcnth () * 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s16_m1: +** decb x0 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_m1, svint16x4_t, int16_t, + z0 = svld1_s16_x4 (pn8, x0 - svcnth ()), + z0 = svld1_x4 (pn8, x0 - svcnth ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s16_m2: +** decb x0, all, mul #2 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_m2, svint16x4_t, int16_t, + z0 = svld1_s16_x4 (pn8, x0 - svcnth () * 2), + z0 = svld1_x4 (pn8, x0 - svcnth () * 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s16_m3: +** decb x0, all, mul #3 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_m3, svint16x4_t, int16_t, + z0 = svld1_s16_x4 (pn8, x0 - svcnth () * 3), + z0 = svld1_x4 (pn8, x0 - svcnth () * 3)) + +/* +** ld1_s16_m4: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ + TEST_LOAD_COUNT (ld1_s16_m4, svint16x4_t, int16_t, + z0 = svld1_s16_x4 (pn8, x0 - svcnth () * 4), + z0 = svld1_x4 (pn8, x0 - svcnth () * 4)) + +/* +** ld1_s16_m32: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_m32, svint16x4_t, int16_t, + z0 = svld1_s16_x4 (pn8, x0 - svcnth () * 32), + z0 = svld1_x4 (pn8, x0 - svcnth () * 32)) + +/* +** ld1_s16_m36: +** [^{]* +** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_m36, svint16x4_t, int16_t, + z0 = svld1_s16_x4 (pn8, x0 - svcnth () * 36), + z0 = svld1_x4 (pn8, x0 - svcnth () * 36)) + +/* +** ld1_s16_z17: +** ld1h {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_z17, svint16x4_t, int16_t, + z17 = svld1_s16_x4 (pn8, x0), + z17 = svld1_x4 (pn8, x0)) + +/* +** ld1_s16_z22: +** ld1h {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_z22, svint16x4_t, int16_t, + z22 = svld1_s16_x4 (pn8, x0), + z22 = svld1_x4 (pn8, x0)) + +/* +** ld1_s16_z28: +** ld1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_z28, svint16x4_t, int16_t, + z28 = svld1_s16_x4 (pn8, x0), + z28 = svld1_x4 (pn8, x0)) + +/* +** ld1_s16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_pn0, svint16x4_t, int16_t, + z0 = svld1_s16_x4 (pn0, x0), + z0 = svld1_x4 (pn0, x0)) + +/* +** ld1_s16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT 
(ld1_s16_pn7, svint16x4_t, int16_t, + z0 = svld1_s16_x4 (pn7, x0), + z0 = svld1_x4 (pn7, x0)) + +/* +** ld1_s16_pn15: +** ld1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s16_pn15, svint16x4_t, int16_t, + z0 = svld1_s16_x4 (pn15, x0), + z0 = svld1_x4 (pn15, x0)) + +/* +** ld1_vnum_s16_0: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_0, svint16x4_t, int16_t, + z0 = svld1_vnum_s16_x4 (pn8, x0, 0), + z0 = svld1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s16_1: +** incb x0 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_1, svint16x4_t, int16_t, + z0 = svld1_vnum_s16_x4 (pn8, x0, 1), + z0 = svld1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s16_2: +** incb x0, all, mul #2 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_2, svint16x4_t, int16_t, + z0 = svld1_vnum_s16_x4 (pn8, x0, 2), + z0 = svld1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s16_3: +** incb x0, all, mul #3 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_3, svint16x4_t, int16_t, + z0 = svld1_vnum_s16_x4 (pn8, x0, 3), + z0 = svld1_vnum_x4 (pn8, x0, 3)) + +/* +** ld1_vnum_s16_4: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_4, svint16x4_t, int16_t, + z0 = svld1_vnum_s16_x4 (pn8, x0, 4), + z0 = svld1_vnum_x4 (pn8, x0, 4)) + +/* +** ld1_vnum_s16_28: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_28, svint16x4_t, int16_t, + z0 = svld1_vnum_s16_x4 (pn8, x0, 28), + z0 = svld1_vnum_x4 (pn8, x0, 28)) + +/* +** ld1_vnum_s16_32: +** [^{]* +** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_32, svint16x4_t, int16_t, + z0 = svld1_vnum_s16_x4 (pn8, x0, 32), + z0 = svld1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s16_m1: +** decb x0 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_m1, svint16x4_t, int16_t, + z0 = svld1_vnum_s16_x4 (pn8, x0, -1), + z0 = svld1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s16_m2: +** decb x0, all, mul #2 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_m2, svint16x4_t, int16_t, + z0 = svld1_vnum_s16_x4 (pn8, x0, -2), + z0 = svld1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_vnum_s16_m3: +** decb x0, all, mul #3 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_m3, svint16x4_t, int16_t, + z0 = svld1_vnum_s16_x4 (pn8, x0, -3), + z0 = svld1_vnum_x4 (pn8, x0, -3)) + +/* +** ld1_vnum_s16_m4: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_m4, svint16x4_t, int16_t, + z0 = svld1_vnum_s16_x4 (pn8, x0, -4), + z0 = svld1_vnum_x4 (pn8, x0, -4)) + +/* +** ld1_vnum_s16_m32: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_m32, svint16x4_t, int16_t, + z0 = svld1_vnum_s16_x4 (pn8, x0, -32), + z0 = svld1_vnum_x4 (pn8, x0, -32)) + +/* +** ld1_vnum_s16_m36: +** [^{]* +** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_m36, svint16x4_t, int16_t, + z0 = svld1_vnum_s16_x4 (pn8, x0, -36), + z0 = svld1_vnum_x4 (pn8, x0, -36)) + +/* +** ld1_vnum_s16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1h {z0\.h - z3\.h}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s16_x1, svint16x4_t, int16_t, + z0 = svld1_vnum_s16_x4 (pn8, x0, x1), + z0 = svld1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..884a0ca9fff5c98aed36c4365bcc9805c0065220 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s32_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_s32_base: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_base, svint32x2_t, int32_t, + z0 = svld1_s32_x2 (pn8, x0), + z0 = svld1_x2 (pn8, x0)) + +/* +** ld1_s32_index: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_index, svint32x2_t, int32_t, + z0 = svld1_s32_x2 (pn8, x0 + x1), + z0 = svld1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s32_1: +** incb x0 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_1, svint32x2_t, int32_t, + z0 = svld1_s32_x2 (pn8, x0 + svcntw ()), + z0 = svld1_x2 (pn8, x0 + svcntw ())) + +/* +** ld1_s32_2: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_2, svint32x2_t, int32_t, + z0 = svld1_s32_x2 (pn8, x0 + svcntw () * 2), + z0 = svld1_x2 (pn8, x0 + svcntw () * 2)) + +/* +** ld1_s32_14: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_14, svint32x2_t, int32_t, + z0 = svld1_s32_x2 (pn8, x0 + svcntw () * 14), + z0 = svld1_x2 (pn8, x0 + svcntw () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s32_16: +** incb x0, all, mul #16 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_16, svint32x2_t, int32_t, + z0 = svld1_s32_x2 (pn8, x0 + svcntw () * 16), + z0 = svld1_x2 (pn8, x0 + svcntw () * 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_s32_m1: +** decb x0 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_m1, svint32x2_t, int32_t, + z0 = svld1_s32_x2 (pn8, x0 - svcntw ()), + z0 = svld1_x2 (pn8, x0 - svcntw ())) + +/* +** ld1_s32_m2: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_m2, svint32x2_t, int32_t, + z0 = svld1_s32_x2 (pn8, x0 - svcntw () * 2), + z0 = svld1_x2 (pn8, x0 - svcntw () * 2)) + +/* +** ld1_s32_m16: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_m16, svint32x2_t, int32_t, + z0 = svld1_s32_x2 (pn8, x0 - svcntw () * 16), + z0 = svld1_x2 (pn8, x0 - svcntw () * 16)) + +/* +** ld1_s32_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_m18, svint32x2_t, int32_t, + z0 = svld1_s32_x2 (pn8, x0 - svcntw () * 18), + z0 = svld1_x2 (pn8, x0 - svcntw () * 18)) + +/* +** ld1_s32_z17: +** ld1w {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_z17, svint32x2_t, int32_t, + z17 = svld1_s32_x2 (pn8, x0), + z17 = svld1_x2 (pn8, x0)) + +/* +** ld1_s32_z22: +** ld1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_z22, svint32x2_t, int32_t, + z22 = svld1_s32_x2 (pn8, x0), + z22 = svld1_x2 (pn8, x0)) + +/* +** ld1_s32_z28: +** ld1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_z28, svint32x2_t, int32_t, + z28 = svld1_s32_x2 (pn8, x0), + z28 = svld1_x2 (pn8, x0)) + +/* +** ld1_s32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_pn0, svint32x2_t, int32_t, + z0 = svld1_s32_x2 (pn0, x0), + z0 = svld1_x2 (pn0, x0)) + +/* +** ld1_s32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_pn7, svint32x2_t, int32_t, + z0 = svld1_s32_x2 (pn7, x0), + z0 = svld1_x2 (pn7, x0)) + +/* +** ld1_s32_pn15: +** ld1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_pn15, svint32x2_t, int32_t, + z0 = svld1_s32_x2 (pn15, x0), + z0 = svld1_x2 (pn15, x0)) + +/* +** ld1_vnum_s32_0: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_0, svint32x2_t, int32_t, + z0 = svld1_vnum_s32_x2 (pn8, x0, 0), + z0 = svld1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s32_1: +** incb x0 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_1, svint32x2_t, int32_t, + z0 = svld1_vnum_s32_x2 (pn8, x0, 1), + z0 = svld1_vnum_x2 (pn8, x0, 1)) + +/* +** ld1_vnum_s32_2: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_2, svint32x2_t, int32_t, + z0 = svld1_vnum_s32_x2 (pn8, x0, 2), + z0 = svld1_vnum_x2 (pn8, x0, 2)) + +/* +** ld1_vnum_s32_14: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_14, svint32x2_t, int32_t, + z0 = svld1_vnum_s32_x2 (pn8, x0, 14), + z0 = svld1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s32_16: +** incb x0, all, mul #16 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_16, svint32x2_t, int32_t, + z0 = svld1_vnum_s32_x2 (pn8, x0, 16), + z0 = svld1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_vnum_s32_m1: +** decb x0 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_m1, svint32x2_t, int32_t, + z0 = svld1_vnum_s32_x2 (pn8, x0, -1), + z0 = svld1_vnum_x2 (pn8, x0, -1)) + +/* +** ld1_vnum_s32_m2: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_m2, svint32x2_t, int32_t, + z0 = svld1_vnum_s32_x2 (pn8, x0, -2), + z0 = svld1_vnum_x2 (pn8, x0, -2)) + +/* +** ld1_vnum_s32_m16: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_m16, svint32x2_t, int32_t, + z0 = svld1_vnum_s32_x2 (pn8, x0, -16), + z0 = svld1_vnum_x2 (pn8, x0, -16)) + +/* +** ld1_vnum_s32_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_m18, svint32x2_t, int32_t, + z0 = svld1_vnum_s32_x2 (pn8, x0, -18), + z0 = svld1_vnum_x2 (pn8, x0, -18)) + +/* +** ld1_vnum_s32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_x1, svint32x2_t, int32_t, + z0 = svld1_vnum_s32_x2 (pn8, x0, x1), + z0 = svld1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..372edc38ed77096d2c9364da53a29bd3ed44995c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s32_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_s32_base: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_base, svint32x4_t, int32_t, + z0 = svld1_s32_x4 (pn8, x0), + z0 = svld1_x4 (pn8, x0)) + +/* +** ld1_s32_index: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_index, svint32x4_t, int32_t, + z0 = svld1_s32_x4 (pn8, x0 + x1), + z0 = svld1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s32_1: +** incb x0 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_1, svint32x4_t, int32_t, + z0 = svld1_s32_x4 (pn8, x0 + svcntw ()), + z0 = svld1_x4 (pn8, x0 + svcntw ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s32_2: +** incb x0, all, mul #2 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_2, svint32x4_t, int32_t, + z0 = svld1_s32_x4 (pn8, x0 + svcntw () * 2), + z0 = svld1_x4 (pn8, x0 + svcntw () * 2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_s32_3: +** incb x0, all, mul #3 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_3, svint32x4_t, int32_t, + z0 = svld1_s32_x4 (pn8, x0 + svcntw () * 3), + z0 = svld1_x4 (pn8, x0 + svcntw () * 3)) + +/* +** ld1_s32_4: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_4, svint32x4_t, int32_t, + z0 = svld1_s32_x4 (pn8, x0 + svcntw () * 4), + z0 = svld1_x4 (pn8, x0 + svcntw () * 4)) + +/* +** ld1_s32_28: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_28, svint32x4_t, int32_t, + z0 = svld1_s32_x4 (pn8, x0 + svcntw () * 28), + z0 = svld1_x4 (pn8, x0 + svcntw () * 28)) + +/* +** ld1_s32_32: +** [^{]* +** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_32, svint32x4_t, int32_t, + z0 = svld1_s32_x4 (pn8, x0 + svcntw () * 32), + z0 = svld1_x4 (pn8, x0 + svcntw () * 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s32_m1: +** decb x0 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_m1, svint32x4_t, int32_t, + z0 = svld1_s32_x4 (pn8, x0 - svcntw ()), + z0 = svld1_x4 (pn8, x0 - svcntw ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s32_m2: +** decb x0, all, mul #2 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_m2, svint32x4_t, int32_t, + z0 = svld1_s32_x4 (pn8, x0 - svcntw () * 2), + z0 = svld1_x4 (pn8, x0 - svcntw () * 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s32_m3: +** decb x0, all, mul #3 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_m3, svint32x4_t, int32_t, + z0 = svld1_s32_x4 (pn8, x0 - svcntw () * 3), + z0 = svld1_x4 (pn8, x0 - svcntw () * 3)) + +/* +** ld1_s32_m4: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ + TEST_LOAD_COUNT (ld1_s32_m4, svint32x4_t, int32_t, + z0 = svld1_s32_x4 (pn8, x0 - svcntw () * 4), + z0 = svld1_x4 (pn8, x0 - svcntw () * 4)) + +/* +** ld1_s32_m32: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_m32, svint32x4_t, int32_t, + z0 = svld1_s32_x4 (pn8, x0 - svcntw () * 32), + z0 = svld1_x4 (pn8, x0 - svcntw () * 32)) + +/* +** ld1_s32_m36: +** [^{]* +** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_m36, svint32x4_t, int32_t, + z0 = svld1_s32_x4 (pn8, x0 - svcntw () * 36), + z0 = svld1_x4 (pn8, x0 - svcntw () * 36)) + +/* +** ld1_s32_z17: +** ld1w {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_z17, svint32x4_t, int32_t, + z17 = svld1_s32_x4 (pn8, x0), + z17 = svld1_x4 (pn8, x0)) + +/* +** ld1_s32_z22: +** ld1w {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_z22, svint32x4_t, int32_t, + z22 = svld1_s32_x4 (pn8, x0), + z22 = svld1_x4 (pn8, x0)) + +/* +** ld1_s32_z28: +** ld1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_z28, svint32x4_t, int32_t, + z28 = svld1_s32_x4 (pn8, x0), + z28 = svld1_x4 (pn8, x0)) + +/* +** ld1_s32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_pn0, svint32x4_t, int32_t, + z0 = svld1_s32_x4 (pn0, x0), + z0 = svld1_x4 (pn0, x0)) + +/* +** ld1_s32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT 
(ld1_s32_pn7, svint32x4_t, int32_t, + z0 = svld1_s32_x4 (pn7, x0), + z0 = svld1_x4 (pn7, x0)) + +/* +** ld1_s32_pn15: +** ld1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s32_pn15, svint32x4_t, int32_t, + z0 = svld1_s32_x4 (pn15, x0), + z0 = svld1_x4 (pn15, x0)) + +/* +** ld1_vnum_s32_0: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_0, svint32x4_t, int32_t, + z0 = svld1_vnum_s32_x4 (pn8, x0, 0), + z0 = svld1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s32_1: +** incb x0 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_1, svint32x4_t, int32_t, + z0 = svld1_vnum_s32_x4 (pn8, x0, 1), + z0 = svld1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s32_2: +** incb x0, all, mul #2 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_2, svint32x4_t, int32_t, + z0 = svld1_vnum_s32_x4 (pn8, x0, 2), + z0 = svld1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s32_3: +** incb x0, all, mul #3 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_3, svint32x4_t, int32_t, + z0 = svld1_vnum_s32_x4 (pn8, x0, 3), + z0 = svld1_vnum_x4 (pn8, x0, 3)) + +/* +** ld1_vnum_s32_4: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_4, svint32x4_t, int32_t, + z0 = svld1_vnum_s32_x4 (pn8, x0, 4), + z0 = svld1_vnum_x4 (pn8, x0, 4)) + +/* +** ld1_vnum_s32_28: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_28, svint32x4_t, int32_t, + z0 = svld1_vnum_s32_x4 (pn8, x0, 28), + z0 = svld1_vnum_x4 (pn8, x0, 28)) + +/* +** ld1_vnum_s32_32: +** [^{]* +** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_32, svint32x4_t, int32_t, + z0 = svld1_vnum_s32_x4 (pn8, x0, 32), + z0 = svld1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s32_m1: +** decb x0 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_m1, svint32x4_t, int32_t, + z0 = svld1_vnum_s32_x4 (pn8, x0, -1), + z0 = svld1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s32_m2: +** decb x0, all, mul #2 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_m2, svint32x4_t, int32_t, + z0 = svld1_vnum_s32_x4 (pn8, x0, -2), + z0 = svld1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_vnum_s32_m3: +** decb x0, all, mul #3 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_m3, svint32x4_t, int32_t, + z0 = svld1_vnum_s32_x4 (pn8, x0, -3), + z0 = svld1_vnum_x4 (pn8, x0, -3)) + +/* +** ld1_vnum_s32_m4: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_m4, svint32x4_t, int32_t, + z0 = svld1_vnum_s32_x4 (pn8, x0, -4), + z0 = svld1_vnum_x4 (pn8, x0, -4)) + +/* +** ld1_vnum_s32_m32: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_m32, svint32x4_t, int32_t, + z0 = svld1_vnum_s32_x4 (pn8, x0, -32), + z0 = svld1_vnum_x4 (pn8, x0, -32)) + +/* +** ld1_vnum_s32_m36: +** [^{]* +** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_m36, svint32x4_t, int32_t, + z0 = svld1_vnum_s32_x4 (pn8, x0, -36), + z0 = svld1_vnum_x4 (pn8, x0, -36)) + +/* +** ld1_vnum_s32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1w {z0\.s - z3\.s}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s32_x1, svint32x4_t, int32_t, + z0 = svld1_vnum_s32_x4 (pn8, x0, x1), + z0 = svld1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..70fe5b3b22fcaeb073bc8271f818aaae0b566a3c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s64_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_s64_base: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_base, svint64x2_t, int64_t, + z0 = svld1_s64_x2 (pn8, x0), + z0 = svld1_x2 (pn8, x0)) + +/* +** ld1_s64_index: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_index, svint64x2_t, int64_t, + z0 = svld1_s64_x2 (pn8, x0 + x1), + z0 = svld1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s64_1: +** incb x0 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_1, svint64x2_t, int64_t, + z0 = svld1_s64_x2 (pn8, x0 + svcntd ()), + z0 = svld1_x2 (pn8, x0 + svcntd ())) + +/* +** ld1_s64_2: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_2, svint64x2_t, int64_t, + z0 = svld1_s64_x2 (pn8, x0 + svcntd () * 2), + z0 = svld1_x2 (pn8, x0 + svcntd () * 2)) + +/* +** ld1_s64_14: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_14, svint64x2_t, int64_t, + z0 = svld1_s64_x2 (pn8, x0 + svcntd () * 14), + z0 = svld1_x2 (pn8, x0 + svcntd () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s64_16: +** incb x0, all, mul #16 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_16, svint64x2_t, int64_t, + z0 = svld1_s64_x2 (pn8, x0 + svcntd () * 16), + z0 = svld1_x2 (pn8, x0 + svcntd () * 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_s64_m1: +** decb x0 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_m1, svint64x2_t, int64_t, + z0 = svld1_s64_x2 (pn8, x0 - svcntd ()), + z0 = svld1_x2 (pn8, x0 - svcntd ())) + +/* +** ld1_s64_m2: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_m2, svint64x2_t, int64_t, + z0 = svld1_s64_x2 (pn8, x0 - svcntd () * 2), + z0 = svld1_x2 (pn8, x0 - svcntd () * 2)) + +/* +** ld1_s64_m16: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_m16, svint64x2_t, int64_t, + z0 = svld1_s64_x2 (pn8, x0 - svcntd () * 16), + z0 = svld1_x2 (pn8, x0 - svcntd () * 16)) + +/* +** ld1_s64_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_m18, svint64x2_t, int64_t, + z0 = svld1_s64_x2 (pn8, x0 - svcntd () * 18), + z0 = svld1_x2 (pn8, x0 - svcntd () * 18)) + +/* +** ld1_s64_z17: +** ld1d {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_z17, svint64x2_t, int64_t, + z17 = svld1_s64_x2 (pn8, x0), + z17 = svld1_x2 (pn8, x0)) + +/* +** ld1_s64_z22: +** ld1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_z22, svint64x2_t, int64_t, + z22 = svld1_s64_x2 (pn8, x0), + z22 = svld1_x2 (pn8, x0)) + +/* +** ld1_s64_z28: +** ld1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_z28, svint64x2_t, int64_t, + z28 = svld1_s64_x2 (pn8, x0), + z28 = svld1_x2 (pn8, x0)) + +/* +** ld1_s64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_pn0, svint64x2_t, int64_t, + z0 = svld1_s64_x2 (pn0, x0), + z0 = svld1_x2 (pn0, x0)) + +/* +** ld1_s64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_pn7, svint64x2_t, int64_t, + z0 = svld1_s64_x2 (pn7, x0), + z0 = svld1_x2 (pn7, x0)) + +/* +** ld1_s64_pn15: +** ld1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_pn15, svint64x2_t, int64_t, + z0 = svld1_s64_x2 (pn15, x0), + z0 = svld1_x2 (pn15, x0)) + +/* +** ld1_vnum_s64_0: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_0, svint64x2_t, int64_t, + z0 = svld1_vnum_s64_x2 (pn8, x0, 0), + z0 = svld1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s64_1: +** incb x0 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_1, svint64x2_t, int64_t, + z0 = svld1_vnum_s64_x2 (pn8, x0, 1), + z0 = svld1_vnum_x2 (pn8, x0, 1)) + +/* +** ld1_vnum_s64_2: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_2, svint64x2_t, int64_t, + z0 = svld1_vnum_s64_x2 (pn8, x0, 2), + z0 = svld1_vnum_x2 (pn8, x0, 2)) + +/* +** ld1_vnum_s64_14: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_14, svint64x2_t, int64_t, + z0 = svld1_vnum_s64_x2 (pn8, x0, 14), + z0 = svld1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s64_16: +** incb x0, all, mul #16 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_16, svint64x2_t, int64_t, + z0 = svld1_vnum_s64_x2 (pn8, x0, 16), + z0 = svld1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_vnum_s64_m1: +** decb x0 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_m1, svint64x2_t, int64_t, + z0 = svld1_vnum_s64_x2 (pn8, x0, -1), + z0 = svld1_vnum_x2 (pn8, x0, -1)) + +/* +** ld1_vnum_s64_m2: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_m2, svint64x2_t, int64_t, + z0 = svld1_vnum_s64_x2 (pn8, x0, -2), + z0 = svld1_vnum_x2 (pn8, x0, -2)) + +/* +** ld1_vnum_s64_m16: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_m16, svint64x2_t, int64_t, + z0 = svld1_vnum_s64_x2 (pn8, x0, -16), + z0 = svld1_vnum_x2 (pn8, x0, -16)) + +/* +** ld1_vnum_s64_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_m18, svint64x2_t, int64_t, + z0 = svld1_vnum_s64_x2 (pn8, x0, -18), + z0 = svld1_vnum_x2 (pn8, x0, -18)) + +/* +** ld1_vnum_s64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_x1, svint64x2_t, int64_t, + z0 = svld1_vnum_s64_x2 (pn8, x0, x1), + z0 = svld1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..4f6f8b23e8e544cec0c045e6f1ab32534246d002 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s64_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_s64_base: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_base, svint64x4_t, int64_t, + z0 = svld1_s64_x4 (pn8, x0), + z0 = svld1_x4 (pn8, x0)) + +/* +** ld1_s64_index: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_index, svint64x4_t, int64_t, + z0 = svld1_s64_x4 (pn8, x0 + x1), + z0 = svld1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s64_1: +** incb x0 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_1, svint64x4_t, int64_t, + z0 = svld1_s64_x4 (pn8, x0 + svcntd ()), + z0 = svld1_x4 (pn8, x0 + svcntd ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s64_2: +** incb x0, all, mul #2 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_2, svint64x4_t, int64_t, + z0 = svld1_s64_x4 (pn8, x0 + svcntd () * 2), + z0 = svld1_x4 (pn8, x0 + svcntd () * 2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_s64_3: +** incb x0, all, mul #3 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_3, svint64x4_t, int64_t, + z0 = svld1_s64_x4 (pn8, x0 + svcntd () * 3), + z0 = svld1_x4 (pn8, x0 + svcntd () * 3)) + +/* +** ld1_s64_4: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_4, svint64x4_t, int64_t, + z0 = svld1_s64_x4 (pn8, x0 + svcntd () * 4), + z0 = svld1_x4 (pn8, x0 + svcntd () * 4)) + +/* +** ld1_s64_28: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_28, svint64x4_t, int64_t, + z0 = svld1_s64_x4 (pn8, x0 + svcntd () * 28), + z0 = svld1_x4 (pn8, x0 + svcntd () * 28)) + +/* +** ld1_s64_32: +** [^{]* +** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_32, svint64x4_t, int64_t, + z0 = svld1_s64_x4 (pn8, x0 + svcntd () * 32), + z0 = svld1_x4 (pn8, x0 + svcntd () * 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s64_m1: +** decb x0 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_m1, svint64x4_t, int64_t, + z0 = svld1_s64_x4 (pn8, x0 - svcntd ()), + z0 = svld1_x4 (pn8, x0 - svcntd ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s64_m2: +** decb x0, all, mul #2 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_m2, svint64x4_t, int64_t, + z0 = svld1_s64_x4 (pn8, x0 - svcntd () * 2), + z0 = svld1_x4 (pn8, x0 - svcntd () * 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s64_m3: +** decb x0, all, mul #3 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_m3, svint64x4_t, int64_t, + z0 = svld1_s64_x4 (pn8, x0 - svcntd () * 3), + z0 = svld1_x4 (pn8, x0 - svcntd () * 3)) + +/* +** ld1_s64_m4: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ + TEST_LOAD_COUNT (ld1_s64_m4, svint64x4_t, int64_t, + z0 = svld1_s64_x4 (pn8, x0 - svcntd () * 4), + z0 = svld1_x4 (pn8, x0 - svcntd () * 4)) + +/* +** ld1_s64_m32: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_m32, svint64x4_t, int64_t, + z0 = svld1_s64_x4 (pn8, x0 - svcntd () * 32), + z0 = svld1_x4 (pn8, x0 - svcntd () * 32)) + +/* +** ld1_s64_m36: +** [^{]* +** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_m36, svint64x4_t, int64_t, + z0 = svld1_s64_x4 (pn8, x0 - svcntd () * 36), + z0 = svld1_x4 (pn8, x0 - svcntd () * 36)) + +/* +** ld1_s64_z17: +** ld1d {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_z17, svint64x4_t, int64_t, + z17 = svld1_s64_x4 (pn8, x0), + z17 = svld1_x4 (pn8, x0)) + +/* +** ld1_s64_z22: +** ld1d {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_z22, svint64x4_t, int64_t, + z22 = svld1_s64_x4 (pn8, x0), + z22 = svld1_x4 (pn8, x0)) + +/* +** ld1_s64_z28: +** ld1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_z28, svint64x4_t, int64_t, + z28 = svld1_s64_x4 (pn8, x0), + z28 = svld1_x4 (pn8, x0)) + +/* +** ld1_s64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_pn0, svint64x4_t, int64_t, + z0 = svld1_s64_x4 (pn0, x0), + z0 = svld1_x4 (pn0, x0)) + +/* +** ld1_s64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT 
(ld1_s64_pn7, svint64x4_t, int64_t, + z0 = svld1_s64_x4 (pn7, x0), + z0 = svld1_x4 (pn7, x0)) + +/* +** ld1_s64_pn15: +** ld1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s64_pn15, svint64x4_t, int64_t, + z0 = svld1_s64_x4 (pn15, x0), + z0 = svld1_x4 (pn15, x0)) + +/* +** ld1_vnum_s64_0: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_0, svint64x4_t, int64_t, + z0 = svld1_vnum_s64_x4 (pn8, x0, 0), + z0 = svld1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s64_1: +** incb x0 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_1, svint64x4_t, int64_t, + z0 = svld1_vnum_s64_x4 (pn8, x0, 1), + z0 = svld1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s64_2: +** incb x0, all, mul #2 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_2, svint64x4_t, int64_t, + z0 = svld1_vnum_s64_x4 (pn8, x0, 2), + z0 = svld1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s64_3: +** incb x0, all, mul #3 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_3, svint64x4_t, int64_t, + z0 = svld1_vnum_s64_x4 (pn8, x0, 3), + z0 = svld1_vnum_x4 (pn8, x0, 3)) + +/* +** ld1_vnum_s64_4: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_4, svint64x4_t, int64_t, + z0 = svld1_vnum_s64_x4 (pn8, x0, 4), + z0 = svld1_vnum_x4 (pn8, x0, 4)) + +/* +** ld1_vnum_s64_28: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_28, svint64x4_t, int64_t, + z0 = svld1_vnum_s64_x4 (pn8, x0, 28), + z0 = svld1_vnum_x4 (pn8, x0, 28)) + +/* +** ld1_vnum_s64_32: +** [^{]* +** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_32, svint64x4_t, int64_t, + z0 = svld1_vnum_s64_x4 (pn8, x0, 32), + z0 = svld1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s64_m1: +** decb x0 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_m1, svint64x4_t, int64_t, + z0 = svld1_vnum_s64_x4 (pn8, x0, -1), + z0 = svld1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s64_m2: +** decb x0, all, mul #2 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_m2, svint64x4_t, int64_t, + z0 = svld1_vnum_s64_x4 (pn8, x0, -2), + z0 = svld1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_vnum_s64_m3: +** decb x0, all, mul #3 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_m3, svint64x4_t, int64_t, + z0 = svld1_vnum_s64_x4 (pn8, x0, -3), + z0 = svld1_vnum_x4 (pn8, x0, -3)) + +/* +** ld1_vnum_s64_m4: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_m4, svint64x4_t, int64_t, + z0 = svld1_vnum_s64_x4 (pn8, x0, -4), + z0 = svld1_vnum_x4 (pn8, x0, -4)) + +/* +** ld1_vnum_s64_m32: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_m32, svint64x4_t, int64_t, + z0 = svld1_vnum_s64_x4 (pn8, x0, -32), + z0 = svld1_vnum_x4 (pn8, x0, -32)) + +/* +** ld1_vnum_s64_m36: +** [^{]* +** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_m36, svint64x4_t, int64_t, + z0 = svld1_vnum_s64_x4 (pn8, x0, -36), + z0 = svld1_vnum_x4 (pn8, x0, -36)) + +/* +** ld1_vnum_s64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1d {z0\.d - z3\.d}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s64_x1, svint64x4_t, int64_t, + z0 = svld1_vnum_s64_x4 (pn8, x0, x1), + z0 = svld1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..b18daa4bde2261f92745888b5886ee3b7dcfe309 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s8_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_s8_base: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_base, svint8x2_t, int8_t, + z0 = svld1_s8_x2 (pn8, x0), + z0 = svld1_x2 (pn8, x0)) + +/* +** ld1_s8_index: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_index, svint8x2_t, int8_t, + z0 = svld1_s8_x2 (pn8, x0 + x1), + z0 = svld1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s8_1: +** incb x0 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_1, svint8x2_t, int8_t, + z0 = svld1_s8_x2 (pn8, x0 + svcntb ()), + z0 = svld1_x2 (pn8, x0 + svcntb ())) + +/* +** ld1_s8_2: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_2, svint8x2_t, int8_t, + z0 = svld1_s8_x2 (pn8, x0 + svcntb () * 2), + z0 = svld1_x2 (pn8, x0 + svcntb () * 2)) + +/* +** ld1_s8_14: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_14, svint8x2_t, int8_t, + z0 = svld1_s8_x2 (pn8, x0 + svcntb () * 14), + z0 = svld1_x2 (pn8, x0 + svcntb () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s8_16: +** incb x0, all, mul #16 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_16, svint8x2_t, int8_t, + z0 = svld1_s8_x2 (pn8, x0 + svcntb () * 16), + z0 = svld1_x2 (pn8, x0 + svcntb () * 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_s8_m1: +** decb x0 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_m1, svint8x2_t, int8_t, + z0 = svld1_s8_x2 (pn8, x0 - svcntb ()), + z0 = svld1_x2 (pn8, x0 - svcntb ())) + +/* +** ld1_s8_m2: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_m2, svint8x2_t, int8_t, + z0 = svld1_s8_x2 (pn8, x0 - svcntb () * 2), + z0 = svld1_x2 (pn8, x0 - svcntb () * 2)) + +/* +** ld1_s8_m16: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_m16, svint8x2_t, int8_t, + z0 = svld1_s8_x2 (pn8, x0 - svcntb () * 16), + z0 = svld1_x2 (pn8, x0 - svcntb () * 16)) + +/* +** ld1_s8_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_m18, svint8x2_t, int8_t, + z0 = svld1_s8_x2 (pn8, x0 - svcntb () * 18), + z0 = svld1_x2 (pn8, x0 - svcntb () * 18)) + +/* +** ld1_s8_z17: +** ld1b {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_z17, svint8x2_t, int8_t, + z17 = svld1_s8_x2 (pn8, x0), + z17 = svld1_x2 (pn8, x0)) + +/* +** ld1_s8_z22: +** ld1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_z22, svint8x2_t, int8_t, + z22 = svld1_s8_x2 (pn8, x0), + z22 = svld1_x2 (pn8, x0)) + +/* +** ld1_s8_z28: +** ld1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_z28, svint8x2_t, int8_t, + z28 = svld1_s8_x2 (pn8, x0), + z28 = svld1_x2 (pn8, x0)) + +/* +** ld1_s8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_pn0, svint8x2_t, int8_t, + z0 = svld1_s8_x2 (pn0, x0), + z0 = svld1_x2 (pn0, x0)) + +/* +** ld1_s8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_pn7, svint8x2_t, int8_t, + z0 = svld1_s8_x2 (pn7, x0), + z0 = svld1_x2 (pn7, x0)) + +/* +** ld1_s8_pn15: +** ld1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_pn15, svint8x2_t, int8_t, + z0 = svld1_s8_x2 (pn15, x0), + z0 = svld1_x2 (pn15, x0)) + +/* +** ld1_vnum_s8_0: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_0, svint8x2_t, int8_t, + z0 = svld1_vnum_s8_x2 (pn8, x0, 0), + z0 = svld1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s8_1: +** incb x0 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_1, svint8x2_t, int8_t, + z0 = svld1_vnum_s8_x2 (pn8, x0, 1), + z0 = svld1_vnum_x2 (pn8, x0, 1)) + +/* +** ld1_vnum_s8_2: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_2, svint8x2_t, int8_t, + z0 = svld1_vnum_s8_x2 (pn8, x0, 2), + z0 = svld1_vnum_x2 (pn8, x0, 2)) + +/* +** ld1_vnum_s8_14: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_14, svint8x2_t, int8_t, + z0 = svld1_vnum_s8_x2 (pn8, x0, 14), + z0 = svld1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s8_16: +** incb x0, all, mul #16 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_16, svint8x2_t, int8_t, + z0 = svld1_vnum_s8_x2 (pn8, x0, 16), + z0 = svld1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_vnum_s8_m1: +** decb x0 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_m1, svint8x2_t, int8_t, + z0 = svld1_vnum_s8_x2 (pn8, x0, -1), + z0 = svld1_vnum_x2 (pn8, x0, -1)) + +/* +** ld1_vnum_s8_m2: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_m2, svint8x2_t, int8_t, + z0 = svld1_vnum_s8_x2 (pn8, x0, -2), + z0 = svld1_vnum_x2 (pn8, x0, -2)) + +/* +** ld1_vnum_s8_m16: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_m16, svint8x2_t, int8_t, + z0 = svld1_vnum_s8_x2 (pn8, x0, -16), + z0 = svld1_vnum_x2 (pn8, x0, -16)) + +/* +** ld1_vnum_s8_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_m18, svint8x2_t, int8_t, + z0 = svld1_vnum_s8_x2 (pn8, x0, -18), + z0 = svld1_vnum_x2 (pn8, x0, -18)) + +/* +** ld1_vnum_s8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_x1, svint8x2_t, int8_t, + z0 = svld1_vnum_s8_x2 (pn8, x0, x1), + z0 = svld1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..cbcc27ad3c80653e7fa78e40cd2c9509f65c5758 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_s8_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_s8_base: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_base, svint8x4_t, int8_t, + z0 = svld1_s8_x4 (pn8, x0), + z0 = svld1_x4 (pn8, x0)) + +/* +** ld1_s8_index: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_index, svint8x4_t, int8_t, + z0 = svld1_s8_x4 (pn8, x0 + x1), + z0 = svld1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s8_1: +** incb x0 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_1, svint8x4_t, int8_t, + z0 = svld1_s8_x4 (pn8, x0 + svcntb ()), + z0 = svld1_x4 (pn8, x0 + svcntb ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s8_2: +** incb x0, all, mul #2 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_2, svint8x4_t, int8_t, + z0 = svld1_s8_x4 (pn8, x0 + svcntb () * 2), + z0 = svld1_x4 (pn8, x0 + svcntb () * 2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_s8_3: +** incb x0, all, mul #3 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_3, svint8x4_t, int8_t, + z0 = svld1_s8_x4 (pn8, x0 + svcntb () * 3), + z0 = svld1_x4 (pn8, x0 + svcntb () * 3)) + +/* +** ld1_s8_4: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_4, svint8x4_t, int8_t, + z0 = svld1_s8_x4 (pn8, x0 + svcntb () * 4), + z0 = svld1_x4 (pn8, x0 + svcntb () * 4)) + +/* +** ld1_s8_28: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_28, svint8x4_t, int8_t, + z0 = svld1_s8_x4 (pn8, x0 + svcntb () * 28), + z0 = svld1_x4 (pn8, x0 + svcntb () * 28)) + +/* +** ld1_s8_32: +** [^{]* +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_32, svint8x4_t, int8_t, + z0 = svld1_s8_x4 (pn8, x0 + svcntb () * 32), + z0 = svld1_x4 (pn8, x0 + svcntb () * 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s8_m1: +** decb x0 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_m1, svint8x4_t, int8_t, + z0 = svld1_s8_x4 (pn8, x0 - svcntb ()), + z0 = svld1_x4 (pn8, x0 - svcntb ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s8_m2: +** decb x0, all, mul #2 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_m2, svint8x4_t, int8_t, + z0 = svld1_s8_x4 (pn8, x0 - svcntb () * 2), + z0 = svld1_x4 (pn8, x0 - svcntb () * 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_s8_m3: +** decb x0, all, mul #3 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_m3, svint8x4_t, int8_t, + z0 = svld1_s8_x4 (pn8, x0 - svcntb () * 3), + z0 = svld1_x4 (pn8, x0 - svcntb () * 3)) + +/* +** ld1_s8_m4: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ + TEST_LOAD_COUNT (ld1_s8_m4, svint8x4_t, int8_t, + z0 = svld1_s8_x4 (pn8, x0 - svcntb () * 4), + z0 = svld1_x4 (pn8, x0 - svcntb () * 4)) + +/* +** ld1_s8_m32: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_m32, svint8x4_t, int8_t, + z0 = svld1_s8_x4 (pn8, x0 - svcntb () * 32), + z0 = svld1_x4 (pn8, x0 - svcntb () * 32)) + +/* +** ld1_s8_m36: +** [^{]* +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_m36, svint8x4_t, int8_t, + z0 = svld1_s8_x4 (pn8, x0 - svcntb () * 36), + z0 = svld1_x4 (pn8, x0 - svcntb () * 36)) + +/* +** ld1_s8_z17: +** ld1b {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_z17, svint8x4_t, int8_t, + z17 = svld1_s8_x4 (pn8, x0), + z17 = svld1_x4 (pn8, x0)) + +/* +** ld1_s8_z22: +** ld1b {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_z22, svint8x4_t, int8_t, + z22 = svld1_s8_x4 (pn8, x0), + z22 = svld1_x4 (pn8, x0)) + +/* +** ld1_s8_z28: +** ld1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_z28, svint8x4_t, int8_t, + z28 = svld1_s8_x4 (pn8, x0), + z28 = svld1_x4 (pn8, x0)) + +/* +** ld1_s8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_pn0, svint8x4_t, int8_t, + z0 = svld1_s8_x4 (pn0, x0), + z0 = svld1_x4 (pn0, x0)) + +/* +** ld1_s8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_pn7, svint8x4_t, int8_t, + z0 = svld1_s8_x4 (pn7, x0), 
+ z0 = svld1_x4 (pn7, x0)) + +/* +** ld1_s8_pn15: +** ld1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_s8_pn15, svint8x4_t, int8_t, + z0 = svld1_s8_x4 (pn15, x0), + z0 = svld1_x4 (pn15, x0)) + +/* +** ld1_vnum_s8_0: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_0, svint8x4_t, int8_t, + z0 = svld1_vnum_s8_x4 (pn8, x0, 0), + z0 = svld1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s8_1: +** incb x0 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_1, svint8x4_t, int8_t, + z0 = svld1_vnum_s8_x4 (pn8, x0, 1), + z0 = svld1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s8_2: +** incb x0, all, mul #2 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_2, svint8x4_t, int8_t, + z0 = svld1_vnum_s8_x4 (pn8, x0, 2), + z0 = svld1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s8_3: +** incb x0, all, mul #3 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_3, svint8x4_t, int8_t, + z0 = svld1_vnum_s8_x4 (pn8, x0, 3), + z0 = svld1_vnum_x4 (pn8, x0, 3)) + +/* +** ld1_vnum_s8_4: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_4, svint8x4_t, int8_t, + z0 = svld1_vnum_s8_x4 (pn8, x0, 4), + z0 = svld1_vnum_x4 (pn8, x0, 4)) + +/* +** ld1_vnum_s8_28: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_28, svint8x4_t, int8_t, + z0 = svld1_vnum_s8_x4 (pn8, x0, 28), + z0 = svld1_vnum_x4 (pn8, x0, 28)) + +/* +** ld1_vnum_s8_32: +** [^{]* +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_32, svint8x4_t, int8_t, + z0 = svld1_vnum_s8_x4 (pn8, x0, 32), + z0 = svld1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s8_m1: +** decb x0 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_m1, svint8x4_t, int8_t, + z0 = svld1_vnum_s8_x4 (pn8, x0, -1), + z0 = svld1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_s8_m2: +** decb x0, all, mul #2 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_m2, svint8x4_t, int8_t, + z0 = svld1_vnum_s8_x4 (pn8, x0, -2), + z0 = svld1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_vnum_s8_m3: +** decb x0, all, mul #3 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_m3, svint8x4_t, int8_t, + z0 = svld1_vnum_s8_x4 (pn8, x0, -3), + z0 = svld1_vnum_x4 (pn8, x0, -3)) + +/* +** ld1_vnum_s8_m4: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_m4, svint8x4_t, int8_t, + z0 = svld1_vnum_s8_x4 (pn8, x0, -4), + z0 = svld1_vnum_x4 (pn8, x0, -4)) + +/* +** ld1_vnum_s8_m32: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_m32, svint8x4_t, int8_t, + z0 = svld1_vnum_s8_x4 (pn8, x0, -32), + z0 = svld1_vnum_x4 (pn8, x0, -32)) + +/* +** ld1_vnum_s8_m36: +** [^{]* +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_m36, svint8x4_t, int8_t, + z0 = svld1_vnum_s8_x4 (pn8, x0, -36), + z0 = svld1_vnum_x4 (pn8, x0, -36)) + +/* +** ld1_vnum_s8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1b {z0\.b - z3\.b}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_s8_x1, svint8x4_t, int8_t, + z0 = svld1_vnum_s8_x4 (pn8, x0, x1), + z0 = svld1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..8c15a76b44a616c9d36633874c8f7cdb91378c1c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u16_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_u16_base: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_base, svuint16x2_t, uint16_t, + z0 = svld1_u16_x2 (pn8, x0), + z0 = svld1_x2 (pn8, x0)) + +/* +** ld1_u16_index: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_index, svuint16x2_t, uint16_t, + z0 = svld1_u16_x2 (pn8, x0 + x1), + z0 = svld1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u16_1: +** incb x0 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_1, svuint16x2_t, uint16_t, + z0 = svld1_u16_x2 (pn8, x0 + svcnth ()), + z0 = svld1_x2 (pn8, x0 + svcnth ())) + +/* +** ld1_u16_2: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_2, svuint16x2_t, uint16_t, + z0 = svld1_u16_x2 (pn8, x0 + svcnth () * 2), + z0 = svld1_x2 (pn8, x0 + svcnth () * 2)) + +/* +** ld1_u16_14: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_14, svuint16x2_t, uint16_t, + z0 = svld1_u16_x2 (pn8, x0 + svcnth () * 14), + z0 = svld1_x2 (pn8, x0 + svcnth () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u16_16: +** incb x0, all, mul #16 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_16, svuint16x2_t, uint16_t, + z0 = svld1_u16_x2 (pn8, x0 + svcnth () * 16), + z0 = svld1_x2 (pn8, x0 + svcnth () * 16)) + +/* Moving the constant into a register would also be OK. 
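For ld1_u16_m1 below, that could for instance be addvl x1, x0, #-1 followed by a load from [x1], the same scratch-register form that the _m18 tests further down already expect once the offset is outside the decb range; the patterns here only pin down the form GCC currently emits.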
*/ +/* +** ld1_u16_m1: +** decb x0 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_m1, svuint16x2_t, uint16_t, + z0 = svld1_u16_x2 (pn8, x0 - svcnth ()), + z0 = svld1_x2 (pn8, x0 - svcnth ())) + +/* +** ld1_u16_m2: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_m2, svuint16x2_t, uint16_t, + z0 = svld1_u16_x2 (pn8, x0 - svcnth () * 2), + z0 = svld1_x2 (pn8, x0 - svcnth () * 2)) + +/* +** ld1_u16_m16: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_m16, svuint16x2_t, uint16_t, + z0 = svld1_u16_x2 (pn8, x0 - svcnth () * 16), + z0 = svld1_x2 (pn8, x0 - svcnth () * 16)) + +/* +** ld1_u16_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_m18, svuint16x2_t, uint16_t, + z0 = svld1_u16_x2 (pn8, x0 - svcnth () * 18), + z0 = svld1_x2 (pn8, x0 - svcnth () * 18)) + +/* +** ld1_u16_z17: +** ld1h {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_z17, svuint16x2_t, uint16_t, + z17 = svld1_u16_x2 (pn8, x0), + z17 = svld1_x2 (pn8, x0)) + +/* +** ld1_u16_z22: +** ld1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_z22, svuint16x2_t, uint16_t, + z22 = svld1_u16_x2 (pn8, x0), + z22 = svld1_x2 (pn8, x0)) + +/* +** ld1_u16_z28: +** ld1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_z28, svuint16x2_t, uint16_t, + z28 = svld1_u16_x2 (pn8, x0), + z28 = svld1_x2 (pn8, x0)) + +/* +** ld1_u16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_pn0, svuint16x2_t, uint16_t, + z0 = svld1_u16_x2 (pn0, x0), + z0 = svld1_x2 (pn0, x0)) + +/* +** ld1_u16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_pn7, svuint16x2_t, uint16_t, + z0 = svld1_u16_x2 (pn7, x0), + z0 = svld1_x2 (pn7, x0)) + +/* +** ld1_u16_pn15: +** ld1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_pn15, svuint16x2_t, uint16_t, + z0 = svld1_u16_x2 (pn15, x0), + z0 = svld1_x2 (pn15, x0)) + +/* +** ld1_vnum_u16_0: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_0, svuint16x2_t, uint16_t, + z0 = svld1_vnum_u16_x2 (pn8, x0, 0), + z0 = svld1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u16_1: +** incb x0 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_1, svuint16x2_t, uint16_t, + z0 = svld1_vnum_u16_x2 (pn8, x0, 1), + z0 = svld1_vnum_x2 (pn8, x0, 1)) + +/* +** ld1_vnum_u16_2: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_2, svuint16x2_t, uint16_t, + z0 = svld1_vnum_u16_x2 (pn8, x0, 2), + z0 = svld1_vnum_x2 (pn8, x0, 2)) + +/* +** ld1_vnum_u16_14: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_14, svuint16x2_t, uint16_t, + z0 = svld1_vnum_u16_x2 (pn8, x0, 14), + z0 = svld1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_vnum_u16_16: +** incb x0, all, mul #16 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_16, svuint16x2_t, uint16_t, + z0 = svld1_vnum_u16_x2 (pn8, x0, 16), + z0 = svld1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u16_m1: +** decb x0 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_m1, svuint16x2_t, uint16_t, + z0 = svld1_vnum_u16_x2 (pn8, x0, -1), + z0 = svld1_vnum_x2 (pn8, x0, -1)) + +/* +** ld1_vnum_u16_m2: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_m2, svuint16x2_t, uint16_t, + z0 = svld1_vnum_u16_x2 (pn8, x0, -2), + z0 = svld1_vnum_x2 (pn8, x0, -2)) + +/* +** ld1_vnum_u16_m16: +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_m16, svuint16x2_t, uint16_t, + z0 = svld1_vnum_u16_x2 (pn8, x0, -16), + z0 = svld1_vnum_x2 (pn8, x0, -16)) + +/* +** ld1_vnum_u16_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_m18, svuint16x2_t, uint16_t, + z0 = svld1_vnum_u16_x2 (pn8, x0, -18), + z0 = svld1_vnum_x2 (pn8, x0, -18)) + +/* +** ld1_vnum_u16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_x1, svuint16x2_t, uint16_t, + z0 = svld1_vnum_u16_x2 (pn8, x0, x1), + z0 = svld1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..41ed814c9012caaa4b90fc2e9188610916404d5d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u16_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_u16_base: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_base, svuint16x4_t, uint16_t, + z0 = svld1_u16_x4 (pn8, x0), + z0 = svld1_x4 (pn8, x0)) + +/* +** ld1_u16_index: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_index, svuint16x4_t, uint16_t, + z0 = svld1_u16_x4 (pn8, x0 + x1), + z0 = svld1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u16_1: +** incb x0 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_1, svuint16x4_t, uint16_t, + z0 = svld1_u16_x4 (pn8, x0 + svcnth ()), + z0 = svld1_x4 (pn8, x0 + svcnth ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u16_2: +** incb x0, all, mul #2 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_2, svuint16x4_t, uint16_t, + z0 = svld1_u16_x4 (pn8, x0 + svcnth () * 2), + z0 = svld1_x4 (pn8, x0 + svcnth () * 2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_u16_3: +** incb x0, all, mul #3 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_3, svuint16x4_t, uint16_t, + z0 = svld1_u16_x4 (pn8, x0 + svcnth () * 3), + z0 = svld1_x4 (pn8, x0 + svcnth () * 3)) + +/* +** ld1_u16_4: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_4, svuint16x4_t, uint16_t, + z0 = svld1_u16_x4 (pn8, x0 + svcnth () * 4), + z0 = svld1_x4 (pn8, x0 + svcnth () * 4)) + +/* +** ld1_u16_28: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_28, svuint16x4_t, uint16_t, + z0 = svld1_u16_x4 (pn8, x0 + svcnth () * 28), + z0 = svld1_x4 (pn8, x0 + svcnth () * 28)) + +/* +** ld1_u16_32: +** [^{]* +** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_32, svuint16x4_t, uint16_t, + z0 = svld1_u16_x4 (pn8, x0 + svcnth () * 32), + z0 = svld1_x4 (pn8, x0 + svcnth () * 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u16_m1: +** decb x0 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_m1, svuint16x4_t, uint16_t, + z0 = svld1_u16_x4 (pn8, x0 - svcnth ()), + z0 = svld1_x4 (pn8, x0 - svcnth ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u16_m2: +** decb x0, all, mul #2 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_m2, svuint16x4_t, uint16_t, + z0 = svld1_u16_x4 (pn8, x0 - svcnth () * 2), + z0 = svld1_x4 (pn8, x0 - svcnth () * 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u16_m3: +** decb x0, all, mul #3 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_m3, svuint16x4_t, uint16_t, + z0 = svld1_u16_x4 (pn8, x0 - svcnth () * 3), + z0 = svld1_x4 (pn8, x0 - svcnth () * 3)) + +/* +** ld1_u16_m4: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_m4, svuint16x4_t, uint16_t, + z0 = svld1_u16_x4 (pn8, x0 - svcnth () * 4), + z0 = svld1_x4 (pn8, x0 - svcnth () * 4)) + +/* +** ld1_u16_m32: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_m32, svuint16x4_t, uint16_t, + z0 = svld1_u16_x4 (pn8, x0 - svcnth () * 32), + z0 = svld1_x4 (pn8, x0 - svcnth () * 32)) + +/* +** ld1_u16_m36: +** [^{]* +** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_m36, svuint16x4_t, uint16_t, + z0 = svld1_u16_x4 (pn8, x0 - svcnth () * 36), + z0 = svld1_x4 (pn8, x0 - svcnth () * 36)) + +/* +** ld1_u16_z17: +** ld1h {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_z17, svuint16x4_t, uint16_t, + z17 = svld1_u16_x4 (pn8, x0), + z17 = svld1_x4 (pn8, x0)) + +/* +** ld1_u16_z22: +** ld1h {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_z22, svuint16x4_t, uint16_t, + z22 = svld1_u16_x4 (pn8, x0), + z22 = svld1_x4 (pn8, x0)) + +/* +** ld1_u16_z28: +** ld1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_z28, svuint16x4_t, uint16_t, + z28 = svld1_u16_x4 (pn8, x0), + z28 = svld1_x4 (pn8, x0)) + +/* +** ld1_u16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_pn0, svuint16x4_t, uint16_t, + z0 = svld1_u16_x4 (pn0, x0), + z0 = svld1_x4 (pn0, x0)) + +/* +** ld1_u16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret +*/ +TEST_LOAD_COUNT (ld1_u16_pn7, svuint16x4_t, uint16_t, + z0 = svld1_u16_x4 (pn7, x0), + z0 = svld1_x4 (pn7, x0)) + +/* +** ld1_u16_pn15: +** ld1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u16_pn15, svuint16x4_t, uint16_t, + z0 = svld1_u16_x4 (pn15, x0), + z0 = svld1_x4 (pn15, x0)) + +/* +** ld1_vnum_u16_0: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_0, svuint16x4_t, uint16_t, + z0 = svld1_vnum_u16_x4 (pn8, x0, 0), + z0 = svld1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u16_1: +** incb x0 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_1, svuint16x4_t, uint16_t, + z0 = svld1_vnum_u16_x4 (pn8, x0, 1), + z0 = svld1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u16_2: +** incb x0, all, mul #2 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_2, svuint16x4_t, uint16_t, + z0 = svld1_vnum_u16_x4 (pn8, x0, 2), + z0 = svld1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u16_3: +** incb x0, all, mul #3 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_3, svuint16x4_t, uint16_t, + z0 = svld1_vnum_u16_x4 (pn8, x0, 3), + z0 = svld1_vnum_x4 (pn8, x0, 3)) + +/* +** ld1_vnum_u16_4: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_4, svuint16x4_t, uint16_t, + z0 = svld1_vnum_u16_x4 (pn8, x0, 4), + z0 = svld1_vnum_x4 (pn8, x0, 4)) + +/* +** ld1_vnum_u16_28: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_28, svuint16x4_t, uint16_t, + z0 = svld1_vnum_u16_x4 (pn8, x0, 28), + z0 = svld1_vnum_x4 (pn8, x0, 28)) + +/* +** ld1_vnum_u16_32: +** [^{]* +** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_32, svuint16x4_t, uint16_t, + z0 = svld1_vnum_u16_x4 (pn8, x0, 32), + z0 = svld1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u16_m1: +** decb x0 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_m1, svuint16x4_t, uint16_t, + z0 = svld1_vnum_u16_x4 (pn8, x0, -1), + z0 = svld1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u16_m2: +** decb x0, all, mul #2 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_m2, svuint16x4_t, uint16_t, + z0 = svld1_vnum_u16_x4 (pn8, x0, -2), + z0 = svld1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_vnum_u16_m3: +** decb x0, all, mul #3 +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_m3, svuint16x4_t, uint16_t, + z0 = svld1_vnum_u16_x4 (pn8, x0, -3), + z0 = svld1_vnum_x4 (pn8, x0, -3)) + +/* +** ld1_vnum_u16_m4: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_m4, svuint16x4_t, uint16_t, + z0 = svld1_vnum_u16_x4 (pn8, x0, -4), + z0 = svld1_vnum_x4 (pn8, x0, -4)) + +/* +** ld1_vnum_u16_m32: +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_m32, svuint16x4_t, uint16_t, + z0 = svld1_vnum_u16_x4 (pn8, x0, -32), + z0 = svld1_vnum_x4 (pn8, x0, -32)) + +/* +** ld1_vnum_u16_m36: +** [^{]* +** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_m36, svuint16x4_t, uint16_t, + z0 = svld1_vnum_u16_x4 (pn8, x0, -36), + z0 = svld1_vnum_x4 (pn8, x0, -36)) + +/* +** ld1_vnum_u16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1h {z0\.h - z3\.h}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u16_x1, svuint16x4_t, uint16_t, + z0 = svld1_vnum_u16_x4 (pn8, x0, x1), + z0 = svld1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..8262d8c86ae21c7519321f6e813ed1286d135dfc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u32_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_u32_base: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_base, svuint32x2_t, uint32_t, + z0 = svld1_u32_x2 (pn8, x0), + z0 = svld1_x2 (pn8, x0)) + +/* +** ld1_u32_index: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_index, svuint32x2_t, uint32_t, + z0 = svld1_u32_x2 (pn8, x0 + x1), + z0 = svld1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u32_1: +** incb x0 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_1, svuint32x2_t, uint32_t, + z0 = svld1_u32_x2 (pn8, x0 + svcntw ()), + z0 = svld1_x2 (pn8, x0 + svcntw ())) + +/* +** ld1_u32_2: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_2, svuint32x2_t, uint32_t, + z0 = svld1_u32_x2 (pn8, x0 + svcntw () * 2), + z0 = svld1_x2 (pn8, x0 + svcntw () * 2)) + +/* +** ld1_u32_14: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_14, svuint32x2_t, uint32_t, + z0 = svld1_u32_x2 (pn8, x0 + svcntw () * 14), + z0 = svld1_x2 (pn8, x0 + svcntw () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u32_16: +** incb x0, all, mul #16 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_16, svuint32x2_t, uint32_t, + z0 = svld1_u32_x2 (pn8, x0 + svcntw () * 16), + z0 = svld1_x2 (pn8, x0 + svcntw () * 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_u32_m1: +** decb x0 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_m1, svuint32x2_t, uint32_t, + z0 = svld1_u32_x2 (pn8, x0 - svcntw ()), + z0 = svld1_x2 (pn8, x0 - svcntw ())) + +/* +** ld1_u32_m2: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_m2, svuint32x2_t, uint32_t, + z0 = svld1_u32_x2 (pn8, x0 - svcntw () * 2), + z0 = svld1_x2 (pn8, x0 - svcntw () * 2)) + +/* +** ld1_u32_m16: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_m16, svuint32x2_t, uint32_t, + z0 = svld1_u32_x2 (pn8, x0 - svcntw () * 16), + z0 = svld1_x2 (pn8, x0 - svcntw () * 16)) + +/* +** ld1_u32_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_m18, svuint32x2_t, uint32_t, + z0 = svld1_u32_x2 (pn8, x0 - svcntw () * 18), + z0 = svld1_x2 (pn8, x0 - svcntw () * 18)) + +/* +** ld1_u32_z17: +** ld1w {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_z17, svuint32x2_t, uint32_t, + z17 = svld1_u32_x2 (pn8, x0), + z17 = svld1_x2 (pn8, x0)) + +/* +** ld1_u32_z22: +** ld1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_z22, svuint32x2_t, uint32_t, + z22 = svld1_u32_x2 (pn8, x0), + z22 = svld1_x2 (pn8, x0)) + +/* +** ld1_u32_z28: +** ld1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_z28, svuint32x2_t, uint32_t, + z28 = svld1_u32_x2 (pn8, x0), + z28 = svld1_x2 (pn8, x0)) + +/* +** ld1_u32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_pn0, svuint32x2_t, uint32_t, + z0 = svld1_u32_x2 (pn0, x0), + z0 = svld1_x2 (pn0, x0)) + +/* +** ld1_u32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_pn7, svuint32x2_t, uint32_t, + z0 = svld1_u32_x2 (pn7, x0), + z0 = svld1_x2 (pn7, x0)) + +/* +** ld1_u32_pn15: +** ld1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_pn15, svuint32x2_t, uint32_t, + z0 = svld1_u32_x2 (pn15, x0), + z0 = svld1_x2 (pn15, x0)) + +/* +** ld1_vnum_u32_0: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_0, svuint32x2_t, uint32_t, + z0 = svld1_vnum_u32_x2 (pn8, x0, 0), + z0 = svld1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u32_1: +** incb x0 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_1, svuint32x2_t, uint32_t, + z0 = svld1_vnum_u32_x2 (pn8, x0, 1), + z0 = svld1_vnum_x2 (pn8, x0, 1)) + +/* +** ld1_vnum_u32_2: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_2, svuint32x2_t, uint32_t, + z0 = svld1_vnum_u32_x2 (pn8, x0, 2), + z0 = svld1_vnum_x2 (pn8, x0, 2)) + +/* +** ld1_vnum_u32_14: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_14, svuint32x2_t, uint32_t, + z0 = svld1_vnum_u32_x2 (pn8, x0, 14), + z0 = svld1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_vnum_u32_16: +** incb x0, all, mul #16 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_16, svuint32x2_t, uint32_t, + z0 = svld1_vnum_u32_x2 (pn8, x0, 16), + z0 = svld1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u32_m1: +** decb x0 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_m1, svuint32x2_t, uint32_t, + z0 = svld1_vnum_u32_x2 (pn8, x0, -1), + z0 = svld1_vnum_x2 (pn8, x0, -1)) + +/* +** ld1_vnum_u32_m2: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_m2, svuint32x2_t, uint32_t, + z0 = svld1_vnum_u32_x2 (pn8, x0, -2), + z0 = svld1_vnum_x2 (pn8, x0, -2)) + +/* +** ld1_vnum_u32_m16: +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_m16, svuint32x2_t, uint32_t, + z0 = svld1_vnum_u32_x2 (pn8, x0, -16), + z0 = svld1_vnum_x2 (pn8, x0, -16)) + +/* +** ld1_vnum_u32_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_m18, svuint32x2_t, uint32_t, + z0 = svld1_vnum_u32_x2 (pn8, x0, -18), + z0 = svld1_vnum_x2 (pn8, x0, -18)) + +/* +** ld1_vnum_u32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_x1, svuint32x2_t, uint32_t, + z0 = svld1_vnum_u32_x2 (pn8, x0, x1), + z0 = svld1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..0c2a9fa4cca6bf4b54e86806f110285b65020daa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u32_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_u32_base: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_base, svuint32x4_t, uint32_t, + z0 = svld1_u32_x4 (pn8, x0), + z0 = svld1_x4 (pn8, x0)) + +/* +** ld1_u32_index: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_index, svuint32x4_t, uint32_t, + z0 = svld1_u32_x4 (pn8, x0 + x1), + z0 = svld1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u32_1: +** incb x0 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_1, svuint32x4_t, uint32_t, + z0 = svld1_u32_x4 (pn8, x0 + svcntw ()), + z0 = svld1_x4 (pn8, x0 + svcntw ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u32_2: +** incb x0, all, mul #2 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_2, svuint32x4_t, uint32_t, + z0 = svld1_u32_x4 (pn8, x0 + svcntw () * 2), + z0 = svld1_x4 (pn8, x0 + svcntw () * 2)) + +/* Moving the constant into a register would also be OK. 
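For ld1_u32_3 below, a sequence along the lines of addvl x1, x0, #3 followed by a load from [x1] would equally satisfy the intent; the pattern simply matches the incb form that GCC currently generates.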
*/ +/* +** ld1_u32_3: +** incb x0, all, mul #3 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_3, svuint32x4_t, uint32_t, + z0 = svld1_u32_x4 (pn8, x0 + svcntw () * 3), + z0 = svld1_x4 (pn8, x0 + svcntw () * 3)) + +/* +** ld1_u32_4: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_4, svuint32x4_t, uint32_t, + z0 = svld1_u32_x4 (pn8, x0 + svcntw () * 4), + z0 = svld1_x4 (pn8, x0 + svcntw () * 4)) + +/* +** ld1_u32_28: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_28, svuint32x4_t, uint32_t, + z0 = svld1_u32_x4 (pn8, x0 + svcntw () * 28), + z0 = svld1_x4 (pn8, x0 + svcntw () * 28)) + +/* +** ld1_u32_32: +** [^{]* +** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_32, svuint32x4_t, uint32_t, + z0 = svld1_u32_x4 (pn8, x0 + svcntw () * 32), + z0 = svld1_x4 (pn8, x0 + svcntw () * 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u32_m1: +** decb x0 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_m1, svuint32x4_t, uint32_t, + z0 = svld1_u32_x4 (pn8, x0 - svcntw ()), + z0 = svld1_x4 (pn8, x0 - svcntw ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u32_m2: +** decb x0, all, mul #2 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_m2, svuint32x4_t, uint32_t, + z0 = svld1_u32_x4 (pn8, x0 - svcntw () * 2), + z0 = svld1_x4 (pn8, x0 - svcntw () * 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u32_m3: +** decb x0, all, mul #3 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_m3, svuint32x4_t, uint32_t, + z0 = svld1_u32_x4 (pn8, x0 - svcntw () * 3), + z0 = svld1_x4 (pn8, x0 - svcntw () * 3)) + +/* +** ld1_u32_m4: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_m4, svuint32x4_t, uint32_t, + z0 = svld1_u32_x4 (pn8, x0 - svcntw () * 4), + z0 = svld1_x4 (pn8, x0 - svcntw () * 4)) + +/* +** ld1_u32_m32: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_m32, svuint32x4_t, uint32_t, + z0 = svld1_u32_x4 (pn8, x0 - svcntw () * 32), + z0 = svld1_x4 (pn8, x0 - svcntw () * 32)) + +/* +** ld1_u32_m36: +** [^{]* +** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_m36, svuint32x4_t, uint32_t, + z0 = svld1_u32_x4 (pn8, x0 - svcntw () * 36), + z0 = svld1_x4 (pn8, x0 - svcntw () * 36)) + +/* +** ld1_u32_z17: +** ld1w {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_z17, svuint32x4_t, uint32_t, + z17 = svld1_u32_x4 (pn8, x0), + z17 = svld1_x4 (pn8, x0)) + +/* +** ld1_u32_z22: +** ld1w {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_z22, svuint32x4_t, uint32_t, + z22 = svld1_u32_x4 (pn8, x0), + z22 = svld1_x4 (pn8, x0)) + +/* +** ld1_u32_z28: +** ld1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_z28, svuint32x4_t, uint32_t, + z28 = svld1_u32_x4 (pn8, x0), + z28 = svld1_x4 (pn8, x0)) + +/* +** ld1_u32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_pn0, svuint32x4_t, uint32_t, + z0 = svld1_u32_x4 (pn0, x0), + z0 = svld1_x4 (pn0, x0)) + +/* +** ld1_u32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret +*/ +TEST_LOAD_COUNT (ld1_u32_pn7, svuint32x4_t, uint32_t, + z0 = svld1_u32_x4 (pn7, x0), + z0 = svld1_x4 (pn7, x0)) + +/* +** ld1_u32_pn15: +** ld1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u32_pn15, svuint32x4_t, uint32_t, + z0 = svld1_u32_x4 (pn15, x0), + z0 = svld1_x4 (pn15, x0)) + +/* +** ld1_vnum_u32_0: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_0, svuint32x4_t, uint32_t, + z0 = svld1_vnum_u32_x4 (pn8, x0, 0), + z0 = svld1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u32_1: +** incb x0 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_1, svuint32x4_t, uint32_t, + z0 = svld1_vnum_u32_x4 (pn8, x0, 1), + z0 = svld1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u32_2: +** incb x0, all, mul #2 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_2, svuint32x4_t, uint32_t, + z0 = svld1_vnum_u32_x4 (pn8, x0, 2), + z0 = svld1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u32_3: +** incb x0, all, mul #3 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_3, svuint32x4_t, uint32_t, + z0 = svld1_vnum_u32_x4 (pn8, x0, 3), + z0 = svld1_vnum_x4 (pn8, x0, 3)) + +/* +** ld1_vnum_u32_4: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_4, svuint32x4_t, uint32_t, + z0 = svld1_vnum_u32_x4 (pn8, x0, 4), + z0 = svld1_vnum_x4 (pn8, x0, 4)) + +/* +** ld1_vnum_u32_28: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_28, svuint32x4_t, uint32_t, + z0 = svld1_vnum_u32_x4 (pn8, x0, 28), + z0 = svld1_vnum_x4 (pn8, x0, 28)) + +/* +** ld1_vnum_u32_32: +** [^{]* +** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_32, svuint32x4_t, uint32_t, + z0 = svld1_vnum_u32_x4 (pn8, x0, 32), + z0 = svld1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u32_m1: +** decb x0 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_m1, svuint32x4_t, uint32_t, + z0 = svld1_vnum_u32_x4 (pn8, x0, -1), + z0 = svld1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u32_m2: +** decb x0, all, mul #2 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_m2, svuint32x4_t, uint32_t, + z0 = svld1_vnum_u32_x4 (pn8, x0, -2), + z0 = svld1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_vnum_u32_m3: +** decb x0, all, mul #3 +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_m3, svuint32x4_t, uint32_t, + z0 = svld1_vnum_u32_x4 (pn8, x0, -3), + z0 = svld1_vnum_x4 (pn8, x0, -3)) + +/* +** ld1_vnum_u32_m4: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_m4, svuint32x4_t, uint32_t, + z0 = svld1_vnum_u32_x4 (pn8, x0, -4), + z0 = svld1_vnum_x4 (pn8, x0, -4)) + +/* +** ld1_vnum_u32_m32: +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_m32, svuint32x4_t, uint32_t, + z0 = svld1_vnum_u32_x4 (pn8, x0, -32), + z0 = svld1_vnum_x4 (pn8, x0, -32)) + +/* +** ld1_vnum_u32_m36: +** [^{]* +** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_m36, svuint32x4_t, uint32_t, + z0 = svld1_vnum_u32_x4 (pn8, x0, -36), + z0 = svld1_vnum_x4 (pn8, x0, -36)) + +/* +** ld1_vnum_u32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1w {z0\.s - z3\.s}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u32_x1, svuint32x4_t, uint32_t, + z0 = svld1_vnum_u32_x4 (pn8, x0, x1), + z0 = svld1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..7cd45ef1b659b2bd7cf236b9d8c341418d1f4f3c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u64_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_u64_base: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_base, svuint64x2_t, uint64_t, + z0 = svld1_u64_x2 (pn8, x0), + z0 = svld1_x2 (pn8, x0)) + +/* +** ld1_u64_index: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_index, svuint64x2_t, uint64_t, + z0 = svld1_u64_x2 (pn8, x0 + x1), + z0 = svld1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u64_1: +** incb x0 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_1, svuint64x2_t, uint64_t, + z0 = svld1_u64_x2 (pn8, x0 + svcntd ()), + z0 = svld1_x2 (pn8, x0 + svcntd ())) + +/* +** ld1_u64_2: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_2, svuint64x2_t, uint64_t, + z0 = svld1_u64_x2 (pn8, x0 + svcntd () * 2), + z0 = svld1_x2 (pn8, x0 + svcntd () * 2)) + +/* +** ld1_u64_14: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_14, svuint64x2_t, uint64_t, + z0 = svld1_u64_x2 (pn8, x0 + svcntd () * 14), + z0 = svld1_x2 (pn8, x0 + svcntd () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u64_16: +** incb x0, all, mul #16 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_16, svuint64x2_t, uint64_t, + z0 = svld1_u64_x2 (pn8, x0 + svcntd () * 16), + z0 = svld1_x2 (pn8, x0 + svcntd () * 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_u64_m1: +** decb x0 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_m1, svuint64x2_t, uint64_t, + z0 = svld1_u64_x2 (pn8, x0 - svcntd ()), + z0 = svld1_x2 (pn8, x0 - svcntd ())) + +/* +** ld1_u64_m2: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_m2, svuint64x2_t, uint64_t, + z0 = svld1_u64_x2 (pn8, x0 - svcntd () * 2), + z0 = svld1_x2 (pn8, x0 - svcntd () * 2)) + +/* +** ld1_u64_m16: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_m16, svuint64x2_t, uint64_t, + z0 = svld1_u64_x2 (pn8, x0 - svcntd () * 16), + z0 = svld1_x2 (pn8, x0 - svcntd () * 16)) + +/* +** ld1_u64_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_m18, svuint64x2_t, uint64_t, + z0 = svld1_u64_x2 (pn8, x0 - svcntd () * 18), + z0 = svld1_x2 (pn8, x0 - svcntd () * 18)) + +/* +** ld1_u64_z17: +** ld1d {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_z17, svuint64x2_t, uint64_t, + z17 = svld1_u64_x2 (pn8, x0), + z17 = svld1_x2 (pn8, x0)) + +/* +** ld1_u64_z22: +** ld1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_z22, svuint64x2_t, uint64_t, + z22 = svld1_u64_x2 (pn8, x0), + z22 = svld1_x2 (pn8, x0)) + +/* +** ld1_u64_z28: +** ld1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_z28, svuint64x2_t, uint64_t, + z28 = svld1_u64_x2 (pn8, x0), + z28 = svld1_x2 (pn8, x0)) + +/* +** ld1_u64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_pn0, svuint64x2_t, uint64_t, + z0 = svld1_u64_x2 (pn0, x0), + z0 = svld1_x2 (pn0, x0)) + +/* +** ld1_u64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_pn7, svuint64x2_t, uint64_t, + z0 = svld1_u64_x2 (pn7, x0), + z0 = svld1_x2 (pn7, x0)) + +/* +** ld1_u64_pn15: +** ld1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_pn15, svuint64x2_t, uint64_t, + z0 = svld1_u64_x2 (pn15, x0), + z0 = svld1_x2 (pn15, x0)) + +/* +** ld1_vnum_u64_0: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_0, svuint64x2_t, uint64_t, + z0 = svld1_vnum_u64_x2 (pn8, x0, 0), + z0 = svld1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u64_1: +** incb x0 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_1, svuint64x2_t, uint64_t, + z0 = svld1_vnum_u64_x2 (pn8, x0, 1), + z0 = svld1_vnum_x2 (pn8, x0, 1)) + +/* +** ld1_vnum_u64_2: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_2, svuint64x2_t, uint64_t, + z0 = svld1_vnum_u64_x2 (pn8, x0, 2), + z0 = svld1_vnum_x2 (pn8, x0, 2)) + +/* +** ld1_vnum_u64_14: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_14, svuint64x2_t, uint64_t, + z0 = svld1_vnum_u64_x2 (pn8, x0, 14), + z0 = svld1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_vnum_u64_16: +** incb x0, all, mul #16 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_16, svuint64x2_t, uint64_t, + z0 = svld1_vnum_u64_x2 (pn8, x0, 16), + z0 = svld1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u64_m1: +** decb x0 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_m1, svuint64x2_t, uint64_t, + z0 = svld1_vnum_u64_x2 (pn8, x0, -1), + z0 = svld1_vnum_x2 (pn8, x0, -1)) + +/* +** ld1_vnum_u64_m2: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_m2, svuint64x2_t, uint64_t, + z0 = svld1_vnum_u64_x2 (pn8, x0, -2), + z0 = svld1_vnum_x2 (pn8, x0, -2)) + +/* +** ld1_vnum_u64_m16: +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_m16, svuint64x2_t, uint64_t, + z0 = svld1_vnum_u64_x2 (pn8, x0, -16), + z0 = svld1_vnum_x2 (pn8, x0, -16)) + +/* +** ld1_vnum_u64_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_m18, svuint64x2_t, uint64_t, + z0 = svld1_vnum_u64_x2 (pn8, x0, -18), + z0 = svld1_vnum_x2 (pn8, x0, -18)) + +/* +** ld1_vnum_u64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_x1, svuint64x2_t, uint64_t, + z0 = svld1_vnum_u64_x2 (pn8, x0, x1), + z0 = svld1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..37f58dfecdfb2826b63dffababdb00a9f2a9eaf0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u64_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_u64_base: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_base, svuint64x4_t, uint64_t, + z0 = svld1_u64_x4 (pn8, x0), + z0 = svld1_x4 (pn8, x0)) + +/* +** ld1_u64_index: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_index, svuint64x4_t, uint64_t, + z0 = svld1_u64_x4 (pn8, x0 + x1), + z0 = svld1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u64_1: +** incb x0 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_1, svuint64x4_t, uint64_t, + z0 = svld1_u64_x4 (pn8, x0 + svcntd ()), + z0 = svld1_x4 (pn8, x0 + svcntd ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u64_2: +** incb x0, all, mul #2 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_2, svuint64x4_t, uint64_t, + z0 = svld1_u64_x4 (pn8, x0 + svcntd () * 2), + z0 = svld1_x4 (pn8, x0 + svcntd () * 2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_u64_3: +** incb x0, all, mul #3 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_3, svuint64x4_t, uint64_t, + z0 = svld1_u64_x4 (pn8, x0 + svcntd () * 3), + z0 = svld1_x4 (pn8, x0 + svcntd () * 3)) + +/* +** ld1_u64_4: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_4, svuint64x4_t, uint64_t, + z0 = svld1_u64_x4 (pn8, x0 + svcntd () * 4), + z0 = svld1_x4 (pn8, x0 + svcntd () * 4)) + +/* +** ld1_u64_28: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_28, svuint64x4_t, uint64_t, + z0 = svld1_u64_x4 (pn8, x0 + svcntd () * 28), + z0 = svld1_x4 (pn8, x0 + svcntd () * 28)) + +/* +** ld1_u64_32: +** [^{]* +** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_32, svuint64x4_t, uint64_t, + z0 = svld1_u64_x4 (pn8, x0 + svcntd () * 32), + z0 = svld1_x4 (pn8, x0 + svcntd () * 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u64_m1: +** decb x0 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_m1, svuint64x4_t, uint64_t, + z0 = svld1_u64_x4 (pn8, x0 - svcntd ()), + z0 = svld1_x4 (pn8, x0 - svcntd ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u64_m2: +** decb x0, all, mul #2 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_m2, svuint64x4_t, uint64_t, + z0 = svld1_u64_x4 (pn8, x0 - svcntd () * 2), + z0 = svld1_x4 (pn8, x0 - svcntd () * 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u64_m3: +** decb x0, all, mul #3 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_m3, svuint64x4_t, uint64_t, + z0 = svld1_u64_x4 (pn8, x0 - svcntd () * 3), + z0 = svld1_x4 (pn8, x0 - svcntd () * 3)) + +/* +** ld1_u64_m4: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_m4, svuint64x4_t, uint64_t, + z0 = svld1_u64_x4 (pn8, x0 - svcntd () * 4), + z0 = svld1_x4 (pn8, x0 - svcntd () * 4)) + +/* +** ld1_u64_m32: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_m32, svuint64x4_t, uint64_t, + z0 = svld1_u64_x4 (pn8, x0 - svcntd () * 32), + z0 = svld1_x4 (pn8, x0 - svcntd () * 32)) + +/* +** ld1_u64_m36: +** [^{]* +** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_m36, svuint64x4_t, uint64_t, + z0 = svld1_u64_x4 (pn8, x0 - svcntd () * 36), + z0 = svld1_x4 (pn8, x0 - svcntd () * 36)) + +/* +** ld1_u64_z17: +** ld1d {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_z17, svuint64x4_t, uint64_t, + z17 = svld1_u64_x4 (pn8, x0), + z17 = svld1_x4 (pn8, x0)) + +/* +** ld1_u64_z22: +** ld1d {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_z22, svuint64x4_t, uint64_t, + z22 = svld1_u64_x4 (pn8, x0), + z22 = svld1_x4 (pn8, x0)) + +/* +** ld1_u64_z28: +** ld1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_z28, svuint64x4_t, uint64_t, + z28 = svld1_u64_x4 (pn8, x0), + z28 = svld1_x4 (pn8, x0)) + +/* +** ld1_u64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_pn0, svuint64x4_t, uint64_t, + z0 = svld1_u64_x4 (pn0, x0), + z0 = svld1_x4 (pn0, x0)) + +/* +** ld1_u64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret +*/ +TEST_LOAD_COUNT (ld1_u64_pn7, svuint64x4_t, uint64_t, + z0 = svld1_u64_x4 (pn7, x0), + z0 = svld1_x4 (pn7, x0)) + +/* +** ld1_u64_pn15: +** ld1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u64_pn15, svuint64x4_t, uint64_t, + z0 = svld1_u64_x4 (pn15, x0), + z0 = svld1_x4 (pn15, x0)) + +/* +** ld1_vnum_u64_0: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_0, svuint64x4_t, uint64_t, + z0 = svld1_vnum_u64_x4 (pn8, x0, 0), + z0 = svld1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u64_1: +** incb x0 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_1, svuint64x4_t, uint64_t, + z0 = svld1_vnum_u64_x4 (pn8, x0, 1), + z0 = svld1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u64_2: +** incb x0, all, mul #2 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_2, svuint64x4_t, uint64_t, + z0 = svld1_vnum_u64_x4 (pn8, x0, 2), + z0 = svld1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u64_3: +** incb x0, all, mul #3 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_3, svuint64x4_t, uint64_t, + z0 = svld1_vnum_u64_x4 (pn8, x0, 3), + z0 = svld1_vnum_x4 (pn8, x0, 3)) + +/* +** ld1_vnum_u64_4: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_4, svuint64x4_t, uint64_t, + z0 = svld1_vnum_u64_x4 (pn8, x0, 4), + z0 = svld1_vnum_x4 (pn8, x0, 4)) + +/* +** ld1_vnum_u64_28: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_28, svuint64x4_t, uint64_t, + z0 = svld1_vnum_u64_x4 (pn8, x0, 28), + z0 = svld1_vnum_x4 (pn8, x0, 28)) + +/* +** ld1_vnum_u64_32: +** [^{]* +** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_32, svuint64x4_t, uint64_t, + z0 = svld1_vnum_u64_x4 (pn8, x0, 32), + z0 = svld1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u64_m1: +** decb x0 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_m1, svuint64x4_t, uint64_t, + z0 = svld1_vnum_u64_x4 (pn8, x0, -1), + z0 = svld1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u64_m2: +** decb x0, all, mul #2 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_m2, svuint64x4_t, uint64_t, + z0 = svld1_vnum_u64_x4 (pn8, x0, -2), + z0 = svld1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_vnum_u64_m3: +** decb x0, all, mul #3 +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_m3, svuint64x4_t, uint64_t, + z0 = svld1_vnum_u64_x4 (pn8, x0, -3), + z0 = svld1_vnum_x4 (pn8, x0, -3)) + +/* +** ld1_vnum_u64_m4: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_m4, svuint64x4_t, uint64_t, + z0 = svld1_vnum_u64_x4 (pn8, x0, -4), + z0 = svld1_vnum_x4 (pn8, x0, -4)) + +/* +** ld1_vnum_u64_m32: +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_m32, svuint64x4_t, uint64_t, + z0 = svld1_vnum_u64_x4 (pn8, x0, -32), + z0 = svld1_vnum_x4 (pn8, x0, -32)) + +/* +** ld1_vnum_u64_m36: +** [^{]* +** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_m36, svuint64x4_t, uint64_t, + z0 = svld1_vnum_u64_x4 (pn8, x0, -36), + z0 = svld1_vnum_x4 (pn8, x0, -36)) + +/* +** ld1_vnum_u64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1d {z0\.d - z3\.d}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u64_x1, svuint64x4_t, uint64_t, + z0 = svld1_vnum_u64_x4 (pn8, x0, x1), + z0 = svld1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..83e6b35ea5768a73284e55b668f3b96521a03896 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u8_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_u8_base: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_base, svuint8x2_t, uint8_t, + z0 = svld1_u8_x2 (pn8, x0), + z0 = svld1_x2 (pn8, x0)) + +/* +** ld1_u8_index: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_index, svuint8x2_t, uint8_t, + z0 = svld1_u8_x2 (pn8, x0 + x1), + z0 = svld1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u8_1: +** incb x0 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_1, svuint8x2_t, uint8_t, + z0 = svld1_u8_x2 (pn8, x0 + svcntb ()), + z0 = svld1_x2 (pn8, x0 + svcntb ())) + +/* +** ld1_u8_2: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_2, svuint8x2_t, uint8_t, + z0 = svld1_u8_x2 (pn8, x0 + svcntb () * 2), + z0 = svld1_x2 (pn8, x0 + svcntb () * 2)) + +/* +** ld1_u8_14: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_14, svuint8x2_t, uint8_t, + z0 = svld1_u8_x2 (pn8, x0 + svcntb () * 14), + z0 = svld1_x2 (pn8, x0 + svcntb () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u8_16: +** incb x0, all, mul #16 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_16, svuint8x2_t, uint8_t, + z0 = svld1_u8_x2 (pn8, x0 + svcntb () * 16), + z0 = svld1_x2 (pn8, x0 + svcntb () * 16)) + +/* Moving the constant into a register would also be OK. 
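For the byte loads below, that could be something like cntb x1 followed by sub x2, x0, x1 and a load from [x2]; the decb form matched here is just GCC's current choice.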
*/ +/* +** ld1_u8_m1: +** decb x0 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_m1, svuint8x2_t, uint8_t, + z0 = svld1_u8_x2 (pn8, x0 - svcntb ()), + z0 = svld1_x2 (pn8, x0 - svcntb ())) + +/* +** ld1_u8_m2: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_m2, svuint8x2_t, uint8_t, + z0 = svld1_u8_x2 (pn8, x0 - svcntb () * 2), + z0 = svld1_x2 (pn8, x0 - svcntb () * 2)) + +/* +** ld1_u8_m16: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_m16, svuint8x2_t, uint8_t, + z0 = svld1_u8_x2 (pn8, x0 - svcntb () * 16), + z0 = svld1_x2 (pn8, x0 - svcntb () * 16)) + +/* +** ld1_u8_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_m18, svuint8x2_t, uint8_t, + z0 = svld1_u8_x2 (pn8, x0 - svcntb () * 18), + z0 = svld1_x2 (pn8, x0 - svcntb () * 18)) + +/* +** ld1_u8_z17: +** ld1b {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_z17, svuint8x2_t, uint8_t, + z17 = svld1_u8_x2 (pn8, x0), + z17 = svld1_x2 (pn8, x0)) + +/* +** ld1_u8_z22: +** ld1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_z22, svuint8x2_t, uint8_t, + z22 = svld1_u8_x2 (pn8, x0), + z22 = svld1_x2 (pn8, x0)) + +/* +** ld1_u8_z28: +** ld1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_z28, svuint8x2_t, uint8_t, + z28 = svld1_u8_x2 (pn8, x0), + z28 = svld1_x2 (pn8, x0)) + +/* +** ld1_u8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_pn0, svuint8x2_t, uint8_t, + z0 = svld1_u8_x2 (pn0, x0), + z0 = svld1_x2 (pn0, x0)) + +/* +** ld1_u8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_pn7, svuint8x2_t, uint8_t, + z0 = svld1_u8_x2 (pn7, x0), + z0 = svld1_x2 (pn7, x0)) + +/* +** ld1_u8_pn15: +** ld1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_pn15, svuint8x2_t, uint8_t, + z0 = svld1_u8_x2 (pn15, x0), + z0 = svld1_x2 (pn15, x0)) + +/* +** ld1_vnum_u8_0: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_0, svuint8x2_t, uint8_t, + z0 = svld1_vnum_u8_x2 (pn8, x0, 0), + z0 = svld1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u8_1: +** incb x0 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_1, svuint8x2_t, uint8_t, + z0 = svld1_vnum_u8_x2 (pn8, x0, 1), + z0 = svld1_vnum_x2 (pn8, x0, 1)) + +/* +** ld1_vnum_u8_2: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_2, svuint8x2_t, uint8_t, + z0 = svld1_vnum_u8_x2 (pn8, x0, 2), + z0 = svld1_vnum_x2 (pn8, x0, 2)) + +/* +** ld1_vnum_u8_14: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_14, svuint8x2_t, uint8_t, + z0 = svld1_vnum_u8_x2 (pn8, x0, 14), + z0 = svld1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u8_16: +** incb x0, all, mul #16 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_16, svuint8x2_t, uint8_t, + z0 = svld1_vnum_u8_x2 (pn8, x0, 16), + z0 = svld1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ld1_vnum_u8_m1: +** decb x0 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_m1, svuint8x2_t, uint8_t, + z0 = svld1_vnum_u8_x2 (pn8, x0, -1), + z0 = svld1_vnum_x2 (pn8, x0, -1)) + +/* +** ld1_vnum_u8_m2: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_m2, svuint8x2_t, uint8_t, + z0 = svld1_vnum_u8_x2 (pn8, x0, -2), + z0 = svld1_vnum_x2 (pn8, x0, -2)) + +/* +** ld1_vnum_u8_m16: +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_m16, svuint8x2_t, uint8_t, + z0 = svld1_vnum_u8_x2 (pn8, x0, -16), + z0 = svld1_vnum_x2 (pn8, x0, -16)) + +/* +** ld1_vnum_u8_m18: +** addvl (x[0-9]+), x0, #-18 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_m18, svuint8x2_t, uint8_t, + z0 = svld1_vnum_u8_x2 (pn8, x0, -18), + z0 = svld1_vnum_x2 (pn8, x0, -18)) + +/* +** ld1_vnum_u8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_x1, svuint8x2_t, uint8_t, + z0 = svld1_vnum_u8_x2 (pn8, x0, x1), + z0 = svld1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..e6361a430ff51f7756dde1accb8bd3a6983d2ed8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_u8_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ld1_u8_base: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_base, svuint8x4_t, uint8_t, + z0 = svld1_u8_x4 (pn8, x0), + z0 = svld1_x4 (pn8, x0)) + +/* +** ld1_u8_index: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_index, svuint8x4_t, uint8_t, + z0 = svld1_u8_x4 (pn8, x0 + x1), + z0 = svld1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u8_1: +** incb x0 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_1, svuint8x4_t, uint8_t, + z0 = svld1_u8_x4 (pn8, x0 + svcntb ()), + z0 = svld1_x4 (pn8, x0 + svcntb ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u8_2: +** incb x0, all, mul #2 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_2, svuint8x4_t, uint8_t, + z0 = svld1_u8_x4 (pn8, x0 + svcntb () * 2), + z0 = svld1_x4 (pn8, x0 + svcntb () * 2)) + +/* Moving the constant into a register would also be OK. 
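+   3 is not a multiple of 4, so it is outside the range of the four-register +   #imm, mul vl form, which takes multiples of 4 in [-32, 28].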
*/ +/* +** ld1_u8_3: +** incb x0, all, mul #3 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_3, svuint8x4_t, uint8_t, + z0 = svld1_u8_x4 (pn8, x0 + svcntb () * 3), + z0 = svld1_x4 (pn8, x0 + svcntb () * 3)) + +/* +** ld1_u8_4: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_4, svuint8x4_t, uint8_t, + z0 = svld1_u8_x4 (pn8, x0 + svcntb () * 4), + z0 = svld1_x4 (pn8, x0 + svcntb () * 4)) + +/* +** ld1_u8_28: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_28, svuint8x4_t, uint8_t, + z0 = svld1_u8_x4 (pn8, x0 + svcntb () * 28), + z0 = svld1_x4 (pn8, x0 + svcntb () * 28)) + +/* +** ld1_u8_32: +** [^{]* +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_32, svuint8x4_t, uint8_t, + z0 = svld1_u8_x4 (pn8, x0 + svcntb () * 32), + z0 = svld1_x4 (pn8, x0 + svcntb () * 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u8_m1: +** decb x0 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_m1, svuint8x4_t, uint8_t, + z0 = svld1_u8_x4 (pn8, x0 - svcntb ()), + z0 = svld1_x4 (pn8, x0 - svcntb ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u8_m2: +** decb x0, all, mul #2 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_m2, svuint8x4_t, uint8_t, + z0 = svld1_u8_x4 (pn8, x0 - svcntb () * 2), + z0 = svld1_x4 (pn8, x0 - svcntb () * 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_u8_m3: +** decb x0, all, mul #3 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_m3, svuint8x4_t, uint8_t, + z0 = svld1_u8_x4 (pn8, x0 - svcntb () * 3), + z0 = svld1_x4 (pn8, x0 - svcntb () * 3)) + +/* +** ld1_u8_m4: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_m4, svuint8x4_t, uint8_t, + z0 = svld1_u8_x4 (pn8, x0 - svcntb () * 4), + z0 = svld1_x4 (pn8, x0 - svcntb () * 4)) + +/* +** ld1_u8_m32: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_m32, svuint8x4_t, uint8_t, + z0 = svld1_u8_x4 (pn8, x0 - svcntb () * 32), + z0 = svld1_x4 (pn8, x0 - svcntb () * 32)) + +/* +** ld1_u8_m36: +** [^{]* +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_m36, svuint8x4_t, uint8_t, + z0 = svld1_u8_x4 (pn8, x0 - svcntb () * 36), + z0 = svld1_x4 (pn8, x0 - svcntb () * 36)) + +/* +** ld1_u8_z17: +** ld1b {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_z17, svuint8x4_t, uint8_t, + z17 = svld1_u8_x4 (pn8, x0), + z17 = svld1_x4 (pn8, x0)) + +/* +** ld1_u8_z22: +** ld1b {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_z22, svuint8x4_t, uint8_t, + z22 = svld1_u8_x4 (pn8, x0), + z22 = svld1_x4 (pn8, x0)) + +/* +** ld1_u8_z28: +** ld1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_z28, svuint8x4_t, uint8_t, + z28 = svld1_u8_x4 (pn8, x0), + z28 = svld1_x4 (pn8, x0)) + +/* +** ld1_u8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_pn0, svuint8x4_t, uint8_t, + z0 = svld1_u8_x4 (pn0, x0), + z0 = svld1_x4 (pn0, x0)) + +/* +** ld1_u8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_pn7, svuint8x4_t, uint8_t, 
+ z0 = svld1_u8_x4 (pn7, x0), + z0 = svld1_x4 (pn7, x0)) + +/* +** ld1_u8_pn15: +** ld1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_u8_pn15, svuint8x4_t, uint8_t, + z0 = svld1_u8_x4 (pn15, x0), + z0 = svld1_x4 (pn15, x0)) + +/* +** ld1_vnum_u8_0: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_0, svuint8x4_t, uint8_t, + z0 = svld1_vnum_u8_x4 (pn8, x0, 0), + z0 = svld1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u8_1: +** incb x0 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_1, svuint8x4_t, uint8_t, + z0 = svld1_vnum_u8_x4 (pn8, x0, 1), + z0 = svld1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u8_2: +** incb x0, all, mul #2 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_2, svuint8x4_t, uint8_t, + z0 = svld1_vnum_u8_x4 (pn8, x0, 2), + z0 = svld1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u8_3: +** incb x0, all, mul #3 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_3, svuint8x4_t, uint8_t, + z0 = svld1_vnum_u8_x4 (pn8, x0, 3), + z0 = svld1_vnum_x4 (pn8, x0, 3)) + +/* +** ld1_vnum_u8_4: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_4, svuint8x4_t, uint8_t, + z0 = svld1_vnum_u8_x4 (pn8, x0, 4), + z0 = svld1_vnum_x4 (pn8, x0, 4)) + +/* +** ld1_vnum_u8_28: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_28, svuint8x4_t, uint8_t, + z0 = svld1_vnum_u8_x4 (pn8, x0, 28), + z0 = svld1_vnum_x4 (pn8, x0, 28)) + +/* +** ld1_vnum_u8_32: +** [^{]* +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_32, svuint8x4_t, uint8_t, + z0 = svld1_vnum_u8_x4 (pn8, x0, 32), + z0 = svld1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u8_m1: +** decb x0 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_m1, svuint8x4_t, uint8_t, + z0 = svld1_vnum_u8_x4 (pn8, x0, -1), + z0 = svld1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ld1_vnum_u8_m2: +** decb x0, all, mul #2 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_m2, svuint8x4_t, uint8_t, + z0 = svld1_vnum_u8_x4 (pn8, x0, -2), + z0 = svld1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
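+   -3 is not a multiple of 4, so the #imm, mul vl form is not available.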
*/ +/* +** ld1_vnum_u8_m3: +** decb x0, all, mul #3 +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_m3, svuint8x4_t, uint8_t, + z0 = svld1_vnum_u8_x4 (pn8, x0, -3), + z0 = svld1_vnum_x4 (pn8, x0, -3)) + +/* +** ld1_vnum_u8_m4: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_m4, svuint8x4_t, uint8_t, + z0 = svld1_vnum_u8_x4 (pn8, x0, -4), + z0 = svld1_vnum_x4 (pn8, x0, -4)) + +/* +** ld1_vnum_u8_m32: +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_m32, svuint8x4_t, uint8_t, + z0 = svld1_vnum_u8_x4 (pn8, x0, -32), + z0 = svld1_vnum_x4 (pn8, x0, -32)) + +/* +** ld1_vnum_u8_m36: +** [^{]* +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_m36, svuint8x4_t, uint8_t, + z0 = svld1_vnum_u8_x4 (pn8, x0, -36), + z0 = svld1_vnum_x4 (pn8, x0, -36)) + +/* +** ld1_vnum_u8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ld1b {z0\.b - z3\.b}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ld1_vnum_u8_x1, svuint8x4_t, uint8_t, + z0 = svld1_vnum_u8_x4 (pn8, x0, x1), + z0 = svld1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_bf16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..d196144fe9f9ec3ed45c402a1d82bc02ae5ddf33 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_bf16_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_bf16_base: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_base, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_bf16_x2 (pn8, x0), + z0 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_bf16_index: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_index, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_bf16_x2 (pn8, x0 + x1), + z0 = svldnt1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_bf16_1: +** incb x0 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_1, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_bf16_x2 (pn8, x0 + svcnth ()), + z0 = svldnt1_x2 (pn8, x0 + svcnth ())) + +/* +** ldnt1_bf16_2: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_2, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_bf16_x2 (pn8, x0 + svcnth () * 2), + z0 = svldnt1_x2 (pn8, x0 + svcnth () * 2)) + +/* +** ldnt1_bf16_14: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_14, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_bf16_x2 (pn8, x0 + svcnth () * 14), + z0 = svldnt1_x2 (pn8, x0 + svcnth () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_bf16_16: +** incb x0, all, mul #16 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_16, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_bf16_x2 (pn8, x0 + svcnth () * 16), + z0 = svldnt1_x2 (pn8, x0 + svcnth () * 16)) + +/* Moving the constant into a register would also be OK. 
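+   -1 is not a multiple of 2, so it is outside the two-register +   #imm, mul vl range of multiples of 2 in [-16, 14].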
*/ +/* +** ldnt1_bf16_m1: +** decb x0 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_m1, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_bf16_x2 (pn8, x0 - svcnth ()), + z0 = svldnt1_x2 (pn8, x0 - svcnth ())) + +/* +** ldnt1_bf16_m2: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_m2, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_bf16_x2 (pn8, x0 - svcnth () * 2), + z0 = svldnt1_x2 (pn8, x0 - svcnth () * 2)) + +/* +** ldnt1_bf16_m16: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_m16, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_bf16_x2 (pn8, x0 - svcnth () * 16), + z0 = svldnt1_x2 (pn8, x0 - svcnth () * 16)) + +/* +** ldnt1_bf16_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_m18, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_bf16_x2 (pn8, x0 - svcnth () * 18), + z0 = svldnt1_x2 (pn8, x0 - svcnth () * 18)) + +/* +** ldnt1_bf16_z17: +** ldnt1h {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_z17, svbfloat16x2_t, bfloat16_t, + z17 = svldnt1_bf16_x2 (pn8, x0), + z17 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_bf16_z22: +** ldnt1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_z22, svbfloat16x2_t, bfloat16_t, + z22 = svldnt1_bf16_x2 (pn8, x0), + z22 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_bf16_z28: +** ldnt1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_z28, svbfloat16x2_t, bfloat16_t, + z28 = svldnt1_bf16_x2 (pn8, x0), + z28 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_bf16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_pn0, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_bf16_x2 (pn0, x0), + z0 = svldnt1_x2 (pn0, x0)) + +/* +** ldnt1_bf16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_pn7, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_bf16_x2 (pn7, x0), + z0 = svldnt1_x2 (pn7, x0)) + +/* +** ldnt1_bf16_pn15: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_pn15, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_bf16_x2 (pn15, x0), + z0 = svldnt1_x2 (pn15, x0)) + +/* +** ldnt1_vnum_bf16_0: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_0, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x2 (pn8, x0, 0), + z0 = svldnt1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_bf16_1: +** incb x0 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_1, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x2 (pn8, x0, 1), + z0 = svldnt1_vnum_x2 (pn8, x0, 1)) + +/* +** ldnt1_vnum_bf16_2: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_2, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x2 (pn8, x0, 2), + z0 = svldnt1_vnum_x2 (pn8, x0, 2)) + +/* +** ldnt1_vnum_bf16_14: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_14, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x2 (pn8, x0, 14), + z0 = svldnt1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
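+   16 is a multiple of 2 but exceeds the maximum #imm, mul vl offset of 14.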
*/ +/* +** ldnt1_vnum_bf16_16: +** incb x0, all, mul #16 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_16, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x2 (pn8, x0, 16), + z0 = svldnt1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_bf16_m1: +** decb x0 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_m1, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x2 (pn8, x0, -1), + z0 = svldnt1_vnum_x2 (pn8, x0, -1)) + +/* +** ldnt1_vnum_bf16_m2: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_m2, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x2 (pn8, x0, -2), + z0 = svldnt1_vnum_x2 (pn8, x0, -2)) + +/* +** ldnt1_vnum_bf16_m16: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_m16, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x2 (pn8, x0, -16), + z0 = svldnt1_vnum_x2 (pn8, x0, -16)) + +/* +** ldnt1_vnum_bf16_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_m18, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x2 (pn8, x0, -18), + z0 = svldnt1_vnum_x2 (pn8, x0, -18)) + +/* +** ldnt1_vnum_bf16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_x1, svbfloat16x2_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x2 (pn8, x0, x1), + z0 = svldnt1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_bf16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_bf16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..0b9903f9f7c8c6a181dfa8a383d3898991cafe7d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_bf16_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_bf16_base: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_base, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_bf16_x4 (pn8, x0), + z0 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_bf16_index: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_index, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_bf16_x4 (pn8, x0 + x1), + z0 = svldnt1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_bf16_1: +** incb x0 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_1, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth ()), + z0 = svldnt1_x4 (pn8, x0 + svcnth ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_bf16_2: +** incb x0, all, mul #2 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_2, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth () * 2), + z0 = svldnt1_x4 (pn8, x0 + svcnth () * 2)) + +/* Moving the constant into a register would also be OK. 
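+   3 is not a multiple of 4, so it is outside the four-register +   #imm, mul vl range of multiples of 4 in [-32, 28].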
*/ +/* +** ldnt1_bf16_3: +** incb x0, all, mul #3 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_3, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth () * 3), + z0 = svldnt1_x4 (pn8, x0 + svcnth () * 3)) + +/* +** ldnt1_bf16_4: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_4, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth () * 4), + z0 = svldnt1_x4 (pn8, x0 + svcnth () * 4)) + +/* +** ldnt1_bf16_28: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_28, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth () * 28), + z0 = svldnt1_x4 (pn8, x0 + svcnth () * 28)) + +/* +** ldnt1_bf16_32: +** [^{]* +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_32, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth () * 32), + z0 = svldnt1_x4 (pn8, x0 + svcnth () * 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_bf16_m1: +** decb x0 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_m1, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth ()), + z0 = svldnt1_x4 (pn8, x0 - svcnth ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_bf16_m2: +** decb x0, all, mul #2 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_m2, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth () * 2), + z0 = svldnt1_x4 (pn8, x0 - svcnth () * 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_bf16_m3: +** decb x0, all, mul #3 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_m3, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth () * 3), + z0 = svldnt1_x4 (pn8, x0 - svcnth () * 3)) + +/* +** ldnt1_bf16_m4: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_m4, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth () * 4), + z0 = svldnt1_x4 (pn8, x0 - svcnth () * 4)) + +/* +** ldnt1_bf16_m32: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_m32, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth () * 32), + z0 = svldnt1_x4 (pn8, x0 - svcnth () * 32)) + +/* +** ldnt1_bf16_m36: +** [^{]* +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_m36, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth () * 36), + z0 = svldnt1_x4 (pn8, x0 - svcnth () * 36)) + +/* +** ldnt1_bf16_z17: +** ldnt1h {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_z17, svbfloat16x4_t, bfloat16_t, + z17 = svldnt1_bf16_x4 (pn8, x0), + z17 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_bf16_z22: +** ldnt1h {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_z22, svbfloat16x4_t, bfloat16_t, + z22 = svldnt1_bf16_x4 (pn8, x0), + z22 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_bf16_z28: +** ldnt1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_z28, svbfloat16x4_t, bfloat16_t, + z28 = svldnt1_bf16_x4 (pn8, x0), + z28 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_bf16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\] +** 
ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_pn0, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_bf16_x4 (pn0, x0), + z0 = svldnt1_x4 (pn0, x0)) + +/* +** ldnt1_bf16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_pn7, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_bf16_x4 (pn7, x0), + z0 = svldnt1_x4 (pn7, x0)) + +/* +** ldnt1_bf16_pn15: +** ldnt1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_bf16_pn15, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_bf16_x4 (pn15, x0), + z0 = svldnt1_x4 (pn15, x0)) + +/* +** ldnt1_vnum_bf16_0: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_0, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 0), + z0 = svldnt1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_bf16_1: +** incb x0 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_1, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 1), + z0 = svldnt1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_bf16_2: +** incb x0, all, mul #2 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_2, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 2), + z0 = svldnt1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_bf16_3: +** incb x0, all, mul #3 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_3, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 3), + z0 = svldnt1_vnum_x4 (pn8, x0, 3)) + +/* +** ldnt1_vnum_bf16_4: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_4, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 4), + z0 = svldnt1_vnum_x4 (pn8, x0, 4)) + +/* +** ldnt1_vnum_bf16_28: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_28, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 28), + z0 = svldnt1_vnum_x4 (pn8, x0, 28)) + +/* +** ldnt1_vnum_bf16_32: +** [^{]* +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_32, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 32), + z0 = svldnt1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_bf16_m1: +** decb x0 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_m1, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -1), + z0 = svldnt1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_bf16_m2: +** decb x0, all, mul #2 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_m2, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -2), + z0 = svldnt1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
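+   -3 is not a multiple of 4, so the #imm, mul vl form is not available.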
*/ +/* +** ldnt1_vnum_bf16_m3: +** decb x0, all, mul #3 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_m3, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -3), + z0 = svldnt1_vnum_x4 (pn8, x0, -3)) + +/* +** ldnt1_vnum_bf16_m4: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_m4, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -4), + z0 = svldnt1_vnum_x4 (pn8, x0, -4)) + +/* +** ldnt1_vnum_bf16_m32: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_m32, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -32), + z0 = svldnt1_vnum_x4 (pn8, x0, -32)) + +/* +** ldnt1_vnum_bf16_m36: +** [^{]* +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_m36, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -36), + z0 = svldnt1_vnum_x4 (pn8, x0, -36)) + +/* +** ldnt1_vnum_bf16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_bf16_x1, svbfloat16x4_t, bfloat16_t, + z0 = svldnt1_vnum_bf16_x4 (pn8, x0, x1), + z0 = svldnt1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..6ae61143010e373d4fd03fefe3c0cde738de0b60 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f16_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_f16_base: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_base, svfloat16x2_t, float16_t, + z0 = svldnt1_f16_x2 (pn8, x0), + z0 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_f16_index: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_index, svfloat16x2_t, float16_t, + z0 = svldnt1_f16_x2 (pn8, x0 + x1), + z0 = svldnt1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_f16_1: +** incb x0 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_1, svfloat16x2_t, float16_t, + z0 = svldnt1_f16_x2 (pn8, x0 + svcnth ()), + z0 = svldnt1_x2 (pn8, x0 + svcnth ())) + +/* +** ldnt1_f16_2: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_2, svfloat16x2_t, float16_t, + z0 = svldnt1_f16_x2 (pn8, x0 + svcnth () * 2), + z0 = svldnt1_x2 (pn8, x0 + svcnth () * 2)) + +/* +** ldnt1_f16_14: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_14, svfloat16x2_t, float16_t, + z0 = svldnt1_f16_x2 (pn8, x0 + svcnth () * 14), + z0 = svldnt1_x2 (pn8, x0 + svcnth () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_f16_16: +** incb x0, all, mul #16 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_16, svfloat16x2_t, float16_t, + z0 = svldnt1_f16_x2 (pn8, x0 + svcnth () * 16), + z0 = svldnt1_x2 (pn8, x0 + svcnth () * 16)) + +/* Moving the constant into a register would also be OK. 
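+   -1 is not a multiple of 2, so the two-register #imm, mul vl form +   (multiples of 2 in [-16, 14]) does not apply.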
*/ +/* +** ldnt1_f16_m1: +** decb x0 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_m1, svfloat16x2_t, float16_t, + z0 = svldnt1_f16_x2 (pn8, x0 - svcnth ()), + z0 = svldnt1_x2 (pn8, x0 - svcnth ())) + +/* +** ldnt1_f16_m2: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_m2, svfloat16x2_t, float16_t, + z0 = svldnt1_f16_x2 (pn8, x0 - svcnth () * 2), + z0 = svldnt1_x2 (pn8, x0 - svcnth () * 2)) + +/* +** ldnt1_f16_m16: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_m16, svfloat16x2_t, float16_t, + z0 = svldnt1_f16_x2 (pn8, x0 - svcnth () * 16), + z0 = svldnt1_x2 (pn8, x0 - svcnth () * 16)) + +/* +** ldnt1_f16_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_m18, svfloat16x2_t, float16_t, + z0 = svldnt1_f16_x2 (pn8, x0 - svcnth () * 18), + z0 = svldnt1_x2 (pn8, x0 - svcnth () * 18)) + +/* +** ldnt1_f16_z17: +** ldnt1h {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_z17, svfloat16x2_t, float16_t, + z17 = svldnt1_f16_x2 (pn8, x0), + z17 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_f16_z22: +** ldnt1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_z22, svfloat16x2_t, float16_t, + z22 = svldnt1_f16_x2 (pn8, x0), + z22 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_f16_z28: +** ldnt1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_z28, svfloat16x2_t, float16_t, + z28 = svldnt1_f16_x2 (pn8, x0), + z28 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_f16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_pn0, svfloat16x2_t, float16_t, + z0 = svldnt1_f16_x2 (pn0, x0), + z0 = svldnt1_x2 (pn0, x0)) + +/* +** ldnt1_f16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_pn7, svfloat16x2_t, float16_t, + z0 = svldnt1_f16_x2 (pn7, x0), + z0 = svldnt1_x2 (pn7, x0)) + +/* +** ldnt1_f16_pn15: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_pn15, svfloat16x2_t, float16_t, + z0 = svldnt1_f16_x2 (pn15, x0), + z0 = svldnt1_x2 (pn15, x0)) + +/* +** ldnt1_vnum_f16_0: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_0, svfloat16x2_t, float16_t, + z0 = svldnt1_vnum_f16_x2 (pn8, x0, 0), + z0 = svldnt1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f16_1: +** incb x0 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_1, svfloat16x2_t, float16_t, + z0 = svldnt1_vnum_f16_x2 (pn8, x0, 1), + z0 = svldnt1_vnum_x2 (pn8, x0, 1)) + +/* +** ldnt1_vnum_f16_2: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_2, svfloat16x2_t, float16_t, + z0 = svldnt1_vnum_f16_x2 (pn8, x0, 2), + z0 = svldnt1_vnum_x2 (pn8, x0, 2)) + +/* +** ldnt1_vnum_f16_14: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_14, svfloat16x2_t, float16_t, + z0 = svldnt1_vnum_f16_x2 (pn8, x0, 14), + z0 = svldnt1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
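+   16 exceeds the maximum two-register #imm, mul vl offset of 14.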
*/ +/* +** ldnt1_vnum_f16_16: +** incb x0, all, mul #16 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_16, svfloat16x2_t, float16_t, + z0 = svldnt1_vnum_f16_x2 (pn8, x0, 16), + z0 = svldnt1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f16_m1: +** decb x0 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_m1, svfloat16x2_t, float16_t, + z0 = svldnt1_vnum_f16_x2 (pn8, x0, -1), + z0 = svldnt1_vnum_x2 (pn8, x0, -1)) + +/* +** ldnt1_vnum_f16_m2: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_m2, svfloat16x2_t, float16_t, + z0 = svldnt1_vnum_f16_x2 (pn8, x0, -2), + z0 = svldnt1_vnum_x2 (pn8, x0, -2)) + +/* +** ldnt1_vnum_f16_m16: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_m16, svfloat16x2_t, float16_t, + z0 = svldnt1_vnum_f16_x2 (pn8, x0, -16), + z0 = svldnt1_vnum_x2 (pn8, x0, -16)) + +/* +** ldnt1_vnum_f16_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_m18, svfloat16x2_t, float16_t, + z0 = svldnt1_vnum_f16_x2 (pn8, x0, -18), + z0 = svldnt1_vnum_x2 (pn8, x0, -18)) + +/* +** ldnt1_vnum_f16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_x1, svfloat16x2_t, float16_t, + z0 = svldnt1_vnum_f16_x2 (pn8, x0, x1), + z0 = svldnt1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..3dad40cea006d2d249770f06af0403fd745c93bd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f16_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_f16_base: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_base, svfloat16x4_t, float16_t, + z0 = svldnt1_f16_x4 (pn8, x0), + z0 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_f16_index: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_index, svfloat16x4_t, float16_t, + z0 = svldnt1_f16_x4 (pn8, x0 + x1), + z0 = svldnt1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_f16_1: +** incb x0 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_1, svfloat16x4_t, float16_t, + z0 = svldnt1_f16_x4 (pn8, x0 + svcnth ()), + z0 = svldnt1_x4 (pn8, x0 + svcnth ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_f16_2: +** incb x0, all, mul #2 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_2, svfloat16x4_t, float16_t, + z0 = svldnt1_f16_x4 (pn8, x0 + svcnth () * 2), + z0 = svldnt1_x4 (pn8, x0 + svcnth () * 2)) + +/* Moving the constant into a register would also be OK. 
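+   3 is not a multiple of 4, so the four-register #imm, mul vl form +   (multiples of 4 in [-32, 28]) does not apply.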
*/ +/* +** ldnt1_f16_3: +** incb x0, all, mul #3 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_3, svfloat16x4_t, float16_t, + z0 = svldnt1_f16_x4 (pn8, x0 + svcnth () * 3), + z0 = svldnt1_x4 (pn8, x0 + svcnth () * 3)) + +/* +** ldnt1_f16_4: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_4, svfloat16x4_t, float16_t, + z0 = svldnt1_f16_x4 (pn8, x0 + svcnth () * 4), + z0 = svldnt1_x4 (pn8, x0 + svcnth () * 4)) + +/* +** ldnt1_f16_28: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_28, svfloat16x4_t, float16_t, + z0 = svldnt1_f16_x4 (pn8, x0 + svcnth () * 28), + z0 = svldnt1_x4 (pn8, x0 + svcnth () * 28)) + +/* +** ldnt1_f16_32: +** [^{]* +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_32, svfloat16x4_t, float16_t, + z0 = svldnt1_f16_x4 (pn8, x0 + svcnth () * 32), + z0 = svldnt1_x4 (pn8, x0 + svcnth () * 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_f16_m1: +** decb x0 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_m1, svfloat16x4_t, float16_t, + z0 = svldnt1_f16_x4 (pn8, x0 - svcnth ()), + z0 = svldnt1_x4 (pn8, x0 - svcnth ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_f16_m2: +** decb x0, all, mul #2 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_m2, svfloat16x4_t, float16_t, + z0 = svldnt1_f16_x4 (pn8, x0 - svcnth () * 2), + z0 = svldnt1_x4 (pn8, x0 - svcnth () * 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_f16_m3: +** decb x0, all, mul #3 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_m3, svfloat16x4_t, float16_t, + z0 = svldnt1_f16_x4 (pn8, x0 - svcnth () * 3), + z0 = svldnt1_x4 (pn8, x0 - svcnth () * 3)) + +/* +** ldnt1_f16_m4: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_m4, svfloat16x4_t, float16_t, + z0 = svldnt1_f16_x4 (pn8, x0 - svcnth () * 4), + z0 = svldnt1_x4 (pn8, x0 - svcnth () * 4)) + +/* +** ldnt1_f16_m32: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_m32, svfloat16x4_t, float16_t, + z0 = svldnt1_f16_x4 (pn8, x0 - svcnth () * 32), + z0 = svldnt1_x4 (pn8, x0 - svcnth () * 32)) + +/* +** ldnt1_f16_m36: +** [^{]* +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_m36, svfloat16x4_t, float16_t, + z0 = svldnt1_f16_x4 (pn8, x0 - svcnth () * 36), + z0 = svldnt1_x4 (pn8, x0 - svcnth () * 36)) + +/* +** ldnt1_f16_z17: +** ldnt1h {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_z17, svfloat16x4_t, float16_t, + z17 = svldnt1_f16_x4 (pn8, x0), + z17 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_f16_z22: +** ldnt1h {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_z22, svfloat16x4_t, float16_t, + z22 = svldnt1_f16_x4 (pn8, x0), + z22 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_f16_z28: +** ldnt1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_z28, svfloat16x4_t, float16_t, + z28 = svldnt1_f16_x4 (pn8, x0), + z28 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_f16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_pn0, svfloat16x4_t, 
float16_t, + z0 = svldnt1_f16_x4 (pn0, x0), + z0 = svldnt1_x4 (pn0, x0)) + +/* +** ldnt1_f16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_pn7, svfloat16x4_t, float16_t, + z0 = svldnt1_f16_x4 (pn7, x0), + z0 = svldnt1_x4 (pn7, x0)) + +/* +** ldnt1_f16_pn15: +** ldnt1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f16_pn15, svfloat16x4_t, float16_t, + z0 = svldnt1_f16_x4 (pn15, x0), + z0 = svldnt1_x4 (pn15, x0)) + +/* +** ldnt1_vnum_f16_0: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_0, svfloat16x4_t, float16_t, + z0 = svldnt1_vnum_f16_x4 (pn8, x0, 0), + z0 = svldnt1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f16_1: +** incb x0 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_1, svfloat16x4_t, float16_t, + z0 = svldnt1_vnum_f16_x4 (pn8, x0, 1), + z0 = svldnt1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f16_2: +** incb x0, all, mul #2 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_2, svfloat16x4_t, float16_t, + z0 = svldnt1_vnum_f16_x4 (pn8, x0, 2), + z0 = svldnt1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f16_3: +** incb x0, all, mul #3 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_3, svfloat16x4_t, float16_t, + z0 = svldnt1_vnum_f16_x4 (pn8, x0, 3), + z0 = svldnt1_vnum_x4 (pn8, x0, 3)) + +/* +** ldnt1_vnum_f16_4: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_4, svfloat16x4_t, float16_t, + z0 = svldnt1_vnum_f16_x4 (pn8, x0, 4), + z0 = svldnt1_vnum_x4 (pn8, x0, 4)) + +/* +** ldnt1_vnum_f16_28: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_28, svfloat16x4_t, float16_t, + z0 = svldnt1_vnum_f16_x4 (pn8, x0, 28), + z0 = svldnt1_vnum_x4 (pn8, x0, 28)) + +/* +** ldnt1_vnum_f16_32: +** [^{]* +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_32, svfloat16x4_t, float16_t, + z0 = svldnt1_vnum_f16_x4 (pn8, x0, 32), + z0 = svldnt1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f16_m1: +** decb x0 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_m1, svfloat16x4_t, float16_t, + z0 = svldnt1_vnum_f16_x4 (pn8, x0, -1), + z0 = svldnt1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f16_m2: +** decb x0, all, mul #2 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_m2, svfloat16x4_t, float16_t, + z0 = svldnt1_vnum_f16_x4 (pn8, x0, -2), + z0 = svldnt1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
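+   -3 is not a multiple of 4, so the #imm, mul vl form is not available.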
*/ +/* +** ldnt1_vnum_f16_m3: +** decb x0, all, mul #3 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_m3, svfloat16x4_t, float16_t, + z0 = svldnt1_vnum_f16_x4 (pn8, x0, -3), + z0 = svldnt1_vnum_x4 (pn8, x0, -3)) + +/* +** ldnt1_vnum_f16_m4: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_m4, svfloat16x4_t, float16_t, + z0 = svldnt1_vnum_f16_x4 (pn8, x0, -4), + z0 = svldnt1_vnum_x4 (pn8, x0, -4)) + +/* +** ldnt1_vnum_f16_m32: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_m32, svfloat16x4_t, float16_t, + z0 = svldnt1_vnum_f16_x4 (pn8, x0, -32), + z0 = svldnt1_vnum_x4 (pn8, x0, -32)) + +/* +** ldnt1_vnum_f16_m36: +** [^{]* +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_m36, svfloat16x4_t, float16_t, + z0 = svldnt1_vnum_f16_x4 (pn8, x0, -36), + z0 = svldnt1_vnum_x4 (pn8, x0, -36)) + +/* +** ldnt1_vnum_f16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f16_x1, svfloat16x4_t, float16_t, + z0 = svldnt1_vnum_f16_x4 (pn8, x0, x1), + z0 = svldnt1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..dce1b82ffcd3d9306f00185b9f222c4d536bfc36 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f32_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_f32_base: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_base, svfloat32x2_t, float32_t, + z0 = svldnt1_f32_x2 (pn8, x0), + z0 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_f32_index: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_index, svfloat32x2_t, float32_t, + z0 = svldnt1_f32_x2 (pn8, x0 + x1), + z0 = svldnt1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_f32_1: +** incb x0 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_1, svfloat32x2_t, float32_t, + z0 = svldnt1_f32_x2 (pn8, x0 + svcntw ()), + z0 = svldnt1_x2 (pn8, x0 + svcntw ())) + +/* +** ldnt1_f32_2: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_2, svfloat32x2_t, float32_t, + z0 = svldnt1_f32_x2 (pn8, x0 + svcntw () * 2), + z0 = svldnt1_x2 (pn8, x0 + svcntw () * 2)) + +/* +** ldnt1_f32_14: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_14, svfloat32x2_t, float32_t, + z0 = svldnt1_f32_x2 (pn8, x0 + svcntw () * 14), + z0 = svldnt1_x2 (pn8, x0 + svcntw () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_f32_16: +** incb x0, all, mul #16 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_16, svfloat32x2_t, float32_t, + z0 = svldnt1_f32_x2 (pn8, x0 + svcntw () * 16), + z0 = svldnt1_x2 (pn8, x0 + svcntw () * 16)) + +/* Moving the constant into a register would also be OK. 
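+   -1 is not a multiple of 2, so the two-register #imm, mul vl form +   (multiples of 2 in [-16, 14]) does not apply.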
*/ +/* +** ldnt1_f32_m1: +** decb x0 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_m1, svfloat32x2_t, float32_t, + z0 = svldnt1_f32_x2 (pn8, x0 - svcntw ()), + z0 = svldnt1_x2 (pn8, x0 - svcntw ())) + +/* +** ldnt1_f32_m2: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_m2, svfloat32x2_t, float32_t, + z0 = svldnt1_f32_x2 (pn8, x0 - svcntw () * 2), + z0 = svldnt1_x2 (pn8, x0 - svcntw () * 2)) + +/* +** ldnt1_f32_m16: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_m16, svfloat32x2_t, float32_t, + z0 = svldnt1_f32_x2 (pn8, x0 - svcntw () * 16), + z0 = svldnt1_x2 (pn8, x0 - svcntw () * 16)) + +/* +** ldnt1_f32_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_m18, svfloat32x2_t, float32_t, + z0 = svldnt1_f32_x2 (pn8, x0 - svcntw () * 18), + z0 = svldnt1_x2 (pn8, x0 - svcntw () * 18)) + +/* +** ldnt1_f32_z17: +** ldnt1w {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_z17, svfloat32x2_t, float32_t, + z17 = svldnt1_f32_x2 (pn8, x0), + z17 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_f32_z22: +** ldnt1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_z22, svfloat32x2_t, float32_t, + z22 = svldnt1_f32_x2 (pn8, x0), + z22 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_f32_z28: +** ldnt1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_z28, svfloat32x2_t, float32_t, + z28 = svldnt1_f32_x2 (pn8, x0), + z28 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_f32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_pn0, svfloat32x2_t, float32_t, + z0 = svldnt1_f32_x2 (pn0, x0), + z0 = svldnt1_x2 (pn0, x0)) + +/* +** ldnt1_f32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_pn7, svfloat32x2_t, float32_t, + z0 = svldnt1_f32_x2 (pn7, x0), + z0 = svldnt1_x2 (pn7, x0)) + +/* +** ldnt1_f32_pn15: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_pn15, svfloat32x2_t, float32_t, + z0 = svldnt1_f32_x2 (pn15, x0), + z0 = svldnt1_x2 (pn15, x0)) + +/* +** ldnt1_vnum_f32_0: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_0, svfloat32x2_t, float32_t, + z0 = svldnt1_vnum_f32_x2 (pn8, x0, 0), + z0 = svldnt1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f32_1: +** incb x0 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_1, svfloat32x2_t, float32_t, + z0 = svldnt1_vnum_f32_x2 (pn8, x0, 1), + z0 = svldnt1_vnum_x2 (pn8, x0, 1)) + +/* +** ldnt1_vnum_f32_2: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_2, svfloat32x2_t, float32_t, + z0 = svldnt1_vnum_f32_x2 (pn8, x0, 2), + z0 = svldnt1_vnum_x2 (pn8, x0, 2)) + +/* +** ldnt1_vnum_f32_14: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_14, svfloat32x2_t, float32_t, + z0 = svldnt1_vnum_f32_x2 (pn8, x0, 14), + z0 = svldnt1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
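+   16 exceeds the maximum two-register #imm, mul vl offset of 14.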
*/ +/* +** ldnt1_vnum_f32_16: +** incb x0, all, mul #16 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_16, svfloat32x2_t, float32_t, + z0 = svldnt1_vnum_f32_x2 (pn8, x0, 16), + z0 = svldnt1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f32_m1: +** decb x0 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_m1, svfloat32x2_t, float32_t, + z0 = svldnt1_vnum_f32_x2 (pn8, x0, -1), + z0 = svldnt1_vnum_x2 (pn8, x0, -1)) + +/* +** ldnt1_vnum_f32_m2: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_m2, svfloat32x2_t, float32_t, + z0 = svldnt1_vnum_f32_x2 (pn8, x0, -2), + z0 = svldnt1_vnum_x2 (pn8, x0, -2)) + +/* +** ldnt1_vnum_f32_m16: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_m16, svfloat32x2_t, float32_t, + z0 = svldnt1_vnum_f32_x2 (pn8, x0, -16), + z0 = svldnt1_vnum_x2 (pn8, x0, -16)) + +/* +** ldnt1_vnum_f32_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_m18, svfloat32x2_t, float32_t, + z0 = svldnt1_vnum_f32_x2 (pn8, x0, -18), + z0 = svldnt1_vnum_x2 (pn8, x0, -18)) + +/* +** ldnt1_vnum_f32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_x1, svfloat32x2_t, float32_t, + z0 = svldnt1_vnum_f32_x2 (pn8, x0, x1), + z0 = svldnt1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..852c7c4d4e38e8803918b2d3f75843f0fcff24df --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f32_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_f32_base: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_base, svfloat32x4_t, float32_t, + z0 = svldnt1_f32_x4 (pn8, x0), + z0 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_f32_index: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_index, svfloat32x4_t, float32_t, + z0 = svldnt1_f32_x4 (pn8, x0 + x1), + z0 = svldnt1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_f32_1: +** incb x0 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_1, svfloat32x4_t, float32_t, + z0 = svldnt1_f32_x4 (pn8, x0 + svcntw ()), + z0 = svldnt1_x4 (pn8, x0 + svcntw ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_f32_2: +** incb x0, all, mul #2 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_2, svfloat32x4_t, float32_t, + z0 = svldnt1_f32_x4 (pn8, x0 + svcntw () * 2), + z0 = svldnt1_x4 (pn8, x0 + svcntw () * 2)) + +/* Moving the constant into a register would also be OK. 
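+   3 is not a multiple of 4, so the four-register #imm, mul vl form +   (multiples of 4 in [-32, 28]) does not apply.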
*/ +/* +** ldnt1_f32_3: +** incb x0, all, mul #3 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_3, svfloat32x4_t, float32_t, + z0 = svldnt1_f32_x4 (pn8, x0 + svcntw () * 3), + z0 = svldnt1_x4 (pn8, x0 + svcntw () * 3)) + +/* +** ldnt1_f32_4: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_4, svfloat32x4_t, float32_t, + z0 = svldnt1_f32_x4 (pn8, x0 + svcntw () * 4), + z0 = svldnt1_x4 (pn8, x0 + svcntw () * 4)) + +/* +** ldnt1_f32_28: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_28, svfloat32x4_t, float32_t, + z0 = svldnt1_f32_x4 (pn8, x0 + svcntw () * 28), + z0 = svldnt1_x4 (pn8, x0 + svcntw () * 28)) + +/* +** ldnt1_f32_32: +** [^{]* +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_32, svfloat32x4_t, float32_t, + z0 = svldnt1_f32_x4 (pn8, x0 + svcntw () * 32), + z0 = svldnt1_x4 (pn8, x0 + svcntw () * 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_f32_m1: +** decb x0 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_m1, svfloat32x4_t, float32_t, + z0 = svldnt1_f32_x4 (pn8, x0 - svcntw ()), + z0 = svldnt1_x4 (pn8, x0 - svcntw ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_f32_m2: +** decb x0, all, mul #2 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_m2, svfloat32x4_t, float32_t, + z0 = svldnt1_f32_x4 (pn8, x0 - svcntw () * 2), + z0 = svldnt1_x4 (pn8, x0 - svcntw () * 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_f32_m3: +** decb x0, all, mul #3 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_m3, svfloat32x4_t, float32_t, + z0 = svldnt1_f32_x4 (pn8, x0 - svcntw () * 3), + z0 = svldnt1_x4 (pn8, x0 - svcntw () * 3)) + +/* +** ldnt1_f32_m4: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_m4, svfloat32x4_t, float32_t, + z0 = svldnt1_f32_x4 (pn8, x0 - svcntw () * 4), + z0 = svldnt1_x4 (pn8, x0 - svcntw () * 4)) + +/* +** ldnt1_f32_m32: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_m32, svfloat32x4_t, float32_t, + z0 = svldnt1_f32_x4 (pn8, x0 - svcntw () * 32), + z0 = svldnt1_x4 (pn8, x0 - svcntw () * 32)) + +/* +** ldnt1_f32_m36: +** [^{]* +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_m36, svfloat32x4_t, float32_t, + z0 = svldnt1_f32_x4 (pn8, x0 - svcntw () * 36), + z0 = svldnt1_x4 (pn8, x0 - svcntw () * 36)) + +/* +** ldnt1_f32_z17: +** ldnt1w {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_z17, svfloat32x4_t, float32_t, + z17 = svldnt1_f32_x4 (pn8, x0), + z17 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_f32_z22: +** ldnt1w {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_z22, svfloat32x4_t, float32_t, + z22 = svldnt1_f32_x4 (pn8, x0), + z22 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_f32_z28: +** ldnt1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_z28, svfloat32x4_t, float32_t, + z28 = svldnt1_f32_x4 (pn8, x0), + z28 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_f32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_pn0, svfloat32x4_t, 
float32_t, + z0 = svldnt1_f32_x4 (pn0, x0), + z0 = svldnt1_x4 (pn0, x0)) + +/* +** ldnt1_f32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_pn7, svfloat32x4_t, float32_t, + z0 = svldnt1_f32_x4 (pn7, x0), + z0 = svldnt1_x4 (pn7, x0)) + +/* +** ldnt1_f32_pn15: +** ldnt1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f32_pn15, svfloat32x4_t, float32_t, + z0 = svldnt1_f32_x4 (pn15, x0), + z0 = svldnt1_x4 (pn15, x0)) + +/* +** ldnt1_vnum_f32_0: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_0, svfloat32x4_t, float32_t, + z0 = svldnt1_vnum_f32_x4 (pn8, x0, 0), + z0 = svldnt1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f32_1: +** incb x0 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_1, svfloat32x4_t, float32_t, + z0 = svldnt1_vnum_f32_x4 (pn8, x0, 1), + z0 = svldnt1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f32_2: +** incb x0, all, mul #2 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_2, svfloat32x4_t, float32_t, + z0 = svldnt1_vnum_f32_x4 (pn8, x0, 2), + z0 = svldnt1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f32_3: +** incb x0, all, mul #3 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_3, svfloat32x4_t, float32_t, + z0 = svldnt1_vnum_f32_x4 (pn8, x0, 3), + z0 = svldnt1_vnum_x4 (pn8, x0, 3)) + +/* +** ldnt1_vnum_f32_4: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_4, svfloat32x4_t, float32_t, + z0 = svldnt1_vnum_f32_x4 (pn8, x0, 4), + z0 = svldnt1_vnum_x4 (pn8, x0, 4)) + +/* +** ldnt1_vnum_f32_28: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_28, svfloat32x4_t, float32_t, + z0 = svldnt1_vnum_f32_x4 (pn8, x0, 28), + z0 = svldnt1_vnum_x4 (pn8, x0, 28)) + +/* +** ldnt1_vnum_f32_32: +** [^{]* +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_32, svfloat32x4_t, float32_t, + z0 = svldnt1_vnum_f32_x4 (pn8, x0, 32), + z0 = svldnt1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f32_m1: +** decb x0 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_m1, svfloat32x4_t, float32_t, + z0 = svldnt1_vnum_f32_x4 (pn8, x0, -1), + z0 = svldnt1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f32_m2: +** decb x0, all, mul #2 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_m2, svfloat32x4_t, float32_t, + z0 = svldnt1_vnum_f32_x4 (pn8, x0, -2), + z0 = svldnt1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
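+   -3 is not a multiple of 4, so the #imm, mul vl form is not available.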
*/ +/* +** ldnt1_vnum_f32_m3: +** decb x0, all, mul #3 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_m3, svfloat32x4_t, float32_t, + z0 = svldnt1_vnum_f32_x4 (pn8, x0, -3), + z0 = svldnt1_vnum_x4 (pn8, x0, -3)) + +/* +** ldnt1_vnum_f32_m4: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_m4, svfloat32x4_t, float32_t, + z0 = svldnt1_vnum_f32_x4 (pn8, x0, -4), + z0 = svldnt1_vnum_x4 (pn8, x0, -4)) + +/* +** ldnt1_vnum_f32_m32: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_m32, svfloat32x4_t, float32_t, + z0 = svldnt1_vnum_f32_x4 (pn8, x0, -32), + z0 = svldnt1_vnum_x4 (pn8, x0, -32)) + +/* +** ldnt1_vnum_f32_m36: +** [^{]* +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_m36, svfloat32x4_t, float32_t, + z0 = svldnt1_vnum_f32_x4 (pn8, x0, -36), + z0 = svldnt1_vnum_x4 (pn8, x0, -36)) + +/* +** ldnt1_vnum_f32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f32_x1, svfloat32x4_t, float32_t, + z0 = svldnt1_vnum_f32_x4 (pn8, x0, x1), + z0 = svldnt1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..77319bb946af71875fda27de0553a5ed92813bfe --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f64_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_f64_base: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_base, svfloat64x2_t, float64_t, + z0 = svldnt1_f64_x2 (pn8, x0), + z0 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_f64_index: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_index, svfloat64x2_t, float64_t, + z0 = svldnt1_f64_x2 (pn8, x0 + x1), + z0 = svldnt1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_f64_1: +** incb x0 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_1, svfloat64x2_t, float64_t, + z0 = svldnt1_f64_x2 (pn8, x0 + svcntd ()), + z0 = svldnt1_x2 (pn8, x0 + svcntd ())) + +/* +** ldnt1_f64_2: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_2, svfloat64x2_t, float64_t, + z0 = svldnt1_f64_x2 (pn8, x0 + svcntd () * 2), + z0 = svldnt1_x2 (pn8, x0 + svcntd () * 2)) + +/* +** ldnt1_f64_14: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_14, svfloat64x2_t, float64_t, + z0 = svldnt1_f64_x2 (pn8, x0 + svcntd () * 14), + z0 = svldnt1_x2 (pn8, x0 + svcntd () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_f64_16: +** incb x0, all, mul #16 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_16, svfloat64x2_t, float64_t, + z0 = svldnt1_f64_x2 (pn8, x0 + svcntd () * 16), + z0 = svldnt1_x2 (pn8, x0 + svcntd () * 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_f64_m1: +** decb x0 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_m1, svfloat64x2_t, float64_t, + z0 = svldnt1_f64_x2 (pn8, x0 - svcntd ()), + z0 = svldnt1_x2 (pn8, x0 - svcntd ())) + +/* +** ldnt1_f64_m2: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_m2, svfloat64x2_t, float64_t, + z0 = svldnt1_f64_x2 (pn8, x0 - svcntd () * 2), + z0 = svldnt1_x2 (pn8, x0 - svcntd () * 2)) + +/* +** ldnt1_f64_m16: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_m16, svfloat64x2_t, float64_t, + z0 = svldnt1_f64_x2 (pn8, x0 - svcntd () * 16), + z0 = svldnt1_x2 (pn8, x0 - svcntd () * 16)) + +/* +** ldnt1_f64_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_m18, svfloat64x2_t, float64_t, + z0 = svldnt1_f64_x2 (pn8, x0 - svcntd () * 18), + z0 = svldnt1_x2 (pn8, x0 - svcntd () * 18)) + +/* +** ldnt1_f64_z17: +** ldnt1d {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_z17, svfloat64x2_t, float64_t, + z17 = svldnt1_f64_x2 (pn8, x0), + z17 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_f64_z22: +** ldnt1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_z22, svfloat64x2_t, float64_t, + z22 = svldnt1_f64_x2 (pn8, x0), + z22 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_f64_z28: +** ldnt1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_z28, svfloat64x2_t, float64_t, + z28 = svldnt1_f64_x2 (pn8, x0), + z28 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_f64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_pn0, svfloat64x2_t, float64_t, + z0 = svldnt1_f64_x2 (pn0, x0), + z0 = svldnt1_x2 (pn0, x0)) + +/* +** ldnt1_f64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_pn7, svfloat64x2_t, float64_t, + z0 = svldnt1_f64_x2 (pn7, x0), + z0 = svldnt1_x2 (pn7, x0)) + +/* +** ldnt1_f64_pn15: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_pn15, svfloat64x2_t, float64_t, + z0 = svldnt1_f64_x2 (pn15, x0), + z0 = svldnt1_x2 (pn15, x0)) + +/* +** ldnt1_vnum_f64_0: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_0, svfloat64x2_t, float64_t, + z0 = svldnt1_vnum_f64_x2 (pn8, x0, 0), + z0 = svldnt1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f64_1: +** incb x0 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_1, svfloat64x2_t, float64_t, + z0 = svldnt1_vnum_f64_x2 (pn8, x0, 1), + z0 = svldnt1_vnum_x2 (pn8, x0, 1)) + +/* +** ldnt1_vnum_f64_2: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_2, svfloat64x2_t, float64_t, + z0 = svldnt1_vnum_f64_x2 (pn8, x0, 2), + z0 = svldnt1_vnum_x2 (pn8, x0, 2)) + +/* +** ldnt1_vnum_f64_14: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_14, svfloat64x2_t, float64_t, + z0 = svldnt1_vnum_f64_x2 (pn8, x0, 14), + z0 = svldnt1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_f64_16: +** incb x0, all, mul #16 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_16, svfloat64x2_t, float64_t, + z0 = svldnt1_vnum_f64_x2 (pn8, x0, 16), + z0 = svldnt1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f64_m1: +** decb x0 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_m1, svfloat64x2_t, float64_t, + z0 = svldnt1_vnum_f64_x2 (pn8, x0, -1), + z0 = svldnt1_vnum_x2 (pn8, x0, -1)) + +/* +** ldnt1_vnum_f64_m2: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_m2, svfloat64x2_t, float64_t, + z0 = svldnt1_vnum_f64_x2 (pn8, x0, -2), + z0 = svldnt1_vnum_x2 (pn8, x0, -2)) + +/* +** ldnt1_vnum_f64_m16: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_m16, svfloat64x2_t, float64_t, + z0 = svldnt1_vnum_f64_x2 (pn8, x0, -16), + z0 = svldnt1_vnum_x2 (pn8, x0, -16)) + +/* +** ldnt1_vnum_f64_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_m18, svfloat64x2_t, float64_t, + z0 = svldnt1_vnum_f64_x2 (pn8, x0, -18), + z0 = svldnt1_vnum_x2 (pn8, x0, -18)) + +/* +** ldnt1_vnum_f64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_x1, svfloat64x2_t, float64_t, + z0 = svldnt1_vnum_f64_x2 (pn8, x0, x1), + z0 = svldnt1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..bbaf66a07889a3405981a7c5c394c8da972e4811 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_f64_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_f64_base: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_base, svfloat64x4_t, float64_t, + z0 = svldnt1_f64_x4 (pn8, x0), + z0 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_f64_index: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_index, svfloat64x4_t, float64_t, + z0 = svldnt1_f64_x4 (pn8, x0 + x1), + z0 = svldnt1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_f64_1: +** incb x0 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_1, svfloat64x4_t, float64_t, + z0 = svldnt1_f64_x4 (pn8, x0 + svcntd ()), + z0 = svldnt1_x4 (pn8, x0 + svcntd ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_f64_2: +** incb x0, all, mul #2 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_2, svfloat64x4_t, float64_t, + z0 = svldnt1_f64_x4 (pn8, x0 + svcntd () * 2), + z0 = svldnt1_x4 (pn8, x0 + svcntd () * 2)) + +/* Moving the constant into a register would also be OK. 
*/
+/*
+** ldnt1_f64_3:
+** incb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_3, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 3))
+
+/*
+** ldnt1_f64_4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_4, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 4))
+
+/*
+** ldnt1_f64_28:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_28, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 28))
+
+/*
+** ldnt1_f64_32:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_32, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_m1:
+** decb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m1, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_m2:
+** decb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m2, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_m3:
+** decb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m3, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 3))
+
+/*
+** ldnt1_f64_m4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m4, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 4))
+
+/*
+** ldnt1_f64_m32:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m32, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 32))
+
+/*
+** ldnt1_f64_m36:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m36, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 36))
+
+/*
+** ldnt1_f64_z17:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z17, svfloat64x4_t, float64_t,
+ z17 = svldnt1_f64_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f64_z22:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z22, svfloat64x4_t, float64_t,
+ z22 = svldnt1_f64_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f64_z28:
+** ldnt1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z28, svfloat64x4_t, float64_t,
+ z28 = svldnt1_f64_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_pn0, svfloat64x4_t, 
float64_t, + z0 = svldnt1_f64_x4 (pn0, x0), + z0 = svldnt1_x4 (pn0, x0)) + +/* +** ldnt1_f64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_pn7, svfloat64x4_t, float64_t, + z0 = svldnt1_f64_x4 (pn7, x0), + z0 = svldnt1_x4 (pn7, x0)) + +/* +** ldnt1_f64_pn15: +** ldnt1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_f64_pn15, svfloat64x4_t, float64_t, + z0 = svldnt1_f64_x4 (pn15, x0), + z0 = svldnt1_x4 (pn15, x0)) + +/* +** ldnt1_vnum_f64_0: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_0, svfloat64x4_t, float64_t, + z0 = svldnt1_vnum_f64_x4 (pn8, x0, 0), + z0 = svldnt1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f64_1: +** incb x0 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_1, svfloat64x4_t, float64_t, + z0 = svldnt1_vnum_f64_x4 (pn8, x0, 1), + z0 = svldnt1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f64_2: +** incb x0, all, mul #2 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_2, svfloat64x4_t, float64_t, + z0 = svldnt1_vnum_f64_x4 (pn8, x0, 2), + z0 = svldnt1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f64_3: +** incb x0, all, mul #3 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_3, svfloat64x4_t, float64_t, + z0 = svldnt1_vnum_f64_x4 (pn8, x0, 3), + z0 = svldnt1_vnum_x4 (pn8, x0, 3)) + +/* +** ldnt1_vnum_f64_4: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_4, svfloat64x4_t, float64_t, + z0 = svldnt1_vnum_f64_x4 (pn8, x0, 4), + z0 = svldnt1_vnum_x4 (pn8, x0, 4)) + +/* +** ldnt1_vnum_f64_28: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_28, svfloat64x4_t, float64_t, + z0 = svldnt1_vnum_f64_x4 (pn8, x0, 28), + z0 = svldnt1_vnum_x4 (pn8, x0, 28)) + +/* +** ldnt1_vnum_f64_32: +** [^{]* +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_32, svfloat64x4_t, float64_t, + z0 = svldnt1_vnum_f64_x4 (pn8, x0, 32), + z0 = svldnt1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f64_m1: +** decb x0 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_m1, svfloat64x4_t, float64_t, + z0 = svldnt1_vnum_f64_x4 (pn8, x0, -1), + z0 = svldnt1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_f64_m2: +** decb x0, all, mul #2 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_m2, svfloat64x4_t, float64_t, + z0 = svldnt1_vnum_f64_x4 (pn8, x0, -2), + z0 = svldnt1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_f64_m3: +** decb x0, all, mul #3 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_m3, svfloat64x4_t, float64_t, + z0 = svldnt1_vnum_f64_x4 (pn8, x0, -3), + z0 = svldnt1_vnum_x4 (pn8, x0, -3)) + +/* +** ldnt1_vnum_f64_m4: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_m4, svfloat64x4_t, float64_t, + z0 = svldnt1_vnum_f64_x4 (pn8, x0, -4), + z0 = svldnt1_vnum_x4 (pn8, x0, -4)) + +/* +** ldnt1_vnum_f64_m32: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_m32, svfloat64x4_t, float64_t, + z0 = svldnt1_vnum_f64_x4 (pn8, x0, -32), + z0 = svldnt1_vnum_x4 (pn8, x0, -32)) + +/* +** ldnt1_vnum_f64_m36: +** [^{]* +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_m36, svfloat64x4_t, float64_t, + z0 = svldnt1_vnum_f64_x4 (pn8, x0, -36), + z0 = svldnt1_vnum_x4 (pn8, x0, -36)) + +/* +** ldnt1_vnum_f64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_f64_x1, svfloat64x4_t, float64_t, + z0 = svldnt1_vnum_f64_x4 (pn8, x0, x1), + z0 = svldnt1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..b96df0573f3a7e67e84d3dbd9382aa307c6bcb6b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s16_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_s16_base: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_base, svint16x2_t, int16_t, + z0 = svldnt1_s16_x2 (pn8, x0), + z0 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_s16_index: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_index, svint16x2_t, int16_t, + z0 = svldnt1_s16_x2 (pn8, x0 + x1), + z0 = svldnt1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s16_1: +** incb x0 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_1, svint16x2_t, int16_t, + z0 = svldnt1_s16_x2 (pn8, x0 + svcnth ()), + z0 = svldnt1_x2 (pn8, x0 + svcnth ())) + +/* +** ldnt1_s16_2: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_2, svint16x2_t, int16_t, + z0 = svldnt1_s16_x2 (pn8, x0 + svcnth () * 2), + z0 = svldnt1_x2 (pn8, x0 + svcnth () * 2)) + +/* +** ldnt1_s16_14: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_14, svint16x2_t, int16_t, + z0 = svldnt1_s16_x2 (pn8, x0 + svcnth () * 14), + z0 = svldnt1_x2 (pn8, x0 + svcnth () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s16_16: +** incb x0, all, mul #16 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_16, svint16x2_t, int16_t, + z0 = svldnt1_s16_x2 (pn8, x0 + svcnth () * 16), + z0 = svldnt1_x2 (pn8, x0 + svcnth () * 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_s16_m1: +** decb x0 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_m1, svint16x2_t, int16_t, + z0 = svldnt1_s16_x2 (pn8, x0 - svcnth ()), + z0 = svldnt1_x2 (pn8, x0 - svcnth ())) + +/* +** ldnt1_s16_m2: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_m2, svint16x2_t, int16_t, + z0 = svldnt1_s16_x2 (pn8, x0 - svcnth () * 2), + z0 = svldnt1_x2 (pn8, x0 - svcnth () * 2)) + +/* +** ldnt1_s16_m16: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_m16, svint16x2_t, int16_t, + z0 = svldnt1_s16_x2 (pn8, x0 - svcnth () * 16), + z0 = svldnt1_x2 (pn8, x0 - svcnth () * 16)) + +/* +** ldnt1_s16_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_m18, svint16x2_t, int16_t, + z0 = svldnt1_s16_x2 (pn8, x0 - svcnth () * 18), + z0 = svldnt1_x2 (pn8, x0 - svcnth () * 18)) + +/* +** ldnt1_s16_z17: +** ldnt1h {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_z17, svint16x2_t, int16_t, + z17 = svldnt1_s16_x2 (pn8, x0), + z17 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_s16_z22: +** ldnt1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_z22, svint16x2_t, int16_t, + z22 = svldnt1_s16_x2 (pn8, x0), + z22 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_s16_z28: +** ldnt1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_z28, svint16x2_t, int16_t, + z28 = svldnt1_s16_x2 (pn8, x0), + z28 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_s16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_pn0, svint16x2_t, int16_t, + z0 = svldnt1_s16_x2 (pn0, x0), + z0 = svldnt1_x2 (pn0, x0)) + +/* +** ldnt1_s16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_pn7, svint16x2_t, int16_t, + z0 = svldnt1_s16_x2 (pn7, x0), + z0 = svldnt1_x2 (pn7, x0)) + +/* +** ldnt1_s16_pn15: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_pn15, svint16x2_t, int16_t, + z0 = svldnt1_s16_x2 (pn15, x0), + z0 = svldnt1_x2 (pn15, x0)) + +/* +** ldnt1_vnum_s16_0: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_0, svint16x2_t, int16_t, + z0 = svldnt1_vnum_s16_x2 (pn8, x0, 0), + z0 = svldnt1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s16_1: +** incb x0 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_1, svint16x2_t, int16_t, + z0 = svldnt1_vnum_s16_x2 (pn8, x0, 1), + z0 = svldnt1_vnum_x2 (pn8, x0, 1)) + +/* +** ldnt1_vnum_s16_2: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_2, svint16x2_t, int16_t, + z0 = svldnt1_vnum_s16_x2 (pn8, x0, 2), + z0 = svldnt1_vnum_x2 (pn8, x0, 2)) + +/* +** ldnt1_vnum_s16_14: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_14, svint16x2_t, int16_t, + z0 = svldnt1_vnum_s16_x2 (pn8, x0, 14), + z0 = svldnt1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_s16_16: +** incb x0, all, mul #16 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_16, svint16x2_t, int16_t, + z0 = svldnt1_vnum_s16_x2 (pn8, x0, 16), + z0 = svldnt1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s16_m1: +** decb x0 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_m1, svint16x2_t, int16_t, + z0 = svldnt1_vnum_s16_x2 (pn8, x0, -1), + z0 = svldnt1_vnum_x2 (pn8, x0, -1)) + +/* +** ldnt1_vnum_s16_m2: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_m2, svint16x2_t, int16_t, + z0 = svldnt1_vnum_s16_x2 (pn8, x0, -2), + z0 = svldnt1_vnum_x2 (pn8, x0, -2)) + +/* +** ldnt1_vnum_s16_m16: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_m16, svint16x2_t, int16_t, + z0 = svldnt1_vnum_s16_x2 (pn8, x0, -16), + z0 = svldnt1_vnum_x2 (pn8, x0, -16)) + +/* +** ldnt1_vnum_s16_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_m18, svint16x2_t, int16_t, + z0 = svldnt1_vnum_s16_x2 (pn8, x0, -18), + z0 = svldnt1_vnum_x2 (pn8, x0, -18)) + +/* +** ldnt1_vnum_s16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_x1, svint16x2_t, int16_t, + z0 = svldnt1_vnum_s16_x2 (pn8, x0, x1), + z0 = svldnt1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..c64ab0919a34ab79ff9570a34254d69af240ed5e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s16_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_s16_base: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_base, svint16x4_t, int16_t, + z0 = svldnt1_s16_x4 (pn8, x0), + z0 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_s16_index: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_index, svint16x4_t, int16_t, + z0 = svldnt1_s16_x4 (pn8, x0 + x1), + z0 = svldnt1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s16_1: +** incb x0 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_1, svint16x4_t, int16_t, + z0 = svldnt1_s16_x4 (pn8, x0 + svcnth ()), + z0 = svldnt1_x4 (pn8, x0 + svcnth ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s16_2: +** incb x0, all, mul #2 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_2, svint16x4_t, int16_t, + z0 = svldnt1_s16_x4 (pn8, x0 + svcnth () * 2), + z0 = svldnt1_x4 (pn8, x0 + svcnth () * 2)) + +/* Moving the constant into a register would also be OK. 
*/
+/*
+** ldnt1_s16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_3, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ldnt1_s16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_4, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ldnt1_s16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_28, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ldnt1_s16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_32, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m1, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m2, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m3, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ldnt1_s16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m4, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ldnt1_s16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m32, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ldnt1_s16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m36, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ldnt1_s16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z17, svint16x4_t, int16_t,
+ z17 = svldnt1_s16_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s16_z22:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z22, svint16x4_t, int16_t,
+ z22 = svldnt1_s16_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s16_z28:
+** ldnt1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z28, svint16x4_t, int16_t,
+ z28 = svldnt1_s16_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_pn0, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn0, x0),
+ z0 = svldnt1_x4 
(pn0, x0)) + +/* +** ldnt1_s16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_pn7, svint16x4_t, int16_t, + z0 = svldnt1_s16_x4 (pn7, x0), + z0 = svldnt1_x4 (pn7, x0)) + +/* +** ldnt1_s16_pn15: +** ldnt1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s16_pn15, svint16x4_t, int16_t, + z0 = svldnt1_s16_x4 (pn15, x0), + z0 = svldnt1_x4 (pn15, x0)) + +/* +** ldnt1_vnum_s16_0: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_0, svint16x4_t, int16_t, + z0 = svldnt1_vnum_s16_x4 (pn8, x0, 0), + z0 = svldnt1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s16_1: +** incb x0 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_1, svint16x4_t, int16_t, + z0 = svldnt1_vnum_s16_x4 (pn8, x0, 1), + z0 = svldnt1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s16_2: +** incb x0, all, mul #2 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_2, svint16x4_t, int16_t, + z0 = svldnt1_vnum_s16_x4 (pn8, x0, 2), + z0 = svldnt1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s16_3: +** incb x0, all, mul #3 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_3, svint16x4_t, int16_t, + z0 = svldnt1_vnum_s16_x4 (pn8, x0, 3), + z0 = svldnt1_vnum_x4 (pn8, x0, 3)) + +/* +** ldnt1_vnum_s16_4: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_4, svint16x4_t, int16_t, + z0 = svldnt1_vnum_s16_x4 (pn8, x0, 4), + z0 = svldnt1_vnum_x4 (pn8, x0, 4)) + +/* +** ldnt1_vnum_s16_28: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_28, svint16x4_t, int16_t, + z0 = svldnt1_vnum_s16_x4 (pn8, x0, 28), + z0 = svldnt1_vnum_x4 (pn8, x0, 28)) + +/* +** ldnt1_vnum_s16_32: +** [^{]* +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_32, svint16x4_t, int16_t, + z0 = svldnt1_vnum_s16_x4 (pn8, x0, 32), + z0 = svldnt1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s16_m1: +** decb x0 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_m1, svint16x4_t, int16_t, + z0 = svldnt1_vnum_s16_x4 (pn8, x0, -1), + z0 = svldnt1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s16_m2: +** decb x0, all, mul #2 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_m2, svint16x4_t, int16_t, + z0 = svldnt1_vnum_s16_x4 (pn8, x0, -2), + z0 = svldnt1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_s16_m3: +** decb x0, all, mul #3 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_m3, svint16x4_t, int16_t, + z0 = svldnt1_vnum_s16_x4 (pn8, x0, -3), + z0 = svldnt1_vnum_x4 (pn8, x0, -3)) + +/* +** ldnt1_vnum_s16_m4: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_m4, svint16x4_t, int16_t, + z0 = svldnt1_vnum_s16_x4 (pn8, x0, -4), + z0 = svldnt1_vnum_x4 (pn8, x0, -4)) + +/* +** ldnt1_vnum_s16_m32: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_m32, svint16x4_t, int16_t, + z0 = svldnt1_vnum_s16_x4 (pn8, x0, -32), + z0 = svldnt1_vnum_x4 (pn8, x0, -32)) + +/* +** ldnt1_vnum_s16_m36: +** [^{]* +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_m36, svint16x4_t, int16_t, + z0 = svldnt1_vnum_s16_x4 (pn8, x0, -36), + z0 = svldnt1_vnum_x4 (pn8, x0, -36)) + +/* +** ldnt1_vnum_s16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s16_x1, svint16x4_t, int16_t, + z0 = svldnt1_vnum_s16_x4 (pn8, x0, x1), + z0 = svldnt1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..e37e544131cb0285b9e8c59ef745590d5d92c6b7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s32_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_s32_base: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_base, svint32x2_t, int32_t, + z0 = svldnt1_s32_x2 (pn8, x0), + z0 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_s32_index: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_index, svint32x2_t, int32_t, + z0 = svldnt1_s32_x2 (pn8, x0 + x1), + z0 = svldnt1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s32_1: +** incb x0 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_1, svint32x2_t, int32_t, + z0 = svldnt1_s32_x2 (pn8, x0 + svcntw ()), + z0 = svldnt1_x2 (pn8, x0 + svcntw ())) + +/* +** ldnt1_s32_2: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_2, svint32x2_t, int32_t, + z0 = svldnt1_s32_x2 (pn8, x0 + svcntw () * 2), + z0 = svldnt1_x2 (pn8, x0 + svcntw () * 2)) + +/* +** ldnt1_s32_14: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_14, svint32x2_t, int32_t, + z0 = svldnt1_s32_x2 (pn8, x0 + svcntw () * 14), + z0 = svldnt1_x2 (pn8, x0 + svcntw () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s32_16: +** incb x0, all, mul #16 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_16, svint32x2_t, int32_t, + z0 = svldnt1_s32_x2 (pn8, x0 + svcntw () * 16), + z0 = svldnt1_x2 (pn8, x0 + svcntw () * 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_s32_m1: +** decb x0 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_m1, svint32x2_t, int32_t, + z0 = svldnt1_s32_x2 (pn8, x0 - svcntw ()), + z0 = svldnt1_x2 (pn8, x0 - svcntw ())) + +/* +** ldnt1_s32_m2: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_m2, svint32x2_t, int32_t, + z0 = svldnt1_s32_x2 (pn8, x0 - svcntw () * 2), + z0 = svldnt1_x2 (pn8, x0 - svcntw () * 2)) + +/* +** ldnt1_s32_m16: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_m16, svint32x2_t, int32_t, + z0 = svldnt1_s32_x2 (pn8, x0 - svcntw () * 16), + z0 = svldnt1_x2 (pn8, x0 - svcntw () * 16)) + +/* +** ldnt1_s32_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_m18, svint32x2_t, int32_t, + z0 = svldnt1_s32_x2 (pn8, x0 - svcntw () * 18), + z0 = svldnt1_x2 (pn8, x0 - svcntw () * 18)) + +/* +** ldnt1_s32_z17: +** ldnt1w {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_z17, svint32x2_t, int32_t, + z17 = svldnt1_s32_x2 (pn8, x0), + z17 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_s32_z22: +** ldnt1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_z22, svint32x2_t, int32_t, + z22 = svldnt1_s32_x2 (pn8, x0), + z22 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_s32_z28: +** ldnt1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_z28, svint32x2_t, int32_t, + z28 = svldnt1_s32_x2 (pn8, x0), + z28 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_s32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_pn0, svint32x2_t, int32_t, + z0 = svldnt1_s32_x2 (pn0, x0), + z0 = svldnt1_x2 (pn0, x0)) + +/* +** ldnt1_s32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_pn7, svint32x2_t, int32_t, + z0 = svldnt1_s32_x2 (pn7, x0), + z0 = svldnt1_x2 (pn7, x0)) + +/* +** ldnt1_s32_pn15: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_pn15, svint32x2_t, int32_t, + z0 = svldnt1_s32_x2 (pn15, x0), + z0 = svldnt1_x2 (pn15, x0)) + +/* +** ldnt1_vnum_s32_0: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_0, svint32x2_t, int32_t, + z0 = svldnt1_vnum_s32_x2 (pn8, x0, 0), + z0 = svldnt1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s32_1: +** incb x0 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_1, svint32x2_t, int32_t, + z0 = svldnt1_vnum_s32_x2 (pn8, x0, 1), + z0 = svldnt1_vnum_x2 (pn8, x0, 1)) + +/* +** ldnt1_vnum_s32_2: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_2, svint32x2_t, int32_t, + z0 = svldnt1_vnum_s32_x2 (pn8, x0, 2), + z0 = svldnt1_vnum_x2 (pn8, x0, 2)) + +/* +** ldnt1_vnum_s32_14: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_14, svint32x2_t, int32_t, + z0 = svldnt1_vnum_s32_x2 (pn8, x0, 14), + z0 = svldnt1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_s32_16: +** incb x0, all, mul #16 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_16, svint32x2_t, int32_t, + z0 = svldnt1_vnum_s32_x2 (pn8, x0, 16), + z0 = svldnt1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s32_m1: +** decb x0 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_m1, svint32x2_t, int32_t, + z0 = svldnt1_vnum_s32_x2 (pn8, x0, -1), + z0 = svldnt1_vnum_x2 (pn8, x0, -1)) + +/* +** ldnt1_vnum_s32_m2: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_m2, svint32x2_t, int32_t, + z0 = svldnt1_vnum_s32_x2 (pn8, x0, -2), + z0 = svldnt1_vnum_x2 (pn8, x0, -2)) + +/* +** ldnt1_vnum_s32_m16: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_m16, svint32x2_t, int32_t, + z0 = svldnt1_vnum_s32_x2 (pn8, x0, -16), + z0 = svldnt1_vnum_x2 (pn8, x0, -16)) + +/* +** ldnt1_vnum_s32_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_m18, svint32x2_t, int32_t, + z0 = svldnt1_vnum_s32_x2 (pn8, x0, -18), + z0 = svldnt1_vnum_x2 (pn8, x0, -18)) + +/* +** ldnt1_vnum_s32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_x1, svint32x2_t, int32_t, + z0 = svldnt1_vnum_s32_x2 (pn8, x0, x1), + z0 = svldnt1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..b97ff93c247fb20fa6b24eea331c15a4b771bed7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s32_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_s32_base: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_base, svint32x4_t, int32_t, + z0 = svldnt1_s32_x4 (pn8, x0), + z0 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_s32_index: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_index, svint32x4_t, int32_t, + z0 = svldnt1_s32_x4 (pn8, x0 + x1), + z0 = svldnt1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s32_1: +** incb x0 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_1, svint32x4_t, int32_t, + z0 = svldnt1_s32_x4 (pn8, x0 + svcntw ()), + z0 = svldnt1_x4 (pn8, x0 + svcntw ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s32_2: +** incb x0, all, mul #2 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_2, svint32x4_t, int32_t, + z0 = svldnt1_s32_x4 (pn8, x0 + svcntw () * 2), + z0 = svldnt1_x4 (pn8, x0 + svcntw () * 2)) + +/* Moving the constant into a register would also be OK. 
*/
+/*
+** ldnt1_s32_3:
+** incb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_3, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 3))
+
+/*
+** ldnt1_s32_4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_4, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 4))
+
+/*
+** ldnt1_s32_28:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_28, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 28))
+
+/*
+** ldnt1_s32_32:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_32, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_m1:
+** decb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m1, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_m2:
+** decb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m2, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_m3:
+** decb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m3, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 3))
+
+/*
+** ldnt1_s32_m4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m4, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 4))
+
+/*
+** ldnt1_s32_m32:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m32, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 32))
+
+/*
+** ldnt1_s32_m36:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m36, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 36))
+
+/*
+** ldnt1_s32_z17:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z17, svint32x4_t, int32_t,
+ z17 = svldnt1_s32_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s32_z22:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z22, svint32x4_t, int32_t,
+ z22 = svldnt1_s32_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s32_z28:
+** ldnt1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z28, svint32x4_t, int32_t,
+ z28 = svldnt1_s32_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_pn0, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn0, x0),
+ z0 = svldnt1_x4 
(pn0, x0)) + +/* +** ldnt1_s32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_pn7, svint32x4_t, int32_t, + z0 = svldnt1_s32_x4 (pn7, x0), + z0 = svldnt1_x4 (pn7, x0)) + +/* +** ldnt1_s32_pn15: +** ldnt1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s32_pn15, svint32x4_t, int32_t, + z0 = svldnt1_s32_x4 (pn15, x0), + z0 = svldnt1_x4 (pn15, x0)) + +/* +** ldnt1_vnum_s32_0: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_0, svint32x4_t, int32_t, + z0 = svldnt1_vnum_s32_x4 (pn8, x0, 0), + z0 = svldnt1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s32_1: +** incb x0 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_1, svint32x4_t, int32_t, + z0 = svldnt1_vnum_s32_x4 (pn8, x0, 1), + z0 = svldnt1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s32_2: +** incb x0, all, mul #2 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_2, svint32x4_t, int32_t, + z0 = svldnt1_vnum_s32_x4 (pn8, x0, 2), + z0 = svldnt1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s32_3: +** incb x0, all, mul #3 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_3, svint32x4_t, int32_t, + z0 = svldnt1_vnum_s32_x4 (pn8, x0, 3), + z0 = svldnt1_vnum_x4 (pn8, x0, 3)) + +/* +** ldnt1_vnum_s32_4: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_4, svint32x4_t, int32_t, + z0 = svldnt1_vnum_s32_x4 (pn8, x0, 4), + z0 = svldnt1_vnum_x4 (pn8, x0, 4)) + +/* +** ldnt1_vnum_s32_28: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_28, svint32x4_t, int32_t, + z0 = svldnt1_vnum_s32_x4 (pn8, x0, 28), + z0 = svldnt1_vnum_x4 (pn8, x0, 28)) + +/* +** ldnt1_vnum_s32_32: +** [^{]* +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_32, svint32x4_t, int32_t, + z0 = svldnt1_vnum_s32_x4 (pn8, x0, 32), + z0 = svldnt1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s32_m1: +** decb x0 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_m1, svint32x4_t, int32_t, + z0 = svldnt1_vnum_s32_x4 (pn8, x0, -1), + z0 = svldnt1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s32_m2: +** decb x0, all, mul #2 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_m2, svint32x4_t, int32_t, + z0 = svldnt1_vnum_s32_x4 (pn8, x0, -2), + z0 = svldnt1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_s32_m3: +** decb x0, all, mul #3 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_m3, svint32x4_t, int32_t, + z0 = svldnt1_vnum_s32_x4 (pn8, x0, -3), + z0 = svldnt1_vnum_x4 (pn8, x0, -3)) + +/* +** ldnt1_vnum_s32_m4: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_m4, svint32x4_t, int32_t, + z0 = svldnt1_vnum_s32_x4 (pn8, x0, -4), + z0 = svldnt1_vnum_x4 (pn8, x0, -4)) + +/* +** ldnt1_vnum_s32_m32: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_m32, svint32x4_t, int32_t, + z0 = svldnt1_vnum_s32_x4 (pn8, x0, -32), + z0 = svldnt1_vnum_x4 (pn8, x0, -32)) + +/* +** ldnt1_vnum_s32_m36: +** [^{]* +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_m36, svint32x4_t, int32_t, + z0 = svldnt1_vnum_s32_x4 (pn8, x0, -36), + z0 = svldnt1_vnum_x4 (pn8, x0, -36)) + +/* +** ldnt1_vnum_s32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s32_x1, svint32x4_t, int32_t, + z0 = svldnt1_vnum_s32_x4 (pn8, x0, x1), + z0 = svldnt1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..1e063fb721d4e8592cfbcea643a1c5aa45840ae9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s64_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_s64_base: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_base, svint64x2_t, int64_t, + z0 = svldnt1_s64_x2 (pn8, x0), + z0 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_s64_index: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_index, svint64x2_t, int64_t, + z0 = svldnt1_s64_x2 (pn8, x0 + x1), + z0 = svldnt1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s64_1: +** incb x0 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_1, svint64x2_t, int64_t, + z0 = svldnt1_s64_x2 (pn8, x0 + svcntd ()), + z0 = svldnt1_x2 (pn8, x0 + svcntd ())) + +/* +** ldnt1_s64_2: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_2, svint64x2_t, int64_t, + z0 = svldnt1_s64_x2 (pn8, x0 + svcntd () * 2), + z0 = svldnt1_x2 (pn8, x0 + svcntd () * 2)) + +/* +** ldnt1_s64_14: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_14, svint64x2_t, int64_t, + z0 = svldnt1_s64_x2 (pn8, x0 + svcntd () * 14), + z0 = svldnt1_x2 (pn8, x0 + svcntd () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s64_16: +** incb x0, all, mul #16 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_16, svint64x2_t, int64_t, + z0 = svldnt1_s64_x2 (pn8, x0 + svcntd () * 16), + z0 = svldnt1_x2 (pn8, x0 + svcntd () * 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_s64_m1: +** decb x0 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_m1, svint64x2_t, int64_t, + z0 = svldnt1_s64_x2 (pn8, x0 - svcntd ()), + z0 = svldnt1_x2 (pn8, x0 - svcntd ())) + +/* +** ldnt1_s64_m2: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_m2, svint64x2_t, int64_t, + z0 = svldnt1_s64_x2 (pn8, x0 - svcntd () * 2), + z0 = svldnt1_x2 (pn8, x0 - svcntd () * 2)) + +/* +** ldnt1_s64_m16: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_m16, svint64x2_t, int64_t, + z0 = svldnt1_s64_x2 (pn8, x0 - svcntd () * 16), + z0 = svldnt1_x2 (pn8, x0 - svcntd () * 16)) + +/* +** ldnt1_s64_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_m18, svint64x2_t, int64_t, + z0 = svldnt1_s64_x2 (pn8, x0 - svcntd () * 18), + z0 = svldnt1_x2 (pn8, x0 - svcntd () * 18)) + +/* +** ldnt1_s64_z17: +** ldnt1d {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_z17, svint64x2_t, int64_t, + z17 = svldnt1_s64_x2 (pn8, x0), + z17 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_s64_z22: +** ldnt1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_z22, svint64x2_t, int64_t, + z22 = svldnt1_s64_x2 (pn8, x0), + z22 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_s64_z28: +** ldnt1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_z28, svint64x2_t, int64_t, + z28 = svldnt1_s64_x2 (pn8, x0), + z28 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_s64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_pn0, svint64x2_t, int64_t, + z0 = svldnt1_s64_x2 (pn0, x0), + z0 = svldnt1_x2 (pn0, x0)) + +/* +** ldnt1_s64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_pn7, svint64x2_t, int64_t, + z0 = svldnt1_s64_x2 (pn7, x0), + z0 = svldnt1_x2 (pn7, x0)) + +/* +** ldnt1_s64_pn15: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_pn15, svint64x2_t, int64_t, + z0 = svldnt1_s64_x2 (pn15, x0), + z0 = svldnt1_x2 (pn15, x0)) + +/* +** ldnt1_vnum_s64_0: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_0, svint64x2_t, int64_t, + z0 = svldnt1_vnum_s64_x2 (pn8, x0, 0), + z0 = svldnt1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s64_1: +** incb x0 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_1, svint64x2_t, int64_t, + z0 = svldnt1_vnum_s64_x2 (pn8, x0, 1), + z0 = svldnt1_vnum_x2 (pn8, x0, 1)) + +/* +** ldnt1_vnum_s64_2: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_2, svint64x2_t, int64_t, + z0 = svldnt1_vnum_s64_x2 (pn8, x0, 2), + z0 = svldnt1_vnum_x2 (pn8, x0, 2)) + +/* +** ldnt1_vnum_s64_14: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_14, svint64x2_t, int64_t, + z0 = svldnt1_vnum_s64_x2 (pn8, x0, 14), + z0 = svldnt1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_s64_16: +** incb x0, all, mul #16 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_16, svint64x2_t, int64_t, + z0 = svldnt1_vnum_s64_x2 (pn8, x0, 16), + z0 = svldnt1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s64_m1: +** decb x0 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_m1, svint64x2_t, int64_t, + z0 = svldnt1_vnum_s64_x2 (pn8, x0, -1), + z0 = svldnt1_vnum_x2 (pn8, x0, -1)) + +/* +** ldnt1_vnum_s64_m2: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_m2, svint64x2_t, int64_t, + z0 = svldnt1_vnum_s64_x2 (pn8, x0, -2), + z0 = svldnt1_vnum_x2 (pn8, x0, -2)) + +/* +** ldnt1_vnum_s64_m16: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_m16, svint64x2_t, int64_t, + z0 = svldnt1_vnum_s64_x2 (pn8, x0, -16), + z0 = svldnt1_vnum_x2 (pn8, x0, -16)) + +/* +** ldnt1_vnum_s64_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_m18, svint64x2_t, int64_t, + z0 = svldnt1_vnum_s64_x2 (pn8, x0, -18), + z0 = svldnt1_vnum_x2 (pn8, x0, -18)) + +/* +** ldnt1_vnum_s64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_x1, svint64x2_t, int64_t, + z0 = svldnt1_vnum_s64_x2 (pn8, x0, x1), + z0 = svldnt1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..90f929256d3f3efd9003f5929db6e35faaefb043 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s64_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_s64_base: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_base, svint64x4_t, int64_t, + z0 = svldnt1_s64_x4 (pn8, x0), + z0 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_s64_index: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_index, svint64x4_t, int64_t, + z0 = svldnt1_s64_x4 (pn8, x0 + x1), + z0 = svldnt1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s64_1: +** incb x0 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_1, svint64x4_t, int64_t, + z0 = svldnt1_s64_x4 (pn8, x0 + svcntd ()), + z0 = svldnt1_x4 (pn8, x0 + svcntd ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s64_2: +** incb x0, all, mul #2 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_2, svint64x4_t, int64_t, + z0 = svldnt1_s64_x4 (pn8, x0 + svcntd () * 2), + z0 = svldnt1_x4 (pn8, x0 + svcntd () * 2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_s64_3: +** incb x0, all, mul #3 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_3, svint64x4_t, int64_t, + z0 = svldnt1_s64_x4 (pn8, x0 + svcntd () * 3), + z0 = svldnt1_x4 (pn8, x0 + svcntd () * 3)) + +/* +** ldnt1_s64_4: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_4, svint64x4_t, int64_t, + z0 = svldnt1_s64_x4 (pn8, x0 + svcntd () * 4), + z0 = svldnt1_x4 (pn8, x0 + svcntd () * 4)) + +/* +** ldnt1_s64_28: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_28, svint64x4_t, int64_t, + z0 = svldnt1_s64_x4 (pn8, x0 + svcntd () * 28), + z0 = svldnt1_x4 (pn8, x0 + svcntd () * 28)) + +/* +** ldnt1_s64_32: +** [^{]* +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_32, svint64x4_t, int64_t, + z0 = svldnt1_s64_x4 (pn8, x0 + svcntd () * 32), + z0 = svldnt1_x4 (pn8, x0 + svcntd () * 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s64_m1: +** decb x0 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_m1, svint64x4_t, int64_t, + z0 = svldnt1_s64_x4 (pn8, x0 - svcntd ()), + z0 = svldnt1_x4 (pn8, x0 - svcntd ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s64_m2: +** decb x0, all, mul #2 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_m2, svint64x4_t, int64_t, + z0 = svldnt1_s64_x4 (pn8, x0 - svcntd () * 2), + z0 = svldnt1_x4 (pn8, x0 - svcntd () * 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s64_m3: +** decb x0, all, mul #3 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_m3, svint64x4_t, int64_t, + z0 = svldnt1_s64_x4 (pn8, x0 - svcntd () * 3), + z0 = svldnt1_x4 (pn8, x0 - svcntd () * 3)) + +/* +** ldnt1_s64_m4: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_m4, svint64x4_t, int64_t, + z0 = svldnt1_s64_x4 (pn8, x0 - svcntd () * 4), + z0 = svldnt1_x4 (pn8, x0 - svcntd () * 4)) + +/* +** ldnt1_s64_m32: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_m32, svint64x4_t, int64_t, + z0 = svldnt1_s64_x4 (pn8, x0 - svcntd () * 32), + z0 = svldnt1_x4 (pn8, x0 - svcntd () * 32)) + +/* +** ldnt1_s64_m36: +** [^{]* +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_m36, svint64x4_t, int64_t, + z0 = svldnt1_s64_x4 (pn8, x0 - svcntd () * 36), + z0 = svldnt1_x4 (pn8, x0 - svcntd () * 36)) + +/* +** ldnt1_s64_z17: +** ldnt1d {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_z17, svint64x4_t, int64_t, + z17 = svldnt1_s64_x4 (pn8, x0), + z17 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_s64_z22: +** ldnt1d {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_z22, svint64x4_t, int64_t, + z22 = svldnt1_s64_x4 (pn8, x0), + z22 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_s64_z28: +** ldnt1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_z28, svint64x4_t, int64_t, + z28 = svldnt1_s64_x4 (pn8, x0), + z28 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_s64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_pn0, svint64x4_t, int64_t, + z0 = svldnt1_s64_x4 (pn0, x0), + z0 = svldnt1_x4 
(pn0, x0)) + +/* +** ldnt1_s64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_pn7, svint64x4_t, int64_t, + z0 = svldnt1_s64_x4 (pn7, x0), + z0 = svldnt1_x4 (pn7, x0)) + +/* +** ldnt1_s64_pn15: +** ldnt1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s64_pn15, svint64x4_t, int64_t, + z0 = svldnt1_s64_x4 (pn15, x0), + z0 = svldnt1_x4 (pn15, x0)) + +/* +** ldnt1_vnum_s64_0: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_0, svint64x4_t, int64_t, + z0 = svldnt1_vnum_s64_x4 (pn8, x0, 0), + z0 = svldnt1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s64_1: +** incb x0 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_1, svint64x4_t, int64_t, + z0 = svldnt1_vnum_s64_x4 (pn8, x0, 1), + z0 = svldnt1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s64_2: +** incb x0, all, mul #2 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_2, svint64x4_t, int64_t, + z0 = svldnt1_vnum_s64_x4 (pn8, x0, 2), + z0 = svldnt1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s64_3: +** incb x0, all, mul #3 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_3, svint64x4_t, int64_t, + z0 = svldnt1_vnum_s64_x4 (pn8, x0, 3), + z0 = svldnt1_vnum_x4 (pn8, x0, 3)) + +/* +** ldnt1_vnum_s64_4: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_4, svint64x4_t, int64_t, + z0 = svldnt1_vnum_s64_x4 (pn8, x0, 4), + z0 = svldnt1_vnum_x4 (pn8, x0, 4)) + +/* +** ldnt1_vnum_s64_28: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_28, svint64x4_t, int64_t, + z0 = svldnt1_vnum_s64_x4 (pn8, x0, 28), + z0 = svldnt1_vnum_x4 (pn8, x0, 28)) + +/* +** ldnt1_vnum_s64_32: +** [^{]* +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_32, svint64x4_t, int64_t, + z0 = svldnt1_vnum_s64_x4 (pn8, x0, 32), + z0 = svldnt1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s64_m1: +** decb x0 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_m1, svint64x4_t, int64_t, + z0 = svldnt1_vnum_s64_x4 (pn8, x0, -1), + z0 = svldnt1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s64_m2: +** decb x0, all, mul #2 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_m2, svint64x4_t, int64_t, + z0 = svldnt1_vnum_s64_x4 (pn8, x0, -2), + z0 = svldnt1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_s64_m3: +** decb x0, all, mul #3 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_m3, svint64x4_t, int64_t, + z0 = svldnt1_vnum_s64_x4 (pn8, x0, -3), + z0 = svldnt1_vnum_x4 (pn8, x0, -3)) + +/* +** ldnt1_vnum_s64_m4: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_m4, svint64x4_t, int64_t, + z0 = svldnt1_vnum_s64_x4 (pn8, x0, -4), + z0 = svldnt1_vnum_x4 (pn8, x0, -4)) + +/* +** ldnt1_vnum_s64_m32: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_m32, svint64x4_t, int64_t, + z0 = svldnt1_vnum_s64_x4 (pn8, x0, -32), + z0 = svldnt1_vnum_x4 (pn8, x0, -32)) + +/* +** ldnt1_vnum_s64_m36: +** [^{]* +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_m36, svint64x4_t, int64_t, + z0 = svldnt1_vnum_s64_x4 (pn8, x0, -36), + z0 = svldnt1_vnum_x4 (pn8, x0, -36)) + +/* +** ldnt1_vnum_s64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s64_x1, svint64x4_t, int64_t, + z0 = svldnt1_vnum_s64_x4 (pn8, x0, x1), + z0 = svldnt1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..a93f516319f844994c3c3c6210a3af54564fb315 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s8_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_s8_base: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_base, svint8x2_t, int8_t, + z0 = svldnt1_s8_x2 (pn8, x0), + z0 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_s8_index: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_index, svint8x2_t, int8_t, + z0 = svldnt1_s8_x2 (pn8, x0 + x1), + z0 = svldnt1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s8_1: +** incb x0 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_1, svint8x2_t, int8_t, + z0 = svldnt1_s8_x2 (pn8, x0 + svcntb ()), + z0 = svldnt1_x2 (pn8, x0 + svcntb ())) + +/* +** ldnt1_s8_2: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_2, svint8x2_t, int8_t, + z0 = svldnt1_s8_x2 (pn8, x0 + svcntb () * 2), + z0 = svldnt1_x2 (pn8, x0 + svcntb () * 2)) + +/* +** ldnt1_s8_14: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_14, svint8x2_t, int8_t, + z0 = svldnt1_s8_x2 (pn8, x0 + svcntb () * 14), + z0 = svldnt1_x2 (pn8, x0 + svcntb () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s8_16: +** incb x0, all, mul #16 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_16, svint8x2_t, int8_t, + z0 = svldnt1_s8_x2 (pn8, x0 + svcntb () * 16), + z0 = svldnt1_x2 (pn8, x0 + svcntb () * 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_s8_m1: +** decb x0 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_m1, svint8x2_t, int8_t, + z0 = svldnt1_s8_x2 (pn8, x0 - svcntb ()), + z0 = svldnt1_x2 (pn8, x0 - svcntb ())) + +/* +** ldnt1_s8_m2: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_m2, svint8x2_t, int8_t, + z0 = svldnt1_s8_x2 (pn8, x0 - svcntb () * 2), + z0 = svldnt1_x2 (pn8, x0 - svcntb () * 2)) + +/* +** ldnt1_s8_m16: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_m16, svint8x2_t, int8_t, + z0 = svldnt1_s8_x2 (pn8, x0 - svcntb () * 16), + z0 = svldnt1_x2 (pn8, x0 - svcntb () * 16)) + +/* +** ldnt1_s8_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_m18, svint8x2_t, int8_t, + z0 = svldnt1_s8_x2 (pn8, x0 - svcntb () * 18), + z0 = svldnt1_x2 (pn8, x0 - svcntb () * 18)) + +/* +** ldnt1_s8_z17: +** ldnt1b {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_z17, svint8x2_t, int8_t, + z17 = svldnt1_s8_x2 (pn8, x0), + z17 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_s8_z22: +** ldnt1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_z22, svint8x2_t, int8_t, + z22 = svldnt1_s8_x2 (pn8, x0), + z22 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_s8_z28: +** ldnt1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_z28, svint8x2_t, int8_t, + z28 = svldnt1_s8_x2 (pn8, x0), + z28 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_s8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_pn0, svint8x2_t, int8_t, + z0 = svldnt1_s8_x2 (pn0, x0), + z0 = svldnt1_x2 (pn0, x0)) + +/* +** ldnt1_s8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_pn7, svint8x2_t, int8_t, + z0 = svldnt1_s8_x2 (pn7, x0), + z0 = svldnt1_x2 (pn7, x0)) + +/* +** ldnt1_s8_pn15: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_pn15, svint8x2_t, int8_t, + z0 = svldnt1_s8_x2 (pn15, x0), + z0 = svldnt1_x2 (pn15, x0)) + +/* +** ldnt1_vnum_s8_0: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_0, svint8x2_t, int8_t, + z0 = svldnt1_vnum_s8_x2 (pn8, x0, 0), + z0 = svldnt1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s8_1: +** incb x0 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_1, svint8x2_t, int8_t, + z0 = svldnt1_vnum_s8_x2 (pn8, x0, 1), + z0 = svldnt1_vnum_x2 (pn8, x0, 1)) + +/* +** ldnt1_vnum_s8_2: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_2, svint8x2_t, int8_t, + z0 = svldnt1_vnum_s8_x2 (pn8, x0, 2), + z0 = svldnt1_vnum_x2 (pn8, x0, 2)) + +/* +** ldnt1_vnum_s8_14: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_14, svint8x2_t, int8_t, + z0 = svldnt1_vnum_s8_x2 (pn8, x0, 14), + z0 = svldnt1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_s8_16: +** incb x0, all, mul #16 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_16, svint8x2_t, int8_t, + z0 = svldnt1_vnum_s8_x2 (pn8, x0, 16), + z0 = svldnt1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s8_m1: +** decb x0 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_m1, svint8x2_t, int8_t, + z0 = svldnt1_vnum_s8_x2 (pn8, x0, -1), + z0 = svldnt1_vnum_x2 (pn8, x0, -1)) + +/* +** ldnt1_vnum_s8_m2: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_m2, svint8x2_t, int8_t, + z0 = svldnt1_vnum_s8_x2 (pn8, x0, -2), + z0 = svldnt1_vnum_x2 (pn8, x0, -2)) + +/* +** ldnt1_vnum_s8_m16: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_m16, svint8x2_t, int8_t, + z0 = svldnt1_vnum_s8_x2 (pn8, x0, -16), + z0 = svldnt1_vnum_x2 (pn8, x0, -16)) + +/* +** ldnt1_vnum_s8_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_m18, svint8x2_t, int8_t, + z0 = svldnt1_vnum_s8_x2 (pn8, x0, -18), + z0 = svldnt1_vnum_x2 (pn8, x0, -18)) + +/* +** ldnt1_vnum_s8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_x1, svint8x2_t, int8_t, + z0 = svldnt1_vnum_s8_x2 (pn8, x0, x1), + z0 = svldnt1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..ccf75967d1c06797fb9759a03d7d18cd5d422f5c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_s8_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_s8_base: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_base, svint8x4_t, int8_t, + z0 = svldnt1_s8_x4 (pn8, x0), + z0 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_s8_index: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_index, svint8x4_t, int8_t, + z0 = svldnt1_s8_x4 (pn8, x0 + x1), + z0 = svldnt1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s8_1: +** incb x0 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_1, svint8x4_t, int8_t, + z0 = svldnt1_s8_x4 (pn8, x0 + svcntb ()), + z0 = svldnt1_x4 (pn8, x0 + svcntb ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s8_2: +** incb x0, all, mul #2 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_2, svint8x4_t, int8_t, + z0 = svldnt1_s8_x4 (pn8, x0 + svcntb () * 2), + z0 = svldnt1_x4 (pn8, x0 + svcntb () * 2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_s8_3: +** incb x0, all, mul #3 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_3, svint8x4_t, int8_t, + z0 = svldnt1_s8_x4 (pn8, x0 + svcntb () * 3), + z0 = svldnt1_x4 (pn8, x0 + svcntb () * 3)) + +/* +** ldnt1_s8_4: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_4, svint8x4_t, int8_t, + z0 = svldnt1_s8_x4 (pn8, x0 + svcntb () * 4), + z0 = svldnt1_x4 (pn8, x0 + svcntb () * 4)) + +/* +** ldnt1_s8_28: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_28, svint8x4_t, int8_t, + z0 = svldnt1_s8_x4 (pn8, x0 + svcntb () * 28), + z0 = svldnt1_x4 (pn8, x0 + svcntb () * 28)) + +/* +** ldnt1_s8_32: +** [^{]* +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_32, svint8x4_t, int8_t, + z0 = svldnt1_s8_x4 (pn8, x0 + svcntb () * 32), + z0 = svldnt1_x4 (pn8, x0 + svcntb () * 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s8_m1: +** decb x0 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_m1, svint8x4_t, int8_t, + z0 = svldnt1_s8_x4 (pn8, x0 - svcntb ()), + z0 = svldnt1_x4 (pn8, x0 - svcntb ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s8_m2: +** decb x0, all, mul #2 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_m2, svint8x4_t, int8_t, + z0 = svldnt1_s8_x4 (pn8, x0 - svcntb () * 2), + z0 = svldnt1_x4 (pn8, x0 - svcntb () * 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_s8_m3: +** decb x0, all, mul #3 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_m3, svint8x4_t, int8_t, + z0 = svldnt1_s8_x4 (pn8, x0 - svcntb () * 3), + z0 = svldnt1_x4 (pn8, x0 - svcntb () * 3)) + +/* +** ldnt1_s8_m4: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_m4, svint8x4_t, int8_t, + z0 = svldnt1_s8_x4 (pn8, x0 - svcntb () * 4), + z0 = svldnt1_x4 (pn8, x0 - svcntb () * 4)) + +/* +** ldnt1_s8_m32: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_m32, svint8x4_t, int8_t, + z0 = svldnt1_s8_x4 (pn8, x0 - svcntb () * 32), + z0 = svldnt1_x4 (pn8, x0 - svcntb () * 32)) + +/* +** ldnt1_s8_m36: +** [^{]* +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_m36, svint8x4_t, int8_t, + z0 = svldnt1_s8_x4 (pn8, x0 - svcntb () * 36), + z0 = svldnt1_x4 (pn8, x0 - svcntb () * 36)) + +/* +** ldnt1_s8_z17: +** ldnt1b {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_z17, svint8x4_t, int8_t, + z17 = svldnt1_s8_x4 (pn8, x0), + z17 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_s8_z22: +** ldnt1b {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_z22, svint8x4_t, int8_t, + z22 = svldnt1_s8_x4 (pn8, x0), + z22 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_s8_z28: +** ldnt1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_z28, svint8x4_t, int8_t, + z28 = svldnt1_s8_x4 (pn8, x0), + z28 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_s8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_pn0, svint8x4_t, int8_t, + z0 = svldnt1_s8_x4 (pn0, x0), + z0 = svldnt1_x4 (pn0, x0)) + +/* +** ldnt1_s8_pn7: +** mov p([89]|1[0-5])\.b, 
p7\.b +** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_pn7, svint8x4_t, int8_t, + z0 = svldnt1_s8_x4 (pn7, x0), + z0 = svldnt1_x4 (pn7, x0)) + +/* +** ldnt1_s8_pn15: +** ldnt1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_s8_pn15, svint8x4_t, int8_t, + z0 = svldnt1_s8_x4 (pn15, x0), + z0 = svldnt1_x4 (pn15, x0)) + +/* +** ldnt1_vnum_s8_0: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_0, svint8x4_t, int8_t, + z0 = svldnt1_vnum_s8_x4 (pn8, x0, 0), + z0 = svldnt1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s8_1: +** incb x0 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_1, svint8x4_t, int8_t, + z0 = svldnt1_vnum_s8_x4 (pn8, x0, 1), + z0 = svldnt1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s8_2: +** incb x0, all, mul #2 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_2, svint8x4_t, int8_t, + z0 = svldnt1_vnum_s8_x4 (pn8, x0, 2), + z0 = svldnt1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s8_3: +** incb x0, all, mul #3 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_3, svint8x4_t, int8_t, + z0 = svldnt1_vnum_s8_x4 (pn8, x0, 3), + z0 = svldnt1_vnum_x4 (pn8, x0, 3)) + +/* +** ldnt1_vnum_s8_4: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_4, svint8x4_t, int8_t, + z0 = svldnt1_vnum_s8_x4 (pn8, x0, 4), + z0 = svldnt1_vnum_x4 (pn8, x0, 4)) + +/* +** ldnt1_vnum_s8_28: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_28, svint8x4_t, int8_t, + z0 = svldnt1_vnum_s8_x4 (pn8, x0, 28), + z0 = svldnt1_vnum_x4 (pn8, x0, 28)) + +/* +** ldnt1_vnum_s8_32: +** [^{]* +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_32, svint8x4_t, int8_t, + z0 = svldnt1_vnum_s8_x4 (pn8, x0, 32), + z0 = svldnt1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s8_m1: +** decb x0 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_m1, svint8x4_t, int8_t, + z0 = svldnt1_vnum_s8_x4 (pn8, x0, -1), + z0 = svldnt1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_s8_m2: +** decb x0, all, mul #2 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_m2, svint8x4_t, int8_t, + z0 = svldnt1_vnum_s8_x4 (pn8, x0, -2), + z0 = svldnt1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_s8_m3: +** decb x0, all, mul #3 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_m3, svint8x4_t, int8_t, + z0 = svldnt1_vnum_s8_x4 (pn8, x0, -3), + z0 = svldnt1_vnum_x4 (pn8, x0, -3)) + +/* +** ldnt1_vnum_s8_m4: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_m4, svint8x4_t, int8_t, + z0 = svldnt1_vnum_s8_x4 (pn8, x0, -4), + z0 = svldnt1_vnum_x4 (pn8, x0, -4)) + +/* +** ldnt1_vnum_s8_m32: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_m32, svint8x4_t, int8_t, + z0 = svldnt1_vnum_s8_x4 (pn8, x0, -32), + z0 = svldnt1_vnum_x4 (pn8, x0, -32)) + +/* +** ldnt1_vnum_s8_m36: +** [^{]* +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_m36, svint8x4_t, int8_t, + z0 = svldnt1_vnum_s8_x4 (pn8, x0, -36), + z0 = svldnt1_vnum_x4 (pn8, x0, -36)) + +/* +** ldnt1_vnum_s8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_s8_x1, svint8x4_t, int8_t, + z0 = svldnt1_vnum_s8_x4 (pn8, x0, x1), + z0 = svldnt1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..66eb0ad6cad981cde3b027a7f7b12ec0676d3d4f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u16_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_u16_base: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_base, svuint16x2_t, uint16_t, + z0 = svldnt1_u16_x2 (pn8, x0), + z0 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_u16_index: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_index, svuint16x2_t, uint16_t, + z0 = svldnt1_u16_x2 (pn8, x0 + x1), + z0 = svldnt1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u16_1: +** incb x0 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_1, svuint16x2_t, uint16_t, + z0 = svldnt1_u16_x2 (pn8, x0 + svcnth ()), + z0 = svldnt1_x2 (pn8, x0 + svcnth ())) + +/* +** ldnt1_u16_2: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_2, svuint16x2_t, uint16_t, + z0 = svldnt1_u16_x2 (pn8, x0 + svcnth () * 2), + z0 = svldnt1_x2 (pn8, x0 + svcnth () * 2)) + +/* +** ldnt1_u16_14: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_14, svuint16x2_t, uint16_t, + z0 = svldnt1_u16_x2 (pn8, x0 + svcnth () * 14), + z0 = svldnt1_x2 (pn8, x0 + svcnth () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u16_16: +** incb x0, all, mul #16 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_16, svuint16x2_t, uint16_t, + z0 = svldnt1_u16_x2 (pn8, x0 + svcnth () * 16), + z0 = svldnt1_x2 (pn8, x0 + svcnth () * 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_u16_m1: +** decb x0 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_m1, svuint16x2_t, uint16_t, + z0 = svldnt1_u16_x2 (pn8, x0 - svcnth ()), + z0 = svldnt1_x2 (pn8, x0 - svcnth ())) + +/* +** ldnt1_u16_m2: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_m2, svuint16x2_t, uint16_t, + z0 = svldnt1_u16_x2 (pn8, x0 - svcnth () * 2), + z0 = svldnt1_x2 (pn8, x0 - svcnth () * 2)) + +/* +** ldnt1_u16_m16: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_m16, svuint16x2_t, uint16_t, + z0 = svldnt1_u16_x2 (pn8, x0 - svcnth () * 16), + z0 = svldnt1_x2 (pn8, x0 - svcnth () * 16)) + +/* +** ldnt1_u16_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_m18, svuint16x2_t, uint16_t, + z0 = svldnt1_u16_x2 (pn8, x0 - svcnth () * 18), + z0 = svldnt1_x2 (pn8, x0 - svcnth () * 18)) + +/* +** ldnt1_u16_z17: +** ldnt1h {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_z17, svuint16x2_t, uint16_t, + z17 = svldnt1_u16_x2 (pn8, x0), + z17 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_u16_z22: +** ldnt1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_z22, svuint16x2_t, uint16_t, + z22 = svldnt1_u16_x2 (pn8, x0), + z22 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_u16_z28: +** ldnt1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_z28, svuint16x2_t, uint16_t, + z28 = svldnt1_u16_x2 (pn8, x0), + z28 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_u16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_pn0, svuint16x2_t, uint16_t, + z0 = svldnt1_u16_x2 (pn0, x0), + z0 = svldnt1_x2 (pn0, x0)) + +/* +** ldnt1_u16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_pn7, svuint16x2_t, uint16_t, + z0 = svldnt1_u16_x2 (pn7, x0), + z0 = svldnt1_x2 (pn7, x0)) + +/* +** ldnt1_u16_pn15: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_pn15, svuint16x2_t, uint16_t, + z0 = svldnt1_u16_x2 (pn15, x0), + z0 = svldnt1_x2 (pn15, x0)) + +/* +** ldnt1_vnum_u16_0: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_0, svuint16x2_t, uint16_t, + z0 = svldnt1_vnum_u16_x2 (pn8, x0, 0), + z0 = svldnt1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u16_1: +** incb x0 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_1, svuint16x2_t, uint16_t, + z0 = svldnt1_vnum_u16_x2 (pn8, x0, 1), + z0 = svldnt1_vnum_x2 (pn8, x0, 1)) + +/* +** ldnt1_vnum_u16_2: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_2, svuint16x2_t, uint16_t, + z0 = svldnt1_vnum_u16_x2 (pn8, x0, 2), + z0 = svldnt1_vnum_x2 (pn8, x0, 2)) + +/* +** ldnt1_vnum_u16_14: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_14, svuint16x2_t, uint16_t, + z0 = svldnt1_vnum_u16_x2 (pn8, x0, 14), + z0 = svldnt1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_u16_16: +** incb x0, all, mul #16 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_16, svuint16x2_t, uint16_t, + z0 = svldnt1_vnum_u16_x2 (pn8, x0, 16), + z0 = svldnt1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u16_m1: +** decb x0 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_m1, svuint16x2_t, uint16_t, + z0 = svldnt1_vnum_u16_x2 (pn8, x0, -1), + z0 = svldnt1_vnum_x2 (pn8, x0, -1)) + +/* +** ldnt1_vnum_u16_m2: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_m2, svuint16x2_t, uint16_t, + z0 = svldnt1_vnum_u16_x2 (pn8, x0, -2), + z0 = svldnt1_vnum_x2 (pn8, x0, -2)) + +/* +** ldnt1_vnum_u16_m16: +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_m16, svuint16x2_t, uint16_t, + z0 = svldnt1_vnum_u16_x2 (pn8, x0, -16), + z0 = svldnt1_vnum_x2 (pn8, x0, -16)) + +/* +** ldnt1_vnum_u16_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_m18, svuint16x2_t, uint16_t, + z0 = svldnt1_vnum_u16_x2 (pn8, x0, -18), + z0 = svldnt1_vnum_x2 (pn8, x0, -18)) + +/* +** ldnt1_vnum_u16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_x1, svuint16x2_t, uint16_t, + z0 = svldnt1_vnum_u16_x2 (pn8, x0, x1), + z0 = svldnt1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..7a53c18f74e861685298dbaeb59c8e621e50537d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u16_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_u16_base: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_base, svuint16x4_t, uint16_t, + z0 = svldnt1_u16_x4 (pn8, x0), + z0 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_u16_index: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_index, svuint16x4_t, uint16_t, + z0 = svldnt1_u16_x4 (pn8, x0 + x1), + z0 = svldnt1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u16_1: +** incb x0 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_1, svuint16x4_t, uint16_t, + z0 = svldnt1_u16_x4 (pn8, x0 + svcnth ()), + z0 = svldnt1_x4 (pn8, x0 + svcnth ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u16_2: +** incb x0, all, mul #2 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_2, svuint16x4_t, uint16_t, + z0 = svldnt1_u16_x4 (pn8, x0 + svcnth () * 2), + z0 = svldnt1_x4 (pn8, x0 + svcnth () * 2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_u16_3: +** incb x0, all, mul #3 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_3, svuint16x4_t, uint16_t, + z0 = svldnt1_u16_x4 (pn8, x0 + svcnth () * 3), + z0 = svldnt1_x4 (pn8, x0 + svcnth () * 3)) + +/* +** ldnt1_u16_4: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_4, svuint16x4_t, uint16_t, + z0 = svldnt1_u16_x4 (pn8, x0 + svcnth () * 4), + z0 = svldnt1_x4 (pn8, x0 + svcnth () * 4)) + +/* +** ldnt1_u16_28: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_28, svuint16x4_t, uint16_t, + z0 = svldnt1_u16_x4 (pn8, x0 + svcnth () * 28), + z0 = svldnt1_x4 (pn8, x0 + svcnth () * 28)) + +/* +** ldnt1_u16_32: +** [^{]* +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_32, svuint16x4_t, uint16_t, + z0 = svldnt1_u16_x4 (pn8, x0 + svcnth () * 32), + z0 = svldnt1_x4 (pn8, x0 + svcnth () * 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u16_m1: +** decb x0 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_m1, svuint16x4_t, uint16_t, + z0 = svldnt1_u16_x4 (pn8, x0 - svcnth ()), + z0 = svldnt1_x4 (pn8, x0 - svcnth ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u16_m2: +** decb x0, all, mul #2 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_m2, svuint16x4_t, uint16_t, + z0 = svldnt1_u16_x4 (pn8, x0 - svcnth () * 2), + z0 = svldnt1_x4 (pn8, x0 - svcnth () * 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u16_m3: +** decb x0, all, mul #3 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_m3, svuint16x4_t, uint16_t, + z0 = svldnt1_u16_x4 (pn8, x0 - svcnth () * 3), + z0 = svldnt1_x4 (pn8, x0 - svcnth () * 3)) + +/* +** ldnt1_u16_m4: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_m4, svuint16x4_t, uint16_t, + z0 = svldnt1_u16_x4 (pn8, x0 - svcnth () * 4), + z0 = svldnt1_x4 (pn8, x0 - svcnth () * 4)) + +/* +** ldnt1_u16_m32: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_m32, svuint16x4_t, uint16_t, + z0 = svldnt1_u16_x4 (pn8, x0 - svcnth () * 32), + z0 = svldnt1_x4 (pn8, x0 - svcnth () * 32)) + +/* +** ldnt1_u16_m36: +** [^{]* +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_m36, svuint16x4_t, uint16_t, + z0 = svldnt1_u16_x4 (pn8, x0 - svcnth () * 36), + z0 = svldnt1_x4 (pn8, x0 - svcnth () * 36)) + +/* +** ldnt1_u16_z17: +** ldnt1h {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_z17, svuint16x4_t, uint16_t, + z17 = svldnt1_u16_x4 (pn8, x0), + z17 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_u16_z22: +** ldnt1h {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_z22, svuint16x4_t, uint16_t, + z22 = svldnt1_u16_x4 (pn8, x0), + z22 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_u16_z28: +** ldnt1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_z28, svuint16x4_t, uint16_t, + z28 = svldnt1_u16_x4 (pn8, x0), + z28 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_u16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_pn0, svuint16x4_t, uint16_t, + z0 = svldnt1_u16_x4 
(pn0, x0), + z0 = svldnt1_x4 (pn0, x0)) + +/* +** ldnt1_u16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_pn7, svuint16x4_t, uint16_t, + z0 = svldnt1_u16_x4 (pn7, x0), + z0 = svldnt1_x4 (pn7, x0)) + +/* +** ldnt1_u16_pn15: +** ldnt1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u16_pn15, svuint16x4_t, uint16_t, + z0 = svldnt1_u16_x4 (pn15, x0), + z0 = svldnt1_x4 (pn15, x0)) + +/* +** ldnt1_vnum_u16_0: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_0, svuint16x4_t, uint16_t, + z0 = svldnt1_vnum_u16_x4 (pn8, x0, 0), + z0 = svldnt1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u16_1: +** incb x0 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_1, svuint16x4_t, uint16_t, + z0 = svldnt1_vnum_u16_x4 (pn8, x0, 1), + z0 = svldnt1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u16_2: +** incb x0, all, mul #2 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_2, svuint16x4_t, uint16_t, + z0 = svldnt1_vnum_u16_x4 (pn8, x0, 2), + z0 = svldnt1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u16_3: +** incb x0, all, mul #3 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_3, svuint16x4_t, uint16_t, + z0 = svldnt1_vnum_u16_x4 (pn8, x0, 3), + z0 = svldnt1_vnum_x4 (pn8, x0, 3)) + +/* +** ldnt1_vnum_u16_4: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_4, svuint16x4_t, uint16_t, + z0 = svldnt1_vnum_u16_x4 (pn8, x0, 4), + z0 = svldnt1_vnum_x4 (pn8, x0, 4)) + +/* +** ldnt1_vnum_u16_28: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_28, svuint16x4_t, uint16_t, + z0 = svldnt1_vnum_u16_x4 (pn8, x0, 28), + z0 = svldnt1_vnum_x4 (pn8, x0, 28)) + +/* +** ldnt1_vnum_u16_32: +** [^{]* +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_32, svuint16x4_t, uint16_t, + z0 = svldnt1_vnum_u16_x4 (pn8, x0, 32), + z0 = svldnt1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u16_m1: +** decb x0 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_m1, svuint16x4_t, uint16_t, + z0 = svldnt1_vnum_u16_x4 (pn8, x0, -1), + z0 = svldnt1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u16_m2: +** decb x0, all, mul #2 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_m2, svuint16x4_t, uint16_t, + z0 = svldnt1_vnum_u16_x4 (pn8, x0, -2), + z0 = svldnt1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_u16_m3: +** decb x0, all, mul #3 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_m3, svuint16x4_t, uint16_t, + z0 = svldnt1_vnum_u16_x4 (pn8, x0, -3), + z0 = svldnt1_vnum_x4 (pn8, x0, -3)) + +/* +** ldnt1_vnum_u16_m4: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_m4, svuint16x4_t, uint16_t, + z0 = svldnt1_vnum_u16_x4 (pn8, x0, -4), + z0 = svldnt1_vnum_x4 (pn8, x0, -4)) + +/* +** ldnt1_vnum_u16_m32: +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_m32, svuint16x4_t, uint16_t, + z0 = svldnt1_vnum_u16_x4 (pn8, x0, -32), + z0 = svldnt1_vnum_x4 (pn8, x0, -32)) + +/* +** ldnt1_vnum_u16_m36: +** [^{]* +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_m36, svuint16x4_t, uint16_t, + z0 = svldnt1_vnum_u16_x4 (pn8, x0, -36), + z0 = svldnt1_vnum_x4 (pn8, x0, -36)) + +/* +** ldnt1_vnum_u16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u16_x1, svuint16x4_t, uint16_t, + z0 = svldnt1_vnum_u16_x4 (pn8, x0, x1), + z0 = svldnt1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..6dd278cc4b8be4091ef2b70ea5d375a16b28016e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u32_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_u32_base: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_base, svuint32x2_t, uint32_t, + z0 = svldnt1_u32_x2 (pn8, x0), + z0 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_u32_index: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_index, svuint32x2_t, uint32_t, + z0 = svldnt1_u32_x2 (pn8, x0 + x1), + z0 = svldnt1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u32_1: +** incb x0 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_1, svuint32x2_t, uint32_t, + z0 = svldnt1_u32_x2 (pn8, x0 + svcntw ()), + z0 = svldnt1_x2 (pn8, x0 + svcntw ())) + +/* +** ldnt1_u32_2: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_2, svuint32x2_t, uint32_t, + z0 = svldnt1_u32_x2 (pn8, x0 + svcntw () * 2), + z0 = svldnt1_x2 (pn8, x0 + svcntw () * 2)) + +/* +** ldnt1_u32_14: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_14, svuint32x2_t, uint32_t, + z0 = svldnt1_u32_x2 (pn8, x0 + svcntw () * 14), + z0 = svldnt1_x2 (pn8, x0 + svcntw () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u32_16: +** incb x0, all, mul #16 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_16, svuint32x2_t, uint32_t, + z0 = svldnt1_u32_x2 (pn8, x0 + svcntw () * 16), + z0 = svldnt1_x2 (pn8, x0 + svcntw () * 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_u32_m1: +** decb x0 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_m1, svuint32x2_t, uint32_t, + z0 = svldnt1_u32_x2 (pn8, x0 - svcntw ()), + z0 = svldnt1_x2 (pn8, x0 - svcntw ())) + +/* +** ldnt1_u32_m2: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_m2, svuint32x2_t, uint32_t, + z0 = svldnt1_u32_x2 (pn8, x0 - svcntw () * 2), + z0 = svldnt1_x2 (pn8, x0 - svcntw () * 2)) + +/* +** ldnt1_u32_m16: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_m16, svuint32x2_t, uint32_t, + z0 = svldnt1_u32_x2 (pn8, x0 - svcntw () * 16), + z0 = svldnt1_x2 (pn8, x0 - svcntw () * 16)) + +/* +** ldnt1_u32_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_m18, svuint32x2_t, uint32_t, + z0 = svldnt1_u32_x2 (pn8, x0 - svcntw () * 18), + z0 = svldnt1_x2 (pn8, x0 - svcntw () * 18)) + +/* +** ldnt1_u32_z17: +** ldnt1w {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_z17, svuint32x2_t, uint32_t, + z17 = svldnt1_u32_x2 (pn8, x0), + z17 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_u32_z22: +** ldnt1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_z22, svuint32x2_t, uint32_t, + z22 = svldnt1_u32_x2 (pn8, x0), + z22 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_u32_z28: +** ldnt1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_z28, svuint32x2_t, uint32_t, + z28 = svldnt1_u32_x2 (pn8, x0), + z28 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_u32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_pn0, svuint32x2_t, uint32_t, + z0 = svldnt1_u32_x2 (pn0, x0), + z0 = svldnt1_x2 (pn0, x0)) + +/* +** ldnt1_u32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_pn7, svuint32x2_t, uint32_t, + z0 = svldnt1_u32_x2 (pn7, x0), + z0 = svldnt1_x2 (pn7, x0)) + +/* +** ldnt1_u32_pn15: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_pn15, svuint32x2_t, uint32_t, + z0 = svldnt1_u32_x2 (pn15, x0), + z0 = svldnt1_x2 (pn15, x0)) + +/* +** ldnt1_vnum_u32_0: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_0, svuint32x2_t, uint32_t, + z0 = svldnt1_vnum_u32_x2 (pn8, x0, 0), + z0 = svldnt1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u32_1: +** incb x0 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_1, svuint32x2_t, uint32_t, + z0 = svldnt1_vnum_u32_x2 (pn8, x0, 1), + z0 = svldnt1_vnum_x2 (pn8, x0, 1)) + +/* +** ldnt1_vnum_u32_2: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_2, svuint32x2_t, uint32_t, + z0 = svldnt1_vnum_u32_x2 (pn8, x0, 2), + z0 = svldnt1_vnum_x2 (pn8, x0, 2)) + +/* +** ldnt1_vnum_u32_14: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_14, svuint32x2_t, uint32_t, + z0 = svldnt1_vnum_u32_x2 (pn8, x0, 14), + z0 = svldnt1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_u32_16: +** incb x0, all, mul #16 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_16, svuint32x2_t, uint32_t, + z0 = svldnt1_vnum_u32_x2 (pn8, x0, 16), + z0 = svldnt1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u32_m1: +** decb x0 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_m1, svuint32x2_t, uint32_t, + z0 = svldnt1_vnum_u32_x2 (pn8, x0, -1), + z0 = svldnt1_vnum_x2 (pn8, x0, -1)) + +/* +** ldnt1_vnum_u32_m2: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_m2, svuint32x2_t, uint32_t, + z0 = svldnt1_vnum_u32_x2 (pn8, x0, -2), + z0 = svldnt1_vnum_x2 (pn8, x0, -2)) + +/* +** ldnt1_vnum_u32_m16: +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_m16, svuint32x2_t, uint32_t, + z0 = svldnt1_vnum_u32_x2 (pn8, x0, -16), + z0 = svldnt1_vnum_x2 (pn8, x0, -16)) + +/* +** ldnt1_vnum_u32_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_m18, svuint32x2_t, uint32_t, + z0 = svldnt1_vnum_u32_x2 (pn8, x0, -18), + z0 = svldnt1_vnum_x2 (pn8, x0, -18)) + +/* +** ldnt1_vnum_u32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_x1, svuint32x2_t, uint32_t, + z0 = svldnt1_vnum_u32_x2 (pn8, x0, x1), + z0 = svldnt1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..2e3280122b50135662b7a0181f4d039cb940cfa6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u32_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_u32_base: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_base, svuint32x4_t, uint32_t, + z0 = svldnt1_u32_x4 (pn8, x0), + z0 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_u32_index: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_index, svuint32x4_t, uint32_t, + z0 = svldnt1_u32_x4 (pn8, x0 + x1), + z0 = svldnt1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u32_1: +** incb x0 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_1, svuint32x4_t, uint32_t, + z0 = svldnt1_u32_x4 (pn8, x0 + svcntw ()), + z0 = svldnt1_x4 (pn8, x0 + svcntw ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u32_2: +** incb x0, all, mul #2 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_2, svuint32x4_t, uint32_t, + z0 = svldnt1_u32_x4 (pn8, x0 + svcntw () * 2), + z0 = svldnt1_x4 (pn8, x0 + svcntw () * 2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_u32_3: +** incb x0, all, mul #3 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_3, svuint32x4_t, uint32_t, + z0 = svldnt1_u32_x4 (pn8, x0 + svcntw () * 3), + z0 = svldnt1_x4 (pn8, x0 + svcntw () * 3)) + +/* +** ldnt1_u32_4: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_4, svuint32x4_t, uint32_t, + z0 = svldnt1_u32_x4 (pn8, x0 + svcntw () * 4), + z0 = svldnt1_x4 (pn8, x0 + svcntw () * 4)) + +/* +** ldnt1_u32_28: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_28, svuint32x4_t, uint32_t, + z0 = svldnt1_u32_x4 (pn8, x0 + svcntw () * 28), + z0 = svldnt1_x4 (pn8, x0 + svcntw () * 28)) + +/* +** ldnt1_u32_32: +** [^{]* +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_32, svuint32x4_t, uint32_t, + z0 = svldnt1_u32_x4 (pn8, x0 + svcntw () * 32), + z0 = svldnt1_x4 (pn8, x0 + svcntw () * 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u32_m1: +** decb x0 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_m1, svuint32x4_t, uint32_t, + z0 = svldnt1_u32_x4 (pn8, x0 - svcntw ()), + z0 = svldnt1_x4 (pn8, x0 - svcntw ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u32_m2: +** decb x0, all, mul #2 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_m2, svuint32x4_t, uint32_t, + z0 = svldnt1_u32_x4 (pn8, x0 - svcntw () * 2), + z0 = svldnt1_x4 (pn8, x0 - svcntw () * 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u32_m3: +** decb x0, all, mul #3 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_m3, svuint32x4_t, uint32_t, + z0 = svldnt1_u32_x4 (pn8, x0 - svcntw () * 3), + z0 = svldnt1_x4 (pn8, x0 - svcntw () * 3)) + +/* +** ldnt1_u32_m4: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_m4, svuint32x4_t, uint32_t, + z0 = svldnt1_u32_x4 (pn8, x0 - svcntw () * 4), + z0 = svldnt1_x4 (pn8, x0 - svcntw () * 4)) + +/* +** ldnt1_u32_m32: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_m32, svuint32x4_t, uint32_t, + z0 = svldnt1_u32_x4 (pn8, x0 - svcntw () * 32), + z0 = svldnt1_x4 (pn8, x0 - svcntw () * 32)) + +/* +** ldnt1_u32_m36: +** [^{]* +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_m36, svuint32x4_t, uint32_t, + z0 = svldnt1_u32_x4 (pn8, x0 - svcntw () * 36), + z0 = svldnt1_x4 (pn8, x0 - svcntw () * 36)) + +/* +** ldnt1_u32_z17: +** ldnt1w {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_z17, svuint32x4_t, uint32_t, + z17 = svldnt1_u32_x4 (pn8, x0), + z17 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_u32_z22: +** ldnt1w {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_z22, svuint32x4_t, uint32_t, + z22 = svldnt1_u32_x4 (pn8, x0), + z22 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_u32_z28: +** ldnt1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_z28, svuint32x4_t, uint32_t, + z28 = svldnt1_u32_x4 (pn8, x0), + z28 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_u32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_pn0, svuint32x4_t, uint32_t, + z0 = svldnt1_u32_x4 
(pn0, x0), + z0 = svldnt1_x4 (pn0, x0)) + +/* +** ldnt1_u32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_pn7, svuint32x4_t, uint32_t, + z0 = svldnt1_u32_x4 (pn7, x0), + z0 = svldnt1_x4 (pn7, x0)) + +/* +** ldnt1_u32_pn15: +** ldnt1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u32_pn15, svuint32x4_t, uint32_t, + z0 = svldnt1_u32_x4 (pn15, x0), + z0 = svldnt1_x4 (pn15, x0)) + +/* +** ldnt1_vnum_u32_0: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_0, svuint32x4_t, uint32_t, + z0 = svldnt1_vnum_u32_x4 (pn8, x0, 0), + z0 = svldnt1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u32_1: +** incb x0 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_1, svuint32x4_t, uint32_t, + z0 = svldnt1_vnum_u32_x4 (pn8, x0, 1), + z0 = svldnt1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u32_2: +** incb x0, all, mul #2 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_2, svuint32x4_t, uint32_t, + z0 = svldnt1_vnum_u32_x4 (pn8, x0, 2), + z0 = svldnt1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u32_3: +** incb x0, all, mul #3 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_3, svuint32x4_t, uint32_t, + z0 = svldnt1_vnum_u32_x4 (pn8, x0, 3), + z0 = svldnt1_vnum_x4 (pn8, x0, 3)) + +/* +** ldnt1_vnum_u32_4: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_4, svuint32x4_t, uint32_t, + z0 = svldnt1_vnum_u32_x4 (pn8, x0, 4), + z0 = svldnt1_vnum_x4 (pn8, x0, 4)) + +/* +** ldnt1_vnum_u32_28: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_28, svuint32x4_t, uint32_t, + z0 = svldnt1_vnum_u32_x4 (pn8, x0, 28), + z0 = svldnt1_vnum_x4 (pn8, x0, 28)) + +/* +** ldnt1_vnum_u32_32: +** [^{]* +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_32, svuint32x4_t, uint32_t, + z0 = svldnt1_vnum_u32_x4 (pn8, x0, 32), + z0 = svldnt1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u32_m1: +** decb x0 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_m1, svuint32x4_t, uint32_t, + z0 = svldnt1_vnum_u32_x4 (pn8, x0, -1), + z0 = svldnt1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u32_m2: +** decb x0, all, mul #2 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_m2, svuint32x4_t, uint32_t, + z0 = svldnt1_vnum_u32_x4 (pn8, x0, -2), + z0 = svldnt1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_u32_m3: +** decb x0, all, mul #3 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_m3, svuint32x4_t, uint32_t, + z0 = svldnt1_vnum_u32_x4 (pn8, x0, -3), + z0 = svldnt1_vnum_x4 (pn8, x0, -3)) + +/* +** ldnt1_vnum_u32_m4: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_m4, svuint32x4_t, uint32_t, + z0 = svldnt1_vnum_u32_x4 (pn8, x0, -4), + z0 = svldnt1_vnum_x4 (pn8, x0, -4)) + +/* +** ldnt1_vnum_u32_m32: +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_m32, svuint32x4_t, uint32_t, + z0 = svldnt1_vnum_u32_x4 (pn8, x0, -32), + z0 = svldnt1_vnum_x4 (pn8, x0, -32)) + +/* +** ldnt1_vnum_u32_m36: +** [^{]* +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_m36, svuint32x4_t, uint32_t, + z0 = svldnt1_vnum_u32_x4 (pn8, x0, -36), + z0 = svldnt1_vnum_x4 (pn8, x0, -36)) + +/* +** ldnt1_vnum_u32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u32_x1, svuint32x4_t, uint32_t, + z0 = svldnt1_vnum_u32_x4 (pn8, x0, x1), + z0 = svldnt1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..fdabbbfd13e19eefd78e98cb7832b16cdc504d5c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u64_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_u64_base: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_base, svuint64x2_t, uint64_t, + z0 = svldnt1_u64_x2 (pn8, x0), + z0 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_u64_index: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_index, svuint64x2_t, uint64_t, + z0 = svldnt1_u64_x2 (pn8, x0 + x1), + z0 = svldnt1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u64_1: +** incb x0 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_1, svuint64x2_t, uint64_t, + z0 = svldnt1_u64_x2 (pn8, x0 + svcntd ()), + z0 = svldnt1_x2 (pn8, x0 + svcntd ())) + +/* +** ldnt1_u64_2: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_2, svuint64x2_t, uint64_t, + z0 = svldnt1_u64_x2 (pn8, x0 + svcntd () * 2), + z0 = svldnt1_x2 (pn8, x0 + svcntd () * 2)) + +/* +** ldnt1_u64_14: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_14, svuint64x2_t, uint64_t, + z0 = svldnt1_u64_x2 (pn8, x0 + svcntd () * 14), + z0 = svldnt1_x2 (pn8, x0 + svcntd () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u64_16: +** incb x0, all, mul #16 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_16, svuint64x2_t, uint64_t, + z0 = svldnt1_u64_x2 (pn8, x0 + svcntd () * 16), + z0 = svldnt1_x2 (pn8, x0 + svcntd () * 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_u64_m1: +** decb x0 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_m1, svuint64x2_t, uint64_t, + z0 = svldnt1_u64_x2 (pn8, x0 - svcntd ()), + z0 = svldnt1_x2 (pn8, x0 - svcntd ())) + +/* +** ldnt1_u64_m2: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_m2, svuint64x2_t, uint64_t, + z0 = svldnt1_u64_x2 (pn8, x0 - svcntd () * 2), + z0 = svldnt1_x2 (pn8, x0 - svcntd () * 2)) + +/* +** ldnt1_u64_m16: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_m16, svuint64x2_t, uint64_t, + z0 = svldnt1_u64_x2 (pn8, x0 - svcntd () * 16), + z0 = svldnt1_x2 (pn8, x0 - svcntd () * 16)) + +/* +** ldnt1_u64_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_m18, svuint64x2_t, uint64_t, + z0 = svldnt1_u64_x2 (pn8, x0 - svcntd () * 18), + z0 = svldnt1_x2 (pn8, x0 - svcntd () * 18)) + +/* +** ldnt1_u64_z17: +** ldnt1d {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_z17, svuint64x2_t, uint64_t, + z17 = svldnt1_u64_x2 (pn8, x0), + z17 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_u64_z22: +** ldnt1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_z22, svuint64x2_t, uint64_t, + z22 = svldnt1_u64_x2 (pn8, x0), + z22 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_u64_z28: +** ldnt1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_z28, svuint64x2_t, uint64_t, + z28 = svldnt1_u64_x2 (pn8, x0), + z28 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_u64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_pn0, svuint64x2_t, uint64_t, + z0 = svldnt1_u64_x2 (pn0, x0), + z0 = svldnt1_x2 (pn0, x0)) + +/* +** ldnt1_u64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_pn7, svuint64x2_t, uint64_t, + z0 = svldnt1_u64_x2 (pn7, x0), + z0 = svldnt1_x2 (pn7, x0)) + +/* +** ldnt1_u64_pn15: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_pn15, svuint64x2_t, uint64_t, + z0 = svldnt1_u64_x2 (pn15, x0), + z0 = svldnt1_x2 (pn15, x0)) + +/* +** ldnt1_vnum_u64_0: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_0, svuint64x2_t, uint64_t, + z0 = svldnt1_vnum_u64_x2 (pn8, x0, 0), + z0 = svldnt1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u64_1: +** incb x0 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_1, svuint64x2_t, uint64_t, + z0 = svldnt1_vnum_u64_x2 (pn8, x0, 1), + z0 = svldnt1_vnum_x2 (pn8, x0, 1)) + +/* +** ldnt1_vnum_u64_2: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_2, svuint64x2_t, uint64_t, + z0 = svldnt1_vnum_u64_x2 (pn8, x0, 2), + z0 = svldnt1_vnum_x2 (pn8, x0, 2)) + +/* +** ldnt1_vnum_u64_14: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_14, svuint64x2_t, uint64_t, + z0 = svldnt1_vnum_u64_x2 (pn8, x0, 14), + z0 = svldnt1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_u64_16: +** incb x0, all, mul #16 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_16, svuint64x2_t, uint64_t, + z0 = svldnt1_vnum_u64_x2 (pn8, x0, 16), + z0 = svldnt1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u64_m1: +** decb x0 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_m1, svuint64x2_t, uint64_t, + z0 = svldnt1_vnum_u64_x2 (pn8, x0, -1), + z0 = svldnt1_vnum_x2 (pn8, x0, -1)) + +/* +** ldnt1_vnum_u64_m2: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_m2, svuint64x2_t, uint64_t, + z0 = svldnt1_vnum_u64_x2 (pn8, x0, -2), + z0 = svldnt1_vnum_x2 (pn8, x0, -2)) + +/* +** ldnt1_vnum_u64_m16: +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_m16, svuint64x2_t, uint64_t, + z0 = svldnt1_vnum_u64_x2 (pn8, x0, -16), + z0 = svldnt1_vnum_x2 (pn8, x0, -16)) + +/* +** ldnt1_vnum_u64_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_m18, svuint64x2_t, uint64_t, + z0 = svldnt1_vnum_u64_x2 (pn8, x0, -18), + z0 = svldnt1_vnum_x2 (pn8, x0, -18)) + +/* +** ldnt1_vnum_u64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_x1, svuint64x2_t, uint64_t, + z0 = svldnt1_vnum_u64_x2 (pn8, x0, x1), + z0 = svldnt1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..e56fa002e682fd26c8dd6a8d19030fdc39ca0ed5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u64_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_u64_base: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_base, svuint64x4_t, uint64_t, + z0 = svldnt1_u64_x4 (pn8, x0), + z0 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_u64_index: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_index, svuint64x4_t, uint64_t, + z0 = svldnt1_u64_x4 (pn8, x0 + x1), + z0 = svldnt1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u64_1: +** incb x0 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_1, svuint64x4_t, uint64_t, + z0 = svldnt1_u64_x4 (pn8, x0 + svcntd ()), + z0 = svldnt1_x4 (pn8, x0 + svcntd ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u64_2: +** incb x0, all, mul #2 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_2, svuint64x4_t, uint64_t, + z0 = svldnt1_u64_x4 (pn8, x0 + svcntd () * 2), + z0 = svldnt1_x4 (pn8, x0 + svcntd () * 2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_u64_3: +** incb x0, all, mul #3 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_3, svuint64x4_t, uint64_t, + z0 = svldnt1_u64_x4 (pn8, x0 + svcntd () * 3), + z0 = svldnt1_x4 (pn8, x0 + svcntd () * 3)) + +/* +** ldnt1_u64_4: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_4, svuint64x4_t, uint64_t, + z0 = svldnt1_u64_x4 (pn8, x0 + svcntd () * 4), + z0 = svldnt1_x4 (pn8, x0 + svcntd () * 4)) + +/* +** ldnt1_u64_28: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_28, svuint64x4_t, uint64_t, + z0 = svldnt1_u64_x4 (pn8, x0 + svcntd () * 28), + z0 = svldnt1_x4 (pn8, x0 + svcntd () * 28)) + +/* +** ldnt1_u64_32: +** [^{]* +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_32, svuint64x4_t, uint64_t, + z0 = svldnt1_u64_x4 (pn8, x0 + svcntd () * 32), + z0 = svldnt1_x4 (pn8, x0 + svcntd () * 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u64_m1: +** decb x0 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_m1, svuint64x4_t, uint64_t, + z0 = svldnt1_u64_x4 (pn8, x0 - svcntd ()), + z0 = svldnt1_x4 (pn8, x0 - svcntd ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u64_m2: +** decb x0, all, mul #2 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_m2, svuint64x4_t, uint64_t, + z0 = svldnt1_u64_x4 (pn8, x0 - svcntd () * 2), + z0 = svldnt1_x4 (pn8, x0 - svcntd () * 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u64_m3: +** decb x0, all, mul #3 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_m3, svuint64x4_t, uint64_t, + z0 = svldnt1_u64_x4 (pn8, x0 - svcntd () * 3), + z0 = svldnt1_x4 (pn8, x0 - svcntd () * 3)) + +/* +** ldnt1_u64_m4: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_m4, svuint64x4_t, uint64_t, + z0 = svldnt1_u64_x4 (pn8, x0 - svcntd () * 4), + z0 = svldnt1_x4 (pn8, x0 - svcntd () * 4)) + +/* +** ldnt1_u64_m32: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_m32, svuint64x4_t, uint64_t, + z0 = svldnt1_u64_x4 (pn8, x0 - svcntd () * 32), + z0 = svldnt1_x4 (pn8, x0 - svcntd () * 32)) + +/* +** ldnt1_u64_m36: +** [^{]* +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_m36, svuint64x4_t, uint64_t, + z0 = svldnt1_u64_x4 (pn8, x0 - svcntd () * 36), + z0 = svldnt1_x4 (pn8, x0 - svcntd () * 36)) + +/* +** ldnt1_u64_z17: +** ldnt1d {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_z17, svuint64x4_t, uint64_t, + z17 = svldnt1_u64_x4 (pn8, x0), + z17 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_u64_z22: +** ldnt1d {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_z22, svuint64x4_t, uint64_t, + z22 = svldnt1_u64_x4 (pn8, x0), + z22 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_u64_z28: +** ldnt1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_z28, svuint64x4_t, uint64_t, + z28 = svldnt1_u64_x4 (pn8, x0), + z28 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_u64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_pn0, svuint64x4_t, uint64_t, + z0 = svldnt1_u64_x4 
(pn0, x0), + z0 = svldnt1_x4 (pn0, x0)) + +/* +** ldnt1_u64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_pn7, svuint64x4_t, uint64_t, + z0 = svldnt1_u64_x4 (pn7, x0), + z0 = svldnt1_x4 (pn7, x0)) + +/* +** ldnt1_u64_pn15: +** ldnt1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u64_pn15, svuint64x4_t, uint64_t, + z0 = svldnt1_u64_x4 (pn15, x0), + z0 = svldnt1_x4 (pn15, x0)) + +/* +** ldnt1_vnum_u64_0: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_0, svuint64x4_t, uint64_t, + z0 = svldnt1_vnum_u64_x4 (pn8, x0, 0), + z0 = svldnt1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u64_1: +** incb x0 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_1, svuint64x4_t, uint64_t, + z0 = svldnt1_vnum_u64_x4 (pn8, x0, 1), + z0 = svldnt1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u64_2: +** incb x0, all, mul #2 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_2, svuint64x4_t, uint64_t, + z0 = svldnt1_vnum_u64_x4 (pn8, x0, 2), + z0 = svldnt1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u64_3: +** incb x0, all, mul #3 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_3, svuint64x4_t, uint64_t, + z0 = svldnt1_vnum_u64_x4 (pn8, x0, 3), + z0 = svldnt1_vnum_x4 (pn8, x0, 3)) + +/* +** ldnt1_vnum_u64_4: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_4, svuint64x4_t, uint64_t, + z0 = svldnt1_vnum_u64_x4 (pn8, x0, 4), + z0 = svldnt1_vnum_x4 (pn8, x0, 4)) + +/* +** ldnt1_vnum_u64_28: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_28, svuint64x4_t, uint64_t, + z0 = svldnt1_vnum_u64_x4 (pn8, x0, 28), + z0 = svldnt1_vnum_x4 (pn8, x0, 28)) + +/* +** ldnt1_vnum_u64_32: +** [^{]* +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_32, svuint64x4_t, uint64_t, + z0 = svldnt1_vnum_u64_x4 (pn8, x0, 32), + z0 = svldnt1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u64_m1: +** decb x0 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_m1, svuint64x4_t, uint64_t, + z0 = svldnt1_vnum_u64_x4 (pn8, x0, -1), + z0 = svldnt1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u64_m2: +** decb x0, all, mul #2 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_m2, svuint64x4_t, uint64_t, + z0 = svldnt1_vnum_u64_x4 (pn8, x0, -2), + z0 = svldnt1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_u64_m3: +** decb x0, all, mul #3 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_m3, svuint64x4_t, uint64_t, + z0 = svldnt1_vnum_u64_x4 (pn8, x0, -3), + z0 = svldnt1_vnum_x4 (pn8, x0, -3)) + +/* +** ldnt1_vnum_u64_m4: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_m4, svuint64x4_t, uint64_t, + z0 = svldnt1_vnum_u64_x4 (pn8, x0, -4), + z0 = svldnt1_vnum_x4 (pn8, x0, -4)) + +/* +** ldnt1_vnum_u64_m32: +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_m32, svuint64x4_t, uint64_t, + z0 = svldnt1_vnum_u64_x4 (pn8, x0, -32), + z0 = svldnt1_vnum_x4 (pn8, x0, -32)) + +/* +** ldnt1_vnum_u64_m36: +** [^{]* +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_m36, svuint64x4_t, uint64_t, + z0 = svldnt1_vnum_u64_x4 (pn8, x0, -36), + z0 = svldnt1_vnum_x4 (pn8, x0, -36)) + +/* +** ldnt1_vnum_u64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u64_x1, svuint64x4_t, uint64_t, + z0 = svldnt1_vnum_u64_x4 (pn8, x0, x1), + z0 = svldnt1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..67f3faada15133e29e44fef388a6c3e60800c405 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u8_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_u8_base: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_base, svuint8x2_t, uint8_t, + z0 = svldnt1_u8_x2 (pn8, x0), + z0 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_u8_index: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_index, svuint8x2_t, uint8_t, + z0 = svldnt1_u8_x2 (pn8, x0 + x1), + z0 = svldnt1_x2 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u8_1: +** incb x0 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_1, svuint8x2_t, uint8_t, + z0 = svldnt1_u8_x2 (pn8, x0 + svcntb ()), + z0 = svldnt1_x2 (pn8, x0 + svcntb ())) + +/* +** ldnt1_u8_2: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_2, svuint8x2_t, uint8_t, + z0 = svldnt1_u8_x2 (pn8, x0 + svcntb () * 2), + z0 = svldnt1_x2 (pn8, x0 + svcntb () * 2)) + +/* +** ldnt1_u8_14: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_14, svuint8x2_t, uint8_t, + z0 = svldnt1_u8_x2 (pn8, x0 + svcntb () * 14), + z0 = svldnt1_x2 (pn8, x0 + svcntb () * 14)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u8_16: +** incb x0, all, mul #16 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_16, svuint8x2_t, uint8_t, + z0 = svldnt1_u8_x2 (pn8, x0 + svcntb () * 16), + z0 = svldnt1_x2 (pn8, x0 + svcntb () * 16)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_u8_m1: +** decb x0 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_m1, svuint8x2_t, uint8_t, + z0 = svldnt1_u8_x2 (pn8, x0 - svcntb ()), + z0 = svldnt1_x2 (pn8, x0 - svcntb ())) + +/* +** ldnt1_u8_m2: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_m2, svuint8x2_t, uint8_t, + z0 = svldnt1_u8_x2 (pn8, x0 - svcntb () * 2), + z0 = svldnt1_x2 (pn8, x0 - svcntb () * 2)) + +/* +** ldnt1_u8_m16: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_m16, svuint8x2_t, uint8_t, + z0 = svldnt1_u8_x2 (pn8, x0 - svcntb () * 16), + z0 = svldnt1_x2 (pn8, x0 - svcntb () * 16)) + +/* +** ldnt1_u8_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_m18, svuint8x2_t, uint8_t, + z0 = svldnt1_u8_x2 (pn8, x0 - svcntb () * 18), + z0 = svldnt1_x2 (pn8, x0 - svcntb () * 18)) + +/* +** ldnt1_u8_z17: +** ldnt1b {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_z17, svuint8x2_t, uint8_t, + z17 = svldnt1_u8_x2 (pn8, x0), + z17 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_u8_z22: +** ldnt1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_z22, svuint8x2_t, uint8_t, + z22 = svldnt1_u8_x2 (pn8, x0), + z22 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_u8_z28: +** ldnt1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_z28, svuint8x2_t, uint8_t, + z28 = svldnt1_u8_x2 (pn8, x0), + z28 = svldnt1_x2 (pn8, x0)) + +/* +** ldnt1_u8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_pn0, svuint8x2_t, uint8_t, + z0 = svldnt1_u8_x2 (pn0, x0), + z0 = svldnt1_x2 (pn0, x0)) + +/* +** ldnt1_u8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_pn7, svuint8x2_t, uint8_t, + z0 = svldnt1_u8_x2 (pn7, x0), + z0 = svldnt1_x2 (pn7, x0)) + +/* +** ldnt1_u8_pn15: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_pn15, svuint8x2_t, uint8_t, + z0 = svldnt1_u8_x2 (pn15, x0), + z0 = svldnt1_x2 (pn15, x0)) + +/* +** ldnt1_vnum_u8_0: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_0, svuint8x2_t, uint8_t, + z0 = svldnt1_vnum_u8_x2 (pn8, x0, 0), + z0 = svldnt1_vnum_x2 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u8_1: +** incb x0 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_1, svuint8x2_t, uint8_t, + z0 = svldnt1_vnum_u8_x2 (pn8, x0, 1), + z0 = svldnt1_vnum_x2 (pn8, x0, 1)) + +/* +** ldnt1_vnum_u8_2: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_2, svuint8x2_t, uint8_t, + z0 = svldnt1_vnum_u8_x2 (pn8, x0, 2), + z0 = svldnt1_vnum_x2 (pn8, x0, 2)) + +/* +** ldnt1_vnum_u8_14: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_14, svuint8x2_t, uint8_t, + z0 = svldnt1_vnum_u8_x2 (pn8, x0, 14), + z0 = svldnt1_vnum_x2 (pn8, x0, 14)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_u8_16: +** incb x0, all, mul #16 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_16, svuint8x2_t, uint8_t, + z0 = svldnt1_vnum_u8_x2 (pn8, x0, 16), + z0 = svldnt1_vnum_x2 (pn8, x0, 16)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u8_m1: +** decb x0 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_m1, svuint8x2_t, uint8_t, + z0 = svldnt1_vnum_u8_x2 (pn8, x0, -1), + z0 = svldnt1_vnum_x2 (pn8, x0, -1)) + +/* +** ldnt1_vnum_u8_m2: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_m2, svuint8x2_t, uint8_t, + z0 = svldnt1_vnum_u8_x2 (pn8, x0, -2), + z0 = svldnt1_vnum_x2 (pn8, x0, -2)) + +/* +** ldnt1_vnum_u8_m16: +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_m16, svuint8x2_t, uint8_t, + z0 = svldnt1_vnum_u8_x2 (pn8, x0, -16), + z0 = svldnt1_vnum_x2 (pn8, x0, -16)) + +/* +** ldnt1_vnum_u8_m18: +** addvl (x[0-9]+), x0, #-18 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_m18, svuint8x2_t, uint8_t, + z0 = svldnt1_vnum_u8_x2 (pn8, x0, -18), + z0 = svldnt1_vnum_x2 (pn8, x0, -18)) + +/* +** ldnt1_vnum_u8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_x1, svuint8x2_t, uint8_t, + z0 = svldnt1_vnum_u8_x2 (pn8, x0, x1), + z0 = svldnt1_vnum_x2 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..827b99411d4cc5fb783aefba2698bf839e7c2fb0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_u8_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** ldnt1_u8_base: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_base, svuint8x4_t, uint8_t, + z0 = svldnt1_u8_x4 (pn8, x0), + z0 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_u8_index: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_index, svuint8x4_t, uint8_t, + z0 = svldnt1_u8_x4 (pn8, x0 + x1), + z0 = svldnt1_x4 (pn8, x0 + x1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u8_1: +** incb x0 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_1, svuint8x4_t, uint8_t, + z0 = svldnt1_u8_x4 (pn8, x0 + svcntb ()), + z0 = svldnt1_x4 (pn8, x0 + svcntb ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u8_2: +** incb x0, all, mul #2 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_2, svuint8x4_t, uint8_t, + z0 = svldnt1_u8_x4 (pn8, x0 + svcntb () * 2), + z0 = svldnt1_x4 (pn8, x0 + svcntb () * 2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_u8_3: +** incb x0, all, mul #3 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_3, svuint8x4_t, uint8_t, + z0 = svldnt1_u8_x4 (pn8, x0 + svcntb () * 3), + z0 = svldnt1_x4 (pn8, x0 + svcntb () * 3)) + +/* +** ldnt1_u8_4: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_4, svuint8x4_t, uint8_t, + z0 = svldnt1_u8_x4 (pn8, x0 + svcntb () * 4), + z0 = svldnt1_x4 (pn8, x0 + svcntb () * 4)) + +/* +** ldnt1_u8_28: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_28, svuint8x4_t, uint8_t, + z0 = svldnt1_u8_x4 (pn8, x0 + svcntb () * 28), + z0 = svldnt1_x4 (pn8, x0 + svcntb () * 28)) + +/* +** ldnt1_u8_32: +** [^{]* +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_32, svuint8x4_t, uint8_t, + z0 = svldnt1_u8_x4 (pn8, x0 + svcntb () * 32), + z0 = svldnt1_x4 (pn8, x0 + svcntb () * 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u8_m1: +** decb x0 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_m1, svuint8x4_t, uint8_t, + z0 = svldnt1_u8_x4 (pn8, x0 - svcntb ()), + z0 = svldnt1_x4 (pn8, x0 - svcntb ())) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u8_m2: +** decb x0, all, mul #2 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_m2, svuint8x4_t, uint8_t, + z0 = svldnt1_u8_x4 (pn8, x0 - svcntb () * 2), + z0 = svldnt1_x4 (pn8, x0 - svcntb () * 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_u8_m3: +** decb x0, all, mul #3 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_m3, svuint8x4_t, uint8_t, + z0 = svldnt1_u8_x4 (pn8, x0 - svcntb () * 3), + z0 = svldnt1_x4 (pn8, x0 - svcntb () * 3)) + +/* +** ldnt1_u8_m4: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_m4, svuint8x4_t, uint8_t, + z0 = svldnt1_u8_x4 (pn8, x0 - svcntb () * 4), + z0 = svldnt1_x4 (pn8, x0 - svcntb () * 4)) + +/* +** ldnt1_u8_m32: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_m32, svuint8x4_t, uint8_t, + z0 = svldnt1_u8_x4 (pn8, x0 - svcntb () * 32), + z0 = svldnt1_x4 (pn8, x0 - svcntb () * 32)) + +/* +** ldnt1_u8_m36: +** [^{]* +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_m36, svuint8x4_t, uint8_t, + z0 = svldnt1_u8_x4 (pn8, x0 - svcntb () * 36), + z0 = svldnt1_x4 (pn8, x0 - svcntb () * 36)) + +/* +** ldnt1_u8_z17: +** ldnt1b {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_z17, svuint8x4_t, uint8_t, + z17 = svldnt1_u8_x4 (pn8, x0), + z17 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_u8_z22: +** ldnt1b {z[^\n]+}, pn8/z, \[x0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_z22, svuint8x4_t, uint8_t, + z22 = svldnt1_u8_x4 (pn8, x0), + z22 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_u8_z28: +** ldnt1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_z28, svuint8x4_t, uint8_t, + z28 = svldnt1_u8_x4 (pn8, x0), + z28 = svldnt1_x4 (pn8, x0)) + +/* +** ldnt1_u8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_pn0, svuint8x4_t, uint8_t, + z0 = svldnt1_u8_x4 (pn0, x0), + z0 = svldnt1_x4 (pn0, x0)) + +/* +** ldnt1_u8_pn7: 
+** mov p([89]|1[0-5])\.b, p7\.b +** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_pn7, svuint8x4_t, uint8_t, + z0 = svldnt1_u8_x4 (pn7, x0), + z0 = svldnt1_x4 (pn7, x0)) + +/* +** ldnt1_u8_pn15: +** ldnt1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_u8_pn15, svuint8x4_t, uint8_t, + z0 = svldnt1_u8_x4 (pn15, x0), + z0 = svldnt1_x4 (pn15, x0)) + +/* +** ldnt1_vnum_u8_0: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_0, svuint8x4_t, uint8_t, + z0 = svldnt1_vnum_u8_x4 (pn8, x0, 0), + z0 = svldnt1_vnum_x4 (pn8, x0, 0)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u8_1: +** incb x0 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_1, svuint8x4_t, uint8_t, + z0 = svldnt1_vnum_u8_x4 (pn8, x0, 1), + z0 = svldnt1_vnum_x4 (pn8, x0, 1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u8_2: +** incb x0, all, mul #2 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_2, svuint8x4_t, uint8_t, + z0 = svldnt1_vnum_u8_x4 (pn8, x0, 2), + z0 = svldnt1_vnum_x4 (pn8, x0, 2)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u8_3: +** incb x0, all, mul #3 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_3, svuint8x4_t, uint8_t, + z0 = svldnt1_vnum_u8_x4 (pn8, x0, 3), + z0 = svldnt1_vnum_x4 (pn8, x0, 3)) + +/* +** ldnt1_vnum_u8_4: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_4, svuint8x4_t, uint8_t, + z0 = svldnt1_vnum_u8_x4 (pn8, x0, 4), + z0 = svldnt1_vnum_x4 (pn8, x0, 4)) + +/* +** ldnt1_vnum_u8_28: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_28, svuint8x4_t, uint8_t, + z0 = svldnt1_vnum_u8_x4 (pn8, x0, 28), + z0 = svldnt1_vnum_x4 (pn8, x0, 28)) + +/* +** ldnt1_vnum_u8_32: +** [^{]* +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_32, svuint8x4_t, uint8_t, + z0 = svldnt1_vnum_u8_x4 (pn8, x0, 32), + z0 = svldnt1_vnum_x4 (pn8, x0, 32)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u8_m1: +** decb x0 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_m1, svuint8x4_t, uint8_t, + z0 = svldnt1_vnum_u8_x4 (pn8, x0, -1), + z0 = svldnt1_vnum_x4 (pn8, x0, -1)) + +/* Moving the constant into a register would also be OK. */ +/* +** ldnt1_vnum_u8_m2: +** decb x0, all, mul #2 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_m2, svuint8x4_t, uint8_t, + z0 = svldnt1_vnum_u8_x4 (pn8, x0, -2), + z0 = svldnt1_vnum_x4 (pn8, x0, -2)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** ldnt1_vnum_u8_m3: +** decb x0, all, mul #3 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_m3, svuint8x4_t, uint8_t, + z0 = svldnt1_vnum_u8_x4 (pn8, x0, -3), + z0 = svldnt1_vnum_x4 (pn8, x0, -3)) + +/* +** ldnt1_vnum_u8_m4: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_m4, svuint8x4_t, uint8_t, + z0 = svldnt1_vnum_u8_x4 (pn8, x0, -4), + z0 = svldnt1_vnum_x4 (pn8, x0, -4)) + +/* +** ldnt1_vnum_u8_m32: +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_m32, svuint8x4_t, uint8_t, + z0 = svldnt1_vnum_u8_x4 (pn8, x0, -32), + z0 = svldnt1_vnum_x4 (pn8, x0, -32)) + +/* +** ldnt1_vnum_u8_m36: +** [^{]* +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\] +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_m36, svuint8x4_t, uint8_t, + z0 = svldnt1_vnum_u8_x4 (pn8, x0, -36), + z0 = svldnt1_vnum_x4 (pn8, x0, -36)) + +/* +** ldnt1_vnum_u8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\] +** ) +** ret +*/ +TEST_LOAD_COUNT (ldnt1_vnum_u8_x1, svuint8x4_t, uint8_t, + z0 = svldnt1_vnum_u8_x4 (pn8, x0, x1), + z0 = svldnt1_vnum_x4 (pn8, x0, x1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldr_zt.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldr_zt.c new file mode 100644 index 0000000000000000000000000000000000000000..a614fbc9537cb8692bc1e266c5504872bdcc4a58 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldr_zt.c @@ -0,0 +1,36 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#define STREAMING_COMPATIBLE +#define SHARED_ZT0 +#include "test_sme2_acle.h" + +/* +** ldr_zt0_x0: +** ldr zt0, \[x0\] +** ret +*/ +PROTO (ldr_zt0_x0, void, (char *x0)) { svldr_zt (0, x0); } + +/* +** ldr_zt0_x0p1: +** add (x[0-9]+), x0, #?1 +** ldr zt0, \[\1\] +** ret +*/ +PROTO (ldr_zt0_x0p1, void, (char *x0)) { svldr_zt (0, x0 + 1); } + +/* +** ldr_zt0_x0p64: +** add (x[0-9]+), x0, #?64 +** ldr zt0, \[\1\] +** ret +*/ +PROTO (ldr_zt0_x0p64, void, (char *x0)) { svldr_zt (0, x0 + 64); } + +/* +** ldr_zt0_x0_vl1: +** incb x0 +** ldr zt0, \[x0\] +** ret +*/ +PROTO (ldr_zt0_x0_vl1, void, (char *x0)) { svldr_zt (0, x0 + svcntb ()); } diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_bf16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_bf16.c new file mode 100644 index 0000000000000000000000000000000000000000..fd33428a9c87c2e9e1b6d49df91c6990826c2bf2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_bf16.c @@ -0,0 +1,48 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 z1\.h, zt0, z0\[0\] +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svbfloat16_t, svuint8_t, z1, + svluti2_lane_zt_bf16 (0, z0, 0), + svluti2_lane_zt_bf16 (0, z0, 0)) + +/* +** luti2_z18_z5_15: +** luti2 z18\.h, zt0, z5\[15\] +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_15, svbfloat16_t, svuint8_t, z18, + svluti2_lane_zt_bf16 (0, z5, 15), + svluti2_lane_zt_bf16 (0, z5, 15)) + +/* +** luti2_z24_z7_13: +** luti2 z24\.h, zt0, z7\[13\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_13, svbfloat16_t, svuint8_t, z24, + svluti2_lane_zt_bf16 (0, z7, 13), + svluti2_lane_zt_bf16 (0, z7, 13)) + +/* +** luti2_z28_z16_11: +** luti2 z28\.h, zt0, z16\[11\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_11, svbfloat16_t, 
svuint8_t, z28, + svluti2_lane_zt_bf16 (0, z16, 11), + svluti2_lane_zt_bf16 (0, z16, 11)) + +/* +** luti2_z24_z23_1: +** luti2 z24\.h, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_1, svbfloat16_t, svuint8_t, z24, + svluti2_lane_zt_bf16 (0, z23, 1), + svluti2_lane_zt_bf16 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_bf16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..52c0bc3f0a16517b4f77120220d14974c076038a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_bf16_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svbfloat16x2_t, svuint8_t, z1, + svluti2_lane_zt_bf16_x2 (0, z0, 0), + svluti2_lane_zt_bf16_x2 (0, z0, 0)) + +/* +** luti2_z18_z5_7: +** luti2 {z18\.h - z19\.h}, zt0, z5\[7\] +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_7, svbfloat16x2_t, svuint8_t, z18, + svluti2_lane_zt_bf16_x2 (0, z5, 7), + svluti2_lane_zt_bf16_x2 (0, z5, 7)) + +/* +** luti2_z24_z7_6: +** luti2 {z24\.h - z25\.h}, zt0, z7\[6\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_6, svbfloat16x2_t, svuint8_t, z24, + svluti2_lane_zt_bf16_x2 (0, z7, 6), + svluti2_lane_zt_bf16_x2 (0, z7, 6)) + +/* +** luti2_z28_z16_3: +** luti2 {z28\.h - z29\.h}, zt0, z16\[3\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_3, svbfloat16x2_t, svuint8_t, z28, + svluti2_lane_zt_bf16_x2 (0, z16, 3), + svluti2_lane_zt_bf16_x2 (0, z16, 3)) + +/* +** luti2_z24_z23_1: +** luti2 {z24\.h - z25\.h}, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_1, svbfloat16x2_t, svuint8_t, z24, + svluti2_lane_zt_bf16_x2 (0, z23, 1), + svluti2_lane_zt_bf16_x2 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_bf16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_bf16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..6a88c2692b68c5b54e46c38053b66fe35658a8ce --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_bf16_x4.c @@ -0,0 +1,56 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svbfloat16x4_t, svuint8_t, z1, + svluti2_lane_zt_bf16_x4 (0, z0, 0), + svluti2_lane_zt_bf16_x4 (0, z0, 0)) + +/* +** luti2_z18_z5_3: +** luti2 {[^\n]+}, zt0, z5\[3\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_3, svbfloat16x4_t, svuint8_t, z18, + svluti2_lane_zt_bf16_x4 (0, z5, 3), + svluti2_lane_zt_bf16_x4 (0, z5, 3)) + +/* +** luti2_z24_z7_2: +** luti2 {z24\.h - z27\.h}, zt0, z7\[2\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_2, svbfloat16x4_t, svuint8_t, z24, + svluti2_lane_zt_bf16_x4 (0, z7, 2), + svluti2_lane_zt_bf16_x4 (0, z7, 2)) + +/* +** luti2_z28_z16_1: +** luti2 {z28\.h - z31\.h}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_1, svbfloat16x4_t, svuint8_t, z28, + svluti2_lane_zt_bf16_x4 (0, z16, 1), + svluti2_lane_zt_bf16_x4 (0, z16, 1)) + +/* +** luti2_z24_z23_0: +** luti2 {z24\.h - z27\.h}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_0, svbfloat16x4_t, svuint8_t, z24, + svluti2_lane_zt_bf16_x4 (0, z23, 0), + svluti2_lane_zt_bf16_x4 (0, z23, 0)) diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f16.c new file mode 100644 index 0000000000000000000000000000000000000000..9907e70b3fc1b3996d2cafe750c10c8a272d2873 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f16.c @@ -0,0 +1,48 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 z1\.h, zt0, z0\[0\] +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svfloat16_t, svuint8_t, z1, + svluti2_lane_zt_f16 (0, z0, 0), + svluti2_lane_zt_f16 (0, z0, 0)) + +/* +** luti2_z18_z5_15: +** luti2 z18\.h, zt0, z5\[15\] +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_15, svfloat16_t, svuint8_t, z18, + svluti2_lane_zt_f16 (0, z5, 15), + svluti2_lane_zt_f16 (0, z5, 15)) + +/* +** luti2_z24_z7_13: +** luti2 z24\.h, zt0, z7\[13\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_13, svfloat16_t, svuint8_t, z24, + svluti2_lane_zt_f16 (0, z7, 13), + svluti2_lane_zt_f16 (0, z7, 13)) + +/* +** luti2_z28_z16_11: +** luti2 z28\.h, zt0, z16\[11\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_11, svfloat16_t, svuint8_t, z28, + svluti2_lane_zt_f16 (0, z16, 11), + svluti2_lane_zt_f16 (0, z16, 11)) + +/* +** luti2_z24_z23_1: +** luti2 z24\.h, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_1, svfloat16_t, svuint8_t, z24, + svluti2_lane_zt_f16 (0, z23, 1), + svluti2_lane_zt_f16 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..1a2125773a988f7406cfd096c6e64da5b97ef953 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f16_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svfloat16x2_t, svuint8_t, z1, + svluti2_lane_zt_f16_x2 (0, z0, 0), + svluti2_lane_zt_f16_x2 (0, z0, 0)) + +/* +** luti2_z18_z5_7: +** luti2 {z18\.h - z19\.h}, zt0, z5\[7\] +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_7, svfloat16x2_t, svuint8_t, z18, + svluti2_lane_zt_f16_x2 (0, z5, 7), + svluti2_lane_zt_f16_x2 (0, z5, 7)) + +/* +** luti2_z24_z7_6: +** luti2 {z24\.h - z25\.h}, zt0, z7\[6\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_6, svfloat16x2_t, svuint8_t, z24, + svluti2_lane_zt_f16_x2 (0, z7, 6), + svluti2_lane_zt_f16_x2 (0, z7, 6)) + +/* +** luti2_z28_z16_3: +** luti2 {z28\.h - z29\.h}, zt0, z16\[3\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_3, svfloat16x2_t, svuint8_t, z28, + svluti2_lane_zt_f16_x2 (0, z16, 3), + svluti2_lane_zt_f16_x2 (0, z16, 3)) + +/* +** luti2_z24_z23_1: +** luti2 {z24\.h - z25\.h}, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_1, svfloat16x2_t, svuint8_t, z24, + svluti2_lane_zt_f16_x2 (0, z23, 1), + svluti2_lane_zt_f16_x2 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..dd18bcc5a4595fe9a82fb71535d1b8ca49e3b5c8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f16_x4.c @@ -0,0 +1,56 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** mov 
[^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svfloat16x4_t, svuint8_t, z1, + svluti2_lane_zt_f16_x4 (0, z0, 0), + svluti2_lane_zt_f16_x4 (0, z0, 0)) + +/* +** luti2_z18_z5_3: +** luti2 {[^\n]+}, zt0, z5\[3\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_3, svfloat16x4_t, svuint8_t, z18, + svluti2_lane_zt_f16_x4 (0, z5, 3), + svluti2_lane_zt_f16_x4 (0, z5, 3)) + +/* +** luti2_z24_z7_2: +** luti2 {z24\.h - z27\.h}, zt0, z7\[2\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_2, svfloat16x4_t, svuint8_t, z24, + svluti2_lane_zt_f16_x4 (0, z7, 2), + svluti2_lane_zt_f16_x4 (0, z7, 2)) + +/* +** luti2_z28_z16_1: +** luti2 {z28\.h - z31\.h}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_1, svfloat16x4_t, svuint8_t, z28, + svluti2_lane_zt_f16_x4 (0, z16, 1), + svluti2_lane_zt_f16_x4 (0, z16, 1)) + +/* +** luti2_z24_z23_0: +** luti2 {z24\.h - z27\.h}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_0, svfloat16x4_t, svuint8_t, z24, + svluti2_lane_zt_f16_x4 (0, z23, 0), + svluti2_lane_zt_f16_x4 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f32.c new file mode 100644 index 0000000000000000000000000000000000000000..17cf957f114ebbdcf88b013891e6c04fa8dacd3d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f32.c @@ -0,0 +1,48 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 z1\.s, zt0, z0\[0\] +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svfloat32_t, svuint8_t, z1, + svluti2_lane_zt_f32 (0, z0, 0), + svluti2_lane_zt_f32 (0, z0, 0)) + +/* +** luti2_z18_z5_15: +** luti2 z18\.s, zt0, z5\[15\] +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_15, svfloat32_t, svuint8_t, z18, + svluti2_lane_zt_f32 (0, z5, 15), + svluti2_lane_zt_f32 (0, z5, 15)) + +/* +** luti2_z24_z7_13: +** luti2 z24\.s, zt0, z7\[13\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_13, svfloat32_t, svuint8_t, z24, + svluti2_lane_zt_f32 (0, z7, 13), + svluti2_lane_zt_f32 (0, z7, 13)) + +/* +** luti2_z28_z16_11: +** luti2 z28\.s, zt0, z16\[11\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_11, svfloat32_t, svuint8_t, z28, + svluti2_lane_zt_f32 (0, z16, 11), + svluti2_lane_zt_f32 (0, z16, 11)) + +/* +** luti2_z24_z23_1: +** luti2 z24\.s, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_1, svfloat32_t, svuint8_t, z24, + svluti2_lane_zt_f32 (0, z23, 1), + svluti2_lane_zt_f32 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..f391529548f910b835451c8446d86c84e4ed4093 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f32_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svfloat32x2_t, svuint8_t, z1, + svluti2_lane_zt_f32_x2 (0, z0, 0), + svluti2_lane_zt_f32_x2 (0, z0, 0)) + +/* +** luti2_z18_z5_7: +** luti2 {z18\.s - z19\.s}, zt0, z5\[7\] +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_7, svfloat32x2_t, svuint8_t, z18, + svluti2_lane_zt_f32_x2 (0, z5, 7), + svluti2_lane_zt_f32_x2 (0, z5, 7)) + +/* +** luti2_z24_z7_6: +** luti2 {z24\.s - z25\.s}, zt0, z7\[6\] +** ret +*/ +TEST_XN_SINGLE 
(luti2_z24_z7_6, svfloat32x2_t, svuint8_t, z24, + svluti2_lane_zt_f32_x2 (0, z7, 6), + svluti2_lane_zt_f32_x2 (0, z7, 6)) + +/* +** luti2_z28_z16_3: +** luti2 {z28\.s - z29\.s}, zt0, z16\[3\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_3, svfloat32x2_t, svuint8_t, z28, + svluti2_lane_zt_f32_x2 (0, z16, 3), + svluti2_lane_zt_f32_x2 (0, z16, 3)) + +/* +** luti2_z24_z23_1: +** luti2 {z24\.s - z25\.s}, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_1, svfloat32x2_t, svuint8_t, z24, + svluti2_lane_zt_f32_x2 (0, z23, 1), + svluti2_lane_zt_f32_x2 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..ad08c8407848ed28e043491f9e9d172a6275f204 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_f32_x4.c @@ -0,0 +1,56 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svfloat32x4_t, svuint8_t, z1, + svluti2_lane_zt_f32_x4 (0, z0, 0), + svluti2_lane_zt_f32_x4 (0, z0, 0)) + +/* +** luti2_z18_z5_3: +** luti2 {[^\n]+}, zt0, z5\[3\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_3, svfloat32x4_t, svuint8_t, z18, + svluti2_lane_zt_f32_x4 (0, z5, 3), + svluti2_lane_zt_f32_x4 (0, z5, 3)) + +/* +** luti2_z24_z7_2: +** luti2 {z24\.s - z27\.s}, zt0, z7\[2\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_2, svfloat32x4_t, svuint8_t, z24, + svluti2_lane_zt_f32_x4 (0, z7, 2), + svluti2_lane_zt_f32_x4 (0, z7, 2)) + +/* +** luti2_z28_z16_1: +** luti2 {z28\.s - z31\.s}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_1, svfloat32x4_t, svuint8_t, z28, + svluti2_lane_zt_f32_x4 (0, z16, 1), + svluti2_lane_zt_f32_x4 (0, z16, 1)) + +/* +** luti2_z24_z23_0: +** luti2 {z24\.s - z27\.s}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_0, svfloat32x4_t, svuint8_t, z24, + svluti2_lane_zt_f32_x4 (0, z23, 0), + svluti2_lane_zt_f32_x4 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s16.c new file mode 100644 index 0000000000000000000000000000000000000000..5d802cf24b8baa79bd37bf1f93506d4c3cb929f5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s16.c @@ -0,0 +1,48 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 z1\.h, zt0, z0\[0\] +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svint16_t, svuint8_t, z1, + svluti2_lane_zt_s16 (0, z0, 0), + svluti2_lane_zt_s16 (0, z0, 0)) + +/* +** luti2_z18_z5_15: +** luti2 z18\.h, zt0, z5\[15\] +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_15, svint16_t, svuint8_t, z18, + svluti2_lane_zt_s16 (0, z5, 15), + svluti2_lane_zt_s16 (0, z5, 15)) + +/* +** luti2_z24_z7_13: +** luti2 z24\.h, zt0, z7\[13\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_13, svint16_t, svuint8_t, z24, + svluti2_lane_zt_s16 (0, z7, 13), + svluti2_lane_zt_s16 (0, z7, 13)) + +/* +** luti2_z28_z16_11: +** luti2 z28\.h, zt0, z16\[11\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_11, svint16_t, svuint8_t, z28, + svluti2_lane_zt_s16 (0, z16, 11), + svluti2_lane_zt_s16 (0, z16, 11)) + +/* +** luti2_z24_z23_1: +** luti2 z24\.h, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE 
(luti2_z24_z23_1, svint16_t, svuint8_t, z24, + svluti2_lane_zt_s16 (0, z23, 1), + svluti2_lane_zt_s16 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..35100f81604d014c75e857a39eff820d9a0ca169 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s16_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svint16x2_t, svuint8_t, z1, + svluti2_lane_zt_s16_x2 (0, z0, 0), + svluti2_lane_zt_s16_x2 (0, z0, 0)) + +/* +** luti2_z18_z5_7: +** luti2 {z18\.h - z19\.h}, zt0, z5\[7\] +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_7, svint16x2_t, svuint8_t, z18, + svluti2_lane_zt_s16_x2 (0, z5, 7), + svluti2_lane_zt_s16_x2 (0, z5, 7)) + +/* +** luti2_z24_z7_6: +** luti2 {z24\.h - z25\.h}, zt0, z7\[6\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_6, svint16x2_t, svuint8_t, z24, + svluti2_lane_zt_s16_x2 (0, z7, 6), + svluti2_lane_zt_s16_x2 (0, z7, 6)) + +/* +** luti2_z28_z16_3: +** luti2 {z28\.h - z29\.h}, zt0, z16\[3\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_3, svint16x2_t, svuint8_t, z28, + svluti2_lane_zt_s16_x2 (0, z16, 3), + svluti2_lane_zt_s16_x2 (0, z16, 3)) + +/* +** luti2_z24_z23_1: +** luti2 {z24\.h - z25\.h}, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_1, svint16x2_t, svuint8_t, z24, + svluti2_lane_zt_s16_x2 (0, z23, 1), + svluti2_lane_zt_s16_x2 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..68700706479f0575c4f031ba4237b98a2a8d5db3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s16_x4.c @@ -0,0 +1,56 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svint16x4_t, svuint8_t, z1, + svluti2_lane_zt_s16_x4 (0, z0, 0), + svluti2_lane_zt_s16_x4 (0, z0, 0)) + +/* +** luti2_z18_z5_3: +** luti2 {[^\n]+}, zt0, z5\[3\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_3, svint16x4_t, svuint8_t, z18, + svluti2_lane_zt_s16_x4 (0, z5, 3), + svluti2_lane_zt_s16_x4 (0, z5, 3)) + +/* +** luti2_z24_z7_2: +** luti2 {z24\.h - z27\.h}, zt0, z7\[2\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_2, svint16x4_t, svuint8_t, z24, + svluti2_lane_zt_s16_x4 (0, z7, 2), + svluti2_lane_zt_s16_x4 (0, z7, 2)) + +/* +** luti2_z28_z16_1: +** luti2 {z28\.h - z31\.h}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_1, svint16x4_t, svuint8_t, z28, + svluti2_lane_zt_s16_x4 (0, z16, 1), + svluti2_lane_zt_s16_x4 (0, z16, 1)) + +/* +** luti2_z24_z23_0: +** luti2 {z24\.h - z27\.h}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_0, svint16x4_t, svuint8_t, z24, + svluti2_lane_zt_s16_x4 (0, z23, 0), + svluti2_lane_zt_s16_x4 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s32.c new file mode 100644 index 0000000000000000000000000000000000000000..5829dcbcea63b2748b40dc13b16f1dd93cab10b8 --- 
/dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s32.c @@ -0,0 +1,48 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 z1\.s, zt0, z0\[0\] +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svint32_t, svuint8_t, z1, + svluti2_lane_zt_s32 (0, z0, 0), + svluti2_lane_zt_s32 (0, z0, 0)) + +/* +** luti2_z18_z5_15: +** luti2 z18\.s, zt0, z5\[15\] +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_15, svint32_t, svuint8_t, z18, + svluti2_lane_zt_s32 (0, z5, 15), + svluti2_lane_zt_s32 (0, z5, 15)) + +/* +** luti2_z24_z7_13: +** luti2 z24\.s, zt0, z7\[13\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_13, svint32_t, svuint8_t, z24, + svluti2_lane_zt_s32 (0, z7, 13), + svluti2_lane_zt_s32 (0, z7, 13)) + +/* +** luti2_z28_z16_11: +** luti2 z28\.s, zt0, z16\[11\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_11, svint32_t, svuint8_t, z28, + svluti2_lane_zt_s32 (0, z16, 11), + svluti2_lane_zt_s32 (0, z16, 11)) + +/* +** luti2_z24_z23_1: +** luti2 z24\.s, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_1, svint32_t, svuint8_t, z24, + svluti2_lane_zt_s32 (0, z23, 1), + svluti2_lane_zt_s32 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..b28b607d71c24a99a57304a0c1534d9b322e42a6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s32_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svint32x2_t, svuint8_t, z1, + svluti2_lane_zt_s32_x2 (0, z0, 0), + svluti2_lane_zt_s32_x2 (0, z0, 0)) + +/* +** luti2_z18_z5_7: +** luti2 {z18\.s - z19\.s}, zt0, z5\[7\] +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_7, svint32x2_t, svuint8_t, z18, + svluti2_lane_zt_s32_x2 (0, z5, 7), + svluti2_lane_zt_s32_x2 (0, z5, 7)) + +/* +** luti2_z24_z7_6: +** luti2 {z24\.s - z25\.s}, zt0, z7\[6\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_6, svint32x2_t, svuint8_t, z24, + svluti2_lane_zt_s32_x2 (0, z7, 6), + svluti2_lane_zt_s32_x2 (0, z7, 6)) + +/* +** luti2_z28_z16_3: +** luti2 {z28\.s - z29\.s}, zt0, z16\[3\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_3, svint32x2_t, svuint8_t, z28, + svluti2_lane_zt_s32_x2 (0, z16, 3), + svluti2_lane_zt_s32_x2 (0, z16, 3)) + +/* +** luti2_z24_z23_1: +** luti2 {z24\.s - z25\.s}, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_1, svint32x2_t, svuint8_t, z24, + svluti2_lane_zt_s32_x2 (0, z23, 1), + svluti2_lane_zt_s32_x2 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..9b3dc1d01a17094b60fa8c55693dfb4289dccee0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s32_x4.c @@ -0,0 +1,56 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svint32x4_t, svuint8_t, z1, + svluti2_lane_zt_s32_x4 (0, z0, 0), + svluti2_lane_zt_s32_x4 (0, z0, 0)) + +/* +** luti2_z18_z5_3: +** luti2 {[^\n]+}, zt0, z5\[3\] +** mov [^\n]+ +** mov [^\n]+ 
+** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_3, svint32x4_t, svuint8_t, z18, + svluti2_lane_zt_s32_x4 (0, z5, 3), + svluti2_lane_zt_s32_x4 (0, z5, 3)) + +/* +** luti2_z24_z7_2: +** luti2 {z24\.s - z27\.s}, zt0, z7\[2\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_2, svint32x4_t, svuint8_t, z24, + svluti2_lane_zt_s32_x4 (0, z7, 2), + svluti2_lane_zt_s32_x4 (0, z7, 2)) + +/* +** luti2_z28_z16_1: +** luti2 {z28\.s - z31\.s}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_1, svint32x4_t, svuint8_t, z28, + svluti2_lane_zt_s32_x4 (0, z16, 1), + svluti2_lane_zt_s32_x4 (0, z16, 1)) + +/* +** luti2_z24_z23_0: +** luti2 {z24\.s - z27\.s}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_0, svint32x4_t, svuint8_t, z24, + svluti2_lane_zt_s32_x4 (0, z23, 0), + svluti2_lane_zt_s32_x4 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s8.c new file mode 100644 index 0000000000000000000000000000000000000000..5ff4671dfb173f96f2da7ea4445972ee8a9b5703 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s8.c @@ -0,0 +1,48 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 z1\.b, zt0, z0\[0\] +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svint8_t, svuint8_t, z1, + svluti2_lane_zt_s8 (0, z0, 0), + svluti2_lane_zt_s8 (0, z0, 0)) + +/* +** luti2_z18_z5_15: +** luti2 z18\.b, zt0, z5\[15\] +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_15, svint8_t, svuint8_t, z18, + svluti2_lane_zt_s8 (0, z5, 15), + svluti2_lane_zt_s8 (0, z5, 15)) + +/* +** luti2_z24_z7_13: +** luti2 z24\.b, zt0, z7\[13\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_13, svint8_t, svuint8_t, z24, + svluti2_lane_zt_s8 (0, z7, 13), + svluti2_lane_zt_s8 (0, z7, 13)) + +/* +** luti2_z28_z16_11: +** luti2 z28\.b, zt0, z16\[11\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_11, svint8_t, svuint8_t, z28, + svluti2_lane_zt_s8 (0, z16, 11), + svluti2_lane_zt_s8 (0, z16, 11)) + +/* +** luti2_z24_z23_1: +** luti2 z24\.b, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_1, svint8_t, svuint8_t, z24, + svluti2_lane_zt_s8 (0, z23, 1), + svluti2_lane_zt_s8 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..a6ff0cdd73c32c0f82a33401086d35c0cee3d2e6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s8_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svint8x2_t, svuint8_t, z1, + svluti2_lane_zt_s8_x2 (0, z0, 0), + svluti2_lane_zt_s8_x2 (0, z0, 0)) + +/* +** luti2_z18_z5_7: +** luti2 {z18\.b - z19\.b}, zt0, z5\[7\] +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_7, svint8x2_t, svuint8_t, z18, + svluti2_lane_zt_s8_x2 (0, z5, 7), + svluti2_lane_zt_s8_x2 (0, z5, 7)) + +/* +** luti2_z24_z7_6: +** luti2 {z24\.b - z25\.b}, zt0, z7\[6\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_6, svint8x2_t, svuint8_t, z24, + svluti2_lane_zt_s8_x2 (0, z7, 6), + svluti2_lane_zt_s8_x2 (0, z7, 6)) + +/* +** luti2_z28_z16_3: +** luti2 {z28\.b - z29\.b}, zt0, z16\[3\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_3, svint8x2_t, svuint8_t, z28, + svluti2_lane_zt_s8_x2 (0, z16, 3), + 
svluti2_lane_zt_s8_x2 (0, z16, 3)) + +/* +** luti2_z24_z23_1: +** luti2 {z24\.b - z25\.b}, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_1, svint8x2_t, svuint8_t, z24, + svluti2_lane_zt_s8_x2 (0, z23, 1), + svluti2_lane_zt_s8_x2 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..23dd23ed1afbbfab18c2cde6be7f00fa1ed92f55 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_s8_x4.c @@ -0,0 +1,56 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svint8x4_t, svuint8_t, z1, + svluti2_lane_zt_s8_x4 (0, z0, 0), + svluti2_lane_zt_s8_x4 (0, z0, 0)) + +/* +** luti2_z18_z5_3: +** luti2 {[^\n]+}, zt0, z5\[3\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_3, svint8x4_t, svuint8_t, z18, + svluti2_lane_zt_s8_x4 (0, z5, 3), + svluti2_lane_zt_s8_x4 (0, z5, 3)) + +/* +** luti2_z24_z7_2: +** luti2 {z24\.b - z27\.b}, zt0, z7\[2\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_2, svint8x4_t, svuint8_t, z24, + svluti2_lane_zt_s8_x4 (0, z7, 2), + svluti2_lane_zt_s8_x4 (0, z7, 2)) + +/* +** luti2_z28_z16_1: +** luti2 {z28\.b - z31\.b}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_1, svint8x4_t, svuint8_t, z28, + svluti2_lane_zt_s8_x4 (0, z16, 1), + svluti2_lane_zt_s8_x4 (0, z16, 1)) + +/* +** luti2_z24_z23_0: +** luti2 {z24\.b - z27\.b}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_0, svint8x4_t, svuint8_t, z24, + svluti2_lane_zt_s8_x4 (0, z23, 0), + svluti2_lane_zt_s8_x4 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u16.c new file mode 100644 index 0000000000000000000000000000000000000000..56c91460448b8da15c4f3f30f3fbfb1005f5172f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u16.c @@ -0,0 +1,48 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 z1\.h, zt0, z0\[0\] +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svuint16_t, svuint8_t, z1, + svluti2_lane_zt_u16 (0, z0, 0), + svluti2_lane_zt_u16 (0, z0, 0)) + +/* +** luti2_z18_z5_15: +** luti2 z18\.h, zt0, z5\[15\] +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_15, svuint16_t, svuint8_t, z18, + svluti2_lane_zt_u16 (0, z5, 15), + svluti2_lane_zt_u16 (0, z5, 15)) + +/* +** luti2_z24_z7_13: +** luti2 z24\.h, zt0, z7\[13\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_13, svuint16_t, svuint8_t, z24, + svluti2_lane_zt_u16 (0, z7, 13), + svluti2_lane_zt_u16 (0, z7, 13)) + +/* +** luti2_z28_z16_11: +** luti2 z28\.h, zt0, z16\[11\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_11, svuint16_t, svuint8_t, z28, + svluti2_lane_zt_u16 (0, z16, 11), + svluti2_lane_zt_u16 (0, z16, 11)) + +/* +** luti2_z24_z23_1: +** luti2 z24\.h, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_1, svuint16_t, svuint8_t, z24, + svluti2_lane_zt_u16 (0, z23, 1), + svluti2_lane_zt_u16 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u16_x2.c new file mode 100644 index 
0000000000000000000000000000000000000000..cda16d8a7d5f91157ffe23cc4bb1537d7a840865 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u16_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svuint16x2_t, svuint8_t, z1, + svluti2_lane_zt_u16_x2 (0, z0, 0), + svluti2_lane_zt_u16_x2 (0, z0, 0)) + +/* +** luti2_z18_z5_7: +** luti2 {z18\.h - z19\.h}, zt0, z5\[7\] +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_7, svuint16x2_t, svuint8_t, z18, + svluti2_lane_zt_u16_x2 (0, z5, 7), + svluti2_lane_zt_u16_x2 (0, z5, 7)) + +/* +** luti2_z24_z7_6: +** luti2 {z24\.h - z25\.h}, zt0, z7\[6\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_6, svuint16x2_t, svuint8_t, z24, + svluti2_lane_zt_u16_x2 (0, z7, 6), + svluti2_lane_zt_u16_x2 (0, z7, 6)) + +/* +** luti2_z28_z16_3: +** luti2 {z28\.h - z29\.h}, zt0, z16\[3\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_3, svuint16x2_t, svuint8_t, z28, + svluti2_lane_zt_u16_x2 (0, z16, 3), + svluti2_lane_zt_u16_x2 (0, z16, 3)) + +/* +** luti2_z24_z23_1: +** luti2 {z24\.h - z25\.h}, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_1, svuint16x2_t, svuint8_t, z24, + svluti2_lane_zt_u16_x2 (0, z23, 1), + svluti2_lane_zt_u16_x2 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..deb5ca1e9d4383337f3dcc2879cf5ac5234ff601 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u16_x4.c @@ -0,0 +1,56 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svuint16x4_t, svuint8_t, z1, + svluti2_lane_zt_u16_x4 (0, z0, 0), + svluti2_lane_zt_u16_x4 (0, z0, 0)) + +/* +** luti2_z18_z5_3: +** luti2 {[^\n]+}, zt0, z5\[3\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_3, svuint16x4_t, svuint8_t, z18, + svluti2_lane_zt_u16_x4 (0, z5, 3), + svluti2_lane_zt_u16_x4 (0, z5, 3)) + +/* +** luti2_z24_z7_2: +** luti2 {z24\.h - z27\.h}, zt0, z7\[2\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_2, svuint16x4_t, svuint8_t, z24, + svluti2_lane_zt_u16_x4 (0, z7, 2), + svluti2_lane_zt_u16_x4 (0, z7, 2)) + +/* +** luti2_z28_z16_1: +** luti2 {z28\.h - z31\.h}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_1, svuint16x4_t, svuint8_t, z28, + svluti2_lane_zt_u16_x4 (0, z16, 1), + svluti2_lane_zt_u16_x4 (0, z16, 1)) + +/* +** luti2_z24_z23_0: +** luti2 {z24\.h - z27\.h}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_0, svuint16x4_t, svuint8_t, z24, + svluti2_lane_zt_u16_x4 (0, z23, 0), + svluti2_lane_zt_u16_x4 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u32.c new file mode 100644 index 0000000000000000000000000000000000000000..f88d615d64b8dcda4b1a03d6c65aec6c139ff641 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u32.c @@ -0,0 +1,48 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 z1\.s, zt0, z0\[0\] +** ret +*/ 
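+/* In the single-vector tests in this file, LUTI2 can write to any
+   destination Z register, so each expected body is just the LUTI2
+   and a RET.  The _x2/_x4 counterparts expect an unnamed destination
+   tuple plus trailing MOVs whenever the requested registers do not
+   start on a multiple of the tuple size, since the multi-vector
+   forms only accept aligned register tuples.  */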
+TEST_XN_SINGLE (luti2_z1_z0_0, svuint32_t, svuint8_t, z1, + svluti2_lane_zt_u32 (0, z0, 0), + svluti2_lane_zt_u32 (0, z0, 0)) + +/* +** luti2_z18_z5_15: +** luti2 z18\.s, zt0, z5\[15\] +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_15, svuint32_t, svuint8_t, z18, + svluti2_lane_zt_u32 (0, z5, 15), + svluti2_lane_zt_u32 (0, z5, 15)) + +/* +** luti2_z24_z7_13: +** luti2 z24\.s, zt0, z7\[13\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_13, svuint32_t, svuint8_t, z24, + svluti2_lane_zt_u32 (0, z7, 13), + svluti2_lane_zt_u32 (0, z7, 13)) + +/* +** luti2_z28_z16_11: +** luti2 z28\.s, zt0, z16\[11\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_11, svuint32_t, svuint8_t, z28, + svluti2_lane_zt_u32 (0, z16, 11), + svluti2_lane_zt_u32 (0, z16, 11)) + +/* +** luti2_z24_z23_1: +** luti2 z24\.s, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_1, svuint32_t, svuint8_t, z24, + svluti2_lane_zt_u32 (0, z23, 1), + svluti2_lane_zt_u32 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..78d0159aaaa837baeb295b9264629b988875bbf6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u32_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svuint32x2_t, svuint8_t, z1, + svluti2_lane_zt_u32_x2 (0, z0, 0), + svluti2_lane_zt_u32_x2 (0, z0, 0)) + +/* +** luti2_z18_z5_7: +** luti2 {z18\.s - z19\.s}, zt0, z5\[7\] +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_7, svuint32x2_t, svuint8_t, z18, + svluti2_lane_zt_u32_x2 (0, z5, 7), + svluti2_lane_zt_u32_x2 (0, z5, 7)) + +/* +** luti2_z24_z7_6: +** luti2 {z24\.s - z25\.s}, zt0, z7\[6\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_6, svuint32x2_t, svuint8_t, z24, + svluti2_lane_zt_u32_x2 (0, z7, 6), + svluti2_lane_zt_u32_x2 (0, z7, 6)) + +/* +** luti2_z28_z16_3: +** luti2 {z28\.s - z29\.s}, zt0, z16\[3\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_3, svuint32x2_t, svuint8_t, z28, + svluti2_lane_zt_u32_x2 (0, z16, 3), + svluti2_lane_zt_u32_x2 (0, z16, 3)) + +/* +** luti2_z24_z23_1: +** luti2 {z24\.s - z25\.s}, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_1, svuint32x2_t, svuint8_t, z24, + svluti2_lane_zt_u32_x2 (0, z23, 1), + svluti2_lane_zt_u32_x2 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..f7e8b633e529397c37cd4d4240a393c835c4a95b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u32_x4.c @@ -0,0 +1,56 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svuint32x4_t, svuint8_t, z1, + svluti2_lane_zt_u32_x4 (0, z0, 0), + svluti2_lane_zt_u32_x4 (0, z0, 0)) + +/* +** luti2_z18_z5_3: +** luti2 {[^\n]+}, zt0, z5\[3\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_3, svuint32x4_t, svuint8_t, z18, + svluti2_lane_zt_u32_x4 (0, z5, 3), + svluti2_lane_zt_u32_x4 (0, z5, 3)) + +/* +** luti2_z24_z7_2: +** luti2 {z24\.s - z27\.s}, zt0, z7\[2\] 
+** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_2, svuint32x4_t, svuint8_t, z24, + svluti2_lane_zt_u32_x4 (0, z7, 2), + svluti2_lane_zt_u32_x4 (0, z7, 2)) + +/* +** luti2_z28_z16_1: +** luti2 {z28\.s - z31\.s}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_1, svuint32x4_t, svuint8_t, z28, + svluti2_lane_zt_u32_x4 (0, z16, 1), + svluti2_lane_zt_u32_x4 (0, z16, 1)) + +/* +** luti2_z24_z23_0: +** luti2 {z24\.s - z27\.s}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_0, svuint32x4_t, svuint8_t, z24, + svluti2_lane_zt_u32_x4 (0, z23, 0), + svluti2_lane_zt_u32_x4 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u8.c new file mode 100644 index 0000000000000000000000000000000000000000..1cef4d53039c088cc721d8d4338652b5e52dbb23 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u8.c @@ -0,0 +1,48 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 z1\.b, zt0, z0\[0\] +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svuint8_t, svuint8_t, z1, + svluti2_lane_zt_u8 (0, z0, 0), + svluti2_lane_zt_u8 (0, z0, 0)) + +/* +** luti2_z18_z5_15: +** luti2 z18\.b, zt0, z5\[15\] +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_15, svuint8_t, svuint8_t, z18, + svluti2_lane_zt_u8 (0, z5, 15), + svluti2_lane_zt_u8 (0, z5, 15)) + +/* +** luti2_z24_z7_13: +** luti2 z24\.b, zt0, z7\[13\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_13, svuint8_t, svuint8_t, z24, + svluti2_lane_zt_u8 (0, z7, 13), + svluti2_lane_zt_u8 (0, z7, 13)) + +/* +** luti2_z28_z16_11: +** luti2 z28\.b, zt0, z16\[11\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_11, svuint8_t, svuint8_t, z28, + svluti2_lane_zt_u8 (0, z16, 11), + svluti2_lane_zt_u8 (0, z16, 11)) + +/* +** luti2_z24_z23_1: +** luti2 z24\.b, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_1, svuint8_t, svuint8_t, z24, + svluti2_lane_zt_u8 (0, z23, 1), + svluti2_lane_zt_u8 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..73447ec78df35ffe2d281e0d2f63ae725945123d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u8_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svuint8x2_t, svuint8_t, z1, + svluti2_lane_zt_u8_x2 (0, z0, 0), + svluti2_lane_zt_u8_x2 (0, z0, 0)) + +/* +** luti2_z18_z5_7: +** luti2 {z18\.b - z19\.b}, zt0, z5\[7\] +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_7, svuint8x2_t, svuint8_t, z18, + svluti2_lane_zt_u8_x2 (0, z5, 7), + svluti2_lane_zt_u8_x2 (0, z5, 7)) + +/* +** luti2_z24_z7_6: +** luti2 {z24\.b - z25\.b}, zt0, z7\[6\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_6, svuint8x2_t, svuint8_t, z24, + svluti2_lane_zt_u8_x2 (0, z7, 6), + svluti2_lane_zt_u8_x2 (0, z7, 6)) + +/* +** luti2_z28_z16_3: +** luti2 {z28\.b - z29\.b}, zt0, z16\[3\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_3, svuint8x2_t, svuint8_t, z28, + svluti2_lane_zt_u8_x2 (0, z16, 3), + svluti2_lane_zt_u8_x2 (0, z16, 3)) + +/* +** luti2_z24_z23_1: +** luti2 {z24\.b - z25\.b}, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_1, svuint8x2_t, svuint8_t, z24, + svluti2_lane_zt_u8_x2 (0, z23, 1), + svluti2_lane_zt_u8_x2 
(0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..3f64c6144935da0268eca32f4ecf47fe4ff90cb1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti2_u8_x4.c @@ -0,0 +1,56 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti2_z1_z0_0: +** luti2 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z1_z0_0, svuint8x4_t, svuint8_t, z1, + svluti2_lane_zt_u8_x4 (0, z0, 0), + svluti2_lane_zt_u8_x4 (0, z0, 0)) + +/* +** luti2_z18_z5_3: +** luti2 {[^\n]+}, zt0, z5\[3\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti2_z18_z5_3, svuint8x4_t, svuint8_t, z18, + svluti2_lane_zt_u8_x4 (0, z5, 3), + svluti2_lane_zt_u8_x4 (0, z5, 3)) + +/* +** luti2_z24_z7_2: +** luti2 {z24\.b - z27\.b}, zt0, z7\[2\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z7_2, svuint8x4_t, svuint8_t, z24, + svluti2_lane_zt_u8_x4 (0, z7, 2), + svluti2_lane_zt_u8_x4 (0, z7, 2)) + +/* +** luti2_z28_z16_1: +** luti2 {z28\.b - z31\.b}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti2_z28_z16_1, svuint8x4_t, svuint8_t, z28, + svluti2_lane_zt_u8_x4 (0, z16, 1), + svluti2_lane_zt_u8_x4 (0, z16, 1)) + +/* +** luti2_z24_z23_0: +** luti2 {z24\.b - z27\.b}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti2_z24_z23_0, svuint8x4_t, svuint8_t, z24, + svluti2_lane_zt_u8_x4 (0, z23, 0), + svluti2_lane_zt_u8_x4 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_bf16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_bf16.c new file mode 100644 index 0000000000000000000000000000000000000000..77d7b602f84112c32b9c5520026fd30eb830add0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_bf16.c @@ -0,0 +1,48 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 z1\.h, zt0, z0\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svbfloat16_t, svuint8_t, z1, + svluti4_lane_zt_bf16 (0, z0, 0), + svluti4_lane_zt_bf16 (0, z0, 0)) + +/* +** luti4_z18_z5_7: +** luti4 z18\.h, zt0, z5\[7\] +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_7, svbfloat16_t, svuint8_t, z18, + svluti4_lane_zt_bf16 (0, z5, 7), + svluti4_lane_zt_bf16 (0, z5, 7)) + +/* +** luti4_z24_z7_6: +** luti4 z24\.h, zt0, z7\[6\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_6, svbfloat16_t, svuint8_t, z24, + svluti4_lane_zt_bf16 (0, z7, 6), + svluti4_lane_zt_bf16 (0, z7, 6)) + +/* +** luti4_z28_z16_4: +** luti4 z28\.h, zt0, z16\[4\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_4, svbfloat16_t, svuint8_t, z28, + svluti4_lane_zt_bf16 (0, z16, 4), + svluti4_lane_zt_bf16 (0, z16, 4)) + +/* +** luti4_z24_z23_1: +** luti4 z24\.h, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_1, svbfloat16_t, svuint8_t, z24, + svluti4_lane_zt_bf16 (0, z23, 1), + svluti4_lane_zt_bf16 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_bf16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..b86b020af43d96db86cb1ce480a853cf2597ea8f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_bf16_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** 
luti4_z1_z0_0: +** luti4 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svbfloat16x2_t, svuint8_t, z1, + svluti4_lane_zt_bf16_x2 (0, z0, 0), + svluti4_lane_zt_bf16_x2 (0, z0, 0)) + +/* +** luti4_z18_z5_3: +** luti4 {z18\.h - z19\.h}, zt0, z5\[3\] +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_3, svbfloat16x2_t, svuint8_t, z18, + svluti4_lane_zt_bf16_x2 (0, z5, 3), + svluti4_lane_zt_bf16_x2 (0, z5, 3)) + +/* +** luti4_z24_z7_2: +** luti4 {z24\.h - z25\.h}, zt0, z7\[2\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_2, svbfloat16x2_t, svuint8_t, z24, + svluti4_lane_zt_bf16_x2 (0, z7, 2), + svluti4_lane_zt_bf16_x2 (0, z7, 2)) + +/* +** luti4_z28_z16_1: +** luti4 {z28\.h - z29\.h}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_1, svbfloat16x2_t, svuint8_t, z28, + svluti4_lane_zt_bf16_x2 (0, z16, 1), + svluti4_lane_zt_bf16_x2 (0, z16, 1)) + +/* +** luti4_z24_z23_0: +** luti4 {z24\.h - z25\.h}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_0, svbfloat16x2_t, svuint8_t, z24, + svluti4_lane_zt_bf16_x2 (0, z23, 0), + svluti4_lane_zt_bf16_x2 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_bf16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_bf16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..148db5daca6fa240668638754b77531ac4dce1ec --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_bf16_x4.c @@ -0,0 +1,56 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svbfloat16x4_t, svuint8_t, z1, + svluti4_lane_zt_bf16_x4 (0, z0, 0), + svluti4_lane_zt_bf16_x4 (0, z0, 0)) + +/* +** luti4_z18_z5_1: +** luti4 {[^\n]+}, zt0, z5\[1\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_1, svbfloat16x4_t, svuint8_t, z18, + svluti4_lane_zt_bf16_x4 (0, z5, 1), + svluti4_lane_zt_bf16_x4 (0, z5, 1)) + +/* +** luti4_z24_z7_0: +** luti4 {z24\.h - z27\.h}, zt0, z7\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_0, svbfloat16x4_t, svuint8_t, z24, + svluti4_lane_zt_bf16_x4 (0, z7, 0), + svluti4_lane_zt_bf16_x4 (0, z7, 0)) + +/* +** luti4_z28_z16_1: +** luti4 {z28\.h - z31\.h}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_1, svbfloat16x4_t, svuint8_t, z28, + svluti4_lane_zt_bf16_x4 (0, z16, 1), + svluti4_lane_zt_bf16_x4 (0, z16, 1)) + +/* +** luti4_z24_z23_0: +** luti4 {z24\.h - z27\.h}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_0, svbfloat16x4_t, svuint8_t, z24, + svluti4_lane_zt_bf16_x4 (0, z23, 0), + svluti4_lane_zt_bf16_x4 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f16.c new file mode 100644 index 0000000000000000000000000000000000000000..b8f6e069b3a4b2b5da7617b3f785b8e7e5caf148 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f16.c @@ -0,0 +1,48 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 z1\.h, zt0, z0\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svfloat16_t, svuint8_t, z1, + svluti4_lane_zt_f16 (0, z0, 0), + svluti4_lane_zt_f16 (0, z0, 0)) + +/* +** luti4_z18_z5_7: +** luti4 z18\.h, zt0, z5\[7\] +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_7, svfloat16_t, svuint8_t, z18, + 
svluti4_lane_zt_f16 (0, z5, 7), + svluti4_lane_zt_f16 (0, z5, 7)) + +/* +** luti4_z24_z7_6: +** luti4 z24\.h, zt0, z7\[6\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_6, svfloat16_t, svuint8_t, z24, + svluti4_lane_zt_f16 (0, z7, 6), + svluti4_lane_zt_f16 (0, z7, 6)) + +/* +** luti4_z28_z16_4: +** luti4 z28\.h, zt0, z16\[4\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_4, svfloat16_t, svuint8_t, z28, + svluti4_lane_zt_f16 (0, z16, 4), + svluti4_lane_zt_f16 (0, z16, 4)) + +/* +** luti4_z24_z23_1: +** luti4 z24\.h, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_1, svfloat16_t, svuint8_t, z24, + svluti4_lane_zt_f16 (0, z23, 1), + svluti4_lane_zt_f16 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..b6c5f951c845352f96da3d1bf22ceb84290609c8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f16_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svfloat16x2_t, svuint8_t, z1, + svluti4_lane_zt_f16_x2 (0, z0, 0), + svluti4_lane_zt_f16_x2 (0, z0, 0)) + +/* +** luti4_z18_z5_3: +** luti4 {z18\.h - z19\.h}, zt0, z5\[3\] +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_3, svfloat16x2_t, svuint8_t, z18, + svluti4_lane_zt_f16_x2 (0, z5, 3), + svluti4_lane_zt_f16_x2 (0, z5, 3)) + +/* +** luti4_z24_z7_2: +** luti4 {z24\.h - z25\.h}, zt0, z7\[2\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_2, svfloat16x2_t, svuint8_t, z24, + svluti4_lane_zt_f16_x2 (0, z7, 2), + svluti4_lane_zt_f16_x2 (0, z7, 2)) + +/* +** luti4_z28_z16_1: +** luti4 {z28\.h - z29\.h}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_1, svfloat16x2_t, svuint8_t, z28, + svluti4_lane_zt_f16_x2 (0, z16, 1), + svluti4_lane_zt_f16_x2 (0, z16, 1)) + +/* +** luti4_z24_z23_0: +** luti4 {z24\.h - z25\.h}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_0, svfloat16x2_t, svuint8_t, z24, + svluti4_lane_zt_f16_x2 (0, z23, 0), + svluti4_lane_zt_f16_x2 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..8cbad8b99b429cb65a6d77e5e9a82ec00c22f789 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f16_x4.c @@ -0,0 +1,56 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svfloat16x4_t, svuint8_t, z1, + svluti4_lane_zt_f16_x4 (0, z0, 0), + svluti4_lane_zt_f16_x4 (0, z0, 0)) + +/* +** luti4_z18_z5_1: +** luti4 {[^\n]+}, zt0, z5\[1\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_1, svfloat16x4_t, svuint8_t, z18, + svluti4_lane_zt_f16_x4 (0, z5, 1), + svluti4_lane_zt_f16_x4 (0, z5, 1)) + +/* +** luti4_z24_z7_0: +** luti4 {z24\.h - z27\.h}, zt0, z7\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_0, svfloat16x4_t, svuint8_t, z24, + svluti4_lane_zt_f16_x4 (0, z7, 0), + svluti4_lane_zt_f16_x4 (0, z7, 0)) + +/* +** luti4_z28_z16_1: +** luti4 {z28\.h - z31\.h}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_1, 
svfloat16x4_t, svuint8_t, z28, + svluti4_lane_zt_f16_x4 (0, z16, 1), + svluti4_lane_zt_f16_x4 (0, z16, 1)) + +/* +** luti4_z24_z23_0: +** luti4 {z24\.h - z27\.h}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_0, svfloat16x4_t, svuint8_t, z24, + svluti4_lane_zt_f16_x4 (0, z23, 0), + svluti4_lane_zt_f16_x4 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f32.c new file mode 100644 index 0000000000000000000000000000000000000000..3dcc69cb13b002f6b31b7111fac5c733e070c6b1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f32.c @@ -0,0 +1,48 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 z1\.s, zt0, z0\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svfloat32_t, svuint8_t, z1, + svluti4_lane_zt_f32 (0, z0, 0), + svluti4_lane_zt_f32 (0, z0, 0)) + +/* +** luti4_z18_z5_7: +** luti4 z18\.s, zt0, z5\[7\] +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_7, svfloat32_t, svuint8_t, z18, + svluti4_lane_zt_f32 (0, z5, 7), + svluti4_lane_zt_f32 (0, z5, 7)) + +/* +** luti4_z24_z7_6: +** luti4 z24\.s, zt0, z7\[6\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_6, svfloat32_t, svuint8_t, z24, + svluti4_lane_zt_f32 (0, z7, 6), + svluti4_lane_zt_f32 (0, z7, 6)) + +/* +** luti4_z28_z16_4: +** luti4 z28\.s, zt0, z16\[4\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_4, svfloat32_t, svuint8_t, z28, + svluti4_lane_zt_f32 (0, z16, 4), + svluti4_lane_zt_f32 (0, z16, 4)) + +/* +** luti4_z24_z23_1: +** luti4 z24\.s, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_1, svfloat32_t, svuint8_t, z24, + svluti4_lane_zt_f32 (0, z23, 1), + svluti4_lane_zt_f32 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..7f9705843151ce560484d65a2e799144a6f40cc2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f32_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svfloat32x2_t, svuint8_t, z1, + svluti4_lane_zt_f32_x2 (0, z0, 0), + svluti4_lane_zt_f32_x2 (0, z0, 0)) + +/* +** luti4_z18_z5_3: +** luti4 {z18\.s - z19\.s}, zt0, z5\[3\] +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_3, svfloat32x2_t, svuint8_t, z18, + svluti4_lane_zt_f32_x2 (0, z5, 3), + svluti4_lane_zt_f32_x2 (0, z5, 3)) + +/* +** luti4_z24_z7_2: +** luti4 {z24\.s - z25\.s}, zt0, z7\[2\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_2, svfloat32x2_t, svuint8_t, z24, + svluti4_lane_zt_f32_x2 (0, z7, 2), + svluti4_lane_zt_f32_x2 (0, z7, 2)) + +/* +** luti4_z28_z16_1: +** luti4 {z28\.s - z29\.s}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_1, svfloat32x2_t, svuint8_t, z28, + svluti4_lane_zt_f32_x2 (0, z16, 1), + svluti4_lane_zt_f32_x2 (0, z16, 1)) + +/* +** luti4_z24_z23_0: +** luti4 {z24\.s - z25\.s}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_0, svfloat32x2_t, svuint8_t, z24, + svluti4_lane_zt_f32_x2 (0, z23, 0), + svluti4_lane_zt_f32_x2 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f32_x4.c new file mode 100644 index 
0000000000000000000000000000000000000000..c32c67489a1d0c575d97121853b940926558d4a6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_f32_x4.c @@ -0,0 +1,56 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svfloat32x4_t, svuint8_t, z1, + svluti4_lane_zt_f32_x4 (0, z0, 0), + svluti4_lane_zt_f32_x4 (0, z0, 0)) + +/* +** luti4_z18_z5_1: +** luti4 {[^\n]+}, zt0, z5\[1\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_1, svfloat32x4_t, svuint8_t, z18, + svluti4_lane_zt_f32_x4 (0, z5, 1), + svluti4_lane_zt_f32_x4 (0, z5, 1)) + +/* +** luti4_z24_z7_0: +** luti4 {z24\.s - z27\.s}, zt0, z7\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_0, svfloat32x4_t, svuint8_t, z24, + svluti4_lane_zt_f32_x4 (0, z7, 0), + svluti4_lane_zt_f32_x4 (0, z7, 0)) + +/* +** luti4_z28_z16_1: +** luti4 {z28\.s - z31\.s}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_1, svfloat32x4_t, svuint8_t, z28, + svluti4_lane_zt_f32_x4 (0, z16, 1), + svluti4_lane_zt_f32_x4 (0, z16, 1)) + +/* +** luti4_z24_z23_0: +** luti4 {z24\.s - z27\.s}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_0, svfloat32x4_t, svuint8_t, z24, + svluti4_lane_zt_f32_x4 (0, z23, 0), + svluti4_lane_zt_f32_x4 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s16.c new file mode 100644 index 0000000000000000000000000000000000000000..792cf77363114e8438af68c745935ae360c1b687 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s16.c @@ -0,0 +1,48 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 z1\.h, zt0, z0\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svint16_t, svuint8_t, z1, + svluti4_lane_zt_s16 (0, z0, 0), + svluti4_lane_zt_s16 (0, z0, 0)) + +/* +** luti4_z18_z5_7: +** luti4 z18\.h, zt0, z5\[7\] +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_7, svint16_t, svuint8_t, z18, + svluti4_lane_zt_s16 (0, z5, 7), + svluti4_lane_zt_s16 (0, z5, 7)) + +/* +** luti4_z24_z7_6: +** luti4 z24\.h, zt0, z7\[6\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_6, svint16_t, svuint8_t, z24, + svluti4_lane_zt_s16 (0, z7, 6), + svluti4_lane_zt_s16 (0, z7, 6)) + +/* +** luti4_z28_z16_4: +** luti4 z28\.h, zt0, z16\[4\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_4, svint16_t, svuint8_t, z28, + svluti4_lane_zt_s16 (0, z16, 4), + svluti4_lane_zt_s16 (0, z16, 4)) + +/* +** luti4_z24_z23_1: +** luti4 z24\.h, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_1, svint16_t, svuint8_t, z24, + svluti4_lane_zt_s16 (0, z23, 1), + svluti4_lane_zt_s16 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..d51852bf73d6695fb0eeb1ca34076763179b1d2e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s16_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svint16x2_t, svuint8_t, z1, + svluti4_lane_zt_s16_x2 (0, z0, 0), + 
svluti4_lane_zt_s16_x2 (0, z0, 0)) + +/* +** luti4_z18_z5_3: +** luti4 {z18\.h - z19\.h}, zt0, z5\[3\] +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_3, svint16x2_t, svuint8_t, z18, + svluti4_lane_zt_s16_x2 (0, z5, 3), + svluti4_lane_zt_s16_x2 (0, z5, 3)) + +/* +** luti4_z24_z7_2: +** luti4 {z24\.h - z25\.h}, zt0, z7\[2\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_2, svint16x2_t, svuint8_t, z24, + svluti4_lane_zt_s16_x2 (0, z7, 2), + svluti4_lane_zt_s16_x2 (0, z7, 2)) + +/* +** luti4_z28_z16_1: +** luti4 {z28\.h - z29\.h}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_1, svint16x2_t, svuint8_t, z28, + svluti4_lane_zt_s16_x2 (0, z16, 1), + svluti4_lane_zt_s16_x2 (0, z16, 1)) + +/* +** luti4_z24_z23_0: +** luti4 {z24\.h - z25\.h}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_0, svint16x2_t, svuint8_t, z24, + svluti4_lane_zt_s16_x2 (0, z23, 0), + svluti4_lane_zt_s16_x2 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..d964a24cf5c88cf4de55cfe19a0d9f7fd2bd8256 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s16_x4.c @@ -0,0 +1,56 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svint16x4_t, svuint8_t, z1, + svluti4_lane_zt_s16_x4 (0, z0, 0), + svluti4_lane_zt_s16_x4 (0, z0, 0)) + +/* +** luti4_z18_z5_1: +** luti4 {[^\n]+}, zt0, z5\[1\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_1, svint16x4_t, svuint8_t, z18, + svluti4_lane_zt_s16_x4 (0, z5, 1), + svluti4_lane_zt_s16_x4 (0, z5, 1)) + +/* +** luti4_z24_z7_0: +** luti4 {z24\.h - z27\.h}, zt0, z7\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_0, svint16x4_t, svuint8_t, z24, + svluti4_lane_zt_s16_x4 (0, z7, 0), + svluti4_lane_zt_s16_x4 (0, z7, 0)) + +/* +** luti4_z28_z16_1: +** luti4 {z28\.h - z31\.h}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_1, svint16x4_t, svuint8_t, z28, + svluti4_lane_zt_s16_x4 (0, z16, 1), + svluti4_lane_zt_s16_x4 (0, z16, 1)) + +/* +** luti4_z24_z23_0: +** luti4 {z24\.h - z27\.h}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_0, svint16x4_t, svuint8_t, z24, + svluti4_lane_zt_s16_x4 (0, z23, 0), + svluti4_lane_zt_s16_x4 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s32.c new file mode 100644 index 0000000000000000000000000000000000000000..36390adfe982a23ea667504185378d2f215bbfcf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s32.c @@ -0,0 +1,48 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 z1\.s, zt0, z0\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svint32_t, svuint8_t, z1, + svluti4_lane_zt_s32 (0, z0, 0), + svluti4_lane_zt_s32 (0, z0, 0)) + +/* +** luti4_z18_z5_7: +** luti4 z18\.s, zt0, z5\[7\] +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_7, svint32_t, svuint8_t, z18, + svluti4_lane_zt_s32 (0, z5, 7), + svluti4_lane_zt_s32 (0, z5, 7)) + +/* +** luti4_z24_z7_6: +** luti4 z24\.s, zt0, z7\[6\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_6, svint32_t, svuint8_t, z24, + svluti4_lane_zt_s32 (0, z7, 6), + 
svluti4_lane_zt_s32 (0, z7, 6)) + +/* +** luti4_z28_z16_4: +** luti4 z28\.s, zt0, z16\[4\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_4, svint32_t, svuint8_t, z28, + svluti4_lane_zt_s32 (0, z16, 4), + svluti4_lane_zt_s32 (0, z16, 4)) + +/* +** luti4_z24_z23_1: +** luti4 z24\.s, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_1, svint32_t, svuint8_t, z24, + svluti4_lane_zt_s32 (0, z23, 1), + svluti4_lane_zt_s32 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..7345a1dbe8a93478cf599f437fc1dd03966180a2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s32_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svint32x2_t, svuint8_t, z1, + svluti4_lane_zt_s32_x2 (0, z0, 0), + svluti4_lane_zt_s32_x2 (0, z0, 0)) + +/* +** luti4_z18_z5_3: +** luti4 {z18\.s - z19\.s}, zt0, z5\[3\] +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_3, svint32x2_t, svuint8_t, z18, + svluti4_lane_zt_s32_x2 (0, z5, 3), + svluti4_lane_zt_s32_x2 (0, z5, 3)) + +/* +** luti4_z24_z7_2: +** luti4 {z24\.s - z25\.s}, zt0, z7\[2\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_2, svint32x2_t, svuint8_t, z24, + svluti4_lane_zt_s32_x2 (0, z7, 2), + svluti4_lane_zt_s32_x2 (0, z7, 2)) + +/* +** luti4_z28_z16_1: +** luti4 {z28\.s - z29\.s}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_1, svint32x2_t, svuint8_t, z28, + svluti4_lane_zt_s32_x2 (0, z16, 1), + svluti4_lane_zt_s32_x2 (0, z16, 1)) + +/* +** luti4_z24_z23_0: +** luti4 {z24\.s - z25\.s}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_0, svint32x2_t, svuint8_t, z24, + svluti4_lane_zt_s32_x2 (0, z23, 0), + svluti4_lane_zt_s32_x2 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..13844ce79675c674f23b5979269428dd8b59cdbf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s32_x4.c @@ -0,0 +1,56 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svint32x4_t, svuint8_t, z1, + svluti4_lane_zt_s32_x4 (0, z0, 0), + svluti4_lane_zt_s32_x4 (0, z0, 0)) + +/* +** luti4_z18_z5_1: +** luti4 {[^\n]+}, zt0, z5\[1\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_1, svint32x4_t, svuint8_t, z18, + svluti4_lane_zt_s32_x4 (0, z5, 1), + svluti4_lane_zt_s32_x4 (0, z5, 1)) + +/* +** luti4_z24_z7_0: +** luti4 {z24\.s - z27\.s}, zt0, z7\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_0, svint32x4_t, svuint8_t, z24, + svluti4_lane_zt_s32_x4 (0, z7, 0), + svluti4_lane_zt_s32_x4 (0, z7, 0)) + +/* +** luti4_z28_z16_1: +** luti4 {z28\.s - z31\.s}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_1, svint32x4_t, svuint8_t, z28, + svluti4_lane_zt_s32_x4 (0, z16, 1), + svluti4_lane_zt_s32_x4 (0, z16, 1)) + +/* +** luti4_z24_z23_0: +** luti4 {z24\.s - z27\.s}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_0, svint32x4_t, svuint8_t, z24, + 
svluti4_lane_zt_s32_x4 (0, z23, 0), + svluti4_lane_zt_s32_x4 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s8.c new file mode 100644 index 0000000000000000000000000000000000000000..9c2573a528960ef8ef547730cfc138dd0d65c774 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s8.c @@ -0,0 +1,48 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 z1\.b, zt0, z0\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svint8_t, svuint8_t, z1, + svluti4_lane_zt_s8 (0, z0, 0), + svluti4_lane_zt_s8 (0, z0, 0)) + +/* +** luti4_z18_z5_7: +** luti4 z18\.b, zt0, z5\[7\] +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_7, svint8_t, svuint8_t, z18, + svluti4_lane_zt_s8 (0, z5, 7), + svluti4_lane_zt_s8 (0, z5, 7)) + +/* +** luti4_z24_z7_6: +** luti4 z24\.b, zt0, z7\[6\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_6, svint8_t, svuint8_t, z24, + svluti4_lane_zt_s8 (0, z7, 6), + svluti4_lane_zt_s8 (0, z7, 6)) + +/* +** luti4_z28_z16_4: +** luti4 z28\.b, zt0, z16\[4\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_4, svint8_t, svuint8_t, z28, + svluti4_lane_zt_s8 (0, z16, 4), + svluti4_lane_zt_s8 (0, z16, 4)) + +/* +** luti4_z24_z23_1: +** luti4 z24\.b, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_1, svint8_t, svuint8_t, z24, + svluti4_lane_zt_s8 (0, z23, 1), + svluti4_lane_zt_s8 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..fcb74a69a50fc29564655b1ec749992450df7301 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_s8_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svint8x2_t, svuint8_t, z1, + svluti4_lane_zt_s8_x2 (0, z0, 0), + svluti4_lane_zt_s8_x2 (0, z0, 0)) + +/* +** luti4_z18_z5_3: +** luti4 {z18\.b - z19\.b}, zt0, z5\[3\] +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_3, svint8x2_t, svuint8_t, z18, + svluti4_lane_zt_s8_x2 (0, z5, 3), + svluti4_lane_zt_s8_x2 (0, z5, 3)) + +/* +** luti4_z24_z7_2: +** luti4 {z24\.b - z25\.b}, zt0, z7\[2\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_2, svint8x2_t, svuint8_t, z24, + svluti4_lane_zt_s8_x2 (0, z7, 2), + svluti4_lane_zt_s8_x2 (0, z7, 2)) + +/* +** luti4_z28_z16_1: +** luti4 {z28\.b - z29\.b}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_1, svint8x2_t, svuint8_t, z28, + svluti4_lane_zt_s8_x2 (0, z16, 1), + svluti4_lane_zt_s8_x2 (0, z16, 1)) + +/* +** luti4_z24_z23_0: +** luti4 {z24\.b - z25\.b}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_0, svint8x2_t, svuint8_t, z24, + svluti4_lane_zt_s8_x2 (0, z23, 0), + svluti4_lane_zt_s8_x2 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u16.c new file mode 100644 index 0000000000000000000000000000000000000000..c542051158adb2ecbe983044d623e21a2fa3a8cf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u16.c @@ -0,0 +1,48 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 z1\.h, zt0, z0\[0\] +** ret +*/ +TEST_XN_SINGLE 
(luti4_z1_z0_0, svuint16_t, svuint8_t, z1, + svluti4_lane_zt_u16 (0, z0, 0), + svluti4_lane_zt_u16 (0, z0, 0)) + +/* +** luti4_z18_z5_7: +** luti4 z18\.h, zt0, z5\[7\] +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_7, svuint16_t, svuint8_t, z18, + svluti4_lane_zt_u16 (0, z5, 7), + svluti4_lane_zt_u16 (0, z5, 7)) + +/* +** luti4_z24_z7_6: +** luti4 z24\.h, zt0, z7\[6\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_6, svuint16_t, svuint8_t, z24, + svluti4_lane_zt_u16 (0, z7, 6), + svluti4_lane_zt_u16 (0, z7, 6)) + +/* +** luti4_z28_z16_4: +** luti4 z28\.h, zt0, z16\[4\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_4, svuint16_t, svuint8_t, z28, + svluti4_lane_zt_u16 (0, z16, 4), + svluti4_lane_zt_u16 (0, z16, 4)) + +/* +** luti4_z24_z23_1: +** luti4 z24\.h, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_1, svuint16_t, svuint8_t, z24, + svluti4_lane_zt_u16 (0, z23, 1), + svluti4_lane_zt_u16 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..df22fd13527ee5c467743f7c0207af7d25a6ff76 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u16_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svuint16x2_t, svuint8_t, z1, + svluti4_lane_zt_u16_x2 (0, z0, 0), + svluti4_lane_zt_u16_x2 (0, z0, 0)) + +/* +** luti4_z18_z5_3: +** luti4 {z18\.h - z19\.h}, zt0, z5\[3\] +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_3, svuint16x2_t, svuint8_t, z18, + svluti4_lane_zt_u16_x2 (0, z5, 3), + svluti4_lane_zt_u16_x2 (0, z5, 3)) + +/* +** luti4_z24_z7_2: +** luti4 {z24\.h - z25\.h}, zt0, z7\[2\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_2, svuint16x2_t, svuint8_t, z24, + svluti4_lane_zt_u16_x2 (0, z7, 2), + svluti4_lane_zt_u16_x2 (0, z7, 2)) + +/* +** luti4_z28_z16_1: +** luti4 {z28\.h - z29\.h}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_1, svuint16x2_t, svuint8_t, z28, + svluti4_lane_zt_u16_x2 (0, z16, 1), + svluti4_lane_zt_u16_x2 (0, z16, 1)) + +/* +** luti4_z24_z23_0: +** luti4 {z24\.h - z25\.h}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_0, svuint16x2_t, svuint8_t, z24, + svluti4_lane_zt_u16_x2 (0, z23, 0), + svluti4_lane_zt_u16_x2 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..06180f6c3c4497fb0f36e9ff8f3e1124005d58c4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u16_x4.c @@ -0,0 +1,56 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svuint16x4_t, svuint8_t, z1, + svluti4_lane_zt_u16_x4 (0, z0, 0), + svluti4_lane_zt_u16_x4 (0, z0, 0)) + +/* +** luti4_z18_z5_1: +** luti4 {[^\n]+}, zt0, z5\[1\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_1, svuint16x4_t, svuint8_t, z18, + svluti4_lane_zt_u16_x4 (0, z5, 1), + svluti4_lane_zt_u16_x4 (0, z5, 1)) + +/* +** luti4_z24_z7_0: +** luti4 {z24\.h - z27\.h}, zt0, z7\[0\] +** ret +*/ +TEST_XN_SINGLE 
(luti4_z24_z7_0, svuint16x4_t, svuint8_t, z24, + svluti4_lane_zt_u16_x4 (0, z7, 0), + svluti4_lane_zt_u16_x4 (0, z7, 0)) + +/* +** luti4_z28_z16_1: +** luti4 {z28\.h - z31\.h}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_1, svuint16x4_t, svuint8_t, z28, + svluti4_lane_zt_u16_x4 (0, z16, 1), + svluti4_lane_zt_u16_x4 (0, z16, 1)) + +/* +** luti4_z24_z23_0: +** luti4 {z24\.h - z27\.h}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_0, svuint16x4_t, svuint8_t, z24, + svluti4_lane_zt_u16_x4 (0, z23, 0), + svluti4_lane_zt_u16_x4 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u32.c new file mode 100644 index 0000000000000000000000000000000000000000..6cba7fecd6e5d4c3fbcfdeba6e191d1d4e323286 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u32.c @@ -0,0 +1,48 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 z1\.s, zt0, z0\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svuint32_t, svuint8_t, z1, + svluti4_lane_zt_u32 (0, z0, 0), + svluti4_lane_zt_u32 (0, z0, 0)) + +/* +** luti4_z18_z5_7: +** luti4 z18\.s, zt0, z5\[7\] +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_7, svuint32_t, svuint8_t, z18, + svluti4_lane_zt_u32 (0, z5, 7), + svluti4_lane_zt_u32 (0, z5, 7)) + +/* +** luti4_z24_z7_6: +** luti4 z24\.s, zt0, z7\[6\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_6, svuint32_t, svuint8_t, z24, + svluti4_lane_zt_u32 (0, z7, 6), + svluti4_lane_zt_u32 (0, z7, 6)) + +/* +** luti4_z28_z16_4: +** luti4 z28\.s, zt0, z16\[4\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_4, svuint32_t, svuint8_t, z28, + svluti4_lane_zt_u32 (0, z16, 4), + svluti4_lane_zt_u32 (0, z16, 4)) + +/* +** luti4_z24_z23_1: +** luti4 z24\.s, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_1, svuint32_t, svuint8_t, z24, + svluti4_lane_zt_u32 (0, z23, 1), + svluti4_lane_zt_u32 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..14bba18b35ca8c8184b00c250b345a4838b60d5b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u32_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svuint32x2_t, svuint8_t, z1, + svluti4_lane_zt_u32_x2 (0, z0, 0), + svluti4_lane_zt_u32_x2 (0, z0, 0)) + +/* +** luti4_z18_z5_3: +** luti4 {z18\.s - z19\.s}, zt0, z5\[3\] +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_3, svuint32x2_t, svuint8_t, z18, + svluti4_lane_zt_u32_x2 (0, z5, 3), + svluti4_lane_zt_u32_x2 (0, z5, 3)) + +/* +** luti4_z24_z7_2: +** luti4 {z24\.s - z25\.s}, zt0, z7\[2\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_2, svuint32x2_t, svuint8_t, z24, + svluti4_lane_zt_u32_x2 (0, z7, 2), + svluti4_lane_zt_u32_x2 (0, z7, 2)) + +/* +** luti4_z28_z16_1: +** luti4 {z28\.s - z29\.s}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_1, svuint32x2_t, svuint8_t, z28, + svluti4_lane_zt_u32_x2 (0, z16, 1), + svluti4_lane_zt_u32_x2 (0, z16, 1)) + +/* +** luti4_z24_z23_0: +** luti4 {z24\.s - z25\.s}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_0, svuint32x2_t, svuint8_t, z24, + svluti4_lane_zt_u32_x2 (0, z23, 0), + svluti4_lane_zt_u32_x2 (0, z23, 
0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..841b9c0f15b1e0671c14dece20547e0f9dca98fe --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u32_x4.c @@ -0,0 +1,56 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 {[^\n]+}, zt0, z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svuint32x4_t, svuint8_t, z1, + svluti4_lane_zt_u32_x4 (0, z0, 0), + svluti4_lane_zt_u32_x4 (0, z0, 0)) + +/* +** luti4_z18_z5_1: +** luti4 {[^\n]+}, zt0, z5\[1\] +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_1, svuint32x4_t, svuint8_t, z18, + svluti4_lane_zt_u32_x4 (0, z5, 1), + svluti4_lane_zt_u32_x4 (0, z5, 1)) + +/* +** luti4_z24_z7_0: +** luti4 {z24\.s - z27\.s}, zt0, z7\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_0, svuint32x4_t, svuint8_t, z24, + svluti4_lane_zt_u32_x4 (0, z7, 0), + svluti4_lane_zt_u32_x4 (0, z7, 0)) + +/* +** luti4_z28_z16_1: +** luti4 {z28\.s - z31\.s}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_1, svuint32x4_t, svuint8_t, z28, + svluti4_lane_zt_u32_x4 (0, z16, 1), + svluti4_lane_zt_u32_x4 (0, z16, 1)) + +/* +** luti4_z24_z23_0: +** luti4 {z24\.s - z27\.s}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_0, svuint32x4_t, svuint8_t, z24, + svluti4_lane_zt_u32_x4 (0, z23, 0), + svluti4_lane_zt_u32_x4 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u8.c new file mode 100644 index 0000000000000000000000000000000000000000..95247956ef5b8f5d0ccf1867051c94279e4a3a38 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u8.c @@ -0,0 +1,48 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 z1\.b, zt0, z0\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svuint8_t, svuint8_t, z1, + svluti4_lane_zt_u8 (0, z0, 0), + svluti4_lane_zt_u8 (0, z0, 0)) + +/* +** luti4_z18_z5_7: +** luti4 z18\.b, zt0, z5\[7\] +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_7, svuint8_t, svuint8_t, z18, + svluti4_lane_zt_u8 (0, z5, 7), + svluti4_lane_zt_u8 (0, z5, 7)) + +/* +** luti4_z24_z7_6: +** luti4 z24\.b, zt0, z7\[6\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_6, svuint8_t, svuint8_t, z24, + svluti4_lane_zt_u8 (0, z7, 6), + svluti4_lane_zt_u8 (0, z7, 6)) + +/* +** luti4_z28_z16_4: +** luti4 z28\.b, zt0, z16\[4\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_4, svuint8_t, svuint8_t, z28, + svluti4_lane_zt_u8 (0, z16, 4), + svluti4_lane_zt_u8 (0, z16, 4)) + +/* +** luti4_z24_z23_1: +** luti4 z24\.b, zt0, z23\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_1, svuint8_t, svuint8_t, z24, + svluti4_lane_zt_u8 (0, z23, 1), + svluti4_lane_zt_u8 (0, z23, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..ce7a84010e5972a95a10fcc76732a2cebf6c1b13 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/luti4_u8_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** luti4_z1_z0_0: +** luti4 {[^\n]+}, zt0, 
z0\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (luti4_z1_z0_0, svuint8x2_t, svuint8_t, z1, + svluti4_lane_zt_u8_x2 (0, z0, 0), + svluti4_lane_zt_u8_x2 (0, z0, 0)) + +/* +** luti4_z18_z5_3: +** luti4 {z18\.b - z19\.b}, zt0, z5\[3\] +** ret +*/ +TEST_XN_SINGLE (luti4_z18_z5_3, svuint8x2_t, svuint8_t, z18, + svluti4_lane_zt_u8_x2 (0, z5, 3), + svluti4_lane_zt_u8_x2 (0, z5, 3)) + +/* +** luti4_z24_z7_2: +** luti4 {z24\.b - z25\.b}, zt0, z7\[2\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z7_2, svuint8x2_t, svuint8_t, z24, + svluti4_lane_zt_u8_x2 (0, z7, 2), + svluti4_lane_zt_u8_x2 (0, z7, 2)) + +/* +** luti4_z28_z16_1: +** luti4 {z28\.b - z29\.b}, zt0, z16\[1\] +** ret +*/ +TEST_XN_SINGLE (luti4_z28_z16_1, svuint8x2_t, svuint8_t, z28, + svluti4_lane_zt_u8_x2 (0, z16, 1), + svluti4_lane_zt_u8_x2 (0, z16, 1)) + +/* +** luti4_z24_z23_0: +** luti4 {z24\.b - z25\.b}, zt0, z23\[0\] +** ret +*/ +TEST_XN_SINGLE (luti4_z24_z23_0, svuint8x2_t, svuint8_t, z24, + svluti4_lane_zt_u8_x2 (0, z23, 0), + svluti4_lane_zt_u8_x2 (0, z23, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..808e52899274398bc96113882ff118f3b2891272 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f16_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** fmax {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (max_z0_z0_z4, svfloat16x2_t, z0, + svmax_f16_x2 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** fmax {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (max_z0_z4_z0, svfloat16x2_t, z0, + svmax_f16_x2 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmax [^\n]+, {z28\.h - z29\.h} +** | +** fmax [^\n]+, {z28\.h - z29\.h} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svfloat16x2_t, z0, + svmax_f16_x2 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** fmax {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (max_z18_z18_z4, svfloat16x2_t, z18, + svmax_f16_x2 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** fmax [^\n]+, {z18\.h - z19\.h} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z18, svfloat16x2_t, z23, + svmax_f16_x2 (z23, z18), + svmax (z23, z18)) + +/* +** max_z28_z28_z0: +** fmax {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_XN (max_z28_z28_z0, svfloat16x2_t, z28, + svmax_f16_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** fmax {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_XN (max_z0_z0_z18, svfloat16x2_t, z0, + svmax_f16_x2 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmax {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+ +** | +** fmax {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svfloat16x2_t, z4, + svmax_f16_x2 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** fmax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svfloat16x2_t, svfloat16_t, z24, + svmax_single_f16_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmax {z24\.h - z25\.h}, 
{z24\.h - z25\.h}, z0\.h +** | +** fmax {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svfloat16x2_t, svfloat16_t, z24, + svmax_single_f16_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** fmax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svfloat16x2_t, svfloat16_t, z24, + svmax_single_f16_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** fmax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svfloat16x2_t, svfloat16_t, z1, + svmax_single_f16_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** fmax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svfloat16x2_t, svfloat16_t, z1, + svmax_single_f16_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** fmax {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svfloat16x2_t, svfloat16_t, z18, + svmax_single_f16_x2 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** fmax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svfloat16x2_t, svfloat16_t, + z0_res = svmax_single_f16_x2 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** fmax {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svfloat16x2_t, svfloat16_t, + z0 = svmax_single_f16_x2 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmax {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svfloat16x2_t, svfloat16_t, z24, + svmax_single_f16_x2 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..61cab28a18ed383bad50c0c28b5b49d63c27ba64 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f16_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** fmax {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (max_z0_z0_z4, svfloat16x4_t, z0, + svmax_f16_x4 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** fmax {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (max_z0_z4_z0, svfloat16x4_t, z0, + svmax_f16_x4 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax [^\n]+, {z28\.h - z31\.h} +** | +** fmax [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svfloat16x4_t, z0, + svmax_f16_x4 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax [^\n]+, {z4\.h - z7\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z18_z18_z4, svfloat16x4_t, z18, + svmax_f16_x4 (z18, z4), + 
svmax (z18, z4)) + +/* +** max_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z28, svfloat16x4_t, z23, + svmax_f16_x4 (z23, z28), + svmax (z23, z28)) + +/* +** max_z28_z28_z0: +** fmax {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (max_z28_z28_z0, svfloat16x4_t, z28, + svmax_f16_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** | +** fmax {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z0_z18, svfloat16x4_t, z0, + svmax_f16_x4 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** | +** fmax {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svfloat16x4_t, z4, + svmax_f16_x4 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** fmax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svfloat16x4_t, svfloat16_t, z24, + svmax_single_f16_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** | +** fmax {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svfloat16x4_t, svfloat16_t, z24, + svmax_single_f16_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svfloat16x4_t, svfloat16_t, z24, + svmax_single_f16_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** fmax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svfloat16x4_t, svfloat16_t, z1, + svmax_single_f16_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svfloat16x4_t, svfloat16_t, z1, + svmax_single_f16_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax [^\n]+, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svfloat16x4_t, svfloat16_t, z18, + svmax_single_f16_x4 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** fmax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svfloat16x4_t, svfloat16_t, + z0_res = svmax_single_f16_x4 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** fmax {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svfloat16x4_t, svfloat16_t, + z0 = svmax_single_f16_x4 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmax {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svfloat16x4_t, svfloat16_t, z24, + svmax_single_f16_x4 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..d53878a7a0047a7fa16a010df912ef98d63825b4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f32_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** fmax {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (max_z0_z0_z4, svfloat32x2_t, z0, + svmax_f32_x2 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** fmax {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (max_z0_z4_z0, svfloat32x2_t, z0, + svmax_f32_x2 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmax [^\n]+, {z28\.s - z29\.s} +** | +** fmax [^\n]+, {z28\.s - z29\.s} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svfloat32x2_t, z0, + svmax_f32_x2 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** fmax {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (max_z18_z18_z4, svfloat32x2_t, z18, + svmax_f32_x2 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** fmax [^\n]+, {z18\.s - z19\.s} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z18, svfloat32x2_t, z23, + svmax_f32_x2 (z23, z18), + svmax (z23, z18)) + +/* +** max_z28_z28_z0: +** fmax {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_XN (max_z28_z28_z0, svfloat32x2_t, z28, + svmax_f32_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** fmax {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s} +** ret +*/ +TEST_XN (max_z0_z0_z18, svfloat32x2_t, z0, + svmax_f32_x2 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmax {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+ +** | +** fmax {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svfloat32x2_t, z4, + svmax_f32_x2 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** fmax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svfloat32x2_t, svfloat32_t, z24, + svmax_single_f32_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** | +** fmax {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svfloat32x2_t, svfloat32_t, z24, + svmax_single_f32_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** fmax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svfloat32x2_t, svfloat32_t, z24, + svmax_single_f32_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** fmax {z24\.s - z25\.s}, {z24\.s - 
z25\.s}, z0\.s +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svfloat32x2_t, svfloat32_t, z1, + svmax_single_f32_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** fmax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svfloat32x2_t, svfloat32_t, z1, + svmax_single_f32_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** fmax {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svfloat32x2_t, svfloat32_t, z18, + svmax_single_f32_x2 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** fmax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svfloat32x2_t, svfloat32_t, + z0_res = svmax_single_f32_x2 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** fmax {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svfloat32x2_t, svfloat32_t, + z0 = svmax_single_f32_x2 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmax {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svfloat32x2_t, svfloat32_t, z24, + svmax_single_f32_x2 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..d0a6598d4881384ad6d6f97e6c837e497683e580 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f32_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** fmax {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (max_z0_z0_z4, svfloat32x4_t, z0, + svmax_f32_x4 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** fmax {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (max_z0_z4_z0, svfloat32x4_t, z0, + svmax_f32_x4 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax [^\n]+, {z28\.s - z31\.s} +** | +** fmax [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svfloat32x4_t, z0, + svmax_f32_x4 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax [^\n]+, {z4\.s - z7\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z18_z18_z4, svfloat32x4_t, z18, + svmax_f32_x4 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z28, svfloat32x4_t, z23, + svmax_f32_x4 (z23, z28), + svmax (z23, z28)) + +/* +** max_z28_z28_z0: +** fmax {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (max_z28_z28_z0, svfloat32x4_t, z28, + svmax_f32_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** | +** 
fmax {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z0_z18, svfloat32x4_t, z0, + svmax_f32_x4 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** | +** fmax {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svfloat32x4_t, z4, + svmax_f32_x4 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** fmax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svfloat32x4_t, svfloat32_t, z24, + svmax_single_f32_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** | +** fmax {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svfloat32x4_t, svfloat32_t, z24, + svmax_single_f32_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svfloat32x4_t, svfloat32_t, z24, + svmax_single_f32_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** fmax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svfloat32x4_t, svfloat32_t, z1, + svmax_single_f32_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svfloat32x4_t, svfloat32_t, z1, + svmax_single_f32_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax [^\n]+, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svfloat32x4_t, svfloat32_t, z18, + svmax_single_f32_x4 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** fmax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svfloat32x4_t, svfloat32_t, + z0_res = svmax_single_f32_x4 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** fmax {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svfloat32x4_t, svfloat32_t, + z0 = svmax_single_f32_x4 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmax {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svfloat32x4_t, svfloat32_t, z24, + svmax_single_f32_x4 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..a51ceac7414a0e39e83cbe28f4db2c1d35b17192 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f64_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** fmax {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (max_z0_z0_z4, svfloat64x2_t, z0, + svmax_f64_x2 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** fmax {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (max_z0_z4_z0, svfloat64x2_t, z0, + svmax_f64_x2 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmax [^\n]+, {z28\.d - z29\.d} +** | +** fmax [^\n]+, {z28\.d - z29\.d} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svfloat64x2_t, z0, + svmax_f64_x2 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** fmax {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (max_z18_z18_z4, svfloat64x2_t, z18, + svmax_f64_x2 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** fmax [^\n]+, {z18\.d - z19\.d} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z18, svfloat64x2_t, z23, + svmax_f64_x2 (z23, z18), + svmax (z23, z18)) + +/* +** max_z28_z28_z0: +** fmax {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_XN (max_z28_z28_z0, svfloat64x2_t, z28, + svmax_f64_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** fmax {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d} +** ret +*/ +TEST_XN (max_z0_z0_z18, svfloat64x2_t, z0, + svmax_f64_x2 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmax {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** | +** fmax {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svfloat64x2_t, z4, + svmax_f64_x2 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** fmax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svfloat64x2_t, svfloat64_t, z24, + svmax_single_f64_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** | +** fmax {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svfloat64x2_t, svfloat64_t, z24, + svmax_single_f64_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** fmax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svfloat64x2_t, svfloat64_t, z24, + svmax_single_f64_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** fmax {z24\.d - z25\.d}, {z24\.d - 
z25\.d}, z0\.d +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svfloat64x2_t, svfloat64_t, z1, + svmax_single_f64_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** fmax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svfloat64x2_t, svfloat64_t, z1, + svmax_single_f64_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** fmax {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svfloat64x2_t, svfloat64_t, z18, + svmax_single_f64_x2 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** fmax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svfloat64x2_t, svfloat64_t, + z0_res = svmax_single_f64_x2 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** fmax {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svfloat64x2_t, svfloat64_t, + z0 = svmax_single_f64_x2 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmax {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svfloat64x2_t, svfloat64_t, z24, + svmax_single_f64_x2 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..712b14e7ff391a7275d1054b4ed4bcdea93cbe30 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_f64_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** fmax {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (max_z0_z0_z4, svfloat64x4_t, z0, + svmax_f64_x4 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** fmax {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (max_z0_z4_z0, svfloat64x4_t, z0, + svmax_f64_x4 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax [^\n]+, {z28\.d - z31\.d} +** | +** fmax [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svfloat64x4_t, z0, + svmax_f64_x4 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax [^\n]+, {z4\.d - z7\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z18_z18_z4, svfloat64x4_t, z18, + svmax_f64_x4 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z28, svfloat64x4_t, z23, + svmax_f64_x4 (z23, z28), + svmax (z23, z28)) + +/* +** max_z28_z28_z0: +** fmax {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (max_z28_z28_z0, svfloat64x4_t, z28, + svmax_f64_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+ +** | +** 
fmax {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z0_z18, svfloat64x4_t, z0, + svmax_f64_x4 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** | +** fmax {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svfloat64x4_t, z4, + svmax_f64_x4 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** fmax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svfloat64x4_t, svfloat64_t, z24, + svmax_single_f64_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** | +** fmax {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svfloat64x4_t, svfloat64_t, z24, + svmax_single_f64_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svfloat64x4_t, svfloat64_t, z24, + svmax_single_f64_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** fmax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svfloat64x4_t, svfloat64_t, z1, + svmax_single_f64_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svfloat64x4_t, svfloat64_t, z1, + svmax_single_f64_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmax [^\n]+, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svfloat64x4_t, svfloat64_t, z18, + svmax_single_f64_x4 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** fmax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svfloat64x4_t, svfloat64_t, + z0_res = svmax_single_f64_x4 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** fmax {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svfloat64x4_t, svfloat64_t, + z0 = svmax_single_f64_x4 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmax {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svfloat64x4_t, svfloat64_t, z24, + svmax_single_f64_x4 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..34b4cdd1eaf07a34823e63edab196398398e7441 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s16_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** smax {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (max_z0_z0_z4, svint16x2_t, z0, + svmax_s16_x2 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** smax {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (max_z0_z4_z0, svint16x2_t, z0, + svmax_s16_x2 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, {z28\.h - z29\.h} +** | +** smax [^\n]+, {z28\.h - z29\.h} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svint16x2_t, z0, + svmax_s16_x2 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** smax {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (max_z18_z18_z4, svint16x2_t, z18, + svmax_s16_x2 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, {z18\.h - z19\.h} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z18, svint16x2_t, z23, + svmax_s16_x2 (z23, z18), + svmax (z23, z18)) + +/* +** max_z28_z28_z0: +** smax {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_XN (max_z28_z28_z0, svint16x2_t, z28, + svmax_s16_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** smax {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_XN (max_z0_z0_z18, svint16x2_t, z0, + svmax_s16_x2 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smax {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+ +** | +** smax {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svint16x2_t, z4, + svmax_s16_x2 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** smax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svint16x2_t, svint16_t, z24, + svmax_single_s16_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** | +** smax {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svint16x2_t, svint16_t, z24, + svmax_single_s16_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** smax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svint16x2_t, svint16_t, z24, + svmax_single_s16_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** smax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ( +** mov 
z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svint16x2_t, svint16_t, z1, + svmax_single_s16_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** smax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svint16x2_t, svint16_t, z1, + svmax_single_s16_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** smax {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svint16x2_t, svint16_t, z18, + svmax_single_s16_x2 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** smax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svint16x2_t, svint16_t, + z0_res = svmax_single_s16_x2 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** smax {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svint16x2_t, svint16_t, + z0 = svmax_single_s16_x2 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** smax {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svint16x2_t, svint16_t, z24, + svmax_single_s16_x2 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..3b44ec822a28cd0a798948c4aee9d30db8e02e70 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s16_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** smax {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (max_z0_z0_z4, svint16x4_t, z0, + svmax_s16_x4 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** smax {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (max_z0_z4_z0, svint16x4_t, z0, + svmax_s16_x4 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, {z28\.h - z31\.h} +** | +** smax [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svint16x4_t, z0, + svmax_s16_x4 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, {z4\.h - z7\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z18_z18_z4, svint16x4_t, z18, + svmax_s16_x4 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z28, svint16x4_t, z23, + svmax_s16_x4 (z23, z28), + svmax (z23, z28)) + +/* +** max_z28_z28_z0: +** smax {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (max_z28_z28_z0, svint16x4_t, z28, + svmax_s16_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** | +** smax {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** mov [^\n]+ +** mov 
[^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z0_z18, svint16x4_t, z0, + svmax_s16_x4 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** | +** smax {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svint16x4_t, z4, + svmax_s16_x4 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** smax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svint16x4_t, svint16_t, z24, + svmax_single_s16_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** | +** smax {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svint16x4_t, svint16_t, z24, + svmax_single_s16_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svint16x4_t, svint16_t, z24, + svmax_single_s16_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** smax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svint16x4_t, svint16_t, z1, + svmax_single_s16_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svint16x4_t, svint16_t, z1, + svmax_single_s16_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svint16x4_t, svint16_t, z18, + svmax_single_s16_x4 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** smax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svint16x4_t, svint16_t, + z0_res = svmax_single_s16_x4 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** smax {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svint16x4_t, svint16_t, + z0 = svmax_single_s16_x4 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** smax {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svint16x4_t, svint16_t, z24, + svmax_single_s16_x4 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..bb9fc22b8e26d614db26e5713eb934593a8404a6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s32_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** smax {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (max_z0_z0_z4, svint32x2_t, z0, + svmax_s32_x2 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** smax {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (max_z0_z4_z0, svint32x2_t, z0, + svmax_s32_x2 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, {z28\.s - z29\.s} +** | +** smax [^\n]+, {z28\.s - z29\.s} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svint32x2_t, z0, + svmax_s32_x2 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** smax {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (max_z18_z18_z4, svint32x2_t, z18, + svmax_s32_x2 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, {z18\.s - z19\.s} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z18, svint32x2_t, z23, + svmax_s32_x2 (z23, z18), + svmax (z23, z18)) + +/* +** max_z28_z28_z0: +** smax {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_XN (max_z28_z28_z0, svint32x2_t, z28, + svmax_s32_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** smax {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s} +** ret +*/ +TEST_XN (max_z0_z0_z18, svint32x2_t, z0, + svmax_s32_x2 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smax {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+ +** | +** smax {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svint32x2_t, z4, + svmax_s32_x2 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** smax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svint32x2_t, svint32_t, z24, + svmax_single_s32_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** | +** smax {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svint32x2_t, svint32_t, z24, + svmax_single_s32_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** smax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svint32x2_t, svint32_t, z24, + svmax_single_s32_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** smax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ( +** mov z1\.d, 
z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svint32x2_t, svint32_t, z1, + svmax_single_s32_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** smax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svint32x2_t, svint32_t, z1, + svmax_single_s32_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** smax {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svint32x2_t, svint32_t, z18, + svmax_single_s32_x2 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** smax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svint32x2_t, svint32_t, + z0_res = svmax_single_s32_x2 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** smax {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svint32x2_t, svint32_t, + z0 = svmax_single_s32_x2 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** smax {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svint32x2_t, svint32_t, z24, + svmax_single_s32_x2 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..f3db66ae92e8897e029adf614c773b5797043d74 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s32_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** smax {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (max_z0_z0_z4, svint32x4_t, z0, + svmax_s32_x4 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** smax {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (max_z0_z4_z0, svint32x4_t, z0, + svmax_s32_x4 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, {z28\.s - z31\.s} +** | +** smax [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svint32x4_t, z0, + svmax_s32_x4 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, {z4\.s - z7\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z18_z18_z4, svint32x4_t, z18, + svmax_s32_x4 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z28, svint32x4_t, z23, + svmax_s32_x4 (z23, z28), + svmax (z23, z28)) + +/* +** max_z28_z28_z0: +** smax {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (max_z28_z28_z0, svint32x4_t, z28, + svmax_s32_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** | +** smax {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ 
+** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z0_z18, svint32x4_t, z0, + svmax_s32_x4 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** | +** smax {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svint32x4_t, z4, + svmax_s32_x4 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** smax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svint32x4_t, svint32_t, z24, + svmax_single_s32_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** | +** smax {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svint32x4_t, svint32_t, z24, + svmax_single_s32_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svint32x4_t, svint32_t, z24, + svmax_single_s32_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** smax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svint32x4_t, svint32_t, z1, + svmax_single_s32_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svint32x4_t, svint32_t, z1, + svmax_single_s32_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svint32x4_t, svint32_t, z18, + svmax_single_s32_x4 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** smax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svint32x4_t, svint32_t, + z0_res = svmax_single_s32_x4 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** smax {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svint32x4_t, svint32_t, + z0 = svmax_single_s32_x4 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** smax {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svint32x4_t, svint32_t, z24, + svmax_single_s32_x4 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..384dd6e72259dcbd0d8132663a21161c4739be19 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s64_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** smax {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (max_z0_z0_z4, svint64x2_t, z0, + svmax_s64_x2 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** smax {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (max_z0_z4_z0, svint64x2_t, z0, + svmax_s64_x2 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, {z28\.d - z29\.d} +** | +** smax [^\n]+, {z28\.d - z29\.d} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svint64x2_t, z0, + svmax_s64_x2 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** smax {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (max_z18_z18_z4, svint64x2_t, z18, + svmax_s64_x2 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, {z18\.d - z19\.d} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z18, svint64x2_t, z23, + svmax_s64_x2 (z23, z18), + svmax (z23, z18)) + +/* +** max_z28_z28_z0: +** smax {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_XN (max_z28_z28_z0, svint64x2_t, z28, + svmax_s64_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** smax {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d} +** ret +*/ +TEST_XN (max_z0_z0_z18, svint64x2_t, z0, + svmax_s64_x2 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smax {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** | +** smax {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svint64x2_t, z4, + svmax_s64_x2 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** smax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svint64x2_t, svint64_t, z24, + svmax_single_s64_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** | +** smax {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svint64x2_t, svint64_t, z24, + svmax_single_s64_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** smax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svint64x2_t, svint64_t, z24, + svmax_single_s64_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** smax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ( +** mov z1\.d, 
z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svint64x2_t, svint64_t, z1, + svmax_single_s64_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** smax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svint64x2_t, svint64_t, z1, + svmax_single_s64_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** smax {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svint64x2_t, svint64_t, z18, + svmax_single_s64_x2 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** smax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svint64x2_t, svint64_t, + z0_res = svmax_single_s64_x2 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** smax {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svint64x2_t, svint64_t, + z0 = svmax_single_s64_x2 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** smax {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svint64x2_t, svint64_t, z24, + svmax_single_s64_x2 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..666c79b293c14196ea22666d906a9b0733063f4f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s64_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** smax {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (max_z0_z0_z4, svint64x4_t, z0, + svmax_s64_x4 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** smax {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (max_z0_z4_z0, svint64x4_t, z0, + svmax_s64_x4 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, {z28\.d - z31\.d} +** | +** smax [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svint64x4_t, z0, + svmax_s64_x4 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, {z4\.d - z7\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z18_z18_z4, svint64x4_t, z18, + svmax_s64_x4 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z28, svint64x4_t, z23, + svmax_s64_x4 (z23, z28), + svmax (z23, z28)) + +/* +** max_z28_z28_z0: +** smax {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (max_z28_z28_z0, svint64x4_t, z28, + svmax_s64_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+ +** | +** smax {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ 
+** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z0_z18, svint64x4_t, z0, + svmax_s64_x4 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** | +** smax {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svint64x4_t, z4, + svmax_s64_x4 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** smax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svint64x4_t, svint64_t, z24, + svmax_single_s64_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** | +** smax {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svint64x4_t, svint64_t, z24, + svmax_single_s64_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svint64x4_t, svint64_t, z24, + svmax_single_s64_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** smax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svint64x4_t, svint64_t, z1, + svmax_single_s64_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svint64x4_t, svint64_t, z1, + svmax_single_s64_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svint64x4_t, svint64_t, z18, + svmax_single_s64_x4 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** smax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svint64x4_t, svint64_t, + z0_res = svmax_single_s64_x4 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** smax {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svint64x4_t, svint64_t, + z0 = svmax_single_s64_x4 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** smax {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svint64x4_t, svint64_t, z24, + svmax_single_s64_x4 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..76144b868e39173cbd94977635f5e3f22e11a494 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s8_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** smax {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (max_z0_z0_z4, svint8x2_t, z0, + svmax_s8_x2 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** smax {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (max_z0_z4_z0, svint8x2_t, z0, + svmax_s8_x2 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, {z28\.b - z29\.b} +** | +** smax [^\n]+, {z28\.b - z29\.b} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svint8x2_t, z0, + svmax_s8_x2 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** smax {z18\.b - z19\.b}, {z18\.b - z19\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (max_z18_z18_z4, svint8x2_t, z18, + svmax_s8_x2 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, {z18\.b - z19\.b} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z18, svint8x2_t, z23, + svmax_s8_x2 (z23, z18), + svmax (z23, z18)) + +/* +** max_z28_z28_z0: +** smax {z28\.b - z29\.b}, {z28\.b - z29\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_XN (max_z28_z28_z0, svint8x2_t, z28, + svmax_s8_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** smax {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z18\.b - z19\.b} +** ret +*/ +TEST_XN (max_z0_z0_z18, svint8x2_t, z0, + svmax_s8_x2 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smax {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+ +** | +** smax {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svint8x2_t, z4, + svmax_s8_x2 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** smax {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svint8x2_t, svint8_t, z24, + svmax_single_s8_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smax {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** | +** smax {z28\.b - z29\.b}, {z28\.b - z29\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svint8x2_t, svint8_t, z24, + svmax_single_s8_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** smax {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svint8x2_t, svint8_t, z24, + svmax_single_s8_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** smax {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d 
+** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svint8x2_t, svint8_t, z1, + svmax_single_s8_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** smax ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svint8x2_t, svint8_t, z1, + svmax_single_s8_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** smax {z18\.b - z19\.b}, {z18\.b - z19\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svint8x2_t, svint8_t, z18, + svmax_single_s8_x2 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** smax ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svint8x2_t, svint8_t, + z0_res = svmax_single_s8_x2 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** smax {z0\.b - z1\.b}, {z0\.b - z1\.b}, z15\.b +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svint8x2_t, svint8_t, + z0 = svmax_single_s8_x2 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** smax {z24\.b - z25\.b}, {z24\.b - z25\.b}, \1\.b +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svint8x2_t, svint8_t, z24, + svmax_single_s8_x2 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..f15f3a792a953fc0f332fe1823058b31ca2d8d02 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_s8_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** smax {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (max_z0_z0_z4, svint8x4_t, z0, + svmax_s8_x4 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** smax {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (max_z0_z4_z0, svint8x4_t, z0, + svmax_s8_x4 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, {z28\.b - z31\.b} +** | +** smax [^\n]+, {z28\.b - z31\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svint8x4_t, z0, + svmax_s8_x4 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, {z4\.b - z7\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z18_z18_z4, svint8x4_t, z18, + svmax_s8_x4 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, {z28\.b - z31\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z28, svint8x4_t, z23, + svmax_s8_x4 (z23, z28), + svmax (z23, z28)) + +/* +** max_z28_z28_z0: +** smax {z28\.b - z31\.b}, {z28\.b - z31\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_XN (max_z28_z28_z0, svint8x4_t, z28, + svmax_s8_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+ +** | +** smax {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN 
(max_z0_z0_z18, svint8x4_t, z0, + svmax_s8_x4 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+ +** | +** smax {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svint8x4_t, z4, + svmax_s8_x4 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** smax {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svint8x4_t, svint8_t, z24, + svmax_single_s8_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** | +** smax {z28\.b - z31\.b}, {z28\.b - z31\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svint8x4_t, svint8_t, z24, + svmax_single_s8_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svint8x4_t, svint8_t, z24, + svmax_single_s8_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** smax {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svint8x4_t, svint8_t, z1, + svmax_single_s8_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svint8x4_t, svint8_t, z1, + svmax_single_s8_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smax [^\n]+, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svint8x4_t, svint8_t, z18, + svmax_single_s8_x4 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** smax ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svint8x4_t, svint8_t, + z0_res = svmax_single_s8_x4 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** smax {z0\.b - z3\.b}, {z0\.b - z3\.b}, z15\.b +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svint8x4_t, svint8_t, + z0 = svmax_single_s8_x4 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** smax {z24\.b - z27\.b}, {z24\.b - z27\.b}, \1\.b +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svint8x4_t, svint8_t, z24, + svmax_single_s8_x4 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..cf9d8621af072505c70b7bbbbc4a981c36a4925e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u16_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** umax {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (max_z0_z0_z4, svuint16x2_t, z0, + svmax_u16_x2 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** umax {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (max_z0_z4_z0, svuint16x2_t, z0, + svmax_u16_x2 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, {z28\.h - z29\.h} +** | +** umax [^\n]+, {z28\.h - z29\.h} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svuint16x2_t, z0, + svmax_u16_x2 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** umax {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (max_z18_z18_z4, svuint16x2_t, z18, + svmax_u16_x2 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, {z18\.h - z19\.h} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z18, svuint16x2_t, z23, + svmax_u16_x2 (z23, z18), + svmax (z23, z18)) + +/* +** max_z28_z28_z0: +** umax {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_XN (max_z28_z28_z0, svuint16x2_t, z28, + svmax_u16_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** umax {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_XN (max_z0_z0_z18, svuint16x2_t, z0, + svmax_u16_x2 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umax {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+ +** | +** umax {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svuint16x2_t, z4, + svmax_u16_x2 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** umax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svuint16x2_t, svuint16_t, z24, + svmax_single_u16_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** | +** umax {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svuint16x2_t, svuint16_t, z24, + svmax_single_u16_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** umax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svuint16x2_t, svuint16_t, z24, + svmax_single_u16_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** umax {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ( +** mov 
z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svuint16x2_t, svuint16_t, z1, + svmax_single_u16_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** umax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svuint16x2_t, svuint16_t, z1, + svmax_single_u16_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** umax {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svuint16x2_t, svuint16_t, z18, + svmax_single_u16_x2 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** umax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svuint16x2_t, svuint16_t, + z0_res = svmax_single_u16_x2 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** umax {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svuint16x2_t, svuint16_t, + z0 = svmax_single_u16_x2 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** umax {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svuint16x2_t, svuint16_t, z24, + svmax_single_u16_x2 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..b23738b4646d6c2eb0f8331e3ec51bf5547c5f3a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u16_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** umax {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (max_z0_z0_z4, svuint16x4_t, z0, + svmax_u16_x4 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** umax {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (max_z0_z4_z0, svuint16x4_t, z0, + svmax_u16_x4 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, {z28\.h - z31\.h} +** | +** umax [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svuint16x4_t, z0, + svmax_u16_x4 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, {z4\.h - z7\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z18_z18_z4, svuint16x4_t, z18, + svmax_u16_x4 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z28, svuint16x4_t, z23, + svmax_u16_x4 (z23, z28), + svmax (z23, z28)) + +/* +** max_z28_z28_z0: +** umax {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (max_z28_z28_z0, svuint16x4_t, z28, + svmax_u16_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** | +** umax {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** 
mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z0_z18, svuint16x4_t, z0, + svmax_u16_x4 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** | +** umax {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svuint16x4_t, z4, + svmax_u16_x4 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** umax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svuint16x4_t, svuint16_t, z24, + svmax_single_u16_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** | +** umax {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svuint16x4_t, svuint16_t, z24, + svmax_single_u16_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svuint16x4_t, svuint16_t, z24, + svmax_single_u16_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** umax {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svuint16x4_t, svuint16_t, z1, + svmax_single_u16_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svuint16x4_t, svuint16_t, z1, + svmax_single_u16_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svuint16x4_t, svuint16_t, z18, + svmax_single_u16_x4 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** umax ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svuint16x4_t, svuint16_t, + z0_res = svmax_single_u16_x4 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** umax {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svuint16x4_t, svuint16_t, + z0 = svmax_single_u16_x4 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** umax {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svuint16x4_t, svuint16_t, z24, + svmax_single_u16_x4 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..fff317571adecf026b2e74a8369eddc3393ae6e4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u32_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** umax {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (max_z0_z0_z4, svuint32x2_t, z0, + svmax_u32_x2 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** umax {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (max_z0_z4_z0, svuint32x2_t, z0, + svmax_u32_x2 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, {z28\.s - z29\.s} +** | +** umax [^\n]+, {z28\.s - z29\.s} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svuint32x2_t, z0, + svmax_u32_x2 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** umax {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (max_z18_z18_z4, svuint32x2_t, z18, + svmax_u32_x2 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, {z18\.s - z19\.s} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z18, svuint32x2_t, z23, + svmax_u32_x2 (z23, z18), + svmax (z23, z18)) + +/* +** max_z28_z28_z0: +** umax {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_XN (max_z28_z28_z0, svuint32x2_t, z28, + svmax_u32_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** umax {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s} +** ret +*/ +TEST_XN (max_z0_z0_z18, svuint32x2_t, z0, + svmax_u32_x2 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umax {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+ +** | +** umax {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svuint32x2_t, z4, + svmax_u32_x2 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** umax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svuint32x2_t, svuint32_t, z24, + svmax_single_u32_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** | +** umax {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svuint32x2_t, svuint32_t, z24, + svmax_single_u32_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** umax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svuint32x2_t, svuint32_t, z24, + svmax_single_u32_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** umax {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** 
( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svuint32x2_t, svuint32_t, z1, + svmax_single_u32_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** umax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svuint32x2_t, svuint32_t, z1, + svmax_single_u32_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** umax {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svuint32x2_t, svuint32_t, z18, + svmax_single_u32_x2 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** umax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svuint32x2_t, svuint32_t, + z0_res = svmax_single_u32_x2 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** umax {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svuint32x2_t, svuint32_t, + z0 = svmax_single_u32_x2 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** umax {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svuint32x2_t, svuint32_t, z24, + svmax_single_u32_x2 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..e5b32a28dde2712ba4440c64e6fb140618a76126 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u32_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** umax {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (max_z0_z0_z4, svuint32x4_t, z0, + svmax_u32_x4 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** umax {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (max_z0_z4_z0, svuint32x4_t, z0, + svmax_u32_x4 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, {z28\.s - z31\.s} +** | +** umax [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svuint32x4_t, z0, + svmax_u32_x4 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, {z4\.s - z7\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z18_z18_z4, svuint32x4_t, z18, + svmax_u32_x4 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z28, svuint32x4_t, z23, + svmax_u32_x4 (z23, z28), + svmax (z23, z28)) + +/* +** max_z28_z28_z0: +** umax {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (max_z28_z28_z0, svuint32x4_t, z28, + svmax_u32_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** | +** umax {z0\.s - z3\.s}, {z0\.s - z3\.s}, 
[^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z0_z18, svuint32x4_t, z0, + svmax_u32_x4 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** | +** umax {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svuint32x4_t, z4, + svmax_u32_x4 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** umax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svuint32x4_t, svuint32_t, z24, + svmax_single_u32_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** | +** umax {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svuint32x4_t, svuint32_t, z24, + svmax_single_u32_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svuint32x4_t, svuint32_t, z24, + svmax_single_u32_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** umax {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svuint32x4_t, svuint32_t, z1, + svmax_single_u32_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svuint32x4_t, svuint32_t, z1, + svmax_single_u32_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svuint32x4_t, svuint32_t, z18, + svmax_single_u32_x4 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** umax ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svuint32x4_t, svuint32_t, + z0_res = svmax_single_u32_x4 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** umax {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svuint32x4_t, svuint32_t, + z0 = svmax_single_u32_x4 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** umax {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svuint32x4_t, svuint32_t, z24, + svmax_single_u32_x4 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..12a45760bc16b905b87be5d39d2ba363fafeb788 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u64_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** umax {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (max_z0_z0_z4, svuint64x2_t, z0, + svmax_u64_x2 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** umax {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (max_z0_z4_z0, svuint64x2_t, z0, + svmax_u64_x2 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, {z28\.d - z29\.d} +** | +** umax [^\n]+, {z28\.d - z29\.d} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svuint64x2_t, z0, + svmax_u64_x2 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** umax {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (max_z18_z18_z4, svuint64x2_t, z18, + svmax_u64_x2 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, {z18\.d - z19\.d} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z18, svuint64x2_t, z23, + svmax_u64_x2 (z23, z18), + svmax (z23, z18)) + +/* +** max_z28_z28_z0: +** umax {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_XN (max_z28_z28_z0, svuint64x2_t, z28, + svmax_u64_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** umax {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d} +** ret +*/ +TEST_XN (max_z0_z0_z18, svuint64x2_t, z0, + svmax_u64_x2 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umax {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** | +** umax {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svuint64x2_t, z4, + svmax_u64_x2 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** umax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svuint64x2_t, svuint64_t, z24, + svmax_single_u64_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** | +** umax {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svuint64x2_t, svuint64_t, z24, + svmax_single_u64_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** umax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svuint64x2_t, svuint64_t, z24, + svmax_single_u64_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** umax {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** 
( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svuint64x2_t, svuint64_t, z1, + svmax_single_u64_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** umax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svuint64x2_t, svuint64_t, z1, + svmax_single_u64_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** umax {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svuint64x2_t, svuint64_t, z18, + svmax_single_u64_x2 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** umax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svuint64x2_t, svuint64_t, + z0_res = svmax_single_u64_x2 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** umax {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svuint64x2_t, svuint64_t, + z0 = svmax_single_u64_x2 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** umax {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svuint64x2_t, svuint64_t, z24, + svmax_single_u64_x2 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..1f100cb7f24848ec67bfb8a4399169656d06b574 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u64_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** umax {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (max_z0_z0_z4, svuint64x4_t, z0, + svmax_u64_x4 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** umax {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (max_z0_z4_z0, svuint64x4_t, z0, + svmax_u64_x4 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, {z28\.d - z31\.d} +** | +** umax [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svuint64x4_t, z0, + svmax_u64_x4 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, {z4\.d - z7\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z18_z18_z4, svuint64x4_t, z18, + svmax_u64_x4 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z28, svuint64x4_t, z23, + svmax_u64_x4 (z23, z28), + svmax (z23, z28)) + +/* +** max_z28_z28_z0: +** umax {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (max_z28_z28_z0, svuint64x4_t, z28, + svmax_u64_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+ +** | +** umax {z0\.d - z3\.d}, {z0\.d - z3\.d}, 
[^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z0_z18, svuint64x4_t, z0, + svmax_u64_x4 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** | +** umax {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svuint64x4_t, z4, + svmax_u64_x4 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** umax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svuint64x4_t, svuint64_t, z24, + svmax_single_u64_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** | +** umax {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svuint64x4_t, svuint64_t, z24, + svmax_single_u64_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svuint64x4_t, svuint64_t, z24, + svmax_single_u64_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** umax {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svuint64x4_t, svuint64_t, z1, + svmax_single_u64_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svuint64x4_t, svuint64_t, z1, + svmax_single_u64_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svuint64x4_t, svuint64_t, z18, + svmax_single_u64_x4 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** umax ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svuint64x4_t, svuint64_t, + z0_res = svmax_single_u64_x4 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** umax {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svuint64x4_t, svuint64_t, + z0 = svmax_single_u64_x4 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** umax {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svuint64x4_t, svuint64_t, z24, + svmax_single_u64_x4 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..836985a124f686d70f0b5fe9fb19f3479f37d7a1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u8_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** umax {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (max_z0_z0_z4, svuint8x2_t, z0, + svmax_u8_x2 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** umax {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (max_z0_z4_z0, svuint8x2_t, z0, + svmax_u8_x2 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, {z28\.b - z29\.b} +** | +** umax [^\n]+, {z28\.b - z29\.b} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svuint8x2_t, z0, + svmax_u8_x2 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** umax {z18\.b - z19\.b}, {z18\.b - z19\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (max_z18_z18_z4, svuint8x2_t, z18, + svmax_u8_x2 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, {z18\.b - z19\.b} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z18, svuint8x2_t, z23, + svmax_u8_x2 (z23, z18), + svmax (z23, z18)) + +/* +** max_z28_z28_z0: +** umax {z28\.b - z29\.b}, {z28\.b - z29\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_XN (max_z28_z28_z0, svuint8x2_t, z28, + svmax_u8_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** umax {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z18\.b - z19\.b} +** ret +*/ +TEST_XN (max_z0_z0_z18, svuint8x2_t, z0, + svmax_u8_x2 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umax {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+ +** | +** umax {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svuint8x2_t, z4, + svmax_u8_x2 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** umax {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svuint8x2_t, svuint8_t, z24, + svmax_single_u8_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umax {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** | +** umax {z28\.b - z29\.b}, {z28\.b - z29\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svuint8x2_t, svuint8_t, z24, + svmax_single_u8_x2 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** umax {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svuint8x2_t, svuint8_t, z24, + svmax_single_u8_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** umax {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ( +** mov z1\.d, z24\.d +** 
mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svuint8x2_t, svuint8_t, z1, + svmax_single_u8_x2 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** umax ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svuint8x2_t, svuint8_t, z1, + svmax_single_u8_x2 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** umax {z18\.b - z19\.b}, {z18\.b - z19\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svuint8x2_t, svuint8_t, z18, + svmax_single_u8_x2 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** umax ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svuint8x2_t, svuint8_t, + z0_res = svmax_single_u8_x2 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** umax {z0\.b - z1\.b}, {z0\.b - z1\.b}, z15\.b +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svuint8x2_t, svuint8_t, + z0 = svmax_single_u8_x2 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** umax {z24\.b - z25\.b}, {z24\.b - z25\.b}, \1\.b +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svuint8x2_t, svuint8_t, z24, + svmax_single_u8_x2 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..f61a762a3fafbc090d677bc3b5a3cf737f4129b4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/max_u8_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** max_z0_z0_z4: +** umax {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (max_z0_z0_z4, svuint8x4_t, z0, + svmax_u8_x4 (z0, z4), + svmax (z0, z4)) + +/* +** max_z0_z4_z0: +** umax {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (max_z0_z4_z0, svuint8x4_t, z0, + svmax_u8_x4 (z4, z0), + svmax (z4, z0)) + +/* +** max_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, {z28\.b - z31\.b} +** | +** umax [^\n]+, {z28\.b - z31\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z4_z28, svuint8x4_t, z0, + svmax_u8_x4 (z4, z28), + svmax (z4, z28)) + +/* +** max_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, {z4\.b - z7\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z18_z18_z4, svuint8x4_t, z18, + svmax_u8_x4 (z18, z4), + svmax (z18, z4)) + +/* +** max_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, {z28\.b - z31\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (max_z23_z23_z28, svuint8x4_t, z23, + svmax_u8_x4 (z23, z28), + svmax (z23, z28)) + +/* +** max_z28_z28_z0: +** umax {z28\.b - z31\.b}, {z28\.b - z31\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_XN (max_z28_z28_z0, svuint8x4_t, z28, + svmax_u8_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+ +** | +** umax {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov 
[^\n]+ +** ) +** ret +*/ +TEST_XN (max_z0_z0_z18, svuint8x4_t, z0, + svmax_u8_x4 (z0, z18), + svmax (z0, z18)) + +/* +** max_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+ +** | +** umax {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (max_z4_z4_z23, svuint8x4_t, z4, + svmax_u8_x4 (z4, z23), + svmax (z4, z23)) + +/* +** max_single_z24_z24_z0: +** umax {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z0, svuint8x4_t, svuint8_t, z24, + svmax_single_u8_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** | +** umax {z28\.b - z31\.b}, {z28\.b - z31\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z28_z0, svuint8x4_t, svuint8_t, z24, + svmax_single_u8_x4 (z28, z0), + svmax (z28, z0)) + +/* +** max_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z1_z0, svuint8x4_t, svuint8_t, z24, + svmax_single_u8_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z1_z24_z0: +** umax {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z24_z0, svuint8x4_t, svuint8_t, z1, + svmax_single_u8_x4 (z24, z0), + svmax (z24, z0)) + +/* +** max_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z1_z1_z0, svuint8x4_t, svuint8_t, z1, + svmax_single_u8_x4 (z1, z0), + svmax (z1, z0)) + +/* +** max_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umax [^\n]+, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (max_single_z18_z18_z0, svuint8x4_t, svuint8_t, z18, + svmax_single_u8_x4 (z18, z0), + svmax (z18, z0)) + +/* +** max_single_awkward: +** ... +** umax ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (max_single_awkward, svuint8x4_t, svuint8_t, + z0_res = svmax_single_u8_x4 (z1, z0), + z0_res = svmax (z1, z0)) + +/* +** max_single_z0_z0_z15: +** ... +** umax {z0\.b - z3\.b}, {z0\.b - z3\.b}, z15\.b +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (max_single_z0_z0_z15, svuint8x4_t, svuint8_t, + z0 = svmax_single_u8_x4 (z0, z15), + z0 = svmax (z0, z15)) + +/* +** max_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** umax {z24\.b - z27\.b}, {z24\.b - z27\.b}, \1\.b +** ret +*/ +TEST_XN_SINGLE (max_single_z24_z24_z16, svuint8x4_t, svuint8_t, z24, + svmax_single_u8_x4 (z24, z16), + svmax (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..5b962fbf0b803ee9373893bf2646850462d70128 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f16_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** maxnm_z0_z0_z4: +** fmaxnm {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (maxnm_z0_z0_z4, svfloat16x2_t, z0, + svmaxnm_f16_x2 (z0, z4), + svmaxnm (z0, z4)) + +/* +** maxnm_z0_z4_z0: +** fmaxnm {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (maxnm_z0_z4_z0, svfloat16x2_t, z0, + svmaxnm_f16_x2 (z4, z0), + svmaxnm (z4, z0)) + +/* +** maxnm_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm [^\n]+, {z28\.h - z29\.h} +** | +** fmaxnm [^\n]+, {z28\.h - z29\.h} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (maxnm_z0_z4_z28, svfloat16x2_t, z0, + svmaxnm_f16_x2 (z4, z28), + svmaxnm (z4, z28)) + +/* +** maxnm_z18_z18_z4: +** fmaxnm {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (maxnm_z18_z18_z4, svfloat16x2_t, z18, + svmaxnm_f16_x2 (z18, z4), + svmaxnm (z18, z4)) + +/* +** maxnm_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm [^\n]+, {z18\.h - z19\.h} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (maxnm_z23_z23_z18, svfloat16x2_t, z23, + svmaxnm_f16_x2 (z23, z18), + svmaxnm (z23, z18)) + +/* +** maxnm_z28_z28_z0: +** fmaxnm {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_XN (maxnm_z28_z28_z0, svfloat16x2_t, z28, + svmaxnm_f16_x2 (z28, z0), + svmaxnm (z28, z0)) + +/* +** maxnm_z0_z0_z18: +** fmaxnm {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_XN (maxnm_z0_z0_z18, svfloat16x2_t, z0, + svmaxnm_f16_x2 (z0, z18), + svmaxnm (z0, z18)) + +/* +** maxnm_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+ +** | +** fmaxnm {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (maxnm_z4_z4_z23, svfloat16x2_t, z4, + svmaxnm_f16_x2 (z4, z23), + svmaxnm (z4, z23)) + +/* +** maxnm_single_z24_z24_z0: +** fmaxnm {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z24_z0, svfloat16x2_t, svfloat16_t, z24, + svmaxnm_single_f16_x2 (z24, z0), + svmaxnm (z24, z0)) + +/* +** maxnm_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** | +** fmaxnm {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z28_z0, svfloat16x2_t, svfloat16_t, z24, + svmaxnm_single_f16_x2 (z28, z0), + svmaxnm (z28, z0)) + +/* +** maxnm_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** fmaxnm {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z1_z0, svfloat16x2_t, svfloat16_t, z24, + 
svmaxnm_single_f16_x2 (z1, z0), + svmaxnm (z1, z0)) + +/* +** maxnm_single_z1_z24_z0: +** fmaxnm {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z1_z24_z0, svfloat16x2_t, svfloat16_t, z1, + svmaxnm_single_f16_x2 (z24, z0), + svmaxnm (z24, z0)) + +/* +** maxnm_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z1_z1_z0, svfloat16x2_t, svfloat16_t, z1, + svmaxnm_single_f16_x2 (z1, z0), + svmaxnm (z1, z0)) + +/* +** maxnm_single_z18_z18_z0: +** fmaxnm {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z18_z18_z0, svfloat16x2_t, svfloat16_t, z18, + svmaxnm_single_f16_x2 (z18, z0), + svmaxnm (z18, z0)) + +/* +** maxnm_single_awkward: +** ... +** fmaxnm ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (maxnm_single_awkward, svfloat16x2_t, svfloat16_t, + z0_res = svmaxnm_single_f16_x2 (z1, z0), + z0_res = svmaxnm (z1, z0)) + +/* +** maxnm_single_z0_z0_z15: +** ... +** fmaxnm {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (maxnm_single_z0_z0_z15, svfloat16x2_t, svfloat16_t, + z0 = svmaxnm_single_f16_x2 (z0, z15), + z0 = svmaxnm (z0, z15)) + +/* +** maxnm_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmaxnm {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z24_z16, svfloat16x2_t, svfloat16_t, z24, + svmaxnm_single_f16_x2 (z24, z16), + svmaxnm (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..9012280290790fed8f1d650078edfee51973340d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f16_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** maxnm_z0_z0_z4: +** fmaxnm {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (maxnm_z0_z0_z4, svfloat16x4_t, z0, + svmaxnm_f16_x4 (z0, z4), + svmaxnm (z0, z4)) + +/* +** maxnm_z0_z4_z0: +** fmaxnm {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (maxnm_z0_z4_z0, svfloat16x4_t, z0, + svmaxnm_f16_x4 (z4, z0), + svmaxnm (z4, z0)) + +/* +** maxnm_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm [^\n]+, {z28\.h - z31\.h} +** | +** fmaxnm [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (maxnm_z0_z4_z28, svfloat16x4_t, z0, + svmaxnm_f16_x4 (z4, z28), + svmaxnm (z4, z28)) + +/* +** maxnm_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm [^\n]+, {z4\.h - z7\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (maxnm_z18_z18_z4, svfloat16x4_t, z18, + svmaxnm_f16_x4 (z18, z4), + svmaxnm (z18, z4)) + +/* +** maxnm_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (maxnm_z23_z23_z28, svfloat16x4_t, z23, + svmaxnm_f16_x4 (z23, z28), + svmaxnm (z23, z28)) + +/* +** maxnm_z28_z28_z0: +** fmaxnm {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ 
+TEST_XN (maxnm_z28_z28_z0, svfloat16x4_t, z28, + svmaxnm_f16_x4 (z28, z0), + svmaxnm (z28, z0)) + +/* +** maxnm_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** | +** fmaxnm {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (maxnm_z0_z0_z18, svfloat16x4_t, z0, + svmaxnm_f16_x4 (z0, z18), + svmaxnm (z0, z18)) + +/* +** maxnm_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** | +** fmaxnm {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (maxnm_z4_z4_z23, svfloat16x4_t, z4, + svmaxnm_f16_x4 (z4, z23), + svmaxnm (z4, z23)) + +/* +** maxnm_single_z24_z24_z0: +** fmaxnm {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z24_z0, svfloat16x4_t, svfloat16_t, z24, + svmaxnm_single_f16_x4 (z24, z0), + svmaxnm (z24, z0)) + +/* +** maxnm_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** | +** fmaxnm {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z28_z0, svfloat16x4_t, svfloat16_t, z24, + svmaxnm_single_f16_x4 (z28, z0), + svmaxnm (z28, z0)) + +/* +** maxnm_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z1_z0, svfloat16x4_t, svfloat16_t, z24, + svmaxnm_single_f16_x4 (z1, z0), + svmaxnm (z1, z0)) + +/* +** maxnm_single_z1_z24_z0: +** fmaxnm {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z1_z24_z0, svfloat16x4_t, svfloat16_t, z1, + svmaxnm_single_f16_x4 (z24, z0), + svmaxnm (z24, z0)) + +/* +** maxnm_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z1_z1_z0, svfloat16x4_t, svfloat16_t, z1, + svmaxnm_single_f16_x4 (z1, z0), + svmaxnm (z1, z0)) + +/* +** maxnm_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm [^\n]+, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z18_z18_z0, svfloat16x4_t, svfloat16_t, z18, + svmaxnm_single_f16_x4 (z18, z0), + svmaxnm (z18, z0)) + +/* +** maxnm_single_awkward: +** ... +** fmaxnm ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (maxnm_single_awkward, svfloat16x4_t, svfloat16_t, + z0_res = svmaxnm_single_f16_x4 (z1, z0), + z0_res = svmaxnm (z1, z0)) + +/* +** maxnm_single_z0_z0_z15: +** ... +** fmaxnm {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (maxnm_single_z0_z0_z15, svfloat16x4_t, svfloat16_t, + z0 = svmaxnm_single_f16_x4 (z0, z15), + z0 = svmaxnm (z0, z15)) + +/* +** maxnm_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmaxnm {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z24_z16, svfloat16x4_t, svfloat16_t, z24, + svmaxnm_single_f16_x4 (z24, z16), + svmaxnm (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..729729e905c70d7094ebc7633d633fad85dc3159 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f32_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** maxnm_z0_z0_z4: +** fmaxnm {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (maxnm_z0_z0_z4, svfloat32x2_t, z0, + svmaxnm_f32_x2 (z0, z4), + svmaxnm (z0, z4)) + +/* +** maxnm_z0_z4_z0: +** fmaxnm {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (maxnm_z0_z4_z0, svfloat32x2_t, z0, + svmaxnm_f32_x2 (z4, z0), + svmaxnm (z4, z0)) + +/* +** maxnm_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm [^\n]+, {z28\.s - z29\.s} +** | +** fmaxnm [^\n]+, {z28\.s - z29\.s} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (maxnm_z0_z4_z28, svfloat32x2_t, z0, + svmaxnm_f32_x2 (z4, z28), + svmaxnm (z4, z28)) + +/* +** maxnm_z18_z18_z4: +** fmaxnm {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (maxnm_z18_z18_z4, svfloat32x2_t, z18, + svmaxnm_f32_x2 (z18, z4), + svmaxnm (z18, z4)) + +/* +** maxnm_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm [^\n]+, {z18\.s - z19\.s} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (maxnm_z23_z23_z18, svfloat32x2_t, z23, + svmaxnm_f32_x2 (z23, z18), + svmaxnm (z23, z18)) + +/* +** maxnm_z28_z28_z0: +** fmaxnm {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_XN (maxnm_z28_z28_z0, svfloat32x2_t, z28, + svmaxnm_f32_x2 (z28, z0), + svmaxnm (z28, z0)) + +/* +** maxnm_z0_z0_z18: +** fmaxnm {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s} +** ret +*/ +TEST_XN (maxnm_z0_z0_z18, svfloat32x2_t, z0, + svmaxnm_f32_x2 (z0, z18), + svmaxnm (z0, z18)) + +/* +** maxnm_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+ +** | +** fmaxnm {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (maxnm_z4_z4_z23, svfloat32x2_t, z4, + svmaxnm_f32_x2 (z4, z23), + svmaxnm (z4, z23)) + +/* +** maxnm_single_z24_z24_z0: +** fmaxnm {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z24_z0, svfloat32x2_t, svfloat32_t, z24, + svmaxnm_single_f32_x2 (z24, z0), + svmaxnm (z24, z0)) + +/* +** maxnm_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** | +** fmaxnm {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z28_z0, svfloat32x2_t, svfloat32_t, z24, + svmaxnm_single_f32_x2 (z28, z0), + svmaxnm (z28, z0)) + +/* +** maxnm_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** fmaxnm {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z1_z0, svfloat32x2_t, 
svfloat32_t, z24, + svmaxnm_single_f32_x2 (z1, z0), + svmaxnm (z1, z0)) + +/* +** maxnm_single_z1_z24_z0: +** fmaxnm {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z1_z24_z0, svfloat32x2_t, svfloat32_t, z1, + svmaxnm_single_f32_x2 (z24, z0), + svmaxnm (z24, z0)) + +/* +** maxnm_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z1_z1_z0, svfloat32x2_t, svfloat32_t, z1, + svmaxnm_single_f32_x2 (z1, z0), + svmaxnm (z1, z0)) + +/* +** maxnm_single_z18_z18_z0: +** fmaxnm {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z18_z18_z0, svfloat32x2_t, svfloat32_t, z18, + svmaxnm_single_f32_x2 (z18, z0), + svmaxnm (z18, z0)) + +/* +** maxnm_single_awkward: +** ... +** fmaxnm ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (maxnm_single_awkward, svfloat32x2_t, svfloat32_t, + z0_res = svmaxnm_single_f32_x2 (z1, z0), + z0_res = svmaxnm (z1, z0)) + +/* +** maxnm_single_z0_z0_z15: +** ... +** fmaxnm {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (maxnm_single_z0_z0_z15, svfloat32x2_t, svfloat32_t, + z0 = svmaxnm_single_f32_x2 (z0, z15), + z0 = svmaxnm (z0, z15)) + +/* +** maxnm_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmaxnm {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z24_z16, svfloat32x2_t, svfloat32_t, z24, + svmaxnm_single_f32_x2 (z24, z16), + svmaxnm (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..dda0b1c0d94371ee5057ff29c30c6cc408ce4716 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f32_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** maxnm_z0_z0_z4: +** fmaxnm {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (maxnm_z0_z0_z4, svfloat32x4_t, z0, + svmaxnm_f32_x4 (z0, z4), + svmaxnm (z0, z4)) + +/* +** maxnm_z0_z4_z0: +** fmaxnm {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (maxnm_z0_z4_z0, svfloat32x4_t, z0, + svmaxnm_f32_x4 (z4, z0), + svmaxnm (z4, z0)) + +/* +** maxnm_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm [^\n]+, {z28\.s - z31\.s} +** | +** fmaxnm [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (maxnm_z0_z4_z28, svfloat32x4_t, z0, + svmaxnm_f32_x4 (z4, z28), + svmaxnm (z4, z28)) + +/* +** maxnm_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm [^\n]+, {z4\.s - z7\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (maxnm_z18_z18_z4, svfloat32x4_t, z18, + svmaxnm_f32_x4 (z18, z4), + svmaxnm (z18, z4)) + +/* +** maxnm_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (maxnm_z23_z23_z28, svfloat32x4_t, z23, + svmaxnm_f32_x4 (z23, z28), + svmaxnm (z23, z28)) + +/* +** maxnm_z28_z28_z0: +** fmaxnm {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - 
z3\.s} +** ret +*/ +TEST_XN (maxnm_z28_z28_z0, svfloat32x4_t, z28, + svmaxnm_f32_x4 (z28, z0), + svmaxnm (z28, z0)) + +/* +** maxnm_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** | +** fmaxnm {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (maxnm_z0_z0_z18, svfloat32x4_t, z0, + svmaxnm_f32_x4 (z0, z18), + svmaxnm (z0, z18)) + +/* +** maxnm_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** | +** fmaxnm {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (maxnm_z4_z4_z23, svfloat32x4_t, z4, + svmaxnm_f32_x4 (z4, z23), + svmaxnm (z4, z23)) + +/* +** maxnm_single_z24_z24_z0: +** fmaxnm {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z24_z0, svfloat32x4_t, svfloat32_t, z24, + svmaxnm_single_f32_x4 (z24, z0), + svmaxnm (z24, z0)) + +/* +** maxnm_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** | +** fmaxnm {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z28_z0, svfloat32x4_t, svfloat32_t, z24, + svmaxnm_single_f32_x4 (z28, z0), + svmaxnm (z28, z0)) + +/* +** maxnm_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z1_z0, svfloat32x4_t, svfloat32_t, z24, + svmaxnm_single_f32_x4 (z1, z0), + svmaxnm (z1, z0)) + +/* +** maxnm_single_z1_z24_z0: +** fmaxnm {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z1_z24_z0, svfloat32x4_t, svfloat32_t, z1, + svmaxnm_single_f32_x4 (z24, z0), + svmaxnm (z24, z0)) + +/* +** maxnm_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z1_z1_z0, svfloat32x4_t, svfloat32_t, z1, + svmaxnm_single_f32_x4 (z1, z0), + svmaxnm (z1, z0)) + +/* +** maxnm_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm [^\n]+, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z18_z18_z0, svfloat32x4_t, svfloat32_t, z18, + svmaxnm_single_f32_x4 (z18, z0), + svmaxnm (z18, z0)) + +/* +** maxnm_single_awkward: +** ... +** fmaxnm ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (maxnm_single_awkward, svfloat32x4_t, svfloat32_t, + z0_res = svmaxnm_single_f32_x4 (z1, z0), + z0_res = svmaxnm (z1, z0)) + +/* +** maxnm_single_z0_z0_z15: +** ... +** fmaxnm {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (maxnm_single_z0_z0_z15, svfloat32x4_t, svfloat32_t, + z0 = svmaxnm_single_f32_x4 (z0, z15), + z0 = svmaxnm (z0, z15)) + +/* +** maxnm_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmaxnm {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z24_z16, svfloat32x4_t, svfloat32_t, z24, + svmaxnm_single_f32_x4 (z24, z16), + svmaxnm (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..cbffc43b74b3b9bc8cb85a1a53315e54ea0d8e64 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f64_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** maxnm_z0_z0_z4: +** fmaxnm {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (maxnm_z0_z0_z4, svfloat64x2_t, z0, + svmaxnm_f64_x2 (z0, z4), + svmaxnm (z0, z4)) + +/* +** maxnm_z0_z4_z0: +** fmaxnm {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (maxnm_z0_z4_z0, svfloat64x2_t, z0, + svmaxnm_f64_x2 (z4, z0), + svmaxnm (z4, z0)) + +/* +** maxnm_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm [^\n]+, {z28\.d - z29\.d} +** | +** fmaxnm [^\n]+, {z28\.d - z29\.d} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (maxnm_z0_z4_z28, svfloat64x2_t, z0, + svmaxnm_f64_x2 (z4, z28), + svmaxnm (z4, z28)) + +/* +** maxnm_z18_z18_z4: +** fmaxnm {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (maxnm_z18_z18_z4, svfloat64x2_t, z18, + svmaxnm_f64_x2 (z18, z4), + svmaxnm (z18, z4)) + +/* +** maxnm_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm [^\n]+, {z18\.d - z19\.d} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (maxnm_z23_z23_z18, svfloat64x2_t, z23, + svmaxnm_f64_x2 (z23, z18), + svmaxnm (z23, z18)) + +/* +** maxnm_z28_z28_z0: +** fmaxnm {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_XN (maxnm_z28_z28_z0, svfloat64x2_t, z28, + svmaxnm_f64_x2 (z28, z0), + svmaxnm (z28, z0)) + +/* +** maxnm_z0_z0_z18: +** fmaxnm {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d} +** ret +*/ +TEST_XN (maxnm_z0_z0_z18, svfloat64x2_t, z0, + svmaxnm_f64_x2 (z0, z18), + svmaxnm (z0, z18)) + +/* +** maxnm_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** | +** fmaxnm {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (maxnm_z4_z4_z23, svfloat64x2_t, z4, + svmaxnm_f64_x2 (z4, z23), + svmaxnm (z4, z23)) + +/* +** maxnm_single_z24_z24_z0: +** fmaxnm {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z24_z0, svfloat64x2_t, svfloat64_t, z24, + svmaxnm_single_f64_x2 (z24, z0), + svmaxnm (z24, z0)) + +/* +** maxnm_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** | +** fmaxnm {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z28_z0, svfloat64x2_t, svfloat64_t, z24, + svmaxnm_single_f64_x2 (z28, z0), + svmaxnm (z28, z0)) + +/* +** maxnm_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** fmaxnm {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z1_z0, svfloat64x2_t, 
svfloat64_t, z24, + svmaxnm_single_f64_x2 (z1, z0), + svmaxnm (z1, z0)) + +/* +** maxnm_single_z1_z24_z0: +** fmaxnm {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z1_z24_z0, svfloat64x2_t, svfloat64_t, z1, + svmaxnm_single_f64_x2 (z24, z0), + svmaxnm (z24, z0)) + +/* +** maxnm_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z1_z1_z0, svfloat64x2_t, svfloat64_t, z1, + svmaxnm_single_f64_x2 (z1, z0), + svmaxnm (z1, z0)) + +/* +** maxnm_single_z18_z18_z0: +** fmaxnm {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z18_z18_z0, svfloat64x2_t, svfloat64_t, z18, + svmaxnm_single_f64_x2 (z18, z0), + svmaxnm (z18, z0)) + +/* +** maxnm_single_awkward: +** ... +** fmaxnm ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (maxnm_single_awkward, svfloat64x2_t, svfloat64_t, + z0_res = svmaxnm_single_f64_x2 (z1, z0), + z0_res = svmaxnm (z1, z0)) + +/* +** maxnm_single_z0_z0_z15: +** ... +** fmaxnm {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (maxnm_single_z0_z0_z15, svfloat64x2_t, svfloat64_t, + z0 = svmaxnm_single_f64_x2 (z0, z15), + z0 = svmaxnm (z0, z15)) + +/* +** maxnm_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmaxnm {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z24_z16, svfloat64x2_t, svfloat64_t, z24, + svmaxnm_single_f64_x2 (z24, z16), + svmaxnm (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..ba78edf8187e99e690de561ce2cd2f5b1c7975f5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/maxnm_f64_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** maxnm_z0_z0_z4: +** fmaxnm {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (maxnm_z0_z0_z4, svfloat64x4_t, z0, + svmaxnm_f64_x4 (z0, z4), + svmaxnm (z0, z4)) + +/* +** maxnm_z0_z4_z0: +** fmaxnm {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (maxnm_z0_z4_z0, svfloat64x4_t, z0, + svmaxnm_f64_x4 (z4, z0), + svmaxnm (z4, z0)) + +/* +** maxnm_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm [^\n]+, {z28\.d - z31\.d} +** | +** fmaxnm [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (maxnm_z0_z4_z28, svfloat64x4_t, z0, + svmaxnm_f64_x4 (z4, z28), + svmaxnm (z4, z28)) + +/* +** maxnm_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm [^\n]+, {z4\.d - z7\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (maxnm_z18_z18_z4, svfloat64x4_t, z18, + svmaxnm_f64_x4 (z18, z4), + svmaxnm (z18, z4)) + +/* +** maxnm_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (maxnm_z23_z23_z28, svfloat64x4_t, z23, + svmaxnm_f64_x4 (z23, z28), + svmaxnm (z23, z28)) + +/* +** maxnm_z28_z28_z0: +** fmaxnm {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - 
z3\.d} +** ret +*/ +TEST_XN (maxnm_z28_z28_z0, svfloat64x4_t, z28, + svmaxnm_f64_x4 (z28, z0), + svmaxnm (z28, z0)) + +/* +** maxnm_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+ +** | +** fmaxnm {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (maxnm_z0_z0_z18, svfloat64x4_t, z0, + svmaxnm_f64_x4 (z0, z18), + svmaxnm (z0, z18)) + +/* +** maxnm_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** | +** fmaxnm {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (maxnm_z4_z4_z23, svfloat64x4_t, z4, + svmaxnm_f64_x4 (z4, z23), + svmaxnm (z4, z23)) + +/* +** maxnm_single_z24_z24_z0: +** fmaxnm {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z24_z0, svfloat64x4_t, svfloat64_t, z24, + svmaxnm_single_f64_x4 (z24, z0), + svmaxnm (z24, z0)) + +/* +** maxnm_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** | +** fmaxnm {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z28_z0, svfloat64x4_t, svfloat64_t, z24, + svmaxnm_single_f64_x4 (z28, z0), + svmaxnm (z28, z0)) + +/* +** maxnm_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z1_z0, svfloat64x4_t, svfloat64_t, z24, + svmaxnm_single_f64_x4 (z1, z0), + svmaxnm (z1, z0)) + +/* +** maxnm_single_z1_z24_z0: +** fmaxnm {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z1_z24_z0, svfloat64x4_t, svfloat64_t, z1, + svmaxnm_single_f64_x4 (z24, z0), + svmaxnm (z24, z0)) + +/* +** maxnm_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z1_z1_z0, svfloat64x4_t, svfloat64_t, z1, + svmaxnm_single_f64_x4 (z1, z0), + svmaxnm (z1, z0)) + +/* +** maxnm_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmaxnm [^\n]+, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z18_z18_z0, svfloat64x4_t, svfloat64_t, z18, + svmaxnm_single_f64_x4 (z18, z0), + svmaxnm (z18, z0)) + +/* +** maxnm_single_awkward: +** ... +** fmaxnm ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (maxnm_single_awkward, svfloat64x4_t, svfloat64_t, + z0_res = svmaxnm_single_f64_x4 (z1, z0), + z0_res = svmaxnm (z1, z0)) + +/* +** maxnm_single_z0_z0_z15: +** ... +** fmaxnm {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (maxnm_single_z0_z0_z15, svfloat64x4_t, svfloat64_t, + z0 = svmaxnm_single_f64_x4 (z0, z15), + z0 = svmaxnm (z0, z15)) + +/* +** maxnm_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmaxnm {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (maxnm_single_z24_z24_z16, svfloat64x4_t, svfloat64_t, z24, + svmaxnm_single_f64_x4 (z24, z16), + svmaxnm (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..91b652472497357522e508306023bb54b9382606 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f16_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** fmin {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (min_z0_z0_z4, svfloat16x2_t, z0, + svmin_f16_x2 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** fmin {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (min_z0_z4_z0, svfloat16x2_t, z0, + svmin_f16_x2 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmin [^\n]+, {z28\.h - z29\.h} +** | +** fmin [^\n]+, {z28\.h - z29\.h} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svfloat16x2_t, z0, + svmin_f16_x2 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** fmin {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (min_z18_z18_z4, svfloat16x2_t, z18, + svmin_f16_x2 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** fmin [^\n]+, {z18\.h - z19\.h} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z18, svfloat16x2_t, z23, + svmin_f16_x2 (z23, z18), + svmin (z23, z18)) + +/* +** min_z28_z28_z0: +** fmin {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_XN (min_z28_z28_z0, svfloat16x2_t, z28, + svmin_f16_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** fmin {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_XN (min_z0_z0_z18, svfloat16x2_t, z0, + svmin_f16_x2 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmin {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+ +** | +** fmin {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svfloat16x2_t, z4, + svmin_f16_x2 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** fmin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svfloat16x2_t, svfloat16_t, z24, + svmin_single_f16_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** | +** fmin {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svfloat16x2_t, svfloat16_t, z24, + svmin_single_f16_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** fmin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svfloat16x2_t, svfloat16_t, z24, + svmin_single_f16_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** fmin {z24\.h - 
z25\.h}, {z24\.h - z25\.h}, z0\.h +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svfloat16x2_t, svfloat16_t, z1, + svmin_single_f16_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** fmin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svfloat16x2_t, svfloat16_t, z1, + svmin_single_f16_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** fmin {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svfloat16x2_t, svfloat16_t, z18, + svmin_single_f16_x2 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** fmin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svfloat16x2_t, svfloat16_t, + z0_res = svmin_single_f16_x2 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** fmin {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svfloat16x2_t, svfloat16_t, + z0 = svmin_single_f16_x2 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmin {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svfloat16x2_t, svfloat16_t, z24, + svmin_single_f16_x2 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..b3763e64bd86a2cf32a1a999c5b7c8eab8797ddc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f16_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** fmin {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (min_z0_z0_z4, svfloat16x4_t, z0, + svmin_f16_x4 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** fmin {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (min_z0_z4_z0, svfloat16x4_t, z0, + svmin_f16_x4 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin [^\n]+, {z28\.h - z31\.h} +** | +** fmin [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svfloat16x4_t, z0, + svmin_f16_x4 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin [^\n]+, {z4\.h - z7\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z18_z18_z4, svfloat16x4_t, z18, + svmin_f16_x4 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z28, svfloat16x4_t, z23, + svmin_f16_x4 (z23, z28), + svmin (z23, z28)) + +/* +** min_z28_z28_z0: +** fmin {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (min_z28_z28_z0, svfloat16x4_t, z28, + svmin_f16_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin {z0\.h - z3\.h}, {z0\.h - z3\.h}, 
[^\n]+ +** | +** fmin {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z0_z18, svfloat16x4_t, z0, + svmin_f16_x4 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** | +** fmin {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svfloat16x4_t, z4, + svmin_f16_x4 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** fmin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svfloat16x4_t, svfloat16_t, z24, + svmin_single_f16_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** | +** fmin {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svfloat16x4_t, svfloat16_t, z24, + svmin_single_f16_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svfloat16x4_t, svfloat16_t, z24, + svmin_single_f16_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** fmin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svfloat16x4_t, svfloat16_t, z1, + svmin_single_f16_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svfloat16x4_t, svfloat16_t, z1, + svmin_single_f16_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin [^\n]+, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svfloat16x4_t, svfloat16_t, z18, + svmin_single_f16_x4 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** fmin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svfloat16x4_t, svfloat16_t, + z0_res = svmin_single_f16_x4 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** fmin {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svfloat16x4_t, svfloat16_t, + z0 = svmin_single_f16_x4 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmin {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svfloat16x4_t, svfloat16_t, z24, + svmin_single_f16_x4 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..4b9734520308f34c2d937240db09acca74ccfa9a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f32_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** fmin {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (min_z0_z0_z4, svfloat32x2_t, z0, + svmin_f32_x2 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** fmin {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (min_z0_z4_z0, svfloat32x2_t, z0, + svmin_f32_x2 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmin [^\n]+, {z28\.s - z29\.s} +** | +** fmin [^\n]+, {z28\.s - z29\.s} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svfloat32x2_t, z0, + svmin_f32_x2 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** fmin {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (min_z18_z18_z4, svfloat32x2_t, z18, + svmin_f32_x2 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** fmin [^\n]+, {z18\.s - z19\.s} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z18, svfloat32x2_t, z23, + svmin_f32_x2 (z23, z18), + svmin (z23, z18)) + +/* +** min_z28_z28_z0: +** fmin {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_XN (min_z28_z28_z0, svfloat32x2_t, z28, + svmin_f32_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** fmin {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s} +** ret +*/ +TEST_XN (min_z0_z0_z18, svfloat32x2_t, z0, + svmin_f32_x2 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmin {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+ +** | +** fmin {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svfloat32x2_t, z4, + svmin_f32_x2 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** fmin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svfloat32x2_t, svfloat32_t, z24, + svmin_single_f32_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** | +** fmin {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svfloat32x2_t, svfloat32_t, z24, + svmin_single_f32_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** fmin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svfloat32x2_t, svfloat32_t, z24, + svmin_single_f32_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** fmin {z24\.s - z25\.s}, {z24\.s - 
z25\.s}, z0\.s +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svfloat32x2_t, svfloat32_t, z1, + svmin_single_f32_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** fmin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svfloat32x2_t, svfloat32_t, z1, + svmin_single_f32_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** fmin {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svfloat32x2_t, svfloat32_t, z18, + svmin_single_f32_x2 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** fmin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svfloat32x2_t, svfloat32_t, + z0_res = svmin_single_f32_x2 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** fmin {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svfloat32x2_t, svfloat32_t, + z0 = svmin_single_f32_x2 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmin {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svfloat32x2_t, svfloat32_t, z24, + svmin_single_f32_x2 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..d556270f4f6475ba2f3f03a906a7852a73c5096c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f32_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** fmin {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (min_z0_z0_z4, svfloat32x4_t, z0, + svmin_f32_x4 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** fmin {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (min_z0_z4_z0, svfloat32x4_t, z0, + svmin_f32_x4 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin [^\n]+, {z28\.s - z31\.s} +** | +** fmin [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svfloat32x4_t, z0, + svmin_f32_x4 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin [^\n]+, {z4\.s - z7\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z18_z18_z4, svfloat32x4_t, z18, + svmin_f32_x4 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z28, svfloat32x4_t, z23, + svmin_f32_x4 (z23, z28), + svmin (z23, z28)) + +/* +** min_z28_z28_z0: +** fmin {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (min_z28_z28_z0, svfloat32x4_t, z28, + svmin_f32_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** | +** 
fmin {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z0_z18, svfloat32x4_t, z0, + svmin_f32_x4 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** | +** fmin {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svfloat32x4_t, z4, + svmin_f32_x4 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** fmin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svfloat32x4_t, svfloat32_t, z24, + svmin_single_f32_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** | +** fmin {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svfloat32x4_t, svfloat32_t, z24, + svmin_single_f32_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svfloat32x4_t, svfloat32_t, z24, + svmin_single_f32_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** fmin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svfloat32x4_t, svfloat32_t, z1, + svmin_single_f32_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svfloat32x4_t, svfloat32_t, z1, + svmin_single_f32_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin [^\n]+, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svfloat32x4_t, svfloat32_t, z18, + svmin_single_f32_x4 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** fmin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svfloat32x4_t, svfloat32_t, + z0_res = svmin_single_f32_x4 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** fmin {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svfloat32x4_t, svfloat32_t, + z0 = svmin_single_f32_x4 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmin {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svfloat32x4_t, svfloat32_t, z24, + svmin_single_f32_x4 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..d8596bca0b637cff6e42539ce80594110d6b1eb5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f64_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** fmin {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (min_z0_z0_z4, svfloat64x2_t, z0, + svmin_f64_x2 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** fmin {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (min_z0_z4_z0, svfloat64x2_t, z0, + svmin_f64_x2 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmin [^\n]+, {z28\.d - z29\.d} +** | +** fmin [^\n]+, {z28\.d - z29\.d} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svfloat64x2_t, z0, + svmin_f64_x2 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** fmin {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (min_z18_z18_z4, svfloat64x2_t, z18, + svmin_f64_x2 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** fmin [^\n]+, {z18\.d - z19\.d} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z18, svfloat64x2_t, z23, + svmin_f64_x2 (z23, z18), + svmin (z23, z18)) + +/* +** min_z28_z28_z0: +** fmin {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_XN (min_z28_z28_z0, svfloat64x2_t, z28, + svmin_f64_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** fmin {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d} +** ret +*/ +TEST_XN (min_z0_z0_z18, svfloat64x2_t, z0, + svmin_f64_x2 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmin {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** | +** fmin {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svfloat64x2_t, z4, + svmin_f64_x2 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** fmin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svfloat64x2_t, svfloat64_t, z24, + svmin_single_f64_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** | +** fmin {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svfloat64x2_t, svfloat64_t, z24, + svmin_single_f64_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** fmin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svfloat64x2_t, svfloat64_t, z24, + svmin_single_f64_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** fmin {z24\.d - z25\.d}, {z24\.d - 
z25\.d}, z0\.d +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svfloat64x2_t, svfloat64_t, z1, + svmin_single_f64_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** fmin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svfloat64x2_t, svfloat64_t, z1, + svmin_single_f64_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** fmin {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svfloat64x2_t, svfloat64_t, z18, + svmin_single_f64_x2 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** fmin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svfloat64x2_t, svfloat64_t, + z0_res = svmin_single_f64_x2 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** fmin {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svfloat64x2_t, svfloat64_t, + z0 = svmin_single_f64_x2 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmin {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svfloat64x2_t, svfloat64_t, z24, + svmin_single_f64_x2 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..a7668dc45bcef2e0d78f018a403763cbc05a96ed --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_f64_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** fmin {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (min_z0_z0_z4, svfloat64x4_t, z0, + svmin_f64_x4 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** fmin {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (min_z0_z4_z0, svfloat64x4_t, z0, + svmin_f64_x4 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin [^\n]+, {z28\.d - z31\.d} +** | +** fmin [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svfloat64x4_t, z0, + svmin_f64_x4 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin [^\n]+, {z4\.d - z7\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z18_z18_z4, svfloat64x4_t, z18, + svmin_f64_x4 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z28, svfloat64x4_t, z23, + svmin_f64_x4 (z23, z28), + svmin (z23, z28)) + +/* +** min_z28_z28_z0: +** fmin {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (min_z28_z28_z0, svfloat64x4_t, z28, + svmin_f64_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+ +** | +** 
fmin {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z0_z18, svfloat64x4_t, z0, + svmin_f64_x4 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** | +** fmin {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svfloat64x4_t, z4, + svmin_f64_x4 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** fmin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svfloat64x4_t, svfloat64_t, z24, + svmin_single_f64_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** | +** fmin {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svfloat64x4_t, svfloat64_t, z24, + svmin_single_f64_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svfloat64x4_t, svfloat64_t, z24, + svmin_single_f64_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** fmin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svfloat64x4_t, svfloat64_t, z1, + svmin_single_f64_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svfloat64x4_t, svfloat64_t, z1, + svmin_single_f64_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmin [^\n]+, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svfloat64x4_t, svfloat64_t, z18, + svmin_single_f64_x4 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** fmin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svfloat64x4_t, svfloat64_t, + z0_res = svmin_single_f64_x4 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** fmin {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svfloat64x4_t, svfloat64_t, + z0 = svmin_single_f64_x4 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmin {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svfloat64x4_t, svfloat64_t, z24, + svmin_single_f64_x4 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..5a0c19231f319fb47c45a388b1c9bcb6ef4821e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s16_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** smin {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (min_z0_z0_z4, svint16x2_t, z0, + svmin_s16_x2 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** smin {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (min_z0_z4_z0, svint16x2_t, z0, + svmin_s16_x2 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, {z28\.h - z29\.h} +** | +** smin [^\n]+, {z28\.h - z29\.h} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svint16x2_t, z0, + svmin_s16_x2 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** smin {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (min_z18_z18_z4, svint16x2_t, z18, + svmin_s16_x2 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, {z18\.h - z19\.h} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z18, svint16x2_t, z23, + svmin_s16_x2 (z23, z18), + svmin (z23, z18)) + +/* +** min_z28_z28_z0: +** smin {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_XN (min_z28_z28_z0, svint16x2_t, z28, + svmin_s16_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** smin {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_XN (min_z0_z0_z18, svint16x2_t, z0, + svmin_s16_x2 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smin {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+ +** | +** smin {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svint16x2_t, z4, + svmin_s16_x2 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** smin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svint16x2_t, svint16_t, z24, + svmin_single_s16_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** | +** smin {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svint16x2_t, svint16_t, z24, + svmin_single_s16_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** smin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svint16x2_t, svint16_t, z24, + svmin_single_s16_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** smin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ( +** mov 
z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svint16x2_t, svint16_t, z1, + svmin_single_s16_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** smin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svint16x2_t, svint16_t, z1, + svmin_single_s16_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** smin {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svint16x2_t, svint16_t, z18, + svmin_single_s16_x2 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** smin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svint16x2_t, svint16_t, + z0_res = svmin_single_s16_x2 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** smin {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svint16x2_t, svint16_t, + z0 = svmin_single_s16_x2 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** smin {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svint16x2_t, svint16_t, z24, + svmin_single_s16_x2 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..6fc0e905d072fe80f7a72e7177d8bcf3dd58b27d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s16_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** smin {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (min_z0_z0_z4, svint16x4_t, z0, + svmin_s16_x4 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** smin {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (min_z0_z4_z0, svint16x4_t, z0, + svmin_s16_x4 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, {z28\.h - z31\.h} +** | +** smin [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svint16x4_t, z0, + svmin_s16_x4 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, {z4\.h - z7\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z18_z18_z4, svint16x4_t, z18, + svmin_s16_x4 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z28, svint16x4_t, z23, + svmin_s16_x4 (z23, z28), + svmin (z23, z28)) + +/* +** min_z28_z28_z0: +** smin {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (min_z28_z28_z0, svint16x4_t, z28, + svmin_s16_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** | +** smin {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** mov [^\n]+ +** mov 
[^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z0_z18, svint16x4_t, z0, + svmin_s16_x4 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** | +** smin {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svint16x4_t, z4, + svmin_s16_x4 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** smin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svint16x4_t, svint16_t, z24, + svmin_single_s16_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** | +** smin {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svint16x4_t, svint16_t, z24, + svmin_single_s16_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svint16x4_t, svint16_t, z24, + svmin_single_s16_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** smin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svint16x4_t, svint16_t, z1, + svmin_single_s16_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svint16x4_t, svint16_t, z1, + svmin_single_s16_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svint16x4_t, svint16_t, z18, + svmin_single_s16_x4 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** smin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svint16x4_t, svint16_t, + z0_res = svmin_single_s16_x4 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** smin {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svint16x4_t, svint16_t, + z0 = svmin_single_s16_x4 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** smin {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svint16x4_t, svint16_t, z24, + svmin_single_s16_x4 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..a8a9e104a50312fbda1b7398ea2dd3b5e558a8ae --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s32_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** smin {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (min_z0_z0_z4, svint32x2_t, z0, + svmin_s32_x2 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** smin {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (min_z0_z4_z0, svint32x2_t, z0, + svmin_s32_x2 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, {z28\.s - z29\.s} +** | +** smin [^\n]+, {z28\.s - z29\.s} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svint32x2_t, z0, + svmin_s32_x2 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** smin {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (min_z18_z18_z4, svint32x2_t, z18, + svmin_s32_x2 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, {z18\.s - z19\.s} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z18, svint32x2_t, z23, + svmin_s32_x2 (z23, z18), + svmin (z23, z18)) + +/* +** min_z28_z28_z0: +** smin {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_XN (min_z28_z28_z0, svint32x2_t, z28, + svmin_s32_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** smin {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s} +** ret +*/ +TEST_XN (min_z0_z0_z18, svint32x2_t, z0, + svmin_s32_x2 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smin {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+ +** | +** smin {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svint32x2_t, z4, + svmin_s32_x2 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** smin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svint32x2_t, svint32_t, z24, + svmin_single_s32_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** | +** smin {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svint32x2_t, svint32_t, z24, + svmin_single_s32_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** smin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svint32x2_t, svint32_t, z24, + svmin_single_s32_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** smin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ( +** mov z1\.d, 
z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svint32x2_t, svint32_t, z1, + svmin_single_s32_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** smin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svint32x2_t, svint32_t, z1, + svmin_single_s32_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** smin {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svint32x2_t, svint32_t, z18, + svmin_single_s32_x2 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** smin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svint32x2_t, svint32_t, + z0_res = svmin_single_s32_x2 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** smin {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svint32x2_t, svint32_t, + z0 = svmin_single_s32_x2 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** smin {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svint32x2_t, svint32_t, z24, + svmin_single_s32_x2 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..df5bf894a15740ec30c81ce9e7a05cbeebd95a76 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s32_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** smin {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (min_z0_z0_z4, svint32x4_t, z0, + svmin_s32_x4 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** smin {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (min_z0_z4_z0, svint32x4_t, z0, + svmin_s32_x4 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, {z28\.s - z31\.s} +** | +** smin [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svint32x4_t, z0, + svmin_s32_x4 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, {z4\.s - z7\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z18_z18_z4, svint32x4_t, z18, + svmin_s32_x4 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z28, svint32x4_t, z23, + svmin_s32_x4 (z23, z28), + svmin (z23, z28)) + +/* +** min_z28_z28_z0: +** smin {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (min_z28_z28_z0, svint32x4_t, z28, + svmin_s32_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** | +** smin {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ 
+** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z0_z18, svint32x4_t, z0, + svmin_s32_x4 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** | +** smin {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svint32x4_t, z4, + svmin_s32_x4 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** smin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svint32x4_t, svint32_t, z24, + svmin_single_s32_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** | +** smin {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svint32x4_t, svint32_t, z24, + svmin_single_s32_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svint32x4_t, svint32_t, z24, + svmin_single_s32_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** smin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svint32x4_t, svint32_t, z1, + svmin_single_s32_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svint32x4_t, svint32_t, z1, + svmin_single_s32_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svint32x4_t, svint32_t, z18, + svmin_single_s32_x4 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** smin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svint32x4_t, svint32_t, + z0_res = svmin_single_s32_x4 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** smin {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svint32x4_t, svint32_t, + z0 = svmin_single_s32_x4 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** smin {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svint32x4_t, svint32_t, z24, + svmin_single_s32_x4 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..5b16c46e209b5324e5de9d4a1ca292a907279f19 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s64_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** smin {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (min_z0_z0_z4, svint64x2_t, z0, + svmin_s64_x2 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** smin {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (min_z0_z4_z0, svint64x2_t, z0, + svmin_s64_x2 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, {z28\.d - z29\.d} +** | +** smin [^\n]+, {z28\.d - z29\.d} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svint64x2_t, z0, + svmin_s64_x2 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** smin {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (min_z18_z18_z4, svint64x2_t, z18, + svmin_s64_x2 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, {z18\.d - z19\.d} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z18, svint64x2_t, z23, + svmin_s64_x2 (z23, z18), + svmin (z23, z18)) + +/* +** min_z28_z28_z0: +** smin {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_XN (min_z28_z28_z0, svint64x2_t, z28, + svmin_s64_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** smin {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d} +** ret +*/ +TEST_XN (min_z0_z0_z18, svint64x2_t, z0, + svmin_s64_x2 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smin {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** | +** smin {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svint64x2_t, z4, + svmin_s64_x2 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** smin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svint64x2_t, svint64_t, z24, + svmin_single_s64_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** | +** smin {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svint64x2_t, svint64_t, z24, + svmin_single_s64_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** smin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svint64x2_t, svint64_t, z24, + svmin_single_s64_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** smin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ( +** mov z1\.d, 
z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svint64x2_t, svint64_t, z1, + svmin_single_s64_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** smin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svint64x2_t, svint64_t, z1, + svmin_single_s64_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** smin {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svint64x2_t, svint64_t, z18, + svmin_single_s64_x2 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** smin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svint64x2_t, svint64_t, + z0_res = svmin_single_s64_x2 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** smin {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svint64x2_t, svint64_t, + z0 = svmin_single_s64_x2 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** smin {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svint64x2_t, svint64_t, z24, + svmin_single_s64_x2 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..4b926d298e036ddaf3326528f8c47cdb2f26cf6a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s64_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** smin {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (min_z0_z0_z4, svint64x4_t, z0, + svmin_s64_x4 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** smin {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (min_z0_z4_z0, svint64x4_t, z0, + svmin_s64_x4 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, {z28\.d - z31\.d} +** | +** smin [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svint64x4_t, z0, + svmin_s64_x4 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, {z4\.d - z7\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z18_z18_z4, svint64x4_t, z18, + svmin_s64_x4 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z28, svint64x4_t, z23, + svmin_s64_x4 (z23, z28), + svmin (z23, z28)) + +/* +** min_z28_z28_z0: +** smin {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (min_z28_z28_z0, svint64x4_t, z28, + svmin_s64_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+ +** | +** smin {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ 
+** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z0_z18, svint64x4_t, z0, + svmin_s64_x4 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** | +** smin {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svint64x4_t, z4, + svmin_s64_x4 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** smin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svint64x4_t, svint64_t, z24, + svmin_single_s64_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** | +** smin {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svint64x4_t, svint64_t, z24, + svmin_single_s64_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svint64x4_t, svint64_t, z24, + svmin_single_s64_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** smin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svint64x4_t, svint64_t, z1, + svmin_single_s64_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svint64x4_t, svint64_t, z1, + svmin_single_s64_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svint64x4_t, svint64_t, z18, + svmin_single_s64_x4 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** smin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svint64x4_t, svint64_t, + z0_res = svmin_single_s64_x4 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** smin {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svint64x4_t, svint64_t, + z0 = svmin_single_s64_x4 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** smin {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svint64x4_t, svint64_t, z24, + svmin_single_s64_x4 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..9082ef75ff8cf67c6f962314ffbd6932389ee0a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s8_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** smin {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (min_z0_z0_z4, svint8x2_t, z0, + svmin_s8_x2 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** smin {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (min_z0_z4_z0, svint8x2_t, z0, + svmin_s8_x2 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, {z28\.b - z29\.b} +** | +** smin [^\n]+, {z28\.b - z29\.b} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svint8x2_t, z0, + svmin_s8_x2 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** smin {z18\.b - z19\.b}, {z18\.b - z19\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (min_z18_z18_z4, svint8x2_t, z18, + svmin_s8_x2 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, {z18\.b - z19\.b} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z18, svint8x2_t, z23, + svmin_s8_x2 (z23, z18), + svmin (z23, z18)) + +/* +** min_z28_z28_z0: +** smin {z28\.b - z29\.b}, {z28\.b - z29\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_XN (min_z28_z28_z0, svint8x2_t, z28, + svmin_s8_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** smin {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z18\.b - z19\.b} +** ret +*/ +TEST_XN (min_z0_z0_z18, svint8x2_t, z0, + svmin_s8_x2 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smin {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+ +** | +** smin {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svint8x2_t, z4, + svmin_s8_x2 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** smin {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svint8x2_t, svint8_t, z24, + svmin_single_s8_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** smin {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** | +** smin {z28\.b - z29\.b}, {z28\.b - z29\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svint8x2_t, svint8_t, z24, + svmin_single_s8_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** smin {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svint8x2_t, svint8_t, z24, + svmin_single_s8_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** smin {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d 
+** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svint8x2_t, svint8_t, z1, + svmin_single_s8_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** smin ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svint8x2_t, svint8_t, z1, + svmin_single_s8_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** smin {z18\.b - z19\.b}, {z18\.b - z19\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svint8x2_t, svint8_t, z18, + svmin_single_s8_x2 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** smin ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svint8x2_t, svint8_t, + z0_res = svmin_single_s8_x2 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** smin {z0\.b - z1\.b}, {z0\.b - z1\.b}, z15\.b +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svint8x2_t, svint8_t, + z0 = svmin_single_s8_x2 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** smin {z24\.b - z25\.b}, {z24\.b - z25\.b}, \1\.b +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svint8x2_t, svint8_t, z24, + svmin_single_s8_x2 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..e3229669ee887c0f331be10ba28f7f2e5a05d36f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_s8_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** smin {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (min_z0_z0_z4, svint8x4_t, z0, + svmin_s8_x4 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** smin {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (min_z0_z4_z0, svint8x4_t, z0, + svmin_s8_x4 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, {z28\.b - z31\.b} +** | +** smin [^\n]+, {z28\.b - z31\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svint8x4_t, z0, + svmin_s8_x4 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, {z4\.b - z7\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z18_z18_z4, svint8x4_t, z18, + svmin_s8_x4 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, {z28\.b - z31\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z28, svint8x4_t, z23, + svmin_s8_x4 (z23, z28), + svmin (z23, z28)) + +/* +** min_z28_z28_z0: +** smin {z28\.b - z31\.b}, {z28\.b - z31\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_XN (min_z28_z28_z0, svint8x4_t, z28, + svmin_s8_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+ +** | +** smin {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN 
(min_z0_z0_z18, svint8x4_t, z0, + svmin_s8_x4 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+ +** | +** smin {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svint8x4_t, z4, + svmin_s8_x4 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** smin {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svint8x4_t, svint8_t, z24, + svmin_single_s8_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** | +** smin {z28\.b - z31\.b}, {z28\.b - z31\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svint8x4_t, svint8_t, z24, + svmin_single_s8_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svint8x4_t, svint8_t, z24, + svmin_single_s8_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** smin {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svint8x4_t, svint8_t, z1, + svmin_single_s8_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svint8x4_t, svint8_t, z1, + svmin_single_s8_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smin [^\n]+, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svint8x4_t, svint8_t, z18, + svmin_single_s8_x4 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** smin ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svint8x4_t, svint8_t, + z0_res = svmin_single_s8_x4 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** smin {z0\.b - z3\.b}, {z0\.b - z3\.b}, z15\.b +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svint8x4_t, svint8_t, + z0 = svmin_single_s8_x4 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** smin {z24\.b - z27\.b}, {z24\.b - z27\.b}, \1\.b +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svint8x4_t, svint8_t, z24, + svmin_single_s8_x4 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..40c41d19f89ec752f28894b0d4882e2ca0c18efd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u16_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** umin {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (min_z0_z0_z4, svuint16x2_t, z0, + svmin_u16_x2 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** umin {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (min_z0_z4_z0, svuint16x2_t, z0, + svmin_u16_x2 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, {z28\.h - z29\.h} +** | +** umin [^\n]+, {z28\.h - z29\.h} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svuint16x2_t, z0, + svmin_u16_x2 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** umin {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (min_z18_z18_z4, svuint16x2_t, z18, + svmin_u16_x2 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, {z18\.h - z19\.h} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z18, svuint16x2_t, z23, + svmin_u16_x2 (z23, z18), + svmin (z23, z18)) + +/* +** min_z28_z28_z0: +** umin {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_XN (min_z28_z28_z0, svuint16x2_t, z28, + svmin_u16_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** umin {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_XN (min_z0_z0_z18, svuint16x2_t, z0, + svmin_u16_x2 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umin {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+ +** | +** umin {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svuint16x2_t, z4, + svmin_u16_x2 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** umin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svuint16x2_t, svuint16_t, z24, + svmin_single_u16_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** | +** umin {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svuint16x2_t, svuint16_t, z24, + svmin_single_u16_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** umin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svuint16x2_t, svuint16_t, z24, + svmin_single_u16_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** umin {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ( +** mov 
z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svuint16x2_t, svuint16_t, z1, + svmin_single_u16_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** umin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svuint16x2_t, svuint16_t, z1, + svmin_single_u16_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** umin {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svuint16x2_t, svuint16_t, z18, + svmin_single_u16_x2 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** umin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svuint16x2_t, svuint16_t, + z0_res = svmin_single_u16_x2 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** umin {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svuint16x2_t, svuint16_t, + z0 = svmin_single_u16_x2 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** umin {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svuint16x2_t, svuint16_t, z24, + svmin_single_u16_x2 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..ebe8da17331a69a4a2dbcc97fb8481b58bc78279 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u16_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** umin {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (min_z0_z0_z4, svuint16x4_t, z0, + svmin_u16_x4 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** umin {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (min_z0_z4_z0, svuint16x4_t, z0, + svmin_u16_x4 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, {z28\.h - z31\.h} +** | +** umin [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svuint16x4_t, z0, + svmin_u16_x4 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, {z4\.h - z7\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z18_z18_z4, svuint16x4_t, z18, + svmin_u16_x4 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z28, svuint16x4_t, z23, + svmin_u16_x4 (z23, z28), + svmin (z23, z28)) + +/* +** min_z28_z28_z0: +** umin {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (min_z28_z28_z0, svuint16x4_t, z28, + svmin_u16_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** | +** umin {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** 
mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z0_z18, svuint16x4_t, z0, + svmin_u16_x4 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** | +** umin {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svuint16x4_t, z4, + svmin_u16_x4 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** umin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svuint16x4_t, svuint16_t, z24, + svmin_single_u16_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** | +** umin {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svuint16x4_t, svuint16_t, z24, + svmin_single_u16_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svuint16x4_t, svuint16_t, z24, + svmin_single_u16_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** umin {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svuint16x4_t, svuint16_t, z1, + svmin_single_u16_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svuint16x4_t, svuint16_t, z1, + svmin_single_u16_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svuint16x4_t, svuint16_t, z18, + svmin_single_u16_x4 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** umin ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svuint16x4_t, svuint16_t, + z0_res = svmin_single_u16_x4 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** umin {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svuint16x4_t, svuint16_t, + z0 = svmin_single_u16_x4 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** umin {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svuint16x4_t, svuint16_t, z24, + svmin_single_u16_x4 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..5173e226f01ef2e0b10e185df74f3267da501519 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u32_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** umin {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (min_z0_z0_z4, svuint32x2_t, z0, + svmin_u32_x2 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** umin {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (min_z0_z4_z0, svuint32x2_t, z0, + svmin_u32_x2 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, {z28\.s - z29\.s} +** | +** umin [^\n]+, {z28\.s - z29\.s} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svuint32x2_t, z0, + svmin_u32_x2 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** umin {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (min_z18_z18_z4, svuint32x2_t, z18, + svmin_u32_x2 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, {z18\.s - z19\.s} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z18, svuint32x2_t, z23, + svmin_u32_x2 (z23, z18), + svmin (z23, z18)) + +/* +** min_z28_z28_z0: +** umin {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_XN (min_z28_z28_z0, svuint32x2_t, z28, + svmin_u32_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** umin {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s} +** ret +*/ +TEST_XN (min_z0_z0_z18, svuint32x2_t, z0, + svmin_u32_x2 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umin {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+ +** | +** umin {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svuint32x2_t, z4, + svmin_u32_x2 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** umin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svuint32x2_t, svuint32_t, z24, + svmin_single_u32_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** | +** umin {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svuint32x2_t, svuint32_t, z24, + svmin_single_u32_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** umin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svuint32x2_t, svuint32_t, z24, + svmin_single_u32_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** umin {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** 
( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svuint32x2_t, svuint32_t, z1, + svmin_single_u32_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** umin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svuint32x2_t, svuint32_t, z1, + svmin_single_u32_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** umin {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svuint32x2_t, svuint32_t, z18, + svmin_single_u32_x2 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** umin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svuint32x2_t, svuint32_t, + z0_res = svmin_single_u32_x2 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** umin {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svuint32x2_t, svuint32_t, + z0 = svmin_single_u32_x2 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** umin {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svuint32x2_t, svuint32_t, z24, + svmin_single_u32_x2 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..f2e40791095651a83294c9d738d95824039e1a06 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u32_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** umin {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (min_z0_z0_z4, svuint32x4_t, z0, + svmin_u32_x4 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** umin {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (min_z0_z4_z0, svuint32x4_t, z0, + svmin_u32_x4 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, {z28\.s - z31\.s} +** | +** umin [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svuint32x4_t, z0, + svmin_u32_x4 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, {z4\.s - z7\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z18_z18_z4, svuint32x4_t, z18, + svmin_u32_x4 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z28, svuint32x4_t, z23, + svmin_u32_x4 (z23, z28), + svmin (z23, z28)) + +/* +** min_z28_z28_z0: +** umin {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (min_z28_z28_z0, svuint32x4_t, z28, + svmin_u32_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** | +** umin {z0\.s - z3\.s}, {z0\.s - z3\.s}, 
[^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z0_z18, svuint32x4_t, z0, + svmin_u32_x4 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** | +** umin {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svuint32x4_t, z4, + svmin_u32_x4 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** umin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svuint32x4_t, svuint32_t, z24, + svmin_single_u32_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** | +** umin {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svuint32x4_t, svuint32_t, z24, + svmin_single_u32_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svuint32x4_t, svuint32_t, z24, + svmin_single_u32_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** umin {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svuint32x4_t, svuint32_t, z1, + svmin_single_u32_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svuint32x4_t, svuint32_t, z1, + svmin_single_u32_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svuint32x4_t, svuint32_t, z18, + svmin_single_u32_x4 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** umin ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svuint32x4_t, svuint32_t, + z0_res = svmin_single_u32_x4 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** umin {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svuint32x4_t, svuint32_t, + z0 = svmin_single_u32_x4 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** umin {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svuint32x4_t, svuint32_t, z24, + svmin_single_u32_x4 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..6babb789bc868af1ecc5b1b18016b5142a55add3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u64_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** umin {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (min_z0_z0_z4, svuint64x2_t, z0, + svmin_u64_x2 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** umin {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (min_z0_z4_z0, svuint64x2_t, z0, + svmin_u64_x2 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, {z28\.d - z29\.d} +** | +** umin [^\n]+, {z28\.d - z29\.d} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svuint64x2_t, z0, + svmin_u64_x2 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** umin {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (min_z18_z18_z4, svuint64x2_t, z18, + svmin_u64_x2 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, {z18\.d - z19\.d} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z18, svuint64x2_t, z23, + svmin_u64_x2 (z23, z18), + svmin (z23, z18)) + +/* +** min_z28_z28_z0: +** umin {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_XN (min_z28_z28_z0, svuint64x2_t, z28, + svmin_u64_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** umin {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d} +** ret +*/ +TEST_XN (min_z0_z0_z18, svuint64x2_t, z0, + svmin_u64_x2 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umin {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** | +** umin {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svuint64x2_t, z4, + svmin_u64_x2 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** umin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svuint64x2_t, svuint64_t, z24, + svmin_single_u64_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** | +** umin {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svuint64x2_t, svuint64_t, z24, + svmin_single_u64_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** umin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svuint64x2_t, svuint64_t, z24, + svmin_single_u64_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** umin {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** 
( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svuint64x2_t, svuint64_t, z1, + svmin_single_u64_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** umin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svuint64x2_t, svuint64_t, z1, + svmin_single_u64_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** umin {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svuint64x2_t, svuint64_t, z18, + svmin_single_u64_x2 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** umin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svuint64x2_t, svuint64_t, + z0_res = svmin_single_u64_x2 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** umin {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svuint64x2_t, svuint64_t, + z0 = svmin_single_u64_x2 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** umin {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svuint64x2_t, svuint64_t, z24, + svmin_single_u64_x2 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..b3dba47faea6cb752433363ce0979f9ef4f0516a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u64_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** umin {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (min_z0_z0_z4, svuint64x4_t, z0, + svmin_u64_x4 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** umin {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (min_z0_z4_z0, svuint64x4_t, z0, + svmin_u64_x4 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, {z28\.d - z31\.d} +** | +** umin [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svuint64x4_t, z0, + svmin_u64_x4 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, {z4\.d - z7\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z18_z18_z4, svuint64x4_t, z18, + svmin_u64_x4 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z28, svuint64x4_t, z23, + svmin_u64_x4 (z23, z28), + svmin (z23, z28)) + +/* +** min_z28_z28_z0: +** umin {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (min_z28_z28_z0, svuint64x4_t, z28, + svmin_u64_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+ +** | +** umin {z0\.d - z3\.d}, {z0\.d - z3\.d}, 
[^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z0_z18, svuint64x4_t, z0, + svmin_u64_x4 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** | +** umin {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svuint64x4_t, z4, + svmin_u64_x4 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** umin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svuint64x4_t, svuint64_t, z24, + svmin_single_u64_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** | +** umin {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svuint64x4_t, svuint64_t, z24, + svmin_single_u64_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svuint64x4_t, svuint64_t, z24, + svmin_single_u64_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** umin {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svuint64x4_t, svuint64_t, z1, + svmin_single_u64_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svuint64x4_t, svuint64_t, z1, + svmin_single_u64_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svuint64x4_t, svuint64_t, z18, + svmin_single_u64_x4 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** umin ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svuint64x4_t, svuint64_t, + z0_res = svmin_single_u64_x4 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** umin {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svuint64x4_t, svuint64_t, + z0 = svmin_single_u64_x4 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** umin {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svuint64x4_t, svuint64_t, z24, + svmin_single_u64_x4 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..4b4c723f212a28d808a5d927518d220ab2721eb8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u8_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** umin {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (min_z0_z0_z4, svuint8x2_t, z0, + svmin_u8_x2 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** umin {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (min_z0_z4_z0, svuint8x2_t, z0, + svmin_u8_x2 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, {z28\.b - z29\.b} +** | +** umin [^\n]+, {z28\.b - z29\.b} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svuint8x2_t, z0, + svmin_u8_x2 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** umin {z18\.b - z19\.b}, {z18\.b - z19\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (min_z18_z18_z4, svuint8x2_t, z18, + svmin_u8_x2 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, {z18\.b - z19\.b} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z18, svuint8x2_t, z23, + svmin_u8_x2 (z23, z18), + svmin (z23, z18)) + +/* +** min_z28_z28_z0: +** umin {z28\.b - z29\.b}, {z28\.b - z29\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_XN (min_z28_z28_z0, svuint8x2_t, z28, + svmin_u8_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** umin {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z18\.b - z19\.b} +** ret +*/ +TEST_XN (min_z0_z0_z18, svuint8x2_t, z0, + svmin_u8_x2 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umin {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+ +** | +** umin {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svuint8x2_t, z4, + svmin_u8_x2 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** umin {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svuint8x2_t, svuint8_t, z24, + svmin_single_u8_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** umin {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** | +** umin {z28\.b - z29\.b}, {z28\.b - z29\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svuint8x2_t, svuint8_t, z24, + svmin_single_u8_x2 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** umin {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svuint8x2_t, svuint8_t, z24, + svmin_single_u8_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** umin {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ( +** mov z1\.d, z24\.d +** 
mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svuint8x2_t, svuint8_t, z1, + svmin_single_u8_x2 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** umin ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svuint8x2_t, svuint8_t, z1, + svmin_single_u8_x2 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** umin {z18\.b - z19\.b}, {z18\.b - z19\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svuint8x2_t, svuint8_t, z18, + svmin_single_u8_x2 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** umin ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svuint8x2_t, svuint8_t, + z0_res = svmin_single_u8_x2 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** umin {z0\.b - z1\.b}, {z0\.b - z1\.b}, z15\.b +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svuint8x2_t, svuint8_t, + z0 = svmin_single_u8_x2 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** umin {z24\.b - z25\.b}, {z24\.b - z25\.b}, \1\.b +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svuint8x2_t, svuint8_t, z24, + svmin_single_u8_x2 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..81243fd7d2c2ed197711ec6005303e6097283f48 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/min_u8_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** min_z0_z0_z4: +** umin {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (min_z0_z0_z4, svuint8x4_t, z0, + svmin_u8_x4 (z0, z4), + svmin (z0, z4)) + +/* +** min_z0_z4_z0: +** umin {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (min_z0_z4_z0, svuint8x4_t, z0, + svmin_u8_x4 (z4, z0), + svmin (z4, z0)) + +/* +** min_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, {z28\.b - z31\.b} +** | +** umin [^\n]+, {z28\.b - z31\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z4_z28, svuint8x4_t, z0, + svmin_u8_x4 (z4, z28), + svmin (z4, z28)) + +/* +** min_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, {z4\.b - z7\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z18_z18_z4, svuint8x4_t, z18, + svmin_u8_x4 (z18, z4), + svmin (z18, z4)) + +/* +** min_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, {z28\.b - z31\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (min_z23_z23_z28, svuint8x4_t, z23, + svmin_u8_x4 (z23, z28), + svmin (z23, z28)) + +/* +** min_z28_z28_z0: +** umin {z28\.b - z31\.b}, {z28\.b - z31\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_XN (min_z28_z28_z0, svuint8x4_t, z28, + svmin_u8_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+ +** | +** umin {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov 
[^\n]+ +** ) +** ret +*/ +TEST_XN (min_z0_z0_z18, svuint8x4_t, z0, + svmin_u8_x4 (z0, z18), + svmin (z0, z18)) + +/* +** min_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+ +** | +** umin {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (min_z4_z4_z23, svuint8x4_t, z4, + svmin_u8_x4 (z4, z23), + svmin (z4, z23)) + +/* +** min_single_z24_z24_z0: +** umin {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z0, svuint8x4_t, svuint8_t, z24, + svmin_single_u8_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** | +** umin {z28\.b - z31\.b}, {z28\.b - z31\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z28_z0, svuint8x4_t, svuint8_t, z24, + svmin_single_u8_x4 (z28, z0), + svmin (z28, z0)) + +/* +** min_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z1_z0, svuint8x4_t, svuint8_t, z24, + svmin_single_u8_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z1_z24_z0: +** umin {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z24_z0, svuint8x4_t, svuint8_t, z1, + svmin_single_u8_x4 (z24, z0), + svmin (z24, z0)) + +/* +** min_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z1_z1_z0, svuint8x4_t, svuint8_t, z1, + svmin_single_u8_x4 (z1, z0), + svmin (z1, z0)) + +/* +** min_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umin [^\n]+, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (min_single_z18_z18_z0, svuint8x4_t, svuint8_t, z18, + svmin_single_u8_x4 (z18, z0), + svmin (z18, z0)) + +/* +** min_single_awkward: +** ... +** umin ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (min_single_awkward, svuint8x4_t, svuint8_t, + z0_res = svmin_single_u8_x4 (z1, z0), + z0_res = svmin (z1, z0)) + +/* +** min_single_z0_z0_z15: +** ... +** umin {z0\.b - z3\.b}, {z0\.b - z3\.b}, z15\.b +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (min_single_z0_z0_z15, svuint8x4_t, svuint8_t, + z0 = svmin_single_u8_x4 (z0, z15), + z0 = svmin (z0, z15)) + +/* +** min_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** umin {z24\.b - z27\.b}, {z24\.b - z27\.b}, \1\.b +** ret +*/ +TEST_XN_SINGLE (min_single_z24_z24_z16, svuint8x4_t, svuint8_t, z24, + svmin_single_u8_x4 (z24, z16), + svmin (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..62c13d227e51af2d7cfa7aa6f52835d48ecbdcf3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f16_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** minnm_z0_z0_z4: +** fminnm {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (minnm_z0_z0_z4, svfloat16x2_t, z0, + svminnm_f16_x2 (z0, z4), + svminnm (z0, z4)) + +/* +** minnm_z0_z4_z0: +** fminnm {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (minnm_z0_z4_z0, svfloat16x2_t, z0, + svminnm_f16_x2 (z4, z0), + svminnm (z4, z0)) + +/* +** minnm_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fminnm [^\n]+, {z28\.h - z29\.h} +** | +** fminnm [^\n]+, {z28\.h - z29\.h} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (minnm_z0_z4_z28, svfloat16x2_t, z0, + svminnm_f16_x2 (z4, z28), + svminnm (z4, z28)) + +/* +** minnm_z18_z18_z4: +** fminnm {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (minnm_z18_z18_z4, svfloat16x2_t, z18, + svminnm_f16_x2 (z18, z4), + svminnm (z18, z4)) + +/* +** minnm_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** fminnm [^\n]+, {z18\.h - z19\.h} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (minnm_z23_z23_z18, svfloat16x2_t, z23, + svminnm_f16_x2 (z23, z18), + svminnm (z23, z18)) + +/* +** minnm_z28_z28_z0: +** fminnm {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_XN (minnm_z28_z28_z0, svfloat16x2_t, z28, + svminnm_f16_x2 (z28, z0), + svminnm (z28, z0)) + +/* +** minnm_z0_z0_z18: +** fminnm {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_XN (minnm_z0_z0_z18, svfloat16x2_t, z0, + svminnm_f16_x2 (z0, z18), + svminnm (z0, z18)) + +/* +** minnm_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fminnm {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+ +** | +** fminnm {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (minnm_z4_z4_z23, svfloat16x2_t, z4, + svminnm_f16_x2 (z4, z23), + svminnm (z4, z23)) + +/* +** minnm_single_z24_z24_z0: +** fminnm {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z24_z0, svfloat16x2_t, svfloat16_t, z24, + svminnm_single_f16_x2 (z24, z0), + svminnm (z24, z0)) + +/* +** minnm_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fminnm {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** | +** fminnm {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z28_z0, svfloat16x2_t, svfloat16_t, z24, + svminnm_single_f16_x2 (z28, z0), + svminnm (z28, z0)) + +/* +** minnm_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** fminnm {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z1_z0, svfloat16x2_t, svfloat16_t, z24, + 
svminnm_single_f16_x2 (z1, z0), + svminnm (z1, z0)) + +/* +** minnm_single_z1_z24_z0: +** fminnm {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (minnm_single_z1_z24_z0, svfloat16x2_t, svfloat16_t, z1, + svminnm_single_f16_x2 (z24, z0), + svminnm (z24, z0)) + +/* +** minnm_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** fminnm ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (minnm_single_z1_z1_z0, svfloat16x2_t, svfloat16_t, z1, + svminnm_single_f16_x2 (z1, z0), + svminnm (z1, z0)) + +/* +** minnm_single_z18_z18_z0: +** fminnm {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (minnm_single_z18_z18_z0, svfloat16x2_t, svfloat16_t, z18, + svminnm_single_f16_x2 (z18, z0), + svminnm (z18, z0)) + +/* +** minnm_single_awkward: +** ... +** fminnm ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (minnm_single_awkward, svfloat16x2_t, svfloat16_t, + z0_res = svminnm_single_f16_x2 (z1, z0), + z0_res = svminnm (z1, z0)) + +/* +** minnm_single_z0_z0_z15: +** ... +** fminnm {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (minnm_single_z0_z0_z15, svfloat16x2_t, svfloat16_t, + z0 = svminnm_single_f16_x2 (z0, z15), + z0 = svminnm (z0, z15)) + +/* +** minnm_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fminnm {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z24_z16, svfloat16x2_t, svfloat16_t, z24, + svminnm_single_f16_x2 (z24, z16), + svminnm (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..6afb754dac3b84d6dcf362dbe49157ce5fa2b435 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f16_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** minnm_z0_z0_z4: +** fminnm {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (minnm_z0_z0_z4, svfloat16x4_t, z0, + svminnm_f16_x4 (z0, z4), + svminnm (z0, z4)) + +/* +** minnm_z0_z4_z0: +** fminnm {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (minnm_z0_z4_z0, svfloat16x4_t, z0, + svminnm_f16_x4 (z4, z0), + svminnm (z4, z0)) + +/* +** minnm_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm [^\n]+, {z28\.h - z31\.h} +** | +** fminnm [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (minnm_z0_z4_z28, svfloat16x4_t, z0, + svminnm_f16_x4 (z4, z28), + svminnm (z4, z28)) + +/* +** minnm_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm [^\n]+, {z4\.h - z7\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (minnm_z18_z18_z4, svfloat16x4_t, z18, + svminnm_f16_x4 (z18, z4), + svminnm (z18, z4)) + +/* +** minnm_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (minnm_z23_z23_z28, svfloat16x4_t, z23, + svminnm_f16_x4 (z23, z28), + svminnm (z23, z28)) + +/* +** minnm_z28_z28_z0: +** fminnm {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ 
+TEST_XN (minnm_z28_z28_z0, svfloat16x4_t, z28, + svminnm_f16_x4 (z28, z0), + svminnm (z28, z0)) + +/* +** minnm_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** | +** fminnm {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (minnm_z0_z0_z18, svfloat16x4_t, z0, + svminnm_f16_x4 (z0, z18), + svminnm (z0, z18)) + +/* +** minnm_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** | +** fminnm {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (minnm_z4_z4_z23, svfloat16x4_t, z4, + svminnm_f16_x4 (z4, z23), + svminnm (z4, z23)) + +/* +** minnm_single_z24_z24_z0: +** fminnm {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z24_z0, svfloat16x4_t, svfloat16_t, z24, + svminnm_single_f16_x4 (z24, z0), + svminnm (z24, z0)) + +/* +** minnm_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** | +** fminnm {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z28_z0, svfloat16x4_t, svfloat16_t, z24, + svminnm_single_f16_x4 (z28, z0), + svminnm (z28, z0)) + +/* +** minnm_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z1_z0, svfloat16x4_t, svfloat16_t, z24, + svminnm_single_f16_x4 (z1, z0), + svminnm (z1, z0)) + +/* +** minnm_single_z1_z24_z0: +** fminnm {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (minnm_single_z1_z24_z0, svfloat16x4_t, svfloat16_t, z1, + svminnm_single_f16_x4 (z24, z0), + svminnm (z24, z0)) + +/* +** minnm_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (minnm_single_z1_z1_z0, svfloat16x4_t, svfloat16_t, z1, + svminnm_single_f16_x4 (z1, z0), + svminnm (z1, z0)) + +/* +** minnm_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm [^\n]+, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (minnm_single_z18_z18_z0, svfloat16x4_t, svfloat16_t, z18, + svminnm_single_f16_x4 (z18, z0), + svminnm (z18, z0)) + +/* +** minnm_single_awkward: +** ... +** fminnm ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (minnm_single_awkward, svfloat16x4_t, svfloat16_t, + z0_res = svminnm_single_f16_x4 (z1, z0), + z0_res = svminnm (z1, z0)) + +/* +** minnm_single_z0_z0_z15: +** ... +** fminnm {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (minnm_single_z0_z0_z15, svfloat16x4_t, svfloat16_t, + z0 = svminnm_single_f16_x4 (z0, z15), + z0 = svminnm (z0, z15)) + +/* +** minnm_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fminnm {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z24_z16, svfloat16x4_t, svfloat16_t, z24, + svminnm_single_f16_x4 (z24, z16), + svminnm (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..19149fed86ab32987cefe4ffd0ea989e8527aacd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f32_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** minnm_z0_z0_z4: +** fminnm {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (minnm_z0_z0_z4, svfloat32x2_t, z0, + svminnm_f32_x2 (z0, z4), + svminnm (z0, z4)) + +/* +** minnm_z0_z4_z0: +** fminnm {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (minnm_z0_z4_z0, svfloat32x2_t, z0, + svminnm_f32_x2 (z4, z0), + svminnm (z4, z0)) + +/* +** minnm_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fminnm [^\n]+, {z28\.s - z29\.s} +** | +** fminnm [^\n]+, {z28\.s - z29\.s} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (minnm_z0_z4_z28, svfloat32x2_t, z0, + svminnm_f32_x2 (z4, z28), + svminnm (z4, z28)) + +/* +** minnm_z18_z18_z4: +** fminnm {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (minnm_z18_z18_z4, svfloat32x2_t, z18, + svminnm_f32_x2 (z18, z4), + svminnm (z18, z4)) + +/* +** minnm_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** fminnm [^\n]+, {z18\.s - z19\.s} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (minnm_z23_z23_z18, svfloat32x2_t, z23, + svminnm_f32_x2 (z23, z18), + svminnm (z23, z18)) + +/* +** minnm_z28_z28_z0: +** fminnm {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_XN (minnm_z28_z28_z0, svfloat32x2_t, z28, + svminnm_f32_x2 (z28, z0), + svminnm (z28, z0)) + +/* +** minnm_z0_z0_z18: +** fminnm {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s} +** ret +*/ +TEST_XN (minnm_z0_z0_z18, svfloat32x2_t, z0, + svminnm_f32_x2 (z0, z18), + svminnm (z0, z18)) + +/* +** minnm_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fminnm {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+ +** | +** fminnm {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (minnm_z4_z4_z23, svfloat32x2_t, z4, + svminnm_f32_x2 (z4, z23), + svminnm (z4, z23)) + +/* +** minnm_single_z24_z24_z0: +** fminnm {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z24_z0, svfloat32x2_t, svfloat32_t, z24, + svminnm_single_f32_x2 (z24, z0), + svminnm (z24, z0)) + +/* +** minnm_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fminnm {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** | +** fminnm {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z28_z0, svfloat32x2_t, svfloat32_t, z24, + svminnm_single_f32_x2 (z28, z0), + svminnm (z28, z0)) + +/* +** minnm_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** fminnm {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z1_z0, svfloat32x2_t, 
svfloat32_t, z24, + svminnm_single_f32_x2 (z1, z0), + svminnm (z1, z0)) + +/* +** minnm_single_z1_z24_z0: +** fminnm {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (minnm_single_z1_z24_z0, svfloat32x2_t, svfloat32_t, z1, + svminnm_single_f32_x2 (z24, z0), + svminnm (z24, z0)) + +/* +** minnm_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** fminnm ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (minnm_single_z1_z1_z0, svfloat32x2_t, svfloat32_t, z1, + svminnm_single_f32_x2 (z1, z0), + svminnm (z1, z0)) + +/* +** minnm_single_z18_z18_z0: +** fminnm {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (minnm_single_z18_z18_z0, svfloat32x2_t, svfloat32_t, z18, + svminnm_single_f32_x2 (z18, z0), + svminnm (z18, z0)) + +/* +** minnm_single_awkward: +** ... +** fminnm ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (minnm_single_awkward, svfloat32x2_t, svfloat32_t, + z0_res = svminnm_single_f32_x2 (z1, z0), + z0_res = svminnm (z1, z0)) + +/* +** minnm_single_z0_z0_z15: +** ... +** fminnm {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (minnm_single_z0_z0_z15, svfloat32x2_t, svfloat32_t, + z0 = svminnm_single_f32_x2 (z0, z15), + z0 = svminnm (z0, z15)) + +/* +** minnm_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fminnm {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z24_z16, svfloat32x2_t, svfloat32_t, z24, + svminnm_single_f32_x2 (z24, z16), + svminnm (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..aa1079ac2504e8b298c136e22a2559c0a5508d4c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f32_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** minnm_z0_z0_z4: +** fminnm {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (minnm_z0_z0_z4, svfloat32x4_t, z0, + svminnm_f32_x4 (z0, z4), + svminnm (z0, z4)) + +/* +** minnm_z0_z4_z0: +** fminnm {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (minnm_z0_z4_z0, svfloat32x4_t, z0, + svminnm_f32_x4 (z4, z0), + svminnm (z4, z0)) + +/* +** minnm_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm [^\n]+, {z28\.s - z31\.s} +** | +** fminnm [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (minnm_z0_z4_z28, svfloat32x4_t, z0, + svminnm_f32_x4 (z4, z28), + svminnm (z4, z28)) + +/* +** minnm_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm [^\n]+, {z4\.s - z7\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (minnm_z18_z18_z4, svfloat32x4_t, z18, + svminnm_f32_x4 (z18, z4), + svminnm (z18, z4)) + +/* +** minnm_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (minnm_z23_z23_z28, svfloat32x4_t, z23, + svminnm_f32_x4 (z23, z28), + svminnm (z23, z28)) + +/* +** minnm_z28_z28_z0: +** fminnm {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - 
z3\.s} +** ret +*/ +TEST_XN (minnm_z28_z28_z0, svfloat32x4_t, z28, + svminnm_f32_x4 (z28, z0), + svminnm (z28, z0)) + +/* +** minnm_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** | +** fminnm {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (minnm_z0_z0_z18, svfloat32x4_t, z0, + svminnm_f32_x4 (z0, z18), + svminnm (z0, z18)) + +/* +** minnm_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** | +** fminnm {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (minnm_z4_z4_z23, svfloat32x4_t, z4, + svminnm_f32_x4 (z4, z23), + svminnm (z4, z23)) + +/* +** minnm_single_z24_z24_z0: +** fminnm {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z24_z0, svfloat32x4_t, svfloat32_t, z24, + svminnm_single_f32_x4 (z24, z0), + svminnm (z24, z0)) + +/* +** minnm_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** | +** fminnm {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z28_z0, svfloat32x4_t, svfloat32_t, z24, + svminnm_single_f32_x4 (z28, z0), + svminnm (z28, z0)) + +/* +** minnm_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z1_z0, svfloat32x4_t, svfloat32_t, z24, + svminnm_single_f32_x4 (z1, z0), + svminnm (z1, z0)) + +/* +** minnm_single_z1_z24_z0: +** fminnm {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (minnm_single_z1_z24_z0, svfloat32x4_t, svfloat32_t, z1, + svminnm_single_f32_x4 (z24, z0), + svminnm (z24, z0)) + +/* +** minnm_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (minnm_single_z1_z1_z0, svfloat32x4_t, svfloat32_t, z1, + svminnm_single_f32_x4 (z1, z0), + svminnm (z1, z0)) + +/* +** minnm_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm [^\n]+, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (minnm_single_z18_z18_z0, svfloat32x4_t, svfloat32_t, z18, + svminnm_single_f32_x4 (z18, z0), + svminnm (z18, z0)) + +/* +** minnm_single_awkward: +** ... +** fminnm ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (minnm_single_awkward, svfloat32x4_t, svfloat32_t, + z0_res = svminnm_single_f32_x4 (z1, z0), + z0_res = svminnm (z1, z0)) + +/* +** minnm_single_z0_z0_z15: +** ... +** fminnm {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (minnm_single_z0_z0_z15, svfloat32x4_t, svfloat32_t, + z0 = svminnm_single_f32_x4 (z0, z15), + z0 = svminnm (z0, z15)) + +/* +** minnm_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fminnm {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z24_z16, svfloat32x4_t, svfloat32_t, z24, + svminnm_single_f32_x4 (z24, z16), + svminnm (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..b2fe2f0ae0e08f4a36a76f17f428bdcde0ad1c11 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f64_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** minnm_z0_z0_z4: +** fminnm {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (minnm_z0_z0_z4, svfloat64x2_t, z0, + svminnm_f64_x2 (z0, z4), + svminnm (z0, z4)) + +/* +** minnm_z0_z4_z0: +** fminnm {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (minnm_z0_z4_z0, svfloat64x2_t, z0, + svminnm_f64_x2 (z4, z0), + svminnm (z4, z0)) + +/* +** minnm_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fminnm [^\n]+, {z28\.d - z29\.d} +** | +** fminnm [^\n]+, {z28\.d - z29\.d} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (minnm_z0_z4_z28, svfloat64x2_t, z0, + svminnm_f64_x2 (z4, z28), + svminnm (z4, z28)) + +/* +** minnm_z18_z18_z4: +** fminnm {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (minnm_z18_z18_z4, svfloat64x2_t, z18, + svminnm_f64_x2 (z18, z4), + svminnm (z18, z4)) + +/* +** minnm_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** fminnm [^\n]+, {z18\.d - z19\.d} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (minnm_z23_z23_z18, svfloat64x2_t, z23, + svminnm_f64_x2 (z23, z18), + svminnm (z23, z18)) + +/* +** minnm_z28_z28_z0: +** fminnm {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_XN (minnm_z28_z28_z0, svfloat64x2_t, z28, + svminnm_f64_x2 (z28, z0), + svminnm (z28, z0)) + +/* +** minnm_z0_z0_z18: +** fminnm {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d} +** ret +*/ +TEST_XN (minnm_z0_z0_z18, svfloat64x2_t, z0, + svminnm_f64_x2 (z0, z18), + svminnm (z0, z18)) + +/* +** minnm_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fminnm {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** | +** fminnm {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (minnm_z4_z4_z23, svfloat64x2_t, z4, + svminnm_f64_x2 (z4, z23), + svminnm (z4, z23)) + +/* +** minnm_single_z24_z24_z0: +** fminnm {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z24_z0, svfloat64x2_t, svfloat64_t, z24, + svminnm_single_f64_x2 (z24, z0), + svminnm (z24, z0)) + +/* +** minnm_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fminnm {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** | +** fminnm {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z28_z0, svfloat64x2_t, svfloat64_t, z24, + svminnm_single_f64_x2 (z28, z0), + svminnm (z28, z0)) + +/* +** minnm_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** fminnm {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z1_z0, svfloat64x2_t, 
svfloat64_t, z24, + svminnm_single_f64_x2 (z1, z0), + svminnm (z1, z0)) + +/* +** minnm_single_z1_z24_z0: +** fminnm {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (minnm_single_z1_z24_z0, svfloat64x2_t, svfloat64_t, z1, + svminnm_single_f64_x2 (z24, z0), + svminnm (z24, z0)) + +/* +** minnm_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** fminnm ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (minnm_single_z1_z1_z0, svfloat64x2_t, svfloat64_t, z1, + svminnm_single_f64_x2 (z1, z0), + svminnm (z1, z0)) + +/* +** minnm_single_z18_z18_z0: +** fminnm {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (minnm_single_z18_z18_z0, svfloat64x2_t, svfloat64_t, z18, + svminnm_single_f64_x2 (z18, z0), + svminnm (z18, z0)) + +/* +** minnm_single_awkward: +** ... +** fminnm ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (minnm_single_awkward, svfloat64x2_t, svfloat64_t, + z0_res = svminnm_single_f64_x2 (z1, z0), + z0_res = svminnm (z1, z0)) + +/* +** minnm_single_z0_z0_z15: +** ... +** fminnm {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (minnm_single_z0_z0_z15, svfloat64x2_t, svfloat64_t, + z0 = svminnm_single_f64_x2 (z0, z15), + z0 = svminnm (z0, z15)) + +/* +** minnm_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fminnm {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z24_z16, svfloat64x2_t, svfloat64_t, z24, + svminnm_single_f64_x2 (z24, z16), + svminnm (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..22e659cfd3d6bb83a21bd0262b7da80a3a9718c5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/minnm_f64_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** minnm_z0_z0_z4: +** fminnm {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (minnm_z0_z0_z4, svfloat64x4_t, z0, + svminnm_f64_x4 (z0, z4), + svminnm (z0, z4)) + +/* +** minnm_z0_z4_z0: +** fminnm {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (minnm_z0_z4_z0, svfloat64x4_t, z0, + svminnm_f64_x4 (z4, z0), + svminnm (z4, z0)) + +/* +** minnm_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm [^\n]+, {z28\.d - z31\.d} +** | +** fminnm [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (minnm_z0_z4_z28, svfloat64x4_t, z0, + svminnm_f64_x4 (z4, z28), + svminnm (z4, z28)) + +/* +** minnm_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm [^\n]+, {z4\.d - z7\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (minnm_z18_z18_z4, svfloat64x4_t, z18, + svminnm_f64_x4 (z18, z4), + svminnm (z18, z4)) + +/* +** minnm_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (minnm_z23_z23_z28, svfloat64x4_t, z23, + svminnm_f64_x4 (z23, z28), + svminnm (z23, z28)) + +/* +** minnm_z28_z28_z0: +** fminnm {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - 
z3\.d} +** ret +*/ +TEST_XN (minnm_z28_z28_z0, svfloat64x4_t, z28, + svminnm_f64_x4 (z28, z0), + svminnm (z28, z0)) + +/* +** minnm_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+ +** | +** fminnm {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (minnm_z0_z0_z18, svfloat64x4_t, z0, + svminnm_f64_x4 (z0, z18), + svminnm (z0, z18)) + +/* +** minnm_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** | +** fminnm {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (minnm_z4_z4_z23, svfloat64x4_t, z4, + svminnm_f64_x4 (z4, z23), + svminnm (z4, z23)) + +/* +** minnm_single_z24_z24_z0: +** fminnm {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z24_z0, svfloat64x4_t, svfloat64_t, z24, + svminnm_single_f64_x4 (z24, z0), + svminnm (z24, z0)) + +/* +** minnm_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** | +** fminnm {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z28_z0, svfloat64x4_t, svfloat64_t, z24, + svminnm_single_f64_x4 (z28, z0), + svminnm (z28, z0)) + +/* +** minnm_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (minnm_single_z24_z1_z0, svfloat64x4_t, svfloat64_t, z24, + svminnm_single_f64_x4 (z1, z0), + svminnm (z1, z0)) + +/* +** minnm_single_z1_z24_z0: +** fminnm {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (minnm_single_z1_z24_z0, svfloat64x4_t, svfloat64_t, z1, + svminnm_single_f64_x4 (z24, z0), + svminnm (z24, z0)) + +/* +** minnm_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (minnm_single_z1_z1_z0, svfloat64x4_t, svfloat64_t, z1, + svminnm_single_f64_x4 (z1, z0), + svminnm (z1, z0)) + +/* +** minnm_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fminnm [^\n]+, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (minnm_single_z18_z18_z0, svfloat64x4_t, svfloat64_t, z18, + svminnm_single_f64_x4 (z18, z0), + svminnm (z18, z0)) + +/* +** minnm_single_awkward: +** ... +** fminnm ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (minnm_single_awkward, svfloat64x4_t, svfloat64_t, + z0_res = svminnm_single_f64_x4 (z1, z0), + z0_res = svminnm (z1, z0)) + +/* +** minnm_single_z0_z0_z15: +** ... +** fminnm {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d +** ... 
+** ret
+*/
+TEST_XN_SINGLE_Z15 (minnm_single_z0_z0_z15, svfloat64x4_t, svfloat64_t,
+ z0 = svminnm_single_f64_x4 (z0, z15),
+ z0 = svminnm (z0, z15))
+
+/*
+** minnm_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fminnm {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (minnm_single_z24_z24_z16, svfloat64x4_t, svfloat64_t, z24,
+ svminnm_single_f64_x4 (z24, z16),
+ svminnm (z24, z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_bf16_vg2x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_bf16_vg2x1.c
new file mode 100644
index 0000000000000000000000000000000000000000..c61f6382ce29965ff05f90940fb3d135c978f264
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_bf16_vg2x1.c
@@ -0,0 +1,148 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** bfmlal za\.s\[\1, 0:1\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_0_z0_z0_0, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x1 (0, z0, z0, 0),
+ svmla_lane_za32_vg2x1 (0, z0, z0, 0))
+
+/*
+** mla_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** bfmlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w0_z0_z3_1, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x1 (w0, z0, z3, 1),
+ svmla_lane_za32_vg2x1 (w0, z0, z3, 1))
+
+/*
+** mla_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** bfmlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w7_z0_z3_2, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x1 (w7, z0, z3, 2),
+ svmla_lane_za32_vg2x1 (w7, z0, z3, 2))
+
+/*
+** mla_lane_w8_z7_z3_3:
+** bfmlal za\.s\[w8, 0:1\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z7_z3_3, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x1 (w8, z7, z3, 3),
+ svmla_lane_za32_vg2x1 (w8, z7, z3, 3))
+
+/*
+** mla_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** bfmlal za\.s\[w8, 0:1\], z31\.h, 
\1\.h\[4\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8_z31_z16_4, svbfloat16_t, + svmla_lane_za32_bf16_vg2x1 (w8, z31, z16, 4), + svmla_lane_za32_vg2x1 (w8, z31, z16, 4)) + +/* +** mla_lane_w8p1_z0_z0_5: +** add (w8|w9|w10|w11), w8, #?1 +** bfmlal za\.s\[\1, 0:1\], z0\.h, z0\.h\[5\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p1_z0_z0_5, svbfloat16_t, + svmla_lane_za32_bf16_vg2x1 (w8 + 1, z0, z0, 5), + svmla_lane_za32_vg2x1 (w8 + 1, z0, z0, 5)) + +/* +** mla_lane_w8p2_z23_z0_6: +** bfmlal za\.s\[w8, 2:3\], z23\.h, z0\.h\[6\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p2_z23_z0_6, svbfloat16_t, + svmla_lane_za32_bf16_vg2x1 (w8 + 2, z23, z0, 6), + svmla_lane_za32_vg2x1 (w8 + 2, z23, z0, 6)) + +/* +** mla_lane_w11p6_z23_z0_7: +** bfmlal za\.s\[w11, 6:7\], z23\.h, z0\.h\[7\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w11p6_z23_z0_7, svbfloat16_t, + svmla_lane_za32_bf16_vg2x1 (w11 + 6, z23, z0, 7), + svmla_lane_za32_vg2x1 (w11 + 6, z23, z0, 7)) + +/* +** mla_lane_w8p7_z7_z7_0: +** add (w8|w9|w10|w11), w8, #?7 +** bfmlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[0\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p7_z7_z7_0, svbfloat16_t, + svmla_lane_za32_bf16_vg2x1 (w8 + 7, z7, z7, 0), + svmla_lane_za32_vg2x1 (w8 + 7, z7, z7, 0)) + +/* +** mla_lane_w11p10_z23_z0_1: +** bfmlal za\.s\[w11, 10:11\], z23\.h, z0\.h\[1\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w11p10_z23_z0_1, svbfloat16_t, + svmla_lane_za32_bf16_vg2x1 (w11 + 10, z23, z0, 1), + svmla_lane_za32_vg2x1 (w11 + 10, z23, z0, 1)) + +/* +** mla_lane_w8p14_z23_z0_2: +** bfmlal za\.s\[w8, 14:15\], z23\.h, z0\.h\[2\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p14_z23_z0_2, svbfloat16_t, + svmla_lane_za32_bf16_vg2x1 (w8 + 14, z23, z0, 2), + svmla_lane_za32_vg2x1 (w8 + 14, z23, z0, 2)) + +/* +** mla_lane_w8p15_z7_z7_3: +** add (w8|w9|w10|w11), w8, #?15 +** bfmlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[3\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p15_z7_z7_3, svbfloat16_t, + svmla_lane_za32_bf16_vg2x1 (w8 + 15, z7, z7, 3), + svmla_lane_za32_vg2x1 (w8 + 15, z7, z7, 3)) + +/* +** mla_lane_w8p16_z7_z7_4: +** add (w8|w9|w10|w11), w8, #?16 +** bfmlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[4\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p16_z7_z7_4, svbfloat16_t, + svmla_lane_za32_bf16_vg2x1 (w8 + 16, z7, z7, 4), + svmla_lane_za32_vg2x1 (w8 + 16, z7, z7, 4)) + +/* +** mla_lane_w8m1_z16_z0_5: +** sub (w8|w9|w10|w11), w8, #?1 +** bfmlal za\.s\[\1, 0:1\], z16\.h, z0\.h\[5\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8m1_z16_z0_5, svbfloat16_t, + svmla_lane_za32_bf16_vg2x1 (w8 - 1, z16, z0, 5), + svmla_lane_za32_vg2x1 (w8 - 1, z16, z0, 5)) + +/* +** mla_lane_w12_z0_z3_6: +** mov (w8|w9|w10|w11), w12 +** bfmlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[6\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w12_z0_z3_6, svbfloat16_t, + svmla_lane_za32_bf16_vg2x1 (w12, z0, z3, 6), + svmla_lane_za32_vg2x1 (w12, z0, z3, 6)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_bf16_vg2x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_bf16_vg2x2.c new file mode 100644 index 0000000000000000000000000000000000000000..357ef6cd973dafbb8d023b2959ed3de384d80ce4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_bf16_vg2x2.c @@ -0,0 +1,112 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** bfmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za32_bf16_vg2x2 (0, z0, z4, 0), + svmla_lane_za32_vg2x2 (0, z0, z4, 0)) + +/* +** 
mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** bfmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za32_bf16_vg2x2 (w0, z0, z7, 1), + svmla_lane_za32_vg2x2 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** bfmlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za32_bf16_vg2x2 (w8, z28, z4, 2), + svmla_lane_za32_vg2x2 (w8, z28, z4, 2)) + +/* +** mla_lane_w8p6_z0_z4_7: +** bfmlal za\.s\[w8, 6:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p6_z0_z4_7, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za32_bf16_vg2x2 (w8 + 6, z0, z4, 7), + svmla_lane_za32_vg2x2 (w8 + 6, z0, z4, 7)) + +/* +** mla_lane_w8p7_z0_z4_3: +** add (w8|w9|w10|w11), w8, #?7 +** bfmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za32_bf16_vg2x2 (w8 + 7, z0, z4, 3), + svmla_lane_za32_vg2x2 (w8 + 7, z0, z4, 3)) + +/* +** mla_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** bfmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za32_bf16_vg2x2 (w8 + 8, z0, z4, 4), + svmla_lane_za32_vg2x2 (w8 + 8, z0, z4, 4)) + +/* +** mla_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** bfmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za32_bf16_vg2x2 (w0 - 1, z0, z4, 5), + svmla_lane_za32_vg2x2 (w0 - 1, z0, z4, 5)) + +/* +** mla_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! 
+** bfmlal za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za32_bf16_vg2x2 (w8, z4, z15, 6), + svmla_lane_za32_vg2x2 (w8, z4, z15, 6)) + +/* +** mla_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** bfmlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za32_bf16_vg2x2 (w8, z28, z16, 7), + svmla_lane_za32_vg2x2 (w8, z28, z16, 7)) + +/* +** mla_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** bfmlal za\.s\[w8, 0:1, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za32_bf16_vg2x2 (w8, z17, z7, 0), + svmla_lane_za32_vg2x2 (w8, z17, z7, 0)) + +/* +** mla_lane_w8_z22_z4_1: +** bfmlal za\.s\[w8, 0:1, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svbfloat16x2_t, svbfloat16_t, + svmla_lane_za32_bf16_vg2x2 (w8, z22, z4, 1), + svmla_lane_za32_vg2x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_bf16_vg2x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_bf16_vg2x4.c new file mode 100644 index 0000000000000000000000000000000000000000..9a06ff4537c98782de66e928fe511c5805190074 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_bf16_vg2x4.c @@ -0,0 +1,118 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** bfmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svbfloat16x4_t, svbfloat16_t, + svmla_lane_za32_bf16_vg2x4 (0, z0, z4, 0), + svmla_lane_za32_vg2x4 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** bfmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svbfloat16x4_t, svbfloat16_t, + svmla_lane_za32_bf16_vg2x4 (w0, z0, z7, 1), + svmla_lane_za32_vg2x4 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** bfmlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svbfloat16x4_t, svbfloat16_t, + svmla_lane_za32_bf16_vg2x4 (w8, z28, z4, 2), + svmla_lane_za32_vg2x4 (w8, z28, z4, 2)) + +/* +** mla_lane_w8p6_z0_z4_7: +** bfmlal za\.s\[w8, 6:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p6_z0_z4_7, svbfloat16x4_t, svbfloat16_t, + svmla_lane_za32_bf16_vg2x4 (w8 + 6, z0, z4, 7), + svmla_lane_za32_vg2x4 (w8 + 6, z0, z4, 7)) + +/* +** mla_lane_w8p7_z0_z4_3: +** add (w8|w9|w10|w11), w8, #?7 +** bfmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svbfloat16x4_t, svbfloat16_t, + svmla_lane_za32_bf16_vg2x4 (w8 + 7, z0, z4, 3), + svmla_lane_za32_vg2x4 (w8 + 7, z0, z4, 3)) + +/* +** mla_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** bfmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svbfloat16x4_t, svbfloat16_t, + svmla_lane_za32_bf16_vg2x4 (w8 + 8, z0, z4, 4), + svmla_lane_za32_vg2x4 (w8 + 8, z0, z4, 4)) + +/* +** mla_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** bfmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svbfloat16x4_t, svbfloat16_t, + svmla_lane_za32_bf16_vg2x4 (w0 - 1, 
z0, z4, 5),
+ svmla_lane_za32_vg2x4 (w0 - 1, z0, z4, 5))
+
+/*
+** mla_lane_w8_z4_z15_6:
+** str d15, \[sp, #?-16\]!
+** bfmlal za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svbfloat16x4_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x4 (w8, z4, z15, 6),
+ svmla_lane_za32_vg2x4 (w8, z4, z15, 6))
+
+/*
+** mla_lane_w8_z28_z16_7:
+** mov (z[0-7]).d, z16.d
+** bfmlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svbfloat16x4_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x4 (w8, z28, z16, 7),
+ svmla_lane_za32_vg2x4 (w8, z28, z16, 7))
+
+/*
+** mla_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** bfmlal za\.s\[w8, 0:1, vgx4\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svbfloat16x4_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x4 (w8, z17, z7, 0),
+ svmla_lane_za32_vg2x4 (w8, z17, z7, 0))
+
+/*
+** mla_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** bfmlal za\.s\[w8, 0:1, vgx4\], [^\n]+, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svbfloat16x4_t, svbfloat16_t,
+ svmla_lane_za32_bf16_vg2x4 (w8, z22, z4, 1),
+ svmla_lane_za32_vg2x4 (w8, z22, z4, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f16_vg2x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f16_vg2x1.c
new file mode 100644
index 0000000000000000000000000000000000000000..8c2214b68c1895caee1536db07d351df36a01812
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f16_vg2x1.c
@@ -0,0 +1,148 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** fmlal za\.s\[\1, 0:1\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_0_z0_z0_0, svfloat16_t,
+ svmla_lane_za32_f16_vg2x1 (0, z0, z0, 0),
+ svmla_lane_za32_vg2x1 (0, z0, z0, 0))
+
+/*
+** mla_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** fmlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w0_z0_z3_1, svfloat16_t,
+ svmla_lane_za32_f16_vg2x1 (w0, z0, z3, 1),
+ svmla_lane_za32_vg2x1 (w0, z0, z3, 1))
+
+/*
+** mla_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** fmlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w7_z0_z3_2, svfloat16_t,
+ svmla_lane_za32_f16_vg2x1 (w7, z0, z3, 2),
+ svmla_lane_za32_vg2x1 (w7, z0, z3, 2))
+
+/*
+** mla_lane_w8_z7_z3_3:
+** fmlal za\.s\[w8, 0:1\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z7_z3_3, svfloat16_t,
+ svmla_lane_za32_f16_vg2x1 (w8, z7, z3, 3),
+ svmla_lane_za32_vg2x1 (w8, z7, z3, 3))
+
+/*
+** mla_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** fmlal za\.s\[w8, 0:1\], z31\.h, 
\1\.h\[4\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8_z31_z16_4, svfloat16_t, + svmla_lane_za32_f16_vg2x1 (w8, z31, z16, 4), + svmla_lane_za32_vg2x1 (w8, z31, z16, 4)) + +/* +** mla_lane_w8p1_z0_z0_5: +** add (w8|w9|w10|w11), w8, #?1 +** fmlal za\.s\[\1, 0:1\], z0\.h, z0\.h\[5\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p1_z0_z0_5, svfloat16_t, + svmla_lane_za32_f16_vg2x1 (w8 + 1, z0, z0, 5), + svmla_lane_za32_vg2x1 (w8 + 1, z0, z0, 5)) + +/* +** mla_lane_w8p2_z23_z0_6: +** fmlal za\.s\[w8, 2:3\], z23\.h, z0\.h\[6\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p2_z23_z0_6, svfloat16_t, + svmla_lane_za32_f16_vg2x1 (w8 + 2, z23, z0, 6), + svmla_lane_za32_vg2x1 (w8 + 2, z23, z0, 6)) + +/* +** mla_lane_w11p6_z23_z0_7: +** fmlal za\.s\[w11, 6:7\], z23\.h, z0\.h\[7\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w11p6_z23_z0_7, svfloat16_t, + svmla_lane_za32_f16_vg2x1 (w11 + 6, z23, z0, 7), + svmla_lane_za32_vg2x1 (w11 + 6, z23, z0, 7)) + +/* +** mla_lane_w8p7_z7_z7_0: +** add (w8|w9|w10|w11), w8, #?7 +** fmlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[0\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p7_z7_z7_0, svfloat16_t, + svmla_lane_za32_f16_vg2x1 (w8 + 7, z7, z7, 0), + svmla_lane_za32_vg2x1 (w8 + 7, z7, z7, 0)) + +/* +** mla_lane_w11p10_z23_z0_1: +** fmlal za\.s\[w11, 10:11\], z23\.h, z0\.h\[1\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w11p10_z23_z0_1, svfloat16_t, + svmla_lane_za32_f16_vg2x1 (w11 + 10, z23, z0, 1), + svmla_lane_za32_vg2x1 (w11 + 10, z23, z0, 1)) + +/* +** mla_lane_w8p14_z23_z0_2: +** fmlal za\.s\[w8, 14:15\], z23\.h, z0\.h\[2\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p14_z23_z0_2, svfloat16_t, + svmla_lane_za32_f16_vg2x1 (w8 + 14, z23, z0, 2), + svmla_lane_za32_vg2x1 (w8 + 14, z23, z0, 2)) + +/* +** mla_lane_w8p15_z7_z7_3: +** add (w8|w9|w10|w11), w8, #?15 +** fmlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[3\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p15_z7_z7_3, svfloat16_t, + svmla_lane_za32_f16_vg2x1 (w8 + 15, z7, z7, 3), + svmla_lane_za32_vg2x1 (w8 + 15, z7, z7, 3)) + +/* +** mla_lane_w8p16_z7_z7_4: +** add (w8|w9|w10|w11), w8, #?16 +** fmlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[4\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p16_z7_z7_4, svfloat16_t, + svmla_lane_za32_f16_vg2x1 (w8 + 16, z7, z7, 4), + svmla_lane_za32_vg2x1 (w8 + 16, z7, z7, 4)) + +/* +** mla_lane_w8m1_z16_z0_5: +** sub (w8|w9|w10|w11), w8, #?1 +** fmlal za\.s\[\1, 0:1\], z16\.h, z0\.h\[5\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8m1_z16_z0_5, svfloat16_t, + svmla_lane_za32_f16_vg2x1 (w8 - 1, z16, z0, 5), + svmla_lane_za32_vg2x1 (w8 - 1, z16, z0, 5)) + +/* +** mla_lane_w12_z0_z3_6: +** mov (w8|w9|w10|w11), w12 +** fmlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[6\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w12_z0_z3_6, svfloat16_t, + svmla_lane_za32_f16_vg2x1 (w12, z0, z3, 6), + svmla_lane_za32_vg2x1 (w12, z0, z3, 6)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f16_vg2x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f16_vg2x2.c new file mode 100644 index 0000000000000000000000000000000000000000..6c7735582cc4a20fdcc8b1ba8dc98f997e900fb4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f16_vg2x2.c @@ -0,0 +1,112 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** fmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svfloat16x2_t, svfloat16_t, + svmla_lane_za32_f16_vg2x2 (0, z0, z4, 0), + svmla_lane_za32_vg2x2 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov 
(w8|w9|w10|w11), w0 +** fmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svfloat16x2_t, svfloat16_t, + svmla_lane_za32_f16_vg2x2 (w0, z0, z7, 1), + svmla_lane_za32_vg2x2 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** fmlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svfloat16x2_t, svfloat16_t, + svmla_lane_za32_f16_vg2x2 (w8, z28, z4, 2), + svmla_lane_za32_vg2x2 (w8, z28, z4, 2)) + +/* +** mla_lane_w8p6_z0_z4_7: +** fmlal za\.s\[w8, 6:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p6_z0_z4_7, svfloat16x2_t, svfloat16_t, + svmla_lane_za32_f16_vg2x2 (w8 + 6, z0, z4, 7), + svmla_lane_za32_vg2x2 (w8 + 6, z0, z4, 7)) + +/* +** mla_lane_w8p7_z0_z4_3: +** add (w8|w9|w10|w11), w8, #?7 +** fmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svfloat16x2_t, svfloat16_t, + svmla_lane_za32_f16_vg2x2 (w8 + 7, z0, z4, 3), + svmla_lane_za32_vg2x2 (w8 + 7, z0, z4, 3)) + +/* +** mla_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** fmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svfloat16x2_t, svfloat16_t, + svmla_lane_za32_f16_vg2x2 (w8 + 8, z0, z4, 4), + svmla_lane_za32_vg2x2 (w8 + 8, z0, z4, 4)) + +/* +** mla_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** fmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svfloat16x2_t, svfloat16_t, + svmla_lane_za32_f16_vg2x2 (w0 - 1, z0, z4, 5), + svmla_lane_za32_vg2x2 (w0 - 1, z0, z4, 5)) + +/* +** mla_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! +** fmlal za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svfloat16x2_t, svfloat16_t, + svmla_lane_za32_f16_vg2x2 (w8, z4, z15, 6), + svmla_lane_za32_vg2x2 (w8, z4, z15, 6)) + +/* +** mla_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** fmlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svfloat16x2_t, svfloat16_t, + svmla_lane_za32_f16_vg2x2 (w8, z28, z16, 7), + svmla_lane_za32_vg2x2 (w8, z28, z16, 7)) + +/* +** mla_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** fmlal za\.s\[w8, 0:1, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svfloat16x2_t, svfloat16_t, + svmla_lane_za32_f16_vg2x2 (w8, z17, z7, 0), + svmla_lane_za32_vg2x2 (w8, z17, z7, 0)) + +/* +** mla_lane_w8_z22_z4_1: +** fmlal za\.s\[w8, 0:1, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svfloat16x2_t, svfloat16_t, + svmla_lane_za32_f16_vg2x2 (w8, z22, z4, 1), + svmla_lane_za32_vg2x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f16_vg2x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f16_vg2x4.c new file mode 100644 index 0000000000000000000000000000000000000000..4c9f5455fbb9fee1d4f60e9af37359ef5025577f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f16_vg2x4.c @@ -0,0 +1,118 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** fmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svfloat16x4_t, svfloat16_t, + svmla_lane_za32_f16_vg2x4 (0, z0, z4, 0), + 
svmla_lane_za32_vg2x4 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** fmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svfloat16x4_t, svfloat16_t, + svmla_lane_za32_f16_vg2x4 (w0, z0, z7, 1), + svmla_lane_za32_vg2x4 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** fmlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svfloat16x4_t, svfloat16_t, + svmla_lane_za32_f16_vg2x4 (w8, z28, z4, 2), + svmla_lane_za32_vg2x4 (w8, z28, z4, 2)) + +/* +** mla_lane_w8p6_z0_z4_7: +** fmlal za\.s\[w8, 6:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p6_z0_z4_7, svfloat16x4_t, svfloat16_t, + svmla_lane_za32_f16_vg2x4 (w8 + 6, z0, z4, 7), + svmla_lane_za32_vg2x4 (w8 + 6, z0, z4, 7)) + +/* +** mla_lane_w8p7_z0_z4_3: +** add (w8|w9|w10|w11), w8, #?7 +** fmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svfloat16x4_t, svfloat16_t, + svmla_lane_za32_f16_vg2x4 (w8 + 7, z0, z4, 3), + svmla_lane_za32_vg2x4 (w8 + 7, z0, z4, 3)) + +/* +** mla_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** fmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svfloat16x4_t, svfloat16_t, + svmla_lane_za32_f16_vg2x4 (w8 + 8, z0, z4, 4), + svmla_lane_za32_vg2x4 (w8 + 8, z0, z4, 4)) + +/* +** mla_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** fmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svfloat16x4_t, svfloat16_t, + svmla_lane_za32_f16_vg2x4 (w0 - 1, z0, z4, 5), + svmla_lane_za32_vg2x4 (w0 - 1, z0, z4, 5)) + +/* +** mla_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! 
+** fmlal za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svfloat16x4_t, svfloat16_t, + svmla_lane_za32_f16_vg2x4 (w8, z4, z15, 6), + svmla_lane_za32_vg2x4 (w8, z4, z15, 6)) + +/* +** mla_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** fmlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svfloat16x4_t, svfloat16_t, + svmla_lane_za32_f16_vg2x4 (w8, z28, z16, 7), + svmla_lane_za32_vg2x4 (w8, z28, z16, 7)) + +/* +** mla_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmlal za\.s\[w8, 0:1, vgx4\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svfloat16x4_t, svfloat16_t, + svmla_lane_za32_f16_vg2x4 (w8, z17, z7, 0), + svmla_lane_za32_vg2x4 (w8, z17, z7, 0)) + +/* +** mla_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmlal za\.s\[w8, 0:1, vgx4\], [^\n]+, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svfloat16x4_t, svfloat16_t, + svmla_lane_za32_f16_vg2x4 (w8, z22, z4, 1), + svmla_lane_za32_vg2x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f32_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f32_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..e99f36efa8f6279776ddbb99d51fe27c2ad9aff9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f32_vg1x2.c @@ -0,0 +1,102 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** fmla za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, z4\.s\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svfloat32x2_t, svfloat32_t, + svmla_lane_za32_f32_vg1x2 (0, z0, z4, 0), + svmla_lane_za32_vg1x2 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** fmla za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, z7\.s\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svfloat32x2_t, svfloat32_t, + svmla_lane_za32_f32_vg1x2 (w0, z0, z7, 1), + svmla_lane_za32_vg1x2 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** fmla za\.s\[w8, 0, vgx2\], {z28\.s - z29\.s}, z4\.s\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svfloat32x2_t, svfloat32_t, + svmla_lane_za32_f32_vg1x2 (w8, z28, z4, 2), + svmla_lane_za32_vg1x2 (w8, z28, z4, 2)) + +/* +** mla_lane_w8p7_z0_z4_3: +** fmla za\.s\[w8, 7, vgx2\], {z0\.s - z1\.s}, z4\.s\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svfloat32x2_t, svfloat32_t, + svmla_lane_za32_f32_vg1x2 (w8 + 7, z0, z4, 3), + svmla_lane_za32_vg1x2 (w8 + 7, z0, z4, 3)) + +/* +** mla_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** fmla za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, z4\.s\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_0, svfloat32x2_t, svfloat32_t, + svmla_lane_za32_f32_vg1x2 (w8 + 8, z0, z4, 0), + svmla_lane_za32_vg1x2 (w8 + 8, z0, z4, 0)) + +/* +** mla_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** fmla za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, z4\.s\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_1, svfloat32x2_t, svfloat32_t, + svmla_lane_za32_f32_vg1x2 (w0 - 1, z0, z4, 1), + svmla_lane_za32_vg1x2 (w0 - 1, z0, z4, 1)) + +/* +** mla_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** fmla za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, z15\.s\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_2, svfloat32x2_t, svfloat32_t, + svmla_lane_za32_f32_vg1x2 (w8, z4, z15, 2), + svmla_lane_za32_vg1x2 (w8, z4, z15, 2)) + +/* +** mla_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** fmla za\.s\[w8, 0, vgx2\], {z28\.s - z29\.s}, \1\.s\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_3, svfloat32x2_t, svfloat32_t, + svmla_lane_za32_f32_vg1x2 (w8, z28, z16, 3), + svmla_lane_za32_vg1x2 (w8, z28, z16, 3)) + +/* +** mla_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** fmla za\.s\[w8, 0, vgx2\], [^\n]+, z7\.s\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svfloat32x2_t, svfloat32_t, + svmla_lane_za32_f32_vg1x2 (w8, z17, z7, 0), + svmla_lane_za32_vg1x2 (w8, z17, z7, 0)) + +/* +** mla_lane_w8_z22_z4_1: +** fmla za\.s\[w8, 0, vgx2\], {z22\.s - z23\.s}, z4\.s\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svfloat32x2_t, svfloat32_t, + svmla_lane_za32_f32_vg1x2 (w8, z22, z4, 1), + svmla_lane_za32_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f32_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f32_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..86dd66192197e3effe1d1e72697e194ba26202d5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_f32_vg1x4.c @@ -0,0 +1,108 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** fmla za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, z4\.s\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svfloat32x4_t, svfloat32_t, + svmla_lane_za32_f32_vg1x4 (0, z0, z4, 0), + svmla_lane_za32_vg1x4 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** fmla za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, z7\.s\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svfloat32x4_t, svfloat32_t, + svmla_lane_za32_f32_vg1x4 (w0, z0, z7, 1), + svmla_lane_za32_vg1x4 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** fmla za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}, z4\.s\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svfloat32x4_t, svfloat32_t, + svmla_lane_za32_f32_vg1x4 (w8, z28, z4, 2), + svmla_lane_za32_vg1x4 (w8, z28, z4, 2)) + +/* +** mla_lane_w8p7_z0_z4_3: +** fmla za\.s\[w8, 7, vgx4\], {z0\.s - z3\.s}, z4\.s\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svfloat32x4_t, svfloat32_t, + svmla_lane_za32_f32_vg1x4 (w8 + 7, z0, z4, 3), + svmla_lane_za32_vg1x4 (w8 + 7, z0, z4, 3)) + +/* +** mla_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** fmla za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, z4\.s\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_0, svfloat32x4_t, svfloat32_t, + svmla_lane_za32_f32_vg1x4 (w8 + 8, z0, z4, 0), + svmla_lane_za32_vg1x4 (w8 + 8, z0, z4, 0)) + +/* +** mla_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** fmla za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, z4\.s\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_1, svfloat32x4_t, svfloat32_t, + svmla_lane_za32_f32_vg1x4 (w0 - 1, z0, z4, 1), + svmla_lane_za32_vg1x4 (w0 - 1, z0, z4, 1)) + +/* +** mla_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** fmla za\.s\[w8, 0, vgx4\], {z4\.s - z7\.s}, z15\.s\[2\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_2, svfloat32x4_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x4 (w8, z4, z15, 2),
+ svmla_lane_za32_vg1x4 (w8, z4, z15, 2))
+
+/*
+** mla_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** fmla za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}, \1\.s\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_3, svfloat32x4_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x4 (w8, z28, z16, 3),
+ svmla_lane_za32_vg1x4 (w8, z28, z16, 3))
+
+/*
+** mla_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmla za\.s\[w8, 0, vgx4\], [^\n]+, z7\.s\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svfloat32x4_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x4 (w8, z17, z7, 0),
+ svmla_lane_za32_vg1x4 (w8, z17, z7, 0))
+
+/*
+** mla_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmla za\.s\[w8, 0, vgx4\], [^\n]+, z4\.s\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svfloat32x4_t, svfloat32_t,
+ svmla_lane_za32_f32_vg1x4 (w8, z22, z4, 1),
+ svmla_lane_za32_vg1x4 (w8, z22, z4, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s16_vg2x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s16_vg2x1.c
new file mode 100644
index 0000000000000000000000000000000000000000..e21f6f2f20de001bcf274b63b0158de81e3387f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s16_vg2x1.c
@@ -0,0 +1,148 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** smlal za\.s\[\1, 0:1\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_0_z0_z0_0, svint16_t,
+ svmla_lane_za32_s16_vg2x1 (0, z0, z0, 0),
+ svmla_lane_za32_vg2x1 (0, z0, z0, 0))
+
+/*
+** mla_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** smlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w0_z0_z3_1, svint16_t,
+ svmla_lane_za32_s16_vg2x1 (w0, z0, z3, 1),
+ svmla_lane_za32_vg2x1 (w0, z0, z3, 1))
+
+/*
+** mla_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** smlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w7_z0_z3_2, svint16_t,
+ svmla_lane_za32_s16_vg2x1 (w7, z0, z3, 2),
+ svmla_lane_za32_vg2x1 (w7, z0, z3, 2))
+
+/*
+** mla_lane_w8_z7_z3_3:
+** smlal za\.s\[w8, 0:1\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z7_z3_3, svint16_t,
+ svmla_lane_za32_s16_vg2x1 (w8, z7, z3, 3),
+ svmla_lane_za32_vg2x1 (w8, z7, z3, 3))
+
+/*
+** mla_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** smlal za\.s\[w8, 0:1\], z31\.h, 
\1\.h\[4\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8_z31_z16_4, svint16_t, + svmla_lane_za32_s16_vg2x1 (w8, z31, z16, 4), + svmla_lane_za32_vg2x1 (w8, z31, z16, 4)) + +/* +** mla_lane_w8p1_z0_z0_5: +** add (w8|w9|w10|w11), w8, #?1 +** smlal za\.s\[\1, 0:1\], z0\.h, z0\.h\[5\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p1_z0_z0_5, svint16_t, + svmla_lane_za32_s16_vg2x1 (w8 + 1, z0, z0, 5), + svmla_lane_za32_vg2x1 (w8 + 1, z0, z0, 5)) + +/* +** mla_lane_w8p2_z23_z0_6: +** smlal za\.s\[w8, 2:3\], z23\.h, z0\.h\[6\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p2_z23_z0_6, svint16_t, + svmla_lane_za32_s16_vg2x1 (w8 + 2, z23, z0, 6), + svmla_lane_za32_vg2x1 (w8 + 2, z23, z0, 6)) + +/* +** mla_lane_w11p6_z23_z0_7: +** smlal za\.s\[w11, 6:7\], z23\.h, z0\.h\[7\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w11p6_z23_z0_7, svint16_t, + svmla_lane_za32_s16_vg2x1 (w11 + 6, z23, z0, 7), + svmla_lane_za32_vg2x1 (w11 + 6, z23, z0, 7)) + +/* +** mla_lane_w8p7_z7_z7_0: +** add (w8|w9|w10|w11), w8, #?7 +** smlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[0\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p7_z7_z7_0, svint16_t, + svmla_lane_za32_s16_vg2x1 (w8 + 7, z7, z7, 0), + svmla_lane_za32_vg2x1 (w8 + 7, z7, z7, 0)) + +/* +** mla_lane_w11p10_z23_z0_1: +** smlal za\.s\[w11, 10:11\], z23\.h, z0\.h\[1\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w11p10_z23_z0_1, svint16_t, + svmla_lane_za32_s16_vg2x1 (w11 + 10, z23, z0, 1), + svmla_lane_za32_vg2x1 (w11 + 10, z23, z0, 1)) + +/* +** mla_lane_w8p14_z23_z0_2: +** smlal za\.s\[w8, 14:15\], z23\.h, z0\.h\[2\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p14_z23_z0_2, svint16_t, + svmla_lane_za32_s16_vg2x1 (w8 + 14, z23, z0, 2), + svmla_lane_za32_vg2x1 (w8 + 14, z23, z0, 2)) + +/* +** mla_lane_w8p15_z7_z7_3: +** add (w8|w9|w10|w11), w8, #?15 +** smlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[3\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p15_z7_z7_3, svint16_t, + svmla_lane_za32_s16_vg2x1 (w8 + 15, z7, z7, 3), + svmla_lane_za32_vg2x1 (w8 + 15, z7, z7, 3)) + +/* +** mla_lane_w8p16_z7_z7_4: +** add (w8|w9|w10|w11), w8, #?16 +** smlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[4\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p16_z7_z7_4, svint16_t, + svmla_lane_za32_s16_vg2x1 (w8 + 16, z7, z7, 4), + svmla_lane_za32_vg2x1 (w8 + 16, z7, z7, 4)) + +/* +** mla_lane_w8m1_z16_z0_5: +** sub (w8|w9|w10|w11), w8, #?1 +** smlal za\.s\[\1, 0:1\], z16\.h, z0\.h\[5\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8m1_z16_z0_5, svint16_t, + svmla_lane_za32_s16_vg2x1 (w8 - 1, z16, z0, 5), + svmla_lane_za32_vg2x1 (w8 - 1, z16, z0, 5)) + +/* +** mla_lane_w12_z0_z3_6: +** mov (w8|w9|w10|w11), w12 +** smlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[6\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w12_z0_z3_6, svint16_t, + svmla_lane_za32_s16_vg2x1 (w12, z0, z3, 6), + svmla_lane_za32_vg2x1 (w12, z0, z3, 6)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s16_vg2x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s16_vg2x2.c new file mode 100644 index 0000000000000000000000000000000000000000..f67987137d9e0ab4d6d43ac896aceef9437b5faa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s16_vg2x2.c @@ -0,0 +1,112 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** smlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svint16x2_t, svint16_t, + svmla_lane_za32_s16_vg2x2 (0, z0, z4, 0), + svmla_lane_za32_vg2x2 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** smlal 
za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svint16x2_t, svint16_t, + svmla_lane_za32_s16_vg2x2 (w0, z0, z7, 1), + svmla_lane_za32_vg2x2 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** smlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svint16x2_t, svint16_t, + svmla_lane_za32_s16_vg2x2 (w8, z28, z4, 2), + svmla_lane_za32_vg2x2 (w8, z28, z4, 2)) + +/* +** mla_lane_w8p6_z0_z4_7: +** smlal za\.s\[w8, 6:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p6_z0_z4_7, svint16x2_t, svint16_t, + svmla_lane_za32_s16_vg2x2 (w8 + 6, z0, z4, 7), + svmla_lane_za32_vg2x2 (w8 + 6, z0, z4, 7)) + +/* +** mla_lane_w8p7_z0_z4_3: +** add (w8|w9|w10|w11), w8, #?7 +** smlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svint16x2_t, svint16_t, + svmla_lane_za32_s16_vg2x2 (w8 + 7, z0, z4, 3), + svmla_lane_za32_vg2x2 (w8 + 7, z0, z4, 3)) + +/* +** mla_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** smlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svint16x2_t, svint16_t, + svmla_lane_za32_s16_vg2x2 (w8 + 8, z0, z4, 4), + svmla_lane_za32_vg2x2 (w8 + 8, z0, z4, 4)) + +/* +** mla_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** smlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svint16x2_t, svint16_t, + svmla_lane_za32_s16_vg2x2 (w0 - 1, z0, z4, 5), + svmla_lane_za32_vg2x2 (w0 - 1, z0, z4, 5)) + +/* +** mla_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! +** smlal za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svint16x2_t, svint16_t, + svmla_lane_za32_s16_vg2x2 (w8, z4, z15, 6), + svmla_lane_za32_vg2x2 (w8, z4, z15, 6)) + +/* +** mla_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** smlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svint16x2_t, svint16_t, + svmla_lane_za32_s16_vg2x2 (w8, z28, z16, 7), + svmla_lane_za32_vg2x2 (w8, z28, z16, 7)) + +/* +** mla_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** smlal za\.s\[w8, 0:1, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svint16x2_t, svint16_t, + svmla_lane_za32_s16_vg2x2 (w8, z17, z7, 0), + svmla_lane_za32_vg2x2 (w8, z17, z7, 0)) + +/* +** mla_lane_w8_z22_z4_1: +** smlal za\.s\[w8, 0:1, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svint16x2_t, svint16_t, + svmla_lane_za32_s16_vg2x2 (w8, z22, z4, 1), + svmla_lane_za32_vg2x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s16_vg2x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s16_vg2x4.c new file mode 100644 index 0000000000000000000000000000000000000000..81d87de48e4ca52f6823486d009148f5972b9ee8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s16_vg2x4.c @@ -0,0 +1,118 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** smlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svint16x4_t, svint16_t, + svmla_lane_za32_s16_vg2x4 (0, z0, z4, 0), + svmla_lane_za32_vg2x4 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov 
(w8|w9|w10|w11), w0 +** smlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svint16x4_t, svint16_t, + svmla_lane_za32_s16_vg2x4 (w0, z0, z7, 1), + svmla_lane_za32_vg2x4 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** smlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svint16x4_t, svint16_t, + svmla_lane_za32_s16_vg2x4 (w8, z28, z4, 2), + svmla_lane_za32_vg2x4 (w8, z28, z4, 2)) + +/* +** mla_lane_w8p6_z0_z4_7: +** smlal za\.s\[w8, 6:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p6_z0_z4_7, svint16x4_t, svint16_t, + svmla_lane_za32_s16_vg2x4 (w8 + 6, z0, z4, 7), + svmla_lane_za32_vg2x4 (w8 + 6, z0, z4, 7)) + +/* +** mla_lane_w8p7_z0_z4_3: +** add (w8|w9|w10|w11), w8, #?7 +** smlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svint16x4_t, svint16_t, + svmla_lane_za32_s16_vg2x4 (w8 + 7, z0, z4, 3), + svmla_lane_za32_vg2x4 (w8 + 7, z0, z4, 3)) + +/* +** mla_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** smlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svint16x4_t, svint16_t, + svmla_lane_za32_s16_vg2x4 (w8 + 8, z0, z4, 4), + svmla_lane_za32_vg2x4 (w8 + 8, z0, z4, 4)) + +/* +** mla_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** smlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svint16x4_t, svint16_t, + svmla_lane_za32_s16_vg2x4 (w0 - 1, z0, z4, 5), + svmla_lane_za32_vg2x4 (w0 - 1, z0, z4, 5)) + +/* +** mla_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! +** smlal za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svint16x4_t, svint16_t, + svmla_lane_za32_s16_vg2x4 (w8, z4, z15, 6), + svmla_lane_za32_vg2x4 (w8, z4, z15, 6)) + +/* +** mla_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** smlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svint16x4_t, svint16_t, + svmla_lane_za32_s16_vg2x4 (w8, z28, z16, 7), + svmla_lane_za32_vg2x4 (w8, z28, z16, 7)) + +/* +** mla_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smlal za\.s\[w8, 0:1, vgx4\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svint16x4_t, svint16_t, + svmla_lane_za32_s16_vg2x4 (w8, z17, z7, 0), + svmla_lane_za32_vg2x4 (w8, z17, z7, 0)) + +/* +** mla_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smlal za\.s\[w8, 0:1, vgx4\], [^\n]+, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svint16x4_t, svint16_t, + svmla_lane_za32_s16_vg2x4 (w8, z22, z4, 1), + svmla_lane_za32_vg2x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s8_vg4x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s8_vg4x1.c new file mode 100644 index 0000000000000000000000000000000000000000..7bdd9b621a1da512de78902bf7f30b50c26751f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s8_vg4x1.c @@ -0,0 +1,150 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z0_0: +** mov (w8|w9|w10|w11), #?0 +** smlall za\.s\[\1, 0:3\], z0\.b, z0\.b\[0\] +** ret +*/ +TEST_ZA_X1 (mla_lane_0_z0_z0_0, svint8_t, + svmla_lane_za32_s8_vg4x1 (0, z0, z0, 0), + 
svmla_lane_za32_vg4x1 (0, z0, z0, 0))
+
+/*
+** mla_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** smlall za\.s\[\1, 0:3\], z0\.b, z3\.b\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w0_z0_z3_1, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w0, z0, z3, 1),
+ svmla_lane_za32_vg4x1 (w0, z0, z3, 1))
+
+/*
+** mla_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** smlall za\.s\[\1, 0:3\], z0\.b, z3\.b\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w7_z0_z3_2, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w7, z0, z3, 2),
+ svmla_lane_za32_vg4x1 (w7, z0, z3, 2))
+
+/*
+** mla_lane_w8_z7_z3_3:
+** smlall za\.s\[w8, 0:3\], z7\.b, z3\.b\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z7_z3_3, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w8, z7, z3, 3),
+ svmla_lane_za32_vg4x1 (w8, z7, z3, 3))
+
+/*
+** mla_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** smlall za\.s\[w8, 0:3\], z31\.b, \1\.b\[4\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z31_z16_4, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w8, z31, z16, 4),
+ svmla_lane_za32_vg4x1 (w8, z31, z16, 4))
+
+/*
+** mla_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** smlall za\.s\[\1, 0:3\], z0\.b, z0\.b\[5\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p1_z0_z0_5, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w8 + 1, z0, z0, 5),
+ svmla_lane_za32_vg4x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mla_lane_w8p2_z23_z0_6:
+** add (w8|w9|w10|w11), w8, #?2
+** smlall za\.s\[\1, 0:3\], z23\.b, z0\.b\[6\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p2_z23_z0_6, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w8 + 2, z23, z0, 6),
+ svmla_lane_za32_vg4x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mla_lane_w11p4_z23_z0_7:
+** smlall za\.s\[w11, 4:7\], z23\.b, z0\.b\[7\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w11p4_z23_z0_7, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w11 + 4, z23, z0, 7),
+ svmla_lane_za32_vg4x1 (w11 + 4, z23, z0, 7))
+
+/*
+** mla_lane_w8p7_z7_z7_8:
+** add (w8|w9|w10|w11), w8, #?7
+** smlall za\.s\[\1, 0:3\], z7\.b, z7\.b\[8\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p7_z7_z7_8, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w8 + 7, z7, z7, 8),
+ svmla_lane_za32_vg4x1 (w8 + 7, z7, z7, 8))
+
+/*
+** mla_lane_w11p12_z23_z0_9:
+** smlall za\.s\[w11, 12:15\], z23\.b, z0\.b\[9\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w11p12_z23_z0_9, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w11 + 12, z23, z0, 9),
+ svmla_lane_za32_vg4x1 (w11 + 12, z23, z0, 9))
+
+/*
+** mla_lane_w8p14_z23_z0_10:
+** add (w8|w9|w10|w11), w8, #?14
+** smlall za\.s\[\1, 0:3\], z23\.b, z0\.b\[10\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p14_z23_z0_10, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w8 + 14, z23, z0, 10),
+ svmla_lane_za32_vg4x1 (w8 + 14, z23, z0, 10))
+
+/*
+** mla_lane_w8p15_z7_z7_11:
+** add (w8|w9|w10|w11), w8, #?15
+** smlall za\.s\[\1, 0:3\], z7\.b, z7\.b\[11\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p15_z7_z7_11, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w8 + 15, z7, z7, 11),
+ svmla_lane_za32_vg4x1 (w8 + 15, z7, z7, 11))
+
+/*
+** mla_lane_w8p16_z7_z7_12:
+** add (w8|w9|w10|w11), w8, #?16
+** smlall za\.s\[\1, 0:3\], z7\.b, z7\.b\[12\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p16_z7_z7_12, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w8 + 16, z7, z7, 12),
+ svmla_lane_za32_vg4x1 (w8 + 16, z7, z7, 12))
+
+/*
+** mla_lane_w8m1_z16_z0_13:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlall za\.s\[\1, 0:3\], z16\.b, z0\.b\[13\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8m1_z16_z0_13, svint8_t,
+ svmla_lane_za32_s8_vg4x1 (w8 - 1, z16, z0, 13),
+ svmla_lane_za32_vg4x1 (w8 - 1, z16, z0, 13))
+
+/*
+** mla_lane_w12_z0_z3_15:
+** mov (w8|w9|w10|w11), w12
+** smlall za\.s\[\1, 0:3\], z0\.b, z3\.b\[15\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w12_z0_z3_15, 
svint8_t, + svmla_lane_za32_s8_vg4x1 (w12, z0, z3, 15), + svmla_lane_za32_vg4x1 (w12, z0, z3, 15)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s8_vg4x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s8_vg4x2.c new file mode 100644 index 0000000000000000000000000000000000000000..e84706ce6296fa434336ffc31a41dedc65cedb0a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s8_vg4x2.c @@ -0,0 +1,122 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** smlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svint8x2_t, svint8_t, + svmla_lane_za32_s8_vg4x2 (0, z0, z4, 0), + svmla_lane_za32_vg4x2 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** smlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svint8x2_t, svint8_t, + svmla_lane_za32_s8_vg4x2 (w0, z0, z7, 1), + svmla_lane_za32_vg4x2 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** smlall za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svint8x2_t, svint8_t, + svmla_lane_za32_s8_vg4x2 (w8, z28, z4, 2), + svmla_lane_za32_vg4x2 (w8, z28, z4, 2)) + +/* +** mla_lane_w11p4_z0_z4_3: +** smlall za\.s\[w11, 4:7, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w11p4_z0_z4_3, svint8x2_t, svint8_t, + svmla_lane_za32_s8_vg4x2 (w11 + 4, z0, z4, 3), + svmla_lane_za32_vg4x2 (w11 + 4, z0, z4, 3)) + +/* +** mla_lane_w8p6_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?6 +** smlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[4\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p6_z0_z4_4, svint8x2_t, svint8_t, + svmla_lane_za32_s8_vg4x2 (w8 + 6, z0, z4, 4), + svmla_lane_za32_vg4x2 (w8 + 6, z0, z4, 4)) + +/* +** mla_lane_w8p7_z0_z4_5: +** add (w8|w9|w10|w11), w8, #?7 +** smlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_5, svint8x2_t, svint8_t, + svmla_lane_za32_s8_vg4x2 (w8 + 7, z0, z4, 5), + svmla_lane_za32_vg4x2 (w8 + 7, z0, z4, 5)) + +/* +** mla_lane_w8p8_z0_z4_7: +** add (w8|w9|w10|w11), w8, #?8 +** smlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_7, svint8x2_t, svint8_t, + svmla_lane_za32_s8_vg4x2 (w8 + 8, z0, z4, 7), + svmla_lane_za32_vg4x2 (w8 + 8, z0, z4, 7)) + +/* +** mla_lane_w0m1_z0_z4_9: +** sub (w8|w9|w10|w11), w0, #?1 +** smlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[9\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_9, svint8x2_t, svint8_t, + svmla_lane_za32_s8_vg4x2 (w0 - 1, z0, z4, 9), + svmla_lane_za32_vg4x2 (w0 - 1, z0, z4, 9)) + +/* +** mla_lane_w8_z4_z15_10: +** str d15, \[sp, #?-16\]! 
+** smlall za\.s\[w8, 0:3, vgx2\], {z4\.b - z5\.b}, z15\.b\[10\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_10, svint8x2_t, svint8_t, + svmla_lane_za32_s8_vg4x2 (w8, z4, z15, 10), + svmla_lane_za32_vg4x2 (w8, z4, z15, 10)) + +/* +** mla_lane_w8_z28_z16_11: +** mov (z[0-7]).d, z16.d +** smlall za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, \1\.b\[11\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_11, svint8x2_t, svint8_t, + svmla_lane_za32_s8_vg4x2 (w8, z28, z16, 11), + svmla_lane_za32_vg4x2 (w8, z28, z16, 11)) + +/* +** mla_lane_w8_z17_z7_13: +** mov [^\n]+ +** mov [^\n]+ +** smlall za\.s\[w8, 0:3, vgx2\], [^\n]+, z7\.b\[13\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_13, svint8x2_t, svint8_t, + svmla_lane_za32_s8_vg4x2 (w8, z17, z7, 13), + svmla_lane_za32_vg4x2 (w8, z17, z7, 13)) + +/* +** mla_lane_w8_z22_z4_15: +** smlall za\.s\[w8, 0:3, vgx2\], {z22\.b - z23\.b}, z4\.b\[15\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_15, svint8x2_t, svint8_t, + svmla_lane_za32_s8_vg4x2 (w8, z22, z4, 15), + svmla_lane_za32_vg4x2 (w8, z22, z4, 15)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s8_vg4x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s8_vg4x4.c new file mode 100644 index 0000000000000000000000000000000000000000..dbb64efe5b5b9ce75a1faccf236cfda1ca097fb7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_s8_vg4x4.c @@ -0,0 +1,128 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** smlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svint8x4_t, svint8_t, + svmla_lane_za32_s8_vg4x4 (0, z0, z4, 0), + svmla_lane_za32_vg4x4 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** smlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svint8x4_t, svint8_t, + svmla_lane_za32_s8_vg4x4 (w0, z0, z7, 1), + svmla_lane_za32_vg4x4 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** smlall za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svint8x4_t, svint8_t, + svmla_lane_za32_s8_vg4x4 (w8, z28, z4, 2), + svmla_lane_za32_vg4x4 (w8, z28, z4, 2)) + +/* +** mla_lane_w11p4_z0_z4_7: +** smlall za\.s\[w11, 4:7, vgx4\], {z0\.b - z3\.b}, z4\.b\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w11p4_z0_z4_7, svint8x4_t, svint8_t, + svmla_lane_za32_s8_vg4x4 (w11 + 4, z0, z4, 7), + svmla_lane_za32_vg4x4 (w11 + 4, z0, z4, 7)) + +/* +** mla_lane_w8p6_z0_z4_8: +** add (w8|w9|w10|w11), w8, #?6 +** smlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[8\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p6_z0_z4_8, svint8x4_t, svint8_t, + svmla_lane_za32_s8_vg4x4 (w8 + 6, z0, z4, 8), + svmla_lane_za32_vg4x4 (w8 + 6, z0, z4, 8)) + +/* +** mla_lane_w8p7_z0_z4_9: +** add (w8|w9|w10|w11), w8, #?7 +** smlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[9\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_9, svint8x4_t, svint8_t, + svmla_lane_za32_s8_vg4x4 (w8 + 7, z0, z4, 9), + svmla_lane_za32_vg4x4 (w8 + 7, z0, z4, 9)) + +/* +** mla_lane_w8p8_z0_z4_10: +** add (w8|w9|w10|w11), w8, #?8 +** smlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[10\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_10, svint8x4_t, svint8_t, + svmla_lane_za32_s8_vg4x4 (w8 + 8, z0, z4, 10), + svmla_lane_za32_vg4x4 (w8 + 8, z0, z4, 10)) + +/* +** 
mla_lane_w0m1_z0_z4_11:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[11\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_11, svint8x4_t, svint8_t,
+ svmla_lane_za32_s8_vg4x4 (w0 - 1, z0, z4, 11),
+ svmla_lane_za32_vg4x4 (w0 - 1, z0, z4, 11))
+
+/*
+** mla_lane_w8_z4_z15_12:
+** str d15, \[sp, #?-16\]!
+** smlall za\.s\[w8, 0:3, vgx4\], {z4\.b - z7\.b}, z15\.b\[12\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_12, svint8x4_t, svint8_t,
+ svmla_lane_za32_s8_vg4x4 (w8, z4, z15, 12),
+ svmla_lane_za32_vg4x4 (w8, z4, z15, 12))
+
+/*
+** mla_lane_w8_z28_z16_13:
+** mov (z[0-7]).d, z16.d
+** smlall za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, \1\.b\[13\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_13, svint8x4_t, svint8_t,
+ svmla_lane_za32_s8_vg4x4 (w8, z28, z16, 13),
+ svmla_lane_za32_vg4x4 (w8, z28, z16, 13))
+
+/*
+** mla_lane_w8_z17_z7_14:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smlall za\.s\[w8, 0:3, vgx4\], [^\n]+, z7\.b\[14\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_14, svint8x4_t, svint8_t,
+ svmla_lane_za32_s8_vg4x4 (w8, z17, z7, 14),
+ svmla_lane_za32_vg4x4 (w8, z17, z7, 14))
+
+/*
+** mla_lane_w8_z22_z4_15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smlall za\.s\[w8, 0:3, vgx4\], [^\n]+, z4\.b\[15\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_15, svint8x4_t, svint8_t,
+ svmla_lane_za32_s8_vg4x4 (w8, z22, z4, 15),
+ svmla_lane_za32_vg4x4 (w8, z22, z4, 15))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u16_vg2x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u16_vg2x1.c
new file mode 100644
index 0000000000000000000000000000000000000000..811272f45271ab8606572a2be3ed860e5626daf3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u16_vg2x1.c
@@ -0,0 +1,148 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** umlal za\.s\[\1, 0:1\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_0_z0_z0_0, svuint16_t,
+ svmla_lane_za32_u16_vg2x1 (0, z0, z0, 0),
+ svmla_lane_za32_vg2x1 (0, z0, z0, 0))
+
+/*
+** mla_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** umlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w0_z0_z3_1, svuint16_t,
+ svmla_lane_za32_u16_vg2x1 (w0, z0, z3, 1),
+ svmla_lane_za32_vg2x1 (w0, z0, z3, 1))
+
+/*
+** mla_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** umlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w7_z0_z3_2, svuint16_t,
+ svmla_lane_za32_u16_vg2x1 (w7, z0, z3, 2),
+ svmla_lane_za32_vg2x1 (w7, z0, z3, 2))
+
+/*
+** mla_lane_w8_z7_z3_3:
+** umlal za\.s\[w8, 0:1\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z7_z3_3, svuint16_t,
+ svmla_lane_za32_u16_vg2x1 (w8, z7, z3, 3),
+ svmla_lane_za32_vg2x1 (w8, z7, z3, 3))
+
+/*
+** mla_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** umlal za\.s\[w8, 0:1\], z31\.h, 
\1\.h\[4\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8_z31_z16_4, svuint16_t, + svmla_lane_za32_u16_vg2x1 (w8, z31, z16, 4), + svmla_lane_za32_vg2x1 (w8, z31, z16, 4)) + +/* +** mla_lane_w8p1_z0_z0_5: +** add (w8|w9|w10|w11), w8, #?1 +** umlal za\.s\[\1, 0:1\], z0\.h, z0\.h\[5\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p1_z0_z0_5, svuint16_t, + svmla_lane_za32_u16_vg2x1 (w8 + 1, z0, z0, 5), + svmla_lane_za32_vg2x1 (w8 + 1, z0, z0, 5)) + +/* +** mla_lane_w8p2_z23_z0_6: +** umlal za\.s\[w8, 2:3\], z23\.h, z0\.h\[6\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p2_z23_z0_6, svuint16_t, + svmla_lane_za32_u16_vg2x1 (w8 + 2, z23, z0, 6), + svmla_lane_za32_vg2x1 (w8 + 2, z23, z0, 6)) + +/* +** mla_lane_w11p6_z23_z0_7: +** umlal za\.s\[w11, 6:7\], z23\.h, z0\.h\[7\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w11p6_z23_z0_7, svuint16_t, + svmla_lane_za32_u16_vg2x1 (w11 + 6, z23, z0, 7), + svmla_lane_za32_vg2x1 (w11 + 6, z23, z0, 7)) + +/* +** mla_lane_w8p7_z7_z7_0: +** add (w8|w9|w10|w11), w8, #?7 +** umlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[0\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p7_z7_z7_0, svuint16_t, + svmla_lane_za32_u16_vg2x1 (w8 + 7, z7, z7, 0), + svmla_lane_za32_vg2x1 (w8 + 7, z7, z7, 0)) + +/* +** mla_lane_w11p10_z23_z0_1: +** umlal za\.s\[w11, 10:11\], z23\.h, z0\.h\[1\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w11p10_z23_z0_1, svuint16_t, + svmla_lane_za32_u16_vg2x1 (w11 + 10, z23, z0, 1), + svmla_lane_za32_vg2x1 (w11 + 10, z23, z0, 1)) + +/* +** mla_lane_w8p14_z23_z0_2: +** umlal za\.s\[w8, 14:15\], z23\.h, z0\.h\[2\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p14_z23_z0_2, svuint16_t, + svmla_lane_za32_u16_vg2x1 (w8 + 14, z23, z0, 2), + svmla_lane_za32_vg2x1 (w8 + 14, z23, z0, 2)) + +/* +** mla_lane_w8p15_z7_z7_3: +** add (w8|w9|w10|w11), w8, #?15 +** umlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[3\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p15_z7_z7_3, svuint16_t, + svmla_lane_za32_u16_vg2x1 (w8 + 15, z7, z7, 3), + svmla_lane_za32_vg2x1 (w8 + 15, z7, z7, 3)) + +/* +** mla_lane_w8p16_z7_z7_4: +** add (w8|w9|w10|w11), w8, #?16 +** umlal za\.s\[\1, 0:1\], z7\.h, z7\.h\[4\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p16_z7_z7_4, svuint16_t, + svmla_lane_za32_u16_vg2x1 (w8 + 16, z7, z7, 4), + svmla_lane_za32_vg2x1 (w8 + 16, z7, z7, 4)) + +/* +** mla_lane_w8m1_z16_z0_5: +** sub (w8|w9|w10|w11), w8, #?1 +** umlal za\.s\[\1, 0:1\], z16\.h, z0\.h\[5\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8m1_z16_z0_5, svuint16_t, + svmla_lane_za32_u16_vg2x1 (w8 - 1, z16, z0, 5), + svmla_lane_za32_vg2x1 (w8 - 1, z16, z0, 5)) + +/* +** mla_lane_w12_z0_z3_6: +** mov (w8|w9|w10|w11), w12 +** umlal za\.s\[\1, 0:1\], z0\.h, z3\.h\[6\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w12_z0_z3_6, svuint16_t, + svmla_lane_za32_u16_vg2x1 (w12, z0, z3, 6), + svmla_lane_za32_vg2x1 (w12, z0, z3, 6)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u16_vg2x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u16_vg2x2.c new file mode 100644 index 0000000000000000000000000000000000000000..9b0f70bcf689f585455df7d1fe0a53cb76a89963 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u16_vg2x2.c @@ -0,0 +1,112 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** umlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svuint16x2_t, svuint16_t, + svmla_lane_za32_u16_vg2x2 (0, z0, z4, 0), + svmla_lane_za32_vg2x2 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 
+** umlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svuint16x2_t, svuint16_t, + svmla_lane_za32_u16_vg2x2 (w0, z0, z7, 1), + svmla_lane_za32_vg2x2 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** umlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svuint16x2_t, svuint16_t, + svmla_lane_za32_u16_vg2x2 (w8, z28, z4, 2), + svmla_lane_za32_vg2x2 (w8, z28, z4, 2)) + +/* +** mla_lane_w8p6_z0_z4_7: +** umlal za\.s\[w8, 6:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p6_z0_z4_7, svuint16x2_t, svuint16_t, + svmla_lane_za32_u16_vg2x2 (w8 + 6, z0, z4, 7), + svmla_lane_za32_vg2x2 (w8 + 6, z0, z4, 7)) + +/* +** mla_lane_w8p7_z0_z4_3: +** add (w8|w9|w10|w11), w8, #?7 +** umlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svuint16x2_t, svuint16_t, + svmla_lane_za32_u16_vg2x2 (w8 + 7, z0, z4, 3), + svmla_lane_za32_vg2x2 (w8 + 7, z0, z4, 3)) + +/* +** mla_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** umlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svuint16x2_t, svuint16_t, + svmla_lane_za32_u16_vg2x2 (w8 + 8, z0, z4, 4), + svmla_lane_za32_vg2x2 (w8 + 8, z0, z4, 4)) + +/* +** mla_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** umlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svuint16x2_t, svuint16_t, + svmla_lane_za32_u16_vg2x2 (w0 - 1, z0, z4, 5), + svmla_lane_za32_vg2x2 (w0 - 1, z0, z4, 5)) + +/* +** mla_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! +** umlal za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svuint16x2_t, svuint16_t, + svmla_lane_za32_u16_vg2x2 (w8, z4, z15, 6), + svmla_lane_za32_vg2x2 (w8, z4, z15, 6)) + +/* +** mla_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** umlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svuint16x2_t, svuint16_t, + svmla_lane_za32_u16_vg2x2 (w8, z28, z16, 7), + svmla_lane_za32_vg2x2 (w8, z28, z16, 7)) + +/* +** mla_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** umlal za\.s\[w8, 0:1, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svuint16x2_t, svuint16_t, + svmla_lane_za32_u16_vg2x2 (w8, z17, z7, 0), + svmla_lane_za32_vg2x2 (w8, z17, z7, 0)) + +/* +** mla_lane_w8_z22_z4_1: +** umlal za\.s\[w8, 0:1, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svuint16x2_t, svuint16_t, + svmla_lane_za32_u16_vg2x2 (w8, z22, z4, 1), + svmla_lane_za32_vg2x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u16_vg2x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u16_vg2x4.c new file mode 100644 index 0000000000000000000000000000000000000000..e8d84724c893882caa688b44f3524aebce62786b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u16_vg2x4.c @@ -0,0 +1,118 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** umlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svuint16x4_t, svuint16_t, + svmla_lane_za32_u16_vg2x4 (0, z0, z4, 0), + svmla_lane_za32_vg2x4 (0, z0, z4, 0)) + +/* +** 
mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** umlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svuint16x4_t, svuint16_t, + svmla_lane_za32_u16_vg2x4 (w0, z0, z7, 1), + svmla_lane_za32_vg2x4 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** umlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svuint16x4_t, svuint16_t, + svmla_lane_za32_u16_vg2x4 (w8, z28, z4, 2), + svmla_lane_za32_vg2x4 (w8, z28, z4, 2)) + +/* +** mla_lane_w8p6_z0_z4_7: +** umlal za\.s\[w8, 6:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p6_z0_z4_7, svuint16x4_t, svuint16_t, + svmla_lane_za32_u16_vg2x4 (w8 + 6, z0, z4, 7), + svmla_lane_za32_vg2x4 (w8 + 6, z0, z4, 7)) + +/* +** mla_lane_w8p7_z0_z4_3: +** add (w8|w9|w10|w11), w8, #?7 +** umlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svuint16x4_t, svuint16_t, + svmla_lane_za32_u16_vg2x4 (w8 + 7, z0, z4, 3), + svmla_lane_za32_vg2x4 (w8 + 7, z0, z4, 3)) + +/* +** mla_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** umlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svuint16x4_t, svuint16_t, + svmla_lane_za32_u16_vg2x4 (w8 + 8, z0, z4, 4), + svmla_lane_za32_vg2x4 (w8 + 8, z0, z4, 4)) + +/* +** mla_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** umlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svuint16x4_t, svuint16_t, + svmla_lane_za32_u16_vg2x4 (w0 - 1, z0, z4, 5), + svmla_lane_za32_vg2x4 (w0 - 1, z0, z4, 5)) + +/* +** mla_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! +** umlal za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svuint16x4_t, svuint16_t, + svmla_lane_za32_u16_vg2x4 (w8, z4, z15, 6), + svmla_lane_za32_vg2x4 (w8, z4, z15, 6)) + +/* +** mla_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** umlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svuint16x4_t, svuint16_t, + svmla_lane_za32_u16_vg2x4 (w8, z28, z16, 7), + svmla_lane_za32_vg2x4 (w8, z28, z16, 7)) + +/* +** mla_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umlal za\.s\[w8, 0:1, vgx4\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svuint16x4_t, svuint16_t, + svmla_lane_za32_u16_vg2x4 (w8, z17, z7, 0), + svmla_lane_za32_vg2x4 (w8, z17, z7, 0)) + +/* +** mla_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umlal za\.s\[w8, 0:1, vgx4\], [^\n]+, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svuint16x4_t, svuint16_t, + svmla_lane_za32_u16_vg2x4 (w8, z22, z4, 1), + svmla_lane_za32_vg2x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u8_vg4x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u8_vg4x1.c new file mode 100644 index 0000000000000000000000000000000000000000..c4ca5344ae30d628e3f9d16f603ff78c8e13df09 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u8_vg4x1.c @@ -0,0 +1,150 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z0_0: +** mov (w8|w9|w10|w11), #?0 +** umlall za\.s\[\1, 0:3\], z0\.b, z0\.b\[0\] +** ret +*/ +TEST_ZA_X1 (mla_lane_0_z0_z0_0, svuint8_t, + 
svmla_lane_za32_u8_vg4x1 (0, z0, z0, 0),
+ svmla_lane_za32_vg4x1 (0, z0, z0, 0))
+
+/*
+** mla_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** umlall za\.s\[\1, 0:3\], z0\.b, z3\.b\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w0_z0_z3_1, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w0, z0, z3, 1),
+ svmla_lane_za32_vg4x1 (w0, z0, z3, 1))
+
+/*
+** mla_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** umlall za\.s\[\1, 0:3\], z0\.b, z3\.b\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w7_z0_z3_2, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w7, z0, z3, 2),
+ svmla_lane_za32_vg4x1 (w7, z0, z3, 2))
+
+/*
+** mla_lane_w8_z7_z3_3:
+** umlall za\.s\[w8, 0:3\], z7\.b, z3\.b\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z7_z3_3, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w8, z7, z3, 3),
+ svmla_lane_za32_vg4x1 (w8, z7, z3, 3))
+
+/*
+** mla_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** umlall za\.s\[w8, 0:3\], z31\.b, \1\.b\[4\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z31_z16_4, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w8, z31, z16, 4),
+ svmla_lane_za32_vg4x1 (w8, z31, z16, 4))
+
+/*
+** mla_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** umlall za\.s\[\1, 0:3\], z0\.b, z0\.b\[5\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p1_z0_z0_5, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w8 + 1, z0, z0, 5),
+ svmla_lane_za32_vg4x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mla_lane_w8p2_z23_z0_6:
+** add (w8|w9|w10|w11), w8, #?2
+** umlall za\.s\[\1, 0:3\], z23\.b, z0\.b\[6\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p2_z23_z0_6, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w8 + 2, z23, z0, 6),
+ svmla_lane_za32_vg4x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mla_lane_w11p4_z23_z0_7:
+** umlall za\.s\[w11, 4:7\], z23\.b, z0\.b\[7\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w11p4_z23_z0_7, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w11 + 4, z23, z0, 7),
+ svmla_lane_za32_vg4x1 (w11 + 4, z23, z0, 7))
+
+/*
+** mla_lane_w8p7_z7_z7_8:
+** add (w8|w9|w10|w11), w8, #?7
+** umlall za\.s\[\1, 0:3\], z7\.b, z7\.b\[8\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p7_z7_z7_8, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w8 + 7, z7, z7, 8),
+ svmla_lane_za32_vg4x1 (w8 + 7, z7, z7, 8))
+
+/*
+** mla_lane_w11p12_z23_z0_9:
+** umlall za\.s\[w11, 12:15\], z23\.b, z0\.b\[9\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w11p12_z23_z0_9, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w11 + 12, z23, z0, 9),
+ svmla_lane_za32_vg4x1 (w11 + 12, z23, z0, 9))
+
+/*
+** mla_lane_w8p14_z23_z0_10:
+** add (w8|w9|w10|w11), w8, #?14
+** umlall za\.s\[\1, 0:3\], z23\.b, z0\.b\[10\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p14_z23_z0_10, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w8 + 14, z23, z0, 10),
+ svmla_lane_za32_vg4x1 (w8 + 14, z23, z0, 10))
+
+/*
+** mla_lane_w8p15_z7_z7_11:
+** add (w8|w9|w10|w11), w8, #?15
+** umlall za\.s\[\1, 0:3\], z7\.b, z7\.b\[11\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p15_z7_z7_11, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w8 + 15, z7, z7, 11),
+ svmla_lane_za32_vg4x1 (w8 + 15, z7, z7, 11))
+
+/*
+** mla_lane_w8p16_z7_z7_12:
+** add (w8|w9|w10|w11), w8, #?16
+** umlall za\.s\[\1, 0:3\], z7\.b, z7\.b\[12\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8p16_z7_z7_12, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w8 + 16, z7, z7, 12),
+ svmla_lane_za32_vg4x1 (w8 + 16, z7, z7, 12))
+
+/*
+** mla_lane_w8m1_z16_z0_13:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlall za\.s\[\1, 0:3\], z16\.b, z0\.b\[13\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8m1_z16_z0_13, svuint8_t,
+ svmla_lane_za32_u8_vg4x1 (w8 - 1, z16, z0, 13),
+ svmla_lane_za32_vg4x1 (w8 - 1, z16, z0, 13))
+
+/*
+** mla_lane_w12_z0_z3_15:
+** mov (w8|w9|w10|w11), w12
+** umlall za\.s\[\1, 0:3\], z0\.b, 
z3\.b\[15\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w12_z0_z3_15, svuint8_t, + svmla_lane_za32_u8_vg4x1 (w12, z0, z3, 15), + svmla_lane_za32_vg4x1 (w12, z0, z3, 15)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u8_vg4x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u8_vg4x2.c new file mode 100644 index 0000000000000000000000000000000000000000..1bee8b511331eaafef312c0c3ad7c465f9114424 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u8_vg4x2.c @@ -0,0 +1,122 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** umlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svuint8x2_t, svuint8_t, + svmla_lane_za32_u8_vg4x2 (0, z0, z4, 0), + svmla_lane_za32_vg4x2 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** umlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svuint8x2_t, svuint8_t, + svmla_lane_za32_u8_vg4x2 (w0, z0, z7, 1), + svmla_lane_za32_vg4x2 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** umlall za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svuint8x2_t, svuint8_t, + svmla_lane_za32_u8_vg4x2 (w8, z28, z4, 2), + svmla_lane_za32_vg4x2 (w8, z28, z4, 2)) + +/* +** mla_lane_w11p4_z0_z4_3: +** umlall za\.s\[w11, 4:7, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w11p4_z0_z4_3, svuint8x2_t, svuint8_t, + svmla_lane_za32_u8_vg4x2 (w11 + 4, z0, z4, 3), + svmla_lane_za32_vg4x2 (w11 + 4, z0, z4, 3)) + +/* +** mla_lane_w8p6_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?6 +** umlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[4\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p6_z0_z4_4, svuint8x2_t, svuint8_t, + svmla_lane_za32_u8_vg4x2 (w8 + 6, z0, z4, 4), + svmla_lane_za32_vg4x2 (w8 + 6, z0, z4, 4)) + +/* +** mla_lane_w8p7_z0_z4_5: +** add (w8|w9|w10|w11), w8, #?7 +** umlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_5, svuint8x2_t, svuint8_t, + svmla_lane_za32_u8_vg4x2 (w8 + 7, z0, z4, 5), + svmla_lane_za32_vg4x2 (w8 + 7, z0, z4, 5)) + +/* +** mla_lane_w8p8_z0_z4_7: +** add (w8|w9|w10|w11), w8, #?8 +** umlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_7, svuint8x2_t, svuint8_t, + svmla_lane_za32_u8_vg4x2 (w8 + 8, z0, z4, 7), + svmla_lane_za32_vg4x2 (w8 + 8, z0, z4, 7)) + +/* +** mla_lane_w0m1_z0_z4_9: +** sub (w8|w9|w10|w11), w0, #?1 +** umlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[9\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_9, svuint8x2_t, svuint8_t, + svmla_lane_za32_u8_vg4x2 (w0 - 1, z0, z4, 9), + svmla_lane_za32_vg4x2 (w0 - 1, z0, z4, 9)) + +/* +** mla_lane_w8_z4_z15_10: +** str d15, \[sp, #?-16\]! 
+** umlall za\.s\[w8, 0:3, vgx2\], {z4\.b - z5\.b}, z15\.b\[10\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_10, svuint8x2_t, svuint8_t, + svmla_lane_za32_u8_vg4x2 (w8, z4, z15, 10), + svmla_lane_za32_vg4x2 (w8, z4, z15, 10)) + +/* +** mla_lane_w8_z28_z16_11: +** mov (z[0-7]).d, z16.d +** umlall za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, \1\.b\[11\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_11, svuint8x2_t, svuint8_t, + svmla_lane_za32_u8_vg4x2 (w8, z28, z16, 11), + svmla_lane_za32_vg4x2 (w8, z28, z16, 11)) + +/* +** mla_lane_w8_z17_z7_13: +** mov [^\n]+ +** mov [^\n]+ +** umlall za\.s\[w8, 0:3, vgx2\], [^\n]+, z7\.b\[13\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_13, svuint8x2_t, svuint8_t, + svmla_lane_za32_u8_vg4x2 (w8, z17, z7, 13), + svmla_lane_za32_vg4x2 (w8, z17, z7, 13)) + +/* +** mla_lane_w8_z22_z4_15: +** umlall za\.s\[w8, 0:3, vgx2\], {z22\.b - z23\.b}, z4\.b\[15\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_15, svuint8x2_t, svuint8_t, + svmla_lane_za32_u8_vg4x2 (w8, z22, z4, 15), + svmla_lane_za32_vg4x2 (w8, z22, z4, 15)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u8_vg4x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u8_vg4x4.c new file mode 100644 index 0000000000000000000000000000000000000000..e0e3a536b9c249da9802a769a9aa91cf6809c92b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_u8_vg4x4.c @@ -0,0 +1,128 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** umlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svuint8x4_t, svuint8_t, + svmla_lane_za32_u8_vg4x4 (0, z0, z4, 0), + svmla_lane_za32_vg4x4 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** umlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svuint8x4_t, svuint8_t, + svmla_lane_za32_u8_vg4x4 (w0, z0, z7, 1), + svmla_lane_za32_vg4x4 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** umlall za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svuint8x4_t, svuint8_t, + svmla_lane_za32_u8_vg4x4 (w8, z28, z4, 2), + svmla_lane_za32_vg4x4 (w8, z28, z4, 2)) + +/* +** mla_lane_w11p4_z0_z4_7: +** umlall za\.s\[w11, 4:7, vgx4\], {z0\.b - z3\.b}, z4\.b\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w11p4_z0_z4_7, svuint8x4_t, svuint8_t, + svmla_lane_za32_u8_vg4x4 (w11 + 4, z0, z4, 7), + svmla_lane_za32_vg4x4 (w11 + 4, z0, z4, 7)) + +/* +** mla_lane_w8p6_z0_z4_8: +** add (w8|w9|w10|w11), w8, #?6 +** umlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[8\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p6_z0_z4_8, svuint8x4_t, svuint8_t, + svmla_lane_za32_u8_vg4x4 (w8 + 6, z0, z4, 8), + svmla_lane_za32_vg4x4 (w8 + 6, z0, z4, 8)) + +/* +** mla_lane_w8p7_z0_z4_9: +** add (w8|w9|w10|w11), w8, #?7 +** umlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[9\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_9, svuint8x4_t, svuint8_t, + svmla_lane_za32_u8_vg4x4 (w8 + 7, z0, z4, 9), + svmla_lane_za32_vg4x4 (w8 + 7, z0, z4, 9)) + +/* +** mla_lane_w8p8_z0_z4_10: +** add (w8|w9|w10|w11), w8, #?8 +** umlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[10\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_10, svuint8x4_t, svuint8_t, + svmla_lane_za32_u8_vg4x4 (w8 + 8, z0, z4, 10), + svmla_lane_za32_vg4x4 (w8 + 8, z0, z4, 10)) + +/* +** 
mla_lane_w0m1_z0_z4_11: +** sub (w8|w9|w10|w11), w0, #?1 +** umlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[11\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_11, svuint8x4_t, svuint8_t, + svmla_lane_za32_u8_vg4x4 (w0 - 1, z0, z4, 11), + svmla_lane_za32_vg4x4 (w0 - 1, z0, z4, 11)) + +/* +** mla_lane_w8_z4_z15_12: +** str d15, \[sp, #?-16\]! +** umlall za\.s\[w8, 0:3, vgx4\], {z4\.b - z7\.b}, z15\.b\[12\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_12, svuint8x4_t, svuint8_t, + svmla_lane_za32_u8_vg4x4 (w8, z4, z15, 12), + svmla_lane_za32_vg4x4 (w8, z4, z15, 12)) + +/* +** mla_lane_w8_z28_z16_13: +** mov (z[0-7]).d, z16.d +** umlall za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, \1\.b\[13\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_13, svuint8x4_t, svuint8_t, + svmla_lane_za32_u8_vg4x4 (w8, z28, z16, 13), + svmla_lane_za32_vg4x4 (w8, z28, z16, 13)) + +/* +** mla_lane_w8_z17_z7_14: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umlall za\.s\[w8, 0:3, vgx4\], [^\n]+, z7\.b\[14\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_14, svuint8x4_t, svuint8_t, + svmla_lane_za32_u8_vg4x4 (w8, z17, z7, 14), + svmla_lane_za32_vg4x4 (w8, z17, z7, 14)) + +/* +** mla_lane_w8_z22_z4_15: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** umlall za\.s\[w8, 0:3, vgx4\], [^\n]+, z4\.b\[15\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_15, svuint8x4_t, svuint8_t, + svmla_lane_za32_u8_vg4x4 (w8, z22, z4, 15), + svmla_lane_za32_vg4x4 (w8, z22, z4, 15)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_f64_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_f64_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..f1f7fc7ab1452c4f8fe4e05f6d070152e0919b62 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_f64_vg1x2.c @@ -0,0 +1,104 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-f64f64" + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** fmla za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, z4\.d\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svfloat64x2_t, svfloat64_t, + svmla_lane_za64_f64_vg1x2 (0, z0, z4, 0), + svmla_lane_za64_vg1x2 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** fmla za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, z7\.d\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svfloat64x2_t, svfloat64_t, + svmla_lane_za64_f64_vg1x2 (w0, z0, z7, 1), + svmla_lane_za64_vg1x2 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** fmla za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}, z4\.d\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svfloat64x2_t, svfloat64_t, + svmla_lane_za64_f64_vg1x2 (w8, z28, z4, 0), + svmla_lane_za64_vg1x2 (w8, z28, z4, 0)) + +/* +** mla_lane_w8p7_z0_z4_3: +** fmla za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d}, z4\.d\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svfloat64x2_t, svfloat64_t, + svmla_lane_za64_f64_vg1x2 (w8 + 7, z0, z4, 1), + svmla_lane_za64_vg1x2 (w8 + 7, z0, z4, 1)) + +/* +** mla_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** fmla za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, z4\.d\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_0, svfloat64x2_t, svfloat64_t, + svmla_lane_za64_f64_vg1x2 (w8 + 8, z0, z4, 0), + svmla_lane_za64_vg1x2 (w8 + 8, z0, z4, 0)) + +/* +** mla_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** fmla za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, z4\.d\[1\] +** ret +*/ 
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_1, svfloat64x2_t, svfloat64_t, + svmla_lane_za64_f64_vg1x2 (w0 - 1, z0, z4, 1), + svmla_lane_za64_vg1x2 (w0 - 1, z0, z4, 1)) + +/* +** mla_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! +** fmla za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, z15\.d\[0\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_2, svfloat64x2_t, svfloat64_t, + svmla_lane_za64_f64_vg1x2 (w8, z4, z15, 0), + svmla_lane_za64_vg1x2 (w8, z4, z15, 0)) + +/* +** mla_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** fmla za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}, \1\.d\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_3, svfloat64x2_t, svfloat64_t, + svmla_lane_za64_f64_vg1x2 (w8, z28, z16, 1), + svmla_lane_za64_vg1x2 (w8, z28, z16, 1)) + +/* +** mla_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** fmla za\.d\[w8, 0, vgx2\], [^\n]+, z7\.d\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svfloat64x2_t, svfloat64_t, + svmla_lane_za64_f64_vg1x2 (w8, z17, z7, 0), + svmla_lane_za64_vg1x2 (w8, z17, z7, 0)) + +/* +** mla_lane_w8_z22_z4_1: +** fmla za\.d\[w8, 0, vgx2\], {z22\.d - z23\.d}, z4\.d\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svfloat64x2_t, svfloat64_t, + svmla_lane_za64_f64_vg1x2 (w8, z22, z4, 1), + svmla_lane_za64_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_f64_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_f64_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..3dbb7c0de57e543f1df8c02fa7223dd471904967 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_f64_vg1x4.c @@ -0,0 +1,110 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-f64f64" + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** fmla za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, z4\.d\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svfloat64x4_t, svfloat64_t, + svmla_lane_za64_f64_vg1x4 (0, z0, z4, 0), + svmla_lane_za64_vg1x4 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** fmla za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, z7\.d\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svfloat64x4_t, svfloat64_t, + svmla_lane_za64_f64_vg1x4 (w0, z0, z7, 1), + svmla_lane_za64_vg1x4 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** fmla za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}, z4\.d\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svfloat64x4_t, svfloat64_t, + svmla_lane_za64_f64_vg1x4 (w8, z28, z4, 0), + svmla_lane_za64_vg1x4 (w8, z28, z4, 0)) + +/* +** mla_lane_w8p7_z0_z4_3: +** fmla za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d}, z4\.d\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svfloat64x4_t, svfloat64_t, + svmla_lane_za64_f64_vg1x4 (w8 + 7, z0, z4, 1), + svmla_lane_za64_vg1x4 (w8 + 7, z0, z4, 1)) + +/* +** mla_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** fmla za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, z4\.d\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_0, svfloat64x4_t, svfloat64_t, + svmla_lane_za64_f64_vg1x4 (w8 + 8, z0, z4, 0), + svmla_lane_za64_vg1x4 (w8 + 8, z0, z4, 0)) + +/* +** mla_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** fmla za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, z4\.d\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_1, svfloat64x4_t, svfloat64_t, + svmla_lane_za64_f64_vg1x4 (w0 - 1, z0, z4, 1), + svmla_lane_za64_vg1x4 (w0 - 1, z0, z4, 1)) + +/* +** mla_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** fmla za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d}, z15\.d\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_2, svfloat64x4_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x4 (w8, z4, z15, 0),
+ svmla_lane_za64_vg1x4 (w8, z4, z15, 0))
+
+/*
+** mla_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** fmla za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}, \1\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_3, svfloat64x4_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x4 (w8, z28, z16, 1),
+ svmla_lane_za64_vg1x4 (w8, z28, z16, 1))
+
+/*
+** mla_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmla za\.d\[w8, 0, vgx4\], [^\n]+, z7\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svfloat64x4_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x4 (w8, z17, z7, 0),
+ svmla_lane_za64_vg1x4 (w8, z17, z7, 0))
+
+/*
+** mla_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmla za\.d\[w8, 0, vgx4\], [^\n]+, z4\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svfloat64x4_t, svfloat64_t,
+ svmla_lane_za64_f64_vg1x4 (w8, z22, z4, 1),
+ svmla_lane_za64_vg1x4 (w8, z22, z4, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_s16_vg4x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_s16_vg4x1.c
new file mode 100644
index 0000000000000000000000000000000000000000..0ba8caeb2c0255ab3870c1a7b5dcd9dfab092eb5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_s16_vg4x1.c
@@ -0,0 +1,152 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** smlall za\.d\[\1, 0:3\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_0_z0_z0_0, svint16_t,
+ svmla_lane_za64_s16_vg4x1 (0, z0, z0, 0),
+ svmla_lane_za64_vg4x1 (0, z0, z0, 0))
+
+/*
+** mla_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** smlall za\.d\[\1, 0:3\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w0_z0_z3_1, svint16_t,
+ svmla_lane_za64_s16_vg4x1 (w0, z0, z3, 1),
+ svmla_lane_za64_vg4x1 (w0, z0, z3, 1))
+
+/*
+** mla_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** smlall za\.d\[\1, 0:3\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w7_z0_z3_2, svint16_t,
+ svmla_lane_za64_s16_vg4x1 (w7, z0, z3, 2),
+ svmla_lane_za64_vg4x1 (w7, z0, z3, 2))
+
+/*
+** mla_lane_w8_z7_z3_3:
+** smlall za\.d\[w8, 0:3\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z7_z3_3, svint16_t,
+ svmla_lane_za64_s16_vg4x1 (w8, z7, z3, 3),
+ svmla_lane_za64_vg4x1 (w8, z7, z3, 3))
+
+/*
+** mla_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** smlall za\.d\[w8, 0:3\], z31\.h, 
\1\.h\[4\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8_z31_z16_4, svint16_t, + svmla_lane_za64_s16_vg4x1 (w8, z31, z16, 4), + svmla_lane_za64_vg4x1 (w8, z31, z16, 4)) + +/* +** mla_lane_w8p1_z0_z0_5: +** add (w8|w9|w10|w11), w8, #?1 +** smlall za\.d\[\1, 0:3\], z0\.h, z0\.h\[5\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p1_z0_z0_5, svint16_t, + svmla_lane_za64_s16_vg4x1 (w8 + 1, z0, z0, 5), + svmla_lane_za64_vg4x1 (w8 + 1, z0, z0, 5)) + +/* +** mla_lane_w8p2_z23_z0_6: +** add (w8|w9|w10|w11), w8, #?2 +** smlall za\.d\[\1, 0:3\], z23\.h, z0\.h\[6\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p2_z23_z0_6, svint16_t, + svmla_lane_za64_s16_vg4x1 (w8 + 2, z23, z0, 6), + svmla_lane_za64_vg4x1 (w8 + 2, z23, z0, 6)) + +/* +** mla_lane_w11p4_z23_z0_7: +** smlall za\.d\[w11, 4:7\], z23\.h, z0\.h\[7\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w11p4_z23_z0_7, svint16_t, + svmla_lane_za64_s16_vg4x1 (w11 + 4, z23, z0, 7), + svmla_lane_za64_vg4x1 (w11 + 4, z23, z0, 7)) + +/* +** mla_lane_w8p7_z7_z7_0: +** add (w8|w9|w10|w11), w8, #?7 +** smlall za\.d\[\1, 0:3\], z7\.h, z7\.h\[0\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p7_z7_z7_0, svint16_t, + svmla_lane_za64_s16_vg4x1 (w8 + 7, z7, z7, 0), + svmla_lane_za64_vg4x1 (w8 + 7, z7, z7, 0)) + +/* +** mla_lane_w11p12_z23_z0_1: +** smlall za\.d\[w11, 12:15\], z23\.h, z0\.h\[1\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w11p12_z23_z0_1, svint16_t, + svmla_lane_za64_s16_vg4x1 (w11 + 12, z23, z0, 1), + svmla_lane_za64_vg4x1 (w11 + 12, z23, z0, 1)) + +/* +** mla_lane_w8p14_z23_z0_2: +** add (w8|w9|w10|w11), w8, #?14 +** smlall za\.d\[w8, 0:3\], z23\.h, z0\.h\[2\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p14_z23_z0_2, svint16_t, + svmla_lane_za64_s16_vg4x1 (w8 + 14, z23, z0, 2), + svmla_lane_za64_vg4x1 (w8 + 14, z23, z0, 2)) + +/* +** mla_lane_w8p15_z7_z7_3: +** add (w8|w9|w10|w11), w8, #?15 +** smlall za\.d\[\1, 0:3\], z7\.h, z7\.h\[3\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p15_z7_z7_3, svint16_t, + svmla_lane_za64_s16_vg4x1 (w8 + 15, z7, z7, 3), + svmla_lane_za64_vg4x1 (w8 + 15, z7, z7, 3)) + +/* +** mla_lane_w8p16_z7_z7_4: +** add (w8|w9|w10|w11), w8, #?16 +** smlall za\.d\[\1, 0:3\], z7\.h, z7\.h\[4\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p16_z7_z7_4, svint16_t, + svmla_lane_za64_s16_vg4x1 (w8 + 16, z7, z7, 4), + svmla_lane_za64_vg4x1 (w8 + 16, z7, z7, 4)) + +/* +** mla_lane_w8m1_z16_z0_5: +** sub (w8|w9|w10|w11), w8, #?1 +** smlall za\.d\[\1, 0:3\], z16\.h, z0\.h\[5\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8m1_z16_z0_5, svint16_t, + svmla_lane_za64_s16_vg4x1 (w8 - 1, z16, z0, 5), + svmla_lane_za64_vg4x1 (w8 - 1, z16, z0, 5)) + +/* +** mla_lane_w12_z0_z3_6: +** mov (w8|w9|w10|w11), w12 +** smlall za\.d\[\1, 0:3\], z0\.h, z3\.h\[6\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w12_z0_z3_6, svint16_t, + svmla_lane_za64_s16_vg4x1 (w12, z0, z3, 6), + svmla_lane_za64_vg4x1 (w12, z0, z3, 6)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_s16_vg4x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_s16_vg4x2.c new file mode 100644 index 0000000000000000000000000000000000000000..583f3a51d50c65329a07cff725140d024b251843 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_s16_vg4x2.c @@ -0,0 +1,124 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** smlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svint16x2_t, svint16_t, + svmla_lane_za64_s16_vg4x2 (0, z0, z4, 0), + 
svmla_lane_za64_vg4x2 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** smlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svint16x2_t, svint16_t, + svmla_lane_za64_s16_vg4x2 (w0, z0, z7, 1), + svmla_lane_za64_vg4x2 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** smlall za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svint16x2_t, svint16_t, + svmla_lane_za64_s16_vg4x2 (w8, z28, z4, 2), + svmla_lane_za64_vg4x2 (w8, z28, z4, 2)) + +/* +** mla_lane_w11p4_z0_z4_3: +** smlall za\.d\[w11, 4:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w11p4_z0_z4_3, svint16x2_t, svint16_t, + svmla_lane_za64_s16_vg4x2 (w11 + 4, z0, z4, 3), + svmla_lane_za64_vg4x2 (w11 + 4, z0, z4, 3)) + +/* +** mla_lane_w8p6_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?6 +** smlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p6_z0_z4_4, svint16x2_t, svint16_t, + svmla_lane_za64_s16_vg4x2 (w8 + 6, z0, z4, 4), + svmla_lane_za64_vg4x2 (w8 + 6, z0, z4, 4)) + +/* +** mla_lane_w8p7_z0_z4_5: +** add (w8|w9|w10|w11), w8, #?7 +** smlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_5, svint16x2_t, svint16_t, + svmla_lane_za64_s16_vg4x2 (w8 + 7, z0, z4, 5), + svmla_lane_za64_vg4x2 (w8 + 7, z0, z4, 5)) + +/* +** mla_lane_w8p8_z0_z4_6: +** add (w8|w9|w10|w11), w8, #?8 +** smlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[6\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_6, svint16x2_t, svint16_t, + svmla_lane_za64_s16_vg4x2 (w8 + 8, z0, z4, 6), + svmla_lane_za64_vg4x2 (w8 + 8, z0, z4, 6)) + +/* +** mla_lane_w0m1_z0_z4_7: +** sub (w8|w9|w10|w11), w0, #?1 +** smlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_7, svint16x2_t, svint16_t, + svmla_lane_za64_s16_vg4x2 (w0 - 1, z0, z4, 7), + svmla_lane_za64_vg4x2 (w0 - 1, z0, z4, 7)) + +/* +** mla_lane_w8_z4_z15_0: +** str d15, \[sp, #?-16\]! 
+** smlall za\.d\[w8, 0:3, vgx2\], {z4\.h - z5\.h}, z15\.h\[0\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_0, svint16x2_t, svint16_t, + svmla_lane_za64_s16_vg4x2 (w8, z4, z15, 0), + svmla_lane_za64_vg4x2 (w8, z4, z15, 0)) + +/* +** mla_lane_w8_z28_z16_1: +** mov (z[0-7]).d, z16.d +** smlall za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, \1\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_1, svint16x2_t, svint16_t, + svmla_lane_za64_s16_vg4x2 (w8, z28, z16, 1), + svmla_lane_za64_vg4x2 (w8, z28, z16, 1)) + +/* +** mla_lane_w8_z17_z7_3: +** mov [^\n]+ +** mov [^\n]+ +** smlall za\.d\[w8, 0:3, vgx2\], [^\n]+, z7\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_3, svint16x2_t, svint16_t, + svmla_lane_za64_s16_vg4x2 (w8, z17, z7, 3), + svmla_lane_za64_vg4x2 (w8, z17, z7, 3)) + +/* +** mla_lane_w8_z22_z4_5: +** smlall za\.d\[w8, 0:3, vgx2\], {z22\.h - z23\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_5, svint16x2_t, svint16_t, + svmla_lane_za64_s16_vg4x2 (w8, z22, z4, 5), + svmla_lane_za64_vg4x2 (w8, z22, z4, 5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_s16_vg4x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_s16_vg4x4.c new file mode 100644 index 0000000000000000000000000000000000000000..9a118a72fb3bbedbff61e56c48ce2e4c9594ce56 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_s16_vg4x4.c @@ -0,0 +1,130 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** smlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svint16x4_t, svint16_t, + svmla_lane_za64_s16_vg4x4 (0, z0, z4, 0), + svmla_lane_za64_vg4x4 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** smlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svint16x4_t, svint16_t, + svmla_lane_za64_s16_vg4x4 (w0, z0, z7, 1), + svmla_lane_za64_vg4x4 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** smlall za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svint16x4_t, svint16_t, + svmla_lane_za64_s16_vg4x4 (w8, z28, z4, 2), + svmla_lane_za64_vg4x4 (w8, z28, z4, 2)) + +/* +** mla_lane_w11p4_z0_z4_3: +** smlall za\.d\[w11, 4:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w11p4_z0_z4_3, svint16x4_t, svint16_t, + svmla_lane_za64_s16_vg4x4 (w11 + 4, z0, z4, 3), + svmla_lane_za64_vg4x4 (w11 + 4, z0, z4, 3)) + +/* +** mla_lane_w8p6_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?6 +** smlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p6_z0_z4_4, svint16x4_t, svint16_t, + svmla_lane_za64_s16_vg4x4 (w8 + 6, z0, z4, 4), + svmla_lane_za64_vg4x4 (w8 + 6, z0, z4, 4)) + +/* +** mla_lane_w8p7_z0_z4_5: +** add (w8|w9|w10|w11), w8, #?7 +** smlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_5, svint16x4_t, svint16_t, + svmla_lane_za64_s16_vg4x4 (w8 + 7, z0, z4, 5), + svmla_lane_za64_vg4x4 (w8 + 7, z0, z4, 5)) + +/* +** mla_lane_w8p8_z0_z4_6: +** add (w8|w9|w10|w11), w8, #?8 +** smlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[6\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_6, svint16x4_t, svint16_t, + svmla_lane_za64_s16_vg4x4 (w8 + 8, z0, z4, 6), + svmla_lane_za64_vg4x4 (w8 + 
8, z0, z4, 6))
+
+/*
+** mla_lane_w0m1_z0_z4_7:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_7, svint16x4_t, svint16_t,
+ svmla_lane_za64_s16_vg4x4 (w0 - 1, z0, z4, 7),
+ svmla_lane_za64_vg4x4 (w0 - 1, z0, z4, 7))
+
+/*
+** mla_lane_w8_z4_z15_0:
+** str d15, \[sp, #?-16\]!
+** smlall za\.d\[w8, 0:3, vgx4\], {z4\.h - z7\.h}, z15\.h\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_0, svint16x4_t, svint16_t,
+ svmla_lane_za64_s16_vg4x4 (w8, z4, z15, 0),
+ svmla_lane_za64_vg4x4 (w8, z4, z15, 0))
+
+/*
+** mla_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** smlall za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, \1\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_3, svint16x4_t, svint16_t,
+ svmla_lane_za64_s16_vg4x4 (w8, z28, z16, 3),
+ svmla_lane_za64_vg4x4 (w8, z28, z16, 3))
+
+/*
+** mla_lane_w8_z17_z7_4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smlall za\.d\[w8, 0:3, vgx4\], [^\n]+, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_4, svint16x4_t, svint16_t,
+ svmla_lane_za64_s16_vg4x4 (w8, z17, z7, 4),
+ svmla_lane_za64_vg4x4 (w8, z17, z7, 4))
+
+/*
+** mla_lane_w8_z22_z4_6:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smlall za\.d\[w8, 0:3, vgx4\], [^\n]+, z4\.h\[6\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_6, svint16x4_t, svint16_t,
+ svmla_lane_za64_s16_vg4x4 (w8, z22, z4, 6),
+ svmla_lane_za64_vg4x4 (w8, z22, z4, 6))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_u16_vg4x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_u16_vg4x1.c
new file mode 100644
index 0000000000000000000000000000000000000000..d55a9cb26efaf1f22addbdae3324c103bfaf26e8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_u16_vg4x1.c
@@ -0,0 +1,152 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** umlall za\.d\[\1, 0:3\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_0_z0_z0_0, svuint16_t,
+ svmla_lane_za64_u16_vg4x1 (0, z0, z0, 0),
+ svmla_lane_za64_vg4x1 (0, z0, z0, 0))
+
+/*
+** mla_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** umlall za\.d\[\1, 0:3\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w0_z0_z3_1, svuint16_t,
+ svmla_lane_za64_u16_vg4x1 (w0, z0, z3, 1),
+ svmla_lane_za64_vg4x1 (w0, z0, z3, 1))
+
+/*
+** mla_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** umlall za\.d\[\1, 0:3\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w7_z0_z3_2, svuint16_t,
+ svmla_lane_za64_u16_vg4x1 (w7, z0, z3, 2),
+ svmla_lane_za64_vg4x1 (w7, z0, z3, 2))
+
+/*
+** mla_lane_w8_z7_z3_3:
+** umlall za\.d\[w8, 0:3\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mla_lane_w8_z7_z3_3, svuint16_t,
+ svmla_lane_za64_u16_vg4x1 (w8, z7, z3, 3),
+ svmla_lane_za64_vg4x1 (w8, z7, z3, 3))
+
+/*
+** mla_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** umlall za\.d\[w8, 0:3\], z31\.h, 
\1\.h\[4\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8_z31_z16_4, svuint16_t, + svmla_lane_za64_u16_vg4x1 (w8, z31, z16, 4), + svmla_lane_za64_vg4x1 (w8, z31, z16, 4)) + +/* +** mla_lane_w8p1_z0_z0_5: +** add (w8|w9|w10|w11), w8, #?1 +** umlall za\.d\[\1, 0:3\], z0\.h, z0\.h\[5\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p1_z0_z0_5, svuint16_t, + svmla_lane_za64_u16_vg4x1 (w8 + 1, z0, z0, 5), + svmla_lane_za64_vg4x1 (w8 + 1, z0, z0, 5)) + +/* +** mla_lane_w8p2_z23_z0_6: +** add (w8|w9|w10|w11), w8, #?2 +** umlall za\.d\[\1, 0:3\], z23\.h, z0\.h\[6\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p2_z23_z0_6, svuint16_t, + svmla_lane_za64_u16_vg4x1 (w8 + 2, z23, z0, 6), + svmla_lane_za64_vg4x1 (w8 + 2, z23, z0, 6)) + +/* +** mla_lane_w11p4_z23_z0_7: +** umlall za\.d\[w11, 4:7\], z23\.h, z0\.h\[7\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w11p4_z23_z0_7, svuint16_t, + svmla_lane_za64_u16_vg4x1 (w11 + 4, z23, z0, 7), + svmla_lane_za64_vg4x1 (w11 + 4, z23, z0, 7)) + +/* +** mla_lane_w8p7_z7_z7_0: +** add (w8|w9|w10|w11), w8, #?7 +** umlall za\.d\[\1, 0:3\], z7\.h, z7\.h\[0\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p7_z7_z7_0, svuint16_t, + svmla_lane_za64_u16_vg4x1 (w8 + 7, z7, z7, 0), + svmla_lane_za64_vg4x1 (w8 + 7, z7, z7, 0)) + +/* +** mla_lane_w11p12_z23_z0_1: +** umlall za\.d\[w11, 12:15\], z23\.h, z0\.h\[1\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w11p12_z23_z0_1, svuint16_t, + svmla_lane_za64_u16_vg4x1 (w11 + 12, z23, z0, 1), + svmla_lane_za64_vg4x1 (w11 + 12, z23, z0, 1)) + +/* +** mla_lane_w8p14_z23_z0_2: +** add (w8|w9|w10|w11), w8, #?14 +** umlall za\.d\[w8, 0:3\], z23\.h, z0\.h\[2\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p14_z23_z0_2, svuint16_t, + svmla_lane_za64_u16_vg4x1 (w8 + 14, z23, z0, 2), + svmla_lane_za64_vg4x1 (w8 + 14, z23, z0, 2)) + +/* +** mla_lane_w8p15_z7_z7_3: +** add (w8|w9|w10|w11), w8, #?15 +** umlall za\.d\[\1, 0:3\], z7\.h, z7\.h\[3\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p15_z7_z7_3, svuint16_t, + svmla_lane_za64_u16_vg4x1 (w8 + 15, z7, z7, 3), + svmla_lane_za64_vg4x1 (w8 + 15, z7, z7, 3)) + +/* +** mla_lane_w8p16_z7_z7_4: +** add (w8|w9|w10|w11), w8, #?16 +** umlall za\.d\[\1, 0:3\], z7\.h, z7\.h\[4\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p16_z7_z7_4, svuint16_t, + svmla_lane_za64_u16_vg4x1 (w8 + 16, z7, z7, 4), + svmla_lane_za64_vg4x1 (w8 + 16, z7, z7, 4)) + +/* +** mla_lane_w8m1_z16_z0_5: +** sub (w8|w9|w10|w11), w8, #?1 +** umlall za\.d\[\1, 0:3\], z16\.h, z0\.h\[5\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8m1_z16_z0_5, svuint16_t, + svmla_lane_za64_u16_vg4x1 (w8 - 1, z16, z0, 5), + svmla_lane_za64_vg4x1 (w8 - 1, z16, z0, 5)) + +/* +** mla_lane_w12_z0_z3_6: +** mov (w8|w9|w10|w11), w12 +** umlall za\.d\[\1, 0:3\], z0\.h, z3\.h\[6\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w12_z0_z3_6, svuint16_t, + svmla_lane_za64_u16_vg4x1 (w12, z0, z3, 6), + svmla_lane_za64_vg4x1 (w12, z0, z3, 6)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_u16_vg4x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_u16_vg4x2.c new file mode 100644 index 0000000000000000000000000000000000000000..8bfb05a767089898f4613cea9203712d2fa265c3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_u16_vg4x2.c @@ -0,0 +1,124 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** umlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svuint16x2_t, svuint16_t, + svmla_lane_za64_u16_vg4x2 (0, 
z0, z4, 0), + svmla_lane_za64_vg4x2 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** umlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svuint16x2_t, svuint16_t, + svmla_lane_za64_u16_vg4x2 (w0, z0, z7, 1), + svmla_lane_za64_vg4x2 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** umlall za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svuint16x2_t, svuint16_t, + svmla_lane_za64_u16_vg4x2 (w8, z28, z4, 2), + svmla_lane_za64_vg4x2 (w8, z28, z4, 2)) + +/* +** mla_lane_w11p4_z0_z4_3: +** umlall za\.d\[w11, 4:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w11p4_z0_z4_3, svuint16x2_t, svuint16_t, + svmla_lane_za64_u16_vg4x2 (w11 + 4, z0, z4, 3), + svmla_lane_za64_vg4x2 (w11 + 4, z0, z4, 3)) + +/* +** mla_lane_w8p6_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?6 +** umlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p6_z0_z4_4, svuint16x2_t, svuint16_t, + svmla_lane_za64_u16_vg4x2 (w8 + 6, z0, z4, 4), + svmla_lane_za64_vg4x2 (w8 + 6, z0, z4, 4)) + +/* +** mla_lane_w8p7_z0_z4_5: +** add (w8|w9|w10|w11), w8, #?7 +** umlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_5, svuint16x2_t, svuint16_t, + svmla_lane_za64_u16_vg4x2 (w8 + 7, z0, z4, 5), + svmla_lane_za64_vg4x2 (w8 + 7, z0, z4, 5)) + +/* +** mla_lane_w8p8_z0_z4_6: +** add (w8|w9|w10|w11), w8, #?8 +** umlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[6\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_6, svuint16x2_t, svuint16_t, + svmla_lane_za64_u16_vg4x2 (w8 + 8, z0, z4, 6), + svmla_lane_za64_vg4x2 (w8 + 8, z0, z4, 6)) + +/* +** mla_lane_w0m1_z0_z4_7: +** sub (w8|w9|w10|w11), w0, #?1 +** umlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_7, svuint16x2_t, svuint16_t, + svmla_lane_za64_u16_vg4x2 (w0 - 1, z0, z4, 7), + svmla_lane_za64_vg4x2 (w0 - 1, z0, z4, 7)) + +/* +** mla_lane_w8_z4_z15_0: +** str d15, \[sp, #?-16\]! 
+** umlall za\.d\[w8, 0:3, vgx2\], {z4\.h - z5\.h}, z15\.h\[0\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_0, svuint16x2_t, svuint16_t, + svmla_lane_za64_u16_vg4x2 (w8, z4, z15, 0), + svmla_lane_za64_vg4x2 (w8, z4, z15, 0)) + +/* +** mla_lane_w8_z28_z16_1: +** mov (z[0-7]).d, z16.d +** umlall za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, \1\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_1, svuint16x2_t, svuint16_t, + svmla_lane_za64_u16_vg4x2 (w8, z28, z16, 1), + svmla_lane_za64_vg4x2 (w8, z28, z16, 1)) + +/* +** mla_lane_w8_z17_z7_3: +** mov [^\n]+ +** mov [^\n]+ +** umlall za\.d\[w8, 0:3, vgx2\], [^\n]+, z7\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_3, svuint16x2_t, svuint16_t, + svmla_lane_za64_u16_vg4x2 (w8, z17, z7, 3), + svmla_lane_za64_vg4x2 (w8, z17, z7, 3)) + +/* +** mla_lane_w8_z22_z4_5: +** umlall za\.d\[w8, 0:3, vgx2\], {z22\.h - z23\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_5, svuint16x2_t, svuint16_t, + svmla_lane_za64_u16_vg4x2 (w8, z22, z4, 5), + svmla_lane_za64_vg4x2 (w8, z22, z4, 5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_u16_vg4x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_u16_vg4x4.c new file mode 100644 index 0000000000000000000000000000000000000000..e9a3e4762859128d5142d694c641b4cffbe657df --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za64_u16_vg4x4.c @@ -0,0 +1,130 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** mla_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** umlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svuint16x4_t, svuint16_t, + svmla_lane_za64_u16_vg4x4 (0, z0, z4, 0), + svmla_lane_za64_vg4x4 (0, z0, z4, 0)) + +/* +** mla_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** umlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svuint16x4_t, svuint16_t, + svmla_lane_za64_u16_vg4x4 (w0, z0, z7, 1), + svmla_lane_za64_vg4x4 (w0, z0, z7, 1)) + +/* +** mla_lane_w8_z28_z4_2: +** umlall za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svuint16x4_t, svuint16_t, + svmla_lane_za64_u16_vg4x4 (w8, z28, z4, 2), + svmla_lane_za64_vg4x4 (w8, z28, z4, 2)) + +/* +** mla_lane_w11p4_z0_z4_3: +** umlall za\.d\[w11, 4:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w11p4_z0_z4_3, svuint16x4_t, svuint16_t, + svmla_lane_za64_u16_vg4x4 (w11 + 4, z0, z4, 3), + svmla_lane_za64_vg4x4 (w11 + 4, z0, z4, 3)) + +/* +** mla_lane_w8p6_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?6 +** umlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p6_z0_z4_4, svuint16x4_t, svuint16_t, + svmla_lane_za64_u16_vg4x4 (w8 + 6, z0, z4, 4), + svmla_lane_za64_vg4x4 (w8 + 6, z0, z4, 4)) + +/* +** mla_lane_w8p7_z0_z4_5: +** add (w8|w9|w10|w11), w8, #?7 +** umlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_5, svuint16x4_t, svuint16_t, + svmla_lane_za64_u16_vg4x4 (w8 + 7, z0, z4, 5), + svmla_lane_za64_vg4x4 (w8 + 7, z0, z4, 5)) + +/* +** mla_lane_w8p8_z0_z4_6: +** add (w8|w9|w10|w11), w8, #?8 +** umlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[6\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_6, svuint16x4_t, svuint16_t, + svmla_lane_za64_u16_vg4x4 (w8 + 8, z0, z4, 6), + 
svmla_lane_za64_vg4x4 (w8 + 8, z0, z4, 6))
+
+/*
+** mla_lane_w0m1_z0_z4_7:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_7, svuint16x4_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x4 (w0 - 1, z0, z4, 7),
+ svmla_lane_za64_vg4x4 (w0 - 1, z0, z4, 7))
+
+/*
+** mla_lane_w8_z4_z15_0:
+** str d15, \[sp, #?-16\]!
+** umlall za\.d\[w8, 0:3, vgx4\], {z4\.h - z7\.h}, z15\.h\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_0, svuint16x4_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x4 (w8, z4, z15, 0),
+ svmla_lane_za64_vg4x4 (w8, z4, z15, 0))
+
+/*
+** mla_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** umlall za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, \1\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_3, svuint16x4_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x4 (w8, z28, z16, 3),
+ svmla_lane_za64_vg4x4 (w8, z28, z16, 3))
+
+/*
+** mla_lane_w8_z17_z7_4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umlall za\.d\[w8, 0:3, vgx4\], [^\n]+, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_4, svuint16x4_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x4 (w8, z17, z7, 4),
+ svmla_lane_za64_vg4x4 (w8, z17, z7, 4))
+
+/*
+** mla_lane_w8_z22_z4_6:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umlall za\.d\[w8, 0:3, vgx4\], [^\n]+, z4\.h\[6\]
+** ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_6, svuint16x4_t, svuint16_t,
+ svmla_lane_za64_u16_vg4x4 (w8, z22, z4, 6),
+ svmla_lane_za64_vg4x4 (w8, z22, z4, 6))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_bf16_vg2x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_bf16_vg2x1.c
new file mode 100644
index 0000000000000000000000000000000000000000..ffa67361683944746320f563e546ab75b9b131cd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_bf16_vg2x1.c
@@ -0,0 +1,148 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** bfmlal za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_0_z0_z0, svbfloat16_t,
+ svmla_za32_bf16_vg2x1 (0, z0, z0),
+ svmla_za32_vg2x1 (0, z0, z0))
+
+/*
+** mla_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** bfmlal za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w0_z0_z3, svbfloat16_t,
+ svmla_za32_bf16_vg2x1 (w0, z0, z3),
+ svmla_za32_vg2x1 (w0, z0, z3))
+
+/*
+** mla_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** bfmlal za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w7_z0_z3, svbfloat16_t,
+ svmla_za32_bf16_vg2x1 (w7, z0, z3),
+ svmla_za32_vg2x1 (w7, z0, z3))
+
+/*
+** mla_w8_z7_z3:
+** bfmlal za\.s\[w8, 0:1\], z7\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8_z7_z3, svbfloat16_t,
+ svmla_za32_bf16_vg2x1 (w8, z7, z3),
+ svmla_za32_vg2x1 (w8, z7, z3))
+
+/*
+** mla_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** bfmlal za\.s\[w8, 0:1\], z31\.h, 
\1\.h +** ret +*/ +TEST_ZA_X1 (mla_w8_z31_z16, svbfloat16_t, + svmla_za32_bf16_vg2x1 (w8, z31, z16), + svmla_za32_vg2x1 (w8, z31, z16)) + +/* +** mla_w8p1_z0_z0: +** add (w8|w9|w10|w11), w8, #?1 +** bfmlal za\.s\[\1, 0:1\], z0\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p1_z0_z0, svbfloat16_t, + svmla_za32_bf16_vg2x1 (w8 + 1, z0, z0), + svmla_za32_vg2x1 (w8 + 1, z0, z0)) + +/* +** mla_w8p2_z23_z0: +** bfmlal za\.s\[w8, 2:3\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p2_z23_z0, svbfloat16_t, + svmla_za32_bf16_vg2x1 (w8 + 2, z23, z0), + svmla_za32_vg2x1 (w8 + 2, z23, z0)) + +/* +** mla_w11p6_z23_z0: +** bfmlal za\.s\[w11, 6:7\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w11p6_z23_z0, svbfloat16_t, + svmla_za32_bf16_vg2x1 (w11 + 6, z23, z0), + svmla_za32_vg2x1 (w11 + 6, z23, z0)) + +/* +** mla_w8p7_z7_z7: +** add (w8|w9|w10|w11), w8, #?7 +** bfmlal za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p7_z7_z7, svbfloat16_t, + svmla_za32_bf16_vg2x1 (w8 + 7, z7, z7), + svmla_za32_vg2x1 (w8 + 7, z7, z7)) + +/* +** mla_w11p10_z23_z0: +** bfmlal za\.s\[w11, 10:11\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w11p10_z23_z0, svbfloat16_t, + svmla_za32_bf16_vg2x1 (w11 + 10, z23, z0), + svmla_za32_vg2x1 (w11 + 10, z23, z0)) + +/* +** mla_w8p14_z23_z0: +** bfmlal za\.s\[w8, 14:15\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p14_z23_z0, svbfloat16_t, + svmla_za32_bf16_vg2x1 (w8 + 14, z23, z0), + svmla_za32_vg2x1 (w8 + 14, z23, z0)) + +/* +** mla_w8p15_z7_z7: +** add (w8|w9|w10|w11), w8, #?15 +** bfmlal za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p15_z7_z7, svbfloat16_t, + svmla_za32_bf16_vg2x1 (w8 + 15, z7, z7), + svmla_za32_vg2x1 (w8 + 15, z7, z7)) + +/* +** mla_w8p16_z7_z7: +** add (w8|w9|w10|w11), w8, #?16 +** bfmlal za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p16_z7_z7, svbfloat16_t, + svmla_za32_bf16_vg2x1 (w8 + 16, z7, z7), + svmla_za32_vg2x1 (w8 + 16, z7, z7)) + +/* +** mla_w8m1_z16_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** bfmlal za\.s\[\1, 0:1\], z16\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8m1_z16_z0, svbfloat16_t, + svmla_za32_bf16_vg2x1 (w8 - 1, z16, z0), + svmla_za32_vg2x1 (w8 - 1, z16, z0)) + +/* +** mla_w12_z0_z3: +** mov (w8|w9|w10|w11), w12 +** bfmlal za\.s\[\1, 0:1\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mla_w12_z0_z3, svbfloat16_t, + svmla_za32_bf16_vg2x1 (w12, z0, z3), + svmla_za32_vg2x1 (w12, z0, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_bf16_vg2x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_bf16_vg2x2.c new file mode 100644 index 0000000000000000000000000000000000000000..db432e9b02ff1223b36ac43d09cdf6d771af9786 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_bf16_vg2x2.c @@ -0,0 +1,247 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** bfmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svbfloat16x2_t, + svmla_za32_bf16_vg2x2 (0, z0, z0), + svmla_za32_vg2x2 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** bfmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svbfloat16x2_t, + svmla_za32_bf16_vg2x2 (w0, z0, z0), + svmla_za32_vg2x2 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** bfmlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svbfloat16x2_t, + svmla_za32_bf16_vg2x2 (w8, z0, z4), + 
svmla_za32_vg2x2 (w8, z0, z4)) + +/* +** mla_w8_z4_z18: +** bfmlal za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z18, svbfloat16x2_t, + svmla_za32_bf16_vg2x2 (w8, z4, z18), + svmla_za32_vg2x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z23: +** ... +** bfmlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z23, svbfloat16x2_t, + svmla_za32_bf16_vg2x2 (w8, z0, z23), + svmla_za32_vg2x2 (w8, z0, z23)) + +/* +** mla_w8_z23_z0: +** ... +** bfmlal za\.s\[w8, 0:1, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svbfloat16x2_t, + svmla_za32_bf16_vg2x2 (w8, z23, z0), + svmla_za32_vg2x2 (w8, z23, z0)) + +/* +** mla_w8_z18_z28: +** bfmlal za\.s\[w8, 0:1, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z28, svbfloat16x2_t, + svmla_za32_bf16_vg2x2 (w8, z18, z28), + svmla_za32_vg2x2 (w8, z18, z28)) + +/* +** mla_w8_z28_z4: +** bfmlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z4, svbfloat16x2_t, + svmla_za32_bf16_vg2x2 (w8, z28, z4), + svmla_za32_vg2x2 (w8, z28, z4)) + +/* +** mla_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** bfmlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p1_z4_z0, svbfloat16x2_t, + svmla_za32_bf16_vg2x2 (w8 + 1, z4, z0), + svmla_za32_vg2x2 (w8 + 1, z4, z0)) + +/* +** mla_w8p2_z4_z0: +** bfmlal za\.s\[w8, 2:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p2_z4_z0, svbfloat16x2_t, + svmla_za32_bf16_vg2x2 (w8 + 2, z4, z0), + svmla_za32_vg2x2 (w8 + 2, z4, z0)) + +/* +** mla_w8p6_z4_z0: +** bfmlal za\.s\[w8, 6:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p6_z4_z0, svbfloat16x2_t, + svmla_za32_bf16_vg2x2 (w8 + 6, z4, z0), + svmla_za32_vg2x2 (w8 + 6, z4, z0)) + +/* +** mla_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** bfmlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svbfloat16x2_t, + svmla_za32_bf16_vg2x2 (w8 + 7, z4, z0), + svmla_za32_vg2x2 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** bfmlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svbfloat16x2_t, + svmla_za32_bf16_vg2x2 (w8 + 8, z4, z4), + svmla_za32_vg2x2 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** bfmlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svbfloat16x2_t, + svmla_za32_bf16_vg2x2 (w8 - 1, z4, z0), + svmla_za32_vg2x2 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** bfmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmla_single_za32_bf16_vg2x2 (0, z1, z0), + svmla_za32_vg2x2 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** bfmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmla_single_za32_bf16_vg2x2 (w0, z1, z0), + svmla_za32_vg2x2 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** bfmlal za\.s\[w8, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmla_single_za32_bf16_vg2x2 (w8, z1, z0), + svmla_za32_vg2x2 (w8, z1, z0)) + +/* +** 
mla_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** bfmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmla_single_za32_bf16_vg2x2 (w8 + 1, z1, z0), + svmla_za32_vg2x2 (w8 + 1, z1, z0)) + +/* +** mla_single_w8p4_z20_z0: +** bfmlal za\.s\[w8, 4:5, vgx2\], {z20\.h - z21\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svbfloat16x2_t, svbfloat16_t, + svmla_single_za32_bf16_vg2x2 (w8 + 4, z20, z0), + svmla_za32_vg2x2 (w8 + 4, z20, z0)) + +/* +** mla_single_w8p6_z27_z0: +** bfmlal za\.s\[w8, 6:7, vgx2\], {z27\.h - z28\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svbfloat16x2_t, svbfloat16_t, + svmla_single_za32_bf16_vg2x2 (w8 + 6, z27, z0), + svmla_za32_vg2x2 (w8 + 6, z27, z0)) + +/* +** mla_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** bfmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmla_single_za32_bf16_vg2x2 (w8 + 7, z1, z0), + svmla_za32_vg2x2 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** bfmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmla_single_za32_bf16_vg2x2 (w8 + 8, z1, z0), + svmla_za32_vg2x2 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** bfmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmla_single_za32_bf16_vg2x2 (w0 - 1, z1, z0), + svmla_za32_vg2x2 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** bfmlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svbfloat16x2_t, svbfloat16_t, + svmla_single_za32_bf16_vg2x2 (w8, z0, z15), + svmla_za32_vg2x2 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** bfmlal za\.s\[w8, 0:1, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svbfloat16x2_t, svbfloat16_t, + svmla_single_za32_bf16_vg2x2 (w8, z20, z16), + svmla_za32_vg2x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_bf16_vg2x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_bf16_vg2x4.c new file mode 100644 index 0000000000000000000000000000000000000000..74282782fbec4cdf0469d8299b0f719269ad89de --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_bf16_vg2x4.c @@ -0,0 +1,258 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** bfmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svbfloat16x4_t, + svmla_za32_bf16_vg2x4 (0, z0, z0), + svmla_za32_vg2x4 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** bfmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svbfloat16x4_t, + svmla_za32_bf16_vg2x4 (w0, z0, z0), + svmla_za32_vg2x4 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** bfmlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svbfloat16x4_t, + svmla_za32_bf16_vg2x4 (w8, z0, z4), + svmla_za32_vg2x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. 
*/ + +/* +** mla_w8_z0_z18: +** ... +** bfmlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z18, svbfloat16x4_t, + svmla_za32_bf16_vg2x4 (w8, z0, z18), + svmla_za32_vg2x4 (w8, z0, z18)) + +/* +** mla_w8_z18_z0: +** ... +** bfmlal za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z0, svbfloat16x4_t, + svmla_za32_bf16_vg2x4 (w8, z18, z0), + svmla_za32_vg2x4 (w8, z18, z0)) + +/* +** mla_w8_z0_z23: +** ... +** bfmlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z23, svbfloat16x4_t, + svmla_za32_bf16_vg2x4 (w8, z0, z23), + svmla_za32_vg2x4 (w8, z0, z23)) + +/* +** mla_w8_z23_z0: +** ... +** bfmlal za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svbfloat16x4_t, + svmla_za32_bf16_vg2x4 (w8, z23, z0), + svmla_za32_vg2x4 (w8, z23, z0)) + +/* +** mla_w8_z4_z28: +** bfmlal za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z28, svbfloat16x4_t, + svmla_za32_bf16_vg2x4 (w8, z4, z28), + svmla_za32_vg2x4 (w8, z4, z28)) + +/* +** mla_w8_z28_z0: +** bfmlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z0, svbfloat16x4_t, + svmla_za32_bf16_vg2x4 (w8, z28, z0), + svmla_za32_vg2x4 (w8, z28, z0)) + +/* +** mla_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** bfmlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p1_z4_z0, svbfloat16x4_t, + svmla_za32_bf16_vg2x4 (w8 + 1, z4, z0), + svmla_za32_vg2x4 (w8 + 1, z4, z0)) + +/* +** mla_w8p2_z4_z0: +** bfmlal za\.s\[w8, 2:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p2_z4_z0, svbfloat16x4_t, + svmla_za32_bf16_vg2x4 (w8 + 2, z4, z0), + svmla_za32_vg2x4 (w8 + 2, z4, z0)) + +/* +** mla_w8p6_z4_z0: +** bfmlal za\.s\[w8, 6:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p6_z4_z0, svbfloat16x4_t, + svmla_za32_bf16_vg2x4 (w8 + 6, z4, z0), + svmla_za32_vg2x4 (w8 + 6, z4, z0)) + +/* +** mla_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** bfmlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svbfloat16x4_t, + svmla_za32_bf16_vg2x4 (w8 + 7, z4, z0), + svmla_za32_vg2x4 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** bfmlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svbfloat16x4_t, + svmla_za32_bf16_vg2x4 (w8 + 8, z4, z4), + svmla_za32_vg2x4 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** bfmlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svbfloat16x4_t, + svmla_za32_bf16_vg2x4 (w8 - 1, z4, z0), + svmla_za32_vg2x4 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** bfmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmla_single_za32_bf16_vg2x4 (0, z1, z0), + svmla_za32_vg2x4 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** bfmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmla_single_za32_bf16_vg2x4 (w0, z1, z0), + svmla_za32_vg2x4 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** bfmlal za\.s\[w8, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svbfloat16x4_t, 
svbfloat16_t, + svmla_single_za32_bf16_vg2x4 (w8, z1, z0), + svmla_za32_vg2x4 (w8, z1, z0)) + +/* +** mla_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** bfmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmla_single_za32_bf16_vg2x4 (w8 + 1, z1, z0), + svmla_za32_vg2x4 (w8 + 1, z1, z0)) + +/* +** mla_single_w8p4_z20_z0: +** bfmlal za\.s\[w8, 4:5, vgx4\], {z20\.h - z23\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svbfloat16x4_t, svbfloat16_t, + svmla_single_za32_bf16_vg2x4 (w8 + 4, z20, z0), + svmla_za32_vg2x4 (w8 + 4, z20, z0)) + +/* +** mla_single_w8p6_z27_z0: +** bfmlal za\.s\[w8, 6:7, vgx4\], {z27\.h - z30\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svbfloat16x4_t, svbfloat16_t, + svmla_single_za32_bf16_vg2x4 (w8 + 6, z27, z0), + svmla_za32_vg2x4 (w8 + 6, z27, z0)) + +/* +** mla_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** bfmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmla_single_za32_bf16_vg2x4 (w8 + 7, z1, z0), + svmla_za32_vg2x4 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** bfmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmla_single_za32_bf16_vg2x4 (w8 + 8, z1, z0), + svmla_za32_vg2x4 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** bfmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmla_single_za32_bf16_vg2x4 (w0 - 1, z1, z0), + svmla_za32_vg2x4 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** bfmlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svbfloat16x4_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x4 (w8, z0, z15),
+ svmla_za32_vg2x4 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** bfmlal za\.s\[w8, 0:1, vgx4\], {z20\.h - z23\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svbfloat16x4_t, svbfloat16_t,
+ svmla_single_za32_bf16_vg2x4 (w8, z20, z16),
+ svmla_za32_vg2x4 (w8, z20, z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f16_vg2x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f16_vg2x1.c
new file mode 100644
index 0000000000000000000000000000000000000000..7b74179152a16791bbea3522fed5c10a37b7cc26
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f16_vg2x1.c
@@ -0,0 +1,148 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mla_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmlal za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mla_0_z0_z0, svfloat16_t,
+ svmla_za32_f16_vg2x1 (0, z0, z0),
+ svmla_za32_vg2x1 (0, z0, z0))
+
+/*
+** mla_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** fmlal za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w0_z0_z3, svfloat16_t,
+ svmla_za32_f16_vg2x1 (w0, z0, z3),
+ svmla_za32_vg2x1 (w0, z0, z3))
+
+/*
+** mla_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** fmlal za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w7_z0_z3, svfloat16_t,
+ svmla_za32_f16_vg2x1 (w7, z0, z3),
+ svmla_za32_vg2x1 (w7, z0, z3))
+
+/*
+** mla_w8_z7_z3:
+** fmlal za\.s\[w8, 0:1\], z7\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mla_w8_z7_z3, svfloat16_t,
+ svmla_za32_f16_vg2x1 (w8, z7, z3),
+ svmla_za32_vg2x1 (w8, z7, z3))
+
+/*
+** mla_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmlal za\.s\[w8, 0:1\], z31\.h, 
\1\.h +** ret +*/ +TEST_ZA_X1 (mla_w8_z31_z16, svfloat16_t, + svmla_za32_f16_vg2x1 (w8, z31, z16), + svmla_za32_vg2x1 (w8, z31, z16)) + +/* +** mla_w8p1_z0_z0: +** add (w8|w9|w10|w11), w8, #?1 +** fmlal za\.s\[\1, 0:1\], z0\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p1_z0_z0, svfloat16_t, + svmla_za32_f16_vg2x1 (w8 + 1, z0, z0), + svmla_za32_vg2x1 (w8 + 1, z0, z0)) + +/* +** mla_w8p2_z23_z0: +** fmlal za\.s\[w8, 2:3\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p2_z23_z0, svfloat16_t, + svmla_za32_f16_vg2x1 (w8 + 2, z23, z0), + svmla_za32_vg2x1 (w8 + 2, z23, z0)) + +/* +** mla_w11p6_z23_z0: +** fmlal za\.s\[w11, 6:7\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w11p6_z23_z0, svfloat16_t, + svmla_za32_f16_vg2x1 (w11 + 6, z23, z0), + svmla_za32_vg2x1 (w11 + 6, z23, z0)) + +/* +** mla_w8p7_z7_z7: +** add (w8|w9|w10|w11), w8, #?7 +** fmlal za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p7_z7_z7, svfloat16_t, + svmla_za32_f16_vg2x1 (w8 + 7, z7, z7), + svmla_za32_vg2x1 (w8 + 7, z7, z7)) + +/* +** mla_w11p10_z23_z0: +** fmlal za\.s\[w11, 10:11\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w11p10_z23_z0, svfloat16_t, + svmla_za32_f16_vg2x1 (w11 + 10, z23, z0), + svmla_za32_vg2x1 (w11 + 10, z23, z0)) + +/* +** mla_w8p14_z23_z0: +** fmlal za\.s\[w8, 14:15\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p14_z23_z0, svfloat16_t, + svmla_za32_f16_vg2x1 (w8 + 14, z23, z0), + svmla_za32_vg2x1 (w8 + 14, z23, z0)) + +/* +** mla_w8p15_z7_z7: +** add (w8|w9|w10|w11), w8, #?15 +** fmlal za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p15_z7_z7, svfloat16_t, + svmla_za32_f16_vg2x1 (w8 + 15, z7, z7), + svmla_za32_vg2x1 (w8 + 15, z7, z7)) + +/* +** mla_w8p16_z7_z7: +** add (w8|w9|w10|w11), w8, #?16 +** fmlal za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p16_z7_z7, svfloat16_t, + svmla_za32_f16_vg2x1 (w8 + 16, z7, z7), + svmla_za32_vg2x1 (w8 + 16, z7, z7)) + +/* +** mla_w8m1_z16_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fmlal za\.s\[\1, 0:1\], z16\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8m1_z16_z0, svfloat16_t, + svmla_za32_f16_vg2x1 (w8 - 1, z16, z0), + svmla_za32_vg2x1 (w8 - 1, z16, z0)) + +/* +** mla_w12_z0_z3: +** mov (w8|w9|w10|w11), w12 +** fmlal za\.s\[\1, 0:1\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mla_w12_z0_z3, svfloat16_t, + svmla_za32_f16_vg2x1 (w12, z0, z3), + svmla_za32_vg2x1 (w12, z0, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f16_vg2x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f16_vg2x2.c new file mode 100644 index 0000000000000000000000000000000000000000..fcc4b05b7bdebd70aed49608e97029eb96a3238c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f16_vg2x2.c @@ -0,0 +1,247 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** fmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svfloat16x2_t, + svmla_za32_f16_vg2x2 (0, z0, z0), + svmla_za32_vg2x2 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** fmlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svfloat16x2_t, + svmla_za32_f16_vg2x2 (w0, z0, z0), + svmla_za32_vg2x2 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** fmlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svfloat16x2_t, + svmla_za32_f16_vg2x2 (w8, z0, z4), + svmla_za32_vg2x2 (w8, z0, z4)) + +/* +** 
mla_w8_z4_z18: +** fmlal za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z18, svfloat16x2_t, + svmla_za32_f16_vg2x2 (w8, z4, z18), + svmla_za32_vg2x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z23: +** ... +** fmlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z23, svfloat16x2_t, + svmla_za32_f16_vg2x2 (w8, z0, z23), + svmla_za32_vg2x2 (w8, z0, z23)) + +/* +** mla_w8_z23_z0: +** ... +** fmlal za\.s\[w8, 0:1, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svfloat16x2_t, + svmla_za32_f16_vg2x2 (w8, z23, z0), + svmla_za32_vg2x2 (w8, z23, z0)) + +/* +** mla_w8_z18_z28: +** fmlal za\.s\[w8, 0:1, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z28, svfloat16x2_t, + svmla_za32_f16_vg2x2 (w8, z18, z28), + svmla_za32_vg2x2 (w8, z18, z28)) + +/* +** mla_w8_z28_z4: +** fmlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z4, svfloat16x2_t, + svmla_za32_f16_vg2x2 (w8, z28, z4), + svmla_za32_vg2x2 (w8, z28, z4)) + +/* +** mla_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** fmlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p1_z4_z0, svfloat16x2_t, + svmla_za32_f16_vg2x2 (w8 + 1, z4, z0), + svmla_za32_vg2x2 (w8 + 1, z4, z0)) + +/* +** mla_w8p2_z4_z0: +** fmlal za\.s\[w8, 2:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p2_z4_z0, svfloat16x2_t, + svmla_za32_f16_vg2x2 (w8 + 2, z4, z0), + svmla_za32_vg2x2 (w8 + 2, z4, z0)) + +/* +** mla_w8p6_z4_z0: +** fmlal za\.s\[w8, 6:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p6_z4_z0, svfloat16x2_t, + svmla_za32_f16_vg2x2 (w8 + 6, z4, z0), + svmla_za32_vg2x2 (w8 + 6, z4, z0)) + +/* +** mla_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** fmlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svfloat16x2_t, + svmla_za32_f16_vg2x2 (w8 + 7, z4, z0), + svmla_za32_vg2x2 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** fmlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svfloat16x2_t, + svmla_za32_f16_vg2x2 (w8 + 8, z4, z4), + svmla_za32_vg2x2 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fmlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svfloat16x2_t, + svmla_za32_f16_vg2x2 (w8 - 1, z4, z0), + svmla_za32_vg2x2 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** fmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svfloat16x2_t, svfloat16_t, + svmla_single_za32_f16_vg2x2 (0, z1, z0), + svmla_za32_vg2x2 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** fmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svfloat16x2_t, svfloat16_t, + svmla_single_za32_f16_vg2x2 (w0, z1, z0), + svmla_za32_vg2x2 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** fmlal za\.s\[w8, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svfloat16x2_t, svfloat16_t, + svmla_single_za32_f16_vg2x2 (w8, z1, z0), + svmla_za32_vg2x2 (w8, z1, z0)) + +/* +** mla_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** fmlal za\.s\[\1, 0:1, vgx2\], 
{z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svfloat16x2_t, svfloat16_t, + svmla_single_za32_f16_vg2x2 (w8 + 1, z1, z0), + svmla_za32_vg2x2 (w8 + 1, z1, z0)) + +/* +** mla_single_w8p4_z20_z0: +** fmlal za\.s\[w8, 4:5, vgx2\], {z20\.h - z21\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svfloat16x2_t, svfloat16_t, + svmla_single_za32_f16_vg2x2 (w8 + 4, z20, z0), + svmla_za32_vg2x2 (w8 + 4, z20, z0)) + +/* +** mla_single_w8p6_z27_z0: +** fmlal za\.s\[w8, 6:7, vgx2\], {z27\.h - z28\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svfloat16x2_t, svfloat16_t, + svmla_single_za32_f16_vg2x2 (w8 + 6, z27, z0), + svmla_za32_vg2x2 (w8 + 6, z27, z0)) + +/* +** mla_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** fmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svfloat16x2_t, svfloat16_t, + svmla_single_za32_f16_vg2x2 (w8 + 7, z1, z0), + svmla_za32_vg2x2 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svfloat16x2_t, svfloat16_t, + svmla_single_za32_f16_vg2x2 (w8 + 8, z1, z0), + svmla_za32_vg2x2 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** fmlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svfloat16x2_t, svfloat16_t, + svmla_single_za32_f16_vg2x2 (w0 - 1, z1, z0), + svmla_za32_vg2x2 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** fmlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svfloat16x2_t, svfloat16_t, + svmla_single_za32_f16_vg2x2 (w8, z0, z15), + svmla_za32_vg2x2 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** fmlal za\.s\[w8, 0:1, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svfloat16x2_t, svfloat16_t, + svmla_single_za32_f16_vg2x2 (w8, z20, z16), + svmla_za32_vg2x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f16_vg2x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f16_vg2x4.c new file mode 100644 index 0000000000000000000000000000000000000000..f87552882e5ab14a52c49026befbaab7af86a2f5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f16_vg2x4.c @@ -0,0 +1,258 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** fmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svfloat16x4_t, + svmla_za32_f16_vg2x4 (0, z0, z0), + svmla_za32_vg2x4 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** fmlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svfloat16x4_t, + svmla_za32_f16_vg2x4 (w0, z0, z0), + svmla_za32_vg2x4 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** fmlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svfloat16x4_t, + svmla_za32_f16_vg2x4 (w8, z0, z4), + svmla_za32_vg2x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z18: +** ... 
+** fmlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z18, svfloat16x4_t, + svmla_za32_f16_vg2x4 (w8, z0, z18), + svmla_za32_vg2x4 (w8, z0, z18)) + +/* +** mla_w8_z18_z0: +** ... +** fmlal za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z0, svfloat16x4_t, + svmla_za32_f16_vg2x4 (w8, z18, z0), + svmla_za32_vg2x4 (w8, z18, z0)) + +/* +** mla_w8_z0_z23: +** ... +** fmlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z23, svfloat16x4_t, + svmla_za32_f16_vg2x4 (w8, z0, z23), + svmla_za32_vg2x4 (w8, z0, z23)) + +/* +** mla_w8_z23_z0: +** ... +** fmlal za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svfloat16x4_t, + svmla_za32_f16_vg2x4 (w8, z23, z0), + svmla_za32_vg2x4 (w8, z23, z0)) + +/* +** mla_w8_z4_z28: +** fmlal za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z28, svfloat16x4_t, + svmla_za32_f16_vg2x4 (w8, z4, z28), + svmla_za32_vg2x4 (w8, z4, z28)) + +/* +** mla_w8_z28_z0: +** fmlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z0, svfloat16x4_t, + svmla_za32_f16_vg2x4 (w8, z28, z0), + svmla_za32_vg2x4 (w8, z28, z0)) + +/* +** mla_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** fmlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p1_z4_z0, svfloat16x4_t, + svmla_za32_f16_vg2x4 (w8 + 1, z4, z0), + svmla_za32_vg2x4 (w8 + 1, z4, z0)) + +/* +** mla_w8p2_z4_z0: +** fmlal za\.s\[w8, 2:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p2_z4_z0, svfloat16x4_t, + svmla_za32_f16_vg2x4 (w8 + 2, z4, z0), + svmla_za32_vg2x4 (w8 + 2, z4, z0)) + +/* +** mla_w8p6_z4_z0: +** fmlal za\.s\[w8, 6:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p6_z4_z0, svfloat16x4_t, + svmla_za32_f16_vg2x4 (w8 + 6, z4, z0), + svmla_za32_vg2x4 (w8 + 6, z4, z0)) + +/* +** mla_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** fmlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svfloat16x4_t, + svmla_za32_f16_vg2x4 (w8 + 7, z4, z0), + svmla_za32_vg2x4 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** fmlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svfloat16x4_t, + svmla_za32_f16_vg2x4 (w8 + 8, z4, z4), + svmla_za32_vg2x4 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fmlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svfloat16x4_t, + svmla_za32_f16_vg2x4 (w8 - 1, z4, z0), + svmla_za32_vg2x4 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** fmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svfloat16x4_t, svfloat16_t, + svmla_single_za32_f16_vg2x4 (0, z1, z0), + svmla_za32_vg2x4 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** fmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svfloat16x4_t, svfloat16_t, + svmla_single_za32_f16_vg2x4 (w0, z1, z0), + svmla_za32_vg2x4 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** fmlal za\.s\[w8, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svfloat16x4_t, svfloat16_t, + svmla_single_za32_f16_vg2x4 (w8, z1, z0), + svmla_za32_vg2x4 (w8, z1, 
z0)) + +/* +** mla_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** fmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svfloat16x4_t, svfloat16_t, + svmla_single_za32_f16_vg2x4 (w8 + 1, z1, z0), + svmla_za32_vg2x4 (w8 + 1, z1, z0)) + +/* +** mla_single_w8p4_z20_z0: +** fmlal za\.s\[w8, 4:5, vgx4\], {z20\.h - z23\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svfloat16x4_t, svfloat16_t, + svmla_single_za32_f16_vg2x4 (w8 + 4, z20, z0), + svmla_za32_vg2x4 (w8 + 4, z20, z0)) + +/* +** mla_single_w8p6_z27_z0: +** fmlal za\.s\[w8, 6:7, vgx4\], {z27\.h - z30\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svfloat16x4_t, svfloat16_t, + svmla_single_za32_f16_vg2x4 (w8 + 6, z27, z0), + svmla_za32_vg2x4 (w8 + 6, z27, z0)) + +/* +** mla_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** fmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svfloat16x4_t, svfloat16_t, + svmla_single_za32_f16_vg2x4 (w8 + 7, z1, z0), + svmla_za32_vg2x4 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svfloat16x4_t, svfloat16_t, + svmla_single_za32_f16_vg2x4 (w8 + 8, z1, z0), + svmla_za32_vg2x4 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** fmlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svfloat16x4_t, svfloat16_t, + svmla_single_za32_f16_vg2x4 (w0 - 1, z1, z0), + svmla_za32_vg2x4 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** fmlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svfloat16x4_t, svfloat16_t, + svmla_single_za32_f16_vg2x4 (w8, z0, z15), + svmla_za32_vg2x4 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** fmlal za\.s\[w8, 0:1, vgx4\], {z20\.h - z23\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svfloat16x4_t, svfloat16_t, + svmla_single_za32_f16_vg2x4 (w8, z20, z16), + svmla_za32_vg2x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f32_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f32_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..9f0ca68907b3a73dcdbf30cff03712e28931a479 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f32_vg1x2.c @@ -0,0 +1,180 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** fmla za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svfloat32x2_t, + svmla_za32_f32_vg1x2 (0, z0, z0), + svmla_za32_vg1x2 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** fmla za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svfloat32x2_t, + svmla_za32_f32_vg1x2 (w0, z0, z0), + svmla_za32_vg1x2 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** fmla za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svfloat32x2_t, + svmla_za32_f32_vg1x2 (w8, z0, z4), + svmla_za32_vg1x2 (w8, z0, z4)) + +/* +** mla_w8_z4_z18: +** fmla za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z18\.s - z19\.s} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z18, 
svfloat32x2_t, + svmla_za32_f32_vg1x2 (w8, z4, z18), + svmla_za32_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z23_z0: +** ... +** fmla za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svfloat32x2_t, + svmla_za32_f32_vg1x2 (w8, z23, z0), + svmla_za32_vg1x2 (w8, z23, z0)) + +/* +** mla_w8_z18_z23: +** ... +** fmla za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z23, svfloat32x2_t, + svmla_za32_f32_vg1x2 (w8, z18, z23), + svmla_za32_vg1x2 (w8, z18, z23)) + +/* +** mla_w8_z4_z28: +** fmla za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z28\.s - z29\.s} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z28, svfloat32x2_t, + svmla_za32_f32_vg1x2 (w8, z4, z28), + svmla_za32_vg1x2 (w8, z4, z28)) + +/* +** mla_w8p7_z4_z0: +** fmla za\.s\[w8, 7, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svfloat32x2_t, + svmla_za32_f32_vg1x2 (w8 + 7, z4, z0), + svmla_za32_vg1x2 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** fmla za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svfloat32x2_t, + svmla_za32_f32_vg1x2 (w8 + 8, z4, z4), + svmla_za32_vg1x2 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fmla za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svfloat32x2_t, + svmla_za32_f32_vg1x2 (w8 - 1, z4, z0), + svmla_za32_vg1x2 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** fmla za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svfloat32x2_t, svfloat32_t, + svmla_single_za32_f32_vg1x2 (0, z1, z0), + svmla_za32_vg1x2 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** fmla za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svfloat32x2_t, svfloat32_t, + svmla_single_za32_f32_vg1x2 (w0, z1, z0), + svmla_za32_vg1x2 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** fmla za\.s\[w8, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svfloat32x2_t, svfloat32_t, + svmla_single_za32_f32_vg1x2 (w8, z1, z0), + svmla_za32_vg1x2 (w8, z1, z0)) + +/* +** mla_single_w8p7_z1_z0: +** fmla za\.s\[w8, 7, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svfloat32x2_t, svfloat32_t, + svmla_single_za32_f32_vg1x2 (w8 + 7, z1, z0), + svmla_za32_vg1x2 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fmla za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svfloat32x2_t, svfloat32_t, + svmla_single_za32_f32_vg1x2 (w8 + 8, z1, z0), + svmla_za32_vg1x2 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** fmla za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svfloat32x2_t, svfloat32_t, + svmla_single_za32_f32_vg1x2 (w0 - 1, z1, z0), + svmla_za32_vg1x2 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** fmla za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, z15\.s +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svfloat32x2_t, svfloat32_t, + svmla_single_za32_f32_vg1x2 (w8, z0, z15), + svmla_za32_vg1x2 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** fmla za\.s\[w8, 0, vgx2\], {z20\.s - z21\.s}, \1\.s +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svfloat32x2_t, svfloat32_t, + svmla_single_za32_f32_vg1x2 (w8, z20, z16), + svmla_za32_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f32_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f32_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..6afc9cdf518a83d4c06f8f9a1f8612b1f9cf30ab --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_f32_vg1x4.c @@ -0,0 +1,172 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** fmla za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svfloat32x4_t, + svmla_za32_f32_vg1x4 (0, z0, z0), + svmla_za32_vg1x4 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** fmla za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svfloat32x4_t, + svmla_za32_f32_vg1x4 (w0, z0, z0), + svmla_za32_vg1x4 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** fmla za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svfloat32x4_t, + svmla_za32_f32_vg1x4 (w8, z0, z4), + svmla_za32_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z18: +** ... +** fmla za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z18, svfloat32x4_t, + svmla_za32_f32_vg1x4 (w8, z0, z18), + svmla_za32_vg1x4 (w8, z0, z18)) + +/* +** mla_w8_z18_z28: +** ... +** fmla za\.s\[w8, 0, vgx4\], [^\n]+, {z28\.s - z31\.s} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z28, svfloat32x4_t, + svmla_za32_f32_vg1x4 (w8, z18, z28), + svmla_za32_vg1x4 (w8, z18, z28)) + +/* +** mla_w8_z28_z23: +** ... 
+** fmla za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z23, svfloat32x4_t, + svmla_za32_f32_vg1x4 (w8, z28, z23), + svmla_za32_vg1x4 (w8, z28, z23)) + +/* +** mla_w8p7_z4_z0: +** fmla za\.s\[w8, 7, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svfloat32x4_t, + svmla_za32_f32_vg1x4 (w8 + 7, z4, z0), + svmla_za32_vg1x4 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** fmla za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svfloat32x4_t, + svmla_za32_f32_vg1x4 (w8 + 8, z4, z4), + svmla_za32_vg1x4 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fmla za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svfloat32x4_t, + svmla_za32_f32_vg1x4 (w8 - 1, z4, z0), + svmla_za32_vg1x4 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** fmla za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svfloat32x4_t, svfloat32_t, + svmla_single_za32_f32_vg1x4 (0, z1, z0), + svmla_za32_vg1x4 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** fmla za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svfloat32x4_t, svfloat32_t, + svmla_single_za32_f32_vg1x4 (w0, z1, z0), + svmla_za32_vg1x4 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** fmla za\.s\[w8, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svfloat32x4_t, svfloat32_t, + svmla_single_za32_f32_vg1x4 (w8, z1, z0), + svmla_za32_vg1x4 (w8, z1, z0)) + +/* +** mla_single_w8p7_z1_z0: +** fmla za\.s\[w8, 7, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svfloat32x4_t, svfloat32_t, + svmla_single_za32_f32_vg1x4 (w8 + 7, z1, z0), + svmla_za32_vg1x4 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fmla za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svfloat32x4_t, svfloat32_t, + svmla_single_za32_f32_vg1x4 (w8 + 8, z1, z0), + svmla_za32_vg1x4 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** fmla za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svfloat32x4_t, svfloat32_t, + svmla_single_za32_f32_vg1x4 (w0 - 1, z1, z0), + svmla_za32_vg1x4 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** fmla za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, z15\.s +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svfloat32x4_t, svfloat32_t, + svmla_single_za32_f32_vg1x4 (w8, z0, z15), + svmla_za32_vg1x4 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** fmla za\.s\[w8, 0, vgx4\], {z20\.s - z23\.s}, \1\.s +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svfloat32x4_t, svfloat32_t, + svmla_single_za32_f32_vg1x4 (w8, z20, z16), + svmla_za32_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s16_vg2x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s16_vg2x1.c new file mode 100644 index 0000000000000000000000000000000000000000..cb7842038f6c08a881efe9e264639bca0b31ffed --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s16_vg2x1.c @@ -0,0 +1,148 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** smlal za\.s\[\1, 0:1\], z0\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_0_z0_z0, svint16_t, + svmla_za32_s16_vg2x1 (0, z0, z0), + svmla_za32_vg2x1 (0, z0, z0)) + +/* +** mla_w0_z0_z3: +** mov (w8|w9|w10|w11), w0 +** smlal za\.s\[\1, 0:1\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mla_w0_z0_z3, svint16_t, + svmla_za32_s16_vg2x1 (w0, z0, z3), + svmla_za32_vg2x1 (w0, z0, z3)) + +/* +** mla_w7_z0_z3: +** mov (w8|w9|w10|w11), w7 +** smlal za\.s\[\1, 0:1\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mla_w7_z0_z3, svint16_t, + svmla_za32_s16_vg2x1 (w7, z0, z3), + svmla_za32_vg2x1 (w7, z0, z3)) + +/* +** mla_w8_z7_z3: +** smlal za\.s\[w8, 0:1\], z7\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mla_w8_z7_z3, svint16_t, + svmla_za32_s16_vg2x1 (w8, z7, z3), + svmla_za32_vg2x1 (w8, z7, z3)) + +/* +** mla_w8_z31_z16: +** mov (z[0-7])\.d, z16\.d +** smlal za\.s\[w8, 0:1\], z31\.h, 
\1\.h +** ret +*/ +TEST_ZA_X1 (mla_w8_z31_z16, svint16_t, + svmla_za32_s16_vg2x1 (w8, z31, z16), + svmla_za32_vg2x1 (w8, z31, z16)) + +/* +** mla_w8p1_z0_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlal za\.s\[\1, 0:1\], z0\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p1_z0_z0, svint16_t, + svmla_za32_s16_vg2x1 (w8 + 1, z0, z0), + svmla_za32_vg2x1 (w8 + 1, z0, z0)) + +/* +** mla_w8p2_z23_z0: +** smlal za\.s\[w8, 2:3\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p2_z23_z0, svint16_t, + svmla_za32_s16_vg2x1 (w8 + 2, z23, z0), + svmla_za32_vg2x1 (w8 + 2, z23, z0)) + +/* +** mla_w11p6_z23_z0: +** smlal za\.s\[w11, 6:7\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w11p6_z23_z0, svint16_t, + svmla_za32_s16_vg2x1 (w11 + 6, z23, z0), + svmla_za32_vg2x1 (w11 + 6, z23, z0)) + +/* +** mla_w8p7_z7_z7: +** add (w8|w9|w10|w11), w8, #?7 +** smlal za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p7_z7_z7, svint16_t, + svmla_za32_s16_vg2x1 (w8 + 7, z7, z7), + svmla_za32_vg2x1 (w8 + 7, z7, z7)) + +/* +** mla_w11p10_z23_z0: +** smlal za\.s\[w11, 10:11\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w11p10_z23_z0, svint16_t, + svmla_za32_s16_vg2x1 (w11 + 10, z23, z0), + svmla_za32_vg2x1 (w11 + 10, z23, z0)) + +/* +** mla_w8p14_z23_z0: +** smlal za\.s\[w8, 14:15\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p14_z23_z0, svint16_t, + svmla_za32_s16_vg2x1 (w8 + 14, z23, z0), + svmla_za32_vg2x1 (w8 + 14, z23, z0)) + +/* +** mla_w8p15_z7_z7: +** add (w8|w9|w10|w11), w8, #?15 +** smlal za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p15_z7_z7, svint16_t, + svmla_za32_s16_vg2x1 (w8 + 15, z7, z7), + svmla_za32_vg2x1 (w8 + 15, z7, z7)) + +/* +** mla_w8p16_z7_z7: +** add (w8|w9|w10|w11), w8, #?16 +** smlal za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p16_z7_z7, svint16_t, + svmla_za32_s16_vg2x1 (w8 + 16, z7, z7), + svmla_za32_vg2x1 (w8 + 16, z7, z7)) + +/* +** mla_w8m1_z16_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** smlal za\.s\[\1, 0:1\], z16\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8m1_z16_z0, svint16_t, + svmla_za32_s16_vg2x1 (w8 - 1, z16, z0), + svmla_za32_vg2x1 (w8 - 1, z16, z0)) + +/* +** mla_w12_z0_z3: +** mov (w8|w9|w10|w11), w12 +** smlal za\.s\[\1, 0:1\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mla_w12_z0_z3, svint16_t, + svmla_za32_s16_vg2x1 (w12, z0, z3), + svmla_za32_vg2x1 (w12, z0, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s16_vg2x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s16_vg2x2.c new file mode 100644 index 0000000000000000000000000000000000000000..0832374932c3f67b2ce85592f90abbc0e7cd3dd6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s16_vg2x2.c @@ -0,0 +1,247 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** smlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svint16x2_t, + svmla_za32_s16_vg2x2 (0, z0, z0), + svmla_za32_vg2x2 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** smlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svint16x2_t, + svmla_za32_s16_vg2x2 (w0, z0, z0), + svmla_za32_vg2x2 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** smlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svint16x2_t, + svmla_za32_s16_vg2x2 (w8, z0, z4), + svmla_za32_vg2x2 (w8, z0, z4)) + +/* +** mla_w8_z4_z18: +** smlal za\.s\[w8, 0:1, 
vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z18, svint16x2_t, + svmla_za32_s16_vg2x2 (w8, z4, z18), + svmla_za32_vg2x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z23: +** ... +** smlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z23, svint16x2_t, + svmla_za32_s16_vg2x2 (w8, z0, z23), + svmla_za32_vg2x2 (w8, z0, z23)) + +/* +** mla_w8_z23_z0: +** ... +** smlal za\.s\[w8, 0:1, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svint16x2_t, + svmla_za32_s16_vg2x2 (w8, z23, z0), + svmla_za32_vg2x2 (w8, z23, z0)) + +/* +** mla_w8_z18_z28: +** smlal za\.s\[w8, 0:1, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z28, svint16x2_t, + svmla_za32_s16_vg2x2 (w8, z18, z28), + svmla_za32_vg2x2 (w8, z18, z28)) + +/* +** mla_w8_z28_z4: +** smlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z4, svint16x2_t, + svmla_za32_s16_vg2x2 (w8, z28, z4), + svmla_za32_vg2x2 (w8, z28, z4)) + +/* +** mla_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p1_z4_z0, svint16x2_t, + svmla_za32_s16_vg2x2 (w8 + 1, z4, z0), + svmla_za32_vg2x2 (w8 + 1, z4, z0)) + +/* +** mla_w8p2_z4_z0: +** smlal za\.s\[w8, 2:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p2_z4_z0, svint16x2_t, + svmla_za32_s16_vg2x2 (w8 + 2, z4, z0), + svmla_za32_vg2x2 (w8 + 2, z4, z0)) + +/* +** mla_w8p6_z4_z0: +** smlal za\.s\[w8, 6:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p6_z4_z0, svint16x2_t, + svmla_za32_s16_vg2x2 (w8 + 6, z4, z0), + svmla_za32_vg2x2 (w8 + 6, z4, z0)) + +/* +** mla_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svint16x2_t, + svmla_za32_s16_vg2x2 (w8 + 7, z4, z0), + svmla_za32_vg2x2 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** smlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svint16x2_t, + svmla_za32_s16_vg2x2 (w8 + 8, z4, z4), + svmla_za32_vg2x2 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** smlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svint16x2_t, + svmla_za32_s16_vg2x2 (w8 - 1, z4, z0), + svmla_za32_vg2x2 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** smlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svint16x2_t, svint16_t, + svmla_single_za32_s16_vg2x2 (0, z1, z0), + svmla_za32_vg2x2 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** smlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svint16x2_t, svint16_t, + svmla_single_za32_s16_vg2x2 (w0, z1, z0), + svmla_za32_vg2x2 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** smlal za\.s\[w8, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svint16x2_t, svint16_t, + svmla_single_za32_s16_vg2x2 (w8, z1, z0), + svmla_za32_vg2x2 (w8, z1, z0)) + +/* +** mla_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, 
svint16x2_t, svint16_t, + svmla_single_za32_s16_vg2x2 (w8 + 1, z1, z0), + svmla_za32_vg2x2 (w8 + 1, z1, z0)) + +/* +** mla_single_w8p4_z20_z0: +** smlal za\.s\[w8, 4:5, vgx2\], {z20\.h - z21\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svint16x2_t, svint16_t, + svmla_single_za32_s16_vg2x2 (w8 + 4, z20, z0), + svmla_za32_vg2x2 (w8 + 4, z20, z0)) + +/* +** mla_single_w8p6_z27_z0: +** smlal za\.s\[w8, 6:7, vgx2\], {z27\.h - z28\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svint16x2_t, svint16_t, + svmla_single_za32_s16_vg2x2 (w8 + 6, z27, z0), + svmla_za32_vg2x2 (w8 + 6, z27, z0)) + +/* +** mla_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svint16x2_t, svint16_t, + svmla_single_za32_s16_vg2x2 (w8 + 7, z1, z0), + svmla_za32_vg2x2 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** smlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svint16x2_t, svint16_t, + svmla_single_za32_s16_vg2x2 (w8 + 8, z1, z0), + svmla_za32_vg2x2 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** smlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svint16x2_t, svint16_t, + svmla_single_za32_s16_vg2x2 (w0 - 1, z1, z0), + svmla_za32_vg2x2 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** smlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svint16x2_t, svint16_t, + svmla_single_za32_s16_vg2x2 (w8, z0, z15), + svmla_za32_vg2x2 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** smlal za\.s\[w8, 0:1, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svint16x2_t, svint16_t, + svmla_single_za32_s16_vg2x2 (w8, z20, z16), + svmla_za32_vg2x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s16_vg2x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s16_vg2x4.c new file mode 100644 index 0000000000000000000000000000000000000000..049cf0a2805c27a31119c1ae949c7188fd235389 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s16_vg2x4.c @@ -0,0 +1,258 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** smlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svint16x4_t, + svmla_za32_s16_vg2x4 (0, z0, z0), + svmla_za32_vg2x4 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** smlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svint16x4_t, + svmla_za32_s16_vg2x4 (w0, z0, z0), + svmla_za32_vg2x4 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** smlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svint16x4_t, + svmla_za32_s16_vg2x4 (w8, z0, z4), + svmla_za32_vg2x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z18: +** ... +** smlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z18, svint16x4_t, + svmla_za32_s16_vg2x4 (w8, z0, z18), + svmla_za32_vg2x4 (w8, z0, z18)) + +/* +** mla_w8_z18_z0: +** ... 
+** smlal za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z0, svint16x4_t, + svmla_za32_s16_vg2x4 (w8, z18, z0), + svmla_za32_vg2x4 (w8, z18, z0)) + +/* +** mla_w8_z0_z23: +** ... +** smlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z23, svint16x4_t, + svmla_za32_s16_vg2x4 (w8, z0, z23), + svmla_za32_vg2x4 (w8, z0, z23)) + +/* +** mla_w8_z23_z0: +** ... +** smlal za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svint16x4_t, + svmla_za32_s16_vg2x4 (w8, z23, z0), + svmla_za32_vg2x4 (w8, z23, z0)) + +/* +** mla_w8_z4_z28: +** smlal za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z28, svint16x4_t, + svmla_za32_s16_vg2x4 (w8, z4, z28), + svmla_za32_vg2x4 (w8, z4, z28)) + +/* +** mla_w8_z28_z0: +** smlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z0, svint16x4_t, + svmla_za32_s16_vg2x4 (w8, z28, z0), + svmla_za32_vg2x4 (w8, z28, z0)) + +/* +** mla_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p1_z4_z0, svint16x4_t, + svmla_za32_s16_vg2x4 (w8 + 1, z4, z0), + svmla_za32_vg2x4 (w8 + 1, z4, z0)) + +/* +** mla_w8p2_z4_z0: +** smlal za\.s\[w8, 2:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p2_z4_z0, svint16x4_t, + svmla_za32_s16_vg2x4 (w8 + 2, z4, z0), + svmla_za32_vg2x4 (w8 + 2, z4, z0)) + +/* +** mla_w8p6_z4_z0: +** smlal za\.s\[w8, 6:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p6_z4_z0, svint16x4_t, + svmla_za32_s16_vg2x4 (w8 + 6, z4, z0), + svmla_za32_vg2x4 (w8 + 6, z4, z0)) + +/* +** mla_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svint16x4_t, + svmla_za32_s16_vg2x4 (w8 + 7, z4, z0), + svmla_za32_vg2x4 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** smlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svint16x4_t, + svmla_za32_s16_vg2x4 (w8 + 8, z4, z4), + svmla_za32_vg2x4 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** smlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svint16x4_t, + svmla_za32_s16_vg2x4 (w8 - 1, z4, z0), + svmla_za32_vg2x4 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** smlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svint16x4_t, svint16_t, + svmla_single_za32_s16_vg2x4 (0, z1, z0), + svmla_za32_vg2x4 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** smlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svint16x4_t, svint16_t, + svmla_single_za32_s16_vg2x4 (w0, z1, z0), + svmla_za32_vg2x4 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** smlal za\.s\[w8, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svint16x4_t, svint16_t, + svmla_single_za32_s16_vg2x4 (w8, z1, z0), + svmla_za32_vg2x4 (w8, z1, z0)) + +/* +** mla_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svint16x4_t, svint16_t, + svmla_single_za32_s16_vg2x4 (w8 + 1, z1, z0), + 
svmla_za32_vg2x4 (w8 + 1, z1, z0)) + +/* +** mla_single_w8p4_z20_z0: +** smlal za\.s\[w8, 4:5, vgx4\], {z20\.h - z23\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svint16x4_t, svint16_t, + svmla_single_za32_s16_vg2x4 (w8 + 4, z20, z0), + svmla_za32_vg2x4 (w8 + 4, z20, z0)) + +/* +** mla_single_w8p6_z27_z0: +** smlal za\.s\[w8, 6:7, vgx4\], {z27\.h - z30\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svint16x4_t, svint16_t, + svmla_single_za32_s16_vg2x4 (w8 + 6, z27, z0), + svmla_za32_vg2x4 (w8 + 6, z27, z0)) + +/* +** mla_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svint16x4_t, svint16_t, + svmla_single_za32_s16_vg2x4 (w8 + 7, z1, z0), + svmla_za32_vg2x4 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** smlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svint16x4_t, svint16_t, + svmla_single_za32_s16_vg2x4 (w8 + 8, z1, z0), + svmla_za32_vg2x4 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** smlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svint16x4_t, svint16_t, + svmla_single_za32_s16_vg2x4 (w0 - 1, z1, z0), + svmla_za32_vg2x4 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** smlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svint16x4_t, svint16_t, + svmla_single_za32_s16_vg2x4 (w8, z0, z15), + svmla_za32_vg2x4 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** smlal za\.s\[w8, 0:1, vgx4\], {z20\.h - z23\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svint16x4_t, svint16_t, + svmla_single_za32_s16_vg2x4 (w8, z20, z16), + svmla_za32_vg2x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s8_vg4x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s8_vg4x1.c new file mode 100644 index 0000000000000000000000000000000000000000..da9d04e6ef8e4ba81b315f35082ea23e63ec7cec --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s8_vg4x1.c @@ -0,0 +1,149 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** smlall za\.s\[\1, 0:3\], z0\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_0_z0_z0, svint8_t, + svmla_za32_s8_vg4x1 (0, z0, z0), + svmla_za32_vg4x1 (0, z0, z0)) + +/* +** mla_w0_z0_z3: +** mov (w8|w9|w10|w11), w0 +** smlall za\.s\[\1, 0:3\], z0\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mla_w0_z0_z3, svint8_t, + svmla_za32_s8_vg4x1 (w0, z0, z3), + svmla_za32_vg4x1 (w0, z0, z3)) + +/* +** mla_w7_z0_z3: +** mov (w8|w9|w10|w11), w7 +** smlall za\.s\[\1, 0:3\], z0\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mla_w7_z0_z3, svint8_t, + svmla_za32_s8_vg4x1 (w7, z0, z3), + svmla_za32_vg4x1 (w7, z0, z3)) + +/* +** mla_w8_z7_z3: +** smlall za\.s\[w8, 0:3\], z7\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mla_w8_z7_z3, svint8_t, + svmla_za32_s8_vg4x1 (w8, z7, z3), + svmla_za32_vg4x1 (w8, z7, z3)) + +/* +** mla_w8_z31_z16: +** mov (z[0-7])\.d, z16\.d +** smlall za\.s\[w8, 0:3\], z31\.b, 
\1\.b +** ret +*/ +TEST_ZA_X1 (mla_w8_z31_z16, svint8_t, + svmla_za32_s8_vg4x1 (w8, z31, z16), + svmla_za32_vg4x1 (w8, z31, z16)) + +/* +** mla_w8p1_z0_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlall za\.s\[\1, 0:3\], z0\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w8p1_z0_z0, svint8_t, + svmla_za32_s8_vg4x1 (w8 + 1, z0, z0), + svmla_za32_vg4x1 (w8 + 1, z0, z0)) + +/* +** mla_w10p4_z23_z0: +** smlall za\.s\[w10, 4:7\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w10p4_z23_z0, svint8_t, + svmla_za32_s8_vg4x1 (w10 + 4, z23, z0), + svmla_za32_vg4x1 (w10 + 4, z23, z0)) + +/* +** mla_w11p6_z23_z0: +** add (w8|w9|w10|w11), w11, #?6 +** smlall za\.s\[\1, 0:3\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w11p6_z23_z0, svint8_t, + svmla_za32_s8_vg4x1 (w11 + 6, z23, z0), + svmla_za32_vg4x1 (w11 + 6, z23, z0)) + +/* +** mla_w9p8_z7_z7: +** smlall za\.s\[w9, 8:11\], z7\.b, z7\.b +** ret +*/ +TEST_ZA_X1 (mla_w9p8_z7_z7, svint8_t, + svmla_za32_s8_vg4x1 (w9 + 8, z7, z7), + svmla_za32_vg4x1 (w9 + 8, z7, z7)) + +/* +** mla_w11p12_z23_z0: +** smlall za\.s\[w11, 12:15\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w11p12_z23_z0, svint8_t, + svmla_za32_s8_vg4x1 (w11 + 12, z23, z0), + svmla_za32_vg4x1 (w11 + 12, z23, z0)) + +/* +** mla_w8p14_z23_z0: +** add (w8|w9|w10|w11), w8, #?14 +** smlall za\.s\[\1, 0:3\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w8p14_z23_z0, svint8_t, + svmla_za32_s8_vg4x1 (w8 + 14, z23, z0), + svmla_za32_vg4x1 (w8 + 14, z23, z0)) + +/* +** mla_w8p15_z7_z7: +** add (w8|w9|w10|w11), w8, #?15 +** smlall za\.s\[\1, 0:3\], z7\.b, z7\.b +** ret +*/ +TEST_ZA_X1 (mla_w8p15_z7_z7, svint8_t, + svmla_za32_s8_vg4x1 (w8 + 15, z7, z7), + svmla_za32_vg4x1 (w8 + 15, z7, z7)) + +/* +** mla_w8p16_z7_z7: +** add (w8|w9|w10|w11), w8, #?16 +** smlall za\.s\[\1, 0:3\], z7\.b, z7\.b +** ret +*/ +TEST_ZA_X1 (mla_w8p16_z7_z7, svint8_t, + svmla_za32_s8_vg4x1 (w8 + 16, z7, z7), + svmla_za32_vg4x1 (w8 + 16, z7, z7)) + +/* +** mla_w8m1_z16_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** smlall za\.s\[\1, 0:3\], z16\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w8m1_z16_z0, svint8_t, + svmla_za32_s8_vg4x1 (w8 - 1, z16, z0), + svmla_za32_vg4x1 (w8 - 1, z16, z0)) + +/* +** mla_w12_z0_z3: +** mov (w8|w9|w10|w11), w12 +** smlall za\.s\[\1, 0:3\], z0\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mla_w12_z0_z3, svint8_t, + svmla_za32_s8_vg4x1 (w12, z0, z3), + svmla_za32_vg4x1 (w12, z0, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s8_vg4x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s8_vg4x2.c new file mode 100644 index 0000000000000000000000000000000000000000..67dd252cc112374815bc5dad756aa01f3fd69482 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s8_vg4x2.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** smlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svint8x2_t, + svmla_za32_s8_vg4x2 (0, z0, z0), + svmla_za32_vg4x2 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** smlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svint8x2_t, + svmla_za32_s8_vg4x2 (w0, z0, z0), + svmla_za32_vg4x2 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** smlall za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svint8x2_t, + svmla_za32_s8_vg4x2 (w8, z0, z4), + svmla_za32_vg4x2 (w8, z0, z4)) + +/* +** mla_w8_z4_z18: +** 
smlall za\.s\[w8, 0:3, vgx2\], {z4\.b - z5\.b}, {z18\.b - z19\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z18, svint8x2_t, + svmla_za32_s8_vg4x2 (w8, z4, z18), + svmla_za32_vg4x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z23: +** ... +** smlall za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z23, svint8x2_t, + svmla_za32_s8_vg4x2 (w8, z0, z23), + svmla_za32_vg4x2 (w8, z0, z23)) + +/* +** mla_w8_z23_z0: +** ... +** smlall za\.s\[w8, 0:3, vgx2\], [^\n]+, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svint8x2_t, + svmla_za32_s8_vg4x2 (w8, z23, z0), + svmla_za32_vg4x2 (w8, z23, z0)) + +/* +** mla_w8_z18_z28: +** smlall za\.s\[w8, 0:3, vgx2\], {z18\.b - z19\.b}, {z28\.b - z29\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z28, svint8x2_t, + svmla_za32_s8_vg4x2 (w8, z18, z28), + svmla_za32_vg4x2 (w8, z18, z28)) + +/* +** mla_w8_z28_z4: +** smlall za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z4, svint8x2_t, + svmla_za32_s8_vg4x2 (w8, z28, z4), + svmla_za32_vg4x2 (w8, z28, z4)) + +/* +** mla_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p1_z4_z0, svint8x2_t, + svmla_za32_s8_vg4x2 (w8 + 1, z4, z0), + svmla_za32_vg4x2 (w8 + 1, z4, z0)) + +/* +** mla_w8p2_z4_z0: +** add (w8|w9|w10|w11), w8, #?2 +** smlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p2_z4_z0, svint8x2_t, + svmla_za32_s8_vg4x2 (w8 + 2, z4, z0), + svmla_za32_vg4x2 (w8 + 2, z4, z0)) + +/* +** mla_w11p4_z4_z0: +** smlall za\.s\[w11, 4:7, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w11p4_z4_z0, svint8x2_t, + svmla_za32_s8_vg4x2 (w11 + 4, z4, z0), + svmla_za32_vg4x2 (w11 + 4, z4, z0)) + +/* +** mla_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svint8x2_t, + svmla_za32_s8_vg4x2 (w8 + 7, z4, z0), + svmla_za32_vg4x2 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** smlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svint8x2_t, + svmla_za32_s8_vg4x2 (w8 + 8, z4, z4), + svmla_za32_vg4x2 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** smlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svint8x2_t, + svmla_za32_s8_vg4x2 (w8 - 1, z4, z0), + svmla_za32_vg4x2 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** smlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svint8x2_t, svint8_t, + svmla_single_za32_s8_vg4x2 (0, z1, z0), + svmla_za32_vg4x2 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** smlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svint8x2_t, svint8_t, + svmla_single_za32_s8_vg4x2 (w0, z1, z0), + svmla_za32_vg4x2 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** smlall za\.s\[w8, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svint8x2_t, svint8_t, + svmla_single_za32_s8_vg4x2 (w8, z1, z0), + svmla_za32_vg4x2 (w8, z1, z0)) + +/* +** mla_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ 
+TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svint8x2_t, svint8_t, + svmla_single_za32_s8_vg4x2 (w8 + 1, z1, z0), + svmla_za32_vg4x2 (w8 + 1, z1, z0)) + +/* +** mla_single_w8p2_z20_z0: +** add (w8|w9|w10|w11), w8, #?2 +** smlall za\.s\[\1, 0:3, vgx2\], {z20\.b - z21\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p2_z20_z0, svint8x2_t, svint8_t, + svmla_single_za32_s8_vg4x2 (w8 + 2, z20, z0), + svmla_za32_vg4x2 (w8 + 2, z20, z0)) + +/* +** mla_single_w11p4_z27_z0: +** smlall za\.s\[w11, 4:7, vgx2\], {z27\.b - z28\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w11p4_z27_z0, svint8x2_t, svint8_t, + svmla_single_za32_s8_vg4x2 (w11 + 4, z27, z0), + svmla_za32_vg4x2 (w11 + 4, z27, z0)) + +/* +** mla_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svint8x2_t, svint8_t, + svmla_single_za32_s8_vg4x2 (w8 + 7, z1, z0), + svmla_za32_vg4x2 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** smlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svint8x2_t, svint8_t, + svmla_single_za32_s8_vg4x2 (w8 + 8, z1, z0), + svmla_za32_vg4x2 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** smlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svint8x2_t, svint8_t, + svmla_single_za32_s8_vg4x2 (w0 - 1, z1, z0), + svmla_za32_vg4x2 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** smlall za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svint8x2_t, svint8_t, + svmla_single_za32_s8_vg4x2 (w8, z0, z15), + svmla_za32_vg4x2 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** smlall za\.s\[w8, 0:3, vgx2\], {z20\.b - z21\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svint8x2_t, svint8_t, + svmla_single_za32_s8_vg4x2 (w8, z20, z16), + svmla_za32_vg4x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s8_vg4x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s8_vg4x4.c new file mode 100644 index 0000000000000000000000000000000000000000..eb286005aaca7d8a2a0f5a27cd9e113e1f87f5fd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_s8_vg4x4.c @@ -0,0 +1,260 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** smlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svint8x4_t, + svmla_za32_s8_vg4x4 (0, z0, z0), + svmla_za32_vg4x4 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** smlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svint8x4_t, + svmla_za32_s8_vg4x4 (w0, z0, z0), + svmla_za32_vg4x4 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** smlall za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svint8x4_t, + svmla_za32_s8_vg4x4 (w8, z0, z4), + svmla_za32_vg4x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z18: +** ... 
+** smlall za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z18, svint8x4_t, + svmla_za32_s8_vg4x4 (w8, z0, z18), + svmla_za32_vg4x4 (w8, z0, z18)) + +/* +** mla_w8_z18_z0: +** ... +** smlall za\.s\[w8, 0:3, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z0, svint8x4_t, + svmla_za32_s8_vg4x4 (w8, z18, z0), + svmla_za32_vg4x4 (w8, z18, z0)) + +/* +** mla_w8_z0_z23: +** ... +** smlall za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z23, svint8x4_t, + svmla_za32_s8_vg4x4 (w8, z0, z23), + svmla_za32_vg4x4 (w8, z0, z23)) + +/* +** mla_w8_z23_z0: +** ... +** smlall za\.s\[w8, 0:3, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svint8x4_t, + svmla_za32_s8_vg4x4 (w8, z23, z0), + svmla_za32_vg4x4 (w8, z23, z0)) + +/* +** mla_w8_z4_z28: +** smlall za\.s\[w8, 0:3, vgx4\], {z4\.b - z7\.b}, {z28\.b - z31\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z28, svint8x4_t, + svmla_za32_s8_vg4x4 (w8, z4, z28), + svmla_za32_vg4x4 (w8, z4, z28)) + +/* +** mla_w8_z28_z0: +** smlall za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z0, svint8x4_t, + svmla_za32_s8_vg4x4 (w8, z28, z0), + svmla_za32_vg4x4 (w8, z28, z0)) + +/* +** mla_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p1_z4_z0, svint8x4_t, + svmla_za32_s8_vg4x4 (w8 + 1, z4, z0), + svmla_za32_vg4x4 (w8 + 1, z4, z0)) + +/* +** mla_w8p2_z4_z0: +** add (w8|w9|w10|w11), w8, #?2 +** smlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p2_z4_z0, svint8x4_t, + svmla_za32_s8_vg4x4 (w8 + 2, z4, z0), + svmla_za32_vg4x4 (w8 + 2, z4, z0)) + +/* +** mla_w11p4_z4_z0: +** smlall za\.s\[w11, 4:7, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w11p4_z4_z0, svint8x4_t, + svmla_za32_s8_vg4x4 (w11 + 4, z4, z0), + svmla_za32_vg4x4 (w11 + 4, z4, z0)) + +/* +** mla_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svint8x4_t, + svmla_za32_s8_vg4x4 (w8 + 7, z4, z0), + svmla_za32_vg4x4 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** smlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svint8x4_t, + svmla_za32_s8_vg4x4 (w8 + 8, z4, z4), + svmla_za32_vg4x4 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** smlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svint8x4_t, + svmla_za32_s8_vg4x4 (w8 - 1, z4, z0), + svmla_za32_vg4x4 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** smlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svint8x4_t, svint8_t, + svmla_single_za32_s8_vg4x4 (0, z1, z0), + svmla_za32_vg4x4 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** smlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svint8x4_t, svint8_t, + svmla_single_za32_s8_vg4x4 (w0, z1, z0), + svmla_za32_vg4x4 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** smlall za\.s\[w8, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svint8x4_t, svint8_t, + svmla_single_za32_s8_vg4x4 (w8, z1, z0), + svmla_za32_vg4x4 (w8, z1, z0)) + +/* +** 
mla_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svint8x4_t, svint8_t, + svmla_single_za32_s8_vg4x4 (w8 + 1, z1, z0), + svmla_za32_vg4x4 (w8 + 1, z1, z0)) + +/* +** mla_single_w8p4_z20_z0: +** smlall za\.s\[w8, 4:7, vgx4\], {z20\.b - z23\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svint8x4_t, svint8_t, + svmla_single_za32_s8_vg4x4 (w8 + 4, z20, z0), + svmla_za32_vg4x4 (w8 + 4, z20, z0)) + +/* +** mla_single_w8p6_z27_z0: +** add (w8|w9|w10|w11), w8, #?6 +** smlall za\.s\[\1, 0:3, vgx4\], {z27\.b - z30\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svint8x4_t, svint8_t, + svmla_single_za32_s8_vg4x4 (w8 + 6, z27, z0), + svmla_za32_vg4x4 (w8 + 6, z27, z0)) + +/* +** mla_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svint8x4_t, svint8_t, + svmla_single_za32_s8_vg4x4 (w8 + 7, z1, z0), + svmla_za32_vg4x4 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** smlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svint8x4_t, svint8_t, + svmla_single_za32_s8_vg4x4 (w8 + 8, z1, z0), + svmla_za32_vg4x4 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** smlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svint8x4_t, svint8_t, + svmla_single_za32_s8_vg4x4 (w0 - 1, z1, z0), + svmla_za32_vg4x4 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** smlall za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svint8x4_t, svint8_t, + svmla_single_za32_s8_vg4x4 (w8, z0, z15), + svmla_za32_vg4x4 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** smlall za\.s\[w8, 0:3, vgx4\], {z20\.b - z23\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svint8x4_t, svint8_t, + svmla_single_za32_s8_vg4x4 (w8, z20, z16), + svmla_za32_vg4x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u16_vg2x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u16_vg2x1.c new file mode 100644 index 0000000000000000000000000000000000000000..9072787884b2402d3db35006ce91bbe016303443 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u16_vg2x1.c @@ -0,0 +1,148 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** umlal za\.s\[\1, 0:1\], z0\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_0_z0_z0, svuint16_t, + svmla_za32_u16_vg2x1 (0, z0, z0), + svmla_za32_vg2x1 (0, z0, z0)) + +/* +** mla_w0_z0_z3: +** mov (w8|w9|w10|w11), w0 +** umlal za\.s\[\1, 0:1\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mla_w0_z0_z3, svuint16_t, + svmla_za32_u16_vg2x1 (w0, z0, z3), + svmla_za32_vg2x1 (w0, z0, z3)) + +/* +** mla_w7_z0_z3: +** mov (w8|w9|w10|w11), w7 +** umlal za\.s\[\1, 0:1\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mla_w7_z0_z3, svuint16_t, + svmla_za32_u16_vg2x1 (w7, z0, z3), + svmla_za32_vg2x1 (w7, z0, z3)) + +/* +** mla_w8_z7_z3: +** umlal za\.s\[w8, 0:1\], z7\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mla_w8_z7_z3, svuint16_t, + svmla_za32_u16_vg2x1 (w8, z7, z3), + svmla_za32_vg2x1 (w8, z7, z3)) + +/* +** mla_w8_z31_z16: +** 
mov (z[0-7])\.d, z16\.d +** umlal za\.s\[w8, 0:1\], z31\.h, \1\.h +** ret +*/ +TEST_ZA_X1 (mla_w8_z31_z16, svuint16_t, + svmla_za32_u16_vg2x1 (w8, z31, z16), + svmla_za32_vg2x1 (w8, z31, z16)) + +/* +** mla_w8p1_z0_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlal za\.s\[\1, 0:1\], z0\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p1_z0_z0, svuint16_t, + svmla_za32_u16_vg2x1 (w8 + 1, z0, z0), + svmla_za32_vg2x1 (w8 + 1, z0, z0)) + +/* +** mla_w8p2_z23_z0: +** umlal za\.s\[w8, 2:3\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p2_z23_z0, svuint16_t, + svmla_za32_u16_vg2x1 (w8 + 2, z23, z0), + svmla_za32_vg2x1 (w8 + 2, z23, z0)) + +/* +** mla_w11p6_z23_z0: +** umlal za\.s\[w11, 6:7\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w11p6_z23_z0, svuint16_t, + svmla_za32_u16_vg2x1 (w11 + 6, z23, z0), + svmla_za32_vg2x1 (w11 + 6, z23, z0)) + +/* +** mla_w8p7_z7_z7: +** add (w8|w9|w10|w11), w8, #?7 +** umlal za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p7_z7_z7, svuint16_t, + svmla_za32_u16_vg2x1 (w8 + 7, z7, z7), + svmla_za32_vg2x1 (w8 + 7, z7, z7)) + +/* +** mla_w11p10_z23_z0: +** umlal za\.s\[w11, 10:11\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w11p10_z23_z0, svuint16_t, + svmla_za32_u16_vg2x1 (w11 + 10, z23, z0), + svmla_za32_vg2x1 (w11 + 10, z23, z0)) + +/* +** mla_w8p14_z23_z0: +** umlal za\.s\[w8, 14:15\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p14_z23_z0, svuint16_t, + svmla_za32_u16_vg2x1 (w8 + 14, z23, z0), + svmla_za32_vg2x1 (w8 + 14, z23, z0)) + +/* +** mla_w8p15_z7_z7: +** add (w8|w9|w10|w11), w8, #?15 +** umlal za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p15_z7_z7, svuint16_t, + svmla_za32_u16_vg2x1 (w8 + 15, z7, z7), + svmla_za32_vg2x1 (w8 + 15, z7, z7)) + +/* +** mla_w8p16_z7_z7: +** add (w8|w9|w10|w11), w8, #?16 +** umlal za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p16_z7_z7, svuint16_t, + svmla_za32_u16_vg2x1 (w8 + 16, z7, z7), + svmla_za32_vg2x1 (w8 + 16, z7, z7)) + +/* +** mla_w8m1_z16_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** umlal za\.s\[\1, 0:1\], z16\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8m1_z16_z0, svuint16_t, + svmla_za32_u16_vg2x1 (w8 - 1, z16, z0), + svmla_za32_vg2x1 (w8 - 1, z16, z0)) + +/* +** mla_w12_z0_z3: +** mov (w8|w9|w10|w11), w12 +** umlal za\.s\[\1, 0:1\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mla_w12_z0_z3, svuint16_t, + svmla_za32_u16_vg2x1 (w12, z0, z3), + svmla_za32_vg2x1 (w12, z0, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u16_vg2x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u16_vg2x2.c new file mode 100644 index 0000000000000000000000000000000000000000..8f44f1856acb2237e29408494e17d3366e0c6aca --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u16_vg2x2.c @@ -0,0 +1,247 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** umlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svuint16x2_t, + svmla_za32_u16_vg2x2 (0, z0, z0), + svmla_za32_vg2x2 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** umlal za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svuint16x2_t, + svmla_za32_u16_vg2x2 (w0, z0, z0), + svmla_za32_vg2x2 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** umlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svuint16x2_t, + svmla_za32_u16_vg2x2 (w8, z0, z4), + 
svmla_za32_vg2x2 (w8, z0, z4)) + +/* +** mla_w8_z4_z18: +** umlal za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z18, svuint16x2_t, + svmla_za32_u16_vg2x2 (w8, z4, z18), + svmla_za32_vg2x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z23: +** ... +** umlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z23, svuint16x2_t, + svmla_za32_u16_vg2x2 (w8, z0, z23), + svmla_za32_vg2x2 (w8, z0, z23)) + +/* +** mla_w8_z23_z0: +** ... +** umlal za\.s\[w8, 0:1, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svuint16x2_t, + svmla_za32_u16_vg2x2 (w8, z23, z0), + svmla_za32_vg2x2 (w8, z23, z0)) + +/* +** mla_w8_z18_z28: +** umlal za\.s\[w8, 0:1, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z28, svuint16x2_t, + svmla_za32_u16_vg2x2 (w8, z18, z28), + svmla_za32_vg2x2 (w8, z18, z28)) + +/* +** mla_w8_z28_z4: +** umlal za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z4, svuint16x2_t, + svmla_za32_u16_vg2x2 (w8, z28, z4), + svmla_za32_vg2x2 (w8, z28, z4)) + +/* +** mla_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p1_z4_z0, svuint16x2_t, + svmla_za32_u16_vg2x2 (w8 + 1, z4, z0), + svmla_za32_vg2x2 (w8 + 1, z4, z0)) + +/* +** mla_w8p2_z4_z0: +** umlal za\.s\[w8, 2:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p2_z4_z0, svuint16x2_t, + svmla_za32_u16_vg2x2 (w8 + 2, z4, z0), + svmla_za32_vg2x2 (w8 + 2, z4, z0)) + +/* +** mla_w8p6_z4_z0: +** umlal za\.s\[w8, 6:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p6_z4_z0, svuint16x2_t, + svmla_za32_u16_vg2x2 (w8 + 6, z4, z0), + svmla_za32_vg2x2 (w8 + 6, z4, z0)) + +/* +** mla_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svuint16x2_t, + svmla_za32_u16_vg2x2 (w8 + 7, z4, z0), + svmla_za32_vg2x2 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** umlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svuint16x2_t, + svmla_za32_u16_vg2x2 (w8 + 8, z4, z4), + svmla_za32_vg2x2 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** umlal za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svuint16x2_t, + svmla_za32_u16_vg2x2 (w8 - 1, z4, z0), + svmla_za32_vg2x2 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** umlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svuint16x2_t, svuint16_t, + svmla_single_za32_u16_vg2x2 (0, z1, z0), + svmla_za32_vg2x2 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** umlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svuint16x2_t, svuint16_t, + svmla_single_za32_u16_vg2x2 (w0, z1, z0), + svmla_za32_vg2x2 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** umlal za\.s\[w8, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svuint16x2_t, svuint16_t, + svmla_single_za32_u16_vg2x2 (w8, z1, z0), + svmla_za32_vg2x2 (w8, z1, z0)) + +/* +** mla_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlal 
za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svuint16x2_t, svuint16_t, + svmla_single_za32_u16_vg2x2 (w8 + 1, z1, z0), + svmla_za32_vg2x2 (w8 + 1, z1, z0)) + +/* +** mla_single_w8p4_z20_z0: +** umlal za\.s\[w8, 4:5, vgx2\], {z20\.h - z21\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svuint16x2_t, svuint16_t, + svmla_single_za32_u16_vg2x2 (w8 + 4, z20, z0), + svmla_za32_vg2x2 (w8 + 4, z20, z0)) + +/* +** mla_single_w8p6_z27_z0: +** umlal za\.s\[w8, 6:7, vgx2\], {z27\.h - z28\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svuint16x2_t, svuint16_t, + svmla_single_za32_u16_vg2x2 (w8 + 6, z27, z0), + svmla_za32_vg2x2 (w8 + 6, z27, z0)) + +/* +** mla_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svuint16x2_t, svuint16_t, + svmla_single_za32_u16_vg2x2 (w8 + 7, z1, z0), + svmla_za32_vg2x2 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** umlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svuint16x2_t, svuint16_t, + svmla_single_za32_u16_vg2x2 (w8 + 8, z1, z0), + svmla_za32_vg2x2 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** umlal za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svuint16x2_t, svuint16_t, + svmla_single_za32_u16_vg2x2 (w0 - 1, z1, z0), + svmla_za32_vg2x2 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** umlal za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svuint16x2_t, svuint16_t, + svmla_single_za32_u16_vg2x2 (w8, z0, z15), + svmla_za32_vg2x2 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** umlal za\.s\[w8, 0:1, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svuint16x2_t, svuint16_t, + svmla_single_za32_u16_vg2x2 (w8, z20, z16), + svmla_za32_vg2x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u16_vg2x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u16_vg2x4.c new file mode 100644 index 0000000000000000000000000000000000000000..b87569a998f3c46b6398bef582a4116cb9f39c5e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u16_vg2x4.c @@ -0,0 +1,258 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** umlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svuint16x4_t, + svmla_za32_u16_vg2x4 (0, z0, z0), + svmla_za32_vg2x4 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** umlal za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svuint16x4_t, + svmla_za32_u16_vg2x4 (w0, z0, z0), + svmla_za32_vg2x4 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** umlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svuint16x4_t, + svmla_za32_u16_vg2x4 (w8, z0, z4), + svmla_za32_vg2x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z18: +** ... 
+** umlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z18, svuint16x4_t, + svmla_za32_u16_vg2x4 (w8, z0, z18), + svmla_za32_vg2x4 (w8, z0, z18)) + +/* +** mla_w8_z18_z0: +** ... +** umlal za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z0, svuint16x4_t, + svmla_za32_u16_vg2x4 (w8, z18, z0), + svmla_za32_vg2x4 (w8, z18, z0)) + +/* +** mla_w8_z0_z23: +** ... +** umlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z23, svuint16x4_t, + svmla_za32_u16_vg2x4 (w8, z0, z23), + svmla_za32_vg2x4 (w8, z0, z23)) + +/* +** mla_w8_z23_z0: +** ... +** umlal za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svuint16x4_t, + svmla_za32_u16_vg2x4 (w8, z23, z0), + svmla_za32_vg2x4 (w8, z23, z0)) + +/* +** mla_w8_z4_z28: +** umlal za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z28, svuint16x4_t, + svmla_za32_u16_vg2x4 (w8, z4, z28), + svmla_za32_vg2x4 (w8, z4, z28)) + +/* +** mla_w8_z28_z0: +** umlal za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z0, svuint16x4_t, + svmla_za32_u16_vg2x4 (w8, z28, z0), + svmla_za32_vg2x4 (w8, z28, z0)) + +/* +** mla_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p1_z4_z0, svuint16x4_t, + svmla_za32_u16_vg2x4 (w8 + 1, z4, z0), + svmla_za32_vg2x4 (w8 + 1, z4, z0)) + +/* +** mla_w8p2_z4_z0: +** umlal za\.s\[w8, 2:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p2_z4_z0, svuint16x4_t, + svmla_za32_u16_vg2x4 (w8 + 2, z4, z0), + svmla_za32_vg2x4 (w8 + 2, z4, z0)) + +/* +** mla_w8p6_z4_z0: +** umlal za\.s\[w8, 6:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p6_z4_z0, svuint16x4_t, + svmla_za32_u16_vg2x4 (w8 + 6, z4, z0), + svmla_za32_vg2x4 (w8 + 6, z4, z0)) + +/* +** mla_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svuint16x4_t, + svmla_za32_u16_vg2x4 (w8 + 7, z4, z0), + svmla_za32_vg2x4 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** umlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svuint16x4_t, + svmla_za32_u16_vg2x4 (w8 + 8, z4, z4), + svmla_za32_vg2x4 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** umlal za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svuint16x4_t, + svmla_za32_u16_vg2x4 (w8 - 1, z4, z0), + svmla_za32_vg2x4 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** umlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svuint16x4_t, svuint16_t, + svmla_single_za32_u16_vg2x4 (0, z1, z0), + svmla_za32_vg2x4 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** umlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svuint16x4_t, svuint16_t, + svmla_single_za32_u16_vg2x4 (w0, z1, z0), + svmla_za32_vg2x4 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** umlal za\.s\[w8, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svuint16x4_t, svuint16_t, + svmla_single_za32_u16_vg2x4 (w8, z1, z0), + svmla_za32_vg2x4 (w8, z1, z0)) + +/* +** 
mla_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svuint16x4_t, svuint16_t, + svmla_single_za32_u16_vg2x4 (w8 + 1, z1, z0), + svmla_za32_vg2x4 (w8 + 1, z1, z0)) + +/* +** mla_single_w8p4_z20_z0: +** umlal za\.s\[w8, 4:5, vgx4\], {z20\.h - z23\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svuint16x4_t, svuint16_t, + svmla_single_za32_u16_vg2x4 (w8 + 4, z20, z0), + svmla_za32_vg2x4 (w8 + 4, z20, z0)) + +/* +** mla_single_w8p6_z27_z0: +** umlal za\.s\[w8, 6:7, vgx4\], {z27\.h - z30\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svuint16x4_t, svuint16_t, + svmla_single_za32_u16_vg2x4 (w8 + 6, z27, z0), + svmla_za32_vg2x4 (w8 + 6, z27, z0)) + +/* +** mla_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svuint16x4_t, svuint16_t, + svmla_single_za32_u16_vg2x4 (w8 + 7, z1, z0), + svmla_za32_vg2x4 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** umlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svuint16x4_t, svuint16_t, + svmla_single_za32_u16_vg2x4 (w8 + 8, z1, z0), + svmla_za32_vg2x4 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** umlal za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svuint16x4_t, svuint16_t, + svmla_single_za32_u16_vg2x4 (w0 - 1, z1, z0), + svmla_za32_vg2x4 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** umlal za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svuint16x4_t, svuint16_t, + svmla_single_za32_u16_vg2x4 (w8, z0, z15), + svmla_za32_vg2x4 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** umlal za\.s\[w8, 0:1, vgx4\], {z20\.h - z23\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svuint16x4_t, svuint16_t, + svmla_single_za32_u16_vg2x4 (w8, z20, z16), + svmla_za32_vg2x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u8_vg4x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u8_vg4x1.c new file mode 100644 index 0000000000000000000000000000000000000000..99d75c9b967d4a1dc3515f6d2c5e9af6fc57b830 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u8_vg4x1.c @@ -0,0 +1,149 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** umlall za\.s\[\1, 0:3\], z0\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_0_z0_z0, svuint8_t, + svmla_za32_u8_vg4x1 (0, z0, z0), + svmla_za32_vg4x1 (0, z0, z0)) + +/* +** mla_w0_z0_z3: +** mov (w8|w9|w10|w11), w0 +** umlall za\.s\[\1, 0:3\], z0\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mla_w0_z0_z3, svuint8_t, + svmla_za32_u8_vg4x1 (w0, z0, z3), + svmla_za32_vg4x1 (w0, z0, z3)) + +/* +** mla_w7_z0_z3: +** mov (w8|w9|w10|w11), w7 +** umlall za\.s\[\1, 0:3\], z0\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mla_w7_z0_z3, svuint8_t, + svmla_za32_u8_vg4x1 (w7, z0, z3), + svmla_za32_vg4x1 (w7, z0, z3)) + +/* +** mla_w8_z7_z3: +** umlall za\.s\[w8, 0:3\], z7\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mla_w8_z7_z3, svuint8_t, + svmla_za32_u8_vg4x1 (w8, z7, z3), + svmla_za32_vg4x1 (w8, z7, z3)) + +/* +** mla_w8_z31_z16: +** mov 
(z[0-7])\.d, z16\.d +** umlall za\.s\[w8, 0:3\], z31\.b, \1\.b +** ret +*/ +TEST_ZA_X1 (mla_w8_z31_z16, svuint8_t, + svmla_za32_u8_vg4x1 (w8, z31, z16), + svmla_za32_vg4x1 (w8, z31, z16)) + +/* +** mla_w8p1_z0_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlall za\.s\[\1, 0:3\], z0\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w8p1_z0_z0, svuint8_t, + svmla_za32_u8_vg4x1 (w8 + 1, z0, z0), + svmla_za32_vg4x1 (w8 + 1, z0, z0)) + +/* +** mla_w10p4_z23_z0: +** umlall za\.s\[w10, 4:7\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w10p4_z23_z0, svuint8_t, + svmla_za32_u8_vg4x1 (w10 + 4, z23, z0), + svmla_za32_vg4x1 (w10 + 4, z23, z0)) + +/* +** mla_w11p6_z23_z0: +** add (w8|w9|w10|w11), w11, #?6 +** umlall za\.s\[\1, 0:3\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w11p6_z23_z0, svuint8_t, + svmla_za32_u8_vg4x1 (w11 + 6, z23, z0), + svmla_za32_vg4x1 (w11 + 6, z23, z0)) + +/* +** mla_w9p8_z7_z7: +** umlall za\.s\[w9, 8:11\], z7\.b, z7\.b +** ret +*/ +TEST_ZA_X1 (mla_w9p8_z7_z7, svuint8_t, + svmla_za32_u8_vg4x1 (w9 + 8, z7, z7), + svmla_za32_vg4x1 (w9 + 8, z7, z7)) + +/* +** mla_w11p12_z23_z0: +** umlall za\.s\[w11, 12:15\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w11p12_z23_z0, svuint8_t, + svmla_za32_u8_vg4x1 (w11 + 12, z23, z0), + svmla_za32_vg4x1 (w11 + 12, z23, z0)) + +/* +** mla_w8p14_z23_z0: +** add (w8|w9|w10|w11), w8, #?14 +** umlall za\.s\[\1, 0:3\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w8p14_z23_z0, svuint8_t, + svmla_za32_u8_vg4x1 (w8 + 14, z23, z0), + svmla_za32_vg4x1 (w8 + 14, z23, z0)) + +/* +** mla_w8p15_z7_z7: +** add (w8|w9|w10|w11), w8, #?15 +** umlall za\.s\[\1, 0:3\], z7\.b, z7\.b +** ret +*/ +TEST_ZA_X1 (mla_w8p15_z7_z7, svuint8_t, + svmla_za32_u8_vg4x1 (w8 + 15, z7, z7), + svmla_za32_vg4x1 (w8 + 15, z7, z7)) + +/* +** mla_w8p16_z7_z7: +** add (w8|w9|w10|w11), w8, #?16 +** umlall za\.s\[\1, 0:3\], z7\.b, z7\.b +** ret +*/ +TEST_ZA_X1 (mla_w8p16_z7_z7, svuint8_t, + svmla_za32_u8_vg4x1 (w8 + 16, z7, z7), + svmla_za32_vg4x1 (w8 + 16, z7, z7)) + +/* +** mla_w8m1_z16_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** umlall za\.s\[\1, 0:3\], z16\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w8m1_z16_z0, svuint8_t, + svmla_za32_u8_vg4x1 (w8 - 1, z16, z0), + svmla_za32_vg4x1 (w8 - 1, z16, z0)) + +/* +** mla_w12_z0_z3: +** mov (w8|w9|w10|w11), w12 +** umlall za\.s\[\1, 0:3\], z0\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mla_w12_z0_z3, svuint8_t, + svmla_za32_u8_vg4x1 (w12, z0, z3), + svmla_za32_vg4x1 (w12, z0, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u8_vg4x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u8_vg4x2.c new file mode 100644 index 0000000000000000000000000000000000000000..ae91dfbbf5b6821125cfc8ca5f86b9f4e8677296 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u8_vg4x2.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** umlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svuint8x2_t, + svmla_za32_u8_vg4x2 (0, z0, z0), + svmla_za32_vg4x2 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** umlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svuint8x2_t, + svmla_za32_u8_vg4x2 (w0, z0, z0), + svmla_za32_vg4x2 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** umlall za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svuint8x2_t, + svmla_za32_u8_vg4x2 (w8, 
z0, z4), + svmla_za32_vg4x2 (w8, z0, z4)) + +/* +** mla_w8_z4_z18: +** umlall za\.s\[w8, 0:3, vgx2\], {z4\.b - z5\.b}, {z18\.b - z19\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z18, svuint8x2_t, + svmla_za32_u8_vg4x2 (w8, z4, z18), + svmla_za32_vg4x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z23: +** ... +** umlall za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z23, svuint8x2_t, + svmla_za32_u8_vg4x2 (w8, z0, z23), + svmla_za32_vg4x2 (w8, z0, z23)) + +/* +** mla_w8_z23_z0: +** ... +** umlall za\.s\[w8, 0:3, vgx2\], [^\n]+, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svuint8x2_t, + svmla_za32_u8_vg4x2 (w8, z23, z0), + svmla_za32_vg4x2 (w8, z23, z0)) + +/* +** mla_w8_z18_z28: +** umlall za\.s\[w8, 0:3, vgx2\], {z18\.b - z19\.b}, {z28\.b - z29\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z28, svuint8x2_t, + svmla_za32_u8_vg4x2 (w8, z18, z28), + svmla_za32_vg4x2 (w8, z18, z28)) + +/* +** mla_w8_z28_z4: +** umlall za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z4, svuint8x2_t, + svmla_za32_u8_vg4x2 (w8, z28, z4), + svmla_za32_vg4x2 (w8, z28, z4)) + +/* +** mla_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p1_z4_z0, svuint8x2_t, + svmla_za32_u8_vg4x2 (w8 + 1, z4, z0), + svmla_za32_vg4x2 (w8 + 1, z4, z0)) + +/* +** mla_w8p2_z4_z0: +** add (w8|w9|w10|w11), w8, #?2 +** umlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p2_z4_z0, svuint8x2_t, + svmla_za32_u8_vg4x2 (w8 + 2, z4, z0), + svmla_za32_vg4x2 (w8 + 2, z4, z0)) + +/* +** mla_w11p4_z4_z0: +** umlall za\.s\[w11, 4:7, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w11p4_z4_z0, svuint8x2_t, + svmla_za32_u8_vg4x2 (w11 + 4, z4, z0), + svmla_za32_vg4x2 (w11 + 4, z4, z0)) + +/* +** mla_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svuint8x2_t, + svmla_za32_u8_vg4x2 (w8 + 7, z4, z0), + svmla_za32_vg4x2 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** umlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svuint8x2_t, + svmla_za32_u8_vg4x2 (w8 + 8, z4, z4), + svmla_za32_vg4x2 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** umlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svuint8x2_t, + svmla_za32_u8_vg4x2 (w8 - 1, z4, z0), + svmla_za32_vg4x2 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** umlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svuint8x2_t, svuint8_t, + svmla_single_za32_u8_vg4x2 (0, z1, z0), + svmla_za32_vg4x2 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** umlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svuint8x2_t, svuint8_t, + svmla_single_za32_u8_vg4x2 (w0, z1, z0), + svmla_za32_vg4x2 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** umlall za\.s\[w8, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svuint8x2_t, svuint8_t, + svmla_single_za32_u8_vg4x2 (w8, z1, z0), + svmla_za32_vg4x2 (w8, z1, z0)) + +/* +** mla_single_w8p1_z1_z0: +** add 
(w8|w9|w10|w11), w8, #?1 +** umlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svuint8x2_t, svuint8_t, + svmla_single_za32_u8_vg4x2 (w8 + 1, z1, z0), + svmla_za32_vg4x2 (w8 + 1, z1, z0)) + +/* +** mla_single_w8p2_z20_z0: +** add (w8|w9|w10|w11), w8, #?2 +** umlall za\.s\[\1, 0:3, vgx2\], {z20\.b - z21\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p2_z20_z0, svuint8x2_t, svuint8_t, + svmla_single_za32_u8_vg4x2 (w8 + 2, z20, z0), + svmla_za32_vg4x2 (w8 + 2, z20, z0)) + +/* +** mla_single_w11p4_z27_z0: +** umlall za\.s\[w11, 4:7, vgx2\], {z27\.b - z28\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w11p4_z27_z0, svuint8x2_t, svuint8_t, + svmla_single_za32_u8_vg4x2 (w11 + 4, z27, z0), + svmla_za32_vg4x2 (w11 + 4, z27, z0)) + +/* +** mla_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svuint8x2_t, svuint8_t, + svmla_single_za32_u8_vg4x2 (w8 + 7, z1, z0), + svmla_za32_vg4x2 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** umlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svuint8x2_t, svuint8_t, + svmla_single_za32_u8_vg4x2 (w8 + 8, z1, z0), + svmla_za32_vg4x2 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** umlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svuint8x2_t, svuint8_t, + svmla_single_za32_u8_vg4x2 (w0 - 1, z1, z0), + svmla_za32_vg4x2 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** umlall za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svuint8x2_t, svuint8_t, + svmla_single_za32_u8_vg4x2 (w8, z0, z15), + svmla_za32_vg4x2 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** umlall za\.s\[w8, 0:3, vgx2\], {z20\.b - z21\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svuint8x2_t, svuint8_t, + svmla_single_za32_u8_vg4x2 (w8, z20, z16), + svmla_za32_vg4x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u8_vg4x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u8_vg4x4.c new file mode 100644 index 0000000000000000000000000000000000000000..1dde7e5a57e09feb4e39c124d06238a4d9f2df9e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_u8_vg4x4.c @@ -0,0 +1,260 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** umlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svuint8x4_t, + svmla_za32_u8_vg4x4 (0, z0, z0), + svmla_za32_vg4x4 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** umlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svuint8x4_t, + svmla_za32_u8_vg4x4 (w0, z0, z0), + svmla_za32_vg4x4 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** umlall za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svuint8x4_t, + svmla_za32_u8_vg4x4 (w8, z0, z4), + svmla_za32_vg4x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z18: +** ... 
+** umlall za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z18, svuint8x4_t, + svmla_za32_u8_vg4x4 (w8, z0, z18), + svmla_za32_vg4x4 (w8, z0, z18)) + +/* +** mla_w8_z18_z0: +** ... +** umlall za\.s\[w8, 0:3, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z0, svuint8x4_t, + svmla_za32_u8_vg4x4 (w8, z18, z0), + svmla_za32_vg4x4 (w8, z18, z0)) + +/* +** mla_w8_z0_z23: +** ... +** umlall za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z23, svuint8x4_t, + svmla_za32_u8_vg4x4 (w8, z0, z23), + svmla_za32_vg4x4 (w8, z0, z23)) + +/* +** mla_w8_z23_z0: +** ... +** umlall za\.s\[w8, 0:3, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svuint8x4_t, + svmla_za32_u8_vg4x4 (w8, z23, z0), + svmla_za32_vg4x4 (w8, z23, z0)) + +/* +** mla_w8_z4_z28: +** umlall za\.s\[w8, 0:3, vgx4\], {z4\.b - z7\.b}, {z28\.b - z31\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z28, svuint8x4_t, + svmla_za32_u8_vg4x4 (w8, z4, z28), + svmla_za32_vg4x4 (w8, z4, z28)) + +/* +** mla_w8_z28_z0: +** umlall za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z0, svuint8x4_t, + svmla_za32_u8_vg4x4 (w8, z28, z0), + svmla_za32_vg4x4 (w8, z28, z0)) + +/* +** mla_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p1_z4_z0, svuint8x4_t, + svmla_za32_u8_vg4x4 (w8 + 1, z4, z0), + svmla_za32_vg4x4 (w8 + 1, z4, z0)) + +/* +** mla_w8p2_z4_z0: +** add (w8|w9|w10|w11), w8, #?2 +** umlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p2_z4_z0, svuint8x4_t, + svmla_za32_u8_vg4x4 (w8 + 2, z4, z0), + svmla_za32_vg4x4 (w8 + 2, z4, z0)) + +/* +** mla_w11p4_z4_z0: +** umlall za\.s\[w11, 4:7, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w11p4_z4_z0, svuint8x4_t, + svmla_za32_u8_vg4x4 (w11 + 4, z4, z0), + svmla_za32_vg4x4 (w11 + 4, z4, z0)) + +/* +** mla_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svuint8x4_t, + svmla_za32_u8_vg4x4 (w8 + 7, z4, z0), + svmla_za32_vg4x4 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** umlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svuint8x4_t, + svmla_za32_u8_vg4x4 (w8 + 8, z4, z4), + svmla_za32_vg4x4 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** umlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svuint8x4_t, + svmla_za32_u8_vg4x4 (w8 - 1, z4, z0), + svmla_za32_vg4x4 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** umlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svuint8x4_t, svuint8_t, + svmla_single_za32_u8_vg4x4 (0, z1, z0), + svmla_za32_vg4x4 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** umlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svuint8x4_t, svuint8_t, + svmla_single_za32_u8_vg4x4 (w0, z1, z0), + svmla_za32_vg4x4 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** umlall za\.s\[w8, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svuint8x4_t, svuint8_t, + svmla_single_za32_u8_vg4x4 (w8, z1, z0), + svmla_za32_vg4x4 (w8, z1, 
z0)) + +/* +** mla_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svuint8x4_t, svuint8_t, + svmla_single_za32_u8_vg4x4 (w8 + 1, z1, z0), + svmla_za32_vg4x4 (w8 + 1, z1, z0)) + +/* +** mla_single_w8p4_z20_z0: +** umlall za\.s\[w8, 4:7, vgx4\], {z20\.b - z23\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svuint8x4_t, svuint8_t, + svmla_single_za32_u8_vg4x4 (w8 + 4, z20, z0), + svmla_za32_vg4x4 (w8 + 4, z20, z0)) + +/* +** mla_single_w8p6_z27_z0: +** add (w8|w9|w10|w11), w8, #?6 +** umlall za\.s\[\1, 0:3, vgx4\], {z27\.b - z30\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svuint8x4_t, svuint8_t, + svmla_single_za32_u8_vg4x4 (w8 + 6, z27, z0), + svmla_za32_vg4x4 (w8 + 6, z27, z0)) + +/* +** mla_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svuint8x4_t, svuint8_t, + svmla_single_za32_u8_vg4x4 (w8 + 7, z1, z0), + svmla_za32_vg4x4 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** umlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svuint8x4_t, svuint8_t, + svmla_single_za32_u8_vg4x4 (w8 + 8, z1, z0), + svmla_za32_vg4x4 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** umlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svuint8x4_t, svuint8_t, + svmla_single_za32_u8_vg4x4 (w0 - 1, z1, z0), + svmla_za32_vg4x4 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** umlall za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svuint8x4_t, svuint8_t, + svmla_single_za32_u8_vg4x4 (w8, z0, z15), + svmla_za32_vg4x4 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** umlall za\.s\[w8, 0:3, vgx4\], {z20\.b - z23\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svuint8x4_t, svuint8_t, + svmla_single_za32_u8_vg4x4 (w8, z20, z16), + svmla_za32_vg4x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_f64_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_f64_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..be8b0a79b224c18d74f47a92d33d47bef94e6dc4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_f64_vg1x2.c @@ -0,0 +1,182 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-f64f64" + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** fmla za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svfloat64x2_t, + svmla_za64_f64_vg1x2 (0, z0, z0), + svmla_za64_vg1x2 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** fmla za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svfloat64x2_t, + svmla_za64_f64_vg1x2 (w0, z0, z0), + svmla_za64_vg1x2 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** fmla za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svfloat64x2_t, + svmla_za64_f64_vg1x2 (w8, z0, z4), + svmla_za64_vg1x2 (w8, z0, z4)) + +/* +** mla_w8_z4_z18: +** fmla za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z18\.d - z19\.d} +** ret 
+*/ +TEST_ZA_XN (mla_w8_z4_z18, svfloat64x2_t, + svmla_za64_f64_vg1x2 (w8, z4, z18), + svmla_za64_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z23_z0: +** ... +** fmla za\.d\[w8, 0, vgx2\], [^\n]+, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svfloat64x2_t, + svmla_za64_f64_vg1x2 (w8, z23, z0), + svmla_za64_vg1x2 (w8, z23, z0)) + +/* +** mla_w8_z18_z23: +** ... +** fmla za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z23, svfloat64x2_t, + svmla_za64_f64_vg1x2 (w8, z18, z23), + svmla_za64_vg1x2 (w8, z18, z23)) + +/* +** mla_w8_z4_z28: +** fmla za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z28\.d - z29\.d} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z28, svfloat64x2_t, + svmla_za64_f64_vg1x2 (w8, z4, z28), + svmla_za64_vg1x2 (w8, z4, z28)) + +/* +** mla_w8p7_z4_z0: +** fmla za\.d\[w8, 7, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svfloat64x2_t, + svmla_za64_f64_vg1x2 (w8 + 7, z4, z0), + svmla_za64_vg1x2 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** fmla za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svfloat64x2_t, + svmla_za64_f64_vg1x2 (w8 + 8, z4, z4), + svmla_za64_vg1x2 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fmla za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svfloat64x2_t, + svmla_za64_f64_vg1x2 (w8 - 1, z4, z0), + svmla_za64_vg1x2 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** fmla za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svfloat64x2_t, svfloat64_t, + svmla_single_za64_f64_vg1x2 (0, z1, z0), + svmla_za64_vg1x2 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** fmla za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svfloat64x2_t, svfloat64_t, + svmla_single_za64_f64_vg1x2 (w0, z1, z0), + svmla_za64_vg1x2 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** fmla za\.d\[w8, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svfloat64x2_t, svfloat64_t, + svmla_single_za64_f64_vg1x2 (w8, z1, z0), + svmla_za64_vg1x2 (w8, z1, z0)) + +/* +** mla_single_w8p7_z1_z0: +** fmla za\.d\[w8, 7, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svfloat64x2_t, svfloat64_t, + svmla_single_za64_f64_vg1x2 (w8 + 7, z1, z0), + svmla_za64_vg1x2 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fmla za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svfloat64x2_t, svfloat64_t, + svmla_single_za64_f64_vg1x2 (w8 + 8, z1, z0), + svmla_za64_vg1x2 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** fmla za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svfloat64x2_t, svfloat64_t, + svmla_single_za64_f64_vg1x2 (w0 - 1, z1, z0), + svmla_za64_vg1x2 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** fmla za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, z15\.d +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svfloat64x2_t, svfloat64_t, + svmla_single_za64_f64_vg1x2 (w8, z0, z15), + svmla_za64_vg1x2 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** fmla za\.d\[w8, 0, vgx2\], {z20\.d - z21\.d}, \1\.d +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svfloat64x2_t, svfloat64_t, + svmla_single_za64_f64_vg1x2 (w8, z20, z16), + svmla_za64_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_f64_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_f64_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..49621e6fffacbe868c3aba55471ca4d074b96e35 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_f64_vg1x4.c @@ -0,0 +1,174 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-f64f64" + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** fmla za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svfloat64x4_t, + svmla_za64_f64_vg1x4 (0, z0, z0), + svmla_za64_vg1x4 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** fmla za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svfloat64x4_t, + svmla_za64_f64_vg1x4 (w0, z0, z0), + svmla_za64_vg1x4 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** fmla za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svfloat64x4_t, + svmla_za64_f64_vg1x4 (w8, z0, z4), + svmla_za64_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z18: +** ... +** fmla za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z18, svfloat64x4_t, + svmla_za64_f64_vg1x4 (w8, z0, z18), + svmla_za64_vg1x4 (w8, z0, z18)) + +/* +** mla_w8_z18_z28: +** ... +** fmla za\.d\[w8, 0, vgx4\], [^\n]+, {z28\.d - z31\.d} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z28, svfloat64x4_t, + svmla_za64_f64_vg1x4 (w8, z18, z28), + svmla_za64_vg1x4 (w8, z18, z28)) + +/* +** mla_w8_z28_z23: +** ... 
+** fmla za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z23, svfloat64x4_t, + svmla_za64_f64_vg1x4 (w8, z28, z23), + svmla_za64_vg1x4 (w8, z28, z23)) + +/* +** mla_w8p7_z4_z0: +** fmla za\.d\[w8, 7, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svfloat64x4_t, + svmla_za64_f64_vg1x4 (w8 + 7, z4, z0), + svmla_za64_vg1x4 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** fmla za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svfloat64x4_t, + svmla_za64_f64_vg1x4 (w8 + 8, z4, z4), + svmla_za64_vg1x4 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fmla za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svfloat64x4_t, + svmla_za64_f64_vg1x4 (w8 - 1, z4, z0), + svmla_za64_vg1x4 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** fmla za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svfloat64x4_t, svfloat64_t, + svmla_single_za64_f64_vg1x4 (0, z1, z0), + svmla_za64_vg1x4 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** fmla za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svfloat64x4_t, svfloat64_t, + svmla_single_za64_f64_vg1x4 (w0, z1, z0), + svmla_za64_vg1x4 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** fmla za\.d\[w8, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svfloat64x4_t, svfloat64_t, + svmla_single_za64_f64_vg1x4 (w8, z1, z0), + svmla_za64_vg1x4 (w8, z1, z0)) + +/* +** mla_single_w8p7_z1_z0: +** fmla za\.d\[w8, 7, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svfloat64x4_t, svfloat64_t, + svmla_single_za64_f64_vg1x4 (w8 + 7, z1, z0), + svmla_za64_vg1x4 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fmla za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svfloat64x4_t, svfloat64_t, + svmla_single_za64_f64_vg1x4 (w8 + 8, z1, z0), + svmla_za64_vg1x4 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** fmla za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svfloat64x4_t, svfloat64_t, + svmla_single_za64_f64_vg1x4 (w0 - 1, z1, z0), + svmla_za64_vg1x4 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** fmla za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, z15\.d +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svfloat64x4_t, svfloat64_t, + svmla_single_za64_f64_vg1x4 (w8, z0, z15), + svmla_za64_vg1x4 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** fmla za\.d\[w8, 0, vgx4\], {z20\.d - z23\.d}, \1\.d +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svfloat64x4_t, svfloat64_t, + svmla_single_za64_f64_vg1x4 (w8, z20, z16), + svmla_za64_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_s16_vg4x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_s16_vg4x1.c new file mode 100644 index 0000000000000000000000000000000000000000..92cad0ca5ed4d6cea96f26ead543266d9beb838c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_s16_vg4x1.c @@ -0,0 +1,151 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** smlall za\.d\[\1, 0:3\], z0\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_0_z0_z0, svint16_t, + svmla_za64_s16_vg4x1 (0, z0, z0), + svmla_za64_vg4x1 (0, z0, z0)) + +/* +** mla_w0_z0_z3: +** mov (w8|w9|w10|w11), w0 +** smlall za\.d\[\1, 0:3\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mla_w0_z0_z3, svint16_t, + svmla_za64_s16_vg4x1 (w0, z0, z3), + svmla_za64_vg4x1 (w0, z0, z3)) + +/* +** mla_w7_z0_z3: +** mov (w8|w9|w10|w11), w7 +** smlall za\.d\[\1, 0:3\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mla_w7_z0_z3, svint16_t, + svmla_za64_s16_vg4x1 (w7, z0, z3), + svmla_za64_vg4x1 (w7, z0, z3)) + +/* +** mla_w8_z7_z3: +** smlall za\.d\[w8, 0:3\], z7\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mla_w8_z7_z3, svint16_t, + svmla_za64_s16_vg4x1 (w8, z7, z3), + svmla_za64_vg4x1 (w8, z7, z3)) + +/* +** mla_w8_z31_z16: +** mov (z[0-7])\.d, z16\.d +** smlall za\.d\[w8, 0:3\], z31\.h, 
\1\.h +** ret +*/ +TEST_ZA_X1 (mla_w8_z31_z16, svint16_t, + svmla_za64_s16_vg4x1 (w8, z31, z16), + svmla_za64_vg4x1 (w8, z31, z16)) + +/* +** mla_w8p1_z0_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlall za\.d\[\1, 0:3\], z0\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p1_z0_z0, svint16_t, + svmla_za64_s16_vg4x1 (w8 + 1, z0, z0), + svmla_za64_vg4x1 (w8 + 1, z0, z0)) + +/* +** mla_w10p4_z23_z0: +** smlall za\.d\[w10, 4:7\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w10p4_z23_z0, svint16_t, + svmla_za64_s16_vg4x1 (w10 + 4, z23, z0), + svmla_za64_vg4x1 (w10 + 4, z23, z0)) + +/* +** mla_w11p6_z23_z0: +** add (w8|w9|w10|w11), w11, #?6 +** smlall za\.d\[\1, 0:3\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w11p6_z23_z0, svint16_t, + svmla_za64_s16_vg4x1 (w11 + 6, z23, z0), + svmla_za64_vg4x1 (w11 + 6, z23, z0)) + +/* +** mla_w9p8_z7_z7: +** smlall za\.d\[w9, 8:11\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mla_w9p8_z7_z7, svint16_t, + svmla_za64_s16_vg4x1 (w9 + 8, z7, z7), + svmla_za64_vg4x1 (w9 + 8, z7, z7)) + +/* +** mla_w11p12_z23_z0: +** smlall za\.d\[w11, 12:15\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w11p12_z23_z0, svint16_t, + svmla_za64_s16_vg4x1 (w11 + 12, z23, z0), + svmla_za64_vg4x1 (w11 + 12, z23, z0)) + +/* +** mla_w8p14_z23_z0: +** add (w8|w9|w10|w11), w8, #?14 +** smlall za\.d\[\1, 0:3\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p14_z23_z0, svint16_t, + svmla_za64_s16_vg4x1 (w8 + 14, z23, z0), + svmla_za64_vg4x1 (w8 + 14, z23, z0)) + +/* +** mla_w8p15_z7_z7: +** add (w8|w9|w10|w11), w8, #?15 +** smlall za\.d\[\1, 0:3\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p15_z7_z7, svint16_t, + svmla_za64_s16_vg4x1 (w8 + 15, z7, z7), + svmla_za64_vg4x1 (w8 + 15, z7, z7)) + +/* +** mla_w8p16_z7_z7: +** add (w8|w9|w10|w11), w8, #?16 +** smlall za\.d\[\1, 0:3\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p16_z7_z7, svint16_t, + svmla_za64_s16_vg4x1 (w8 + 16, z7, z7), + svmla_za64_vg4x1 (w8 + 16, z7, z7)) + +/* +** mla_w8m1_z16_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** smlall za\.d\[\1, 0:3\], z16\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8m1_z16_z0, svint16_t, + svmla_za64_s16_vg4x1 (w8 - 1, z16, z0), + svmla_za64_vg4x1 (w8 - 1, z16, z0)) + +/* +** mla_w12_z0_z3: +** mov (w8|w9|w10|w11), w12 +** smlall za\.d\[\1, 0:3\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mla_w12_z0_z3, svint16_t, + svmla_za64_s16_vg4x1 (w12, z0, z3), + svmla_za64_vg4x1 (w12, z0, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_s16_vg4x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_s16_vg4x2.c new file mode 100644 index 0000000000000000000000000000000000000000..2299cf8ba876892b13768700717463e5747b1b97 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_s16_vg4x2.c @@ -0,0 +1,251 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** smlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svint16x2_t, + svmla_za64_s16_vg4x2 (0, z0, z0), + svmla_za64_vg4x2 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** smlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svint16x2_t, + svmla_za64_s16_vg4x2 (w0, z0, z0), + svmla_za64_vg4x2 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** smlall za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svint16x2_t, + svmla_za64_s16_vg4x2 (w8, z0, 
z4), + svmla_za64_vg4x2 (w8, z0, z4)) + +/* +** mla_w8_z4_z18: +** smlall za\.d\[w8, 0:3, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z18, svint16x2_t, + svmla_za64_s16_vg4x2 (w8, z4, z18), + svmla_za64_vg4x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z23: +** ... +** smlall za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z23, svint16x2_t, + svmla_za64_s16_vg4x2 (w8, z0, z23), + svmla_za64_vg4x2 (w8, z0, z23)) + +/* +** mla_w8_z23_z0: +** ... +** smlall za\.d\[w8, 0:3, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svint16x2_t, + svmla_za64_s16_vg4x2 (w8, z23, z0), + svmla_za64_vg4x2 (w8, z23, z0)) + +/* +** mla_w8_z18_z28: +** smlall za\.d\[w8, 0:3, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z28, svint16x2_t, + svmla_za64_s16_vg4x2 (w8, z18, z28), + svmla_za64_vg4x2 (w8, z18, z28)) + +/* +** mla_w8_z28_z4: +** smlall za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z4, svint16x2_t, + svmla_za64_s16_vg4x2 (w8, z28, z4), + svmla_za64_vg4x2 (w8, z28, z4)) + +/* +** mla_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlall za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p1_z4_z0, svint16x2_t, + svmla_za64_s16_vg4x2 (w8 + 1, z4, z0), + svmla_za64_vg4x2 (w8 + 1, z4, z0)) + +/* +** mla_w8p2_z4_z0: +** add (w8|w9|w10|w11), w8, #?2 +** smlall za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p2_z4_z0, svint16x2_t, + svmla_za64_s16_vg4x2 (w8 + 2, z4, z0), + svmla_za64_vg4x2 (w8 + 2, z4, z0)) + +/* +** mla_w11p4_z4_z0: +** smlall za\.d\[w11, 4:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w11p4_z4_z0, svint16x2_t, + svmla_za64_s16_vg4x2 (w11 + 4, z4, z0), + svmla_za64_vg4x2 (w11 + 4, z4, z0)) + +/* +** mla_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlall za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svint16x2_t, + svmla_za64_s16_vg4x2 (w8 + 7, z4, z0), + svmla_za64_vg4x2 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** smlall za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svint16x2_t, + svmla_za64_s16_vg4x2 (w8 + 8, z4, z4), + svmla_za64_vg4x2 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** smlall za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svint16x2_t, + svmla_za64_s16_vg4x2 (w8 - 1, z4, z0), + svmla_za64_vg4x2 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** smlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svint16x2_t, svint16_t, + svmla_single_za64_s16_vg4x2 (0, z1, z0), + svmla_za64_vg4x2 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** smlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svint16x2_t, svint16_t, + svmla_single_za64_s16_vg4x2 (w0, z1, z0), + svmla_za64_vg4x2 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** smlall za\.d\[w8, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svint16x2_t, svint16_t, + svmla_single_za64_s16_vg4x2 (w8, z1, z0), + svmla_za64_vg4x2 (w8, z1, z0)) + +/* +** mla_single_w8p1_z1_z0: +** 
add (w8|w9|w10|w11), w8, #?1 +** smlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svint16x2_t, svint16_t, + svmla_single_za64_s16_vg4x2 (w8 + 1, z1, z0), + svmla_za64_vg4x2 (w8 + 1, z1, z0)) + +/* +** mla_single_w8p2_z20_z0: +** add (w8|w9|w10|w11), w8, #?2 +** smlall za\.d\[\1, 0:3, vgx2\], {z20\.h - z21\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p2_z20_z0, svint16x2_t, svint16_t, + svmla_single_za64_s16_vg4x2 (w8 + 2, z20, z0), + svmla_za64_vg4x2 (w8 + 2, z20, z0)) + +/* +** mla_single_w11p4_z27_z0: +** smlall za\.d\[w11, 4:7, vgx2\], {z27\.h - z28\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w11p4_z27_z0, svint16x2_t, svint16_t, + svmla_single_za64_s16_vg4x2 (w11 + 4, z27, z0), + svmla_za64_vg4x2 (w11 + 4, z27, z0)) + +/* +** mla_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svint16x2_t, svint16_t, + svmla_single_za64_s16_vg4x2 (w8 + 7, z1, z0), + svmla_za64_vg4x2 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** smlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svint16x2_t, svint16_t, + svmla_single_za64_s16_vg4x2 (w8 + 8, z1, z0), + svmla_za64_vg4x2 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** smlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svint16x2_t, svint16_t, + svmla_single_za64_s16_vg4x2 (w0 - 1, z1, z0), + svmla_za64_vg4x2 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** smlall za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svint16x2_t, svint16_t, + svmla_single_za64_s16_vg4x2 (w8, z0, z15), + svmla_za64_vg4x2 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** smlall za\.d\[w8, 0:3, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svint16x2_t, svint16_t, + svmla_single_za64_s16_vg4x2 (w8, z20, z16), + svmla_za64_vg4x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_s16_vg4x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_s16_vg4x4.c new file mode 100644 index 0000000000000000000000000000000000000000..3a1780ae294a5e88d2ab1ddf8b49565564ffd6f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_s16_vg4x4.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** smlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svint16x4_t, + svmla_za64_s16_vg4x4 (0, z0, z0), + svmla_za64_vg4x4 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** smlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svint16x4_t, + svmla_za64_s16_vg4x4 (w0, z0, z0), + svmla_za64_vg4x4 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** smlall za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svint16x4_t, + svmla_za64_s16_vg4x4 (w8, z0, z4), + svmla_za64_vg4x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z18: +** ... 
+** smlall za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z18, svint16x4_t, + svmla_za64_s16_vg4x4 (w8, z0, z18), + svmla_za64_vg4x4 (w8, z0, z18)) + +/* +** mla_w8_z18_z0: +** ... +** smlall za\.d\[w8, 0:3, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z0, svint16x4_t, + svmla_za64_s16_vg4x4 (w8, z18, z0), + svmla_za64_vg4x4 (w8, z18, z0)) + +/* +** mla_w8_z0_z23: +** ... +** smlall za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z23, svint16x4_t, + svmla_za64_s16_vg4x4 (w8, z0, z23), + svmla_za64_vg4x4 (w8, z0, z23)) + +/* +** mla_w8_z23_z0: +** ... +** smlall za\.d\[w8, 0:3, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svint16x4_t, + svmla_za64_s16_vg4x4 (w8, z23, z0), + svmla_za64_vg4x4 (w8, z23, z0)) + +/* +** mla_w8_z4_z28: +** smlall za\.d\[w8, 0:3, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z28, svint16x4_t, + svmla_za64_s16_vg4x4 (w8, z4, z28), + svmla_za64_vg4x4 (w8, z4, z28)) + +/* +** mla_w8_z28_z0: +** smlall za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z0, svint16x4_t, + svmla_za64_s16_vg4x4 (w8, z28, z0), + svmla_za64_vg4x4 (w8, z28, z0)) + +/* +** mla_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlall za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p1_z4_z0, svint16x4_t, + svmla_za64_s16_vg4x4 (w8 + 1, z4, z0), + svmla_za64_vg4x4 (w8 + 1, z4, z0)) + +/* +** mla_w8p2_z4_z0: +** add (w8|w9|w10|w11), w8, #?2 +** smlall za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p2_z4_z0, svint16x4_t, + svmla_za64_s16_vg4x4 (w8 + 2, z4, z0), + svmla_za64_vg4x4 (w8 + 2, z4, z0)) + +/* +** mla_w11p4_z4_z0: +** smlall za\.d\[w11, 4:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w11p4_z4_z0, svint16x4_t, + svmla_za64_s16_vg4x4 (w11 + 4, z4, z0), + svmla_za64_vg4x4 (w11 + 4, z4, z0)) + +/* +** mla_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlall za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svint16x4_t, + svmla_za64_s16_vg4x4 (w8 + 7, z4, z0), + svmla_za64_vg4x4 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** smlall za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svint16x4_t, + svmla_za64_s16_vg4x4 (w8 + 8, z4, z4), + svmla_za64_vg4x4 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** smlall za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svint16x4_t, + svmla_za64_s16_vg4x4 (w8 - 1, z4, z0), + svmla_za64_vg4x4 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** smlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svint16x4_t, svint16_t, + svmla_single_za64_s16_vg4x4 (0, z1, z0), + svmla_za64_vg4x4 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** smlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svint16x4_t, svint16_t, + svmla_single_za64_s16_vg4x4 (w0, z1, z0), + svmla_za64_vg4x4 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** smlall za\.d\[w8, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svint16x4_t, svint16_t, + svmla_single_za64_s16_vg4x4 (w8, z1, z0), + 
svmla_za64_vg4x4 (w8, z1, z0)) + +/* +** mla_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svint16x4_t, svint16_t, + svmla_single_za64_s16_vg4x4 (w8 + 1, z1, z0), + svmla_za64_vg4x4 (w8 + 1, z1, z0)) + +/* +** mla_single_w8p4_z20_z0: +** smlall za\.d\[w8, 4:7, vgx4\], {z20\.h - z23\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svint16x4_t, svint16_t, + svmla_single_za64_s16_vg4x4 (w8 + 4, z20, z0), + svmla_za64_vg4x4 (w8 + 4, z20, z0)) + +/* +** mla_single_w8p6_z27_z0: +** add (w8|w9|w10|w11), w8, #?6 +** smlall za\.d\[\1, 0:3, vgx4\], {z27\.h - z30\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svint16x4_t, svint16_t, + svmla_single_za64_s16_vg4x4 (w8 + 6, z27, z0), + svmla_za64_vg4x4 (w8 + 6, z27, z0)) + +/* +** mla_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svint16x4_t, svint16_t, + svmla_single_za64_s16_vg4x4 (w8 + 7, z1, z0), + svmla_za64_vg4x4 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** smlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svint16x4_t, svint16_t, + svmla_single_za64_s16_vg4x4 (w8 + 8, z1, z0), + svmla_za64_vg4x4 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** smlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svint16x4_t, svint16_t, + svmla_single_za64_s16_vg4x4 (w0 - 1, z1, z0), + svmla_za64_vg4x4 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** smlall za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svint16x4_t, svint16_t, + svmla_single_za64_s16_vg4x4 (w8, z0, z15), + svmla_za64_vg4x4 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** smlall za\.d\[w8, 0:3, vgx4\], {z20\.h - z23\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svint16x4_t, svint16_t, + svmla_single_za64_s16_vg4x4 (w8, z20, z16), + svmla_za64_vg4x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_u16_vg4x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_u16_vg4x1.c new file mode 100644 index 0000000000000000000000000000000000000000..cd50db46048163458facca0abb27db7f8924ae1d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_u16_vg4x1.c @@ -0,0 +1,151 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** umlall za\.d\[\1, 0:3\], z0\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_0_z0_z0, svuint16_t, + svmla_za64_u16_vg4x1 (0, z0, z0), + svmla_za64_vg4x1 (0, z0, z0)) + +/* +** mla_w0_z0_z3: +** mov (w8|w9|w10|w11), w0 +** umlall za\.d\[\1, 0:3\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mla_w0_z0_z3, svuint16_t, + svmla_za64_u16_vg4x1 (w0, z0, z3), + svmla_za64_vg4x1 (w0, z0, z3)) + +/* +** mla_w7_z0_z3: +** mov (w8|w9|w10|w11), w7 +** umlall za\.d\[\1, 0:3\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mla_w7_z0_z3, svuint16_t, + svmla_za64_u16_vg4x1 (w7, z0, z3), + svmla_za64_vg4x1 (w7, z0, z3)) + +/* +** mla_w8_z7_z3: +** umlall za\.d\[w8, 0:3\], z7\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mla_w8_z7_z3, svuint16_t, + svmla_za64_u16_vg4x1 (w8, z7, z3), + svmla_za64_vg4x1 (w8, z7, z3)) + +/* +** mla_w8_z31_z16: +** mov (z[0-7])\.d, z16\.d +** umlall za\.d\[w8, 0:3\], z31\.h, 
\1\.h +** ret +*/ +TEST_ZA_X1 (mla_w8_z31_z16, svuint16_t, + svmla_za64_u16_vg4x1 (w8, z31, z16), + svmla_za64_vg4x1 (w8, z31, z16)) + +/* +** mla_w8p1_z0_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlall za\.d\[\1, 0:3\], z0\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p1_z0_z0, svuint16_t, + svmla_za64_u16_vg4x1 (w8 + 1, z0, z0), + svmla_za64_vg4x1 (w8 + 1, z0, z0)) + +/* +** mla_w10p4_z23_z0: +** umlall za\.d\[w10, 4:7\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w10p4_z23_z0, svuint16_t, + svmla_za64_u16_vg4x1 (w10 + 4, z23, z0), + svmla_za64_vg4x1 (w10 + 4, z23, z0)) + +/* +** mla_w11p6_z23_z0: +** add (w8|w9|w10|w11), w11, #?6 +** umlall za\.d\[\1, 0:3\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w11p6_z23_z0, svuint16_t, + svmla_za64_u16_vg4x1 (w11 + 6, z23, z0), + svmla_za64_vg4x1 (w11 + 6, z23, z0)) + +/* +** mla_w9p8_z7_z7: +** umlall za\.d\[w9, 8:11\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mla_w9p8_z7_z7, svuint16_t, + svmla_za64_u16_vg4x1 (w9 + 8, z7, z7), + svmla_za64_vg4x1 (w9 + 8, z7, z7)) + +/* +** mla_w11p12_z23_z0: +** umlall za\.d\[w11, 12:15\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w11p12_z23_z0, svuint16_t, + svmla_za64_u16_vg4x1 (w11 + 12, z23, z0), + svmla_za64_vg4x1 (w11 + 12, z23, z0)) + +/* +** mla_w8p14_z23_z0: +** add (w8|w9|w10|w11), w8, #?14 +** umlall za\.d\[\1, 0:3\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p14_z23_z0, svuint16_t, + svmla_za64_u16_vg4x1 (w8 + 14, z23, z0), + svmla_za64_vg4x1 (w8 + 14, z23, z0)) + +/* +** mla_w8p15_z7_z7: +** add (w8|w9|w10|w11), w8, #?15 +** umlall za\.d\[\1, 0:3\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p15_z7_z7, svuint16_t, + svmla_za64_u16_vg4x1 (w8 + 15, z7, z7), + svmla_za64_vg4x1 (w8 + 15, z7, z7)) + +/* +** mla_w8p16_z7_z7: +** add (w8|w9|w10|w11), w8, #?16 +** umlall za\.d\[\1, 0:3\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mla_w8p16_z7_z7, svuint16_t, + svmla_za64_u16_vg4x1 (w8 + 16, z7, z7), + svmla_za64_vg4x1 (w8 + 16, z7, z7)) + +/* +** mla_w8m1_z16_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** umlall za\.d\[\1, 0:3\], z16\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mla_w8m1_z16_z0, svuint16_t, + svmla_za64_u16_vg4x1 (w8 - 1, z16, z0), + svmla_za64_vg4x1 (w8 - 1, z16, z0)) + +/* +** mla_w12_z0_z3: +** mov (w8|w9|w10|w11), w12 +** umlall za\.d\[\1, 0:3\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mla_w12_z0_z3, svuint16_t, + svmla_za64_u16_vg4x1 (w12, z0, z3), + svmla_za64_vg4x1 (w12, z0, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_u16_vg4x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_u16_vg4x2.c new file mode 100644 index 0000000000000000000000000000000000000000..680ef5651da82fc98bb65321313c2d510b1b1423 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_u16_vg4x2.c @@ -0,0 +1,251 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** umlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svuint16x2_t, + svmla_za64_u16_vg4x2 (0, z0, z0), + svmla_za64_vg4x2 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** umlall za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svuint16x2_t, + svmla_za64_u16_vg4x2 (w0, z0, z0), + svmla_za64_vg4x2 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** umlall za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svuint16x2_t, + 
svmla_za64_u16_vg4x2 (w8, z0, z4), + svmla_za64_vg4x2 (w8, z0, z4)) + +/* +** mla_w8_z4_z18: +** umlall za\.d\[w8, 0:3, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z18, svuint16x2_t, + svmla_za64_u16_vg4x2 (w8, z4, z18), + svmla_za64_vg4x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z23: +** ... +** umlall za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z23, svuint16x2_t, + svmla_za64_u16_vg4x2 (w8, z0, z23), + svmla_za64_vg4x2 (w8, z0, z23)) + +/* +** mla_w8_z23_z0: +** ... +** umlall za\.d\[w8, 0:3, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svuint16x2_t, + svmla_za64_u16_vg4x2 (w8, z23, z0), + svmla_za64_vg4x2 (w8, z23, z0)) + +/* +** mla_w8_z18_z28: +** umlall za\.d\[w8, 0:3, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z28, svuint16x2_t, + svmla_za64_u16_vg4x2 (w8, z18, z28), + svmla_za64_vg4x2 (w8, z18, z28)) + +/* +** mla_w8_z28_z4: +** umlall za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z4, svuint16x2_t, + svmla_za64_u16_vg4x2 (w8, z28, z4), + svmla_za64_vg4x2 (w8, z28, z4)) + +/* +** mla_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlall za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p1_z4_z0, svuint16x2_t, + svmla_za64_u16_vg4x2 (w8 + 1, z4, z0), + svmla_za64_vg4x2 (w8 + 1, z4, z0)) + +/* +** mla_w8p2_z4_z0: +** add (w8|w9|w10|w11), w8, #?2 +** umlall za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p2_z4_z0, svuint16x2_t, + svmla_za64_u16_vg4x2 (w8 + 2, z4, z0), + svmla_za64_vg4x2 (w8 + 2, z4, z0)) + +/* +** mla_w11p4_z4_z0: +** umlall za\.d\[w11, 4:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w11p4_z4_z0, svuint16x2_t, + svmla_za64_u16_vg4x2 (w11 + 4, z4, z0), + svmla_za64_vg4x2 (w11 + 4, z4, z0)) + +/* +** mla_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlall za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svuint16x2_t, + svmla_za64_u16_vg4x2 (w8 + 7, z4, z0), + svmla_za64_vg4x2 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** umlall za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svuint16x2_t, + svmla_za64_u16_vg4x2 (w8 + 8, z4, z4), + svmla_za64_vg4x2 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** umlall za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svuint16x2_t, + svmla_za64_u16_vg4x2 (w8 - 1, z4, z0), + svmla_za64_vg4x2 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** umlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svuint16x2_t, svuint16_t, + svmla_single_za64_u16_vg4x2 (0, z1, z0), + svmla_za64_vg4x2 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** umlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svuint16x2_t, svuint16_t, + svmla_single_za64_u16_vg4x2 (w0, z1, z0), + svmla_za64_vg4x2 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** umlall za\.d\[w8, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svuint16x2_t, svuint16_t, + svmla_single_za64_u16_vg4x2 (w8, z1, z0), + svmla_za64_vg4x2 (w8, 
z1, z0)) + +/* +** mla_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svuint16x2_t, svuint16_t, + svmla_single_za64_u16_vg4x2 (w8 + 1, z1, z0), + svmla_za64_vg4x2 (w8 + 1, z1, z0)) + +/* +** mla_single_w8p2_z20_z0: +** add (w8|w9|w10|w11), w8, #?2 +** umlall za\.d\[\1, 0:3, vgx2\], {z20\.h - z21\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p2_z20_z0, svuint16x2_t, svuint16_t, + svmla_single_za64_u16_vg4x2 (w8 + 2, z20, z0), + svmla_za64_vg4x2 (w8 + 2, z20, z0)) + +/* +** mla_single_w11p4_z27_z0: +** umlall za\.d\[w11, 4:7, vgx2\], {z27\.h - z28\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w11p4_z27_z0, svuint16x2_t, svuint16_t, + svmla_single_za64_u16_vg4x2 (w11 + 4, z27, z0), + svmla_za64_vg4x2 (w11 + 4, z27, z0)) + +/* +** mla_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svuint16x2_t, svuint16_t, + svmla_single_za64_u16_vg4x2 (w8 + 7, z1, z0), + svmla_za64_vg4x2 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** umlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svuint16x2_t, svuint16_t, + svmla_single_za64_u16_vg4x2 (w8 + 8, z1, z0), + svmla_za64_vg4x2 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** umlall za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svuint16x2_t, svuint16_t, + svmla_single_za64_u16_vg4x2 (w0 - 1, z1, z0), + svmla_za64_vg4x2 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** umlall za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svuint16x2_t, svuint16_t, + svmla_single_za64_u16_vg4x2 (w8, z0, z15), + svmla_za64_vg4x2 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** umlall za\.d\[w8, 0:3, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svuint16x2_t, svuint16_t, + svmla_single_za64_u16_vg4x2 (w8, z20, z16), + svmla_za64_vg4x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_u16_vg4x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_u16_vg4x4.c new file mode 100644 index 0000000000000000000000000000000000000000..47ae89f08be3af6c796178785837f5c370f9925d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za64_u16_vg4x4.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** mla_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** umlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svuint16x4_t, + svmla_za64_u16_vg4x4 (0, z0, z0), + svmla_za64_vg4x4 (0, z0, z0)) + +/* +** mla_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** umlall za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svuint16x4_t, + svmla_za64_u16_vg4x4 (w0, z0, z0), + svmla_za64_vg4x4 (w0, z0, z0)) + +/* +** mla_w8_z0_z4: +** umlall za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svuint16x4_t, + svmla_za64_u16_vg4x4 (w8, z0, z4), + svmla_za64_vg4x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for 
misaligned registers. */ + +/* +** mla_w8_z0_z18: +** ... +** umlall za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z18, svuint16x4_t, + svmla_za64_u16_vg4x4 (w8, z0, z18), + svmla_za64_vg4x4 (w8, z0, z18)) + +/* +** mla_w8_z18_z0: +** ... +** umlall za\.d\[w8, 0:3, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z0, svuint16x4_t, + svmla_za64_u16_vg4x4 (w8, z18, z0), + svmla_za64_vg4x4 (w8, z18, z0)) + +/* +** mla_w8_z0_z23: +** ... +** umlall za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z23, svuint16x4_t, + svmla_za64_u16_vg4x4 (w8, z0, z23), + svmla_za64_vg4x4 (w8, z0, z23)) + +/* +** mla_w8_z23_z0: +** ... +** umlall za\.d\[w8, 0:3, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svuint16x4_t, + svmla_za64_u16_vg4x4 (w8, z23, z0), + svmla_za64_vg4x4 (w8, z23, z0)) + +/* +** mla_w8_z4_z28: +** umlall za\.d\[w8, 0:3, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z28, svuint16x4_t, + svmla_za64_u16_vg4x4 (w8, z4, z28), + svmla_za64_vg4x4 (w8, z4, z28)) + +/* +** mla_w8_z28_z0: +** umlall za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z0, svuint16x4_t, + svmla_za64_u16_vg4x4 (w8, z28, z0), + svmla_za64_vg4x4 (w8, z28, z0)) + +/* +** mla_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlall za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p1_z4_z0, svuint16x4_t, + svmla_za64_u16_vg4x4 (w8 + 1, z4, z0), + svmla_za64_vg4x4 (w8 + 1, z4, z0)) + +/* +** mla_w8p2_z4_z0: +** add (w8|w9|w10|w11), w8, #?2 +** umlall za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p2_z4_z0, svuint16x4_t, + svmla_za64_u16_vg4x4 (w8 + 2, z4, z0), + svmla_za64_vg4x4 (w8 + 2, z4, z0)) + +/* +** mla_w11p4_z4_z0: +** umlall za\.d\[w11, 4:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w11p4_z4_z0, svuint16x4_t, + svmla_za64_u16_vg4x4 (w11 + 4, z4, z0), + svmla_za64_vg4x4 (w11 + 4, z4, z0)) + +/* +** mla_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlall za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svuint16x4_t, + svmla_za64_u16_vg4x4 (w8 + 7, z4, z0), + svmla_za64_vg4x4 (w8 + 7, z4, z0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** umlall za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svuint16x4_t, + svmla_za64_u16_vg4x4 (w8 + 8, z4, z4), + svmla_za64_vg4x4 (w8 + 8, z4, z4)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** umlall za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svuint16x4_t, + svmla_za64_u16_vg4x4 (w8 - 1, z4, z0), + svmla_za64_vg4x4 (w8 - 1, z4, z0)) + +/* +** mla_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** umlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svuint16x4_t, svuint16_t, + svmla_single_za64_u16_vg4x4 (0, z1, z0), + svmla_za64_vg4x4 (0, z1, z0)) + +/* +** mla_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** umlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svuint16x4_t, svuint16_t, + svmla_single_za64_u16_vg4x4 (w0, z1, z0), + svmla_za64_vg4x4 (w0, z1, z0)) + +/* +** mla_single_w8_z1_z0: +** umlall za\.d\[w8, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, 
svuint16x4_t, svuint16_t, + svmla_single_za64_u16_vg4x4 (w8, z1, z0), + svmla_za64_vg4x4 (w8, z1, z0)) + +/* +** mla_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svuint16x4_t, svuint16_t, + svmla_single_za64_u16_vg4x4 (w8 + 1, z1, z0), + svmla_za64_vg4x4 (w8 + 1, z1, z0)) + +/* +** mla_single_w8p4_z20_z0: +** umlall za\.d\[w8, 4:7, vgx4\], {z20\.h - z23\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svuint16x4_t, svuint16_t, + svmla_single_za64_u16_vg4x4 (w8 + 4, z20, z0), + svmla_za64_vg4x4 (w8 + 4, z20, z0)) + +/* +** mla_single_w8p6_z27_z0: +** add (w8|w9|w10|w11), w8, #?6 +** umlall za\.d\[\1, 0:3, vgx4\], {z27\.h - z30\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svuint16x4_t, svuint16_t, + svmla_single_za64_u16_vg4x4 (w8 + 6, z27, z0), + svmla_za64_vg4x4 (w8 + 6, z27, z0)) + +/* +** mla_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svuint16x4_t, svuint16_t, + svmla_single_za64_u16_vg4x4 (w8 + 7, z1, z0), + svmla_za64_vg4x4 (w8 + 7, z1, z0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** umlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svuint16x4_t, svuint16_t, + svmla_single_za64_u16_vg4x4 (w8 + 8, z1, z0), + svmla_za64_vg4x4 (w8 + 8, z1, z0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** umlall za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svuint16x4_t, svuint16_t, + svmla_single_za64_u16_vg4x4 (w0 - 1, z1, z0), + svmla_za64_vg4x4 (w0 - 1, z1, z0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** umlall za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svuint16x4_t, svuint16_t, + svmla_single_za64_u16_vg4x4 (w8, z0, z15), + svmla_za64_vg4x4 (w8, z0, z15)) + +/* +** mla_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** umlall za\.d\[w8, 0:3, vgx4\], {z20\.h - z23\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svuint16x4_t, svuint16_t, + svmla_single_za64_u16_vg4x4 (w8, z20, z16), + svmla_za64_vg4x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_bf16_vg2x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_bf16_vg2x1.c new file mode 100644 index 0000000000000000000000000000000000000000..55c96206fb9bb8b5ac63d9134ac851e328fb59fc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_bf16_vg2x1.c @@ -0,0 +1,148 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_lane_0_z0_z0_0: +** mov (w8|w9|w10|w11), #?0 +** bfmlsl za\.s\[\1, 0:1\], z0\.h, z0\.h\[0\] +** ret +*/ +TEST_ZA_X1 (mls_lane_0_z0_z0_0, svbfloat16_t, + svmls_lane_za32_bf16_vg2x1 (0, z0, z0, 0), + svmls_lane_za32_vg2x1 (0, z0, z0, 0)) + +/* +** mls_lane_w0_z0_z3_1: +** mov (w8|w9|w10|w11), w0 +** bfmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[1\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w0_z0_z3_1, svbfloat16_t, + svmls_lane_za32_bf16_vg2x1 (w0, z0, z3, 1), + svmls_lane_za32_vg2x1 (w0, z0, z3, 1)) + +/* +** mls_lane_w7_z0_z3_2: +** mov (w8|w9|w10|w11), w7 +** bfmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[2\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w7_z0_z3_2, svbfloat16_t, + svmls_lane_za32_bf16_vg2x1 (w7, z0, z3, 2), + svmls_lane_za32_vg2x1 (w7, z0, z3, 2)) + +/* +** mls_lane_w8_z7_z3_3: +** bfmlsl za\.s\[w8, 0:1\], z7\.h, z3\.h\[3\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8_z7_z3_3, svbfloat16_t, + svmls_lane_za32_bf16_vg2x1 (w8, z7, z3, 3), + svmls_lane_za32_vg2x1 (w8, z7, z3, 3)) + +/* +** mls_lane_w8_z31_z16_4: +** mov (z[0-7])\.d, z16\.d +** bfmlsl za\.s\[w8, 0:1\], z31\.h,
\1\.h\[4\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8_z31_z16_4, svbfloat16_t, + svmls_lane_za32_bf16_vg2x1 (w8, z31, z16, 4), + svmls_lane_za32_vg2x1 (w8, z31, z16, 4)) + +/* +** mls_lane_w8p1_z0_z0_5: +** add (w8|w9|w10|w11), w8, #?1 +** bfmlsl za\.s\[\1, 0:1\], z0\.h, z0\.h\[5\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p1_z0_z0_5, svbfloat16_t, + svmls_lane_za32_bf16_vg2x1 (w8 + 1, z0, z0, 5), + svmls_lane_za32_vg2x1 (w8 + 1, z0, z0, 5)) + +/* +** mls_lane_w8p2_z23_z0_6: +** bfmlsl za\.s\[w8, 2:3\], z23\.h, z0\.h\[6\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p2_z23_z0_6, svbfloat16_t, + svmls_lane_za32_bf16_vg2x1 (w8 + 2, z23, z0, 6), + svmls_lane_za32_vg2x1 (w8 + 2, z23, z0, 6)) + +/* +** mls_lane_w11p6_z23_z0_7: +** bfmlsl za\.s\[w11, 6:7\], z23\.h, z0\.h\[7\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w11p6_z23_z0_7, svbfloat16_t, + svmls_lane_za32_bf16_vg2x1 (w11 + 6, z23, z0, 7), + svmls_lane_za32_vg2x1 (w11 + 6, z23, z0, 7)) + +/* +** mls_lane_w8p7_z7_z7_0: +** add (w8|w9|w10|w11), w8, #?7 +** bfmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[0\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p7_z7_z7_0, svbfloat16_t, + svmls_lane_za32_bf16_vg2x1 (w8 + 7, z7, z7, 0), + svmls_lane_za32_vg2x1 (w8 + 7, z7, z7, 0)) + +/* +** mls_lane_w11p10_z23_z0_1: +** bfmlsl za\.s\[w11, 10:11\], z23\.h, z0\.h\[1\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w11p10_z23_z0_1, svbfloat16_t, + svmls_lane_za32_bf16_vg2x1 (w11 + 10, z23, z0, 1), + svmls_lane_za32_vg2x1 (w11 + 10, z23, z0, 1)) + +/* +** mls_lane_w8p14_z23_z0_2: +** bfmlsl za\.s\[w8, 14:15\], z23\.h, z0\.h\[2\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p14_z23_z0_2, svbfloat16_t, + svmls_lane_za32_bf16_vg2x1 (w8 + 14, z23, z0, 2), + svmls_lane_za32_vg2x1 (w8 + 14, z23, z0, 2)) + +/* +** mls_lane_w8p15_z7_z7_3: +** add (w8|w9|w10|w11), w8, #?15 +** bfmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[3\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p15_z7_z7_3, svbfloat16_t, + svmls_lane_za32_bf16_vg2x1 (w8 + 15, z7, z7, 3), + svmls_lane_za32_vg2x1 (w8 + 15, z7, z7, 3)) + +/* +** mls_lane_w8p16_z7_z7_4: +** add (w8|w9|w10|w11), w8, #?16 +** bfmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[4\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p16_z7_z7_4, svbfloat16_t, + svmls_lane_za32_bf16_vg2x1 (w8 + 16, z7, z7, 4), + svmls_lane_za32_vg2x1 (w8 + 16, z7, z7, 4)) + +/* +** mls_lane_w8m1_z16_z0_5: +** sub (w8|w9|w10|w11), w8, #?1 +** bfmlsl za\.s\[\1, 0:1\], z16\.h, z0\.h\[5\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8m1_z16_z0_5, svbfloat16_t, + svmls_lane_za32_bf16_vg2x1 (w8 - 1, z16, z0, 5), + svmls_lane_za32_vg2x1 (w8 - 1, z16, z0, 5)) + +/* +** mls_lane_w12_z0_z3_6: +** mov (w8|w9|w10|w11), w12 +** bfmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[6\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w12_z0_z3_6, svbfloat16_t, + svmls_lane_za32_bf16_vg2x1 (w12, z0, z3, 6), + svmls_lane_za32_vg2x1 (w12, z0, z3, 6)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_bf16_vg2x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_bf16_vg2x2.c new file mode 100644 index 0000000000000000000000000000000000000000..26b786dc36364995a0d80e5ba15faab0c320c985 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_bf16_vg2x2.c @@ -0,0 +1,112 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** bfmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_0_z0_z4_0, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x2 (0, z0, z4, 0), + svmls_lane_za32_vg2x2 (0, z0, z4, 0)) + +/* +** 
mls_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** bfmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x2 (w0, z0, z7, 1), + svmls_lane_za32_vg2x2 (w0, z0, z7, 1)) + +/* +** mls_lane_w8_z28_z4_2: +** bfmlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x2 (w8, z28, z4, 2), + svmls_lane_za32_vg2x2 (w8, z28, z4, 2)) + +/* +** mls_lane_w8p6_z0_z4_7: +** bfmlsl za\.s\[w8, 6:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p6_z0_z4_7, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x2 (w8 + 6, z0, z4, 7), + svmls_lane_za32_vg2x2 (w8 + 6, z0, z4, 7)) + +/* +** mls_lane_w8p7_z0_z4_3: +** add (w8|w9|w10|w11), w8, #?7 +** bfmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x2 (w8 + 7, z0, z4, 3), + svmls_lane_za32_vg2x2 (w8 + 7, z0, z4, 3)) + +/* +** mls_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** bfmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x2 (w8 + 8, z0, z4, 4), + svmls_lane_za32_vg2x2 (w8 + 8, z0, z4, 4)) + +/* +** mls_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** bfmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x2 (w0 - 1, z0, z4, 5), + svmls_lane_za32_vg2x2 (w0 - 1, z0, z4, 5)) + +/* +** mls_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! 
+** bfmlsl za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x2 (w8, z4, z15, 6), + svmls_lane_za32_vg2x2 (w8, z4, z15, 6)) + +/* +** mls_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** bfmlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x2 (w8, z28, z16, 7), + svmls_lane_za32_vg2x2 (w8, z28, z16, 7)) + +/* +** mls_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** bfmlsl za\.s\[w8, 0:1, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x2 (w8, z17, z7, 0), + svmls_lane_za32_vg2x2 (w8, z17, z7, 0)) + +/* +** mls_lane_w8_z22_z4_1: +** bfmlsl za\.s\[w8, 0:1, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svbfloat16x2_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x2 (w8, z22, z4, 1), + svmls_lane_za32_vg2x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_bf16_vg2x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_bf16_vg2x4.c new file mode 100644 index 0000000000000000000000000000000000000000..d958899491bf35b0b7dfe8f7857b23513dbe4a47 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_bf16_vg2x4.c @@ -0,0 +1,118 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** bfmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_0_z0_z4_0, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x4 (0, z0, z4, 0), + svmls_lane_za32_vg2x4 (0, z0, z4, 0)) + +/* +** mls_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** bfmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x4 (w0, z0, z7, 1), + svmls_lane_za32_vg2x4 (w0, z0, z7, 1)) + +/* +** mls_lane_w8_z28_z4_2: +** bfmlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x4 (w8, z28, z4, 2), + svmls_lane_za32_vg2x4 (w8, z28, z4, 2)) + +/* +** mls_lane_w8p6_z0_z4_7: +** bfmlsl za\.s\[w8, 6:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p6_z0_z4_7, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x4 (w8 + 6, z0, z4, 7), + svmls_lane_za32_vg2x4 (w8 + 6, z0, z4, 7)) + +/* +** mls_lane_w8p7_z0_z4_3: +** add (w8|w9|w10|w11), w8, #?7 +** bfmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x4 (w8 + 7, z0, z4, 3), + svmls_lane_za32_vg2x4 (w8 + 7, z0, z4, 3)) + +/* +** mls_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** bfmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x4 (w8 + 8, z0, z4, 4), + svmls_lane_za32_vg2x4 (w8 + 8, z0, z4, 4)) + +/* +** mls_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** bfmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x4 (w0 - 1, 
z0, z4, 5), + svmls_lane_za32_vg2x4 (w0 - 1, z0, z4, 5)) + +/* +** mls_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! +** bfmlsl za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x4 (w8, z4, z15, 6), + svmls_lane_za32_vg2x4 (w8, z4, z15, 6)) + +/* +** mls_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** bfmlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x4 (w8, z28, z16, 7), + svmls_lane_za32_vg2x4 (w8, z28, z16, 7)) + +/* +** mls_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** bfmlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x4 (w8, z17, z7, 0), + svmls_lane_za32_vg2x4 (w8, z17, z7, 0)) + +/* +** mls_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** bfmlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svbfloat16x4_t, svbfloat16_t, + svmls_lane_za32_bf16_vg2x4 (w8, z22, z4, 1), + svmls_lane_za32_vg2x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f16_vg2x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f16_vg2x1.c new file mode 100644 index 0000000000000000000000000000000000000000..c211a38aba506387a422724f8c939e103f60988d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f16_vg2x1.c @@ -0,0 +1,148 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_lane_0_z0_z0_0: +** mov (w8|w9|w10|w11), #?0 +** fmlsl za\.s\[\1, 0:1\], z0\.h, z0\.h\[0\] +** ret +*/ +TEST_ZA_X1 (mls_lane_0_z0_z0_0, svfloat16_t, + svmls_lane_za32_f16_vg2x1 (0, z0, z0, 0), + svmls_lane_za32_vg2x1 (0, z0, z0, 0)) + +/* +** mls_lane_w0_z0_z3_1: +** mov (w8|w9|w10|w11), w0 +** fmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[1\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w0_z0_z3_1, svfloat16_t, + svmls_lane_za32_f16_vg2x1 (w0, z0, z3, 1), + svmls_lane_za32_vg2x1 (w0, z0, z3, 1)) + +/* +** mls_lane_w7_z0_z3_2: +** mov (w8|w9|w10|w11), w7 +** fmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[2\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w7_z0_z3_2, svfloat16_t, + svmls_lane_za32_f16_vg2x1 (w7, z0, z3, 2), + svmls_lane_za32_vg2x1 (w7, z0, z3, 2)) + +/* +** mls_lane_w8_z7_z3_3: +** fmlsl za\.s\[w8, 0:1\], z7\.h, z3\.h\[3\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8_z7_z3_3, svfloat16_t, + svmls_lane_za32_f16_vg2x1 (w8, z7, z3, 3), + svmls_lane_za32_vg2x1 (w8, z7, z3, 3)) + +/* +** mls_lane_w8_z31_z16_4: +** mov (z[0-7])\.d, z16\.d +** fmlsl za\.s\[w8, 0:1\], z31\.h,
\1\.h\[4\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8_z31_z16_4, svfloat16_t, + svmls_lane_za32_f16_vg2x1 (w8, z31, z16, 4), + svmls_lane_za32_vg2x1 (w8, z31, z16, 4)) + +/* +** mls_lane_w8p1_z0_z0_5: +** add (w8|w9|w10|w11), w8, #?1 +** fmlsl za\.s\[\1, 0:1\], z0\.h, z0\.h\[5\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p1_z0_z0_5, svfloat16_t, + svmls_lane_za32_f16_vg2x1 (w8 + 1, z0, z0, 5), + svmls_lane_za32_vg2x1 (w8 + 1, z0, z0, 5)) + +/* +** mls_lane_w8p2_z23_z0_6: +** fmlsl za\.s\[w8, 2:3\], z23\.h, z0\.h\[6\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p2_z23_z0_6, svfloat16_t, + svmls_lane_za32_f16_vg2x1 (w8 + 2, z23, z0, 6), + svmls_lane_za32_vg2x1 (w8 + 2, z23, z0, 6)) + +/* +** mls_lane_w11p6_z23_z0_7: +** fmlsl za\.s\[w11, 6:7\], z23\.h, z0\.h\[7\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w11p6_z23_z0_7, svfloat16_t, + svmls_lane_za32_f16_vg2x1 (w11 + 6, z23, z0, 7), + svmls_lane_za32_vg2x1 (w11 + 6, z23, z0, 7)) + +/* +** mls_lane_w8p7_z7_z7_0: +** add (w8|w9|w10|w11), w8, #?7 +** fmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[0\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p7_z7_z7_0, svfloat16_t, + svmls_lane_za32_f16_vg2x1 (w8 + 7, z7, z7, 0), + svmls_lane_za32_vg2x1 (w8 + 7, z7, z7, 0)) + +/* +** mls_lane_w11p10_z23_z0_1: +** fmlsl za\.s\[w11, 10:11\], z23\.h, z0\.h\[1\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w11p10_z23_z0_1, svfloat16_t, + svmls_lane_za32_f16_vg2x1 (w11 + 10, z23, z0, 1), + svmls_lane_za32_vg2x1 (w11 + 10, z23, z0, 1)) + +/* +** mls_lane_w8p14_z23_z0_2: +** fmlsl za\.s\[w8, 14:15\], z23\.h, z0\.h\[2\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p14_z23_z0_2, svfloat16_t, + svmls_lane_za32_f16_vg2x1 (w8 + 14, z23, z0, 2), + svmls_lane_za32_vg2x1 (w8 + 14, z23, z0, 2)) + +/* +** mls_lane_w8p15_z7_z7_3: +** add (w8|w9|w10|w11), w8, #?15 +** fmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[3\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p15_z7_z7_3, svfloat16_t, + svmls_lane_za32_f16_vg2x1 (w8 + 15, z7, z7, 3), + svmls_lane_za32_vg2x1 (w8 + 15, z7, z7, 3)) + +/* +** mls_lane_w8p16_z7_z7_4: +** add (w8|w9|w10|w11), w8, #?16 +** fmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[4\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p16_z7_z7_4, svfloat16_t, + svmls_lane_za32_f16_vg2x1 (w8 + 16, z7, z7, 4), + svmls_lane_za32_vg2x1 (w8 + 16, z7, z7, 4)) + +/* +** mls_lane_w8m1_z16_z0_5: +** sub (w8|w9|w10|w11), w8, #?1 +** fmlsl za\.s\[\1, 0:1\], z16\.h, z0\.h\[5\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8m1_z16_z0_5, svfloat16_t, + svmls_lane_za32_f16_vg2x1 (w8 - 1, z16, z0, 5), + svmls_lane_za32_vg2x1 (w8 - 1, z16, z0, 5)) + +/* +** mls_lane_w12_z0_z3_6: +** mov (w8|w9|w10|w11), w12 +** fmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[6\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w12_z0_z3_6, svfloat16_t, + svmls_lane_za32_f16_vg2x1 (w12, z0, z3, 6), + svmls_lane_za32_vg2x1 (w12, z0, z3, 6)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f16_vg2x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f16_vg2x2.c new file mode 100644 index 0000000000000000000000000000000000000000..27e1bdd54865dcd4d953f401c6183ef3046c31a2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f16_vg2x2.c @@ -0,0 +1,112 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** fmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_0_z0_z4_0, svfloat16x2_t, svfloat16_t, + svmls_lane_za32_f16_vg2x2 (0, z0, z4, 0), + svmls_lane_za32_vg2x2 (0, z0, z4, 0)) + +/* +** mls_lane_w0_z0_z7_1: +** mov 
(w8|w9|w10|w11), w0 +** fmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svfloat16x2_t, svfloat16_t, + svmls_lane_za32_f16_vg2x2 (w0, z0, z7, 1), + svmls_lane_za32_vg2x2 (w0, z0, z7, 1)) + +/* +** mls_lane_w8_z28_z4_2: +** fmlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svfloat16x2_t, svfloat16_t, + svmls_lane_za32_f16_vg2x2 (w8, z28, z4, 2), + svmls_lane_za32_vg2x2 (w8, z28, z4, 2)) + +/* +** mls_lane_w8p6_z0_z4_7: +** fmlsl za\.s\[w8, 6:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p6_z0_z4_7, svfloat16x2_t, svfloat16_t, + svmls_lane_za32_f16_vg2x2 (w8 + 6, z0, z4, 7), + svmls_lane_za32_vg2x2 (w8 + 6, z0, z4, 7)) + +/* +** mls_lane_w8p7_z0_z4_3: +** add (w8|w9|w10|w11), w8, #?7 +** fmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svfloat16x2_t, svfloat16_t, + svmls_lane_za32_f16_vg2x2 (w8 + 7, z0, z4, 3), + svmls_lane_za32_vg2x2 (w8 + 7, z0, z4, 3)) + +/* +** mls_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** fmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svfloat16x2_t, svfloat16_t, + svmls_lane_za32_f16_vg2x2 (w8 + 8, z0, z4, 4), + svmls_lane_za32_vg2x2 (w8 + 8, z0, z4, 4)) + +/* +** mls_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** fmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svfloat16x2_t, svfloat16_t, + svmls_lane_za32_f16_vg2x2 (w0 - 1, z0, z4, 5), + svmls_lane_za32_vg2x2 (w0 - 1, z0, z4, 5)) + +/* +** mls_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! +** fmlsl za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svfloat16x2_t, svfloat16_t, + svmls_lane_za32_f16_vg2x2 (w8, z4, z15, 6), + svmls_lane_za32_vg2x2 (w8, z4, z15, 6)) + +/* +** mls_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** fmlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svfloat16x2_t, svfloat16_t, + svmls_lane_za32_f16_vg2x2 (w8, z28, z16, 7), + svmls_lane_za32_vg2x2 (w8, z28, z16, 7)) + +/* +** mls_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** fmlsl za\.s\[w8, 0:1, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svfloat16x2_t, svfloat16_t, + svmls_lane_za32_f16_vg2x2 (w8, z17, z7, 0), + svmls_lane_za32_vg2x2 (w8, z17, z7, 0)) + +/* +** mls_lane_w8_z22_z4_1: +** fmlsl za\.s\[w8, 0:1, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svfloat16x2_t, svfloat16_t, + svmls_lane_za32_f16_vg2x2 (w8, z22, z4, 1), + svmls_lane_za32_vg2x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f16_vg2x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f16_vg2x4.c new file mode 100644 index 0000000000000000000000000000000000000000..a78e7108ee8a375a6e142f8897a7918190d3dd2c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f16_vg2x4.c @@ -0,0 +1,118 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** fmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_0_z0_z4_0, svfloat16x4_t, svfloat16_t, + svmls_lane_za32_f16_vg2x4 (0, z0, z4, 0), + 
svmls_lane_za32_vg2x4 (0, z0, z4, 0)) + +/* +** mls_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** fmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svfloat16x4_t, svfloat16_t, + svmls_lane_za32_f16_vg2x4 (w0, z0, z7, 1), + svmls_lane_za32_vg2x4 (w0, z0, z7, 1)) + +/* +** mls_lane_w8_z28_z4_2: +** fmlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svfloat16x4_t, svfloat16_t, + svmls_lane_za32_f16_vg2x4 (w8, z28, z4, 2), + svmls_lane_za32_vg2x4 (w8, z28, z4, 2)) + +/* +** mls_lane_w8p6_z0_z4_7: +** fmlsl za\.s\[w8, 6:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p6_z0_z4_7, svfloat16x4_t, svfloat16_t, + svmls_lane_za32_f16_vg2x4 (w8 + 6, z0, z4, 7), + svmls_lane_za32_vg2x4 (w8 + 6, z0, z4, 7)) + +/* +** mls_lane_w8p7_z0_z4_3: +** add (w8|w9|w10|w11), w8, #?7 +** fmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svfloat16x4_t, svfloat16_t, + svmls_lane_za32_f16_vg2x4 (w8 + 7, z0, z4, 3), + svmls_lane_za32_vg2x4 (w8 + 7, z0, z4, 3)) + +/* +** mls_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** fmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svfloat16x4_t, svfloat16_t, + svmls_lane_za32_f16_vg2x4 (w8 + 8, z0, z4, 4), + svmls_lane_za32_vg2x4 (w8 + 8, z0, z4, 4)) + +/* +** mls_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** fmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svfloat16x4_t, svfloat16_t, + svmls_lane_za32_f16_vg2x4 (w0 - 1, z0, z4, 5), + svmls_lane_za32_vg2x4 (w0 - 1, z0, z4, 5)) + +/* +** mls_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! 
+** fmlsl za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svfloat16x4_t, svfloat16_t, + svmls_lane_za32_f16_vg2x4 (w8, z4, z15, 6), + svmls_lane_za32_vg2x4 (w8, z4, z15, 6)) + +/* +** mls_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** fmlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svfloat16x4_t, svfloat16_t, + svmls_lane_za32_f16_vg2x4 (w8, z28, z16, 7), + svmls_lane_za32_vg2x4 (w8, z28, z16, 7)) + +/* +** mls_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svfloat16x4_t, svfloat16_t, + svmls_lane_za32_f16_vg2x4 (w8, z17, z7, 0), + svmls_lane_za32_vg2x4 (w8, z17, z7, 0)) + +/* +** mls_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svfloat16x4_t, svfloat16_t, + svmls_lane_za32_f16_vg2x4 (w8, z22, z4, 1), + svmls_lane_za32_vg2x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f32_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f32_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..ce7db44ddbce1cf0f7d31e3ee707f11196de1adc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f32_vg1x2.c @@ -0,0 +1,102 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** fmls za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, z4\.s\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_0_z0_z4_0, svfloat32x2_t, svfloat32_t, + svmls_lane_za32_f32_vg1x2 (0, z0, z4, 0), + svmls_lane_za32_vg1x2 (0, z0, z4, 0)) + +/* +** mls_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** fmls za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, z7\.s\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svfloat32x2_t, svfloat32_t, + svmls_lane_za32_f32_vg1x2 (w0, z0, z7, 1), + svmls_lane_za32_vg1x2 (w0, z0, z7, 1)) + +/* +** mls_lane_w8_z28_z4_2: +** fmls za\.s\[w8, 0, vgx2\], {z28\.s - z29\.s}, z4\.s\[2\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svfloat32x2_t, svfloat32_t, + svmls_lane_za32_f32_vg1x2 (w8, z28, z4, 2), + svmls_lane_za32_vg1x2 (w8, z28, z4, 2)) + +/* +** mls_lane_w8p7_z0_z4_3: +** fmls za\.s\[w8, 7, vgx2\], {z0\.s - z1\.s}, z4\.s\[3\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svfloat32x2_t, svfloat32_t, + svmls_lane_za32_f32_vg1x2 (w8 + 7, z0, z4, 3), + svmls_lane_za32_vg1x2 (w8 + 7, z0, z4, 3)) + +/* +** mls_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** fmls za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, z4\.s\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p8_z0_z4_0, svfloat32x2_t, svfloat32_t, + svmls_lane_za32_f32_vg1x2 (w8 + 8, z0, z4, 0), + svmls_lane_za32_vg1x2 (w8 + 8, z0, z4, 0)) + +/* +** mls_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** fmls za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, z4\.s\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0m1_z0_z4_1, svfloat32x2_t, svfloat32_t, + svmls_lane_za32_f32_vg1x2 (w0 - 1, z0, z4, 1), + svmls_lane_za32_vg1x2 (w0 - 1, z0, z4, 1)) + +/* +** mls_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** fmls za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, z15\.s\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_2, svfloat32x2_t, svfloat32_t, + svmls_lane_za32_f32_vg1x2 (w8, z4, z15, 2), + svmls_lane_za32_vg1x2 (w8, z4, z15, 2)) + +/* +** mls_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** fmls za\.s\[w8, 0, vgx2\], {z28\.s - z29\.s}, \1\.s\[3\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z16_3, svfloat32x2_t, svfloat32_t, + svmls_lane_za32_f32_vg1x2 (w8, z28, z16, 3), + svmls_lane_za32_vg1x2 (w8, z28, z16, 3)) + +/* +** mls_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** fmls za\.s\[w8, 0, vgx2\], [^\n]+, z7\.s\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svfloat32x2_t, svfloat32_t, + svmls_lane_za32_f32_vg1x2 (w8, z17, z7, 0), + svmls_lane_za32_vg1x2 (w8, z17, z7, 0)) + +/* +** mls_lane_w8_z22_z4_1: +** fmls za\.s\[w8, 0, vgx2\], {z22\.s - z23\.s}, z4\.s\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svfloat32x2_t, svfloat32_t, + svmls_lane_za32_f32_vg1x2 (w8, z22, z4, 1), + svmls_lane_za32_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f32_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f32_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..b642c1a13a08deee5da2394a67f0507482d34f20 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_f32_vg1x4.c @@ -0,0 +1,108 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** fmls za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, z4\.s\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_0_z0_z4_0, svfloat32x4_t, svfloat32_t, + svmls_lane_za32_f32_vg1x4 (0, z0, z4, 0), + svmls_lane_za32_vg1x4 (0, z0, z4, 0)) + +/* +** mls_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** fmls za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, z7\.s\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svfloat32x4_t, svfloat32_t, + svmls_lane_za32_f32_vg1x4 (w0, z0, z7, 1), + svmls_lane_za32_vg1x4 (w0, z0, z7, 1)) + +/* +** mls_lane_w8_z28_z4_2: +** fmls za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}, z4\.s\[2\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svfloat32x4_t, svfloat32_t, + svmls_lane_za32_f32_vg1x4 (w8, z28, z4, 2), + svmls_lane_za32_vg1x4 (w8, z28, z4, 2)) + +/* +** mls_lane_w8p7_z0_z4_3: +** fmls za\.s\[w8, 7, vgx4\], {z0\.s - z3\.s}, z4\.s\[3\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svfloat32x4_t, svfloat32_t, + svmls_lane_za32_f32_vg1x4 (w8 + 7, z0, z4, 3), + svmls_lane_za32_vg1x4 (w8 + 7, z0, z4, 3)) + +/* +** mls_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** fmls za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, z4\.s\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p8_z0_z4_0, svfloat32x4_t, svfloat32_t, + svmls_lane_za32_f32_vg1x4 (w8 + 8, z0, z4, 0), + svmls_lane_za32_vg1x4 (w8 + 8, z0, z4, 0)) + +/* +** mls_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** fmls za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, z4\.s\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0m1_z0_z4_1, svfloat32x4_t, svfloat32_t, + svmls_lane_za32_f32_vg1x4 (w0 - 1, z0, z4, 1), + svmls_lane_za32_vg1x4 (w0 - 1, z0, z4, 1)) + +/* +** mls_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** fmls za\.s\[w8, 0, vgx4\], {z4\.s - z7\.s}, z15\.s\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_2, svfloat32x4_t, svfloat32_t, + svmls_lane_za32_f32_vg1x4 (w8, z4, z15, 2), + svmls_lane_za32_vg1x4 (w8, z4, z15, 2)) + +/* +** mls_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** fmls za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}, \1\.s\[3\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z16_3, svfloat32x4_t, svfloat32_t, + svmls_lane_za32_f32_vg1x4 (w8, z28, z16, 3), + svmls_lane_za32_vg1x4 (w8, z28, z16, 3)) + +/* +** mls_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmls za\.s\[w8, 0, vgx4\], [^\n]+, z7\.s\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svfloat32x4_t, svfloat32_t, + svmls_lane_za32_f32_vg1x4 (w8, z17, z7, 0), + svmls_lane_za32_vg1x4 (w8, z17, z7, 0)) + +/* +** mls_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmls za\.s\[w8, 0, vgx4\], [^\n]+, z4\.s\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svfloat32x4_t, svfloat32_t, + svmls_lane_za32_f32_vg1x4 (w8, z22, z4, 1), + svmls_lane_za32_vg1x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s16_vg2x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s16_vg2x1.c new file mode 100644 index 0000000000000000000000000000000000000000..66a0d39c437ea0e70615e3f554824c1b3daa5b57 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s16_vg2x1.c @@ -0,0 +1,148 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_lane_0_z0_z0_0: +** mov (w8|w9|w10|w11), #?0 +** smlsl za\.s\[\1, 0:1\], z0\.h, z0\.h\[0\] +** ret +*/ +TEST_ZA_X1 (mls_lane_0_z0_z0_0, svint16_t, + svmls_lane_za32_s16_vg2x1 (0, z0, z0, 0), + svmls_lane_za32_vg2x1 (0, z0, z0, 0)) + +/* +** mls_lane_w0_z0_z3_1: +** mov (w8|w9|w10|w11), w0 +** smlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[1\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w0_z0_z3_1, svint16_t, + svmls_lane_za32_s16_vg2x1 (w0, z0, z3, 1), + svmls_lane_za32_vg2x1 (w0, z0, z3, 1)) + +/* +** mls_lane_w7_z0_z3_2: +** mov (w8|w9|w10|w11), w7 +** smlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[2\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w7_z0_z3_2, svint16_t, + svmls_lane_za32_s16_vg2x1 (w7, z0, z3, 2), + svmls_lane_za32_vg2x1 (w7, z0, z3, 2)) + +/* +** mls_lane_w8_z7_z3_3: +** smlsl za\.s\[w8, 0:1\], z7\.h, z3\.h\[3\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8_z7_z3_3, svint16_t, + svmls_lane_za32_s16_vg2x1 (w8, z7, z3, 3), + svmls_lane_za32_vg2x1 (w8, z7, z3, 3)) + +/* +** mls_lane_w8_z31_z16_4: +** mov (z[0-7])\.d, z16\.d +** smlsl za\.s\[w8, 0:1\], z31\.h,
\1\.h\[4\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8_z31_z16_4, svint16_t, + svmls_lane_za32_s16_vg2x1 (w8, z31, z16, 4), + svmls_lane_za32_vg2x1 (w8, z31, z16, 4)) + +/* +** mls_lane_w8p1_z0_z0_5: +** add (w8|w9|w10|w11), w8, #?1 +** smlsl za\.s\[\1, 0:1\], z0\.h, z0\.h\[5\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p1_z0_z0_5, svint16_t, + svmls_lane_za32_s16_vg2x1 (w8 + 1, z0, z0, 5), + svmls_lane_za32_vg2x1 (w8 + 1, z0, z0, 5)) + +/* +** mls_lane_w8p2_z23_z0_6: +** smlsl za\.s\[w8, 2:3\], z23\.h, z0\.h\[6\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p2_z23_z0_6, svint16_t, + svmls_lane_za32_s16_vg2x1 (w8 + 2, z23, z0, 6), + svmls_lane_za32_vg2x1 (w8 + 2, z23, z0, 6)) + +/* +** mls_lane_w11p6_z23_z0_7: +** smlsl za\.s\[w11, 6:7\], z23\.h, z0\.h\[7\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w11p6_z23_z0_7, svint16_t, + svmls_lane_za32_s16_vg2x1 (w11 + 6, z23, z0, 7), + svmls_lane_za32_vg2x1 (w11 + 6, z23, z0, 7)) + +/* +** mls_lane_w8p7_z7_z7_0: +** add (w8|w9|w10|w11), w8, #?7 +** smlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[0\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p7_z7_z7_0, svint16_t, + svmls_lane_za32_s16_vg2x1 (w8 + 7, z7, z7, 0), + svmls_lane_za32_vg2x1 (w8 + 7, z7, z7, 0)) + +/* +** mls_lane_w11p10_z23_z0_1: +** smlsl za\.s\[w11, 10:11\], z23\.h, z0\.h\[1\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w11p10_z23_z0_1, svint16_t, + svmls_lane_za32_s16_vg2x1 (w11 + 10, z23, z0, 1), + svmls_lane_za32_vg2x1 (w11 + 10, z23, z0, 1)) + +/* +** mls_lane_w8p14_z23_z0_2: +** smlsl za\.s\[w8, 14:15\], z23\.h, z0\.h\[2\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p14_z23_z0_2, svint16_t, + svmls_lane_za32_s16_vg2x1 (w8 + 14, z23, z0, 2), + svmls_lane_za32_vg2x1 (w8 + 14, z23, z0, 2)) + +/* +** mls_lane_w8p15_z7_z7_3: +** add (w8|w9|w10|w11), w8, #?15 +** smlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[3\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p15_z7_z7_3, svint16_t, + svmls_lane_za32_s16_vg2x1 (w8 + 15, z7, z7, 3), + svmls_lane_za32_vg2x1 (w8 + 15, z7, z7, 3)) + +/* +** mls_lane_w8p16_z7_z7_4: +** add (w8|w9|w10|w11), w8, #?16 +** smlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[4\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p16_z7_z7_4, svint16_t, + svmls_lane_za32_s16_vg2x1 (w8 + 16, z7, z7, 4), + svmls_lane_za32_vg2x1 (w8 + 16, z7, z7, 4)) + +/* +** mls_lane_w8m1_z16_z0_5: +** sub (w8|w9|w10|w11), w8, #?1 +** smlsl za\.s\[\1, 0:1\], z16\.h, z0\.h\[5\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8m1_z16_z0_5, svint16_t, + svmls_lane_za32_s16_vg2x1 (w8 - 1, z16, z0, 5), + svmls_lane_za32_vg2x1 (w8 - 1, z16, z0, 5)) + +/* +** mls_lane_w12_z0_z3_6: +** mov (w8|w9|w10|w11), w12 +** smlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[6\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w12_z0_z3_6, svint16_t, + svmls_lane_za32_s16_vg2x1 (w12, z0, z3, 6), + svmls_lane_za32_vg2x1 (w12, z0, z3, 6)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s16_vg2x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s16_vg2x2.c new file mode 100644 index 0000000000000000000000000000000000000000..f4a08b1748f1988185d5b8f25f36906304c623cf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s16_vg2x2.c @@ -0,0 +1,112 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** smlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_0_z0_z4_0, svint16x2_t, svint16_t, + svmls_lane_za32_s16_vg2x2 (0, z0, z4, 0), + svmls_lane_za32_vg2x2 (0, z0, z4, 0)) + +/* +** mls_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** smlsl 
za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svint16x2_t, svint16_t, + svmls_lane_za32_s16_vg2x2 (w0, z0, z7, 1), + svmls_lane_za32_vg2x2 (w0, z0, z7, 1)) + +/* +** mls_lane_w8_z28_z4_2: +** smlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svint16x2_t, svint16_t, + svmls_lane_za32_s16_vg2x2 (w8, z28, z4, 2), + svmls_lane_za32_vg2x2 (w8, z28, z4, 2)) + +/* +** mls_lane_w8p6_z0_z4_7: +** smlsl za\.s\[w8, 6:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p6_z0_z4_7, svint16x2_t, svint16_t, + svmls_lane_za32_s16_vg2x2 (w8 + 6, z0, z4, 7), + svmls_lane_za32_vg2x2 (w8 + 6, z0, z4, 7)) + +/* +** mls_lane_w8p7_z0_z4_3: +** add (w8|w9|w10|w11), w8, #?7 +** smlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svint16x2_t, svint16_t, + svmls_lane_za32_s16_vg2x2 (w8 + 7, z0, z4, 3), + svmls_lane_za32_vg2x2 (w8 + 7, z0, z4, 3)) + +/* +** mls_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** smlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svint16x2_t, svint16_t, + svmls_lane_za32_s16_vg2x2 (w8 + 8, z0, z4, 4), + svmls_lane_za32_vg2x2 (w8 + 8, z0, z4, 4)) + +/* +** mls_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** smlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svint16x2_t, svint16_t, + svmls_lane_za32_s16_vg2x2 (w0 - 1, z0, z4, 5), + svmls_lane_za32_vg2x2 (w0 - 1, z0, z4, 5)) + +/* +** mls_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! +** smlsl za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svint16x2_t, svint16_t, + svmls_lane_za32_s16_vg2x2 (w8, z4, z15, 6), + svmls_lane_za32_vg2x2 (w8, z4, z15, 6)) + +/* +** mls_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** smlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svint16x2_t, svint16_t, + svmls_lane_za32_s16_vg2x2 (w8, z28, z16, 7), + svmls_lane_za32_vg2x2 (w8, z28, z16, 7)) + +/* +** mls_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** smlsl za\.s\[w8, 0:1, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svint16x2_t, svint16_t, + svmls_lane_za32_s16_vg2x2 (w8, z17, z7, 0), + svmls_lane_za32_vg2x2 (w8, z17, z7, 0)) + +/* +** mls_lane_w8_z22_z4_1: +** smlsl za\.s\[w8, 0:1, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svint16x2_t, svint16_t, + svmls_lane_za32_s16_vg2x2 (w8, z22, z4, 1), + svmls_lane_za32_vg2x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s16_vg2x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s16_vg2x4.c new file mode 100644 index 0000000000000000000000000000000000000000..3e39ddb0ce08f3b5617f7d48e12bf7fb0cd79840 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s16_vg2x4.c @@ -0,0 +1,118 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** smlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_0_z0_z4_0, svint16x4_t, svint16_t, + svmls_lane_za32_s16_vg2x4 (0, z0, z4, 0), + svmls_lane_za32_vg2x4 (0, z0, z4, 0)) + +/* +** mls_lane_w0_z0_z7_1: +** mov 
(w8|w9|w10|w11), w0 +** smlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svint16x4_t, svint16_t, + svmls_lane_za32_s16_vg2x4 (w0, z0, z7, 1), + svmls_lane_za32_vg2x4 (w0, z0, z7, 1)) + +/* +** mls_lane_w8_z28_z4_2: +** smlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svint16x4_t, svint16_t, + svmls_lane_za32_s16_vg2x4 (w8, z28, z4, 2), + svmls_lane_za32_vg2x4 (w8, z28, z4, 2)) + +/* +** mls_lane_w8p6_z0_z4_7: +** smlsl za\.s\[w8, 6:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p6_z0_z4_7, svint16x4_t, svint16_t, + svmls_lane_za32_s16_vg2x4 (w8 + 6, z0, z4, 7), + svmls_lane_za32_vg2x4 (w8 + 6, z0, z4, 7)) + +/* +** mls_lane_w8p7_z0_z4_3: +** add (w8|w9|w10|w11), w8, #?7 +** smlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svint16x4_t, svint16_t, + svmls_lane_za32_s16_vg2x4 (w8 + 7, z0, z4, 3), + svmls_lane_za32_vg2x4 (w8 + 7, z0, z4, 3)) + +/* +** mls_lane_w8p8_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?8 +** smlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svint16x4_t, svint16_t, + svmls_lane_za32_s16_vg2x4 (w8 + 8, z0, z4, 4), + svmls_lane_za32_vg2x4 (w8 + 8, z0, z4, 4)) + +/* +** mls_lane_w0m1_z0_z4_5: +** sub (w8|w9|w10|w11), w0, #?1 +** smlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svint16x4_t, svint16_t, + svmls_lane_za32_s16_vg2x4 (w0 - 1, z0, z4, 5), + svmls_lane_za32_vg2x4 (w0 - 1, z0, z4, 5)) + +/* +** mls_lane_w8_z4_z15_6: +** str d15, \[sp, #?-16\]! +** smlsl za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svint16x4_t, svint16_t, + svmls_lane_za32_s16_vg2x4 (w8, z4, z15, 6), + svmls_lane_za32_vg2x4 (w8, z4, z15, 6)) + +/* +** mls_lane_w8_z28_z16_7: +** mov (z[0-7]).d, z16.d +** smlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svint16x4_t, svint16_t, + svmls_lane_za32_s16_vg2x4 (w8, z28, z16, 7), + svmls_lane_za32_vg2x4 (w8, z28, z16, 7)) + +/* +** mls_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svint16x4_t, svint16_t, + svmls_lane_za32_s16_vg2x4 (w8, z17, z7, 0), + svmls_lane_za32_vg2x4 (w8, z17, z7, 0)) + +/* +** mls_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** smlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svint16x4_t, svint16_t, + svmls_lane_za32_s16_vg2x4 (w8, z22, z4, 1), + svmls_lane_za32_vg2x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s8_vg4x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s8_vg4x1.c new file mode 100644 index 0000000000000000000000000000000000000000..ad6e418f9ce0bffeb04a22dbf285fa14769a21da --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s8_vg4x1.c @@ -0,0 +1,150 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_lane_0_z0_z0_0: +** mov (w8|w9|w10|w11), #?0 +** smlsll za\.s\[\1, 0:3\], z0\.b, z0\.b\[0\] +** ret +*/ +TEST_ZA_X1 (mls_lane_0_z0_z0_0, svint8_t, + svmls_lane_za32_s8_vg4x1 (0, z0, z0, 0), + 
svmls_lane_za32_vg4x1 (0, z0, z0, 0)) + +/* +** mls_lane_w0_z0_z3_1: +** mov (w8|w9|w10|w11), w0 +** smlsll za\.s\[\1, 0:3\], z0\.b, z3\.b\[1\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w0_z0_z3_1, svint8_t, + svmls_lane_za32_s8_vg4x1 (w0, z0, z3, 1), + svmls_lane_za32_vg4x1 (w0, z0, z3, 1)) + +/* +** mls_lane_w7_z0_z3_2: +** mov (w8|w9|w10|w11), w7 +** smlsll za\.s\[\1, 0:3\], z0\.b, z3\.b\[2\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w7_z0_z3_2, svint8_t, + svmls_lane_za32_s8_vg4x1 (w7, z0, z3, 2), + svmls_lane_za32_vg4x1 (w7, z0, z3, 2)) + +/* +** mls_lane_w8_z7_z3_3: +** smlsll za\.s\[w8, 0:3\], z7\.b, z3\.b\[3\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8_z7_z3_3, svint8_t, + svmls_lane_za32_s8_vg4x1 (w8, z7, z3, 3), + svmls_lane_za32_vg4x1 (w8, z7, z3, 3)) + +/* +** mls_lane_w8_z31_z16_4: +** mov (z[0-7])\.d, z16\.d +** smlsll za\.s\[w8, 0:3\], z31\.b, \1\.b\[4\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8_z31_z16_4, svint8_t, + svmls_lane_za32_s8_vg4x1 (w8, z31, z16, 4), + svmls_lane_za32_vg4x1 (w8, z31, z16, 4)) + +/* +** mls_lane_w8p1_z0_z0_5: +** add (w8|w9|w10|w11), w8, #?1 +** smlsll za\.s\[\1, 0:3\], z0\.b, z0\.b\[5\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p1_z0_z0_5, svint8_t, + svmls_lane_za32_s8_vg4x1 (w8 + 1, z0, z0, 5), + svmls_lane_za32_vg4x1 (w8 + 1, z0, z0, 5)) + +/* +** mls_lane_w8p2_z23_z0_6: +** add (w8|w9|w10|w11), w8, #?2 +** smlsll za\.s\[\1, 0:3\], z23\.b, z0\.b\[6\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p2_z23_z0_6, svint8_t, + svmls_lane_za32_s8_vg4x1 (w8 + 2, z23, z0, 6), + svmls_lane_za32_vg4x1 (w8 + 2, z23, z0, 6)) + +/* +** mls_lane_w11p4_z23_z0_7: +** smlsll za\.s\[w11, 4:7\], z23\.b, z0\.b\[7\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w11p4_z23_z0_7, svint8_t, + svmls_lane_za32_s8_vg4x1 (w11 + 4, z23, z0, 7), + svmls_lane_za32_vg4x1 (w11 + 4, z23, z0, 7)) + +/* +** mls_lane_w8p7_z7_z7_8: +** add (w8|w9|w10|w11), w8, #?7 +** smlsll za\.s\[\1, 0:3\], z7\.b, z7\.b\[8\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p7_z7_z7_8, svint8_t, + svmls_lane_za32_s8_vg4x1 (w8 + 7, z7, z7, 8), + svmls_lane_za32_vg4x1 (w8 + 7, z7, z7, 8)) + +/* +** mls_lane_w11p12_z23_z0_9: +** smlsll za\.s\[w11, 12:15\], z23\.b, z0\.b\[9\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w11p12_z23_z0_9, svint8_t, + svmls_lane_za32_s8_vg4x1 (w11 + 12, z23, z0, 9), + svmls_lane_za32_vg4x1 (w11 + 12, z23, z0, 9)) + +/* +** mls_lane_w8p14_z23_z0_10: +** add (w8|w9|w10|w11), w8, #?14 +** smlsll za\.s\[\1, 0:3\], z23\.b, z0\.b\[10\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p14_z23_z0_10, svint8_t, + svmls_lane_za32_s8_vg4x1 (w8 + 14, z23, z0, 10), + svmls_lane_za32_vg4x1 (w8 + 14, z23, z0, 10)) + +/* +** mls_lane_w8p15_z7_z7_11: +** add (w8|w9|w10|w11), w8, #?15 +** smlsll za\.s\[\1, 0:3\], z7\.b, z7\.b\[11\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p15_z7_z7_11, svint8_t, + svmls_lane_za32_s8_vg4x1 (w8 + 15, z7, z7, 11), + svmls_lane_za32_vg4x1 (w8 + 15, z7, z7, 11)) + +/* +** mls_lane_w8p16_z7_z7_12: +** add (w8|w9|w10|w11), w8, #?16 +** smlsll za\.s\[\1, 0:3\], z7\.b, z7\.b\[12\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8p16_z7_z7_12, svint8_t, + svmls_lane_za32_s8_vg4x1 (w8 + 16, z7, z7, 12), + svmls_lane_za32_vg4x1 (w8 + 16, z7, z7, 12)) + +/* +** mls_lane_w8m1_z16_z0_13: +** sub (w8|w9|w10|w11), w8, #?1 +** smlsll za\.s\[\1, 0:3\], z16\.b, z0\.b\[13\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w8m1_z16_z0_13, svint8_t, + svmls_lane_za32_s8_vg4x1 (w8 - 1, z16, z0, 13), + svmls_lane_za32_vg4x1 (w8 - 1, z16, z0, 13)) + +/* +** mls_lane_w12_z0_z3_15: +** mov (w8|w9|w10|w11), w12 +** smlsll za\.s\[\1, 0:3\], z0\.b, z3\.b\[15\] +** ret +*/ +TEST_ZA_X1 (mls_lane_w12_z0_z3_15,
svint8_t, + svmls_lane_za32_s8_vg4x1 (w12, z0, z3, 15), + svmls_lane_za32_vg4x1 (w12, z0, z3, 15)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s8_vg4x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s8_vg4x2.c new file mode 100644 index 0000000000000000000000000000000000000000..a00e0de60f5b08132b179fbbda31d8ce3afe7290 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s8_vg4x2.c @@ -0,0 +1,122 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** smlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (mls_lane_0_z0_z4_0, svint8x2_t, svint8_t, + svmls_lane_za32_s8_vg4x2 (0, z0, z4, 0), + svmls_lane_za32_vg4x2 (0, z0, z4, 0)) + +/* +** mls_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** smlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svint8x2_t, svint8_t, + svmls_lane_za32_s8_vg4x2 (w0, z0, z7, 1), + svmls_lane_za32_vg4x2 (w0, z0, z7, 1)) + +/* +** mls_lane_w8_z28_z4_2: +** smlsll za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svint8x2_t, svint8_t, + svmls_lane_za32_s8_vg4x2 (w8, z28, z4, 2), + svmls_lane_za32_vg4x2 (w8, z28, z4, 2)) + +/* +** mls_lane_w11p4_z0_z4_3: +** smlsll za\.s\[w11, 4:7, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w11p4_z0_z4_3, svint8x2_t, svint8_t, + svmls_lane_za32_s8_vg4x2 (w11 + 4, z0, z4, 3), + svmls_lane_za32_vg4x2 (w11 + 4, z0, z4, 3)) + +/* +** mls_lane_w8p6_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?6 +** smlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[4\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p6_z0_z4_4, svint8x2_t, svint8_t, + svmls_lane_za32_s8_vg4x2 (w8 + 6, z0, z4, 4), + svmls_lane_za32_vg4x2 (w8 + 6, z0, z4, 4)) + +/* +** mls_lane_w8p7_z0_z4_5: +** add (w8|w9|w10|w11), w8, #?7 +** smlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[5\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p7_z0_z4_5, svint8x2_t, svint8_t, + svmls_lane_za32_s8_vg4x2 (w8 + 7, z0, z4, 5), + svmls_lane_za32_vg4x2 (w8 + 7, z0, z4, 5)) + +/* +** mls_lane_w8p8_z0_z4_7: +** add (w8|w9|w10|w11), w8, #?8 +** smlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[7\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w8p8_z0_z4_7, svint8x2_t, svint8_t, + svmls_lane_za32_s8_vg4x2 (w8 + 8, z0, z4, 7), + svmls_lane_za32_vg4x2 (w8 + 8, z0, z4, 7)) + +/* +** mls_lane_w0m1_z0_z4_9: +** sub (w8|w9|w10|w11), w0, #?1 +** smlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[9\] +** ret +*/ +TEST_ZA_LANE (mls_lane_w0m1_z0_z4_9, svint8x2_t, svint8_t, + svmls_lane_za32_s8_vg4x2 (w0 - 1, z0, z4, 9), + svmls_lane_za32_vg4x2 (w0 - 1, z0, z4, 9)) + +/* +** mls_lane_w8_z4_z15_10: +** str d15, \[sp, #?-16\]! 
+** smlsll za\.s\[w8, 0:3, vgx2\], {z4\.b - z5\.b}, z15\.b\[10\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_10, svint8x2_t, svint8_t,
+                  svmls_lane_za32_s8_vg4x2 (w8, z4, z15, 10),
+                  svmls_lane_za32_vg4x2 (w8, z4, z15, 10))
+
+/*
+** mls_lane_w8_z28_z16_11:
+** mov (z[0-7])\.d, z16\.d
+** smlsll za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, \1\.b\[11\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_11, svint8x2_t, svint8_t,
+              svmls_lane_za32_s8_vg4x2 (w8, z28, z16, 11),
+              svmls_lane_za32_vg4x2 (w8, z28, z16, 11))
+
+/*
+** mls_lane_w8_z17_z7_13:
+** mov [^\n]+
+** mov [^\n]+
+** smlsll za\.s\[w8, 0:3, vgx2\], [^\n]+, z7\.b\[13\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_13, svint8x2_t, svint8_t,
+              svmls_lane_za32_s8_vg4x2 (w8, z17, z7, 13),
+              svmls_lane_za32_vg4x2 (w8, z17, z7, 13))
+
+/*
+** mls_lane_w8_z22_z4_15:
+** smlsll za\.s\[w8, 0:3, vgx2\], {z22\.b - z23\.b}, z4\.b\[15\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_15, svint8x2_t, svint8_t,
+              svmls_lane_za32_s8_vg4x2 (w8, z22, z4, 15),
+              svmls_lane_za32_vg4x2 (w8, z22, z4, 15))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s8_vg4x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s8_vg4x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..53e9b34c70a7fc07305fc22eb54bca5cb6d359b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_s8_vg4x4.c
@@ -0,0 +1,128 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** smlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svint8x4_t, svint8_t,
+              svmls_lane_za32_s8_vg4x4 (0, z0, z4, 0),
+              svmls_lane_za32_vg4x4 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** smlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svint8x4_t, svint8_t,
+              svmls_lane_za32_s8_vg4x4 (w0, z0, z7, 1),
+              svmls_lane_za32_vg4x4 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** smlsll za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svint8x4_t, svint8_t,
+              svmls_lane_za32_s8_vg4x4 (w8, z28, z4, 2),
+              svmls_lane_za32_vg4x4 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w11p4_z0_z4_7:
+** smlsll za\.s\[w11, 4:7, vgx4\], {z0\.b - z3\.b}, z4\.b\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w11p4_z0_z4_7, svint8x4_t, svint8_t,
+              svmls_lane_za32_s8_vg4x4 (w11 + 4, z0, z4, 7),
+              svmls_lane_za32_vg4x4 (w11 + 4, z0, z4, 7))
+
+/*
+** mls_lane_w8p6_z0_z4_8:
+** add (w8|w9|w10|w11), w8, #?6
+** smlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[8\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_8, svint8x4_t, svint8_t,
+              svmls_lane_za32_s8_vg4x4 (w8 + 6, z0, z4, 8),
+              svmls_lane_za32_vg4x4 (w8 + 6, z0, z4, 8))
+
+/*
+** mls_lane_w8p7_z0_z4_9:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[9\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_9, svint8x4_t, svint8_t,
+              svmls_lane_za32_s8_vg4x4 (w8 + 7, z0, z4, 9),
+              svmls_lane_za32_vg4x4 (w8 + 7, z0, z4, 9))
+
+/*
+** mls_lane_w8p8_z0_z4_10:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[10\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_10, svint8x4_t, svint8_t,
+              svmls_lane_za32_s8_vg4x4 (w8 + 8, z0, z4, 10),
+              svmls_lane_za32_vg4x4 (w8 + 8, z0, z4, 10))
+
+/*
+** mls_lane_w0m1_z0_z4_11:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[11\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_11, svint8x4_t, svint8_t,
+              svmls_lane_za32_s8_vg4x4 (w0 - 1, z0, z4, 11),
+              svmls_lane_za32_vg4x4 (w0 - 1, z0, z4, 11))
+
+/*
+** mls_lane_w8_z4_z15_12:
+** str d15, \[sp, #?-16\]!
+** smlsll za\.s\[w8, 0:3, vgx4\], {z4\.b - z7\.b}, z15\.b\[12\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_12, svint8x4_t, svint8_t,
+                  svmls_lane_za32_s8_vg4x4 (w8, z4, z15, 12),
+                  svmls_lane_za32_vg4x4 (w8, z4, z15, 12))
+
+/*
+** mls_lane_w8_z28_z16_13:
+** mov (z[0-7])\.d, z16\.d
+** smlsll za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, \1\.b\[13\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_13, svint8x4_t, svint8_t,
+              svmls_lane_za32_s8_vg4x4 (w8, z28, z16, 13),
+              svmls_lane_za32_vg4x4 (w8, z28, z16, 13))
+
+/*
+** mls_lane_w8_z17_z7_14:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smlsll za\.s\[w8, 0:3, vgx4\], [^\n]+, z7\.b\[14\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_14, svint8x4_t, svint8_t,
+              svmls_lane_za32_s8_vg4x4 (w8, z17, z7, 14),
+              svmls_lane_za32_vg4x4 (w8, z17, z7, 14))
+
+/*
+** mls_lane_w8_z22_z4_15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smlsll za\.s\[w8, 0:3, vgx4\], [^\n]+, z4\.b\[15\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_15, svint8x4_t, svint8_t,
+              svmls_lane_za32_s8_vg4x4 (w8, z22, z4, 15),
+              svmls_lane_za32_vg4x4 (w8, z22, z4, 15))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u16_vg2x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u16_vg2x1.c
new file mode 100644
index 0000000000000000000000000000000000000000..83fa732ee490ec794b420d90053595e82b8e1e74
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u16_vg2x1.c
@@ -0,0 +1,148 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** umlsl za\.s\[\1, 0:1\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_0_z0_z0_0, svuint16_t,
+            svmls_lane_za32_u16_vg2x1 (0, z0, z0, 0),
+            svmls_lane_za32_vg2x1 (0, z0, z0, 0))
+
+/*
+** mls_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** umlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w0_z0_z3_1, svuint16_t,
+            svmls_lane_za32_u16_vg2x1 (w0, z0, z3, 1),
+            svmls_lane_za32_vg2x1 (w0, z0, z3, 1))
+
+/*
+** mls_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** umlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w7_z0_z3_2, svuint16_t,
+            svmls_lane_za32_u16_vg2x1 (w7, z0, z3, 2),
+            svmls_lane_za32_vg2x1 (w7, z0, z3, 2))
+
+/*
+** mls_lane_w8_z7_z3_3:
+** umlsl za\.s\[w8, 0:1\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z7_z3_3, svuint16_t,
+            svmls_lane_za32_u16_vg2x1 (w8, z7, z3, 3),
+            svmls_lane_za32_vg2x1 (w8, z7, z3, 3))
+
+/*
+** mls_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** umlsl za\.s\[w8, 0:1\], z31\.h, \1\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z31_z16_4, svuint16_t,
+            svmls_lane_za32_u16_vg2x1 (w8, z31, z16, 4),
+            svmls_lane_za32_vg2x1 (w8, z31, z16, 4))
+
+/*
+** mls_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsl za\.s\[\1, 0:1\], z0\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p1_z0_z0_5, svuint16_t,
+            svmls_lane_za32_u16_vg2x1 (w8 + 1, z0, z0, 5),
+            svmls_lane_za32_vg2x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mls_lane_w8p2_z23_z0_6:
+** umlsl za\.s\[w8, 2:3\], z23\.h, z0\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p2_z23_z0_6, svuint16_t,
+            svmls_lane_za32_u16_vg2x1 (w8 + 2, z23, z0, 6),
+            svmls_lane_za32_vg2x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mls_lane_w11p6_z23_z0_7:
+** umlsl za\.s\[w11, 6:7\], z23\.h, z0\.h\[7\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p6_z23_z0_7, svuint16_t,
+            svmls_lane_za32_u16_vg2x1 (w11 + 6, z23, z0, 7),
+            svmls_lane_za32_vg2x1 (w11 + 6, z23, z0, 7))
+
+/*
+** mls_lane_w8p7_z7_z7_0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p7_z7_z7_0, svuint16_t,
+            svmls_lane_za32_u16_vg2x1 (w8 + 7, z7, z7, 0),
+            svmls_lane_za32_vg2x1 (w8 + 7, z7, z7, 0))
+
+/*
+** mls_lane_w11p10_z23_z0_1:
+** umlsl za\.s\[w11, 10:11\], z23\.h, z0\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p10_z23_z0_1, svuint16_t,
+            svmls_lane_za32_u16_vg2x1 (w11 + 10, z23, z0, 1),
+            svmls_lane_za32_vg2x1 (w11 + 10, z23, z0, 1))
+
+/*
+** mls_lane_w8p14_z23_z0_2:
+** umlsl za\.s\[w8, 14:15\], z23\.h, z0\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p14_z23_z0_2, svuint16_t,
+            svmls_lane_za32_u16_vg2x1 (w8 + 14, z23, z0, 2),
+            svmls_lane_za32_vg2x1 (w8 + 14, z23, z0, 2))
+
+/*
+** mls_lane_w8p15_z7_z7_3:
+** add (w8|w9|w10|w11), w8, #?15
+** umlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p15_z7_z7_3, svuint16_t,
+            svmls_lane_za32_u16_vg2x1 (w8 + 15, z7, z7, 3),
+            svmls_lane_za32_vg2x1 (w8 + 15, z7, z7, 3))
+
+/*
+** mls_lane_w8p16_z7_z7_4:
+** add (w8|w9|w10|w11), w8, #?16
+** umlsl za\.s\[\1, 0:1\], z7\.h, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p16_z7_z7_4, svuint16_t,
+            svmls_lane_za32_u16_vg2x1 (w8 + 16, z7, z7, 4),
+            svmls_lane_za32_vg2x1 (w8 + 16, z7, z7, 4))
+
+/*
+** mls_lane_w8m1_z16_z0_5:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlsl za\.s\[\1, 0:1\], z16\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8m1_z16_z0_5, svuint16_t,
+            svmls_lane_za32_u16_vg2x1 (w8 - 1, z16, z0, 5),
+            svmls_lane_za32_vg2x1 (w8 - 1, z16, z0, 5))
+
+/*
+** mls_lane_w12_z0_z3_6:
+** mov (w8|w9|w10|w11), w12
+** umlsl za\.s\[\1, 0:1\], z0\.h, z3\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w12_z0_z3_6, svuint16_t,
+            svmls_lane_za32_u16_vg2x1 (w12, z0, z3, 6),
+            svmls_lane_za32_vg2x1 (w12, z0, z3, 6))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u16_vg2x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u16_vg2x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..efbcd9270a7d1f9596f34bd9c1247b4917edf9ec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u16_vg2x2.c
@@ -0,0 +1,112 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** umlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svuint16x2_t, svuint16_t,
+              svmls_lane_za32_u16_vg2x2 (0, z0, z4, 0),
+              svmls_lane_za32_vg2x2 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** umlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svuint16x2_t, svuint16_t,
+              svmls_lane_za32_u16_vg2x2 (w0, z0, z7, 1),
+              svmls_lane_za32_vg2x2 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** umlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svuint16x2_t, svuint16_t,
+              svmls_lane_za32_u16_vg2x2 (w8, z28, z4, 2),
+              svmls_lane_za32_vg2x2 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w8p6_z0_z4_7:
+** umlsl za\.s\[w8, 6:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_7, svuint16x2_t, svuint16_t,
+              svmls_lane_za32_u16_vg2x2 (w8 + 6, z0, z4, 7),
+              svmls_lane_za32_vg2x2 (w8 + 6, z0, z4, 7))
+
+/*
+** mls_lane_w8p7_z0_z4_3:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svuint16x2_t, svuint16_t,
+              svmls_lane_za32_u16_vg2x2 (w8 + 7, z0, z4, 3),
+              svmls_lane_za32_vg2x2 (w8 + 7, z0, z4, 3))
+
+/*
+** mls_lane_w8p8_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svuint16x2_t, svuint16_t,
+              svmls_lane_za32_u16_vg2x2 (w8 + 8, z0, z4, 4),
+              svmls_lane_za32_vg2x2 (w8 + 8, z0, z4, 4))
+
+/*
+** mls_lane_w0m1_z0_z4_5:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svuint16x2_t, svuint16_t,
+              svmls_lane_za32_u16_vg2x2 (w0 - 1, z0, z4, 5),
+              svmls_lane_za32_vg2x2 (w0 - 1, z0, z4, 5))
+
+/*
+** mls_lane_w8_z4_z15_6:
+** str d15, \[sp, #?-16\]!
+** umlsl za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svuint16x2_t, svuint16_t,
+                  svmls_lane_za32_u16_vg2x2 (w8, z4, z15, 6),
+                  svmls_lane_za32_vg2x2 (w8, z4, z15, 6))
+
+/*
+** mls_lane_w8_z28_z16_7:
+** mov (z[0-7])\.d, z16\.d
+** umlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svuint16x2_t, svuint16_t,
+              svmls_lane_za32_u16_vg2x2 (w8, z28, z16, 7),
+              svmls_lane_za32_vg2x2 (w8, z28, z16, 7))
+
+/*
+** mls_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** umlsl za\.s\[w8, 0:1, vgx2\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svuint16x2_t, svuint16_t,
+              svmls_lane_za32_u16_vg2x2 (w8, z17, z7, 0),
+              svmls_lane_za32_vg2x2 (w8, z17, z7, 0))
+
+/*
+** mls_lane_w8_z22_z4_1:
+** umlsl za\.s\[w8, 0:1, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svuint16x2_t, svuint16_t,
+              svmls_lane_za32_u16_vg2x2 (w8, z22, z4, 1),
+              svmls_lane_za32_vg2x2 (w8, z22, z4, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u16_vg2x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u16_vg2x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..76ac966ac713479edbfab73a7e3b2a8aaaab0575
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u16_vg2x4.c
@@ -0,0 +1,118 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** umlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svuint16x4_t, svuint16_t,
+              svmls_lane_za32_u16_vg2x4 (0, z0, z4, 0),
+              svmls_lane_za32_vg2x4 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** umlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svuint16x4_t, svuint16_t,
+              svmls_lane_za32_u16_vg2x4 (w0, z0, z7, 1),
+              svmls_lane_za32_vg2x4 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** umlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svuint16x4_t, svuint16_t,
+              svmls_lane_za32_u16_vg2x4 (w8, z28, z4, 2),
+              svmls_lane_za32_vg2x4 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w8p6_z0_z4_7:
+** umlsl za\.s\[w8, 6:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_7, svuint16x4_t, svuint16_t,
+              svmls_lane_za32_u16_vg2x4 (w8 + 6, z0, z4, 7),
+              svmls_lane_za32_vg2x4 (w8 + 6, z0, z4, 7))
+
+/*
+** mls_lane_w8p7_z0_z4_3:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svuint16x4_t, svuint16_t,
+              svmls_lane_za32_u16_vg2x4 (w8 + 7, z0, z4, 3),
+              svmls_lane_za32_vg2x4 (w8 + 7, z0, z4, 3))
+
+/*
+** mls_lane_w8p8_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svuint16x4_t, svuint16_t,
+              svmls_lane_za32_u16_vg2x4 (w8 + 8, z0, z4, 4),
+              svmls_lane_za32_vg2x4 (w8 + 8, z0, z4, 4))
+
+/*
+** mls_lane_w0m1_z0_z4_5:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svuint16x4_t, svuint16_t,
+              svmls_lane_za32_u16_vg2x4 (w0 - 1, z0, z4, 5),
+              svmls_lane_za32_vg2x4 (w0 - 1, z0, z4, 5))
+
+/*
+** mls_lane_w8_z4_z15_6:
+** str d15, \[sp, #?-16\]!
+** umlsl za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svuint16x4_t, svuint16_t,
+                  svmls_lane_za32_u16_vg2x4 (w8, z4, z15, 6),
+                  svmls_lane_za32_vg2x4 (w8, z4, z15, 6))
+
+/*
+** mls_lane_w8_z28_z16_7:
+** mov (z[0-7])\.d, z16\.d
+** umlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svuint16x4_t, svuint16_t,
+              svmls_lane_za32_u16_vg2x4 (w8, z28, z16, 7),
+              svmls_lane_za32_vg2x4 (w8, z28, z16, 7))
+
+/*
+** mls_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svuint16x4_t, svuint16_t,
+              svmls_lane_za32_u16_vg2x4 (w8, z17, z7, 0),
+              svmls_lane_za32_vg2x4 (w8, z17, z7, 0))
+
+/*
+** mls_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, z4\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svuint16x4_t, svuint16_t,
+              svmls_lane_za32_u16_vg2x4 (w8, z22, z4, 1),
+              svmls_lane_za32_vg2x4 (w8, z22, z4, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u8_vg4x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u8_vg4x1.c
new file mode 100644
index 0000000000000000000000000000000000000000..0b70449a51aac047f87e5844b9b63322cc035575
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u8_vg4x1.c
@@ -0,0 +1,150 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.s\[\1, 0:3\], z0\.b, z0\.b\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_0_z0_z0_0, svuint8_t,
+            svmls_lane_za32_u8_vg4x1 (0, z0, z0, 0),
+            svmls_lane_za32_vg4x1 (0, z0, z0, 0))
+
+/*
+** mls_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.s\[\1, 0:3\], z0\.b, z3\.b\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w0_z0_z3_1, svuint8_t,
+            svmls_lane_za32_u8_vg4x1 (w0, z0, z3, 1),
+            svmls_lane_za32_vg4x1 (w0, z0, z3, 1))
+
+/*
+** mls_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** umlsll za\.s\[\1, 0:3\], z0\.b, z3\.b\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w7_z0_z3_2, svuint8_t,
+            svmls_lane_za32_u8_vg4x1 (w7, z0, z3, 2),
+            svmls_lane_za32_vg4x1 (w7, z0, z3, 2))
+
+/*
+** mls_lane_w8_z7_z3_3:
+** umlsll za\.s\[w8, 0:3\], z7\.b, z3\.b\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z7_z3_3, svuint8_t,
+            svmls_lane_za32_u8_vg4x1 (w8, z7, z3, 3),
+            svmls_lane_za32_vg4x1 (w8, z7, z3, 3))
+
+/*
+** mls_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** umlsll za\.s\[w8, 0:3\], z31\.b, \1\.b\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z31_z16_4, svuint8_t,
+            svmls_lane_za32_u8_vg4x1 (w8, z31, z16, 4),
+            svmls_lane_za32_vg4x1 (w8, z31, z16, 4))
+
+/*
+** mls_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsll za\.s\[\1, 0:3\], z0\.b, z0\.b\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p1_z0_z0_5, svuint8_t,
+            svmls_lane_za32_u8_vg4x1 (w8 + 1, z0, z0, 5),
+            svmls_lane_za32_vg4x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mls_lane_w8p2_z23_z0_6:
+** add (w8|w9|w10|w11), w8, #?2
+** umlsll za\.s\[\1, 0:3\], z23\.b, z0\.b\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p2_z23_z0_6, svuint8_t,
+            svmls_lane_za32_u8_vg4x1 (w8 + 2, z23, z0, 6),
+            svmls_lane_za32_vg4x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mls_lane_w11p4_z23_z0_7:
+** umlsll za\.s\[w11, 4:7\], z23\.b, z0\.b\[7\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p4_z23_z0_7, svuint8_t,
+            svmls_lane_za32_u8_vg4x1 (w11 + 4, z23, z0, 7),
+            svmls_lane_za32_vg4x1 (w11 + 4, z23, z0, 7))
+
+/*
+** mls_lane_w8p7_z7_z7_8:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsll za\.s\[\1, 0:3\], z7\.b, z7\.b\[8\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p7_z7_z7_8, svuint8_t,
+            svmls_lane_za32_u8_vg4x1 (w8 + 7, z7, z7, 8),
+            svmls_lane_za32_vg4x1 (w8 + 7, z7, z7, 8))
+
+/*
+** mls_lane_w11p12_z23_z0_9:
+** umlsll za\.s\[w11, 12:15\], z23\.b, z0\.b\[9\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p12_z23_z0_9, svuint8_t,
+            svmls_lane_za32_u8_vg4x1 (w11 + 12, z23, z0, 9),
+            svmls_lane_za32_vg4x1 (w11 + 12, z23, z0, 9))
+
+/*
+** mls_lane_w8p14_z23_z0_10:
+** add (w8|w9|w10|w11), w8, #?14
+** umlsll za\.s\[\1, 0:3\], z23\.b, z0\.b\[10\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p14_z23_z0_10, svuint8_t,
+            svmls_lane_za32_u8_vg4x1 (w8 + 14, z23, z0, 10),
+            svmls_lane_za32_vg4x1 (w8 + 14, z23, z0, 10))
+
+/*
+** mls_lane_w8p15_z7_z7_11:
+** add (w8|w9|w10|w11), w8, #?15
+** umlsll za\.s\[\1, 0:3\], z7\.b, z7\.b\[11\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p15_z7_z7_11, svuint8_t,
+            svmls_lane_za32_u8_vg4x1 (w8 + 15, z7, z7, 11),
+            svmls_lane_za32_vg4x1 (w8 + 15, z7, z7, 11))
+
+/*
+** mls_lane_w8p16_z7_z7_12:
+** add (w8|w9|w10|w11), w8, #?16
+** umlsll za\.s\[\1, 0:3\], z7\.b, z7\.b\[12\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p16_z7_z7_12, svuint8_t,
+            svmls_lane_za32_u8_vg4x1 (w8 + 16, z7, z7, 12),
+            svmls_lane_za32_vg4x1 (w8 + 16, z7, z7, 12))
+
+/*
+** mls_lane_w8m1_z16_z0_13:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlsll za\.s\[\1, 0:3\], z16\.b, z0\.b\[13\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8m1_z16_z0_13, svuint8_t,
+            svmls_lane_za32_u8_vg4x1 (w8 - 1, z16, z0, 13),
+            svmls_lane_za32_vg4x1 (w8 - 1, z16, z0, 13))
+
+/*
+** mls_lane_w12_z0_z3_15:
+** mov (w8|w9|w10|w11), w12
+** umlsll za\.s\[\1, 0:3\], z0\.b, z3\.b\[15\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w12_z0_z3_15, svuint8_t,
+            svmls_lane_za32_u8_vg4x1 (w12, z0, z3, 15),
+            svmls_lane_za32_vg4x1 (w12, z0, z3, 15))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u8_vg4x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u8_vg4x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..4937c0c26b922bce050cf550dd0b8728b6eac36c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u8_vg4x2.c
@@ -0,0 +1,122 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svuint8x2_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x2 (0, z0, z4, 0),
+              svmls_lane_za32_vg4x2 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svuint8x2_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x2 (w0, z0, z7, 1),
+              svmls_lane_za32_vg4x2 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** umlsll za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svuint8x2_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x2 (w8, z28, z4, 2),
+              svmls_lane_za32_vg4x2 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w11p4_z0_z4_3:
+** umlsll za\.s\[w11, 4:7, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w11p4_z0_z4_3, svuint8x2_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x2 (w11 + 4, z0, z4, 3),
+              svmls_lane_za32_vg4x2 (w11 + 4, z0, z4, 3))
+
+/*
+** mls_lane_w8p6_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?6
+** umlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_4, svuint8x2_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x2 (w8 + 6, z0, z4, 4),
+              svmls_lane_za32_vg4x2 (w8 + 6, z0, z4, 4))
+
+/*
+** mls_lane_w8p7_z0_z4_5:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_5, svuint8x2_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x2 (w8 + 7, z0, z4, 5),
+              svmls_lane_za32_vg4x2 (w8 + 7, z0, z4, 5))
+
+/*
+** mls_lane_w8p8_z0_z4_7:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_7, svuint8x2_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x2 (w8 + 8, z0, z4, 7),
+              svmls_lane_za32_vg4x2 (w8 + 8, z0, z4, 7))
+
+/*
+** mls_lane_w0m1_z0_z4_9:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[9\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_9, svuint8x2_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x2 (w0 - 1, z0, z4, 9),
+              svmls_lane_za32_vg4x2 (w0 - 1, z0, z4, 9))
+
+/*
+** mls_lane_w8_z4_z15_10:
+** str d15, \[sp, #?-16\]!
+** umlsll za\.s\[w8, 0:3, vgx2\], {z4\.b - z5\.b}, z15\.b\[10\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_10, svuint8x2_t, svuint8_t,
+                  svmls_lane_za32_u8_vg4x2 (w8, z4, z15, 10),
+                  svmls_lane_za32_vg4x2 (w8, z4, z15, 10))
+
+/*
+** mls_lane_w8_z28_z16_11:
+** mov (z[0-7])\.d, z16\.d
+** umlsll za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, \1\.b\[11\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_11, svuint8x2_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x2 (w8, z28, z16, 11),
+              svmls_lane_za32_vg4x2 (w8, z28, z16, 11))
+
+/*
+** mls_lane_w8_z17_z7_13:
+** mov [^\n]+
+** mov [^\n]+
+** umlsll za\.s\[w8, 0:3, vgx2\], [^\n]+, z7\.b\[13\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_13, svuint8x2_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x2 (w8, z17, z7, 13),
+              svmls_lane_za32_vg4x2 (w8, z17, z7, 13))
+
+/*
+** mls_lane_w8_z22_z4_15:
+** umlsll za\.s\[w8, 0:3, vgx2\], {z22\.b - z23\.b}, z4\.b\[15\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_15, svuint8x2_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x2 (w8, z22, z4, 15),
+              svmls_lane_za32_vg4x2 (w8, z22, z4, 15))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u8_vg4x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u8_vg4x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..05b36151c8c73528b7c64e6db402e55329c8e61a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za32_u8_vg4x4.c
@@ -0,0 +1,128 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svuint8x4_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x4 (0, z0, z4, 0),
+              svmls_lane_za32_vg4x4 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svuint8x4_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x4 (w0, z0, z7, 1),
+              svmls_lane_za32_vg4x4 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** umlsll za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svuint8x4_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x4 (w8, z28, z4, 2),
+              svmls_lane_za32_vg4x4 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w11p4_z0_z4_7:
+** umlsll za\.s\[w11, 4:7, vgx4\], {z0\.b - z3\.b}, z4\.b\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w11p4_z0_z4_7, svuint8x4_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x4 (w11 + 4, z0, z4, 7),
+              svmls_lane_za32_vg4x4 (w11 + 4, z0, z4, 7))
+
+/*
+** mls_lane_w8p6_z0_z4_8:
+** add (w8|w9|w10|w11), w8, #?6
+** umlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[8\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_8, svuint8x4_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x4 (w8 + 6, z0, z4, 8),
+              svmls_lane_za32_vg4x4 (w8 + 6, z0, z4, 8))
+
+/*
+** mls_lane_w8p7_z0_z4_9:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[9\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_9, svuint8x4_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x4 (w8 + 7, z0, z4, 9),
+              svmls_lane_za32_vg4x4 (w8 + 7, z0, z4, 9))
+
+/*
+** mls_lane_w8p8_z0_z4_10:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[10\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_10, svuint8x4_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x4 (w8 + 8, z0, z4, 10),
+              svmls_lane_za32_vg4x4 (w8 + 8, z0, z4, 10))
+
+/*
+** mls_lane_w0m1_z0_z4_11:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[11\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_11, svuint8x4_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x4 (w0 - 1, z0, z4, 11),
+              svmls_lane_za32_vg4x4 (w0 - 1, z0, z4, 11))
+
+/*
+** mls_lane_w8_z4_z15_12:
+** str d15, \[sp, #?-16\]!
+** umlsll za\.s\[w8, 0:3, vgx4\], {z4\.b - z7\.b}, z15\.b\[12\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_12, svuint8x4_t, svuint8_t,
+                  svmls_lane_za32_u8_vg4x4 (w8, z4, z15, 12),
+                  svmls_lane_za32_vg4x4 (w8, z4, z15, 12))
+
+/*
+** mls_lane_w8_z28_z16_13:
+** mov (z[0-7])\.d, z16\.d
+** umlsll za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, \1\.b\[13\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_13, svuint8x4_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x4 (w8, z28, z16, 13),
+              svmls_lane_za32_vg4x4 (w8, z28, z16, 13))
+
+/*
+** mls_lane_w8_z17_z7_14:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umlsll za\.s\[w8, 0:3, vgx4\], [^\n]+, z7\.b\[14\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_14, svuint8x4_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x4 (w8, z17, z7, 14),
+              svmls_lane_za32_vg4x4 (w8, z17, z7, 14))
+
+/*
+** mls_lane_w8_z22_z4_15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umlsll za\.s\[w8, 0:3, vgx4\], [^\n]+, z4\.b\[15\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_15, svuint8x4_t, svuint8_t,
+              svmls_lane_za32_u8_vg4x4 (w8, z22, z4, 15),
+              svmls_lane_za32_vg4x4 (w8, z22, z4, 15))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_f64_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_f64_vg1x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..72ea6043c22409a46c3c8dcba8595bfe8809666b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_f64_vg1x2.c
@@ -0,0 +1,104 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-f64f64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** fmls za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, z4\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svfloat64x2_t, svfloat64_t,
+              svmls_lane_za64_f64_vg1x2 (0, z0, z4, 0),
+              svmls_lane_za64_vg1x2 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** fmls za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, z7\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svfloat64x2_t, svfloat64_t,
+              svmls_lane_za64_f64_vg1x2 (w0, z0, z7, 1),
+              svmls_lane_za64_vg1x2 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** fmls za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}, z4\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svfloat64x2_t, svfloat64_t,
+              svmls_lane_za64_f64_vg1x2 (w8, z28, z4, 0),
+              svmls_lane_za64_vg1x2 (w8, z28, z4, 0))
+
+/*
+** mls_lane_w8p7_z0_z4_3:
+** fmls za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d}, z4\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svfloat64x2_t, svfloat64_t,
+              svmls_lane_za64_f64_vg1x2 (w8 + 7, z0, z4, 1),
+              svmls_lane_za64_vg1x2 (w8 + 7, z0, z4, 1))
+
+/*
+** mls_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmls za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, z4\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_0, svfloat64x2_t, svfloat64_t,
+              svmls_lane_za64_f64_vg1x2 (w8 + 8, z0, z4, 0),
+              svmls_lane_za64_vg1x2 (w8 + 8, z0, z4, 0))
+
+/*
+** mls_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmls za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, z4\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_1, svfloat64x2_t, svfloat64_t,
+              svmls_lane_za64_f64_vg1x2 (w0 - 1, z0, z4, 1),
+              svmls_lane_za64_vg1x2 (w0 - 1, z0, z4, 1))
+
+/*
+** mls_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** fmls za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, z15\.d\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_2, svfloat64x2_t, svfloat64_t,
+                  svmls_lane_za64_f64_vg1x2 (w8, z4, z15, 0),
+                  svmls_lane_za64_vg1x2 (w8, z4, z15, 0))
+
+/*
+** mls_lane_w8_z28_z16_3:
+** mov (z[0-7])\.d, z16\.d
+** fmls za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}, \1\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_3, svfloat64x2_t, svfloat64_t,
+              svmls_lane_za64_f64_vg1x2 (w8, z28, z16, 1),
+              svmls_lane_za64_vg1x2 (w8, z28, z16, 1))
+
+/*
+** mls_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** fmls za\.d\[w8, 0, vgx2\], [^\n]+, z7\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svfloat64x2_t, svfloat64_t,
+              svmls_lane_za64_f64_vg1x2 (w8, z17, z7, 0),
+              svmls_lane_za64_vg1x2 (w8, z17, z7, 0))
+
+/*
+** mls_lane_w8_z22_z4_1:
+** fmls za\.d\[w8, 0, vgx2\], {z22\.d - z23\.d}, z4\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svfloat64x2_t, svfloat64_t,
+              svmls_lane_za64_f64_vg1x2 (w8, z22, z4, 1),
+              svmls_lane_za64_vg1x2 (w8, z22, z4, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_f64_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_f64_vg1x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..ce1de1d734ff68d00f272d2f46c148d5359a68b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_f64_vg1x4.c
@@ -0,0 +1,110 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-f64f64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** fmls za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, z4\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svfloat64x4_t, svfloat64_t,
+              svmls_lane_za64_f64_vg1x4 (0, z0, z4, 0),
+              svmls_lane_za64_vg1x4 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** fmls za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, z7\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svfloat64x4_t, svfloat64_t,
+              svmls_lane_za64_f64_vg1x4 (w0, z0, z7, 1),
+              svmls_lane_za64_vg1x4 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** fmls za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}, z4\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svfloat64x4_t, svfloat64_t,
+              svmls_lane_za64_f64_vg1x4 (w8, z28, z4, 0),
+              svmls_lane_za64_vg1x4 (w8, z28, z4, 0))
+
+/*
+** mls_lane_w8p7_z0_z4_3:
+** fmls za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d}, z4\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svfloat64x4_t, svfloat64_t,
+              svmls_lane_za64_f64_vg1x4 (w8 + 7, z0, z4, 1),
+              svmls_lane_za64_vg1x4 (w8 + 7, z0, z4, 1))
+
+/*
+** mls_lane_w8p8_z0_z4_0:
+** add (w8|w9|w10|w11), w8, #?8
+** fmls za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, z4\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_0, svfloat64x4_t, svfloat64_t,
+              svmls_lane_za64_f64_vg1x4 (w8 + 8, z0, z4, 0),
+              svmls_lane_za64_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** mls_lane_w0m1_z0_z4_1:
+** sub (w8|w9|w10|w11), w0, #?1
+** fmls za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, z4\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_1, svfloat64x4_t, svfloat64_t,
+              svmls_lane_za64_f64_vg1x4 (w0 - 1, z0, z4, 1),
+              svmls_lane_za64_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** mls_lane_w8_z4_z15_2:
+** str d15, \[sp, #?-16\]!
+** fmls za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d}, z15\.d\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_2, svfloat64x4_t, svfloat64_t,
+                  svmls_lane_za64_f64_vg1x4 (w8, z4, z15, 0),
+                  svmls_lane_za64_vg1x4 (w8, z4, z15, 0))
+
+/*
+** mls_lane_w8_z28_z16_3:
+** mov (z[0-7])\.d, z16\.d
+** fmls za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}, \1\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_3, svfloat64x4_t, svfloat64_t,
+              svmls_lane_za64_f64_vg1x4 (w8, z28, z16, 1),
+              svmls_lane_za64_vg1x4 (w8, z28, z16, 1))
+
+/*
+** mls_lane_w8_z17_z7_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmls za\.d\[w8, 0, vgx4\], [^\n]+, z7\.d\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svfloat64x4_t, svfloat64_t,
+              svmls_lane_za64_f64_vg1x4 (w8, z17, z7, 0),
+              svmls_lane_za64_vg1x4 (w8, z17, z7, 0))
+
+/*
+** mls_lane_w8_z22_z4_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fmls za\.d\[w8, 0, vgx4\], [^\n]+, z4\.d\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svfloat64x4_t, svfloat64_t,
+              svmls_lane_za64_f64_vg1x4 (w8, z22, z4, 1),
+              svmls_lane_za64_vg1x4 (w8, z22, z4, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_s16_vg4x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_s16_vg4x1.c
new file mode 100644
index 0000000000000000000000000000000000000000..c13643875d8959e978d14181da95336098459493
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_s16_vg4x1.c
@@ -0,0 +1,152 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** smlsll za\.d\[\1, 0:3\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_0_z0_z0_0, svint16_t,
+            svmls_lane_za64_s16_vg4x1 (0, z0, z0, 0),
+            svmls_lane_za64_vg4x1 (0, z0, z0, 0))
+
+/*
+** mls_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** smlsll za\.d\[\1, 0:3\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w0_z0_z3_1, svint16_t,
+            svmls_lane_za64_s16_vg4x1 (w0, z0, z3, 1),
+            svmls_lane_za64_vg4x1 (w0, z0, z3, 1))
+
+/*
+** mls_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** smlsll za\.d\[\1, 0:3\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w7_z0_z3_2, svint16_t,
+            svmls_lane_za64_s16_vg4x1 (w7, z0, z3, 2),
+            svmls_lane_za64_vg4x1 (w7, z0, z3, 2))
+
+/*
+** mls_lane_w8_z7_z3_3:
+** smlsll za\.d\[w8, 0:3\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z7_z3_3, svint16_t,
+            svmls_lane_za64_s16_vg4x1 (w8, z7, z3, 3),
+            svmls_lane_za64_vg4x1 (w8, z7, z3, 3))
+
+/*
+** mls_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** smlsll za\.d\[w8, 0:3\], z31\.h, \1\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z31_z16_4, svint16_t,
+            svmls_lane_za64_s16_vg4x1 (w8, z31, z16, 4),
+            svmls_lane_za64_vg4x1 (w8, z31, z16, 4))
+
+/*
+** mls_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** smlsll za\.d\[\1, 0:3\], z0\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p1_z0_z0_5, svint16_t,
+            svmls_lane_za64_s16_vg4x1 (w8 + 1, z0, z0, 5),
+            svmls_lane_za64_vg4x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mls_lane_w8p2_z23_z0_6:
+** add (w8|w9|w10|w11), w8, #?2
+** smlsll za\.d\[\1, 0:3\], z23\.h, z0\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p2_z23_z0_6, svint16_t,
+            svmls_lane_za64_s16_vg4x1 (w8 + 2, z23, z0, 6),
+            svmls_lane_za64_vg4x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mls_lane_w11p4_z23_z0_7:
+** smlsll za\.d\[w11, 4:7\], z23\.h, z0\.h\[7\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p4_z23_z0_7, svint16_t,
+            svmls_lane_za64_s16_vg4x1 (w11 + 4, z23, z0, 7),
+            svmls_lane_za64_vg4x1 (w11 + 4, z23, z0, 7))
+
+/*
+** mls_lane_w8p7_z7_z7_0:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsll za\.d\[\1, 0:3\], z7\.h, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p7_z7_z7_0, svint16_t,
+            svmls_lane_za64_s16_vg4x1 (w8 + 7, z7, z7, 0),
+            svmls_lane_za64_vg4x1 (w8 + 7, z7, z7, 0))
+
+/*
+** mls_lane_w11p12_z23_z0_1:
+** smlsll za\.d\[w11, 12:15\], z23\.h, z0\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p12_z23_z0_1, svint16_t,
+            svmls_lane_za64_s16_vg4x1 (w11 + 12, z23, z0, 1),
+            svmls_lane_za64_vg4x1 (w11 + 12, z23, z0, 1))
+
+/*
+** mls_lane_w8p14_z23_z0_2:
+** add (w8|w9|w10|w11), w8, #?14
+** smlsll za\.d\[\1, 0:3\], z23\.h, z0\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p14_z23_z0_2, svint16_t,
+            svmls_lane_za64_s16_vg4x1 (w8 + 14, z23, z0, 2),
+            svmls_lane_za64_vg4x1 (w8 + 14, z23, z0, 2))
+
+/*
+** mls_lane_w8p15_z7_z7_3:
+** add (w8|w9|w10|w11), w8, #?15
+** smlsll za\.d\[\1, 0:3\], z7\.h, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p15_z7_z7_3, svint16_t,
+            svmls_lane_za64_s16_vg4x1 (w8 + 15, z7, z7, 3),
+            svmls_lane_za64_vg4x1 (w8 + 15, z7, z7, 3))
+
+/*
+** mls_lane_w8p16_z7_z7_4:
+** add (w8|w9|w10|w11), w8, #?16
+** smlsll za\.d\[\1, 0:3\], z7\.h, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p16_z7_z7_4, svint16_t,
+            svmls_lane_za64_s16_vg4x1 (w8 + 16, z7, z7, 4),
+            svmls_lane_za64_vg4x1 (w8 + 16, z7, z7, 4))
+
+/*
+** mls_lane_w8m1_z16_z0_5:
+** sub (w8|w9|w10|w11), w8, #?1
+** smlsll za\.d\[\1, 0:3\], z16\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8m1_z16_z0_5, svint16_t,
+            svmls_lane_za64_s16_vg4x1 (w8 - 1, z16, z0, 5),
+            svmls_lane_za64_vg4x1 (w8 - 1, z16, z0, 5))
+
+/*
+** mls_lane_w12_z0_z3_6:
+** mov (w8|w9|w10|w11), w12
+** smlsll za\.d\[\1, 0:3\], z0\.h, z3\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w12_z0_z3_6, svint16_t,
+            svmls_lane_za64_s16_vg4x1 (w12, z0, z3, 6),
+            svmls_lane_za64_vg4x1 (w12, z0, z3, 6))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_s16_vg4x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_s16_vg4x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..ffc50d72242b2c1c55d1d118c1d28028d7dc92bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_s16_vg4x2.c
@@ -0,0 +1,124 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** smlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svint16x2_t, svint16_t,
+              svmls_lane_za64_s16_vg4x2 (0, z0, z4, 0),
+              svmls_lane_za64_vg4x2 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** smlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svint16x2_t, svint16_t,
+              svmls_lane_za64_s16_vg4x2 (w0, z0, z7, 1),
+              svmls_lane_za64_vg4x2 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** smlsll za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svint16x2_t, svint16_t,
+              svmls_lane_za64_s16_vg4x2 (w8, z28, z4, 2),
+              svmls_lane_za64_vg4x2 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w11p4_z0_z4_3:
+** smlsll za\.d\[w11, 4:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w11p4_z0_z4_3, svint16x2_t, svint16_t,
+              svmls_lane_za64_s16_vg4x2 (w11 + 4, z0, z4, 3),
+              svmls_lane_za64_vg4x2 (w11 + 4, z0, z4, 3))
+
+/*
+** mls_lane_w8p6_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?6
+** smlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_4, svint16x2_t, svint16_t,
+              svmls_lane_za64_s16_vg4x2 (w8 + 6, z0, z4, 4),
+              svmls_lane_za64_vg4x2 (w8 + 6, z0, z4, 4))
+
+/*
+** mls_lane_w8p7_z0_z4_5:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_5, svint16x2_t, svint16_t,
+              svmls_lane_za64_s16_vg4x2 (w8 + 7, z0, z4, 5),
+              svmls_lane_za64_vg4x2 (w8 + 7, z0, z4, 5))
+
+/*
+** mls_lane_w8p8_z0_z4_6:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[6\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_6, svint16x2_t, svint16_t,
+              svmls_lane_za64_s16_vg4x2 (w8 + 8, z0, z4, 6),
+              svmls_lane_za64_vg4x2 (w8 + 8, z0, z4, 6))
+
+/*
+** mls_lane_w0m1_z0_z4_7:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_7, svint16x2_t, svint16_t,
+              svmls_lane_za64_s16_vg4x2 (w0 - 1, z0, z4, 7),
+              svmls_lane_za64_vg4x2 (w0 - 1, z0, z4, 7))
+
+/*
+** mls_lane_w8_z4_z15_0:
+** str d15, \[sp, #?-16\]!
+** smlsll za\.d\[w8, 0:3, vgx2\], {z4\.h - z5\.h}, z15\.h\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_0, svint16x2_t, svint16_t,
+                  svmls_lane_za64_s16_vg4x2 (w8, z4, z15, 0),
+                  svmls_lane_za64_vg4x2 (w8, z4, z15, 0))
+
+/*
+** mls_lane_w8_z28_z16_1:
+** mov (z[0-7])\.d, z16\.d
+** smlsll za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, \1\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_1, svint16x2_t, svint16_t,
+              svmls_lane_za64_s16_vg4x2 (w8, z28, z16, 1),
+              svmls_lane_za64_vg4x2 (w8, z28, z16, 1))
+
+/*
+** mls_lane_w8_z17_z7_3:
+** mov [^\n]+
+** mov [^\n]+
+** smlsll za\.d\[w8, 0:3, vgx2\], [^\n]+, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_3, svint16x2_t, svint16_t,
+              svmls_lane_za64_s16_vg4x2 (w8, z17, z7, 3),
+              svmls_lane_za64_vg4x2 (w8, z17, z7, 3))
+
+/*
+** mls_lane_w8_z22_z4_5:
+** smlsll za\.d\[w8, 0:3, vgx2\], {z22\.h - z23\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_5, svint16x2_t, svint16_t,
+              svmls_lane_za64_s16_vg4x2 (w8, z22, z4, 5),
+              svmls_lane_za64_vg4x2 (w8, z22, z4, 5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_s16_vg4x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_s16_vg4x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..89aa97bac204ff8f22e09235c275512f271a0ab1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_s16_vg4x4.c
@@ -0,0 +1,130 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** smlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svint16x4_t, svint16_t,
+              svmls_lane_za64_s16_vg4x4 (0, z0, z4, 0),
+              svmls_lane_za64_vg4x4 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** smlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svint16x4_t, svint16_t,
+              svmls_lane_za64_s16_vg4x4 (w0, z0, z7, 1),
+              svmls_lane_za64_vg4x4 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** smlsll za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svint16x4_t, svint16_t,
+              svmls_lane_za64_s16_vg4x4 (w8, z28, z4, 2),
+              svmls_lane_za64_vg4x4 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w11p4_z0_z4_3:
+** smlsll za\.d\[w11, 4:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w11p4_z0_z4_3, svint16x4_t, svint16_t,
+              svmls_lane_za64_s16_vg4x4 (w11 + 4, z0, z4, 3),
+              svmls_lane_za64_vg4x4 (w11 + 4, z0, z4, 3))
+
+/*
+** mls_lane_w8p6_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?6
+** smlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_4, svint16x4_t, svint16_t,
+              svmls_lane_za64_s16_vg4x4 (w8 + 6, z0, z4, 4),
+              svmls_lane_za64_vg4x4 (w8 + 6, z0, z4, 4))
+
+/*
+** mls_lane_w8p7_z0_z4_5:
+** add (w8|w9|w10|w11), w8, #?7
+** smlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_5, svint16x4_t, svint16_t,
+              svmls_lane_za64_s16_vg4x4 (w8 + 7, z0, z4, 5),
+              svmls_lane_za64_vg4x4 (w8 + 7, z0, z4, 5))
+
+/*
+** mls_lane_w8p8_z0_z4_6:
+** add (w8|w9|w10|w11), w8, #?8
+** smlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[6\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_6, svint16x4_t, svint16_t,
+              svmls_lane_za64_s16_vg4x4 (w8 + 8, z0, z4, 6),
+              svmls_lane_za64_vg4x4 (w8 + 8, z0, z4, 6))
+
+/*
+** mls_lane_w0m1_z0_z4_7:
+** sub (w8|w9|w10|w11), w0, #?1
+** smlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_7, svint16x4_t, svint16_t,
+              svmls_lane_za64_s16_vg4x4 (w0 - 1, z0, z4, 7),
+              svmls_lane_za64_vg4x4 (w0 - 1, z0, z4, 7))
+
+/*
+** mls_lane_w8_z4_z15_0:
+** str d15, \[sp, #?-16\]!
+** smlsll za\.d\[w8, 0:3, vgx4\], {z4\.h - z7\.h}, z15\.h\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_0, svint16x4_t, svint16_t,
+                  svmls_lane_za64_s16_vg4x4 (w8, z4, z15, 0),
+                  svmls_lane_za64_vg4x4 (w8, z4, z15, 0))
+
+/*
+** mls_lane_w8_z28_z16_3:
+** mov (z[0-7])\.d, z16\.d
+** smlsll za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, \1\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_3, svint16x4_t, svint16_t,
+              svmls_lane_za64_s16_vg4x4 (w8, z28, z16, 3),
+              svmls_lane_za64_vg4x4 (w8, z28, z16, 3))
+
+/*
+** mls_lane_w8_z17_z7_4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smlsll za\.d\[w8, 0:3, vgx4\], [^\n]+, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_4, svint16x4_t, svint16_t,
+              svmls_lane_za64_s16_vg4x4 (w8, z17, z7, 4),
+              svmls_lane_za64_vg4x4 (w8, z17, z7, 4))
+
+/*
+** mls_lane_w8_z22_z4_6:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** smlsll za\.d\[w8, 0:3, vgx4\], [^\n]+, z4\.h\[6\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_6, svint16x4_t, svint16_t,
+              svmls_lane_za64_s16_vg4x4 (w8, z22, z4, 6),
+              svmls_lane_za64_vg4x4 (w8, z22, z4, 6))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_u16_vg4x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_u16_vg4x1.c
new file mode 100644
index 0000000000000000000000000000000000000000..71407563ebb30cf7774b20d9195f490441093ec8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_u16_vg4x1.c
@@ -0,0 +1,152 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z0_0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.d\[\1, 0:3\], z0\.h, z0\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_0_z0_z0_0, svuint16_t,
+            svmls_lane_za64_u16_vg4x1 (0, z0, z0, 0),
+            svmls_lane_za64_vg4x1 (0, z0, z0, 0))
+
+/*
+** mls_lane_w0_z0_z3_1:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.d\[\1, 0:3\], z0\.h, z3\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w0_z0_z3_1, svuint16_t,
+            svmls_lane_za64_u16_vg4x1 (w0, z0, z3, 1),
+            svmls_lane_za64_vg4x1 (w0, z0, z3, 1))
+
+/*
+** mls_lane_w7_z0_z3_2:
+** mov (w8|w9|w10|w11), w7
+** umlsll za\.d\[\1, 0:3\], z0\.h, z3\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w7_z0_z3_2, svuint16_t,
+            svmls_lane_za64_u16_vg4x1 (w7, z0, z3, 2),
+            svmls_lane_za64_vg4x1 (w7, z0, z3, 2))
+
+/*
+** mls_lane_w8_z7_z3_3:
+** umlsll za\.d\[w8, 0:3\], z7\.h, z3\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z7_z3_3, svuint16_t,
+            svmls_lane_za64_u16_vg4x1 (w8, z7, z3, 3),
+            svmls_lane_za64_vg4x1 (w8, z7, z3, 3))
+
+/*
+** mls_lane_w8_z31_z16_4:
+** mov (z[0-7])\.d, z16\.d
+** umlsll za\.d\[w8, 0:3\], z31\.h, \1\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8_z31_z16_4, svuint16_t,
+            svmls_lane_za64_u16_vg4x1 (w8, z31, z16, 4),
+            svmls_lane_za64_vg4x1 (w8, z31, z16, 4))
+
+/*
+** mls_lane_w8p1_z0_z0_5:
+** add (w8|w9|w10|w11), w8, #?1
+** umlsll za\.d\[\1, 0:3\], z0\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p1_z0_z0_5, svuint16_t,
+            svmls_lane_za64_u16_vg4x1 (w8 + 1, z0, z0, 5),
+            svmls_lane_za64_vg4x1 (w8 + 1, z0, z0, 5))
+
+/*
+** mls_lane_w8p2_z23_z0_6:
+** add (w8|w9|w10|w11), w8, #?2
+** umlsll za\.d\[\1, 0:3\], z23\.h, z0\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p2_z23_z0_6, svuint16_t,
+            svmls_lane_za64_u16_vg4x1 (w8 + 2, z23, z0, 6),
+            svmls_lane_za64_vg4x1 (w8 + 2, z23, z0, 6))
+
+/*
+** mls_lane_w11p4_z23_z0_7:
+** umlsll za\.d\[w11, 4:7\], z23\.h, z0\.h\[7\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p4_z23_z0_7, svuint16_t,
+            svmls_lane_za64_u16_vg4x1 (w11 + 4, z23, z0, 7),
+            svmls_lane_za64_vg4x1 (w11 + 4, z23, z0, 7))
+
+/*
+** mls_lane_w8p7_z7_z7_0:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsll za\.d\[\1, 0:3\], z7\.h, z7\.h\[0\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p7_z7_z7_0, svuint16_t,
+            svmls_lane_za64_u16_vg4x1 (w8 + 7, z7, z7, 0),
+            svmls_lane_za64_vg4x1 (w8 + 7, z7, z7, 0))
+
+/*
+** mls_lane_w11p12_z23_z0_1:
+** umlsll za\.d\[w11, 12:15\], z23\.h, z0\.h\[1\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w11p12_z23_z0_1, svuint16_t,
+            svmls_lane_za64_u16_vg4x1 (w11 + 12, z23, z0, 1),
+            svmls_lane_za64_vg4x1 (w11 + 12, z23, z0, 1))
+
+/*
+** mls_lane_w8p14_z23_z0_2:
+** add (w8|w9|w10|w11), w8, #?14
+** umlsll za\.d\[\1, 0:3\], z23\.h, z0\.h\[2\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p14_z23_z0_2, svuint16_t,
+            svmls_lane_za64_u16_vg4x1 (w8 + 14, z23, z0, 2),
+            svmls_lane_za64_vg4x1 (w8 + 14, z23, z0, 2))
+
+/*
+** mls_lane_w8p15_z7_z7_3:
+** add (w8|w9|w10|w11), w8, #?15
+** umlsll za\.d\[\1, 0:3\], z7\.h, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p15_z7_z7_3, svuint16_t,
+            svmls_lane_za64_u16_vg4x1 (w8 + 15, z7, z7, 3),
+            svmls_lane_za64_vg4x1 (w8 + 15, z7, z7, 3))
+
+/*
+** mls_lane_w8p16_z7_z7_4:
+** add (w8|w9|w10|w11), w8, #?16
+** umlsll za\.d\[\1, 0:3\], z7\.h, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8p16_z7_z7_4, svuint16_t,
+            svmls_lane_za64_u16_vg4x1 (w8 + 16, z7, z7, 4),
+            svmls_lane_za64_vg4x1 (w8 + 16, z7, z7, 4))
+
+/*
+** mls_lane_w8m1_z16_z0_5:
+** sub (w8|w9|w10|w11), w8, #?1
+** umlsll za\.d\[\1, 0:3\], z16\.h, z0\.h\[5\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w8m1_z16_z0_5, svuint16_t,
+            svmls_lane_za64_u16_vg4x1 (w8 - 1, z16, z0, 5),
+            svmls_lane_za64_vg4x1 (w8 - 1, z16, z0, 5))
+
+/*
+** mls_lane_w12_z0_z3_6:
+** mov (w8|w9|w10|w11), w12
+** umlsll za\.d\[\1, 0:3\], z0\.h, z3\.h\[6\]
+** ret
+*/
+TEST_ZA_X1 (mls_lane_w12_z0_z3_6, svuint16_t,
+            svmls_lane_za64_u16_vg4x1 (w12, z0, z3, 6),
+            svmls_lane_za64_vg4x1 (w12, z0, z3, 6))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_u16_vg4x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_u16_vg4x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..62e235151ad1c03c50bb448e9163a3da31e0fa14
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_u16_vg4x2.c
@@ -0,0 +1,124 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svuint16x2_t, svuint16_t,
+              svmls_lane_za64_u16_vg4x2 (0, z0, z4, 0),
+              svmls_lane_za64_vg4x2 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svuint16x2_t, svuint16_t,
+              svmls_lane_za64_u16_vg4x2 (w0, z0, z7, 1),
+              svmls_lane_za64_vg4x2 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** umlsll za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svuint16x2_t, svuint16_t,
+              svmls_lane_za64_u16_vg4x2 (w8, z28, z4, 2),
+              svmls_lane_za64_vg4x2 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w11p4_z0_z4_3:
+** umlsll za\.d\[w11, 4:7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w11p4_z0_z4_3, svuint16x2_t, svuint16_t,
+              svmls_lane_za64_u16_vg4x2 (w11 + 4, z0, z4, 3),
+              svmls_lane_za64_vg4x2 (w11 + 4, z0, z4, 3))
+
+/*
+** mls_lane_w8p6_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?6
+** umlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_4, svuint16x2_t, svuint16_t,
+              svmls_lane_za64_u16_vg4x2 (w8 + 6, z0, z4, 4),
+              svmls_lane_za64_vg4x2 (w8 + 6, z0, z4, 4))
+
+/*
+** mls_lane_w8p7_z0_z4_5:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_5, svuint16x2_t, svuint16_t,
+              svmls_lane_za64_u16_vg4x2 (w8 + 7, z0, z4, 5),
+              svmls_lane_za64_vg4x2 (w8 + 7, z0, z4, 5))
+
+/*
+** mls_lane_w8p8_z0_z4_6:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[6\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_6, svuint16x2_t, svuint16_t,
+              svmls_lane_za64_u16_vg4x2 (w8 + 8, z0, z4, 6),
+              svmls_lane_za64_vg4x2 (w8 + 8, z0, z4, 6))
+
+/*
+** mls_lane_w0m1_z0_z4_7:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_7, svuint16x2_t, svuint16_t,
+              svmls_lane_za64_u16_vg4x2 (w0 - 1, z0, z4, 7),
+              svmls_lane_za64_vg4x2 (w0 - 1, z0, z4, 7))
+
+/*
+** mls_lane_w8_z4_z15_0:
+** str d15, \[sp, #?-16\]!
+** umlsll za\.d\[w8, 0:3, vgx2\], {z4\.h - z5\.h}, z15\.h\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_0, svuint16x2_t, svuint16_t,
+                  svmls_lane_za64_u16_vg4x2 (w8, z4, z15, 0),
+                  svmls_lane_za64_vg4x2 (w8, z4, z15, 0))
+
+/*
+** mls_lane_w8_z28_z16_1:
+** mov (z[0-7])\.d, z16\.d
+** umlsll za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, \1\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_1, svuint16x2_t, svuint16_t,
+              svmls_lane_za64_u16_vg4x2 (w8, z28, z16, 1),
+              svmls_lane_za64_vg4x2 (w8, z28, z16, 1))
+
+/*
+** mls_lane_w8_z17_z7_3:
+** mov [^\n]+
+** mov [^\n]+
+** umlsll za\.d\[w8, 0:3, vgx2\], [^\n]+, z7\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_3, svuint16x2_t, svuint16_t,
+              svmls_lane_za64_u16_vg4x2 (w8, z17, z7, 3),
+              svmls_lane_za64_vg4x2 (w8, z17, z7, 3))
+
+/*
+** mls_lane_w8_z22_z4_5:
+** umlsll za\.d\[w8, 0:3, vgx2\], {z22\.h - z23\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_5, svuint16x2_t, svuint16_t,
+              svmls_lane_za64_u16_vg4x2 (w8, z22, z4, 5),
+              svmls_lane_za64_vg4x2 (w8, z22, z4, 5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_u16_vg4x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_u16_vg4x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..f0959089df0673a639e04a7c88633e4f66fb1823
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za64_u16_vg4x4.c
@@ -0,0 +1,130 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_lane_0_z0_z4_0:
+** mov (w8|w9|w10|w11), #?0
+** umlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svuint16x4_t, svuint16_t,
+              svmls_lane_za64_u16_vg4x4 (0, z0, z4, 0),
+              svmls_lane_za64_vg4x4 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+** mov (w8|w9|w10|w11), w0
+** umlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svuint16x4_t, svuint16_t,
+              svmls_lane_za64_u16_vg4x4 (w0, z0, z7, 1),
+              svmls_lane_za64_vg4x4 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+** umlsll za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svuint16x4_t, svuint16_t,
+              svmls_lane_za64_u16_vg4x4 (w8, z28, z4, 2),
+              svmls_lane_za64_vg4x4 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w11p4_z0_z4_3:
+** umlsll za\.d\[w11, 4:7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w11p4_z0_z4_3, svuint16x4_t, svuint16_t,
+              svmls_lane_za64_u16_vg4x4 (w11 + 4, z0, z4, 3),
+              svmls_lane_za64_vg4x4 (w11 + 4, z0, z4, 3))
+
+/*
+** mls_lane_w8p6_z0_z4_4:
+** add (w8|w9|w10|w11), w8, #?6
+** umlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p6_z0_z4_4, svuint16x4_t, svuint16_t,
+              svmls_lane_za64_u16_vg4x4 (w8 + 6, z0, z4, 4),
+              svmls_lane_za64_vg4x4 (w8 + 6, z0, z4, 4))
+
+/*
+** mls_lane_w8p7_z0_z4_5:
+** add (w8|w9|w10|w11), w8, #?7
+** umlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_5, svuint16x4_t, svuint16_t,
+              svmls_lane_za64_u16_vg4x4 (w8 + 7, z0, z4, 5),
+              svmls_lane_za64_vg4x4 (w8 + 7, z0, z4, 5))
+
+/*
+** mls_lane_w8p8_z0_z4_6:
+** add (w8|w9|w10|w11), w8, #?8
+** umlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[6\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_6, svuint16x4_t, svuint16_t,
+              svmls_lane_za64_u16_vg4x4 (w8 + 8, z0, z4, 6),
svmls_lane_za64_vg4x4 (w8 + 8, z0, z4, 6))
+
+/*
+** mls_lane_w0m1_z0_z4_7:
+** sub (w8|w9|w10|w11), w0, #?1
+** umlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, z4\.h\[7\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_7, svuint16x4_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x4 (w0 - 1, z0, z4, 7),
+ svmls_lane_za64_vg4x4 (w0 - 1, z0, z4, 7))
+
+/*
+** mls_lane_w8_z4_z15_0:
+** str d15, \[sp, #?-16\]!
+** umlsll za\.d\[w8, 0:3, vgx4\], {z4\.h - z7\.h}, z15\.h\[0\]
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_0, svuint16x4_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x4 (w8, z4, z15, 0),
+ svmls_lane_za64_vg4x4 (w8, z4, z15, 0))
+
+/*
+** mls_lane_w8_z28_z16_3:
+** mov (z[0-7]).d, z16.d
+** umlsll za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, \1\.h\[3\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_3, svuint16x4_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x4 (w8, z28, z16, 3),
+ svmls_lane_za64_vg4x4 (w8, z28, z16, 3))
+
+/*
+** mls_lane_w8_z17_z7_4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umlsll za\.d\[w8, 0:3, vgx4\], [^\n]+, z7\.h\[4\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_4, svuint16x4_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x4 (w8, z17, z7, 4),
+ svmls_lane_za64_vg4x4 (w8, z17, z7, 4))
+
+/*
+** mls_lane_w8_z22_z4_6:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** umlsll za\.d\[w8, 0:3, vgx4\], [^\n]+, z4\.h\[6\]
+** ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_6, svuint16x4_t, svuint16_t,
+ svmls_lane_za64_u16_vg4x4 (w8, z22, z4, 6),
+ svmls_lane_za64_vg4x4 (w8, z22, z4, 6))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_bf16_vg2x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_bf16_vg2x1.c
new file mode 100644
index 0000000000000000000000000000000000000000..0e04e33827084ec98271b56cc8f7241b233134de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_bf16_vg2x1.c
@@ -0,0 +1,148 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** bfmlsl za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_0_z0_z0, svbfloat16_t,
+ svmls_za32_bf16_vg2x1 (0, z0, z0),
+ svmls_za32_vg2x1 (0, z0, z0))
+
+/*
+** mls_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** bfmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w0_z0_z3, svbfloat16_t,
+ svmls_za32_bf16_vg2x1 (w0, z0, z3),
+ svmls_za32_vg2x1 (w0, z0, z3))
+
+/*
+** mls_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** bfmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w7_z0_z3, svbfloat16_t,
+ svmls_za32_bf16_vg2x1 (w7, z0, z3),
+ svmls_za32_vg2x1 (w7, z0, z3))
+
+/*
+** mls_w8_z7_z3:
+** bfmlsl za\.s\[w8, 0:1\], z7\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8_z7_z3, svbfloat16_t,
+ svmls_za32_bf16_vg2x1 (w8, z7, z3),
+ svmls_za32_vg2x1 (w8, z7, z3))
+
+/*
+** mls_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** bfmlsl za\.s\[w8, 0:1\], z31\.h, 
\1\.h +** ret +*/ +TEST_ZA_X1 (mls_w8_z31_z16, svbfloat16_t, + svmls_za32_bf16_vg2x1 (w8, z31, z16), + svmls_za32_vg2x1 (w8, z31, z16)) + +/* +** mls_w8p1_z0_z0: +** add (w8|w9|w10|w11), w8, #?1 +** bfmlsl za\.s\[\1, 0:1\], z0\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p1_z0_z0, svbfloat16_t, + svmls_za32_bf16_vg2x1 (w8 + 1, z0, z0), + svmls_za32_vg2x1 (w8 + 1, z0, z0)) + +/* +** mls_w8p2_z23_z0: +** bfmlsl za\.s\[w8, 2:3\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p2_z23_z0, svbfloat16_t, + svmls_za32_bf16_vg2x1 (w8 + 2, z23, z0), + svmls_za32_vg2x1 (w8 + 2, z23, z0)) + +/* +** mls_w11p6_z23_z0: +** bfmlsl za\.s\[w11, 6:7\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w11p6_z23_z0, svbfloat16_t, + svmls_za32_bf16_vg2x1 (w11 + 6, z23, z0), + svmls_za32_vg2x1 (w11 + 6, z23, z0)) + +/* +** mls_w8p7_z7_z7: +** add (w8|w9|w10|w11), w8, #?7 +** bfmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p7_z7_z7, svbfloat16_t, + svmls_za32_bf16_vg2x1 (w8 + 7, z7, z7), + svmls_za32_vg2x1 (w8 + 7, z7, z7)) + +/* +** mls_w11p10_z23_z0: +** bfmlsl za\.s\[w11, 10:11\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w11p10_z23_z0, svbfloat16_t, + svmls_za32_bf16_vg2x1 (w11 + 10, z23, z0), + svmls_za32_vg2x1 (w11 + 10, z23, z0)) + +/* +** mls_w8p14_z23_z0: +** bfmlsl za\.s\[w8, 14:15\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p14_z23_z0, svbfloat16_t, + svmls_za32_bf16_vg2x1 (w8 + 14, z23, z0), + svmls_za32_vg2x1 (w8 + 14, z23, z0)) + +/* +** mls_w8p15_z7_z7: +** add (w8|w9|w10|w11), w8, #?15 +** bfmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p15_z7_z7, svbfloat16_t, + svmls_za32_bf16_vg2x1 (w8 + 15, z7, z7), + svmls_za32_vg2x1 (w8 + 15, z7, z7)) + +/* +** mls_w8p16_z7_z7: +** add (w8|w9|w10|w11), w8, #?16 +** bfmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p16_z7_z7, svbfloat16_t, + svmls_za32_bf16_vg2x1 (w8 + 16, z7, z7), + svmls_za32_vg2x1 (w8 + 16, z7, z7)) + +/* +** mls_w8m1_z16_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** bfmlsl za\.s\[\1, 0:1\], z16\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8m1_z16_z0, svbfloat16_t, + svmls_za32_bf16_vg2x1 (w8 - 1, z16, z0), + svmls_za32_vg2x1 (w8 - 1, z16, z0)) + +/* +** mls_w12_z0_z3: +** mov (w8|w9|w10|w11), w12 +** bfmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mls_w12_z0_z3, svbfloat16_t, + svmls_za32_bf16_vg2x1 (w12, z0, z3), + svmls_za32_vg2x1 (w12, z0, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_bf16_vg2x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_bf16_vg2x2.c new file mode 100644 index 0000000000000000000000000000000000000000..a923ba63ffb3648bddbbc116a8779a72d89a70ad --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_bf16_vg2x2.c @@ -0,0 +1,247 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** bfmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svbfloat16x2_t, + svmls_za32_bf16_vg2x2 (0, z0, z0), + svmls_za32_vg2x2 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** bfmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svbfloat16x2_t, + svmls_za32_bf16_vg2x2 (w0, z0, z0), + svmls_za32_vg2x2 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** bfmlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svbfloat16x2_t, + svmls_za32_bf16_vg2x2 (w8, z0, z4), + 
svmls_za32_vg2x2 (w8, z0, z4)) + +/* +** mls_w8_z4_z18: +** bfmlsl za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z18, svbfloat16x2_t, + svmls_za32_bf16_vg2x2 (w8, z4, z18), + svmls_za32_vg2x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mls_w8_z0_z23: +** ... +** bfmlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z23, svbfloat16x2_t, + svmls_za32_bf16_vg2x2 (w8, z0, z23), + svmls_za32_vg2x2 (w8, z0, z23)) + +/* +** mls_w8_z23_z0: +** ... +** bfmlsl za\.s\[w8, 0:1, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z23_z0, svbfloat16x2_t, + svmls_za32_bf16_vg2x2 (w8, z23, z0), + svmls_za32_vg2x2 (w8, z23, z0)) + +/* +** mls_w8_z18_z28: +** bfmlsl za\.s\[w8, 0:1, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z28, svbfloat16x2_t, + svmls_za32_bf16_vg2x2 (w8, z18, z28), + svmls_za32_vg2x2 (w8, z18, z28)) + +/* +** mls_w8_z28_z4: +** bfmlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z28_z4, svbfloat16x2_t, + svmls_za32_bf16_vg2x2 (w8, z28, z4), + svmls_za32_vg2x2 (w8, z28, z4)) + +/* +** mls_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** bfmlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p1_z4_z0, svbfloat16x2_t, + svmls_za32_bf16_vg2x2 (w8 + 1, z4, z0), + svmls_za32_vg2x2 (w8 + 1, z4, z0)) + +/* +** mls_w8p2_z4_z0: +** bfmlsl za\.s\[w8, 2:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p2_z4_z0, svbfloat16x2_t, + svmls_za32_bf16_vg2x2 (w8 + 2, z4, z0), + svmls_za32_vg2x2 (w8 + 2, z4, z0)) + +/* +** mls_w8p6_z4_z0: +** bfmlsl za\.s\[w8, 6:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p6_z4_z0, svbfloat16x2_t, + svmls_za32_bf16_vg2x2 (w8 + 6, z4, z0), + svmls_za32_vg2x2 (w8 + 6, z4, z0)) + +/* +** mls_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** bfmlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svbfloat16x2_t, + svmls_za32_bf16_vg2x2 (w8 + 7, z4, z0), + svmls_za32_vg2x2 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** bfmlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svbfloat16x2_t, + svmls_za32_bf16_vg2x2 (w8 + 8, z4, z4), + svmls_za32_vg2x2 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** bfmlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svbfloat16x2_t, + svmls_za32_bf16_vg2x2 (w8 - 1, z4, z0), + svmls_za32_vg2x2 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** bfmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmls_single_za32_bf16_vg2x2 (0, z1, z0), + svmls_za32_vg2x2 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** bfmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmls_single_za32_bf16_vg2x2 (w0, z1, z0), + svmls_za32_vg2x2 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** bfmlsl za\.s\[w8, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmls_single_za32_bf16_vg2x2 (w8, z1, z0), + svmls_za32_vg2x2 (w8, z1, z0)) + +/* +** 
mls_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** bfmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmls_single_za32_bf16_vg2x2 (w8 + 1, z1, z0), + svmls_za32_vg2x2 (w8 + 1, z1, z0)) + +/* +** mls_single_w8p4_z20_z0: +** bfmlsl za\.s\[w8, 4:5, vgx2\], {z20\.h - z21\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svbfloat16x2_t, svbfloat16_t, + svmls_single_za32_bf16_vg2x2 (w8 + 4, z20, z0), + svmls_za32_vg2x2 (w8 + 4, z20, z0)) + +/* +** mls_single_w8p6_z27_z0: +** bfmlsl za\.s\[w8, 6:7, vgx2\], {z27\.h - z28\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svbfloat16x2_t, svbfloat16_t, + svmls_single_za32_bf16_vg2x2 (w8 + 6, z27, z0), + svmls_za32_vg2x2 (w8 + 6, z27, z0)) + +/* +** mls_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** bfmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmls_single_za32_bf16_vg2x2 (w8 + 7, z1, z0), + svmls_za32_vg2x2 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** bfmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmls_single_za32_bf16_vg2x2 (w8 + 8, z1, z0), + svmls_za32_vg2x2 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** bfmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svbfloat16x2_t, svbfloat16_t, + svmls_single_za32_bf16_vg2x2 (w0 - 1, z1, z0), + svmls_za32_vg2x2 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** bfmlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svbfloat16x2_t, svbfloat16_t, + svmls_single_za32_bf16_vg2x2 (w8, z0, z15), + svmls_za32_vg2x2 (w8, z0, z15)) + +/* +** mls_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** bfmlsl za\.s\[w8, 0:1, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z20_z16, svbfloat16x2_t, svbfloat16_t, + svmls_single_za32_bf16_vg2x2 (w8, z20, z16), + svmls_za32_vg2x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_bf16_vg2x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_bf16_vg2x4.c new file mode 100644 index 0000000000000000000000000000000000000000..498eb1061dc7c3559d4dba001cc0e132fab0417a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_bf16_vg2x4.c @@ -0,0 +1,258 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** bfmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svbfloat16x4_t, + svmls_za32_bf16_vg2x4 (0, z0, z0), + svmls_za32_vg2x4 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** bfmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svbfloat16x4_t, + svmls_za32_bf16_vg2x4 (w0, z0, z0), + svmls_za32_vg2x4 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** bfmlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svbfloat16x4_t, + svmls_za32_bf16_vg2x4 (w8, z0, z4), + svmls_za32_vg2x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. 
*/ + +/* +** mls_w8_z0_z18: +** ... +** bfmlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z18, svbfloat16x4_t, + svmls_za32_bf16_vg2x4 (w8, z0, z18), + svmls_za32_vg2x4 (w8, z0, z18)) + +/* +** mls_w8_z18_z0: +** ... +** bfmlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z0, svbfloat16x4_t, + svmls_za32_bf16_vg2x4 (w8, z18, z0), + svmls_za32_vg2x4 (w8, z18, z0)) + +/* +** mls_w8_z0_z23: +** ... +** bfmlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z23, svbfloat16x4_t, + svmls_za32_bf16_vg2x4 (w8, z0, z23), + svmls_za32_vg2x4 (w8, z0, z23)) + +/* +** mls_w8_z23_z0: +** ... +** bfmlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z23_z0, svbfloat16x4_t, + svmls_za32_bf16_vg2x4 (w8, z23, z0), + svmls_za32_vg2x4 (w8, z23, z0)) + +/* +** mls_w8_z4_z28: +** bfmlsl za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z28, svbfloat16x4_t, + svmls_za32_bf16_vg2x4 (w8, z4, z28), + svmls_za32_vg2x4 (w8, z4, z28)) + +/* +** mls_w8_z28_z0: +** bfmlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z28_z0, svbfloat16x4_t, + svmls_za32_bf16_vg2x4 (w8, z28, z0), + svmls_za32_vg2x4 (w8, z28, z0)) + +/* +** mls_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** bfmlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p1_z4_z0, svbfloat16x4_t, + svmls_za32_bf16_vg2x4 (w8 + 1, z4, z0), + svmls_za32_vg2x4 (w8 + 1, z4, z0)) + +/* +** mls_w8p2_z4_z0: +** bfmlsl za\.s\[w8, 2:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p2_z4_z0, svbfloat16x4_t, + svmls_za32_bf16_vg2x4 (w8 + 2, z4, z0), + svmls_za32_vg2x4 (w8 + 2, z4, z0)) + +/* +** mls_w8p6_z4_z0: +** bfmlsl za\.s\[w8, 6:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p6_z4_z0, svbfloat16x4_t, + svmls_za32_bf16_vg2x4 (w8 + 6, z4, z0), + svmls_za32_vg2x4 (w8 + 6, z4, z0)) + +/* +** mls_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** bfmlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svbfloat16x4_t, + svmls_za32_bf16_vg2x4 (w8 + 7, z4, z0), + svmls_za32_vg2x4 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** bfmlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svbfloat16x4_t, + svmls_za32_bf16_vg2x4 (w8 + 8, z4, z4), + svmls_za32_vg2x4 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** bfmlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svbfloat16x4_t, + svmls_za32_bf16_vg2x4 (w8 - 1, z4, z0), + svmls_za32_vg2x4 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** bfmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmls_single_za32_bf16_vg2x4 (0, z1, z0), + svmls_za32_vg2x4 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** bfmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmls_single_za32_bf16_vg2x4 (w0, z1, z0), + svmls_za32_vg2x4 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** bfmlsl za\.s\[w8, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svbfloat16x4_t, 
svbfloat16_t, + svmls_single_za32_bf16_vg2x4 (w8, z1, z0), + svmls_za32_vg2x4 (w8, z1, z0)) + +/* +** mls_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** bfmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmls_single_za32_bf16_vg2x4 (w8 + 1, z1, z0), + svmls_za32_vg2x4 (w8 + 1, z1, z0)) + +/* +** mls_single_w8p4_z20_z0: +** bfmlsl za\.s\[w8, 4:5, vgx4\], {z20\.h - z23\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svbfloat16x4_t, svbfloat16_t, + svmls_single_za32_bf16_vg2x4 (w8 + 4, z20, z0), + svmls_za32_vg2x4 (w8 + 4, z20, z0)) + +/* +** mls_single_w8p6_z27_z0: +** bfmlsl za\.s\[w8, 6:7, vgx4\], {z27\.h - z30\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svbfloat16x4_t, svbfloat16_t, + svmls_single_za32_bf16_vg2x4 (w8 + 6, z27, z0), + svmls_za32_vg2x4 (w8 + 6, z27, z0)) + +/* +** mls_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** bfmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmls_single_za32_bf16_vg2x4 (w8 + 7, z1, z0), + svmls_za32_vg2x4 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** bfmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmls_single_za32_bf16_vg2x4 (w8 + 8, z1, z0), + svmls_za32_vg2x4 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** bfmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svbfloat16x4_t, svbfloat16_t, + svmls_single_za32_bf16_vg2x4 (w0 - 1, z1, z0), + svmls_za32_vg2x4 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** bfmlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, z15\.h
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svbfloat16x4_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x4 (w8, z0, z15),
+ svmls_za32_vg2x4 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** bfmlsl za\.s\[w8, 0:1, vgx4\], {z20\.h - z23\.h}, \1\.h
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svbfloat16x4_t, svbfloat16_t,
+ svmls_single_za32_bf16_vg2x4 (w8, z20, z16),
+ svmls_za32_vg2x4 (w8, z20, z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f16_vg2x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f16_vg2x1.c
new file mode 100644
index 0000000000000000000000000000000000000000..8d53b3d1b04293208653ec316a472aa9bf0374bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f16_vg2x1.c
@@ -0,0 +1,148 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fmlsl za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_0_z0_z0, svfloat16_t,
+ svmls_za32_f16_vg2x1 (0, z0, z0),
+ svmls_za32_vg2x1 (0, z0, z0))
+
+/*
+** mls_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** fmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w0_z0_z3, svfloat16_t,
+ svmls_za32_f16_vg2x1 (w0, z0, z3),
+ svmls_za32_vg2x1 (w0, z0, z3))
+
+/*
+** mls_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** fmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w7_z0_z3, svfloat16_t,
+ svmls_za32_f16_vg2x1 (w7, z0, z3),
+ svmls_za32_vg2x1 (w7, z0, z3))
+
+/*
+** mls_w8_z7_z3:
+** fmlsl za\.s\[w8, 0:1\], z7\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8_z7_z3, svfloat16_t,
+ svmls_za32_f16_vg2x1 (w8, z7, z3),
+ svmls_za32_vg2x1 (w8, z7, z3))
+
+/*
+** mls_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** fmlsl za\.s\[w8, 0:1\], z31\.h, 
\1\.h +** ret +*/ +TEST_ZA_X1 (mls_w8_z31_z16, svfloat16_t, + svmls_za32_f16_vg2x1 (w8, z31, z16), + svmls_za32_vg2x1 (w8, z31, z16)) + +/* +** mls_w8p1_z0_z0: +** add (w8|w9|w10|w11), w8, #?1 +** fmlsl za\.s\[\1, 0:1\], z0\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p1_z0_z0, svfloat16_t, + svmls_za32_f16_vg2x1 (w8 + 1, z0, z0), + svmls_za32_vg2x1 (w8 + 1, z0, z0)) + +/* +** mls_w8p2_z23_z0: +** fmlsl za\.s\[w8, 2:3\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p2_z23_z0, svfloat16_t, + svmls_za32_f16_vg2x1 (w8 + 2, z23, z0), + svmls_za32_vg2x1 (w8 + 2, z23, z0)) + +/* +** mls_w11p6_z23_z0: +** fmlsl za\.s\[w11, 6:7\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w11p6_z23_z0, svfloat16_t, + svmls_za32_f16_vg2x1 (w11 + 6, z23, z0), + svmls_za32_vg2x1 (w11 + 6, z23, z0)) + +/* +** mls_w8p7_z7_z7: +** add (w8|w9|w10|w11), w8, #?7 +** fmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p7_z7_z7, svfloat16_t, + svmls_za32_f16_vg2x1 (w8 + 7, z7, z7), + svmls_za32_vg2x1 (w8 + 7, z7, z7)) + +/* +** mls_w11p10_z23_z0: +** fmlsl za\.s\[w11, 10:11\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w11p10_z23_z0, svfloat16_t, + svmls_za32_f16_vg2x1 (w11 + 10, z23, z0), + svmls_za32_vg2x1 (w11 + 10, z23, z0)) + +/* +** mls_w8p14_z23_z0: +** fmlsl za\.s\[w8, 14:15\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p14_z23_z0, svfloat16_t, + svmls_za32_f16_vg2x1 (w8 + 14, z23, z0), + svmls_za32_vg2x1 (w8 + 14, z23, z0)) + +/* +** mls_w8p15_z7_z7: +** add (w8|w9|w10|w11), w8, #?15 +** fmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p15_z7_z7, svfloat16_t, + svmls_za32_f16_vg2x1 (w8 + 15, z7, z7), + svmls_za32_vg2x1 (w8 + 15, z7, z7)) + +/* +** mls_w8p16_z7_z7: +** add (w8|w9|w10|w11), w8, #?16 +** fmlsl za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p16_z7_z7, svfloat16_t, + svmls_za32_f16_vg2x1 (w8 + 16, z7, z7), + svmls_za32_vg2x1 (w8 + 16, z7, z7)) + +/* +** mls_w8m1_z16_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fmlsl za\.s\[\1, 0:1\], z16\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8m1_z16_z0, svfloat16_t, + svmls_za32_f16_vg2x1 (w8 - 1, z16, z0), + svmls_za32_vg2x1 (w8 - 1, z16, z0)) + +/* +** mls_w12_z0_z3: +** mov (w8|w9|w10|w11), w12 +** fmlsl za\.s\[\1, 0:1\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mls_w12_z0_z3, svfloat16_t, + svmls_za32_f16_vg2x1 (w12, z0, z3), + svmls_za32_vg2x1 (w12, z0, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f16_vg2x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f16_vg2x2.c new file mode 100644 index 0000000000000000000000000000000000000000..dafa9f0b4869b7aded90ac3cc74d2bb697c55d4d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f16_vg2x2.c @@ -0,0 +1,247 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** fmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svfloat16x2_t, + svmls_za32_f16_vg2x2 (0, z0, z0), + svmls_za32_vg2x2 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** fmlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svfloat16x2_t, + svmls_za32_f16_vg2x2 (w0, z0, z0), + svmls_za32_vg2x2 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** fmlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svfloat16x2_t, + svmls_za32_f16_vg2x2 (w8, z0, z4), + svmls_za32_vg2x2 (w8, z0, z4)) + +/* +** 
mls_w8_z4_z18: +** fmlsl za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z18, svfloat16x2_t, + svmls_za32_f16_vg2x2 (w8, z4, z18), + svmls_za32_vg2x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mls_w8_z0_z23: +** ... +** fmlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z23, svfloat16x2_t, + svmls_za32_f16_vg2x2 (w8, z0, z23), + svmls_za32_vg2x2 (w8, z0, z23)) + +/* +** mls_w8_z23_z0: +** ... +** fmlsl za\.s\[w8, 0:1, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z23_z0, svfloat16x2_t, + svmls_za32_f16_vg2x2 (w8, z23, z0), + svmls_za32_vg2x2 (w8, z23, z0)) + +/* +** mls_w8_z18_z28: +** fmlsl za\.s\[w8, 0:1, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z28, svfloat16x2_t, + svmls_za32_f16_vg2x2 (w8, z18, z28), + svmls_za32_vg2x2 (w8, z18, z28)) + +/* +** mls_w8_z28_z4: +** fmlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z28_z4, svfloat16x2_t, + svmls_za32_f16_vg2x2 (w8, z28, z4), + svmls_za32_vg2x2 (w8, z28, z4)) + +/* +** mls_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** fmlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p1_z4_z0, svfloat16x2_t, + svmls_za32_f16_vg2x2 (w8 + 1, z4, z0), + svmls_za32_vg2x2 (w8 + 1, z4, z0)) + +/* +** mls_w8p2_z4_z0: +** fmlsl za\.s\[w8, 2:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p2_z4_z0, svfloat16x2_t, + svmls_za32_f16_vg2x2 (w8 + 2, z4, z0), + svmls_za32_vg2x2 (w8 + 2, z4, z0)) + +/* +** mls_w8p6_z4_z0: +** fmlsl za\.s\[w8, 6:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p6_z4_z0, svfloat16x2_t, + svmls_za32_f16_vg2x2 (w8 + 6, z4, z0), + svmls_za32_vg2x2 (w8 + 6, z4, z0)) + +/* +** mls_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** fmlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svfloat16x2_t, + svmls_za32_f16_vg2x2 (w8 + 7, z4, z0), + svmls_za32_vg2x2 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** fmlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svfloat16x2_t, + svmls_za32_f16_vg2x2 (w8 + 8, z4, z4), + svmls_za32_vg2x2 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fmlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svfloat16x2_t, + svmls_za32_f16_vg2x2 (w8 - 1, z4, z0), + svmls_za32_vg2x2 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** fmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svfloat16x2_t, svfloat16_t, + svmls_single_za32_f16_vg2x2 (0, z1, z0), + svmls_za32_vg2x2 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** fmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svfloat16x2_t, svfloat16_t, + svmls_single_za32_f16_vg2x2 (w0, z1, z0), + svmls_za32_vg2x2 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** fmlsl za\.s\[w8, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svfloat16x2_t, svfloat16_t, + svmls_single_za32_f16_vg2x2 (w8, z1, z0), + svmls_za32_vg2x2 (w8, z1, z0)) + +/* +** mls_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** fmlsl za\.s\[\1, 0:1, vgx2\], 
{z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svfloat16x2_t, svfloat16_t, + svmls_single_za32_f16_vg2x2 (w8 + 1, z1, z0), + svmls_za32_vg2x2 (w8 + 1, z1, z0)) + +/* +** mls_single_w8p4_z20_z0: +** fmlsl za\.s\[w8, 4:5, vgx2\], {z20\.h - z21\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svfloat16x2_t, svfloat16_t, + svmls_single_za32_f16_vg2x2 (w8 + 4, z20, z0), + svmls_za32_vg2x2 (w8 + 4, z20, z0)) + +/* +** mls_single_w8p6_z27_z0: +** fmlsl za\.s\[w8, 6:7, vgx2\], {z27\.h - z28\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svfloat16x2_t, svfloat16_t, + svmls_single_za32_f16_vg2x2 (w8 + 6, z27, z0), + svmls_za32_vg2x2 (w8 + 6, z27, z0)) + +/* +** mls_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** fmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svfloat16x2_t, svfloat16_t, + svmls_single_za32_f16_vg2x2 (w8 + 7, z1, z0), + svmls_za32_vg2x2 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svfloat16x2_t, svfloat16_t, + svmls_single_za32_f16_vg2x2 (w8 + 8, z1, z0), + svmls_za32_vg2x2 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** fmlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svfloat16x2_t, svfloat16_t, + svmls_single_za32_f16_vg2x2 (w0 - 1, z1, z0), + svmls_za32_vg2x2 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** fmlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svfloat16x2_t, svfloat16_t, + svmls_single_za32_f16_vg2x2 (w8, z0, z15), + svmls_za32_vg2x2 (w8, z0, z15)) + +/* +** mls_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** fmlsl za\.s\[w8, 0:1, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z20_z16, svfloat16x2_t, svfloat16_t, + svmls_single_za32_f16_vg2x2 (w8, z20, z16), + svmls_za32_vg2x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f16_vg2x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f16_vg2x4.c new file mode 100644 index 0000000000000000000000000000000000000000..c0f31bf7bc1dae0862b0e70749831872c67ebe7a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f16_vg2x4.c @@ -0,0 +1,258 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** fmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svfloat16x4_t, + svmls_za32_f16_vg2x4 (0, z0, z0), + svmls_za32_vg2x4 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** fmlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svfloat16x4_t, + svmls_za32_f16_vg2x4 (w0, z0, z0), + svmls_za32_vg2x4 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** fmlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svfloat16x4_t, + svmls_za32_f16_vg2x4 (w8, z0, z4), + svmls_za32_vg2x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mls_w8_z0_z18: +** ... 
+** fmlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z18, svfloat16x4_t, + svmls_za32_f16_vg2x4 (w8, z0, z18), + svmls_za32_vg2x4 (w8, z0, z18)) + +/* +** mls_w8_z18_z0: +** ... +** fmlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z0, svfloat16x4_t, + svmls_za32_f16_vg2x4 (w8, z18, z0), + svmls_za32_vg2x4 (w8, z18, z0)) + +/* +** mls_w8_z0_z23: +** ... +** fmlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z23, svfloat16x4_t, + svmls_za32_f16_vg2x4 (w8, z0, z23), + svmls_za32_vg2x4 (w8, z0, z23)) + +/* +** mls_w8_z23_z0: +** ... +** fmlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z23_z0, svfloat16x4_t, + svmls_za32_f16_vg2x4 (w8, z23, z0), + svmls_za32_vg2x4 (w8, z23, z0)) + +/* +** mls_w8_z4_z28: +** fmlsl za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z28, svfloat16x4_t, + svmls_za32_f16_vg2x4 (w8, z4, z28), + svmls_za32_vg2x4 (w8, z4, z28)) + +/* +** mls_w8_z28_z0: +** fmlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z28_z0, svfloat16x4_t, + svmls_za32_f16_vg2x4 (w8, z28, z0), + svmls_za32_vg2x4 (w8, z28, z0)) + +/* +** mls_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** fmlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p1_z4_z0, svfloat16x4_t, + svmls_za32_f16_vg2x4 (w8 + 1, z4, z0), + svmls_za32_vg2x4 (w8 + 1, z4, z0)) + +/* +** mls_w8p2_z4_z0: +** fmlsl za\.s\[w8, 2:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p2_z4_z0, svfloat16x4_t, + svmls_za32_f16_vg2x4 (w8 + 2, z4, z0), + svmls_za32_vg2x4 (w8 + 2, z4, z0)) + +/* +** mls_w8p6_z4_z0: +** fmlsl za\.s\[w8, 6:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p6_z4_z0, svfloat16x4_t, + svmls_za32_f16_vg2x4 (w8 + 6, z4, z0), + svmls_za32_vg2x4 (w8 + 6, z4, z0)) + +/* +** mls_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** fmlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svfloat16x4_t, + svmls_za32_f16_vg2x4 (w8 + 7, z4, z0), + svmls_za32_vg2x4 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** fmlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svfloat16x4_t, + svmls_za32_f16_vg2x4 (w8 + 8, z4, z4), + svmls_za32_vg2x4 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fmlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svfloat16x4_t, + svmls_za32_f16_vg2x4 (w8 - 1, z4, z0), + svmls_za32_vg2x4 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** fmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svfloat16x4_t, svfloat16_t, + svmls_single_za32_f16_vg2x4 (0, z1, z0), + svmls_za32_vg2x4 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** fmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svfloat16x4_t, svfloat16_t, + svmls_single_za32_f16_vg2x4 (w0, z1, z0), + svmls_za32_vg2x4 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** fmlsl za\.s\[w8, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svfloat16x4_t, svfloat16_t, + svmls_single_za32_f16_vg2x4 (w8, z1, z0), + svmls_za32_vg2x4 (w8, z1, 
z0)) + +/* +** mls_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** fmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svfloat16x4_t, svfloat16_t, + svmls_single_za32_f16_vg2x4 (w8 + 1, z1, z0), + svmls_za32_vg2x4 (w8 + 1, z1, z0)) + +/* +** mls_single_w8p4_z20_z0: +** fmlsl za\.s\[w8, 4:5, vgx4\], {z20\.h - z23\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svfloat16x4_t, svfloat16_t, + svmls_single_za32_f16_vg2x4 (w8 + 4, z20, z0), + svmls_za32_vg2x4 (w8 + 4, z20, z0)) + +/* +** mls_single_w8p6_z27_z0: +** fmlsl za\.s\[w8, 6:7, vgx4\], {z27\.h - z30\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svfloat16x4_t, svfloat16_t, + svmls_single_za32_f16_vg2x4 (w8 + 6, z27, z0), + svmls_za32_vg2x4 (w8 + 6, z27, z0)) + +/* +** mls_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** fmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svfloat16x4_t, svfloat16_t, + svmls_single_za32_f16_vg2x4 (w8 + 7, z1, z0), + svmls_za32_vg2x4 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svfloat16x4_t, svfloat16_t, + svmls_single_za32_f16_vg2x4 (w8 + 8, z1, z0), + svmls_za32_vg2x4 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** fmlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svfloat16x4_t, svfloat16_t, + svmls_single_za32_f16_vg2x4 (w0 - 1, z1, z0), + svmls_za32_vg2x4 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** fmlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svfloat16x4_t, svfloat16_t, + svmls_single_za32_f16_vg2x4 (w8, z0, z15), + svmls_za32_vg2x4 (w8, z0, z15)) + +/* +** mls_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** fmlsl za\.s\[w8, 0:1, vgx4\], {z20\.h - z23\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z20_z16, svfloat16x4_t, svfloat16_t, + svmls_single_za32_f16_vg2x4 (w8, z20, z16), + svmls_za32_vg2x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f32_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f32_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..231974120f1782c6e0814a7f54b2f72f79867119 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f32_vg1x2.c @@ -0,0 +1,180 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** fmls za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svfloat32x2_t, + svmls_za32_f32_vg1x2 (0, z0, z0), + svmls_za32_vg1x2 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** fmls za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svfloat32x2_t, + svmls_za32_f32_vg1x2 (w0, z0, z0), + svmls_za32_vg1x2 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** fmls za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svfloat32x2_t, + svmls_za32_f32_vg1x2 (w8, z0, z4), + svmls_za32_vg1x2 (w8, z0, z4)) + +/* +** mls_w8_z4_z18: +** fmls za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z18\.s - z19\.s} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z18, 
svfloat32x2_t, + svmls_za32_f32_vg1x2 (w8, z4, z18), + svmls_za32_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mls_w8_z23_z0: +** ... +** fmls za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (mls_w8_z23_z0, svfloat32x2_t, + svmls_za32_f32_vg1x2 (w8, z23, z0), + svmls_za32_vg1x2 (w8, z23, z0)) + +/* +** mls_w8_z18_z23: +** ... +** fmls za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z23, svfloat32x2_t, + svmls_za32_f32_vg1x2 (w8, z18, z23), + svmls_za32_vg1x2 (w8, z18, z23)) + +/* +** mls_w8_z4_z28: +** fmls za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z28\.s - z29\.s} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z28, svfloat32x2_t, + svmls_za32_f32_vg1x2 (w8, z4, z28), + svmls_za32_vg1x2 (w8, z4, z28)) + +/* +** mls_w8p7_z4_z0: +** fmls za\.s\[w8, 7, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svfloat32x2_t, + svmls_za32_f32_vg1x2 (w8 + 7, z4, z0), + svmls_za32_vg1x2 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** fmls za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svfloat32x2_t, + svmls_za32_f32_vg1x2 (w8 + 8, z4, z4), + svmls_za32_vg1x2 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fmls za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svfloat32x2_t, + svmls_za32_f32_vg1x2 (w8 - 1, z4, z0), + svmls_za32_vg1x2 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** fmls za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svfloat32x2_t, svfloat32_t, + svmls_single_za32_f32_vg1x2 (0, z1, z0), + svmls_za32_vg1x2 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** fmls za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svfloat32x2_t, svfloat32_t, + svmls_single_za32_f32_vg1x2 (w0, z1, z0), + svmls_za32_vg1x2 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** fmls za\.s\[w8, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svfloat32x2_t, svfloat32_t, + svmls_single_za32_f32_vg1x2 (w8, z1, z0), + svmls_za32_vg1x2 (w8, z1, z0)) + +/* +** mls_single_w8p7_z1_z0: +** fmls za\.s\[w8, 7, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svfloat32x2_t, svfloat32_t, + svmls_single_za32_f32_vg1x2 (w8 + 7, z1, z0), + svmls_za32_vg1x2 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fmls za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svfloat32x2_t, svfloat32_t, + svmls_single_za32_f32_vg1x2 (w8 + 8, z1, z0), + svmls_za32_vg1x2 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** fmls za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svfloat32x2_t, svfloat32_t, + svmls_single_za32_f32_vg1x2 (w0 - 1, z1, z0), + svmls_za32_vg1x2 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** fmls za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, z15\.s +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svfloat32x2_t, svfloat32_t, + svmls_single_za32_f32_vg1x2 (w8, z0, z15), + svmls_za32_vg1x2 (w8, z0, z15)) + +/* +** mls_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** fmls za\.s\[w8, 0, vgx2\], {z20\.s - z21\.s}, \1\.s +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z20_z16, svfloat32x2_t, svfloat32_t, + svmls_single_za32_f32_vg1x2 (w8, z20, z16), + svmls_za32_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f32_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f32_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..9c358bb8dbfca1fe5902deb17e3de481670e81a1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_f32_vg1x4.c @@ -0,0 +1,172 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** fmls za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svfloat32x4_t, + svmls_za32_f32_vg1x4 (0, z0, z0), + svmls_za32_vg1x4 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** fmls za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svfloat32x4_t, + svmls_za32_f32_vg1x4 (w0, z0, z0), + svmls_za32_vg1x4 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** fmls za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svfloat32x4_t, + svmls_za32_f32_vg1x4 (w8, z0, z4), + svmls_za32_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mls_w8_z0_z18: +** ... +** fmls za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z18, svfloat32x4_t, + svmls_za32_f32_vg1x4 (w8, z0, z18), + svmls_za32_vg1x4 (w8, z0, z18)) + +/* +** mls_w8_z18_z28: +** ... +** fmls za\.s\[w8, 0, vgx4\], [^\n]+, {z28\.s - z31\.s} +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z28, svfloat32x4_t, + svmls_za32_f32_vg1x4 (w8, z18, z28), + svmls_za32_vg1x4 (w8, z18, z28)) + +/* +** mls_w8_z28_z23: +** ... 
+** fmls za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z28_z23, svfloat32x4_t, + svmls_za32_f32_vg1x4 (w8, z28, z23), + svmls_za32_vg1x4 (w8, z28, z23)) + +/* +** mls_w8p7_z4_z0: +** fmls za\.s\[w8, 7, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svfloat32x4_t, + svmls_za32_f32_vg1x4 (w8 + 7, z4, z0), + svmls_za32_vg1x4 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** fmls za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svfloat32x4_t, + svmls_za32_f32_vg1x4 (w8 + 8, z4, z4), + svmls_za32_vg1x4 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fmls za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svfloat32x4_t, + svmls_za32_f32_vg1x4 (w8 - 1, z4, z0), + svmls_za32_vg1x4 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** fmls za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svfloat32x4_t, svfloat32_t, + svmls_single_za32_f32_vg1x4 (0, z1, z0), + svmls_za32_vg1x4 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** fmls za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svfloat32x4_t, svfloat32_t, + svmls_single_za32_f32_vg1x4 (w0, z1, z0), + svmls_za32_vg1x4 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** fmls za\.s\[w8, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svfloat32x4_t, svfloat32_t, + svmls_single_za32_f32_vg1x4 (w8, z1, z0), + svmls_za32_vg1x4 (w8, z1, z0)) + +/* +** mls_single_w8p7_z1_z0: +** fmls za\.s\[w8, 7, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svfloat32x4_t, svfloat32_t, + svmls_single_za32_f32_vg1x4 (w8 + 7, z1, z0), + svmls_za32_vg1x4 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fmls za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svfloat32x4_t, svfloat32_t, + svmls_single_za32_f32_vg1x4 (w8 + 8, z1, z0), + svmls_za32_vg1x4 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** fmls za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svfloat32x4_t, svfloat32_t, + svmls_single_za32_f32_vg1x4 (w0 - 1, z1, z0), + svmls_za32_vg1x4 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** fmls za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, z15\.s
+** ldr d15, \[sp\], #?16
+** ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svfloat32x4_t, svfloat32_t,
+ svmls_single_za32_f32_vg1x4 (w8, z0, z15),
+ svmls_za32_vg1x4 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+** mov (z[0-7]).d, z16.d
+** fmls za\.s\[w8, 0, vgx4\], {z20\.s - z23\.s}, \1\.s
+** ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svfloat32x4_t, svfloat32_t,
+ svmls_single_za32_f32_vg1x4 (w8, z20, z16),
+ svmls_za32_vg1x4 (w8, z20, z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s16_vg2x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s16_vg2x1.c
new file mode 100644
index 0000000000000000000000000000000000000000..0c73d04ee4929e8c6d62e0f3f2302f4e6ef5425d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s16_vg2x1.c
@@ -0,0 +1,148 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** mls_0_z0_z0:
+** mov (w8|w9|w10|w11), #?0
+** smlsl za\.s\[\1, 0:1\], z0\.h, z0\.h
+** ret
+*/
+TEST_ZA_X1 (mls_0_z0_z0, svint16_t,
+ svmls_za32_s16_vg2x1 (0, z0, z0),
+ svmls_za32_vg2x1 (0, z0, z0))
+
+/*
+** mls_w0_z0_z3:
+** mov (w8|w9|w10|w11), w0
+** smlsl za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w0_z0_z3, svint16_t,
+ svmls_za32_s16_vg2x1 (w0, z0, z3),
+ svmls_za32_vg2x1 (w0, z0, z3))
+
+/*
+** mls_w7_z0_z3:
+** mov (w8|w9|w10|w11), w7
+** smlsl za\.s\[\1, 0:1\], z0\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w7_z0_z3, svint16_t,
+ svmls_za32_s16_vg2x1 (w7, z0, z3),
+ svmls_za32_vg2x1 (w7, z0, z3))
+
+/*
+** mls_w8_z7_z3:
+** smlsl za\.s\[w8, 0:1\], z7\.h, z3\.h
+** ret
+*/
+TEST_ZA_X1 (mls_w8_z7_z3, svint16_t,
+ svmls_za32_s16_vg2x1 (w8, z7, z3),
+ svmls_za32_vg2x1 (w8, z7, z3))
+
+/*
+** mls_w8_z31_z16:
+** mov (z[0-7])\.d, z16\.d
+** smlsl za\.s\[w8, 0:1\], z31\.h, 
\1\.h +** ret +*/ +TEST_ZA_X1 (mls_w8_z31_z16, svint16_t, + svmls_za32_s16_vg2x1 (w8, z31, z16), + svmls_za32_vg2x1 (w8, z31, z16)) + +/* +** mls_w8p1_z0_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlsl za\.s\[\1, 0:1\], z0\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p1_z0_z0, svint16_t, + svmls_za32_s16_vg2x1 (w8 + 1, z0, z0), + svmls_za32_vg2x1 (w8 + 1, z0, z0)) + +/* +** mls_w8p2_z23_z0: +** smlsl za\.s\[w8, 2:3\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p2_z23_z0, svint16_t, + svmls_za32_s16_vg2x1 (w8 + 2, z23, z0), + svmls_za32_vg2x1 (w8 + 2, z23, z0)) + +/* +** mls_w11p6_z23_z0: +** smlsl za\.s\[w11, 6:7\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w11p6_z23_z0, svint16_t, + svmls_za32_s16_vg2x1 (w11 + 6, z23, z0), + svmls_za32_vg2x1 (w11 + 6, z23, z0)) + +/* +** mls_w8p7_z7_z7: +** add (w8|w9|w10|w11), w8, #?7 +** smlsl za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p7_z7_z7, svint16_t, + svmls_za32_s16_vg2x1 (w8 + 7, z7, z7), + svmls_za32_vg2x1 (w8 + 7, z7, z7)) + +/* +** mls_w11p10_z23_z0: +** smlsl za\.s\[w11, 10:11\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w11p10_z23_z0, svint16_t, + svmls_za32_s16_vg2x1 (w11 + 10, z23, z0), + svmls_za32_vg2x1 (w11 + 10, z23, z0)) + +/* +** mls_w8p14_z23_z0: +** smlsl za\.s\[w8, 14:15\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p14_z23_z0, svint16_t, + svmls_za32_s16_vg2x1 (w8 + 14, z23, z0), + svmls_za32_vg2x1 (w8 + 14, z23, z0)) + +/* +** mls_w8p15_z7_z7: +** add (w8|w9|w10|w11), w8, #?15 +** smlsl za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p15_z7_z7, svint16_t, + svmls_za32_s16_vg2x1 (w8 + 15, z7, z7), + svmls_za32_vg2x1 (w8 + 15, z7, z7)) + +/* +** mls_w8p16_z7_z7: +** add (w8|w9|w10|w11), w8, #?16 +** smlsl za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p16_z7_z7, svint16_t, + svmls_za32_s16_vg2x1 (w8 + 16, z7, z7), + svmls_za32_vg2x1 (w8 + 16, z7, z7)) + +/* +** mls_w8m1_z16_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** smlsl za\.s\[\1, 0:1\], z16\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8m1_z16_z0, svint16_t, + svmls_za32_s16_vg2x1 (w8 - 1, z16, z0), + svmls_za32_vg2x1 (w8 - 1, z16, z0)) + +/* +** mls_w12_z0_z3: +** mov (w8|w9|w10|w11), w12 +** smlsl za\.s\[\1, 0:1\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mls_w12_z0_z3, svint16_t, + svmls_za32_s16_vg2x1 (w12, z0, z3), + svmls_za32_vg2x1 (w12, z0, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s16_vg2x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s16_vg2x2.c new file mode 100644 index 0000000000000000000000000000000000000000..7b5d93441e5ee6f66a702966afcb15bc407627e0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s16_vg2x2.c @@ -0,0 +1,247 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** smlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svint16x2_t, + svmls_za32_s16_vg2x2 (0, z0, z0), + svmls_za32_vg2x2 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** smlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svint16x2_t, + svmls_za32_s16_vg2x2 (w0, z0, z0), + svmls_za32_vg2x2 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** smlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svint16x2_t, + svmls_za32_s16_vg2x2 (w8, z0, z4), + svmls_za32_vg2x2 (w8, z0, z4)) + +/* +** mls_w8_z4_z18: +** smlsl za\.s\[w8, 0:1, 
vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z18, svint16x2_t, + svmls_za32_s16_vg2x2 (w8, z4, z18), + svmls_za32_vg2x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mls_w8_z0_z23: +** ... +** smlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z23, svint16x2_t, + svmls_za32_s16_vg2x2 (w8, z0, z23), + svmls_za32_vg2x2 (w8, z0, z23)) + +/* +** mls_w8_z23_z0: +** ... +** smlsl za\.s\[w8, 0:1, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z23_z0, svint16x2_t, + svmls_za32_s16_vg2x2 (w8, z23, z0), + svmls_za32_vg2x2 (w8, z23, z0)) + +/* +** mls_w8_z18_z28: +** smlsl za\.s\[w8, 0:1, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z28, svint16x2_t, + svmls_za32_s16_vg2x2 (w8, z18, z28), + svmls_za32_vg2x2 (w8, z18, z28)) + +/* +** mls_w8_z28_z4: +** smlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z28_z4, svint16x2_t, + svmls_za32_s16_vg2x2 (w8, z28, z4), + svmls_za32_vg2x2 (w8, z28, z4)) + +/* +** mls_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p1_z4_z0, svint16x2_t, + svmls_za32_s16_vg2x2 (w8 + 1, z4, z0), + svmls_za32_vg2x2 (w8 + 1, z4, z0)) + +/* +** mls_w8p2_z4_z0: +** smlsl za\.s\[w8, 2:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p2_z4_z0, svint16x2_t, + svmls_za32_s16_vg2x2 (w8 + 2, z4, z0), + svmls_za32_vg2x2 (w8 + 2, z4, z0)) + +/* +** mls_w8p6_z4_z0: +** smlsl za\.s\[w8, 6:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p6_z4_z0, svint16x2_t, + svmls_za32_s16_vg2x2 (w8 + 6, z4, z0), + svmls_za32_vg2x2 (w8 + 6, z4, z0)) + +/* +** mls_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svint16x2_t, + svmls_za32_s16_vg2x2 (w8 + 7, z4, z0), + svmls_za32_vg2x2 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** smlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svint16x2_t, + svmls_za32_s16_vg2x2 (w8 + 8, z4, z4), + svmls_za32_vg2x2 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** smlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svint16x2_t, + svmls_za32_s16_vg2x2 (w8 - 1, z4, z0), + svmls_za32_vg2x2 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** smlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svint16x2_t, svint16_t, + svmls_single_za32_s16_vg2x2 (0, z1, z0), + svmls_za32_vg2x2 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** smlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svint16x2_t, svint16_t, + svmls_single_za32_s16_vg2x2 (w0, z1, z0), + svmls_za32_vg2x2 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** smlsl za\.s\[w8, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svint16x2_t, svint16_t, + svmls_single_za32_s16_vg2x2 (w8, z1, z0), + svmls_za32_vg2x2 (w8, z1, z0)) + +/* +** mls_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, 
svint16x2_t, svint16_t, + svmls_single_za32_s16_vg2x2 (w8 + 1, z1, z0), + svmls_za32_vg2x2 (w8 + 1, z1, z0)) + +/* +** mls_single_w8p4_z20_z0: +** smlsl za\.s\[w8, 4:5, vgx2\], {z20\.h - z21\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svint16x2_t, svint16_t, + svmls_single_za32_s16_vg2x2 (w8 + 4, z20, z0), + svmls_za32_vg2x2 (w8 + 4, z20, z0)) + +/* +** mls_single_w8p6_z27_z0: +** smlsl za\.s\[w8, 6:7, vgx2\], {z27\.h - z28\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svint16x2_t, svint16_t, + svmls_single_za32_s16_vg2x2 (w8 + 6, z27, z0), + svmls_za32_vg2x2 (w8 + 6, z27, z0)) + +/* +** mls_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svint16x2_t, svint16_t, + svmls_single_za32_s16_vg2x2 (w8 + 7, z1, z0), + svmls_za32_vg2x2 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** smlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svint16x2_t, svint16_t, + svmls_single_za32_s16_vg2x2 (w8 + 8, z1, z0), + svmls_za32_vg2x2 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** smlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svint16x2_t, svint16_t, + svmls_single_za32_s16_vg2x2 (w0 - 1, z1, z0), + svmls_za32_vg2x2 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** smlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svint16x2_t, svint16_t, + svmls_single_za32_s16_vg2x2 (w8, z0, z15), + svmls_za32_vg2x2 (w8, z0, z15)) + +/* +** mls_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** smlsl za\.s\[w8, 0:1, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z20_z16, svint16x2_t, svint16_t, + svmls_single_za32_s16_vg2x2 (w8, z20, z16), + svmls_za32_vg2x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s16_vg2x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s16_vg2x4.c new file mode 100644 index 0000000000000000000000000000000000000000..d026f74b8f1dcae1d7eceb61a75d3e5b0b4bf790 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s16_vg2x4.c @@ -0,0 +1,258 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** smlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svint16x4_t, + svmls_za32_s16_vg2x4 (0, z0, z0), + svmls_za32_vg2x4 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** smlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svint16x4_t, + svmls_za32_s16_vg2x4 (w0, z0, z0), + svmls_za32_vg2x4 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** smlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svint16x4_t, + svmls_za32_s16_vg2x4 (w8, z0, z4), + svmls_za32_vg2x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mls_w8_z0_z18: +** ... +** smlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z18, svint16x4_t, + svmls_za32_s16_vg2x4 (w8, z0, z18), + svmls_za32_vg2x4 (w8, z0, z18)) + +/* +** mls_w8_z18_z0: +** ... 
+** smlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z0, svint16x4_t, + svmls_za32_s16_vg2x4 (w8, z18, z0), + svmls_za32_vg2x4 (w8, z18, z0)) + +/* +** mls_w8_z0_z23: +** ... +** smlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z23, svint16x4_t, + svmls_za32_s16_vg2x4 (w8, z0, z23), + svmls_za32_vg2x4 (w8, z0, z23)) + +/* +** mls_w8_z23_z0: +** ... +** smlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z23_z0, svint16x4_t, + svmls_za32_s16_vg2x4 (w8, z23, z0), + svmls_za32_vg2x4 (w8, z23, z0)) + +/* +** mls_w8_z4_z28: +** smlsl za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z28, svint16x4_t, + svmls_za32_s16_vg2x4 (w8, z4, z28), + svmls_za32_vg2x4 (w8, z4, z28)) + +/* +** mls_w8_z28_z0: +** smlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z28_z0, svint16x4_t, + svmls_za32_s16_vg2x4 (w8, z28, z0), + svmls_za32_vg2x4 (w8, z28, z0)) + +/* +** mls_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p1_z4_z0, svint16x4_t, + svmls_za32_s16_vg2x4 (w8 + 1, z4, z0), + svmls_za32_vg2x4 (w8 + 1, z4, z0)) + +/* +** mls_w8p2_z4_z0: +** smlsl za\.s\[w8, 2:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p2_z4_z0, svint16x4_t, + svmls_za32_s16_vg2x4 (w8 + 2, z4, z0), + svmls_za32_vg2x4 (w8 + 2, z4, z0)) + +/* +** mls_w8p6_z4_z0: +** smlsl za\.s\[w8, 6:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p6_z4_z0, svint16x4_t, + svmls_za32_s16_vg2x4 (w8 + 6, z4, z0), + svmls_za32_vg2x4 (w8 + 6, z4, z0)) + +/* +** mls_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svint16x4_t, + svmls_za32_s16_vg2x4 (w8 + 7, z4, z0), + svmls_za32_vg2x4 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** smlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svint16x4_t, + svmls_za32_s16_vg2x4 (w8 + 8, z4, z4), + svmls_za32_vg2x4 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** smlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svint16x4_t, + svmls_za32_s16_vg2x4 (w8 - 1, z4, z0), + svmls_za32_vg2x4 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** smlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svint16x4_t, svint16_t, + svmls_single_za32_s16_vg2x4 (0, z1, z0), + svmls_za32_vg2x4 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** smlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svint16x4_t, svint16_t, + svmls_single_za32_s16_vg2x4 (w0, z1, z0), + svmls_za32_vg2x4 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** smlsl za\.s\[w8, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svint16x4_t, svint16_t, + svmls_single_za32_s16_vg2x4 (w8, z1, z0), + svmls_za32_vg2x4 (w8, z1, z0)) + +/* +** mls_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svint16x4_t, svint16_t, + svmls_single_za32_s16_vg2x4 (w8 + 1, z1, z0), + 
svmls_za32_vg2x4 (w8 + 1, z1, z0)) + +/* +** mls_single_w8p4_z20_z0: +** smlsl za\.s\[w8, 4:5, vgx4\], {z20\.h - z23\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svint16x4_t, svint16_t, + svmls_single_za32_s16_vg2x4 (w8 + 4, z20, z0), + svmls_za32_vg2x4 (w8 + 4, z20, z0)) + +/* +** mls_single_w8p6_z27_z0: +** smlsl za\.s\[w8, 6:7, vgx4\], {z27\.h - z30\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svint16x4_t, svint16_t, + svmls_single_za32_s16_vg2x4 (w8 + 6, z27, z0), + svmls_za32_vg2x4 (w8 + 6, z27, z0)) + +/* +** mls_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svint16x4_t, svint16_t, + svmls_single_za32_s16_vg2x4 (w8 + 7, z1, z0), + svmls_za32_vg2x4 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** smlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svint16x4_t, svint16_t, + svmls_single_za32_s16_vg2x4 (w8 + 8, z1, z0), + svmls_za32_vg2x4 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** smlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svint16x4_t, svint16_t, + svmls_single_za32_s16_vg2x4 (w0 - 1, z1, z0), + svmls_za32_vg2x4 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** smlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svint16x4_t, svint16_t, + svmls_single_za32_s16_vg2x4 (w8, z0, z15), + svmls_za32_vg2x4 (w8, z0, z15)) + +/* +** mls_single_w8_z20_z16: +** mov (z[0-7])\.d, z16\.d +** smlsl za\.s\[w8, 0:1, vgx4\], {z20\.h - z23\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z20_z16, svint16x4_t, svint16_t, + svmls_single_za32_s16_vg2x4 (w8, z20, z16), + svmls_za32_vg2x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s8_vg4x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s8_vg4x1.c new file mode 100644 index 0000000000000000000000000000000000000000..0937c497b849e168f47498514edfba8f8d833da3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s8_vg4x1.c @@ -0,0 +1,149 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** smlsll za\.s\[\1, 0:3\], z0\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mls_0_z0_z0, svint8_t, + svmls_za32_s8_vg4x1 (0, z0, z0), + svmls_za32_vg4x1 (0, z0, z0)) + +/* +** mls_w0_z0_z3: +** mov (w8|w9|w10|w11), w0 +** smlsll za\.s\[\1, 0:3\], z0\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mls_w0_z0_z3, svint8_t, + svmls_za32_s8_vg4x1 (w0, z0, z3), + svmls_za32_vg4x1 (w0, z0, z3)) + +/* +** mls_w7_z0_z3: +** mov (w8|w9|w10|w11), w7 +** smlsll za\.s\[\1, 0:3\], z0\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mls_w7_z0_z3, svint8_t, + svmls_za32_s8_vg4x1 (w7, z0, z3), + svmls_za32_vg4x1 (w7, z0, z3)) + +/* +** mls_w8_z7_z3: +** smlsll za\.s\[w8, 0:3\], z7\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mls_w8_z7_z3, svint8_t, + svmls_za32_s8_vg4x1 (w8, z7, z3), + svmls_za32_vg4x1 (w8, z7, z3)) + +/* +** mls_w8_z31_z16: +** mov (z[0-7])\.d, z16\.d +** smlsll za\.s\[w8, 0:3\], z31\.b, 
\1\.b +** ret +*/ +TEST_ZA_X1 (mls_w8_z31_z16, svint8_t, + svmls_za32_s8_vg4x1 (w8, z31, z16), + svmls_za32_vg4x1 (w8, z31, z16)) + +/* +** mls_w8p1_z0_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlsll za\.s\[\1, 0:3\], z0\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mls_w8p1_z0_z0, svint8_t, + svmls_za32_s8_vg4x1 (w8 + 1, z0, z0), + svmls_za32_vg4x1 (w8 + 1, z0, z0)) + +/* +** mls_w10p4_z23_z0: +** smlsll za\.s\[w10, 4:7\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mls_w10p4_z23_z0, svint8_t, + svmls_za32_s8_vg4x1 (w10 + 4, z23, z0), + svmls_za32_vg4x1 (w10 + 4, z23, z0)) + +/* +** mls_w11p6_z23_z0: +** add (w8|w9|w10|w11), w11, #?6 +** smlsll za\.s\[\1, 0:3\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mls_w11p6_z23_z0, svint8_t, + svmls_za32_s8_vg4x1 (w11 + 6, z23, z0), + svmls_za32_vg4x1 (w11 + 6, z23, z0)) + +/* +** mls_w9p8_z7_z7: +** smlsll za\.s\[w9, 8:11\], z7\.b, z7\.b +** ret +*/ +TEST_ZA_X1 (mls_w9p8_z7_z7, svint8_t, + svmls_za32_s8_vg4x1 (w9 + 8, z7, z7), + svmls_za32_vg4x1 (w9 + 8, z7, z7)) + +/* +** mls_w11p12_z23_z0: +** smlsll za\.s\[w11, 12:15\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mls_w11p12_z23_z0, svint8_t, + svmls_za32_s8_vg4x1 (w11 + 12, z23, z0), + svmls_za32_vg4x1 (w11 + 12, z23, z0)) + +/* +** mls_w8p14_z23_z0: +** add (w8|w9|w10|w11), w8, #?14 +** smlsll za\.s\[\1, 0:3\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mls_w8p14_z23_z0, svint8_t, + svmls_za32_s8_vg4x1 (w8 + 14, z23, z0), + svmls_za32_vg4x1 (w8 + 14, z23, z0)) + +/* +** mls_w8p15_z7_z7: +** add (w8|w9|w10|w11), w8, #?15 +** smlsll za\.s\[\1, 0:3\], z7\.b, z7\.b +** ret +*/ +TEST_ZA_X1 (mls_w8p15_z7_z7, svint8_t, + svmls_za32_s8_vg4x1 (w8 + 15, z7, z7), + svmls_za32_vg4x1 (w8 + 15, z7, z7)) + +/* +** mls_w8p16_z7_z7: +** add (w8|w9|w10|w11), w8, #?16 +** smlsll za\.s\[\1, 0:3\], z7\.b, z7\.b +** ret +*/ +TEST_ZA_X1 (mls_w8p16_z7_z7, svint8_t, + svmls_za32_s8_vg4x1 (w8 + 16, z7, z7), + svmls_za32_vg4x1 (w8 + 16, z7, z7)) + +/* +** mls_w8m1_z16_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** smlsll za\.s\[\1, 0:3\], z16\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mls_w8m1_z16_z0, svint8_t, + svmls_za32_s8_vg4x1 (w8 - 1, z16, z0), + svmls_za32_vg4x1 (w8 - 1, z16, z0)) + +/* +** mls_w12_z0_z3: +** mov (w8|w9|w10|w11), w12 +** smlsll za\.s\[\1, 0:3\], z0\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mls_w12_z0_z3, svint8_t, + svmls_za32_s8_vg4x1 (w12, z0, z3), + svmls_za32_vg4x1 (w12, z0, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s8_vg4x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s8_vg4x2.c new file mode 100644 index 0000000000000000000000000000000000000000..4dcfac5776891caf85aa4a64fdafa601ddbbd3d3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s8_vg4x2.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** smlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svint8x2_t, + svmls_za32_s8_vg4x2 (0, z0, z0), + svmls_za32_vg4x2 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** smlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svint8x2_t, + svmls_za32_s8_vg4x2 (w0, z0, z0), + svmls_za32_vg4x2 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** smlsll za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svint8x2_t, + svmls_za32_s8_vg4x2 (w8, z0, z4), + svmls_za32_vg4x2 (w8, z0, z4)) + +/* +** mls_w8_z4_z18: +** 
smlsll za\.s\[w8, 0:3, vgx2\], {z4\.b - z5\.b}, {z18\.b - z19\.b} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z18, svint8x2_t, + svmls_za32_s8_vg4x2 (w8, z4, z18), + svmls_za32_vg4x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mls_w8_z0_z23: +** ... +** smlsll za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z23, svint8x2_t, + svmls_za32_s8_vg4x2 (w8, z0, z23), + svmls_za32_vg4x2 (w8, z0, z23)) + +/* +** mls_w8_z23_z0: +** ... +** smlsll za\.s\[w8, 0:3, vgx2\], [^\n]+, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mls_w8_z23_z0, svint8x2_t, + svmls_za32_s8_vg4x2 (w8, z23, z0), + svmls_za32_vg4x2 (w8, z23, z0)) + +/* +** mls_w8_z18_z28: +** smlsll za\.s\[w8, 0:3, vgx2\], {z18\.b - z19\.b}, {z28\.b - z29\.b} +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z28, svint8x2_t, + svmls_za32_s8_vg4x2 (w8, z18, z28), + svmls_za32_vg4x2 (w8, z18, z28)) + +/* +** mls_w8_z28_z4: +** smlsll za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (mls_w8_z28_z4, svint8x2_t, + svmls_za32_s8_vg4x2 (w8, z28, z4), + svmls_za32_vg4x2 (w8, z28, z4)) + +/* +** mls_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlsll za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mls_w8p1_z4_z0, svint8x2_t, + svmls_za32_s8_vg4x2 (w8 + 1, z4, z0), + svmls_za32_vg4x2 (w8 + 1, z4, z0)) + +/* +** mls_w8p2_z4_z0: +** add (w8|w9|w10|w11), w8, #?2 +** smlsll za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mls_w8p2_z4_z0, svint8x2_t, + svmls_za32_s8_vg4x2 (w8 + 2, z4, z0), + svmls_za32_vg4x2 (w8 + 2, z4, z0)) + +/* +** mls_w11p4_z4_z0: +** smlsll za\.s\[w11, 4:7, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mls_w11p4_z4_z0, svint8x2_t, + svmls_za32_s8_vg4x2 (w11 + 4, z4, z0), + svmls_za32_vg4x2 (w11 + 4, z4, z0)) + +/* +** mls_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlsll za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svint8x2_t, + svmls_za32_s8_vg4x2 (w8 + 7, z4, z0), + svmls_za32_vg4x2 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** smlsll za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svint8x2_t, + svmls_za32_s8_vg4x2 (w8 + 8, z4, z4), + svmls_za32_vg4x2 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** smlsll za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svint8x2_t, + svmls_za32_s8_vg4x2 (w8 - 1, z4, z0), + svmls_za32_vg4x2 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** smlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svint8x2_t, svint8_t, + svmls_single_za32_s8_vg4x2 (0, z1, z0), + svmls_za32_vg4x2 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** smlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svint8x2_t, svint8_t, + svmls_single_za32_s8_vg4x2 (w0, z1, z0), + svmls_za32_vg4x2 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** smlsll za\.s\[w8, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svint8x2_t, svint8_t, + svmls_single_za32_s8_vg4x2 (w8, z1, z0), + svmls_za32_vg4x2 (w8, z1, z0)) + +/* +** mls_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ 
+TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svint8x2_t, svint8_t, + svmls_single_za32_s8_vg4x2 (w8 + 1, z1, z0), + svmls_za32_vg4x2 (w8 + 1, z1, z0)) + +/* +** mls_single_w8p2_z20_z0: +** add (w8|w9|w10|w11), w8, #?2 +** smlsll za\.s\[\1, 0:3, vgx2\], {z20\.b - z21\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p2_z20_z0, svint8x2_t, svint8_t, + svmls_single_za32_s8_vg4x2 (w8 + 2, z20, z0), + svmls_za32_vg4x2 (w8 + 2, z20, z0)) + +/* +** mls_single_w11p4_z27_z0: +** smlsll za\.s\[w11, 4:7, vgx2\], {z27\.b - z28\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w11p4_z27_z0, svint8x2_t, svint8_t, + svmls_single_za32_s8_vg4x2 (w11 + 4, z27, z0), + svmls_za32_vg4x2 (w11 + 4, z27, z0)) + +/* +** mls_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svint8x2_t, svint8_t, + svmls_single_za32_s8_vg4x2 (w8 + 7, z1, z0), + svmls_za32_vg4x2 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** smlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svint8x2_t, svint8_t, + svmls_single_za32_s8_vg4x2 (w8 + 8, z1, z0), + svmls_za32_vg4x2 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** smlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svint8x2_t, svint8_t, + svmls_single_za32_s8_vg4x2 (w0 - 1, z1, z0), + svmls_za32_vg4x2 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** smlsll za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svint8x2_t, svint8_t, + svmls_single_za32_s8_vg4x2 (w8, z0, z15), + svmls_za32_vg4x2 (w8, z0, z15)) + +/* +** mls_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** smlsll za\.s\[w8, 0:3, vgx2\], {z20\.b - z21\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z20_z16, svint8x2_t, svint8_t, + svmls_single_za32_s8_vg4x2 (w8, z20, z16), + svmls_za32_vg4x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s8_vg4x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s8_vg4x4.c new file mode 100644 index 0000000000000000000000000000000000000000..86e66275b3a8414d8391d5d8509bc5665ba928c0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_s8_vg4x4.c @@ -0,0 +1,260 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** smlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svint8x4_t, + svmls_za32_s8_vg4x4 (0, z0, z0), + svmls_za32_vg4x4 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** smlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svint8x4_t, + svmls_za32_s8_vg4x4 (w0, z0, z0), + svmls_za32_vg4x4 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** smlsll za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svint8x4_t, + svmls_za32_s8_vg4x4 (w8, z0, z4), + svmls_za32_vg4x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mls_w8_z0_z18: +** ... 
+** smlsll za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z18, svint8x4_t, + svmls_za32_s8_vg4x4 (w8, z0, z18), + svmls_za32_vg4x4 (w8, z0, z18)) + +/* +** mls_w8_z18_z0: +** ... +** smlsll za\.s\[w8, 0:3, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z0, svint8x4_t, + svmls_za32_s8_vg4x4 (w8, z18, z0), + svmls_za32_vg4x4 (w8, z18, z0)) + +/* +** mls_w8_z0_z23: +** ... +** smlsll za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z23, svint8x4_t, + svmls_za32_s8_vg4x4 (w8, z0, z23), + svmls_za32_vg4x4 (w8, z0, z23)) + +/* +** mls_w8_z23_z0: +** ... +** smlsll za\.s\[w8, 0:3, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mls_w8_z23_z0, svint8x4_t, + svmls_za32_s8_vg4x4 (w8, z23, z0), + svmls_za32_vg4x4 (w8, z23, z0)) + +/* +** mls_w8_z4_z28: +** smlsll za\.s\[w8, 0:3, vgx4\], {z4\.b - z7\.b}, {z28\.b - z31\.b} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z28, svint8x4_t, + svmls_za32_s8_vg4x4 (w8, z4, z28), + svmls_za32_vg4x4 (w8, z4, z28)) + +/* +** mls_w8_z28_z0: +** smlsll za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mls_w8_z28_z0, svint8x4_t, + svmls_za32_s8_vg4x4 (w8, z28, z0), + svmls_za32_vg4x4 (w8, z28, z0)) + +/* +** mls_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlsll za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mls_w8p1_z4_z0, svint8x4_t, + svmls_za32_s8_vg4x4 (w8 + 1, z4, z0), + svmls_za32_vg4x4 (w8 + 1, z4, z0)) + +/* +** mls_w8p2_z4_z0: +** add (w8|w9|w10|w11), w8, #?2 +** smlsll za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mls_w8p2_z4_z0, svint8x4_t, + svmls_za32_s8_vg4x4 (w8 + 2, z4, z0), + svmls_za32_vg4x4 (w8 + 2, z4, z0)) + +/* +** mls_w11p4_z4_z0: +** smlsll za\.s\[w11, 4:7, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mls_w11p4_z4_z0, svint8x4_t, + svmls_za32_s8_vg4x4 (w11 + 4, z4, z0), + svmls_za32_vg4x4 (w11 + 4, z4, z0)) + +/* +** mls_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlsll za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svint8x4_t, + svmls_za32_s8_vg4x4 (w8 + 7, z4, z0), + svmls_za32_vg4x4 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** smlsll za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svint8x4_t, + svmls_za32_s8_vg4x4 (w8 + 8, z4, z4), + svmls_za32_vg4x4 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** smlsll za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svint8x4_t, + svmls_za32_s8_vg4x4 (w8 - 1, z4, z0), + svmls_za32_vg4x4 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** smlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svint8x4_t, svint8_t, + svmls_single_za32_s8_vg4x4 (0, z1, z0), + svmls_za32_vg4x4 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** smlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svint8x4_t, svint8_t, + svmls_single_za32_s8_vg4x4 (w0, z1, z0), + svmls_za32_vg4x4 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** smlsll za\.s\[w8, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svint8x4_t, svint8_t, + svmls_single_za32_s8_vg4x4 (w8, z1, z0), + svmls_za32_vg4x4 (w8, z1, z0)) + +/* +** 
mls_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svint8x4_t, svint8_t, + svmls_single_za32_s8_vg4x4 (w8 + 1, z1, z0), + svmls_za32_vg4x4 (w8 + 1, z1, z0)) + +/* +** mls_single_w8p4_z20_z0: +** smlsll za\.s\[w8, 4:7, vgx4\], {z20\.b - z23\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svint8x4_t, svint8_t, + svmls_single_za32_s8_vg4x4 (w8 + 4, z20, z0), + svmls_za32_vg4x4 (w8 + 4, z20, z0)) + +/* +** mls_single_w8p6_z27_z0: +** add (w8|w9|w10|w11), w8, #?6 +** smlsll za\.s\[\1, 0:3, vgx4\], {z27\.b - z30\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svint8x4_t, svint8_t, + svmls_single_za32_s8_vg4x4 (w8 + 6, z27, z0), + svmls_za32_vg4x4 (w8 + 6, z27, z0)) + +/* +** mls_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svint8x4_t, svint8_t, + svmls_single_za32_s8_vg4x4 (w8 + 7, z1, z0), + svmls_za32_vg4x4 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** smlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svint8x4_t, svint8_t, + svmls_single_za32_s8_vg4x4 (w8 + 8, z1, z0), + svmls_za32_vg4x4 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** smlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svint8x4_t, svint8_t, + svmls_single_za32_s8_vg4x4 (w0 - 1, z1, z0), + svmls_za32_vg4x4 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** smlsll za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svint8x4_t, svint8_t, + svmls_single_za32_s8_vg4x4 (w8, z0, z15), + svmls_za32_vg4x4 (w8, z0, z15)) + +/* +** mls_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** smlsll za\.s\[w8, 0:3, vgx4\], {z20\.b - z23\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z20_z16, svint8x4_t, svint8_t, + svmls_single_za32_s8_vg4x4 (w8, z20, z16), + svmls_za32_vg4x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u16_vg2x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u16_vg2x1.c new file mode 100644 index 0000000000000000000000000000000000000000..fcc7057bd657a23bc445c6c2812fd52dd6b819fd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u16_vg2x1.c @@ -0,0 +1,148 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** umlsl za\.s\[\1, 0:1\], z0\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_0_z0_z0, svuint16_t, + svmls_za32_u16_vg2x1 (0, z0, z0), + svmls_za32_vg2x1 (0, z0, z0)) + +/* +** mls_w0_z0_z3: +** mov (w8|w9|w10|w11), w0 +** umlsl za\.s\[\1, 0:1\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mls_w0_z0_z3, svuint16_t, + svmls_za32_u16_vg2x1 (w0, z0, z3), + svmls_za32_vg2x1 (w0, z0, z3)) + +/* +** mls_w7_z0_z3: +** mov (w8|w9|w10|w11), w7 +** umlsl za\.s\[\1, 0:1\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mls_w7_z0_z3, svuint16_t, + svmls_za32_u16_vg2x1 (w7, z0, z3), + svmls_za32_vg2x1 (w7, z0, z3)) + +/* +** mls_w8_z7_z3: +** umlsl za\.s\[w8, 0:1\], z7\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mls_w8_z7_z3, svuint16_t, + svmls_za32_u16_vg2x1 (w8, z7, z3), + svmls_za32_vg2x1 (w8, z7, z3)) + +/* +** mls_w8_z31_z16: +** 
mov (z[0-7])\.d, z16\.d +** umlsl za\.s\[w8, 0:1\], z31\.h, \1\.h +** ret +*/ +TEST_ZA_X1 (mls_w8_z31_z16, svuint16_t, + svmls_za32_u16_vg2x1 (w8, z31, z16), + svmls_za32_vg2x1 (w8, z31, z16)) + +/* +** mls_w8p1_z0_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlsl za\.s\[\1, 0:1\], z0\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p1_z0_z0, svuint16_t, + svmls_za32_u16_vg2x1 (w8 + 1, z0, z0), + svmls_za32_vg2x1 (w8 + 1, z0, z0)) + +/* +** mls_w8p2_z23_z0: +** umlsl za\.s\[w8, 2:3\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p2_z23_z0, svuint16_t, + svmls_za32_u16_vg2x1 (w8 + 2, z23, z0), + svmls_za32_vg2x1 (w8 + 2, z23, z0)) + +/* +** mls_w11p6_z23_z0: +** umlsl za\.s\[w11, 6:7\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w11p6_z23_z0, svuint16_t, + svmls_za32_u16_vg2x1 (w11 + 6, z23, z0), + svmls_za32_vg2x1 (w11 + 6, z23, z0)) + +/* +** mls_w8p7_z7_z7: +** add (w8|w9|w10|w11), w8, #?7 +** umlsl za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p7_z7_z7, svuint16_t, + svmls_za32_u16_vg2x1 (w8 + 7, z7, z7), + svmls_za32_vg2x1 (w8 + 7, z7, z7)) + +/* +** mls_w11p10_z23_z0: +** umlsl za\.s\[w11, 10:11\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w11p10_z23_z0, svuint16_t, + svmls_za32_u16_vg2x1 (w11 + 10, z23, z0), + svmls_za32_vg2x1 (w11 + 10, z23, z0)) + +/* +** mls_w8p14_z23_z0: +** umlsl za\.s\[w8, 14:15\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p14_z23_z0, svuint16_t, + svmls_za32_u16_vg2x1 (w8 + 14, z23, z0), + svmls_za32_vg2x1 (w8 + 14, z23, z0)) + +/* +** mls_w8p15_z7_z7: +** add (w8|w9|w10|w11), w8, #?15 +** umlsl za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p15_z7_z7, svuint16_t, + svmls_za32_u16_vg2x1 (w8 + 15, z7, z7), + svmls_za32_vg2x1 (w8 + 15, z7, z7)) + +/* +** mls_w8p16_z7_z7: +** add (w8|w9|w10|w11), w8, #?16 +** umlsl za\.s\[\1, 0:1\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p16_z7_z7, svuint16_t, + svmls_za32_u16_vg2x1 (w8 + 16, z7, z7), + svmls_za32_vg2x1 (w8 + 16, z7, z7)) + +/* +** mls_w8m1_z16_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** umlsl za\.s\[\1, 0:1\], z16\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8m1_z16_z0, svuint16_t, + svmls_za32_u16_vg2x1 (w8 - 1, z16, z0), + svmls_za32_vg2x1 (w8 - 1, z16, z0)) + +/* +** mls_w12_z0_z3: +** mov (w8|w9|w10|w11), w12 +** umlsl za\.s\[\1, 0:1\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mls_w12_z0_z3, svuint16_t, + svmls_za32_u16_vg2x1 (w12, z0, z3), + svmls_za32_vg2x1 (w12, z0, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u16_vg2x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u16_vg2x2.c new file mode 100644 index 0000000000000000000000000000000000000000..ae283d73ac5a0e36c0501fdf5f30d96757659c05 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u16_vg2x2.c @@ -0,0 +1,247 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** umlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svuint16x2_t, + svmls_za32_u16_vg2x2 (0, z0, z0), + svmls_za32_vg2x2 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** umlsl za\.s\[\1, 0:1, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svuint16x2_t, + svmls_za32_u16_vg2x2 (w0, z0, z0), + svmls_za32_vg2x2 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** umlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svuint16x2_t, + svmls_za32_u16_vg2x2 (w8, z0, z4), + 
svmls_za32_vg2x2 (w8, z0, z4)) + +/* +** mls_w8_z4_z18: +** umlsl za\.s\[w8, 0:1, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z18, svuint16x2_t, + svmls_za32_u16_vg2x2 (w8, z4, z18), + svmls_za32_vg2x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mls_w8_z0_z23: +** ... +** umlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z23, svuint16x2_t, + svmls_za32_u16_vg2x2 (w8, z0, z23), + svmls_za32_vg2x2 (w8, z0, z23)) + +/* +** mls_w8_z23_z0: +** ... +** umlsl za\.s\[w8, 0:1, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z23_z0, svuint16x2_t, + svmls_za32_u16_vg2x2 (w8, z23, z0), + svmls_za32_vg2x2 (w8, z23, z0)) + +/* +** mls_w8_z18_z28: +** umlsl za\.s\[w8, 0:1, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z28, svuint16x2_t, + svmls_za32_u16_vg2x2 (w8, z18, z28), + svmls_za32_vg2x2 (w8, z18, z28)) + +/* +** mls_w8_z28_z4: +** umlsl za\.s\[w8, 0:1, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z28_z4, svuint16x2_t, + svmls_za32_u16_vg2x2 (w8, z28, z4), + svmls_za32_vg2x2 (w8, z28, z4)) + +/* +** mls_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p1_z4_z0, svuint16x2_t, + svmls_za32_u16_vg2x2 (w8 + 1, z4, z0), + svmls_za32_vg2x2 (w8 + 1, z4, z0)) + +/* +** mls_w8p2_z4_z0: +** umlsl za\.s\[w8, 2:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p2_z4_z0, svuint16x2_t, + svmls_za32_u16_vg2x2 (w8 + 2, z4, z0), + svmls_za32_vg2x2 (w8 + 2, z4, z0)) + +/* +** mls_w8p6_z4_z0: +** umlsl za\.s\[w8, 6:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p6_z4_z0, svuint16x2_t, + svmls_za32_u16_vg2x2 (w8 + 6, z4, z0), + svmls_za32_vg2x2 (w8 + 6, z4, z0)) + +/* +** mls_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svuint16x2_t, + svmls_za32_u16_vg2x2 (w8 + 7, z4, z0), + svmls_za32_vg2x2 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** umlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svuint16x2_t, + svmls_za32_u16_vg2x2 (w8 + 8, z4, z4), + svmls_za32_vg2x2 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** umlsl za\.s\[\1, 0:1, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svuint16x2_t, + svmls_za32_u16_vg2x2 (w8 - 1, z4, z0), + svmls_za32_vg2x2 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** umlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svuint16x2_t, svuint16_t, + svmls_single_za32_u16_vg2x2 (0, z1, z0), + svmls_za32_vg2x2 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** umlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svuint16x2_t, svuint16_t, + svmls_single_za32_u16_vg2x2 (w0, z1, z0), + svmls_za32_vg2x2 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** umlsl za\.s\[w8, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svuint16x2_t, svuint16_t, + svmls_single_za32_u16_vg2x2 (w8, z1, z0), + svmls_za32_vg2x2 (w8, z1, z0)) + +/* +** mls_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlsl 
za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svuint16x2_t, svuint16_t, + svmls_single_za32_u16_vg2x2 (w8 + 1, z1, z0), + svmls_za32_vg2x2 (w8 + 1, z1, z0)) + +/* +** mls_single_w8p4_z20_z0: +** umlsl za\.s\[w8, 4:5, vgx2\], {z20\.h - z21\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svuint16x2_t, svuint16_t, + svmls_single_za32_u16_vg2x2 (w8 + 4, z20, z0), + svmls_za32_vg2x2 (w8 + 4, z20, z0)) + +/* +** mls_single_w8p6_z27_z0: +** umlsl za\.s\[w8, 6:7, vgx2\], {z27\.h - z28\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svuint16x2_t, svuint16_t, + svmls_single_za32_u16_vg2x2 (w8 + 6, z27, z0), + svmls_za32_vg2x2 (w8 + 6, z27, z0)) + +/* +** mls_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svuint16x2_t, svuint16_t, + svmls_single_za32_u16_vg2x2 (w8 + 7, z1, z0), + svmls_za32_vg2x2 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** umlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svuint16x2_t, svuint16_t, + svmls_single_za32_u16_vg2x2 (w8 + 8, z1, z0), + svmls_za32_vg2x2 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** umlsl za\.s\[\1, 0:1, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svuint16x2_t, svuint16_t, + svmls_single_za32_u16_vg2x2 (w0 - 1, z1, z0), + svmls_za32_vg2x2 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** umlsl za\.s\[w8, 0:1, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svuint16x2_t, svuint16_t, + svmls_single_za32_u16_vg2x2 (w8, z0, z15), + svmls_za32_vg2x2 (w8, z0, z15)) + +/* +** mls_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** umlsl za\.s\[w8, 0:1, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z20_z16, svuint16x2_t, svuint16_t, + svmls_single_za32_u16_vg2x2 (w8, z20, z16), + svmls_za32_vg2x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u16_vg2x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u16_vg2x4.c new file mode 100644 index 0000000000000000000000000000000000000000..813e0d102328c0487c3d8440b412ed9ab611ef42 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u16_vg2x4.c @@ -0,0 +1,258 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** umlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svuint16x4_t, + svmls_za32_u16_vg2x4 (0, z0, z0), + svmls_za32_vg2x4 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** umlsl za\.s\[\1, 0:1, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svuint16x4_t, + svmls_za32_u16_vg2x4 (w0, z0, z0), + svmls_za32_vg2x4 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** umlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svuint16x4_t, + svmls_za32_u16_vg2x4 (w8, z0, z4), + svmls_za32_vg2x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mls_w8_z0_z18: +** ... 
+** umlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z18, svuint16x4_t, + svmls_za32_u16_vg2x4 (w8, z0, z18), + svmls_za32_vg2x4 (w8, z0, z18)) + +/* +** mls_w8_z18_z0: +** ... +** umlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z0, svuint16x4_t, + svmls_za32_u16_vg2x4 (w8, z18, z0), + svmls_za32_vg2x4 (w8, z18, z0)) + +/* +** mls_w8_z0_z23: +** ... +** umlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z23, svuint16x4_t, + svmls_za32_u16_vg2x4 (w8, z0, z23), + svmls_za32_vg2x4 (w8, z0, z23)) + +/* +** mls_w8_z23_z0: +** ... +** umlsl za\.s\[w8, 0:1, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z23_z0, svuint16x4_t, + svmls_za32_u16_vg2x4 (w8, z23, z0), + svmls_za32_vg2x4 (w8, z23, z0)) + +/* +** mls_w8_z4_z28: +** umlsl za\.s\[w8, 0:1, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z28, svuint16x4_t, + svmls_za32_u16_vg2x4 (w8, z4, z28), + svmls_za32_vg2x4 (w8, z4, z28)) + +/* +** mls_w8_z28_z0: +** umlsl za\.s\[w8, 0:1, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z28_z0, svuint16x4_t, + svmls_za32_u16_vg2x4 (w8, z28, z0), + svmls_za32_vg2x4 (w8, z28, z0)) + +/* +** mls_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p1_z4_z0, svuint16x4_t, + svmls_za32_u16_vg2x4 (w8 + 1, z4, z0), + svmls_za32_vg2x4 (w8 + 1, z4, z0)) + +/* +** mls_w8p2_z4_z0: +** umlsl za\.s\[w8, 2:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p2_z4_z0, svuint16x4_t, + svmls_za32_u16_vg2x4 (w8 + 2, z4, z0), + svmls_za32_vg2x4 (w8 + 2, z4, z0)) + +/* +** mls_w8p6_z4_z0: +** umlsl za\.s\[w8, 6:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p6_z4_z0, svuint16x4_t, + svmls_za32_u16_vg2x4 (w8 + 6, z4, z0), + svmls_za32_vg2x4 (w8 + 6, z4, z0)) + +/* +** mls_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svuint16x4_t, + svmls_za32_u16_vg2x4 (w8 + 7, z4, z0), + svmls_za32_vg2x4 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** umlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svuint16x4_t, + svmls_za32_u16_vg2x4 (w8 + 8, z4, z4), + svmls_za32_vg2x4 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** umlsl za\.s\[\1, 0:1, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svuint16x4_t, + svmls_za32_u16_vg2x4 (w8 - 1, z4, z0), + svmls_za32_vg2x4 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** umlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svuint16x4_t, svuint16_t, + svmls_single_za32_u16_vg2x4 (0, z1, z0), + svmls_za32_vg2x4 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** umlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svuint16x4_t, svuint16_t, + svmls_single_za32_u16_vg2x4 (w0, z1, z0), + svmls_za32_vg2x4 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** umlsl za\.s\[w8, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svuint16x4_t, svuint16_t, + svmls_single_za32_u16_vg2x4 (w8, z1, z0), + svmls_za32_vg2x4 (w8, z1, z0)) + +/* +** 
mls_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svuint16x4_t, svuint16_t, + svmls_single_za32_u16_vg2x4 (w8 + 1, z1, z0), + svmls_za32_vg2x4 (w8 + 1, z1, z0)) + +/* +** mls_single_w8p4_z20_z0: +** umlsl za\.s\[w8, 4:5, vgx4\], {z20\.h - z23\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svuint16x4_t, svuint16_t, + svmls_single_za32_u16_vg2x4 (w8 + 4, z20, z0), + svmls_za32_vg2x4 (w8 + 4, z20, z0)) + +/* +** mls_single_w8p6_z27_z0: +** umlsl za\.s\[w8, 6:7, vgx4\], {z27\.h - z30\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svuint16x4_t, svuint16_t, + svmls_single_za32_u16_vg2x4 (w8 + 6, z27, z0), + svmls_za32_vg2x4 (w8 + 6, z27, z0)) + +/* +** mls_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svuint16x4_t, svuint16_t, + svmls_single_za32_u16_vg2x4 (w8 + 7, z1, z0), + svmls_za32_vg2x4 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** umlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svuint16x4_t, svuint16_t, + svmls_single_za32_u16_vg2x4 (w8 + 8, z1, z0), + svmls_za32_vg2x4 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** umlsl za\.s\[\1, 0:1, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svuint16x4_t, svuint16_t, + svmls_single_za32_u16_vg2x4 (w0 - 1, z1, z0), + svmls_za32_vg2x4 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** umlsl za\.s\[w8, 0:1, vgx4\], {z0\.h - z3\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svuint16x4_t, svuint16_t, + svmls_single_za32_u16_vg2x4 (w8, z0, z15), + svmls_za32_vg2x4 (w8, z0, z15)) + +/* +** mls_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** umlsl za\.s\[w8, 0:1, vgx4\], {z20\.h - z23\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z20_z16, svuint16x4_t, svuint16_t, + svmls_single_za32_u16_vg2x4 (w8, z20, z16), + svmls_za32_vg2x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u8_vg4x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u8_vg4x1.c new file mode 100644 index 0000000000000000000000000000000000000000..064bbe359a634f2adb2ecdd052cb5317fd3cdbba --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u8_vg4x1.c @@ -0,0 +1,149 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** umlsll za\.s\[\1, 0:3\], z0\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mls_0_z0_z0, svuint8_t, + svmls_za32_u8_vg4x1 (0, z0, z0), + svmls_za32_vg4x1 (0, z0, z0)) + +/* +** mls_w0_z0_z3: +** mov (w8|w9|w10|w11), w0 +** umlsll za\.s\[\1, 0:3\], z0\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mls_w0_z0_z3, svuint8_t, + svmls_za32_u8_vg4x1 (w0, z0, z3), + svmls_za32_vg4x1 (w0, z0, z3)) + +/* +** mls_w7_z0_z3: +** mov (w8|w9|w10|w11), w7 +** umlsll za\.s\[\1, 0:3\], z0\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mls_w7_z0_z3, svuint8_t, + svmls_za32_u8_vg4x1 (w7, z0, z3), + svmls_za32_vg4x1 (w7, z0, z3)) + +/* +** mls_w8_z7_z3: +** umlsll za\.s\[w8, 0:3\], z7\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mls_w8_z7_z3, svuint8_t, + svmls_za32_u8_vg4x1 (w8, z7, z3), + svmls_za32_vg4x1 (w8, z7, z3)) + +/* +** mls_w8_z31_z16: +** mov 
(z[0-7])\.d, z16\.d +** umlsll za\.s\[w8, 0:3\], z31\.b, \1\.b +** ret +*/ +TEST_ZA_X1 (mls_w8_z31_z16, svuint8_t, + svmls_za32_u8_vg4x1 (w8, z31, z16), + svmls_za32_vg4x1 (w8, z31, z16)) + +/* +** mls_w8p1_z0_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlsll za\.s\[\1, 0:3\], z0\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mls_w8p1_z0_z0, svuint8_t, + svmls_za32_u8_vg4x1 (w8 + 1, z0, z0), + svmls_za32_vg4x1 (w8 + 1, z0, z0)) + +/* +** mls_w10p4_z23_z0: +** umlsll za\.s\[w10, 4:7\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mls_w10p4_z23_z0, svuint8_t, + svmls_za32_u8_vg4x1 (w10 + 4, z23, z0), + svmls_za32_vg4x1 (w10 + 4, z23, z0)) + +/* +** mls_w11p6_z23_z0: +** add (w8|w9|w10|w11), w11, #?6 +** umlsll za\.s\[\1, 0:3\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mls_w11p6_z23_z0, svuint8_t, + svmls_za32_u8_vg4x1 (w11 + 6, z23, z0), + svmls_za32_vg4x1 (w11 + 6, z23, z0)) + +/* +** mls_w9p8_z7_z7: +** umlsll za\.s\[w9, 8:11\], z7\.b, z7\.b +** ret +*/ +TEST_ZA_X1 (mls_w9p8_z7_z7, svuint8_t, + svmls_za32_u8_vg4x1 (w9 + 8, z7, z7), + svmls_za32_vg4x1 (w9 + 8, z7, z7)) + +/* +** mls_w11p12_z23_z0: +** umlsll za\.s\[w11, 12:15\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mls_w11p12_z23_z0, svuint8_t, + svmls_za32_u8_vg4x1 (w11 + 12, z23, z0), + svmls_za32_vg4x1 (w11 + 12, z23, z0)) + +/* +** mls_w8p14_z23_z0: +** add (w8|w9|w10|w11), w8, #?14 +** umlsll za\.s\[\1, 0:3\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mls_w8p14_z23_z0, svuint8_t, + svmls_za32_u8_vg4x1 (w8 + 14, z23, z0), + svmls_za32_vg4x1 (w8 + 14, z23, z0)) + +/* +** mls_w8p15_z7_z7: +** add (w8|w9|w10|w11), w8, #?15 +** umlsll za\.s\[\1, 0:3\], z7\.b, z7\.b +** ret +*/ +TEST_ZA_X1 (mls_w8p15_z7_z7, svuint8_t, + svmls_za32_u8_vg4x1 (w8 + 15, z7, z7), + svmls_za32_vg4x1 (w8 + 15, z7, z7)) + +/* +** mls_w8p16_z7_z7: +** add (w8|w9|w10|w11), w8, #?16 +** umlsll za\.s\[\1, 0:3\], z7\.b, z7\.b +** ret +*/ +TEST_ZA_X1 (mls_w8p16_z7_z7, svuint8_t, + svmls_za32_u8_vg4x1 (w8 + 16, z7, z7), + svmls_za32_vg4x1 (w8 + 16, z7, z7)) + +/* +** mls_w8m1_z16_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** umlsll za\.s\[\1, 0:3\], z16\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mls_w8m1_z16_z0, svuint8_t, + svmls_za32_u8_vg4x1 (w8 - 1, z16, z0), + svmls_za32_vg4x1 (w8 - 1, z16, z0)) + +/* +** mls_w12_z0_z3: +** mov (w8|w9|w10|w11), w12 +** umlsll za\.s\[\1, 0:3\], z0\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mls_w12_z0_z3, svuint8_t, + svmls_za32_u8_vg4x1 (w12, z0, z3), + svmls_za32_vg4x1 (w12, z0, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u8_vg4x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u8_vg4x2.c new file mode 100644 index 0000000000000000000000000000000000000000..652c5be5a1aa878b9530ec4f6772b563f35417cb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u8_vg4x2.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** umlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svuint8x2_t, + svmls_za32_u8_vg4x2 (0, z0, z0), + svmls_za32_vg4x2 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** umlsll za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svuint8x2_t, + svmls_za32_u8_vg4x2 (w0, z0, z0), + svmls_za32_vg4x2 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** umlsll za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svuint8x2_t, + svmls_za32_u8_vg4x2 (w8, 
z0, z4), + svmls_za32_vg4x2 (w8, z0, z4)) + +/* +** mls_w8_z4_z18: +** umlsll za\.s\[w8, 0:3, vgx2\], {z4\.b - z5\.b}, {z18\.b - z19\.b} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z18, svuint8x2_t, + svmls_za32_u8_vg4x2 (w8, z4, z18), + svmls_za32_vg4x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mls_w8_z0_z23: +** ... +** umlsll za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z23, svuint8x2_t, + svmls_za32_u8_vg4x2 (w8, z0, z23), + svmls_za32_vg4x2 (w8, z0, z23)) + +/* +** mls_w8_z23_z0: +** ... +** umlsll za\.s\[w8, 0:3, vgx2\], [^\n]+, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mls_w8_z23_z0, svuint8x2_t, + svmls_za32_u8_vg4x2 (w8, z23, z0), + svmls_za32_vg4x2 (w8, z23, z0)) + +/* +** mls_w8_z18_z28: +** umlsll za\.s\[w8, 0:3, vgx2\], {z18\.b - z19\.b}, {z28\.b - z29\.b} +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z28, svuint8x2_t, + svmls_za32_u8_vg4x2 (w8, z18, z28), + svmls_za32_vg4x2 (w8, z18, z28)) + +/* +** mls_w8_z28_z4: +** umlsll za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (mls_w8_z28_z4, svuint8x2_t, + svmls_za32_u8_vg4x2 (w8, z28, z4), + svmls_za32_vg4x2 (w8, z28, z4)) + +/* +** mls_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlsll za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mls_w8p1_z4_z0, svuint8x2_t, + svmls_za32_u8_vg4x2 (w8 + 1, z4, z0), + svmls_za32_vg4x2 (w8 + 1, z4, z0)) + +/* +** mls_w8p2_z4_z0: +** add (w8|w9|w10|w11), w8, #?2 +** umlsll za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mls_w8p2_z4_z0, svuint8x2_t, + svmls_za32_u8_vg4x2 (w8 + 2, z4, z0), + svmls_za32_vg4x2 (w8 + 2, z4, z0)) + +/* +** mls_w11p4_z4_z0: +** umlsll za\.s\[w11, 4:7, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mls_w11p4_z4_z0, svuint8x2_t, + svmls_za32_u8_vg4x2 (w11 + 4, z4, z0), + svmls_za32_vg4x2 (w11 + 4, z4, z0)) + +/* +** mls_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlsll za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svuint8x2_t, + svmls_za32_u8_vg4x2 (w8 + 7, z4, z0), + svmls_za32_vg4x2 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** umlsll za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svuint8x2_t, + svmls_za32_u8_vg4x2 (w8 + 8, z4, z4), + svmls_za32_vg4x2 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** umlsll za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svuint8x2_t, + svmls_za32_u8_vg4x2 (w8 - 1, z4, z0), + svmls_za32_vg4x2 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** umlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svuint8x2_t, svuint8_t, + svmls_single_za32_u8_vg4x2 (0, z1, z0), + svmls_za32_vg4x2 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** umlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svuint8x2_t, svuint8_t, + svmls_single_za32_u8_vg4x2 (w0, z1, z0), + svmls_za32_vg4x2 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** umlsll za\.s\[w8, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svuint8x2_t, svuint8_t, + svmls_single_za32_u8_vg4x2 (w8, z1, z0), + svmls_za32_vg4x2 (w8, z1, z0)) + +/* +** mls_single_w8p1_z1_z0: +** add 
(w8|w9|w10|w11), w8, #?1 +** umlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svuint8x2_t, svuint8_t, + svmls_single_za32_u8_vg4x2 (w8 + 1, z1, z0), + svmls_za32_vg4x2 (w8 + 1, z1, z0)) + +/* +** mls_single_w8p2_z20_z0: +** add (w8|w9|w10|w11), w8, #?2 +** umlsll za\.s\[\1, 0:3, vgx2\], {z20\.b - z21\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p2_z20_z0, svuint8x2_t, svuint8_t, + svmls_single_za32_u8_vg4x2 (w8 + 2, z20, z0), + svmls_za32_vg4x2 (w8 + 2, z20, z0)) + +/* +** mls_single_w11p4_z27_z0: +** umlsll za\.s\[w11, 4:7, vgx2\], {z27\.b - z28\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w11p4_z27_z0, svuint8x2_t, svuint8_t, + svmls_single_za32_u8_vg4x2 (w11 + 4, z27, z0), + svmls_za32_vg4x2 (w11 + 4, z27, z0)) + +/* +** mls_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svuint8x2_t, svuint8_t, + svmls_single_za32_u8_vg4x2 (w8 + 7, z1, z0), + svmls_za32_vg4x2 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** umlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svuint8x2_t, svuint8_t, + svmls_single_za32_u8_vg4x2 (w8 + 8, z1, z0), + svmls_za32_vg4x2 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** umlsll za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svuint8x2_t, svuint8_t, + svmls_single_za32_u8_vg4x2 (w0 - 1, z1, z0), + svmls_za32_vg4x2 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** umlsll za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svuint8x2_t, svuint8_t, + svmls_single_za32_u8_vg4x2 (w8, z0, z15), + svmls_za32_vg4x2 (w8, z0, z15)) + +/* +** mls_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** umlsll za\.s\[w8, 0:3, vgx2\], {z20\.b - z21\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z20_z16, svuint8x2_t, svuint8_t, + svmls_single_za32_u8_vg4x2 (w8, z20, z16), + svmls_za32_vg4x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u8_vg4x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u8_vg4x4.c new file mode 100644 index 0000000000000000000000000000000000000000..94e0556fa6f091ec9b3256fd58410acf6fd4b64f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za32_u8_vg4x4.c @@ -0,0 +1,260 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** umlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svuint8x4_t, + svmls_za32_u8_vg4x4 (0, z0, z0), + svmls_za32_vg4x4 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** umlsll za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svuint8x4_t, + svmls_za32_u8_vg4x4 (w0, z0, z0), + svmls_za32_vg4x4 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** umlsll za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svuint8x4_t, + svmls_za32_u8_vg4x4 (w8, z0, z4), + svmls_za32_vg4x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mls_w8_z0_z18: +** ... 
+** umlsll za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z18, svuint8x4_t, + svmls_za32_u8_vg4x4 (w8, z0, z18), + svmls_za32_vg4x4 (w8, z0, z18)) + +/* +** mls_w8_z18_z0: +** ... +** umlsll za\.s\[w8, 0:3, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z0, svuint8x4_t, + svmls_za32_u8_vg4x4 (w8, z18, z0), + svmls_za32_vg4x4 (w8, z18, z0)) + +/* +** mls_w8_z0_z23: +** ... +** umlsll za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z23, svuint8x4_t, + svmls_za32_u8_vg4x4 (w8, z0, z23), + svmls_za32_vg4x4 (w8, z0, z23)) + +/* +** mls_w8_z23_z0: +** ... +** umlsll za\.s\[w8, 0:3, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mls_w8_z23_z0, svuint8x4_t, + svmls_za32_u8_vg4x4 (w8, z23, z0), + svmls_za32_vg4x4 (w8, z23, z0)) + +/* +** mls_w8_z4_z28: +** umlsll za\.s\[w8, 0:3, vgx4\], {z4\.b - z7\.b}, {z28\.b - z31\.b} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z28, svuint8x4_t, + svmls_za32_u8_vg4x4 (w8, z4, z28), + svmls_za32_vg4x4 (w8, z4, z28)) + +/* +** mls_w8_z28_z0: +** umlsll za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mls_w8_z28_z0, svuint8x4_t, + svmls_za32_u8_vg4x4 (w8, z28, z0), + svmls_za32_vg4x4 (w8, z28, z0)) + +/* +** mls_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlsll za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mls_w8p1_z4_z0, svuint8x4_t, + svmls_za32_u8_vg4x4 (w8 + 1, z4, z0), + svmls_za32_vg4x4 (w8 + 1, z4, z0)) + +/* +** mls_w8p2_z4_z0: +** add (w8|w9|w10|w11), w8, #?2 +** umlsll za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mls_w8p2_z4_z0, svuint8x4_t, + svmls_za32_u8_vg4x4 (w8 + 2, z4, z0), + svmls_za32_vg4x4 (w8 + 2, z4, z0)) + +/* +** mls_w11p4_z4_z0: +** umlsll za\.s\[w11, 4:7, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mls_w11p4_z4_z0, svuint8x4_t, + svmls_za32_u8_vg4x4 (w11 + 4, z4, z0), + svmls_za32_vg4x4 (w11 + 4, z4, z0)) + +/* +** mls_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlsll za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svuint8x4_t, + svmls_za32_u8_vg4x4 (w8 + 7, z4, z0), + svmls_za32_vg4x4 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** umlsll za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svuint8x4_t, + svmls_za32_u8_vg4x4 (w8 + 8, z4, z4), + svmls_za32_vg4x4 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** umlsll za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svuint8x4_t, + svmls_za32_u8_vg4x4 (w8 - 1, z4, z0), + svmls_za32_vg4x4 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** umlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svuint8x4_t, svuint8_t, + svmls_single_za32_u8_vg4x4 (0, z1, z0), + svmls_za32_vg4x4 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** umlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svuint8x4_t, svuint8_t, + svmls_single_za32_u8_vg4x4 (w0, z1, z0), + svmls_za32_vg4x4 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** umlsll za\.s\[w8, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svuint8x4_t, svuint8_t, + svmls_single_za32_u8_vg4x4 (w8, z1, z0), + svmls_za32_vg4x4 (w8, z1, 
z0)) + +/* +** mls_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svuint8x4_t, svuint8_t, + svmls_single_za32_u8_vg4x4 (w8 + 1, z1, z0), + svmls_za32_vg4x4 (w8 + 1, z1, z0)) + +/* +** mls_single_w8p4_z20_z0: +** umlsll za\.s\[w8, 4:7, vgx4\], {z20\.b - z23\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svuint8x4_t, svuint8_t, + svmls_single_za32_u8_vg4x4 (w8 + 4, z20, z0), + svmls_za32_vg4x4 (w8 + 4, z20, z0)) + +/* +** mls_single_w8p6_z27_z0: +** add (w8|w9|w10|w11), w8, #?6 +** umlsll za\.s\[\1, 0:3, vgx4\], {z27\.b - z30\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svuint8x4_t, svuint8_t, + svmls_single_za32_u8_vg4x4 (w8 + 6, z27, z0), + svmls_za32_vg4x4 (w8 + 6, z27, z0)) + +/* +** mls_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svuint8x4_t, svuint8_t, + svmls_single_za32_u8_vg4x4 (w8 + 7, z1, z0), + svmls_za32_vg4x4 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** umlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svuint8x4_t, svuint8_t, + svmls_single_za32_u8_vg4x4 (w8 + 8, z1, z0), + svmls_za32_vg4x4 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** umlsll za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svuint8x4_t, svuint8_t, + svmls_single_za32_u8_vg4x4 (w0 - 1, z1, z0), + svmls_za32_vg4x4 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** umlsll za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svuint8x4_t, svuint8_t, + svmls_single_za32_u8_vg4x4 (w8, z0, z15), + svmls_za32_vg4x4 (w8, z0, z15)) + +/* +** mls_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** umlsll za\.s\[w8, 0:3, vgx4\], {z20\.b - z23\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z20_z16, svuint8x4_t, svuint8_t, + svmls_single_za32_u8_vg4x4 (w8, z20, z16), + svmls_za32_vg4x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_f64_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_f64_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..e39b2814699f7621b5a015a2ca527722d0d8842a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_f64_vg1x2.c @@ -0,0 +1,182 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-f64f64" + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** fmls za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svfloat64x2_t, + svmls_za64_f64_vg1x2 (0, z0, z0), + svmls_za64_vg1x2 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** fmls za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svfloat64x2_t, + svmls_za64_f64_vg1x2 (w0, z0, z0), + svmls_za64_vg1x2 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** fmls za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svfloat64x2_t, + svmls_za64_f64_vg1x2 (w8, z0, z4), + svmls_za64_vg1x2 (w8, z0, z4)) + +/* +** mls_w8_z4_z18: +** fmls za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z18\.d - z19\.d} +** ret 
+*/ +TEST_ZA_XN (mls_w8_z4_z18, svfloat64x2_t, + svmls_za64_f64_vg1x2 (w8, z4, z18), + svmls_za64_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mls_w8_z23_z0: +** ... +** fmls za\.d\[w8, 0, vgx2\], [^\n]+, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (mls_w8_z23_z0, svfloat64x2_t, + svmls_za64_f64_vg1x2 (w8, z23, z0), + svmls_za64_vg1x2 (w8, z23, z0)) + +/* +** mls_w8_z18_z23: +** ... +** fmls za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z23, svfloat64x2_t, + svmls_za64_f64_vg1x2 (w8, z18, z23), + svmls_za64_vg1x2 (w8, z18, z23)) + +/* +** mls_w8_z4_z28: +** fmls za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z28\.d - z29\.d} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z28, svfloat64x2_t, + svmls_za64_f64_vg1x2 (w8, z4, z28), + svmls_za64_vg1x2 (w8, z4, z28)) + +/* +** mls_w8p7_z4_z0: +** fmls za\.d\[w8, 7, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svfloat64x2_t, + svmls_za64_f64_vg1x2 (w8 + 7, z4, z0), + svmls_za64_vg1x2 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** fmls za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svfloat64x2_t, + svmls_za64_f64_vg1x2 (w8 + 8, z4, z4), + svmls_za64_vg1x2 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fmls za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svfloat64x2_t, + svmls_za64_f64_vg1x2 (w8 - 1, z4, z0), + svmls_za64_vg1x2 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** fmls za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svfloat64x2_t, svfloat64_t, + svmls_single_za64_f64_vg1x2 (0, z1, z0), + svmls_za64_vg1x2 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** fmls za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svfloat64x2_t, svfloat64_t, + svmls_single_za64_f64_vg1x2 (w0, z1, z0), + svmls_za64_vg1x2 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** fmls za\.d\[w8, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svfloat64x2_t, svfloat64_t, + svmls_single_za64_f64_vg1x2 (w8, z1, z0), + svmls_za64_vg1x2 (w8, z1, z0)) + +/* +** mls_single_w8p7_z1_z0: +** fmls za\.d\[w8, 7, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svfloat64x2_t, svfloat64_t, + svmls_single_za64_f64_vg1x2 (w8 + 7, z1, z0), + svmls_za64_vg1x2 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fmls za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svfloat64x2_t, svfloat64_t, + svmls_single_za64_f64_vg1x2 (w8 + 8, z1, z0), + svmls_za64_vg1x2 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** fmls za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svfloat64x2_t, svfloat64_t, + svmls_single_za64_f64_vg1x2 (w0 - 1, z1, z0), + svmls_za64_vg1x2 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** fmls za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, z15\.d +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svfloat64x2_t, svfloat64_t, + svmls_single_za64_f64_vg1x2 (w8, z0, z15), + svmls_za64_vg1x2 (w8, z0, z15)) + +/* +** mls_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** fmls za\.d\[w8, 0, vgx2\], {z20\.d - z21\.d}, \1\.d +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z20_z16, svfloat64x2_t, svfloat64_t, + svmls_single_za64_f64_vg1x2 (w8, z20, z16), + svmls_za64_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_f64_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_f64_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..0858f1f978890deabbd0c1bd2f7744794d704a59 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_f64_vg1x4.c @@ -0,0 +1,174 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-f64f64" + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** fmls za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svfloat64x4_t, + svmls_za64_f64_vg1x4 (0, z0, z0), + svmls_za64_vg1x4 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** fmls za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svfloat64x4_t, + svmls_za64_f64_vg1x4 (w0, z0, z0), + svmls_za64_vg1x4 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** fmls za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svfloat64x4_t, + svmls_za64_f64_vg1x4 (w8, z0, z4), + svmls_za64_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mls_w8_z0_z18: +** ... +** fmls za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z18, svfloat64x4_t, + svmls_za64_f64_vg1x4 (w8, z0, z18), + svmls_za64_vg1x4 (w8, z0, z18)) + +/* +** mls_w8_z18_z28: +** ... +** fmls za\.d\[w8, 0, vgx4\], [^\n]+, {z28\.d - z31\.d} +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z28, svfloat64x4_t, + svmls_za64_f64_vg1x4 (w8, z18, z28), + svmls_za64_vg1x4 (w8, z18, z28)) + +/* +** mls_w8_z28_z23: +** ... 
+** fmls za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z28_z23, svfloat64x4_t, + svmls_za64_f64_vg1x4 (w8, z28, z23), + svmls_za64_vg1x4 (w8, z28, z23)) + +/* +** mls_w8p7_z4_z0: +** fmls za\.d\[w8, 7, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svfloat64x4_t, + svmls_za64_f64_vg1x4 (w8 + 7, z4, z0), + svmls_za64_vg1x4 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** fmls za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svfloat64x4_t, + svmls_za64_f64_vg1x4 (w8 + 8, z4, z4), + svmls_za64_vg1x4 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fmls za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svfloat64x4_t, + svmls_za64_f64_vg1x4 (w8 - 1, z4, z0), + svmls_za64_vg1x4 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** fmls za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svfloat64x4_t, svfloat64_t, + svmls_single_za64_f64_vg1x4 (0, z1, z0), + svmls_za64_vg1x4 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** fmls za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svfloat64x4_t, svfloat64_t, + svmls_single_za64_f64_vg1x4 (w0, z1, z0), + svmls_za64_vg1x4 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** fmls za\.d\[w8, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svfloat64x4_t, svfloat64_t, + svmls_single_za64_f64_vg1x4 (w8, z1, z0), + svmls_za64_vg1x4 (w8, z1, z0)) + +/* +** mls_single_w8p7_z1_z0: +** fmls za\.d\[w8, 7, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svfloat64x4_t, svfloat64_t, + svmls_single_za64_f64_vg1x4 (w8 + 7, z1, z0), + svmls_za64_vg1x4 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fmls za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svfloat64x4_t, svfloat64_t, + svmls_single_za64_f64_vg1x4 (w8 + 8, z1, z0), + svmls_za64_vg1x4 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** fmls za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svfloat64x4_t, svfloat64_t, + svmls_single_za64_f64_vg1x4 (w0 - 1, z1, z0), + svmls_za64_vg1x4 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** fmls za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, z15\.d +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svfloat64x4_t, svfloat64_t, + svmls_single_za64_f64_vg1x4 (w8, z0, z15), + svmls_za64_vg1x4 (w8, z0, z15)) + +/* +** mls_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** fmls za\.d\[w8, 0, vgx4\], {z20\.d - z23\.d}, \1\.d +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z20_z16, svfloat64x4_t, svfloat64_t, + svmls_single_za64_f64_vg1x4 (w8, z20, z16), + svmls_za64_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_s16_vg4x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_s16_vg4x1.c new file mode 100644 index 0000000000000000000000000000000000000000..101d66704159a3bab0109bc3d2a3e011946319e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_s16_vg4x1.c @@ -0,0 +1,151 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** smlsll za\.d\[\1, 0:3\], z0\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_0_z0_z0, svint16_t, + svmls_za64_s16_vg4x1 (0, z0, z0), + svmls_za64_vg4x1 (0, z0, z0)) + +/* +** mls_w0_z0_z3: +** mov (w8|w9|w10|w11), w0 +** smlsll za\.d\[\1, 0:3\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mls_w0_z0_z3, svint16_t, + svmls_za64_s16_vg4x1 (w0, z0, z3), + svmls_za64_vg4x1 (w0, z0, z3)) + +/* +** mls_w7_z0_z3: +** mov (w8|w9|w10|w11), w7 +** smlsll za\.d\[\1, 0:3\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mls_w7_z0_z3, svint16_t, + svmls_za64_s16_vg4x1 (w7, z0, z3), + svmls_za64_vg4x1 (w7, z0, z3)) + +/* +** mls_w8_z7_z3: +** smlsll za\.d\[w8, 0:3\], z7\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mls_w8_z7_z3, svint16_t, + svmls_za64_s16_vg4x1 (w8, z7, z3), + svmls_za64_vg4x1 (w8, z7, z3)) + +/* +** mls_w8_z31_z16: +** mov (z[0-7])\.d, z16\.d +** smlsll za\.d\[w8, 0:3\], z31\.h,
\1\.h +** ret +*/ +TEST_ZA_X1 (mls_w8_z31_z16, svint16_t, + svmls_za64_s16_vg4x1 (w8, z31, z16), + svmls_za64_vg4x1 (w8, z31, z16)) + +/* +** mls_w8p1_z0_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlsll za\.d\[\1, 0:3\], z0\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p1_z0_z0, svint16_t, + svmls_za64_s16_vg4x1 (w8 + 1, z0, z0), + svmls_za64_vg4x1 (w8 + 1, z0, z0)) + +/* +** mls_w10p4_z23_z0: +** smlsll za\.d\[w10, 4:7\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w10p4_z23_z0, svint16_t, + svmls_za64_s16_vg4x1 (w10 + 4, z23, z0), + svmls_za64_vg4x1 (w10 + 4, z23, z0)) + +/* +** mls_w11p6_z23_z0: +** add (w8|w9|w10|w11), w11, #?6 +** smlsll za\.d\[\1, 0:3\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w11p6_z23_z0, svint16_t, + svmls_za64_s16_vg4x1 (w11 + 6, z23, z0), + svmls_za64_vg4x1 (w11 + 6, z23, z0)) + +/* +** mls_w9p8_z7_z7: +** smlsll za\.d\[w9, 8:11\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mls_w9p8_z7_z7, svint16_t, + svmls_za64_s16_vg4x1 (w9 + 8, z7, z7), + svmls_za64_vg4x1 (w9 + 8, z7, z7)) + +/* +** mls_w11p12_z23_z0: +** smlsll za\.d\[w11, 12:15\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w11p12_z23_z0, svint16_t, + svmls_za64_s16_vg4x1 (w11 + 12, z23, z0), + svmls_za64_vg4x1 (w11 + 12, z23, z0)) + +/* +** mls_w8p14_z23_z0: +** add (w8|w9|w10|w11), w8, #?14 +** smlsll za\.d\[\1, 0:3\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p14_z23_z0, svint16_t, + svmls_za64_s16_vg4x1 (w8 + 14, z23, z0), + svmls_za64_vg4x1 (w8 + 14, z23, z0)) + +/* +** mls_w8p15_z7_z7: +** add (w8|w9|w10|w11), w8, #?15 +** smlsll za\.d\[\1, 0:3\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p15_z7_z7, svint16_t, + svmls_za64_s16_vg4x1 (w8 + 15, z7, z7), + svmls_za64_vg4x1 (w8 + 15, z7, z7)) + +/* +** mls_w8p16_z7_z7: +** add (w8|w9|w10|w11), w8, #?16 +** smlsll za\.d\[\1, 0:3\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p16_z7_z7, svint16_t, + svmls_za64_s16_vg4x1 (w8 + 16, z7, z7), + svmls_za64_vg4x1 (w8 + 16, z7, z7)) + +/* +** mls_w8m1_z16_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** smlsll za\.d\[\1, 0:3\], z16\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8m1_z16_z0, svint16_t, + svmls_za64_s16_vg4x1 (w8 - 1, z16, z0), + svmls_za64_vg4x1 (w8 - 1, z16, z0)) + +/* +** mls_w12_z0_z3: +** mov (w8|w9|w10|w11), w12 +** smlsll za\.d\[\1, 0:3\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mls_w12_z0_z3, svint16_t, + svmls_za64_s16_vg4x1 (w12, z0, z3), + svmls_za64_vg4x1 (w12, z0, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_s16_vg4x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_s16_vg4x2.c new file mode 100644 index 0000000000000000000000000000000000000000..441a8122b6765e5d84e9412ed4fc9745e8f11194 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_s16_vg4x2.c @@ -0,0 +1,251 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** smlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svint16x2_t, + svmls_za64_s16_vg4x2 (0, z0, z0), + svmls_za64_vg4x2 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** smlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svint16x2_t, + svmls_za64_s16_vg4x2 (w0, z0, z0), + svmls_za64_vg4x2 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** smlsll za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svint16x2_t, + svmls_za64_s16_vg4x2 (w8, z0, 
z4), + svmls_za64_vg4x2 (w8, z0, z4)) + +/* +** mls_w8_z4_z18: +** smlsll za\.d\[w8, 0:3, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z18, svint16x2_t, + svmls_za64_s16_vg4x2 (w8, z4, z18), + svmls_za64_vg4x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mls_w8_z0_z23: +** ... +** smlsll za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z23, svint16x2_t, + svmls_za64_s16_vg4x2 (w8, z0, z23), + svmls_za64_vg4x2 (w8, z0, z23)) + +/* +** mls_w8_z23_z0: +** ... +** smlsll za\.d\[w8, 0:3, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z23_z0, svint16x2_t, + svmls_za64_s16_vg4x2 (w8, z23, z0), + svmls_za64_vg4x2 (w8, z23, z0)) + +/* +** mls_w8_z18_z28: +** smlsll za\.d\[w8, 0:3, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z28, svint16x2_t, + svmls_za64_s16_vg4x2 (w8, z18, z28), + svmls_za64_vg4x2 (w8, z18, z28)) + +/* +** mls_w8_z28_z4: +** smlsll za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z28_z4, svint16x2_t, + svmls_za64_s16_vg4x2 (w8, z28, z4), + svmls_za64_vg4x2 (w8, z28, z4)) + +/* +** mls_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlsll za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p1_z4_z0, svint16x2_t, + svmls_za64_s16_vg4x2 (w8 + 1, z4, z0), + svmls_za64_vg4x2 (w8 + 1, z4, z0)) + +/* +** mls_w8p2_z4_z0: +** add (w8|w9|w10|w11), w8, #?2 +** smlsll za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p2_z4_z0, svint16x2_t, + svmls_za64_s16_vg4x2 (w8 + 2, z4, z0), + svmls_za64_vg4x2 (w8 + 2, z4, z0)) + +/* +** mls_w11p4_z4_z0: +** smlsll za\.d\[w11, 4:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w11p4_z4_z0, svint16x2_t, + svmls_za64_s16_vg4x2 (w11 + 4, z4, z0), + svmls_za64_vg4x2 (w11 + 4, z4, z0)) + +/* +** mls_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlsll za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svint16x2_t, + svmls_za64_s16_vg4x2 (w8 + 7, z4, z0), + svmls_za64_vg4x2 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** smlsll za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svint16x2_t, + svmls_za64_s16_vg4x2 (w8 + 8, z4, z4), + svmls_za64_vg4x2 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** smlsll za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svint16x2_t, + svmls_za64_s16_vg4x2 (w8 - 1, z4, z0), + svmls_za64_vg4x2 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** smlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svint16x2_t, svint16_t, + svmls_single_za64_s16_vg4x2 (0, z1, z0), + svmls_za64_vg4x2 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** smlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svint16x2_t, svint16_t, + svmls_single_za64_s16_vg4x2 (w0, z1, z0), + svmls_za64_vg4x2 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** smlsll za\.d\[w8, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svint16x2_t, svint16_t, + svmls_single_za64_s16_vg4x2 (w8, z1, z0), + svmls_za64_vg4x2 (w8, z1, z0)) + +/* +** mls_single_w8p1_z1_z0: +**
add (w8|w9|w10|w11), w8, #?1 +** smlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svint16x2_t, svint16_t, + svmls_single_za64_s16_vg4x2 (w8 + 1, z1, z0), + svmls_za64_vg4x2 (w8 + 1, z1, z0)) + +/* +** mls_single_w8p2_z20_z0: +** add (w8|w9|w10|w11), w8, #?2 +** smlsll za\.d\[\1, 0:3, vgx2\], {z20\.h - z21\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p2_z20_z0, svint16x2_t, svint16_t, + svmls_single_za64_s16_vg4x2 (w8 + 2, z20, z0), + svmls_za64_vg4x2 (w8 + 2, z20, z0)) + +/* +** mls_single_w11p4_z27_z0: +** smlsll za\.d\[w11, 4:7, vgx2\], {z27\.h - z28\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w11p4_z27_z0, svint16x2_t, svint16_t, + svmls_single_za64_s16_vg4x2 (w11 + 4, z27, z0), + svmls_za64_vg4x2 (w11 + 4, z27, z0)) + +/* +** mls_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svint16x2_t, svint16_t, + svmls_single_za64_s16_vg4x2 (w8 + 7, z1, z0), + svmls_za64_vg4x2 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** smlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svint16x2_t, svint16_t, + svmls_single_za64_s16_vg4x2 (w8 + 8, z1, z0), + svmls_za64_vg4x2 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** smlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svint16x2_t, svint16_t, + svmls_single_za64_s16_vg4x2 (w0 - 1, z1, z0), + svmls_za64_vg4x2 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** smlsll za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svint16x2_t, svint16_t, + svmls_single_za64_s16_vg4x2 (w8, z0, z15), + svmls_za64_vg4x2 (w8, z0, z15)) + +/* +** mls_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** smlsll za\.d\[w8, 0:3, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z20_z16, svint16x2_t, svint16_t, + svmls_single_za64_s16_vg4x2 (w8, z20, z16), + svmls_za64_vg4x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_s16_vg4x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_s16_vg4x4.c new file mode 100644 index 0000000000000000000000000000000000000000..b555caa3c965fcd9d44e0e8686d36fad44f6e77f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_s16_vg4x4.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** smlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svint16x4_t, + svmls_za64_s16_vg4x4 (0, z0, z0), + svmls_za64_vg4x4 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** smlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svint16x4_t, + svmls_za64_s16_vg4x4 (w0, z0, z0), + svmls_za64_vg4x4 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** smlsll za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svint16x4_t, + svmls_za64_s16_vg4x4 (w8, z0, z4), + svmls_za64_vg4x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mls_w8_z0_z18: +** ... 
+** smlsll za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z18, svint16x4_t, + svmls_za64_s16_vg4x4 (w8, z0, z18), + svmls_za64_vg4x4 (w8, z0, z18)) + +/* +** mls_w8_z18_z0: +** ... +** smlsll za\.d\[w8, 0:3, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z0, svint16x4_t, + svmls_za64_s16_vg4x4 (w8, z18, z0), + svmls_za64_vg4x4 (w8, z18, z0)) + +/* +** mls_w8_z0_z23: +** ... +** smlsll za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z23, svint16x4_t, + svmls_za64_s16_vg4x4 (w8, z0, z23), + svmls_za64_vg4x4 (w8, z0, z23)) + +/* +** mls_w8_z23_z0: +** ... +** smlsll za\.d\[w8, 0:3, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z23_z0, svint16x4_t, + svmls_za64_s16_vg4x4 (w8, z23, z0), + svmls_za64_vg4x4 (w8, z23, z0)) + +/* +** mls_w8_z4_z28: +** smlsll za\.d\[w8, 0:3, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z28, svint16x4_t, + svmls_za64_s16_vg4x4 (w8, z4, z28), + svmls_za64_vg4x4 (w8, z4, z28)) + +/* +** mls_w8_z28_z0: +** smlsll za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z28_z0, svint16x4_t, + svmls_za64_s16_vg4x4 (w8, z28, z0), + svmls_za64_vg4x4 (w8, z28, z0)) + +/* +** mls_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlsll za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p1_z4_z0, svint16x4_t, + svmls_za64_s16_vg4x4 (w8 + 1, z4, z0), + svmls_za64_vg4x4 (w8 + 1, z4, z0)) + +/* +** mls_w8p2_z4_z0: +** add (w8|w9|w10|w11), w8, #?2 +** smlsll za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p2_z4_z0, svint16x4_t, + svmls_za64_s16_vg4x4 (w8 + 2, z4, z0), + svmls_za64_vg4x4 (w8 + 2, z4, z0)) + +/* +** mls_w11p4_z4_z0: +** smlsll za\.d\[w11, 4:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w11p4_z4_z0, svint16x4_t, + svmls_za64_s16_vg4x4 (w11 + 4, z4, z0), + svmls_za64_vg4x4 (w11 + 4, z4, z0)) + +/* +** mls_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlsll za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svint16x4_t, + svmls_za64_s16_vg4x4 (w8 + 7, z4, z0), + svmls_za64_vg4x4 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** smlsll za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svint16x4_t, + svmls_za64_s16_vg4x4 (w8 + 8, z4, z4), + svmls_za64_vg4x4 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** smlsll za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svint16x4_t, + svmls_za64_s16_vg4x4 (w8 - 1, z4, z0), + svmls_za64_vg4x4 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** smlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svint16x4_t, svint16_t, + svmls_single_za64_s16_vg4x4 (0, z1, z0), + svmls_za64_vg4x4 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** smlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svint16x4_t, svint16_t, + svmls_single_za64_s16_vg4x4 (w0, z1, z0), + svmls_za64_vg4x4 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** smlsll za\.d\[w8, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svint16x4_t, svint16_t, + svmls_single_za64_s16_vg4x4 (w8, z1, z0), + 
svmls_za64_vg4x4 (w8, z1, z0)) + +/* +** mls_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** smlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svint16x4_t, svint16_t, + svmls_single_za64_s16_vg4x4 (w8 + 1, z1, z0), + svmls_za64_vg4x4 (w8 + 1, z1, z0)) + +/* +** mls_single_w8p4_z20_z0: +** smlsll za\.d\[w8, 4:7, vgx4\], {z20\.h - z23\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svint16x4_t, svint16_t, + svmls_single_za64_s16_vg4x4 (w8 + 4, z20, z0), + svmls_za64_vg4x4 (w8 + 4, z20, z0)) + +/* +** mls_single_w8p6_z27_z0: +** add (w8|w9|w10|w11), w8, #?6 +** smlsll za\.d\[\1, 0:3, vgx4\], {z27\.h - z30\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svint16x4_t, svint16_t, + svmls_single_za64_s16_vg4x4 (w8 + 6, z27, z0), + svmls_za64_vg4x4 (w8 + 6, z27, z0)) + +/* +** mls_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** smlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svint16x4_t, svint16_t, + svmls_single_za64_s16_vg4x4 (w8 + 7, z1, z0), + svmls_za64_vg4x4 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** smlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svint16x4_t, svint16_t, + svmls_single_za64_s16_vg4x4 (w8 + 8, z1, z0), + svmls_za64_vg4x4 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** smlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svint16x4_t, svint16_t, + svmls_single_za64_s16_vg4x4 (w0 - 1, z1, z0), + svmls_za64_vg4x4 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** smlsll za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svint16x4_t, svint16_t, + svmls_single_za64_s16_vg4x4 (w8, z0, z15), + svmls_za64_vg4x4 (w8, z0, z15)) + +/* +** mls_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** smlsll za\.d\[w8, 0:3, vgx4\], {z20\.h - z23\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z20_z16, svint16x4_t, svint16_t, + svmls_single_za64_s16_vg4x4 (w8, z20, z16), + svmls_za64_vg4x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_u16_vg4x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_u16_vg4x1.c new file mode 100644 index 0000000000000000000000000000000000000000..196b3348ba901e9853d4774196f69204932e684b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_u16_vg4x1.c @@ -0,0 +1,151 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** umlsll za\.d\[\1, 0:3\], z0\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_0_z0_z0, svuint16_t, + svmls_za64_u16_vg4x1 (0, z0, z0), + svmls_za64_vg4x1 (0, z0, z0)) + +/* +** mls_w0_z0_z3: +** mov (w8|w9|w10|w11), w0 +** umlsll za\.d\[\1, 0:3\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mls_w0_z0_z3, svuint16_t, + svmls_za64_u16_vg4x1 (w0, z0, z3), + svmls_za64_vg4x1 (w0, z0, z3)) + +/* +** mls_w7_z0_z3: +** mov (w8|w9|w10|w11), w7 +** umlsll za\.d\[\1, 0:3\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mls_w7_z0_z3, svuint16_t, + svmls_za64_u16_vg4x1 (w7, z0, z3), + svmls_za64_vg4x1 (w7, z0, z3)) + +/* +** mls_w8_z7_z3: +** umlsll za\.d\[w8, 0:3\], z7\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mls_w8_z7_z3, svuint16_t, + svmls_za64_u16_vg4x1 (w8, z7, z3), + svmls_za64_vg4x1 (w8, z7, z3)) + +/* +** mls_w8_z31_z16: +** mov (z[0-7])\.d, z16\.d +** umlsll za\.d\[w8, 0:3\], z31\.h,
\1\.h +** ret +*/ +TEST_ZA_X1 (mls_w8_z31_z16, svuint16_t, + svmls_za64_u16_vg4x1 (w8, z31, z16), + svmls_za64_vg4x1 (w8, z31, z16)) + +/* +** mls_w8p1_z0_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlsll za\.d\[\1, 0:3\], z0\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p1_z0_z0, svuint16_t, + svmls_za64_u16_vg4x1 (w8 + 1, z0, z0), + svmls_za64_vg4x1 (w8 + 1, z0, z0)) + +/* +** mls_w10p4_z23_z0: +** umlsll za\.d\[w10, 4:7\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w10p4_z23_z0, svuint16_t, + svmls_za64_u16_vg4x1 (w10 + 4, z23, z0), + svmls_za64_vg4x1 (w10 + 4, z23, z0)) + +/* +** mls_w11p6_z23_z0: +** add (w8|w9|w10|w11), w11, #?6 +** umlsll za\.d\[\1, 0:3\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w11p6_z23_z0, svuint16_t, + svmls_za64_u16_vg4x1 (w11 + 6, z23, z0), + svmls_za64_vg4x1 (w11 + 6, z23, z0)) + +/* +** mls_w9p8_z7_z7: +** umlsll za\.d\[w9, 8:11\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mls_w9p8_z7_z7, svuint16_t, + svmls_za64_u16_vg4x1 (w9 + 8, z7, z7), + svmls_za64_vg4x1 (w9 + 8, z7, z7)) + +/* +** mls_w11p12_z23_z0: +** umlsll za\.d\[w11, 12:15\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w11p12_z23_z0, svuint16_t, + svmls_za64_u16_vg4x1 (w11 + 12, z23, z0), + svmls_za64_vg4x1 (w11 + 12, z23, z0)) + +/* +** mls_w8p14_z23_z0: +** add (w8|w9|w10|w11), w8, #?14 +** umlsll za\.d\[\1, 0:3\], z23\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p14_z23_z0, svuint16_t, + svmls_za64_u16_vg4x1 (w8 + 14, z23, z0), + svmls_za64_vg4x1 (w8 + 14, z23, z0)) + +/* +** mls_w8p15_z7_z7: +** add (w8|w9|w10|w11), w8, #?15 +** umlsll za\.d\[\1, 0:3\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p15_z7_z7, svuint16_t, + svmls_za64_u16_vg4x1 (w8 + 15, z7, z7), + svmls_za64_vg4x1 (w8 + 15, z7, z7)) + +/* +** mls_w8p16_z7_z7: +** add (w8|w9|w10|w11), w8, #?16 +** umlsll za\.d\[\1, 0:3\], z7\.h, z7\.h +** ret +*/ +TEST_ZA_X1 (mls_w8p16_z7_z7, svuint16_t, + svmls_za64_u16_vg4x1 (w8 + 16, z7, z7), + svmls_za64_vg4x1 (w8 + 16, z7, z7)) + +/* +** mls_w8m1_z16_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** umlsll za\.d\[\1, 0:3\], z16\.h, z0\.h +** ret +*/ +TEST_ZA_X1 (mls_w8m1_z16_z0, svuint16_t, + svmls_za64_u16_vg4x1 (w8 - 1, z16, z0), + svmls_za64_vg4x1 (w8 - 1, z16, z0)) + +/* +** mls_w12_z0_z3: +** mov (w8|w9|w10|w11), w12 +** umlsll za\.d\[\1, 0:3\], z0\.h, z3\.h +** ret +*/ +TEST_ZA_X1 (mls_w12_z0_z3, svuint16_t, + svmls_za64_u16_vg4x1 (w12, z0, z3), + svmls_za64_vg4x1 (w12, z0, z3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_u16_vg4x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_u16_vg4x2.c new file mode 100644 index 0000000000000000000000000000000000000000..4c05f24b0f2ac78adde7117d6f9bc252e9e9ecd4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_u16_vg4x2.c @@ -0,0 +1,251 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** umlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svuint16x2_t, + svmls_za64_u16_vg4x2 (0, z0, z0), + svmls_za64_vg4x2 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** umlsll za\.d\[\1, 0:3, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svuint16x2_t, + svmls_za64_u16_vg4x2 (w0, z0, z0), + svmls_za64_vg4x2 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** umlsll za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svuint16x2_t, + 
svmls_za64_u16_vg4x2 (w8, z0, z4), + svmls_za64_vg4x2 (w8, z0, z4)) + +/* +** mls_w8_z4_z18: +** umlsll za\.d\[w8, 0:3, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z18, svuint16x2_t, + svmls_za64_u16_vg4x2 (w8, z4, z18), + svmls_za64_vg4x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mls_w8_z0_z23: +** ... +** umlsll za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z23, svuint16x2_t, + svmls_za64_u16_vg4x2 (w8, z0, z23), + svmls_za64_vg4x2 (w8, z0, z23)) + +/* +** mls_w8_z23_z0: +** ... +** umlsll za\.d\[w8, 0:3, vgx2\], [^\n]+, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z23_z0, svuint16x2_t, + svmls_za64_u16_vg4x2 (w8, z23, z0), + svmls_za64_vg4x2 (w8, z23, z0)) + +/* +** mls_w8_z18_z28: +** umlsll za\.d\[w8, 0:3, vgx2\], {z18\.h - z19\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z28, svuint16x2_t, + svmls_za64_u16_vg4x2 (w8, z18, z28), + svmls_za64_vg4x2 (w8, z18, z28)) + +/* +** mls_w8_z28_z4: +** umlsll za\.d\[w8, 0:3, vgx2\], {z28\.h - z29\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z28_z4, svuint16x2_t, + svmls_za64_u16_vg4x2 (w8, z28, z4), + svmls_za64_vg4x2 (w8, z28, z4)) + +/* +** mls_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlsll za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p1_z4_z0, svuint16x2_t, + svmls_za64_u16_vg4x2 (w8 + 1, z4, z0), + svmls_za64_vg4x2 (w8 + 1, z4, z0)) + +/* +** mls_w8p2_z4_z0: +** add (w8|w9|w10|w11), w8, #?2 +** umlsll za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p2_z4_z0, svuint16x2_t, + svmls_za64_u16_vg4x2 (w8 + 2, z4, z0), + svmls_za64_vg4x2 (w8 + 2, z4, z0)) + +/* +** mls_w11p4_z4_z0: +** umlsll za\.d\[w11, 4:7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w11p4_z4_z0, svuint16x2_t, + svmls_za64_u16_vg4x2 (w11 + 4, z4, z0), + svmls_za64_vg4x2 (w11 + 4, z4, z0)) + +/* +** mls_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlsll za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svuint16x2_t, + svmls_za64_u16_vg4x2 (w8 + 7, z4, z0), + svmls_za64_vg4x2 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** umlsll za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svuint16x2_t, + svmls_za64_u16_vg4x2 (w8 + 8, z4, z4), + svmls_za64_vg4x2 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** umlsll za\.d\[\1, 0:3, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svuint16x2_t, + svmls_za64_u16_vg4x2 (w8 - 1, z4, z0), + svmls_za64_vg4x2 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** umlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svuint16x2_t, svuint16_t, + svmls_single_za64_u16_vg4x2 (0, z1, z0), + svmls_za64_vg4x2 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** umlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svuint16x2_t, svuint16_t, + svmls_single_za64_u16_vg4x2 (w0, z1, z0), + svmls_za64_vg4x2 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** umlsll za\.d\[w8, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, svuint16x2_t, svuint16_t, + svmls_single_za64_u16_vg4x2 (w8, z1, z0), + svmls_za64_vg4x2 (w8,
z1, z0)) + +/* +** mls_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svuint16x2_t, svuint16_t, + svmls_single_za64_u16_vg4x2 (w8 + 1, z1, z0), + svmls_za64_vg4x2 (w8 + 1, z1, z0)) + +/* +** mls_single_w8p2_z20_z0: +** add (w8|w9|w10|w11), w8, #?2 +** umlsll za\.d\[\1, 0:3, vgx2\], {z20\.h - z21\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p2_z20_z0, svuint16x2_t, svuint16_t, + svmls_single_za64_u16_vg4x2 (w8 + 2, z20, z0), + svmls_za64_vg4x2 (w8 + 2, z20, z0)) + +/* +** mls_single_w11p4_z27_z0: +** umlsll za\.d\[w11, 4:7, vgx2\], {z27\.h - z28\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w11p4_z27_z0, svuint16x2_t, svuint16_t, + svmls_single_za64_u16_vg4x2 (w11 + 4, z27, z0), + svmls_za64_vg4x2 (w11 + 4, z27, z0)) + +/* +** mls_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svuint16x2_t, svuint16_t, + svmls_single_za64_u16_vg4x2 (w8 + 7, z1, z0), + svmls_za64_vg4x2 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** umlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svuint16x2_t, svuint16_t, + svmls_single_za64_u16_vg4x2 (w8 + 8, z1, z0), + svmls_za64_vg4x2 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** umlsll za\.d\[\1, 0:3, vgx2\], {z1\.h - z2\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svuint16x2_t, svuint16_t, + svmls_single_za64_u16_vg4x2 (w0 - 1, z1, z0), + svmls_za64_vg4x2 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** umlsll za\.d\[w8, 0:3, vgx2\], {z0\.h - z1\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svuint16x2_t, svuint16_t, + svmls_single_za64_u16_vg4x2 (w8, z0, z15), + svmls_za64_vg4x2 (w8, z0, z15)) + +/* +** mls_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** umlsll za\.d\[w8, 0:3, vgx2\], {z20\.h - z21\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z20_z16, svuint16x2_t, svuint16_t, + svmls_single_za64_u16_vg4x2 (w8, z20, z16), + svmls_za64_vg4x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_u16_vg4x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_u16_vg4x4.c new file mode 100644 index 0000000000000000000000000000000000000000..6ceb826d1dfa19596975debcf4ce056d805fb50c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za64_u16_vg4x4.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** mls_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** umlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_0_z0_z0, svuint16x4_t, + svmls_za64_u16_vg4x4 (0, z0, z0), + svmls_za64_vg4x4 (0, z0, z0)) + +/* +** mls_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** umlsll za\.d\[\1, 0:3, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w0_z0_z0, svuint16x4_t, + svmls_za64_u16_vg4x4 (w0, z0, z0), + svmls_za64_vg4x4 (w0, z0, z0)) + +/* +** mls_w8_z0_z4: +** umlsll za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z4, svuint16x4_t, + svmls_za64_u16_vg4x4 (w8, z0, z4), + svmls_za64_vg4x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for 
misaligned registers. */ + +/* +** mls_w8_z0_z18: +** ... +** umlsll za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z18, svuint16x4_t, + svmls_za64_u16_vg4x4 (w8, z0, z18), + svmls_za64_vg4x4 (w8, z0, z18)) + +/* +** mls_w8_z18_z0: +** ... +** umlsll za\.d\[w8, 0:3, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z18_z0, svuint16x4_t, + svmls_za64_u16_vg4x4 (w8, z18, z0), + svmls_za64_vg4x4 (w8, z18, z0)) + +/* +** mls_w8_z0_z23: +** ... +** umlsll za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mls_w8_z0_z23, svuint16x4_t, + svmls_za64_u16_vg4x4 (w8, z0, z23), + svmls_za64_vg4x4 (w8, z0, z23)) + +/* +** mls_w8_z23_z0: +** ... +** umlsll za\.d\[w8, 0:3, vgx4\], [^\n]+, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z23_z0, svuint16x4_t, + svmls_za64_u16_vg4x4 (w8, z23, z0), + svmls_za64_vg4x4 (w8, z23, z0)) + +/* +** mls_w8_z4_z28: +** umlsll za\.d\[w8, 0:3, vgx4\], {z4\.h - z7\.h}, {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z4_z28, svuint16x4_t, + svmls_za64_u16_vg4x4 (w8, z4, z28), + svmls_za64_vg4x4 (w8, z4, z28)) + +/* +** mls_w8_z28_z0: +** umlsll za\.d\[w8, 0:3, vgx4\], {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8_z28_z0, svuint16x4_t, + svmls_za64_u16_vg4x4 (w8, z28, z0), + svmls_za64_vg4x4 (w8, z28, z0)) + +/* +** mls_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlsll za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p1_z4_z0, svuint16x4_t, + svmls_za64_u16_vg4x4 (w8 + 1, z4, z0), + svmls_za64_vg4x4 (w8 + 1, z4, z0)) + +/* +** mls_w8p2_z4_z0: +** add (w8|w9|w10|w11), w8, #?2 +** umlsll za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p2_z4_z0, svuint16x4_t, + svmls_za64_u16_vg4x4 (w8 + 2, z4, z0), + svmls_za64_vg4x4 (w8 + 2, z4, z0)) + +/* +** mls_w11p4_z4_z0: +** umlsll za\.d\[w11, 4:7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w11p4_z4_z0, svuint16x4_t, + svmls_za64_u16_vg4x4 (w11 + 4, z4, z0), + svmls_za64_vg4x4 (w11 + 4, z4, z0)) + +/* +** mls_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlsll za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p7_z4_z0, svuint16x4_t, + svmls_za64_u16_vg4x4 (w8 + 7, z4, z0), + svmls_za64_vg4x4 (w8 + 7, z4, z0)) + +/* +** mls_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** umlsll za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (mls_w8p8_z4_z4, svuint16x4_t, + svmls_za64_u16_vg4x4 (w8 + 8, z4, z4), + svmls_za64_vg4x4 (w8 + 8, z4, z4)) + +/* +** mls_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** umlsll za\.d\[\1, 0:3, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (mls_w8m1_z4_z0, svuint16x4_t, + svmls_za64_u16_vg4x4 (w8 - 1, z4, z0), + svmls_za64_vg4x4 (w8 - 1, z4, z0)) + +/* +** mls_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** umlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_0_z1_z0, svuint16x4_t, svuint16_t, + svmls_single_za64_u16_vg4x4 (0, z1, z0), + svmls_za64_vg4x4 (0, z1, z0)) + +/* +** mls_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** umlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0_z1_z0, svuint16x4_t, svuint16_t, + svmls_single_za64_u16_vg4x4 (w0, z1, z0), + svmls_za64_vg4x4 (w0, z1, z0)) + +/* +** mls_single_w8_z1_z0: +** umlsll za\.d\[w8, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z1_z0, 
svuint16x4_t, svuint16_t, + svmls_single_za64_u16_vg4x4 (w8, z1, z0), + svmls_za64_vg4x4 (w8, z1, z0)) + +/* +** mls_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** umlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p1_z1_z0, svuint16x4_t, svuint16_t, + svmls_single_za64_u16_vg4x4 (w8 + 1, z1, z0), + svmls_za64_vg4x4 (w8 + 1, z1, z0)) + +/* +** mls_single_w8p4_z20_z0: +** umlsll za\.d\[w8, 4:7, vgx4\], {z20\.h - z23\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p4_z20_z0, svuint16x4_t, svuint16_t, + svmls_single_za64_u16_vg4x4 (w8 + 4, z20, z0), + svmls_za64_vg4x4 (w8 + 4, z20, z0)) + +/* +** mls_single_w8p6_z27_z0: +** add (w8|w9|w10|w11), w8, #?6 +** umlsll za\.d\[\1, 0:3, vgx4\], {z27\.h - z30\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p6_z27_z0, svuint16x4_t, svuint16_t, + svmls_single_za64_u16_vg4x4 (w8 + 6, z27, z0), + svmls_za64_vg4x4 (w8 + 6, z27, z0)) + +/* +** mls_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** umlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svuint16x4_t, svuint16_t, + svmls_single_za64_u16_vg4x4 (w8 + 7, z1, z0), + svmls_za64_vg4x4 (w8 + 7, z1, z0)) + +/* +** mls_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** umlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svuint16x4_t, svuint16_t, + svmls_single_za64_u16_vg4x4 (w8 + 8, z1, z0), + svmls_za64_vg4x4 (w8 + 8, z1, z0)) + +/* +** mls_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** umlsll za\.d\[\1, 0:3, vgx4\], {z1\.h - z4\.h}, z0\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svuint16x4_t, svuint16_t, + svmls_single_za64_u16_vg4x4 (w0 - 1, z1, z0), + svmls_za64_vg4x4 (w0 - 1, z1, z0)) + +/* +** mls_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** umlsll za\.d\[w8, 0:3, vgx4\], {z0\.h - z3\.h}, z15\.h +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svuint16x4_t, svuint16_t, + svmls_single_za64_u16_vg4x4 (w8, z0, z15), + svmls_za64_vg4x4 (w8, z0, z15)) + +/* +** mls_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** umlsll za\.d\[w8, 0:3, vgx4\], {z20\.h - z23\.h}, \1\.h +** ret +*/ +TEST_ZA_SINGLE (mls_single_w8_z20_z16, svuint16x4_t, svuint16_t, + svmls_single_za64_u16_vg4x4 (w8, z20, z16), + svmls_za64_vg4x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za32.c new file mode 100644 index 0000000000000000000000000000000000000000..7b24387dd80e088002d9303dd76f5ff53a7f21bb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za32.c @@ -0,0 +1,48 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mopa_za32_s16_0_p0_p1_z0_z1: +** smopa za0\.s, p0/m, p1/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_ZA (mopa_za32_s16_0_p0_p1_z0_z1, svint16_t, + svmopa_za32_s16_m (0, p0, p1, z0, z1), + svmopa_za32_m (0, p0, p1, z0, z1)) + +/* +** mopa_za32_s16_0_p1_p0_z1_z0: +** smopa za0\.s, p1/m, p0/m, z1\.h, z0\.h +** ret +*/ +TEST_UNIFORM_ZA (mopa_za32_s16_0_p1_p0_z1_z0, svint16_t, + svmopa_za32_s16_m (0, p1, p0, z1, z0), + svmopa_za32_m (0, p1, p0, z1, z0)) + +/* +** mopa_za32_s16_3_p0_p1_z0_z1: +** smopa za3\.s, p0/m, p1/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_ZA (mopa_za32_s16_3_p0_p1_z0_z1, svint16_t, + svmopa_za32_s16_m (3, p0, p1, z0, z1), + svmopa_za32_m (3, p0, p1, z0, z1)) + +/* +** mopa_za32_u16_0_p0_p1_z0_z1: +** umopa za0\.s, p0/m, p1/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_ZA (mopa_za32_u16_0_p0_p1_z0_z1, svuint16_t, + svmopa_za32_u16_m (0, p0, p1, z0, z1), + svmopa_za32_m (0, p0, p1, z0, z1)) + +/* +** mopa_za32_u16_3_p0_p1_z0_z1: +** umopa za3\.s, p0/m, p1/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_ZA (mopa_za32_u16_3_p0_p1_z0_z1, svuint16_t, + svmopa_za32_u16_m (3, p0, p1, z0, z1), + svmopa_za32_m (3, p0, p1, z0, z1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mops_za32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mops_za32.c new file mode 100644 index 0000000000000000000000000000000000000000..04a104ad0364bcb6b8229b833cbdaf811b7c51f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mops_za32.c @@ -0,0 +1,48 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** mops_za32_s16_0_p0_p1_z0_z1: +** smops za0\.s, p0/m, p1/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_ZA (mops_za32_s16_0_p0_p1_z0_z1, svint16_t, + svmops_za32_s16_m (0, p0, p1, z0, z1), + svmops_za32_m (0, p0, p1, z0, z1)) + +/* +** mops_za32_s16_0_p1_p0_z1_z0: +** smops za0\.s, p1/m, p0/m, z1\.h, z0\.h +** ret +*/ +TEST_UNIFORM_ZA (mops_za32_s16_0_p1_p0_z1_z0, svint16_t, + svmops_za32_s16_m (0, p1, p0, z1, z0), + svmops_za32_m (0, p1, p0, z1, z0)) + +/* +** mops_za32_s16_3_p0_p1_z0_z1: +** smops za3\.s, p0/m, p1/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_ZA (mops_za32_s16_3_p0_p1_z0_z1, svint16_t, + svmops_za32_s16_m (3, p0, p1, z0, z1), + svmops_za32_m (3, p0, p1, z0, z1)) + +/* +** mops_za32_u16_0_p0_p1_z0_z1: +** umops za0\.s, p0/m, p1/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_ZA (mops_za32_u16_0_p0_p1_z0_z1, svuint16_t, + svmops_za32_u16_m (0, p0, p1, z0, z1), + svmops_za32_m (0, p0, p1, z0, z1)) + +/* +** mops_za32_u16_3_p0_p1_z0_z1: +** umops za3\.s, p0/m, p1/m, z0\.h, z1\.h 
+** ret +*/ +TEST_UNIFORM_ZA (mops_za32_u16_3_p0_p1_z0_z1, svuint16_t, + svmops_za32_u16_m (3, p0, p1, z0, z1), + svmops_za32_m (3, p0, p1, z0, z1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c16.c new file mode 100644 index 0000000000000000000000000000000000000000..b03a8aed99427da51204f34ae027f9ee73c3236b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c16.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** pext_p2_pn0_0: +** mov p([0-9]+)\.b, p0\.b +** pext p2\.h, pn\1\[0\] +** ret +*/ +TEST_EXTRACT_PN (pext_p2_pn0_0, svbool_t, + p2 = svpext_c16 (pn0, 0), + p2 = svpext_c16 (pn0, 0)) + +/* +** pext_p5_pn7_1: +** mov p([0-9]+)\.b, p7\.b +** pext p5\.h, pn\1\[1\] +** ret +*/ +TEST_EXTRACT_PN (pext_p5_pn7_1, svbool_t, + p5 = svpext_c16 (pn7, 1), + p5 = svpext_c16 (pn7, 1)) + +/* +** pext_p9_pn8_2: +** pext p9\.h, pn8\[2\] +** ret +*/ +TEST_EXTRACT_PN (pext_p9_pn8_2, svbool_t, + p9 = svpext_c16 (pn8, 2), + p9 = svpext_c16 (pn8, 2)) + +/* +** pext_p12_pn11_3: +** pext p12\.h, pn11\[3\] +** ret +*/ +TEST_EXTRACT_PN (pext_p12_pn11_3, svbool_t, + p12 = svpext_c16 (pn11, 3), + p12 = svpext_c16 (pn11, 3)) + +/* +** pext_p2_pn15_0: +** pext p2\.h, pn15\[0\] +** ret +*/ +TEST_EXTRACT_PN (pext_p2_pn15_0, svbool_t, + p2 = svpext_c16 (pn15, 0), + p2 = svpext_c16 (pn15, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..c07d82038b1565eac8e925389ac72b91d69fadb8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c16_x2.c @@ -0,0 +1,54 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** pext_p2_pn0_0: +** mov p([0-9]+)\.b, p0\.b +** pext {p2\.h, p3\.h}, pn\1\[0\] +** ret +*/ +TEST_EXTRACT_PN (pext_p2_pn0_0, svboolx2_t, + p2 = svpext_c16_x2 (pn0, 0), + p2 = svpext_c16_x2 (pn0, 0)) + +/* +** pext_p5_pn7_1: +** mov p([0-9]+)\.b, p7\.b +** pext {[^}]+}, pn\1\[1\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_EXTRACT_PN (pext_p5_pn7_1, svboolx2_t, + p5 = svpext_c16_x2 (pn7, 1), + p5 = svpext_c16_x2 (pn7, 1)) + +/* +** pext_p9_pn8_0: +** pext {[^}]+}, pn8\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_EXTRACT_PN (pext_p9_pn8_0, svboolx2_t, + p9 = svpext_c16_x2 (pn8, 0), + p9 = svpext_c16_x2 (pn8, 0)) + +/* +** pext_p12_pn11_1: +** pext {p12\.h, p13\.h}, pn11\[1\] +** ret +*/ +TEST_EXTRACT_PN (pext_p12_pn11_1, svboolx2_t, + p12 = svpext_c16_x2 (pn11, 1), + p12 = svpext_c16_x2 (pn11, 1)) + +/* +** pext_p2_pn15_0: +** pext {p2\.h, p3\.h}, pn15\[0\] +** ret +*/ +TEST_EXTRACT_PN (pext_p2_pn15_0, svboolx2_t, + p2 = svpext_c16_x2 (pn15, 0), + p2 = svpext_c16_x2 (pn15, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c32.c new file mode 100644 index 0000000000000000000000000000000000000000..2a63942d8e8a6ee8b139703ec7e311ce44dc239e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c32.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** pext_p2_pn0_0: +** mov p([0-9]+)\.b, p0\.b +** pext p2\.s, pn\1\[0\] +** ret +*/ +TEST_EXTRACT_PN (pext_p2_pn0_0, svbool_t, + p2 = svpext_c32 (pn0, 0), + p2 = svpext_c32 (pn0, 0)) + +/* +** 
pext_p5_pn7_1: +** mov p([0-9]+)\.b, p7\.b +** pext p5\.s, pn\1\[1\] +** ret +*/ +TEST_EXTRACT_PN (pext_p5_pn7_1, svbool_t, + p5 = svpext_c32 (pn7, 1), + p5 = svpext_c32 (pn7, 1)) + +/* +** pext_p9_pn8_2: +** pext p9\.s, pn8\[2\] +** ret +*/ +TEST_EXTRACT_PN (pext_p9_pn8_2, svbool_t, + p9 = svpext_c32 (pn8, 2), + p9 = svpext_c32 (pn8, 2)) + +/* +** pext_p12_pn11_3: +** pext p12\.s, pn11\[3\] +** ret +*/ +TEST_EXTRACT_PN (pext_p12_pn11_3, svbool_t, + p12 = svpext_c32 (pn11, 3), + p12 = svpext_c32 (pn11, 3)) + +/* +** pext_p2_pn15_0: +** pext p2\.s, pn15\[0\] +** ret +*/ +TEST_EXTRACT_PN (pext_p2_pn15_0, svbool_t, + p2 = svpext_c32 (pn15, 0), + p2 = svpext_c32 (pn15, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..1629b440fabef301ac13d7b326d09898ea835877 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c32_x2.c @@ -0,0 +1,54 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** pext_p2_pn0_0: +** mov p([0-9]+)\.b, p0\.b +** pext {p2\.s, p3\.s}, pn\1\[0\] +** ret +*/ +TEST_EXTRACT_PN (pext_p2_pn0_0, svboolx2_t, + p2 = svpext_c32_x2 (pn0, 0), + p2 = svpext_c32_x2 (pn0, 0)) + +/* +** pext_p5_pn7_1: +** mov p([0-9]+)\.b, p7\.b +** pext {[^}]+}, pn\1\[1\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_EXTRACT_PN (pext_p5_pn7_1, svboolx2_t, + p5 = svpext_c32_x2 (pn7, 1), + p5 = svpext_c32_x2 (pn7, 1)) + +/* +** pext_p9_pn8_0: +** pext {[^}]+}, pn8\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_EXTRACT_PN (pext_p9_pn8_0, svboolx2_t, + p9 = svpext_c32_x2 (pn8, 0), + p9 = svpext_c32_x2 (pn8, 0)) + +/* +** pext_p12_pn11_1: +** pext {p12\.s, p13\.s}, pn11\[1\] +** ret +*/ +TEST_EXTRACT_PN (pext_p12_pn11_1, svboolx2_t, + p12 = svpext_c32_x2 (pn11, 1), + p12 = svpext_c32_x2 (pn11, 1)) + +/* +** pext_p2_pn15_0: +** pext {p2\.s, p3\.s}, pn15\[0\] +** ret +*/ +TEST_EXTRACT_PN (pext_p2_pn15_0, svboolx2_t, + p2 = svpext_c32_x2 (pn15, 0), + p2 = svpext_c32_x2 (pn15, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c64.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c64.c new file mode 100644 index 0000000000000000000000000000000000000000..8c93d7b6f1f9a863883d00987198e2f2b2070ec9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c64.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** pext_p2_pn0_0: +** mov p([0-9]+)\.b, p0\.b +** pext p2\.d, pn\1\[0\] +** ret +*/ +TEST_EXTRACT_PN (pext_p2_pn0_0, svbool_t, + p2 = svpext_c64 (pn0, 0), + p2 = svpext_c64 (pn0, 0)) + +/* +** pext_p5_pn7_1: +** mov p([0-9]+)\.b, p7\.b +** pext p5\.d, pn\1\[1\] +** ret +*/ +TEST_EXTRACT_PN (pext_p5_pn7_1, svbool_t, + p5 = svpext_c64 (pn7, 1), + p5 = svpext_c64 (pn7, 1)) + +/* +** pext_p9_pn8_2: +** pext p9\.d, pn8\[2\] +** ret +*/ +TEST_EXTRACT_PN (pext_p9_pn8_2, svbool_t, + p9 = svpext_c64 (pn8, 2), + p9 = svpext_c64 (pn8, 2)) + +/* +** pext_p12_pn11_3: +** pext p12\.d, pn11\[3\] +** ret +*/ +TEST_EXTRACT_PN (pext_p12_pn11_3, svbool_t, + p12 = svpext_c64 (pn11, 3), + p12 = svpext_c64 (pn11, 3)) + +/* +** pext_p2_pn15_0: +** pext p2\.d, pn15\[0\] +** ret +*/ +TEST_EXTRACT_PN (pext_p2_pn15_0, svbool_t, + p2 = svpext_c64 (pn15, 0), + p2 = svpext_c64 (pn15, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c64_x2.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..ab12c8a9003d12aa4dae4132fb039288577954a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c64_x2.c @@ -0,0 +1,54 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** pext_p2_pn0_0: +** mov p([0-9]+)\.b, p0\.b +** pext {p2\.d, p3\.d}, pn\1\[0\] +** ret +*/ +TEST_EXTRACT_PN (pext_p2_pn0_0, svboolx2_t, + p2 = svpext_c64_x2 (pn0, 0), + p2 = svpext_c64_x2 (pn0, 0)) + +/* +** pext_p5_pn7_1: +** mov p([0-9]+)\.b, p7\.b +** pext {[^}]+}, pn\1\[1\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_EXTRACT_PN (pext_p5_pn7_1, svboolx2_t, + p5 = svpext_c64_x2 (pn7, 1), + p5 = svpext_c64_x2 (pn7, 1)) + +/* +** pext_p9_pn8_0: +** pext {[^}]+}, pn8\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_EXTRACT_PN (pext_p9_pn8_0, svboolx2_t, + p9 = svpext_c64_x2 (pn8, 0), + p9 = svpext_c64_x2 (pn8, 0)) + +/* +** pext_p12_pn11_1: +** pext {p12\.d, p13\.d}, pn11\[1\] +** ret +*/ +TEST_EXTRACT_PN (pext_p12_pn11_1, svboolx2_t, + p12 = svpext_c64_x2 (pn11, 1), + p12 = svpext_c64_x2 (pn11, 1)) + +/* +** pext_p2_pn15_0: +** pext {p2\.d, p3\.d}, pn15\[0\] +** ret +*/ +TEST_EXTRACT_PN (pext_p2_pn15_0, svboolx2_t, + p2 = svpext_c64_x2 (pn15, 0), + p2 = svpext_c64_x2 (pn15, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c8.c new file mode 100644 index 0000000000000000000000000000000000000000..8f882ce5a145fa59c3ca221700c94e53d9f1284c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c8.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** pext_p2_pn0_0: +** mov p([0-9]+)\.b, p0\.b +** pext p2\.b, pn\1\[0\] +** ret +*/ +TEST_EXTRACT_PN (pext_p2_pn0_0, svbool_t, + p2 = svpext_c8 (pn0, 0), + p2 = svpext_c8 (pn0, 0)) + +/* +** pext_p5_pn7_1: +** mov p([0-9]+)\.b, p7\.b +** pext p5\.b, pn\1\[1\] +** ret +*/ +TEST_EXTRACT_PN (pext_p5_pn7_1, svbool_t, + p5 = svpext_c8 (pn7, 1), + p5 = svpext_c8 (pn7, 1)) + +/* +** pext_p9_pn8_2: +** pext p9\.b, pn8\[2\] +** ret +*/ +TEST_EXTRACT_PN (pext_p9_pn8_2, svbool_t, + p9 = svpext_c8 (pn8, 2), + p9 = svpext_c8 (pn8, 2)) + +/* +** pext_p12_pn11_3: +** pext p12\.b, pn11\[3\] +** ret +*/ +TEST_EXTRACT_PN (pext_p12_pn11_3, svbool_t, + p12 = svpext_c8 (pn11, 3), + p12 = svpext_c8 (pn11, 3)) + +/* +** pext_p2_pn15_0: +** pext p2\.b, pn15\[0\] +** ret +*/ +TEST_EXTRACT_PN (pext_p2_pn15_0, svbool_t, + p2 = svpext_c8 (pn15, 0), + p2 = svpext_c8 (pn15, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..814e36df094d0191b331c024e1a2ecc2f1e2b2fb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c8_x2.c @@ -0,0 +1,54 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** pext_p2_pn0_0: +** mov p([0-9]+)\.b, p0\.b +** pext {p2\.b, p3\.b}, pn\1\[0\] +** ret +*/ +TEST_EXTRACT_PN (pext_p2_pn0_0, svboolx2_t, + p2 = svpext_c8_x2 (pn0, 0), + p2 = svpext_c8_x2 (pn0, 0)) + +/* +** pext_p5_pn7_1: +** mov p([0-9]+)\.b, p7\.b +** pext {[^}]+}, pn\1\[1\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_EXTRACT_PN (pext_p5_pn7_1, svboolx2_t, + p5 = svpext_c8_x2 (pn7, 1), + p5 = svpext_c8_x2 (pn7, 1)) 
+ +/* +** pext_p9_pn8_0: +** pext {[^}]+}, pn8\[0\] +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_EXTRACT_PN (pext_p9_pn8_0, svboolx2_t, + p9 = svpext_c8_x2 (pn8, 0), + p9 = svpext_c8_x2 (pn8, 0)) + +/* +** pext_p12_pn11_1: +** pext {p12\.b, p13\.b}, pn11\[1\] +** ret +*/ +TEST_EXTRACT_PN (pext_p12_pn11_1, svboolx2_t, + p12 = svpext_c8_x2 (pn11, 1), + p12 = svpext_c8_x2 (pn11, 1)) + +/* +** pext_p2_pn15_0: +** pext {p2\.b, p3\.b}, pn15\[0\] +** ret +*/ +TEST_EXTRACT_PN (pext_p2_pn15_0, svboolx2_t, + p2 = svpext_c8_x2 (pn15, 0), + p2 = svpext_c8_x2 (pn15, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pfalse_c.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pfalse_c.c new file mode 100644 index 0000000000000000000000000000000000000000..ebd3c0056b374bf3f33f3d89b868f6ea8cc52094 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pfalse_c.c @@ -0,0 +1,39 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** pfalse_pn0: +** pfalse p0\.b +** ret +*/ +TEST_PN (pfalse_pn0, + pn0 = svpfalse_c (), + pn0 = svpfalse_c ()) + +/* +** pfalse_pn7: +** pfalse p7\.b +** ret +*/ +TEST_PN (pfalse_pn7, + pn7 = svpfalse_c (), + pn7 = svpfalse_c ()) + +/* +** pfalse_pn8: +** pfalse p8\.b +** ret +*/ +TEST_PN (pfalse_pn8, + pn8 = svpfalse_c (), + pn8 = svpfalse_c ()) + +/* +** pfalse_pn15: +** pfalse p15\.b +** ret +*/ +TEST_PN (pfalse_pn15, + pn15 = svpfalse_c (), + pn15 = svpfalse_c ()) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_b16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_b16.c new file mode 100644 index 0000000000000000000000000000000000000000..5df2aa0b59651136ca37068216fe0dd3683bc7b4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_b16.c @@ -0,0 +1,89 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** psel_p0_p2_p7_0: +** mov [wx](1[2-5]), #?0 +** psel p0, p2, p7\.h\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p0_p2_p7_0, svbool_t, + p0 = svpsel_b16 (p2, p7, 0), + p0 = svpsel_b16 (p2, p7, 0)) + +/* +** psel_p2_p7_p8_w11: +** mov [wx](1[2-5]), [wx]11 +** psel p2, p7, p8\.h\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p2_p7_p8_w11, svbool_t, + p2 = svpsel_b16 (p7, p8, w11), + p2 = svpsel_b16 (p7, p8, w11)) + +/* +** psel_p7_p8_p13_w12: +** psel p7, p8, p13\.h\[w12, 0\] +** ret +*/ +TEST_SELECT_P (psel_p7_p8_p13_w12, svbool_t, + p7 = svpsel_b16 (p8, p13, w12), + p7 = svpsel_b16 (p8, p13, w12)) + +/* +** psel_p8_p13_p15_w15: +** psel p8, p13, p15\.h\[w15, 0\] +** ret +*/ +TEST_SELECT_P (psel_p8_p13_p15_w15, svbool_t, + p8 = svpsel_b16 (p13, p15, w15), + p8 = svpsel_b16 (p13, p15, w15)) + +/* +** psel_p13_p15_p0_w16: +** mov [wx](1[2-5]), [wx]16 +** psel p13, p15, p0\.h\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p13_p15_p0_w16, svbool_t, + p13 = svpsel_b16 (p15, p0, w16), + p13 = svpsel_b16 (p15, p0, w16)) + +/* +** psel_p15_p13_p8_w12p1: +** psel p15, p13, p8\.h\[w12, 1\] +** ret +*/ +TEST_SELECT_P (psel_p15_p13_p8_w12p1, svbool_t, + p15 = svpsel_b16 (p13, p8, w12 + 1), + p15 = svpsel_b16 (p13, p8, w12 + 1)) + +/* +** psel_p13_p8_p7_w12p7: +** psel p13, p8, p7\.h\[w12, 7\] +** ret +*/ +TEST_SELECT_P (psel_p13_p8_p7_w12p7, svbool_t, + p13 = svpsel_b16 (p8, p7, w12 + 7), + p13 = svpsel_b16 (p8, p7, w12 + 7)) + +/* +** psel_p0_p0_p0_w12p8: +** add (w[0-9]+), w12, #?8 +** psel p0, p0, p0\.h\[\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p0_p0_p0_w12p8, svbool_t, + p0 = svpsel_b16 (p0, p0, w12 + 8), + p0 = 
svpsel_b16 (p0, p0, w12 + 8)) + +/* +** psel_p15_p15_p15_w12m1: +** sub (w[0-9]+), w12, #?1 +** psel p15, p15, p15\.h\[\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p15_p15_p15_w12m1, svbool_t, + p15 = svpsel_b16 (p15, p15, w12 - 1), + p15 = svpsel_b16 (p15, p15, w12 - 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_b32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_b32.c new file mode 100644 index 0000000000000000000000000000000000000000..8489dca008629caee6e1ba21dc1d25f1d9eaee86 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_b32.c @@ -0,0 +1,89 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** psel_p0_p2_p7_0: +** mov [wx](1[2-5]), #?0 +** psel p0, p2, p7\.s\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p0_p2_p7_0, svbool_t, + p0 = svpsel_b32 (p2, p7, 0), + p0 = svpsel_b32 (p2, p7, 0)) + +/* +** psel_p2_p7_p8_w11: +** mov [wx](1[2-5]), [wx]11 +** psel p2, p7, p8\.s\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p2_p7_p8_w11, svbool_t, + p2 = svpsel_b32 (p7, p8, w11), + p2 = svpsel_b32 (p7, p8, w11)) + +/* +** psel_p7_p8_p13_w12: +** psel p7, p8, p13\.s\[w12, 0\] +** ret +*/ +TEST_SELECT_P (psel_p7_p8_p13_w12, svbool_t, + p7 = svpsel_b32 (p8, p13, w12), + p7 = svpsel_b32 (p8, p13, w12)) + +/* +** psel_p8_p13_p15_w15: +** psel p8, p13, p15\.s\[w15, 0\] +** ret +*/ +TEST_SELECT_P (psel_p8_p13_p15_w15, svbool_t, + p8 = svpsel_b32 (p13, p15, w15), + p8 = svpsel_b32 (p13, p15, w15)) + +/* +** psel_p13_p15_p0_w16: +** mov [wx](1[2-5]), [wx]16 +** psel p13, p15, p0\.s\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p13_p15_p0_w16, svbool_t, + p13 = svpsel_b32 (p15, p0, w16), + p13 = svpsel_b32 (p15, p0, w16)) + +/* +** psel_p15_p13_p8_w12p1: +** psel p15, p13, p8\.s\[w12, 1\] +** ret +*/ +TEST_SELECT_P (psel_p15_p13_p8_w12p1, svbool_t, + p15 = svpsel_b32 (p13, p8, w12 + 1), + p15 = svpsel_b32 (p13, p8, w12 + 1)) + +/* +** psel_p13_p8_p7_w12p3: +** psel p13, p8, p7\.s\[w12, 3\] +** ret +*/ +TEST_SELECT_P (psel_p13_p8_p7_w12p3, svbool_t, + p13 = svpsel_b32 (p8, p7, w12 + 3), + p13 = svpsel_b32 (p8, p7, w12 + 3)) + +/* +** psel_p0_p0_p0_w12p4: +** add (w[0-9]+), w12, #?4 +** psel p0, p0, p0\.s\[\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p0_p0_p0_w12p4, svbool_t, + p0 = svpsel_b32 (p0, p0, w12 + 4), + p0 = svpsel_b32 (p0, p0, w12 + 4)) + +/* +** psel_p15_p15_p15_w12m1: +** sub (w[0-9]+), w12, #?1 +** psel p15, p15, p15\.s\[\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p15_p15_p15_w12m1, svbool_t, + p15 = svpsel_b32 (p15, p15, w12 - 1), + p15 = svpsel_b32 (p15, p15, w12 - 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_b64.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_b64.c new file mode 100644 index 0000000000000000000000000000000000000000..5d2d8b48f50e3868c8895a2577ee647d8df932d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_b64.c @@ -0,0 +1,80 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** psel_p0_p2_p7_0: +** mov [wx](1[2-5]), #?0 +** psel p0, p2, p7\.d\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p0_p2_p7_0, svbool_t, + p0 = svpsel_b64 (p2, p7, 0), + p0 = svpsel_b64 (p2, p7, 0)) + +/* +** psel_p2_p7_p8_w11: +** mov [wx](1[2-5]), [wx]11 +** psel p2, p7, p8\.d\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p2_p7_p8_w11, svbool_t, + p2 = svpsel_b64 (p7, p8, w11), + p2 = svpsel_b64 (p7, p8, w11)) + +/* +** psel_p7_p8_p13_w12: +** psel p7, p8, p13\.d\[w12, 0\] +** ret +*/ +TEST_SELECT_P 
(psel_p7_p8_p13_w12, svbool_t, + p7 = svpsel_b64 (p8, p13, w12), + p7 = svpsel_b64 (p8, p13, w12)) + +/* +** psel_p8_p13_p15_w15: +** psel p8, p13, p15\.d\[w15, 0\] +** ret +*/ +TEST_SELECT_P (psel_p8_p13_p15_w15, svbool_t, + p8 = svpsel_b64 (p13, p15, w15), + p8 = svpsel_b64 (p13, p15, w15)) + +/* +** psel_p13_p15_p0_w16: +** mov [wx](1[2-5]), [wx]16 +** psel p13, p15, p0\.d\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p13_p15_p0_w16, svbool_t, + p13 = svpsel_b64 (p15, p0, w16), + p13 = svpsel_b64 (p15, p0, w16)) + +/* +** psel_p15_p13_p8_w12p1: +** psel p15, p13, p8\.d\[w12, 1\] +** ret +*/ +TEST_SELECT_P (psel_p15_p13_p8_w12p1, svbool_t, + p15 = svpsel_b64 (p13, p8, w12 + 1), + p15 = svpsel_b64 (p13, p8, w12 + 1)) + +/* +** psel_p0_p0_p0_w12p2: +** add (w[0-9]+), w12, #?2 +** psel p0, p0, p0\.d\[\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p0_p0_p0_w12p2, svbool_t, + p0 = svpsel_b64 (p0, p0, w12 + 2), + p0 = svpsel_b64 (p0, p0, w12 + 2)) + +/* +** psel_p15_p15_p15_w12m1: +** sub (w[0-9]+), w12, #?1 +** psel p15, p15, p15\.d\[\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p15_p15_p15_w12m1, svbool_t, + p15 = svpsel_b64 (p15, p15, w12 - 1), + p15 = svpsel_b64 (p15, p15, w12 - 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_b8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_b8.c new file mode 100644 index 0000000000000000000000000000000000000000..438735893c93f6e84b7fc6f8c069ae294fa2f356 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_b8.c @@ -0,0 +1,89 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** psel_p0_p2_p7_0: +** mov [wx](1[2-5]), #?0 +** psel p0, p2, p7\.b\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p0_p2_p7_0, svbool_t, + p0 = svpsel_b8 (p2, p7, 0), + p0 = svpsel_b8 (p2, p7, 0)) + +/* +** psel_p2_p7_p8_w11: +** mov [wx](1[2-5]), [wx]11 +** psel p2, p7, p8\.b\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p2_p7_p8_w11, svbool_t, + p2 = svpsel_b8 (p7, p8, w11), + p2 = svpsel_b8 (p7, p8, w11)) + +/* +** psel_p7_p8_p13_w12: +** psel p7, p8, p13\.b\[w12, 0\] +** ret +*/ +TEST_SELECT_P (psel_p7_p8_p13_w12, svbool_t, + p7 = svpsel_b8 (p8, p13, w12), + p7 = svpsel_b8 (p8, p13, w12)) + +/* +** psel_p8_p13_p15_w15: +** psel p8, p13, p15\.b\[w15, 0\] +** ret +*/ +TEST_SELECT_P (psel_p8_p13_p15_w15, svbool_t, + p8 = svpsel_b8 (p13, p15, w15), + p8 = svpsel_b8 (p13, p15, w15)) + +/* +** psel_p13_p15_p0_w16: +** mov [wx](1[2-5]), [wx]16 +** psel p13, p15, p0\.b\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p13_p15_p0_w16, svbool_t, + p13 = svpsel_b8 (p15, p0, w16), + p13 = svpsel_b8 (p15, p0, w16)) + +/* +** psel_p15_p13_p8_w12p1: +** psel p15, p13, p8\.b\[w12, 1\] +** ret +*/ +TEST_SELECT_P (psel_p15_p13_p8_w12p1, svbool_t, + p15 = svpsel_b8 (p13, p8, w12 + 1), + p15 = svpsel_b8 (p13, p8, w12 + 1)) + +/* +** psel_p13_p8_p7_w12p15: +** psel p13, p8, p7\.b\[w12, 15\] +** ret +*/ +TEST_SELECT_P (psel_p13_p8_p7_w12p15, svbool_t, + p13 = svpsel_b8 (p8, p7, w12 + 15), + p13 = svpsel_b8 (p8, p7, w12 + 15)) + +/* +** psel_p0_p0_p0_w12p16: +** add (w[0-9]+), w12, #?16 +** psel p0, p0, p0\.b\[\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p0_p0_p0_w12p16, svbool_t, + p0 = svpsel_b8 (p0, p0, w12 + 16), + p0 = svpsel_b8 (p0, p0, w12 + 16)) + +/* +** psel_p15_p15_p15_w12m1: +** sub (w[0-9]+), w12, #?1 +** psel p15, p15, p15\.b\[\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p15_p15_p15_w12m1, svbool_t, + p15 = svpsel_b8 (p15, p15, w12 - 1), + p15 = svpsel_b8 (p15, p15, w12 - 1)) diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_c16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_c16.c new file mode 100644 index 0000000000000000000000000000000000000000..1cf45e6c28385d026dff01894878fe8be12b75f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_c16.c @@ -0,0 +1,89 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** psel_p0_p2_p7_0: +** mov [wx](1[2-5]), #?0 +** psel p0, p2, p7\.h\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p0_p2_p7_0, svcount_t, + p0 = svpsel_c16 (p2, p7, 0), + p0 = svpsel_c16 (p2, p7, 0)) + +/* +** psel_p2_p0_p8_w11: +** mov [wx](1[2-5]), [wx]11 +** psel p2, p0, p8\.h\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p2_p0_p8_w11, svcount_t, + p2 = svpsel_c16 (p0, p8, w11), + p2 = svpsel_c16 (p0, p8, w11)) + +/* +** psel_p2_p13_p15_w12: +** psel p2, p13, p15\.h\[w12, 0\] +** ret +*/ +TEST_SELECT_P (psel_p2_p13_p15_w12, svcount_t, + p2 = svpsel_c16 (p13, p15, w12), + p2 = svpsel_c16 (p13, p15, w12)) + +/* +** psel_p0_p13_p15_w15: +** psel p0, p13, p15\.h\[w15, 0\] +** ret +*/ +TEST_SELECT_P (psel_p0_p13_p15_w15, svcount_t, + p0 = svpsel_c16 (p13, p15, w15), + p0 = svpsel_c16 (p13, p15, w15)) + +/* +** psel_p13_p0_p15_w16: +** mov [wx](1[2-5]), [wx]16 +** psel p13, p0, p15\.h\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p13_p0_p15_w16, svcount_t, + p13 = svpsel_c16 (p0, p15, w16), + p13 = svpsel_c16 (p0, p15, w16)) + +/* +** psel_p2_p13_p8_w12p1: +** psel p2, p13, p8\.h\[w12, 1\] +** ret +*/ +TEST_SELECT_P (psel_p2_p13_p8_w12p1, svcount_t, + p2 = svpsel_c16 (p13, p8, w12 + 1), + p2 = svpsel_c16 (p13, p8, w12 + 1)) + +/* +** psel_p13_p0_p7_w12p7: +** psel p13, p0, p7\.h\[w12, 7\] +** ret +*/ +TEST_SELECT_P (psel_p13_p0_p7_w12p7, svcount_t, + p13 = svpsel_c16 (p0, p7, w12 + 7), + p13 = svpsel_c16 (p0, p7, w12 + 7)) + +/* +** psel_p0_p0_p15_w12p8: +** add (w[0-9]+), w12, #?8 +** psel p0, p0, p15\.h\[\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p0_p0_p15_w12p8, svcount_t, + p0 = svpsel_c16 (p0, p15, w12 + 8), + p0 = svpsel_c16 (p0, p15, w12 + 8)) + +/* +** psel_p13_p13_p7_w12m1: +** sub (w[0-9]+), w12, #?1 +** psel p13, p13, p7\.h\[\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p13_p13_p7_w12m1, svcount_t, + p13 = svpsel_c16 (p13, p7, w12 - 1), + p13 = svpsel_c16 (p13, p7, w12 - 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_c32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_c32.c new file mode 100644 index 0000000000000000000000000000000000000000..5246a031595abed303d3ca8ab55b3434f1f6e229 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_c32.c @@ -0,0 +1,89 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** psel_p0_p2_p7_0: +** mov [wx](1[2-5]), #?0 +** psel p0, p2, p7\.s\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p0_p2_p7_0, svcount_t, + p0 = svpsel_c32 (p2, p7, 0), + p0 = svpsel_c32 (p2, p7, 0)) + +/* +** psel_p2_p13_p8_w11: +** mov [wx](1[2-5]), [wx]11 +** psel p2, p13, p8\.s\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p2_p13_p8_w11, svcount_t, + p2 = svpsel_c32 (p13, p8, w11), + p2 = svpsel_c32 (p13, p8, w11)) + +/* +** psel_p0_p13_p15_w12: +** psel p0, p13, p15\.s\[w12, 0\] +** ret +*/ +TEST_SELECT_P (psel_p0_p13_p15_w12, svcount_t, + p0 = svpsel_c32 (p13, p15, w12), + p0 = svpsel_c32 (p13, p15, w12)) + +/* +** psel_p2_p0_p15_w15: +** psel p2, p0, p15\.s\[w15, 0\] +** ret +*/ +TEST_SELECT_P (psel_p2_p0_p15_w15, svcount_t, + p2 = svpsel_c32 (p0, p15, w15), + p2 = 
svpsel_c32 (p0, p15, w15)) + +/* +** psel_p13_p0_p7_w16: +** mov [wx](1[2-5]), [wx]16 +** psel p13, p0, p7\.s\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p13_p0_p7_w16, svcount_t, + p13 = svpsel_c32 (p0, p7, w16), + p13 = svpsel_c32 (p0, p7, w16)) + +/* +** psel_p2_p13_p8_w12p1: +** psel p2, p13, p8\.s\[w12, 1\] +** ret +*/ +TEST_SELECT_P (psel_p2_p13_p8_w12p1, svcount_t, + p2 = svpsel_c32 (p13, p8, w12 + 1), + p2 = svpsel_c32 (p13, p8, w12 + 1)) + +/* +** psel_p13_p0_p7_w12p3: +** psel p13, p0, p7\.s\[w12, 3\] +** ret +*/ +TEST_SELECT_P (psel_p13_p0_p7_w12p3, svcount_t, + p13 = svpsel_c32 (p0, p7, w12 + 3), + p13 = svpsel_c32 (p0, p7, w12 + 3)) + +/* +** psel_p0_p0_p7_w12p4: +** add (w[0-9]+), w12, #?4 +** psel p0, p0, p7\.s\[\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p0_p0_p7_w12p4, svcount_t, + p0 = svpsel_c32 (p0, p7, w12 + 4), + p0 = svpsel_c32 (p0, p7, w12 + 4)) + +/* +** psel_p13_p13_p15_w12m1: +** sub (w[0-9]+), w12, #?1 +** psel p13, p13, p15\.s\[\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p13_p13_p15_w12m1, svcount_t, + p13 = svpsel_c32 (p13, p15, w12 - 1), + p13 = svpsel_c32 (p13, p15, w12 - 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_c64.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_c64.c new file mode 100644 index 0000000000000000000000000000000000000000..97304fd1646233c522568043bbce0a01376f1ccf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_c64.c @@ -0,0 +1,80 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** psel_p0_p2_p7_0: +** mov [wx](1[2-5]), #?0 +** psel p0, p2, p7\.d\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p0_p2_p7_0, svcount_t, + p0 = svpsel_c64 (p2, p7, 0), + p0 = svpsel_c64 (p2, p7, 0)) + +/* +** psel_p2_p13_p8_w11: +** mov [wx](1[2-5]), [wx]11 +** psel p2, p13, p8\.d\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p2_p13_p8_w11, svcount_t, + p2 = svpsel_c64 (p13, p8, w11), + p2 = svpsel_c64 (p13, p8, w11)) + +/* +** psel_p2_p0_p15_w12: +** psel p2, p0, p15\.d\[w12, 0\] +** ret +*/ +TEST_SELECT_P (psel_p2_p0_p15_w12, svcount_t, + p2 = svpsel_c64 (p0, p15, w12), + p2 = svpsel_c64 (p0, p15, w12)) + +/* +** psel_p0_p13_p15_w15: +** psel p0, p13, p15\.d\[w15, 0\] +** ret +*/ +TEST_SELECT_P (psel_p0_p13_p15_w15, svcount_t, + p0 = svpsel_c64 (p13, p15, w15), + p0 = svpsel_c64 (p13, p15, w15)) + +/* +** psel_p13_p0_p15_w16: +** mov [wx](1[2-5]), [wx]16 +** psel p13, p0, p15\.d\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p13_p0_p15_w16, svcount_t, + p13 = svpsel_c64 (p0, p15, w16), + p13 = svpsel_c64 (p0, p15, w16)) + +/* +** psel_p2_p13_p8_w12p1: +** psel p2, p13, p8\.d\[w12, 1\] +** ret +*/ +TEST_SELECT_P (psel_p2_p13_p8_w12p1, svcount_t, + p2 = svpsel_c64 (p13, p8, w12 + 1), + p2 = svpsel_c64 (p13, p8, w12 + 1)) + +/* +** psel_p0_p0_p8_w12p2: +** add (w[0-9]+), w12, #?2 +** psel p0, p0, p8\.d\[\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p0_p0_p8_w12p2, svcount_t, + p0 = svpsel_c64 (p0, p8, w12 + 2), + p0 = svpsel_c64 (p0, p8, w12 + 2)) + +/* +** psel_p13_p13_p15_w12m1: +** sub (w[0-9]+), w12, #?1 +** psel p13, p13, p15\.d\[\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p13_p13_p15_w12m1, svcount_t, + p13 = svpsel_c64 (p13, p15, w12 - 1), + p13 = svpsel_c64 (p13, p15, w12 - 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_c8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_c8.c new file mode 100644 index 0000000000000000000000000000000000000000..95973a17ebd87301ddc50b1da9f49ddc245f9d0f --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_c8.c @@ -0,0 +1,89 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** psel_p0_p2_p7_0: +** mov [wx](1[2-5]), #?0 +** psel p0, p2, p7\.b\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p0_p2_p7_0, svcount_t, + p0 = svpsel_c8 (p2, p7, 0), + p0 = svpsel_c8 (p2, p7, 0)) + +/* +** psel_p2_p0_p8_w11: +** mov [wx](1[2-5]), [wx]11 +** psel p2, p0, p8\.b\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p2_p0_p8_w11, svcount_t, + p2 = svpsel_c8 (p0, p8, w11), + p2 = svpsel_c8 (p0, p8, w11)) + +/* +** psel_p0_p13_p15_w12: +** psel p0, p13, p15\.b\[w12, 0\] +** ret +*/ +TEST_SELECT_P (psel_p0_p13_p15_w12, svcount_t, + p0 = svpsel_c8 (p13, p15, w12), + p0 = svpsel_c8 (p13, p15, w12)) + +/* +** psel_p13_p0_p8_w15: +** psel p13, p0, p8\.b\[w15, 0\] +** ret +*/ +TEST_SELECT_P (psel_p13_p0_p8_w15, svcount_t, + p13 = svpsel_c8 (p0, p8, w15), + p13 = svpsel_c8 (p0, p8, w15)) + +/* +** psel_p2_p13_p7_w16: +** mov [wx](1[2-5]), [wx]16 +** psel p2, p13, p7\.b\[w\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p2_p13_p7_w16, svcount_t, + p2 = svpsel_c8 (p13, p7, w16), + p2 = svpsel_c8 (p13, p7, w16)) + +/* +** psel_p0_p13_p8_w12p1: +** psel p0, p13, p8\.b\[w12, 1\] +** ret +*/ +TEST_SELECT_P (psel_p0_p13_p8_w12p1, svcount_t, + p0 = svpsel_c8 (p13, p8, w12 + 1), + p0 = svpsel_c8 (p13, p8, w12 + 1)) + +/* +** psel_p13_p2_p7_w12p15: +** psel p13, p2, p7\.b\[w12, 15\] +** ret +*/ +TEST_SELECT_P (psel_p13_p2_p7_w12p15, svcount_t, + p13 = svpsel_c8 (p2, p7, w12 + 15), + p13 = svpsel_c8 (p2, p7, w12 + 15)) + +/* +** psel_p0_p0_p15_w12p16: +** add (w[0-9]+), w12, #?16 +** psel p0, p0, p15\.b\[\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p0_p0_p15_w12p16, svcount_t, + p0 = svpsel_c8 (p0, p15, w12 + 16), + p0 = svpsel_c8 (p0, p15, w12 + 16)) + +/* +** psel_p13_p13_p15_w12m1: +** sub (w[0-9]+), w12, #?1 +** psel p13, p13, p15\.b\[\1, 0\] +** ret +*/ +TEST_SELECT_P (psel_p13_p13_p15_w12m1, svcount_t, + p13 = svpsel_c8 (p13, p15, w12 - 1), + p13 = svpsel_c8 (p13, p15, w12 - 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ptrue_c16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ptrue_c16.c new file mode 100644 index 0000000000000000000000000000000000000000..3e157bf8f3a688312be3697c6b6cc1220a1525d6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ptrue_c16.c @@ -0,0 +1,41 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** ptrue_pn0: +** ptrue pn([8-9]|1[0-5])\.h +** mov p0\.b, p\1\.b +** ret +*/ +TEST_PN (ptrue_pn0, + pn0 = svptrue_c16 (), + pn0 = svptrue_c16 ()) + +/* +** ptrue_pn7: +** ptrue pn([8-9]|1[0-5])\.h +** mov p7\.b, p\1\.b +** ret +*/ +TEST_PN (ptrue_pn7, + pn7 = svptrue_c16 (), + pn7 = svptrue_c16 ()) + +/* +** ptrue_pn8: +** ptrue pn8\.h +** ret +*/ +TEST_PN (ptrue_pn8, + pn8 = svptrue_c16 (), + pn8 = svptrue_c16 ()) + +/* +** ptrue_pn15: +** ptrue pn15\.h +** ret +*/ +TEST_PN (ptrue_pn15, + pn15 = svptrue_c16 (), + pn15 = svptrue_c16 ()) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ptrue_c32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ptrue_c32.c new file mode 100644 index 0000000000000000000000000000000000000000..49ad1ad12a55e0ca84dc15faca472c66d2e9590d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ptrue_c32.c @@ -0,0 +1,41 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** ptrue_pn0: +** ptrue pn([8-9]|1[0-5])\.s +** mov 
p0\.b, p\1\.b +** ret +*/ +TEST_PN (ptrue_pn0, + pn0 = svptrue_c32 (), + pn0 = svptrue_c32 ()) + +/* +** ptrue_pn7: +** ptrue pn([8-9]|1[0-5])\.s +** mov p7\.b, p\1\.b +** ret +*/ +TEST_PN (ptrue_pn7, + pn7 = svptrue_c32 (), + pn7 = svptrue_c32 ()) + +/* +** ptrue_pn8: +** ptrue pn8\.s +** ret +*/ +TEST_PN (ptrue_pn8, + pn8 = svptrue_c32 (), + pn8 = svptrue_c32 ()) + +/* +** ptrue_pn15: +** ptrue pn15\.s +** ret +*/ +TEST_PN (ptrue_pn15, + pn15 = svptrue_c32 (), + pn15 = svptrue_c32 ()) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ptrue_c64.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ptrue_c64.c new file mode 100644 index 0000000000000000000000000000000000000000..746b892ac62b8596b9acbcde36e7a2da3bcd2ce5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ptrue_c64.c @@ -0,0 +1,41 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** ptrue_pn0: +** ptrue pn([8-9]|1[0-5])\.d +** mov p0\.b, p\1\.b +** ret +*/ +TEST_PN (ptrue_pn0, + pn0 = svptrue_c64 (), + pn0 = svptrue_c64 ()) + +/* +** ptrue_pn7: +** ptrue pn([8-9]|1[0-5])\.d +** mov p7\.b, p\1\.b +** ret +*/ +TEST_PN (ptrue_pn7, + pn7 = svptrue_c64 (), + pn7 = svptrue_c64 ()) + +/* +** ptrue_pn8: +** ptrue pn8\.d +** ret +*/ +TEST_PN (ptrue_pn8, + pn8 = svptrue_c64 (), + pn8 = svptrue_c64 ()) + +/* +** ptrue_pn15: +** ptrue pn15\.d +** ret +*/ +TEST_PN (ptrue_pn15, + pn15 = svptrue_c64 (), + pn15 = svptrue_c64 ()) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ptrue_c8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ptrue_c8.c new file mode 100644 index 0000000000000000000000000000000000000000..60b4d7210b4e69629dc9f3ae2d0323646f94b20e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ptrue_c8.c @@ -0,0 +1,41 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** ptrue_pn0: +** ptrue pn([8-9]|1[0-5])\.b +** mov p0\.b, p\1\.b +** ret +*/ +TEST_PN (ptrue_pn0, + pn0 = svptrue_c8 (), + pn0 = svptrue_c8 ()) + +/* +** ptrue_pn7: +** ptrue pn([8-9]|1[0-5])\.b +** mov p7\.b, p\1\.b +** ret +*/ +TEST_PN (ptrue_pn7, + pn7 = svptrue_c8 (), + pn7 = svptrue_c8 ()) + +/* +** ptrue_pn8: +** ptrue pn8\.b +** ret +*/ +TEST_PN (ptrue_pn8, + pn8 = svptrue_c8 (), + pn8 = svptrue_c8 ()) + +/* +** ptrue_pn15: +** ptrue pn15\.b +** ret +*/ +TEST_PN (ptrue_pn15, + pn15 = svptrue_c8 (), + pn15 = svptrue_c8 ()) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_s16_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_s16_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..aa1b76fccd487370acbbbffdf042e3f672dfd775 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_s16_s32_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qcvt_z0_z0: +** sqcvt z0\.h, {z0\.s - z1\.s} +** ret +*/ +TEST_X2_NARROW (qcvt_z0_z0, svint32x2_t, svint16_t, + z0_res = svqcvt_s16_s32_x2 (z0), + z0_res = svqcvt_s16 (z0)) + +/* +** qcvt_z0_z6: +** sqcvt z0\.h, {z6\.s - z7\.s} +** ret +*/ +TEST_X2_NARROW (qcvt_z0_z6, svint32x2_t, svint16_t, + z0_res = svqcvt_s16_s32_x2 (z6), + z0_res = svqcvt_s16 (z6)) + +/* +** qcvt_z0_z29: +** mov [^\n]+ +** mov [^\n]+ +** sqcvt z0\.h, [^\n]+ +** ret +*/ +TEST_X2_NARROW (qcvt_z0_z29, svint32x2_t, svint16_t, + z0_res = svqcvt_s16_s32_x2 (z29), + z0_res = svqcvt_s16 (z29)) + +/* +** qcvt_z5_z0: +** sqcvt z5\.h, {z0\.s - z1\.s} +** ret 
+*/ +TEST_X2_NARROW (qcvt_z5_z0, svint32x2_t, svint16_t, + z5 = svqcvt_s16_s32_x2 (z0), + z5 = svqcvt_s16 (z0)) + +/* +** qcvt_z22_z16: +** sqcvt z22\.h, {z16\.s - z17\.s} +** ret +*/ +TEST_X2_NARROW (qcvt_z22_z16, svint32x2_t, svint16_t, + z22 = svqcvt_s16_s32_x2 (z16), + z22 = svqcvt_s16 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_s16_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_s16_s64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..4d0d768de4b7e930cff02f9eea6d645449888ac8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_s16_s64_x4.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qcvt_z0_z0: +** sqcvt z0\.h, {z0\.d - z3\.d} +** ret +*/ +TEST_X4_NARROW (qcvt_z0_z0, svint64x4_t, svint16_t, + z0_res = svqcvt_s16_s64_x4 (z0), + z0_res = svqcvt_s16 (z0)) + +/* +** qcvt_z0_z4: +** sqcvt z0\.h, {z4\.d - z7\.d} +** ret +*/ +TEST_X4_NARROW (qcvt_z0_z4, svint64x4_t, svint16_t, + z0_res = svqcvt_s16_s64_x4 (z4), + z0_res = svqcvt_s16 (z4)) + +/* +** qcvt_z0_z21: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqcvt z0\.h, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvt_z0_z21, svint64x4_t, svint16_t, + z0_res = svqcvt_s16_s64_x4 (z21), + z0_res = svqcvt_s16 (z21)) + +/* +** qcvt_z25_z26: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqcvt z25\.h, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvt_z25_z26, svint64x4_t, svint16_t, + z25 = svqcvt_s16_s64_x4 (z26), + z25 = svqcvt_s16 (z26)) + +/* +** qcvt_z25_z0: +** sqcvt z25\.h, {z0\.d - z3\.d} +** ret +*/ +TEST_X4_NARROW (qcvt_z25_z0, svint64x4_t, svint16_t, + z25 = svqcvt_s16_s64_x4 (z0), + z25 = svqcvt_s16 (z0)) + +/* +** qcvt_z22_z16: +** sqcvt z22\.h, {z16\.d - z19\.d} +** ret +*/ +TEST_X4_NARROW (qcvt_z22_z16, svint64x4_t, svint16_t, + z22_res = svqcvt_s16_s64_x4 (z16), + z22_res = svqcvt_s16 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_s8_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_s8_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..2b568be2c72c5c00b9e7f2c4bf7768c7046a1331 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_s8_s32_x4.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qcvt_z0_z0: +** sqcvt z0\.b, {z0\.s - z3\.s} +** ret +*/ +TEST_X4_NARROW (qcvt_z0_z0, svint32x4_t, svint8_t, + z0_res = svqcvt_s8_s32_x4 (z0), + z0_res = svqcvt_s8 (z0)) + +/* +** qcvt_z0_z4: +** sqcvt z0\.b, {z4\.s - z7\.s} +** ret +*/ +TEST_X4_NARROW (qcvt_z0_z4, svint32x4_t, svint8_t, + z0_res = svqcvt_s8_s32_x4 (z4), + z0_res = svqcvt_s8 (z4)) + +/* +** qcvt_z0_z21: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqcvt z0\.b, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvt_z0_z21, svint32x4_t, svint8_t, + z0_res = svqcvt_s8_s32_x4 (z21), + z0_res = svqcvt_s8 (z21)) + +/* +** qcvt_z25_z26: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqcvt z25\.b, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvt_z25_z26, svint32x4_t, svint8_t, + z25 = svqcvt_s8_s32_x4 (z26), + z25 = svqcvt_s8 (z26)) + +/* +** qcvt_z25_z0: +** sqcvt z25\.b, {z0\.s - z3\.s} +** ret +*/ +TEST_X4_NARROW (qcvt_z25_z0, svint32x4_t, svint8_t, + z25 = svqcvt_s8_s32_x4 (z0), + z25 = svqcvt_s8 (z0)) + +/* +** qcvt_z22_z16: +** sqcvt z22\.b, {z16\.s - z19\.s} +** ret +*/ +TEST_X4_NARROW (qcvt_z22_z16, svint32x4_t, svint8_t, + 
z22_res = svqcvt_s8_s32_x4 (z16), + z22_res = svqcvt_s8 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u16_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u16_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..e87e9e40d7933911a6e3634c058a97a56ec54dee --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u16_s32_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qcvt_z0_z0: +** sqcvtu z0\.h, {z0\.s - z1\.s} +** ret +*/ +TEST_X2_NARROW (qcvt_z0_z0, svint32x2_t, svuint16_t, + z0_res = svqcvt_u16_s32_x2 (z0), + z0_res = svqcvt_u16 (z0)) + +/* +** qcvt_z0_z6: +** sqcvtu z0\.h, {z6\.s - z7\.s} +** ret +*/ +TEST_X2_NARROW (qcvt_z0_z6, svint32x2_t, svuint16_t, + z0_res = svqcvt_u16_s32_x2 (z6), + z0_res = svqcvt_u16 (z6)) + +/* +** qcvt_z0_z29: +** mov [^\n]+ +** mov [^\n]+ +** sqcvtu z0\.h, [^\n]+ +** ret +*/ +TEST_X2_NARROW (qcvt_z0_z29, svint32x2_t, svuint16_t, + z0_res = svqcvt_u16_s32_x2 (z29), + z0_res = svqcvt_u16 (z29)) + +/* +** qcvt_z5_z0: +** sqcvtu z5\.h, {z0\.s - z1\.s} +** ret +*/ +TEST_X2_NARROW (qcvt_z5_z0, svint32x2_t, svuint16_t, + z5 = svqcvt_u16_s32_x2 (z0), + z5 = svqcvt_u16 (z0)) + +/* +** qcvt_z22_z16: +** sqcvtu z22\.h, {z16\.s - z17\.s} +** ret +*/ +TEST_X2_NARROW (qcvt_z22_z16, svint32x2_t, svuint16_t, + z22 = svqcvt_u16_s32_x2 (z16), + z22 = svqcvt_u16 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u16_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u16_s64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..288f9667ff96581e6afee1d56dd50341f729dd19 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u16_s64_x4.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qcvt_z0_z0: +** sqcvtu z0\.h, {z0\.d - z3\.d} +** ret +*/ +TEST_X4_NARROW (qcvt_z0_z0, svint64x4_t, svuint16_t, + z0_res = svqcvt_u16_s64_x4 (z0), + z0_res = svqcvt_u16 (z0)) + +/* +** qcvt_z0_z4: +** sqcvtu z0\.h, {z4\.d - z7\.d} +** ret +*/ +TEST_X4_NARROW (qcvt_z0_z4, svint64x4_t, svuint16_t, + z0_res = svqcvt_u16_s64_x4 (z4), + z0_res = svqcvt_u16 (z4)) + +/* +** qcvt_z0_z21: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqcvtu z0\.h, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvt_z0_z21, svint64x4_t, svuint16_t, + z0_res = svqcvt_u16_s64_x4 (z21), + z0_res = svqcvt_u16 (z21)) + +/* +** qcvt_z25_z26: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqcvtu z25\.h, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvt_z25_z26, svint64x4_t, svuint16_t, + z25 = svqcvt_u16_s64_x4 (z26), + z25 = svqcvt_u16 (z26)) + +/* +** qcvt_z25_z0: +** sqcvtu z25\.h, {z0\.d - z3\.d} +** ret +*/ +TEST_X4_NARROW (qcvt_z25_z0, svint64x4_t, svuint16_t, + z25 = svqcvt_u16_s64_x4 (z0), + z25 = svqcvt_u16 (z0)) + +/* +** qcvt_z22_z16: +** sqcvtu z22\.h, {z16\.d - z19\.d} +** ret +*/ +TEST_X4_NARROW (qcvt_z22_z16, svint64x4_t, svuint16_t, + z22_res = svqcvt_u16_s64_x4 (z16), + z22_res = svqcvt_u16 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u16_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u16_u32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..77c2848cfcb2f4e46591d03f34090a42b8dab2e3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u16_u32_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" 
"-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qcvt_z0_z0: +** uqcvt z0\.h, {z0\.s - z1\.s} +** ret +*/ +TEST_X2_NARROW (qcvt_z0_z0, svuint32x2_t, svuint16_t, + z0_res = svqcvt_u16_u32_x2 (z0), + z0_res = svqcvt_u16 (z0)) + +/* +** qcvt_z0_z6: +** uqcvt z0\.h, {z6\.s - z7\.s} +** ret +*/ +TEST_X2_NARROW (qcvt_z0_z6, svuint32x2_t, svuint16_t, + z0_res = svqcvt_u16_u32_x2 (z6), + z0_res = svqcvt_u16 (z6)) + +/* +** qcvt_z0_z29: +** mov [^\n]+ +** mov [^\n]+ +** uqcvt z0\.h, [^\n]+ +** ret +*/ +TEST_X2_NARROW (qcvt_z0_z29, svuint32x2_t, svuint16_t, + z0_res = svqcvt_u16_u32_x2 (z29), + z0_res = svqcvt_u16 (z29)) + +/* +** qcvt_z5_z0: +** uqcvt z5\.h, {z0\.s - z1\.s} +** ret +*/ +TEST_X2_NARROW (qcvt_z5_z0, svuint32x2_t, svuint16_t, + z5 = svqcvt_u16_u32_x2 (z0), + z5 = svqcvt_u16 (z0)) + +/* +** qcvt_z22_z16: +** uqcvt z22\.h, {z16\.s - z17\.s} +** ret +*/ +TEST_X2_NARROW (qcvt_z22_z16, svuint32x2_t, svuint16_t, + z22 = svqcvt_u16_u32_x2 (z16), + z22 = svqcvt_u16 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u16_u64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u16_u64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..4ff7275b426348c180f8eeab76e1c2d0b23892cf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u16_u64_x4.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qcvt_z0_z0: +** uqcvt z0\.h, {z0\.d - z3\.d} +** ret +*/ +TEST_X4_NARROW (qcvt_z0_z0, svuint64x4_t, svuint16_t, + z0_res = svqcvt_u16_u64_x4 (z0), + z0_res = svqcvt_u16 (z0)) + +/* +** qcvt_z0_z4: +** uqcvt z0\.h, {z4\.d - z7\.d} +** ret +*/ +TEST_X4_NARROW (qcvt_z0_z4, svuint64x4_t, svuint16_t, + z0_res = svqcvt_u16_u64_x4 (z4), + z0_res = svqcvt_u16 (z4)) + +/* +** qcvt_z0_z21: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uqcvt z0\.h, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvt_z0_z21, svuint64x4_t, svuint16_t, + z0_res = svqcvt_u16_u64_x4 (z21), + z0_res = svqcvt_u16 (z21)) + +/* +** qcvt_z25_z26: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uqcvt z25\.h, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvt_z25_z26, svuint64x4_t, svuint16_t, + z25 = svqcvt_u16_u64_x4 (z26), + z25 = svqcvt_u16 (z26)) + +/* +** qcvt_z25_z0: +** uqcvt z25\.h, {z0\.d - z3\.d} +** ret +*/ +TEST_X4_NARROW (qcvt_z25_z0, svuint64x4_t, svuint16_t, + z25 = svqcvt_u16_u64_x4 (z0), + z25 = svqcvt_u16 (z0)) + +/* +** qcvt_z22_z16: +** uqcvt z22\.h, {z16\.d - z19\.d} +** ret +*/ +TEST_X4_NARROW (qcvt_z22_z16, svuint64x4_t, svuint16_t, + z22_res = svqcvt_u16_u64_x4 (z16), + z22_res = svqcvt_u16 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u8_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u8_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..c9848ba1cc3785791969a447e92f8d935f07c277 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u8_s32_x4.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qcvt_z0_z0: +** sqcvtu z0\.b, {z0\.s - z3\.s} +** ret +*/ +TEST_X4_NARROW (qcvt_z0_z0, svint32x4_t, svuint8_t, + z0_res = svqcvt_u8_s32_x4 (z0), + z0_res = svqcvt_u8 (z0)) + +/* +** qcvt_z0_z4: +** sqcvtu z0\.b, {z4\.s - z7\.s} +** ret +*/ +TEST_X4_NARROW (qcvt_z0_z4, svint32x4_t, svuint8_t, + z0_res = svqcvt_u8_s32_x4 (z4), + z0_res = svqcvt_u8 (z4)) + +/* +** qcvt_z0_z21: +** mov [^\n]+ +** mov [^\n]+ +** 
mov [^\n]+ +** mov [^\n]+ +** sqcvtu z0\.b, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvt_z0_z21, svint32x4_t, svuint8_t, + z0_res = svqcvt_u8_s32_x4 (z21), + z0_res = svqcvt_u8 (z21)) + +/* +** qcvt_z25_z26: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqcvtu z25\.b, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvt_z25_z26, svint32x4_t, svuint8_t, + z25 = svqcvt_u8_s32_x4 (z26), + z25 = svqcvt_u8 (z26)) + +/* +** qcvt_z25_z0: +** sqcvtu z25\.b, {z0\.s - z3\.s} +** ret +*/ +TEST_X4_NARROW (qcvt_z25_z0, svint32x4_t, svuint8_t, + z25 = svqcvt_u8_s32_x4 (z0), + z25 = svqcvt_u8 (z0)) + +/* +** qcvt_z22_z16: +** sqcvtu z22\.b, {z16\.s - z19\.s} +** ret +*/ +TEST_X4_NARROW (qcvt_z22_z16, svint32x4_t, svuint8_t, + z22_res = svqcvt_u8_s32_x4 (z16), + z22_res = svqcvt_u8 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u8_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u8_u32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..8eb7fb60121f21a9df0710015514b50b9899a32f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvt_u8_u32_x4.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qcvt_z0_z0: +** uqcvt z0\.b, {z0\.s - z3\.s} +** ret +*/ +TEST_X4_NARROW (qcvt_z0_z0, svuint32x4_t, svuint8_t, + z0_res = svqcvt_u8_u32_x4 (z0), + z0_res = svqcvt_u8 (z0)) + +/* +** qcvt_z0_z4: +** uqcvt z0\.b, {z4\.s - z7\.s} +** ret +*/ +TEST_X4_NARROW (qcvt_z0_z4, svuint32x4_t, svuint8_t, + z0_res = svqcvt_u8_u32_x4 (z4), + z0_res = svqcvt_u8 (z4)) + +/* +** qcvt_z0_z21: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uqcvt z0\.b, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvt_z0_z21, svuint32x4_t, svuint8_t, + z0_res = svqcvt_u8_u32_x4 (z21), + z0_res = svqcvt_u8 (z21)) + +/* +** qcvt_z25_z26: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uqcvt z25\.b, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvt_z25_z26, svuint32x4_t, svuint8_t, + z25 = svqcvt_u8_u32_x4 (z26), + z25 = svqcvt_u8 (z26)) + +/* +** qcvt_z25_z0: +** uqcvt z25\.b, {z0\.s - z3\.s} +** ret +*/ +TEST_X4_NARROW (qcvt_z25_z0, svuint32x4_t, svuint8_t, + z25 = svqcvt_u8_u32_x4 (z0), + z25 = svqcvt_u8 (z0)) + +/* +** qcvt_z22_z16: +** uqcvt z22\.b, {z16\.s - z19\.s} +** ret +*/ +TEST_X4_NARROW (qcvt_z22_z16, svuint32x4_t, svuint8_t, + z22_res = svqcvt_u8_u32_x4 (z16), + z22_res = svqcvt_u8 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_s16_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_s16_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..5cee69fdf3e7c61e2f27639ff815361335a9ca24 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_s16_s32_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qcvtn_z0_z0: +** sqcvtn z0\.h, {z0\.s - z1\.s} +** ret +*/ +TEST_X2_NARROW (qcvtn_z0_z0, svint32x2_t, svint16_t, + z0_res = svqcvtn_s16_s32_x2 (z0), + z0_res = svqcvtn_s16 (z0)) + +/* +** qcvtn_z0_z6: +** sqcvtn z0\.h, {z6\.s - z7\.s} +** ret +*/ +TEST_X2_NARROW (qcvtn_z0_z6, svint32x2_t, svint16_t, + z0_res = svqcvtn_s16_s32_x2 (z6), + z0_res = svqcvtn_s16 (z6)) + +/* +** qcvtn_z0_z29: +** mov [^\n]+ +** mov [^\n]+ +** sqcvtn z0\.h, [^\n]+ +** ret +*/ +TEST_X2_NARROW (qcvtn_z0_z29, svint32x2_t, svint16_t, + z0_res = svqcvtn_s16_s32_x2 (z29), + z0_res = svqcvtn_s16 (z29)) + +/* +** qcvtn_z5_z0: +** sqcvtn z5\.h, {z0\.s - z1\.s} +** 
ret +*/ +TEST_X2_NARROW (qcvtn_z5_z0, svint32x2_t, svint16_t, + z5 = svqcvtn_s16_s32_x2 (z0), + z5 = svqcvtn_s16 (z0)) + +/* +** qcvtn_z22_z16: +** sqcvtn z22\.h, {z16\.s - z17\.s} +** ret +*/ +TEST_X2_NARROW (qcvtn_z22_z16, svint32x2_t, svint16_t, + z22 = svqcvtn_s16_s32_x2 (z16), + z22 = svqcvtn_s16 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_s16_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_s16_s64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..fedb0a4d4e0fd478afbe100eaabfcffd45f51d9b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_s16_s64_x4.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qcvtn_z0_z0: +** sqcvtn z0\.h, {z0\.d - z3\.d} +** ret +*/ +TEST_X4_NARROW (qcvtn_z0_z0, svint64x4_t, svint16_t, + z0_res = svqcvtn_s16_s64_x4 (z0), + z0_res = svqcvtn_s16 (z0)) + +/* +** qcvtn_z0_z4: +** sqcvtn z0\.h, {z4\.d - z7\.d} +** ret +*/ +TEST_X4_NARROW (qcvtn_z0_z4, svint64x4_t, svint16_t, + z0_res = svqcvtn_s16_s64_x4 (z4), + z0_res = svqcvtn_s16 (z4)) + +/* +** qcvtn_z0_z21: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqcvtn z0\.h, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvtn_z0_z21, svint64x4_t, svint16_t, + z0_res = svqcvtn_s16_s64_x4 (z21), + z0_res = svqcvtn_s16 (z21)) + +/* +** qcvtn_z25_z26: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqcvtn z25\.h, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvtn_z25_z26, svint64x4_t, svint16_t, + z25 = svqcvtn_s16_s64_x4 (z26), + z25 = svqcvtn_s16 (z26)) + +/* +** qcvtn_z25_z0: +** sqcvtn z25\.h, {z0\.d - z3\.d} +** ret +*/ +TEST_X4_NARROW (qcvtn_z25_z0, svint64x4_t, svint16_t, + z25 = svqcvtn_s16_s64_x4 (z0), + z25 = svqcvtn_s16 (z0)) + +/* +** qcvtn_z22_z16: +** sqcvtn z22\.h, {z16\.d - z19\.d} +** ret +*/ +TEST_X4_NARROW (qcvtn_z22_z16, svint64x4_t, svint16_t, + z22_res = svqcvtn_s16_s64_x4 (z16), + z22_res = svqcvtn_s16 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_s8_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_s8_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..a03e06524b6514152839cc700979d5f67c912995 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_s8_s32_x4.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qcvtn_z0_z0: +** sqcvtn z0\.b, {z0\.s - z3\.s} +** ret +*/ +TEST_X4_NARROW (qcvtn_z0_z0, svint32x4_t, svint8_t, + z0_res = svqcvtn_s8_s32_x4 (z0), + z0_res = svqcvtn_s8 (z0)) + +/* +** qcvtn_z0_z4: +** sqcvtn z0\.b, {z4\.s - z7\.s} +** ret +*/ +TEST_X4_NARROW (qcvtn_z0_z4, svint32x4_t, svint8_t, + z0_res = svqcvtn_s8_s32_x4 (z4), + z0_res = svqcvtn_s8 (z4)) + +/* +** qcvtn_z0_z21: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqcvtn z0\.b, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvtn_z0_z21, svint32x4_t, svint8_t, + z0_res = svqcvtn_s8_s32_x4 (z21), + z0_res = svqcvtn_s8 (z21)) + +/* +** qcvtn_z25_z26: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqcvtn z25\.b, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvtn_z25_z26, svint32x4_t, svint8_t, + z25 = svqcvtn_s8_s32_x4 (z26), + z25 = svqcvtn_s8 (z26)) + +/* +** qcvtn_z25_z0: +** sqcvtn z25\.b, {z0\.s - z3\.s} +** ret +*/ +TEST_X4_NARROW (qcvtn_z25_z0, svint32x4_t, svint8_t, + z25 = svqcvtn_s8_s32_x4 (z0), + z25 = svqcvtn_s8 (z0)) + +/* +** qcvtn_z22_z16: +** sqcvtn z22\.b, {z16\.s - 
z19\.s} +** ret +*/ +TEST_X4_NARROW (qcvtn_z22_z16, svint32x4_t, svint8_t, + z22_res = svqcvtn_s8_s32_x4 (z16), + z22_res = svqcvtn_s8 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u16_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u16_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..3033ac37480006d82e0cccec2d3991e0e54f3323 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u16_s32_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qcvtn_z0_z0: +** sqcvtun z0\.h, {z0\.s - z1\.s} +** ret +*/ +TEST_X2_NARROW (qcvtn_z0_z0, svint32x2_t, svuint16_t, + z0_res = svqcvtn_u16_s32_x2 (z0), + z0_res = svqcvtn_u16 (z0)) + +/* +** qcvtn_z0_z6: +** sqcvtun z0\.h, {z6\.s - z7\.s} +** ret +*/ +TEST_X2_NARROW (qcvtn_z0_z6, svint32x2_t, svuint16_t, + z0_res = svqcvtn_u16_s32_x2 (z6), + z0_res = svqcvtn_u16 (z6)) + +/* +** qcvtn_z0_z29: +** mov [^\n]+ +** mov [^\n]+ +** sqcvtun z0\.h, [^\n]+ +** ret +*/ +TEST_X2_NARROW (qcvtn_z0_z29, svint32x2_t, svuint16_t, + z0_res = svqcvtn_u16_s32_x2 (z29), + z0_res = svqcvtn_u16 (z29)) + +/* +** qcvtn_z5_z0: +** sqcvtun z5\.h, {z0\.s - z1\.s} +** ret +*/ +TEST_X2_NARROW (qcvtn_z5_z0, svint32x2_t, svuint16_t, + z5 = svqcvtn_u16_s32_x2 (z0), + z5 = svqcvtn_u16 (z0)) + +/* +** qcvtn_z22_z16: +** sqcvtun z22\.h, {z16\.s - z17\.s} +** ret +*/ +TEST_X2_NARROW (qcvtn_z22_z16, svint32x2_t, svuint16_t, + z22 = svqcvtn_u16_s32_x2 (z16), + z22 = svqcvtn_u16 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u16_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u16_s64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..384c15441e68c6943b7e2394ebb65500d03c3a9a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u16_s64_x4.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qcvtn_z0_z0: +** sqcvtun z0\.h, {z0\.d - z3\.d} +** ret +*/ +TEST_X4_NARROW (qcvtn_z0_z0, svint64x4_t, svuint16_t, + z0_res = svqcvtn_u16_s64_x4 (z0), + z0_res = svqcvtn_u16 (z0)) + +/* +** qcvtn_z0_z4: +** sqcvtun z0\.h, {z4\.d - z7\.d} +** ret +*/ +TEST_X4_NARROW (qcvtn_z0_z4, svint64x4_t, svuint16_t, + z0_res = svqcvtn_u16_s64_x4 (z4), + z0_res = svqcvtn_u16 (z4)) + +/* +** qcvtn_z0_z21: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqcvtun z0\.h, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvtn_z0_z21, svint64x4_t, svuint16_t, + z0_res = svqcvtn_u16_s64_x4 (z21), + z0_res = svqcvtn_u16 (z21)) + +/* +** qcvtn_z25_z26: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqcvtun z25\.h, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvtn_z25_z26, svint64x4_t, svuint16_t, + z25 = svqcvtn_u16_s64_x4 (z26), + z25 = svqcvtn_u16 (z26)) + +/* +** qcvtn_z25_z0: +** sqcvtun z25\.h, {z0\.d - z3\.d} +** ret +*/ +TEST_X4_NARROW (qcvtn_z25_z0, svint64x4_t, svuint16_t, + z25 = svqcvtn_u16_s64_x4 (z0), + z25 = svqcvtn_u16 (z0)) + +/* +** qcvtn_z22_z16: +** sqcvtun z22\.h, {z16\.d - z19\.d} +** ret +*/ +TEST_X4_NARROW (qcvtn_z22_z16, svint64x4_t, svuint16_t, + z22_res = svqcvtn_u16_s64_x4 (z16), + z22_res = svqcvtn_u16 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u16_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u16_u32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..8416aecdf08156c9cdca27c90202f12b6b2915b6 --- 
/dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u16_u32_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qcvtn_z0_z0: +** uqcvtn z0\.h, {z0\.s - z1\.s} +** ret +*/ +TEST_X2_NARROW (qcvtn_z0_z0, svuint32x2_t, svuint16_t, + z0_res = svqcvtn_u16_u32_x2 (z0), + z0_res = svqcvtn_u16 (z0)) + +/* +** qcvtn_z0_z6: +** uqcvtn z0\.h, {z6\.s - z7\.s} +** ret +*/ +TEST_X2_NARROW (qcvtn_z0_z6, svuint32x2_t, svuint16_t, + z0_res = svqcvtn_u16_u32_x2 (z6), + z0_res = svqcvtn_u16 (z6)) + +/* +** qcvtn_z0_z29: +** mov [^\n]+ +** mov [^\n]+ +** uqcvtn z0\.h, [^\n]+ +** ret +*/ +TEST_X2_NARROW (qcvtn_z0_z29, svuint32x2_t, svuint16_t, + z0_res = svqcvtn_u16_u32_x2 (z29), + z0_res = svqcvtn_u16 (z29)) + +/* +** qcvtn_z5_z0: +** uqcvtn z5\.h, {z0\.s - z1\.s} +** ret +*/ +TEST_X2_NARROW (qcvtn_z5_z0, svuint32x2_t, svuint16_t, + z5 = svqcvtn_u16_u32_x2 (z0), + z5 = svqcvtn_u16 (z0)) + +/* +** qcvtn_z22_z16: +** uqcvtn z22\.h, {z16\.s - z17\.s} +** ret +*/ +TEST_X2_NARROW (qcvtn_z22_z16, svuint32x2_t, svuint16_t, + z22 = svqcvtn_u16_u32_x2 (z16), + z22 = svqcvtn_u16 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u16_u64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u16_u64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..7de742bdaa7c530032445e037757a89c463ccf7f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u16_u64_x4.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qcvtn_z0_z0: +** uqcvtn z0\.h, {z0\.d - z3\.d} +** ret +*/ +TEST_X4_NARROW (qcvtn_z0_z0, svuint64x4_t, svuint16_t, + z0_res = svqcvtn_u16_u64_x4 (z0), + z0_res = svqcvtn_u16 (z0)) + +/* +** qcvtn_z0_z4: +** uqcvtn z0\.h, {z4\.d - z7\.d} +** ret +*/ +TEST_X4_NARROW (qcvtn_z0_z4, svuint64x4_t, svuint16_t, + z0_res = svqcvtn_u16_u64_x4 (z4), + z0_res = svqcvtn_u16 (z4)) + +/* +** qcvtn_z0_z21: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uqcvtn z0\.h, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvtn_z0_z21, svuint64x4_t, svuint16_t, + z0_res = svqcvtn_u16_u64_x4 (z21), + z0_res = svqcvtn_u16 (z21)) + +/* +** qcvtn_z25_z26: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uqcvtn z25\.h, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvtn_z25_z26, svuint64x4_t, svuint16_t, + z25 = svqcvtn_u16_u64_x4 (z26), + z25 = svqcvtn_u16 (z26)) + +/* +** qcvtn_z25_z0: +** uqcvtn z25\.h, {z0\.d - z3\.d} +** ret +*/ +TEST_X4_NARROW (qcvtn_z25_z0, svuint64x4_t, svuint16_t, + z25 = svqcvtn_u16_u64_x4 (z0), + z25 = svqcvtn_u16 (z0)) + +/* +** qcvtn_z22_z16: +** uqcvtn z22\.h, {z16\.d - z19\.d} +** ret +*/ +TEST_X4_NARROW (qcvtn_z22_z16, svuint64x4_t, svuint16_t, + z22_res = svqcvtn_u16_u64_x4 (z16), + z22_res = svqcvtn_u16 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u8_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u8_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..6228223a8ca6e96a83be9accbe9ae497a7d19277 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u8_s32_x4.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qcvtn_z0_z0: +** sqcvtun z0\.b, {z0\.s - z3\.s} +** ret +*/ +TEST_X4_NARROW (qcvtn_z0_z0, svint32x4_t, svuint8_t, + z0_res = svqcvtn_u8_s32_x4 (z0), + z0_res = svqcvtn_u8 (z0)) + +/* +** qcvtn_z0_z4: +** 
sqcvtun z0\.b, {z4\.s - z7\.s} +** ret +*/ +TEST_X4_NARROW (qcvtn_z0_z4, svint32x4_t, svuint8_t, + z0_res = svqcvtn_u8_s32_x4 (z4), + z0_res = svqcvtn_u8 (z4)) + +/* +** qcvtn_z0_z21: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqcvtun z0\.b, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvtn_z0_z21, svint32x4_t, svuint8_t, + z0_res = svqcvtn_u8_s32_x4 (z21), + z0_res = svqcvtn_u8 (z21)) + +/* +** qcvtn_z25_z26: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqcvtun z25\.b, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvtn_z25_z26, svint32x4_t, svuint8_t, + z25 = svqcvtn_u8_s32_x4 (z26), + z25 = svqcvtn_u8 (z26)) + +/* +** qcvtn_z25_z0: +** sqcvtun z25\.b, {z0\.s - z3\.s} +** ret +*/ +TEST_X4_NARROW (qcvtn_z25_z0, svint32x4_t, svuint8_t, + z25 = svqcvtn_u8_s32_x4 (z0), + z25 = svqcvtn_u8 (z0)) + +/* +** qcvtn_z22_z16: +** sqcvtun z22\.b, {z16\.s - z19\.s} +** ret +*/ +TEST_X4_NARROW (qcvtn_z22_z16, svint32x4_t, svuint8_t, + z22_res = svqcvtn_u8_s32_x4 (z16), + z22_res = svqcvtn_u8 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u8_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u8_u32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..9c721caf9a6211de19bed4220fab6e847ed105c1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qcvtn_u8_u32_x4.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qcvtn_z0_z0: +** uqcvtn z0\.b, {z0\.s - z3\.s} +** ret +*/ +TEST_X4_NARROW (qcvtn_z0_z0, svuint32x4_t, svuint8_t, + z0_res = svqcvtn_u8_u32_x4 (z0), + z0_res = svqcvtn_u8 (z0)) + +/* +** qcvtn_z0_z4: +** uqcvtn z0\.b, {z4\.s - z7\.s} +** ret +*/ +TEST_X4_NARROW (qcvtn_z0_z4, svuint32x4_t, svuint8_t, + z0_res = svqcvtn_u8_u32_x4 (z4), + z0_res = svqcvtn_u8 (z4)) + +/* +** qcvtn_z0_z21: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uqcvtn z0\.b, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvtn_z0_z21, svuint32x4_t, svuint8_t, + z0_res = svqcvtn_u8_u32_x4 (z21), + z0_res = svqcvtn_u8 (z21)) + +/* +** qcvtn_z25_z26: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uqcvtn z25\.b, [^\n]+ +** ret +*/ +TEST_X4_NARROW (qcvtn_z25_z26, svuint32x4_t, svuint8_t, + z25 = svqcvtn_u8_u32_x4 (z26), + z25 = svqcvtn_u8 (z26)) + +/* +** qcvtn_z25_z0: +** uqcvtn z25\.b, {z0\.s - z3\.s} +** ret +*/ +TEST_X4_NARROW (qcvtn_z25_z0, svuint32x4_t, svuint8_t, + z25 = svqcvtn_u8_u32_x4 (z0), + z25 = svqcvtn_u8 (z0)) + +/* +** qcvtn_z22_z16: +** uqcvtn z22\.b, {z16\.s - z19\.s} +** ret +*/ +TEST_X4_NARROW (qcvtn_z22_z16, svuint32x4_t, svuint8_t, + z22_res = svqcvtn_u8_u32_x4 (z16), + z22_res = svqcvtn_u8 (z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..f8585ca5bfb513753cd163e03515b1387c30640d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s16_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qdmulh_z0_z0_z4: +** sqdmulh {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (qdmulh_z0_z0_z4, svint16x2_t, z0, + svqdmulh_s16_x2 (z0, z4), + svqdmulh (z0, z4)) + +/* +** qdmulh_z0_z4_z0: +** sqdmulh {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (qdmulh_z0_z4_z0, svint16x2_t, z0, + svqdmulh_s16_x2 (z4, z0), + svqdmulh (z4, z0)) 
+ +/* +** qdmulh_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, {z28\.h - z29\.h} +** | +** sqdmulh [^\n]+, {z28\.h - z29\.h} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (qdmulh_z0_z4_z28, svint16x2_t, z0, + svqdmulh_s16_x2 (z4, z28), + svqdmulh (z4, z28)) + +/* +** qdmulh_z18_z18_z4: +** sqdmulh {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (qdmulh_z18_z18_z4, svint16x2_t, z18, + svqdmulh_s16_x2 (z18, z4), + svqdmulh (z18, z4)) + +/* +** qdmulh_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, {z18\.h - z19\.h} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (qdmulh_z23_z23_z18, svint16x2_t, z23, + svqdmulh_s16_x2 (z23, z18), + svqdmulh (z23, z18)) + +/* +** qdmulh_z28_z28_z0: +** sqdmulh {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_XN (qdmulh_z28_z28_z0, svint16x2_t, z28, + svqdmulh_s16_x2 (z28, z0), + svqdmulh (z28, z0)) + +/* +** qdmulh_z0_z0_z18: +** sqdmulh {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_XN (qdmulh_z0_z0_z18, svint16x2_t, z0, + svqdmulh_s16_x2 (z0, z18), + svqdmulh (z0, z18)) + +/* +** qdmulh_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+ +** | +** sqdmulh {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (qdmulh_z4_z4_z23, svint16x2_t, z4, + svqdmulh_s16_x2 (z4, z23), + svqdmulh (z4, z23)) + +/* +** qdmulh_single_z24_z24_z0: +** sqdmulh {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z24_z0, svint16x2_t, svint16_t, z24, + svqdmulh_single_s16_x2 (z24, z0), + svqdmulh (z24, z0)) + +/* +** qdmulh_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** | +** sqdmulh {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z28_z0, svint16x2_t, svint16_t, z24, + svqdmulh_single_s16_x2 (z28, z0), + svqdmulh (z28, z0)) + +/* +** qdmulh_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** sqdmulh {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z1_z0, svint16x2_t, svint16_t, z24, + svqdmulh_single_s16_x2 (z1, z0), + svqdmulh (z1, z0)) + +/* +** qdmulh_single_z1_z24_z0: +** sqdmulh {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z1_z24_z0, svint16x2_t, svint16_t, z1, + svqdmulh_single_s16_x2 (z24, z0), + svqdmulh (z24, z0)) + +/* +** qdmulh_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z1_z1_z0, svint16x2_t, svint16_t, z1, + svqdmulh_single_s16_x2 (z1, z0), + svqdmulh (z1, z0)) + +/* +** qdmulh_single_z18_z18_z0: +** sqdmulh {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z18_z18_z0, svint16x2_t, svint16_t, z18, + svqdmulh_single_s16_x2 (z18, z0), + svqdmulh (z18, z0)) + +/* +** qdmulh_single_awkward: +** ... +** sqdmulh ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_AWKWARD (qdmulh_single_awkward, svint16x2_t, svint16_t, + z0_res = svqdmulh_single_s16_x2 (z1, z0), + z0_res = svqdmulh (z1, z0)) + +/* +** qdmulh_single_z0_z0_z15: +** ... +** sqdmulh {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (qdmulh_single_z0_z0_z15, svint16x2_t, svint16_t, + z0 = svqdmulh_single_s16_x2 (z0, z15), + z0 = svqdmulh (z0, z15)) + +/* +** qdmulh_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** sqdmulh {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z24_z16, svint16x2_t, svint16_t, z24, + svqdmulh_single_s16_x2 (z24, z16), + svqdmulh (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..f33ec05e29af77fb90ea980e99afea7e6c2f37f8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s16_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qdmulh_z0_z0_z4: +** sqdmulh {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (qdmulh_z0_z0_z4, svint16x4_t, z0, + svqdmulh_s16_x4 (z0, z4), + svqdmulh (z0, z4)) + +/* +** qdmulh_z0_z4_z0: +** sqdmulh {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (qdmulh_z0_z4_z0, svint16x4_t, z0, + svqdmulh_s16_x4 (z4, z0), + svqdmulh (z4, z0)) + +/* +** qdmulh_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, {z28\.h - z31\.h} +** | +** sqdmulh [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (qdmulh_z0_z4_z28, svint16x4_t, z0, + svqdmulh_s16_x4 (z4, z28), + svqdmulh (z4, z28)) + +/* +** qdmulh_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, {z4\.h - z7\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (qdmulh_z18_z18_z4, svint16x4_t, z18, + svqdmulh_s16_x4 (z18, z4), + svqdmulh (z18, z4)) + +/* +** qdmulh_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (qdmulh_z23_z23_z28, svint16x4_t, z23, + svqdmulh_s16_x4 (z23, z28), + svqdmulh (z23, z28)) + +/* +** qdmulh_z28_z28_z0: +** sqdmulh {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (qdmulh_z28_z28_z0, svint16x4_t, z28, + svqdmulh_s16_x4 (z28, z0), + svqdmulh (z28, z0)) + +/* +** qdmulh_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** | +** sqdmulh {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (qdmulh_z0_z0_z18, svint16x4_t, z0, + svqdmulh_s16_x4 (z0, z18), + svqdmulh (z0, z18)) + +/* +** qdmulh_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** | +** sqdmulh {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (qdmulh_z4_z4_z23, svint16x4_t, z4, + svqdmulh_s16_x4 (z4, z23), + svqdmulh (z4, z23)) + +/* +** qdmulh_single_z24_z24_z0: +** sqdmulh {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE 
(qdmulh_single_z24_z24_z0, svint16x4_t, svint16_t, z24, + svqdmulh_single_s16_x4 (z24, z0), + svqdmulh (z24, z0)) + +/* +** qdmulh_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** | +** sqdmulh {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z28_z0, svint16x4_t, svint16_t, z24, + svqdmulh_single_s16_x4 (z28, z0), + svqdmulh (z28, z0)) + +/* +** qdmulh_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z1_z0, svint16x4_t, svint16_t, z24, + svqdmulh_single_s16_x4 (z1, z0), + svqdmulh (z1, z0)) + +/* +** qdmulh_single_z1_z24_z0: +** sqdmulh {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z1_z24_z0, svint16x4_t, svint16_t, z1, + svqdmulh_single_s16_x4 (z24, z0), + svqdmulh (z24, z0)) + +/* +** qdmulh_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z1_z1_z0, svint16x4_t, svint16_t, z1, + svqdmulh_single_s16_x4 (z1, z0), + svqdmulh (z1, z0)) + +/* +** qdmulh_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z18_z18_z0, svint16x4_t, svint16_t, z18, + svqdmulh_single_s16_x4 (z18, z0), + svqdmulh (z18, z0)) + +/* +** qdmulh_single_awkward: +** ... +** sqdmulh ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (qdmulh_single_awkward, svint16x4_t, svint16_t, + z0_res = svqdmulh_single_s16_x4 (z1, z0), + z0_res = svqdmulh (z1, z0)) + +/* +** qdmulh_single_z0_z0_z15: +** ... +** sqdmulh {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (qdmulh_single_z0_z0_z15, svint16x4_t, svint16_t, + z0 = svqdmulh_single_s16_x4 (z0, z15), + z0 = svqdmulh (z0, z15)) + +/* +** qdmulh_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** sqdmulh {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z24_z16, svint16x4_t, svint16_t, z24, + svqdmulh_single_s16_x4 (z24, z16), + svqdmulh (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..973aa4db05525adeb3f7328ca7ddbc346d6dfca7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s32_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qdmulh_z0_z0_z4: +** sqdmulh {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (qdmulh_z0_z0_z4, svint32x2_t, z0, + svqdmulh_s32_x2 (z0, z4), + svqdmulh (z0, z4)) + +/* +** qdmulh_z0_z4_z0: +** sqdmulh {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (qdmulh_z0_z4_z0, svint32x2_t, z0, + svqdmulh_s32_x2 (z4, z0), + svqdmulh (z4, z0)) + +/* +** qdmulh_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, {z28\.s - z29\.s} +** | +** sqdmulh [^\n]+, {z28\.s - z29\.s} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (qdmulh_z0_z4_z28, svint32x2_t, z0, + svqdmulh_s32_x2 (z4, z28), + svqdmulh (z4, z28)) + +/* +** qdmulh_z18_z18_z4: +** sqdmulh {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (qdmulh_z18_z18_z4, svint32x2_t, z18, + svqdmulh_s32_x2 (z18, z4), + svqdmulh (z18, z4)) + +/* +** qdmulh_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, {z18\.s - z19\.s} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (qdmulh_z23_z23_z18, svint32x2_t, z23, + svqdmulh_s32_x2 (z23, z18), + svqdmulh (z23, z18)) + +/* +** qdmulh_z28_z28_z0: +** sqdmulh {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_XN (qdmulh_z28_z28_z0, svint32x2_t, z28, + svqdmulh_s32_x2 (z28, z0), + svqdmulh (z28, z0)) + +/* +** qdmulh_z0_z0_z18: +** sqdmulh {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s} +** ret +*/ +TEST_XN (qdmulh_z0_z0_z18, svint32x2_t, z0, + svqdmulh_s32_x2 (z0, z18), + svqdmulh (z0, z18)) + +/* +** qdmulh_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+ +** | +** sqdmulh {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (qdmulh_z4_z4_z23, svint32x2_t, z4, + svqdmulh_s32_x2 (z4, z23), + svqdmulh (z4, z23)) + +/* +** qdmulh_single_z24_z24_z0: +** sqdmulh {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z24_z0, svint32x2_t, svint32_t, z24, + svqdmulh_single_s32_x2 (z24, z0), + svqdmulh (z24, z0)) + +/* +** qdmulh_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** | +** sqdmulh {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z28_z0, svint32x2_t, svint32_t, z24, + svqdmulh_single_s32_x2 (z28, z0), + svqdmulh (z28, z0)) + +/* +** qdmulh_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** sqdmulh {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE 
(qdmulh_single_z24_z1_z0, svint32x2_t, svint32_t, z24, + svqdmulh_single_s32_x2 (z1, z0), + svqdmulh (z1, z0)) + +/* +** qdmulh_single_z1_z24_z0: +** sqdmulh {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z1_z24_z0, svint32x2_t, svint32_t, z1, + svqdmulh_single_s32_x2 (z24, z0), + svqdmulh (z24, z0)) + +/* +** qdmulh_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z1_z1_z0, svint32x2_t, svint32_t, z1, + svqdmulh_single_s32_x2 (z1, z0), + svqdmulh (z1, z0)) + +/* +** qdmulh_single_z18_z18_z0: +** sqdmulh {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z18_z18_z0, svint32x2_t, svint32_t, z18, + svqdmulh_single_s32_x2 (z18, z0), + svqdmulh (z18, z0)) + +/* +** qdmulh_single_awkward: +** ... +** sqdmulh ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (qdmulh_single_awkward, svint32x2_t, svint32_t, + z0_res = svqdmulh_single_s32_x2 (z1, z0), + z0_res = svqdmulh (z1, z0)) + +/* +** qdmulh_single_z0_z0_z15: +** ... +** sqdmulh {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (qdmulh_single_z0_z0_z15, svint32x2_t, svint32_t, + z0 = svqdmulh_single_s32_x2 (z0, z15), + z0 = svqdmulh (z0, z15)) + +/* +** qdmulh_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** sqdmulh {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z24_z16, svint32x2_t, svint32_t, z24, + svqdmulh_single_s32_x2 (z24, z16), + svqdmulh (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..a40b925881ef9c49cc7245b4fb610fb78c8c6385 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s32_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qdmulh_z0_z0_z4: +** sqdmulh {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (qdmulh_z0_z0_z4, svint32x4_t, z0, + svqdmulh_s32_x4 (z0, z4), + svqdmulh (z0, z4)) + +/* +** qdmulh_z0_z4_z0: +** sqdmulh {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (qdmulh_z0_z4_z0, svint32x4_t, z0, + svqdmulh_s32_x4 (z4, z0), + svqdmulh (z4, z0)) + +/* +** qdmulh_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, {z28\.s - z31\.s} +** | +** sqdmulh [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (qdmulh_z0_z4_z28, svint32x4_t, z0, + svqdmulh_s32_x4 (z4, z28), + svqdmulh (z4, z28)) + +/* +** qdmulh_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, {z4\.s - z7\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (qdmulh_z18_z18_z4, svint32x4_t, z18, + svqdmulh_s32_x4 (z18, z4), + svqdmulh (z18, z4)) + +/* +** qdmulh_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (qdmulh_z23_z23_z28, svint32x4_t, z23, + svqdmulh_s32_x4 (z23, z28), + svqdmulh (z23, z28)) + +/* +** 
qdmulh_z28_z28_z0: +** sqdmulh {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (qdmulh_z28_z28_z0, svint32x4_t, z28, + svqdmulh_s32_x4 (z28, z0), + svqdmulh (z28, z0)) + +/* +** qdmulh_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** | +** sqdmulh {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (qdmulh_z0_z0_z18, svint32x4_t, z0, + svqdmulh_s32_x4 (z0, z18), + svqdmulh (z0, z18)) + +/* +** qdmulh_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** | +** sqdmulh {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (qdmulh_z4_z4_z23, svint32x4_t, z4, + svqdmulh_s32_x4 (z4, z23), + svqdmulh (z4, z23)) + +/* +** qdmulh_single_z24_z24_z0: +** sqdmulh {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z24_z0, svint32x4_t, svint32_t, z24, + svqdmulh_single_s32_x4 (z24, z0), + svqdmulh (z24, z0)) + +/* +** qdmulh_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** | +** sqdmulh {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z28_z0, svint32x4_t, svint32_t, z24, + svqdmulh_single_s32_x4 (z28, z0), + svqdmulh (z28, z0)) + +/* +** qdmulh_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z1_z0, svint32x4_t, svint32_t, z24, + svqdmulh_single_s32_x4 (z1, z0), + svqdmulh (z1, z0)) + +/* +** qdmulh_single_z1_z24_z0: +** sqdmulh {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z1_z24_z0, svint32x4_t, svint32_t, z1, + svqdmulh_single_s32_x4 (z24, z0), + svqdmulh (z24, z0)) + +/* +** qdmulh_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z1_z1_z0, svint32x4_t, svint32_t, z1, + svqdmulh_single_s32_x4 (z1, z0), + svqdmulh (z1, z0)) + +/* +** qdmulh_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z18_z18_z0, svint32x4_t, svint32_t, z18, + svqdmulh_single_s32_x4 (z18, z0), + svqdmulh (z18, z0)) + +/* +** qdmulh_single_awkward: +** ... +** sqdmulh ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (qdmulh_single_awkward, svint32x4_t, svint32_t, + z0_res = svqdmulh_single_s32_x4 (z1, z0), + z0_res = svqdmulh (z1, z0)) + +/* +** qdmulh_single_z0_z0_z15: +** ... +** sqdmulh {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (qdmulh_single_z0_z0_z15, svint32x4_t, svint32_t, + z0 = svqdmulh_single_s32_x4 (z0, z15), + z0 = svqdmulh (z0, z15)) + +/* +** qdmulh_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** sqdmulh {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z24_z16, svint32x4_t, svint32_t, z24, + svqdmulh_single_s32_x4 (z24, z16), + svqdmulh (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..bb85007a80d27b1b6fa24690abd1a8a287d72216 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s64_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qdmulh_z0_z0_z4: +** sqdmulh {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (qdmulh_z0_z0_z4, svint64x2_t, z0, + svqdmulh_s64_x2 (z0, z4), + svqdmulh (z0, z4)) + +/* +** qdmulh_z0_z4_z0: +** sqdmulh {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (qdmulh_z0_z4_z0, svint64x2_t, z0, + svqdmulh_s64_x2 (z4, z0), + svqdmulh (z4, z0)) + +/* +** qdmulh_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, {z28\.d - z29\.d} +** | +** sqdmulh [^\n]+, {z28\.d - z29\.d} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (qdmulh_z0_z4_z28, svint64x2_t, z0, + svqdmulh_s64_x2 (z4, z28), + svqdmulh (z4, z28)) + +/* +** qdmulh_z18_z18_z4: +** sqdmulh {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (qdmulh_z18_z18_z4, svint64x2_t, z18, + svqdmulh_s64_x2 (z18, z4), + svqdmulh (z18, z4)) + +/* +** qdmulh_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, {z18\.d - z19\.d} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (qdmulh_z23_z23_z18, svint64x2_t, z23, + svqdmulh_s64_x2 (z23, z18), + svqdmulh (z23, z18)) + +/* +** qdmulh_z28_z28_z0: +** sqdmulh {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_XN (qdmulh_z28_z28_z0, svint64x2_t, z28, + svqdmulh_s64_x2 (z28, z0), + svqdmulh (z28, z0)) + +/* +** qdmulh_z0_z0_z18: +** sqdmulh {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d} +** ret +*/ +TEST_XN (qdmulh_z0_z0_z18, svint64x2_t, z0, + svqdmulh_s64_x2 (z0, z18), + svqdmulh (z0, z18)) + +/* +** qdmulh_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** | +** sqdmulh {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (qdmulh_z4_z4_z23, svint64x2_t, z4, + svqdmulh_s64_x2 (z4, z23), + svqdmulh (z4, z23)) + +/* +** qdmulh_single_z24_z24_z0: +** sqdmulh {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z24_z0, svint64x2_t, svint64_t, z24, + svqdmulh_single_s64_x2 (z24, z0), + svqdmulh (z24, z0)) + +/* +** qdmulh_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** | +** sqdmulh {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z28_z0, svint64x2_t, svint64_t, z24, + svqdmulh_single_s64_x2 (z28, z0), + svqdmulh (z28, z0)) + +/* +** qdmulh_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** sqdmulh {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE 
(qdmulh_single_z24_z1_z0, svint64x2_t, svint64_t, z24, + svqdmulh_single_s64_x2 (z1, z0), + svqdmulh (z1, z0)) + +/* +** qdmulh_single_z1_z24_z0: +** sqdmulh {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z1_z24_z0, svint64x2_t, svint64_t, z1, + svqdmulh_single_s64_x2 (z24, z0), + svqdmulh (z24, z0)) + +/* +** qdmulh_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z1_z1_z0, svint64x2_t, svint64_t, z1, + svqdmulh_single_s64_x2 (z1, z0), + svqdmulh (z1, z0)) + +/* +** qdmulh_single_z18_z18_z0: +** sqdmulh {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z18_z18_z0, svint64x2_t, svint64_t, z18, + svqdmulh_single_s64_x2 (z18, z0), + svqdmulh (z18, z0)) + +/* +** qdmulh_single_awkward: +** ... +** sqdmulh ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (qdmulh_single_awkward, svint64x2_t, svint64_t, + z0_res = svqdmulh_single_s64_x2 (z1, z0), + z0_res = svqdmulh (z1, z0)) + +/* +** qdmulh_single_z0_z0_z15: +** ... +** sqdmulh {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (qdmulh_single_z0_z0_z15, svint64x2_t, svint64_t, + z0 = svqdmulh_single_s64_x2 (z0, z15), + z0 = svqdmulh (z0, z15)) + +/* +** qdmulh_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** sqdmulh {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z24_z16, svint64x2_t, svint64_t, z24, + svqdmulh_single_s64_x2 (z24, z16), + svqdmulh (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..00dbc73cdd1cf6900f7f31f61804242fc08a2e38 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s64_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qdmulh_z0_z0_z4: +** sqdmulh {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (qdmulh_z0_z0_z4, svint64x4_t, z0, + svqdmulh_s64_x4 (z0, z4), + svqdmulh (z0, z4)) + +/* +** qdmulh_z0_z4_z0: +** sqdmulh {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (qdmulh_z0_z4_z0, svint64x4_t, z0, + svqdmulh_s64_x4 (z4, z0), + svqdmulh (z4, z0)) + +/* +** qdmulh_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, {z28\.d - z31\.d} +** | +** sqdmulh [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (qdmulh_z0_z4_z28, svint64x4_t, z0, + svqdmulh_s64_x4 (z4, z28), + svqdmulh (z4, z28)) + +/* +** qdmulh_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, {z4\.d - z7\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (qdmulh_z18_z18_z4, svint64x4_t, z18, + svqdmulh_s64_x4 (z18, z4), + svqdmulh (z18, z4)) + +/* +** qdmulh_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (qdmulh_z23_z23_z28, svint64x4_t, z23, + svqdmulh_s64_x4 (z23, z28), + svqdmulh (z23, z28)) + +/* +** 
qdmulh_z28_z28_z0: +** sqdmulh {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (qdmulh_z28_z28_z0, svint64x4_t, z28, + svqdmulh_s64_x4 (z28, z0), + svqdmulh (z28, z0)) + +/* +** qdmulh_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+ +** | +** sqdmulh {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (qdmulh_z0_z0_z18, svint64x4_t, z0, + svqdmulh_s64_x4 (z0, z18), + svqdmulh (z0, z18)) + +/* +** qdmulh_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** | +** sqdmulh {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (qdmulh_z4_z4_z23, svint64x4_t, z4, + svqdmulh_s64_x4 (z4, z23), + svqdmulh (z4, z23)) + +/* +** qdmulh_single_z24_z24_z0: +** sqdmulh {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z24_z0, svint64x4_t, svint64_t, z24, + svqdmulh_single_s64_x4 (z24, z0), + svqdmulh (z24, z0)) + +/* +** qdmulh_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** | +** sqdmulh {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z28_z0, svint64x4_t, svint64_t, z24, + svqdmulh_single_s64_x4 (z28, z0), + svqdmulh (z28, z0)) + +/* +** qdmulh_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z1_z0, svint64x4_t, svint64_t, z24, + svqdmulh_single_s64_x4 (z1, z0), + svqdmulh (z1, z0)) + +/* +** qdmulh_single_z1_z24_z0: +** sqdmulh {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z1_z24_z0, svint64x4_t, svint64_t, z1, + svqdmulh_single_s64_x4 (z24, z0), + svqdmulh (z24, z0)) + +/* +** qdmulh_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z1_z1_z0, svint64x4_t, svint64_t, z1, + svqdmulh_single_s64_x4 (z1, z0), + svqdmulh (z1, z0)) + +/* +** qdmulh_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z18_z18_z0, svint64x4_t, svint64_t, z18, + svqdmulh_single_s64_x4 (z18, z0), + svqdmulh (z18, z0)) + +/* +** qdmulh_single_awkward: +** ... +** sqdmulh ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (qdmulh_single_awkward, svint64x4_t, svint64_t, + z0_res = svqdmulh_single_s64_x4 (z1, z0), + z0_res = svqdmulh (z1, z0)) + +/* +** qdmulh_single_z0_z0_z15: +** ... +** sqdmulh {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (qdmulh_single_z0_z0_z15, svint64x4_t, svint64_t, + z0 = svqdmulh_single_s64_x4 (z0, z15), + z0 = svqdmulh (z0, z15)) + +/* +** qdmulh_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** sqdmulh {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z24_z16, svint64x4_t, svint64_t, z24, + svqdmulh_single_s64_x4 (z24, z16), + svqdmulh (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..e033534e438d7db979146672595baa362f7089c1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s8_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qdmulh_z0_z0_z4: +** sqdmulh {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (qdmulh_z0_z0_z4, svint8x2_t, z0, + svqdmulh_s8_x2 (z0, z4), + svqdmulh (z0, z4)) + +/* +** qdmulh_z0_z4_z0: +** sqdmulh {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (qdmulh_z0_z4_z0, svint8x2_t, z0, + svqdmulh_s8_x2 (z4, z0), + svqdmulh (z4, z0)) + +/* +** qdmulh_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, {z28\.b - z29\.b} +** | +** sqdmulh [^\n]+, {z28\.b - z29\.b} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (qdmulh_z0_z4_z28, svint8x2_t, z0, + svqdmulh_s8_x2 (z4, z28), + svqdmulh (z4, z28)) + +/* +** qdmulh_z18_z18_z4: +** sqdmulh {z18\.b - z19\.b}, {z18\.b - z19\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (qdmulh_z18_z18_z4, svint8x2_t, z18, + svqdmulh_s8_x2 (z18, z4), + svqdmulh (z18, z4)) + +/* +** qdmulh_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, {z18\.b - z19\.b} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (qdmulh_z23_z23_z18, svint8x2_t, z23, + svqdmulh_s8_x2 (z23, z18), + svqdmulh (z23, z18)) + +/* +** qdmulh_z28_z28_z0: +** sqdmulh {z28\.b - z29\.b}, {z28\.b - z29\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_XN (qdmulh_z28_z28_z0, svint8x2_t, z28, + svqdmulh_s8_x2 (z28, z0), + svqdmulh (z28, z0)) + +/* +** qdmulh_z0_z0_z18: +** sqdmulh {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z18\.b - z19\.b} +** ret +*/ +TEST_XN (qdmulh_z0_z0_z18, svint8x2_t, z0, + svqdmulh_s8_x2 (z0, z18), + svqdmulh (z0, z18)) + +/* +** qdmulh_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+ +** | +** sqdmulh {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (qdmulh_z4_z4_z23, svint8x2_t, z4, + svqdmulh_s8_x2 (z4, z23), + svqdmulh (z4, z23)) + +/* +** qdmulh_single_z24_z24_z0: +** sqdmulh {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z24_z0, svint8x2_t, svint8_t, z24, + svqdmulh_single_s8_x2 (z24, z0), + svqdmulh (z24, z0)) + +/* +** qdmulh_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** | +** sqdmulh {z28\.b - z29\.b}, {z28\.b - z29\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z28_z0, svint8x2_t, svint8_t, z24, + svqdmulh_single_s8_x2 (z28, z0), + svqdmulh (z28, z0)) + +/* +** qdmulh_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** sqdmulh {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z1_z0, 
svint8x2_t, svint8_t, z24, + svqdmulh_single_s8_x2 (z1, z0), + svqdmulh (z1, z0)) + +/* +** qdmulh_single_z1_z24_z0: +** sqdmulh {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z1_z24_z0, svint8x2_t, svint8_t, z1, + svqdmulh_single_s8_x2 (z24, z0), + svqdmulh (z24, z0)) + +/* +** qdmulh_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z1_z1_z0, svint8x2_t, svint8_t, z1, + svqdmulh_single_s8_x2 (z1, z0), + svqdmulh (z1, z0)) + +/* +** qdmulh_single_z18_z18_z0: +** sqdmulh {z18\.b - z19\.b}, {z18\.b - z19\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z18_z18_z0, svint8x2_t, svint8_t, z18, + svqdmulh_single_s8_x2 (z18, z0), + svqdmulh (z18, z0)) + +/* +** qdmulh_single_awkward: +** ... +** sqdmulh ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (qdmulh_single_awkward, svint8x2_t, svint8_t, + z0_res = svqdmulh_single_s8_x2 (z1, z0), + z0_res = svqdmulh (z1, z0)) + +/* +** qdmulh_single_z0_z0_z15: +** ... +** sqdmulh {z0\.b - z1\.b}, {z0\.b - z1\.b}, z15\.b +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (qdmulh_single_z0_z0_z15, svint8x2_t, svint8_t, + z0 = svqdmulh_single_s8_x2 (z0, z15), + z0 = svqdmulh (z0, z15)) + +/* +** qdmulh_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** sqdmulh {z24\.b - z25\.b}, {z24\.b - z25\.b}, \1\.b +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z24_z16, svint8x2_t, svint8_t, z24, + svqdmulh_single_s8_x2 (z24, z16), + svqdmulh (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..fcdae509a5a9ff92846baa95284d5b4b90d63187 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qdmulh_s8_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qdmulh_z0_z0_z4: +** sqdmulh {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (qdmulh_z0_z0_z4, svint8x4_t, z0, + svqdmulh_s8_x4 (z0, z4), + svqdmulh (z0, z4)) + +/* +** qdmulh_z0_z4_z0: +** sqdmulh {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (qdmulh_z0_z4_z0, svint8x4_t, z0, + svqdmulh_s8_x4 (z4, z0), + svqdmulh (z4, z0)) + +/* +** qdmulh_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, {z28\.b - z31\.b} +** | +** sqdmulh [^\n]+, {z28\.b - z31\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (qdmulh_z0_z4_z28, svint8x4_t, z0, + svqdmulh_s8_x4 (z4, z28), + svqdmulh (z4, z28)) + +/* +** qdmulh_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, {z4\.b - z7\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (qdmulh_z18_z18_z4, svint8x4_t, z18, + svqdmulh_s8_x4 (z18, z4), + svqdmulh (z18, z4)) + +/* +** qdmulh_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, {z28\.b - z31\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (qdmulh_z23_z23_z28, svint8x4_t, z23, + svqdmulh_s8_x4 (z23, z28), + svqdmulh (z23, z28)) + +/* +** qdmulh_z28_z28_z0: +** sqdmulh {z28\.b - z31\.b}, {z28\.b - z31\.b}, 
{z0\.b - z3\.b} +** ret +*/ +TEST_XN (qdmulh_z28_z28_z0, svint8x4_t, z28, + svqdmulh_s8_x4 (z28, z0), + svqdmulh (z28, z0)) + +/* +** qdmulh_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+ +** | +** sqdmulh {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (qdmulh_z0_z0_z18, svint8x4_t, z0, + svqdmulh_s8_x4 (z0, z18), + svqdmulh (z0, z18)) + +/* +** qdmulh_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+ +** | +** sqdmulh {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (qdmulh_z4_z4_z23, svint8x4_t, z4, + svqdmulh_s8_x4 (z4, z23), + svqdmulh (z4, z23)) + +/* +** qdmulh_single_z24_z24_z0: +** sqdmulh {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z24_z0, svint8x4_t, svint8_t, z24, + svqdmulh_single_s8_x4 (z24, z0), + svqdmulh (z24, z0)) + +/* +** qdmulh_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** | +** sqdmulh {z28\.b - z31\.b}, {z28\.b - z31\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z28_z0, svint8x4_t, svint8_t, z24, + svqdmulh_single_s8_x4 (z28, z0), + svqdmulh (z28, z0)) + +/* +** qdmulh_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z1_z0, svint8x4_t, svint8_t, z24, + svqdmulh_single_s8_x4 (z1, z0), + svqdmulh (z1, z0)) + +/* +** qdmulh_single_z1_z24_z0: +** sqdmulh {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z1_z24_z0, svint8x4_t, svint8_t, z1, + svqdmulh_single_s8_x4 (z24, z0), + svqdmulh (z24, z0)) + +/* +** qdmulh_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z1_z1_z0, svint8x4_t, svint8_t, z1, + svqdmulh_single_s8_x4 (z1, z0), + svqdmulh (z1, z0)) + +/* +** qdmulh_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqdmulh [^\n]+, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z18_z18_z0, svint8x4_t, svint8_t, z18, + svqdmulh_single_s8_x4 (z18, z0), + svqdmulh (z18, z0)) + +/* +** qdmulh_single_awkward: +** ... +** sqdmulh ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (qdmulh_single_awkward, svint8x4_t, svint8_t, + z0_res = svqdmulh_single_s8_x4 (z1, z0), + z0_res = svqdmulh (z1, z0)) + +/* +** qdmulh_single_z0_z0_z15: +** ... +** sqdmulh {z0\.b - z3\.b}, {z0\.b - z3\.b}, z15\.b +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (qdmulh_single_z0_z0_z15, svint8x4_t, svint8_t, + z0 = svqdmulh_single_s8_x4 (z0, z15), + z0 = svqdmulh (z0, z15)) + +/* +** qdmulh_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** sqdmulh {z24\.b - z27\.b}, {z24\.b - z27\.b}, \1\.b +** ret +*/ +TEST_XN_SINGLE (qdmulh_single_z24_z24_z16, svint8x4_t, svint8_t, z24, + svqdmulh_single_s8_x4 (z24, z16), + svqdmulh (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_s16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..00377b7ad6062c06ba1229c90d822ea361be3503 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_s16_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qrshr_z0_z0_1: +** sqrshr z0\.h, {z0\.s - z1\.s}, #1 +** ret +*/ +TEST_X2_NARROW (qrshr_z0_z0_1, svint32x2_t, svint16_t, + z0_res = svqrshr_n_s16_s32_x2 (z0, 1), + z0_res = svqrshr_s16 (z0, 1)) + +/* +** qrshr_z0_z6_16: +** sqrshr z0\.h, {z6\.s - z7\.s}, #16 +** ret +*/ +TEST_X2_NARROW (qrshr_z0_z6_16, svint32x2_t, svint16_t, + z0_res = svqrshr_n_s16_s32_x2 (z6, 16), + z0_res = svqrshr_s16 (z6, 16)) + +/* +** qrshr_z0_z29_13: +** mov [^\n]+ +** mov [^\n]+ +** sqrshr z0\.h, [^\n]+, #13 +** ret +*/ +TEST_X2_NARROW (qrshr_z0_z29_13, svint32x2_t, svint16_t, + z0_res = svqrshr_n_s16_s32_x2 (z29, 13), + z0_res = svqrshr_s16 (z29, 13)) + +/* +** qrshr_z5_z0_11: +** sqrshr z5\.h, {z0\.s - z1\.s}, #11 +** ret +*/ +TEST_X2_NARROW (qrshr_z5_z0_11, svint32x2_t, svint16_t, + z5 = svqrshr_n_s16_s32_x2 (z0, 11), + z5 = svqrshr_s16 (z0, 11)) + +/* +** qrshr_z22_z16_15: +** sqrshr z22\.h, {z16\.s - z17\.s}, #15 +** ret +*/ +TEST_X2_NARROW (qrshr_z22_z16_15, svint32x2_t, svint16_t, + z22 = svqrshr_n_s16_s32_x2 (z16, 15), + z22 = svqrshr_s16 (z16, 15)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_s16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..42b083cd4bbd405515191f8c609c6459e0eaa488 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_s16_x4.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qrshr_z0_z0_1: +** sqrshr z0\.h, {z0\.d - z3\.d}, #1 +** ret +*/ +TEST_X4_NARROW (qrshr_z0_z0_1, svint64x4_t, svint16_t, + z0_res = svqrshr_n_s16_s64_x4 (z0, 1), + z0_res = svqrshr_s16 (z0, 1)) + +/* +** qrshr_z0_z4_64: +** sqrshr z0\.h, {z4\.d - z7\.d}, #64 +** ret +*/ +TEST_X4_NARROW (qrshr_z0_z4_64, svint64x4_t, svint16_t, + z0_res = svqrshr_n_s16_s64_x4 (z4, 64), + z0_res = svqrshr_s16 (z4, 64)) + +/* +** qrshr_z0_z21_33: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqrshr z0\.h, [^\n]+, #33 +** ret +*/ +TEST_X4_NARROW (qrshr_z0_z21_33, svint64x4_t, svint16_t, + z0_res = svqrshr_n_s16_s64_x4 (z21, 33), + z0_res = svqrshr_s16 (z21, 33)) + +/* +** qrshr_z25_z26_12: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqrshr z25\.h, [^\n]+, #12 +** ret +*/ +TEST_X4_NARROW (qrshr_z25_z26_12, svint64x4_t, svint16_t, + z25 = svqrshr_n_s16_s64_x4 (z26, 12), + z25 = svqrshr_s16 (z26, 12)) + +/* +** qrshr_z25_z0_32: +** sqrshr z25\.h, {z0\.d - z3\.d}, #32 +** ret +*/ +TEST_X4_NARROW (qrshr_z25_z0_32, svint64x4_t, svint16_t, + z25 = svqrshr_n_s16_s64_x4 (z0, 32), + z25 = svqrshr_s16 (z0, 32)) + +/* +** qrshr_z22_z16_63: +** sqrshr z22\.h, 
{z16\.d - z19\.d}, #63 +** ret +*/ +TEST_X4_NARROW (qrshr_z22_z16_63, svint64x4_t, svint16_t, + z22_res = svqrshr_n_s16_s64_x4 (z16, 63), + z22_res = svqrshr_s16 (z16, 63)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_s8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..f01ace7d104aed705171c6891047f5a523e0753c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_s8_x4.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qrshr_z0_z0_1: +** sqrshr z0\.b, {z0\.s - z3\.s}, #1 +** ret +*/ +TEST_X4_NARROW (qrshr_z0_z0_1, svint32x4_t, svint8_t, + z0_res = svqrshr_n_s8_s32_x4 (z0, 1), + z0_res = svqrshr_s8 (z0, 1)) + +/* +** qrshr_z0_z4_32: +** sqrshr z0\.b, {z4\.s - z7\.s}, #32 +** ret +*/ +TEST_X4_NARROW (qrshr_z0_z4_32, svint32x4_t, svint8_t, + z0_res = svqrshr_n_s8_s32_x4 (z4, 32), + z0_res = svqrshr_s8 (z4, 32)) + +/* +** qrshr_z0_z21_2: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqrshr z0\.b, [^\n]+, #2 +** ret +*/ +TEST_X4_NARROW (qrshr_z0_z21_2, svint32x4_t, svint8_t, + z0_res = svqrshr_n_s8_s32_x4 (z21, 2), + z0_res = svqrshr_s8 (z21, 2)) + +/* +** qrshr_z25_z26_12: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqrshr z25\.b, [^\n]+, #12 +** ret +*/ +TEST_X4_NARROW (qrshr_z25_z26_12, svint32x4_t, svint8_t, + z25 = svqrshr_n_s8_s32_x4 (z26, 12), + z25 = svqrshr_s8 (z26, 12)) + +/* +** qrshr_z25_z0_16: +** sqrshr z25\.b, {z0\.s - z3\.s}, #16 +** ret +*/ +TEST_X4_NARROW (qrshr_z25_z0_16, svint32x4_t, svint8_t, + z25 = svqrshr_n_s8_s32_x4 (z0, 16), + z25 = svqrshr_s8 (z0, 16)) + +/* +** qrshr_z22_z16_31: +** sqrshr z22\.b, {z16\.s - z19\.s}, #31 +** ret +*/ +TEST_X4_NARROW (qrshr_z22_z16_31, svint32x4_t, svint8_t, + z22_res = svqrshr_n_s8_s32_x4 (z16, 31), + z22_res = svqrshr_s8 (z16, 31)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_u16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..aa035f7fa1ce4789069dcef03204586b9610ed54 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_u16_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qrshr_z0_z0_1: +** uqrshr z0\.h, {z0\.s - z1\.s}, #1 +** ret +*/ +TEST_X2_NARROW (qrshr_z0_z0_1, svuint32x2_t, svuint16_t, + z0_res = svqrshr_n_u16_u32_x2 (z0, 1), + z0_res = svqrshr_u16 (z0, 1)) + +/* +** qrshr_z0_z6_16: +** uqrshr z0\.h, {z6\.s - z7\.s}, #16 +** ret +*/ +TEST_X2_NARROW (qrshr_z0_z6_16, svuint32x2_t, svuint16_t, + z0_res = svqrshr_n_u16_u32_x2 (z6, 16), + z0_res = svqrshr_u16 (z6, 16)) + +/* +** qrshr_z0_z29_13: +** mov [^\n]+ +** mov [^\n]+ +** uqrshr z0\.h, [^\n]+, #13 +** ret +*/ +TEST_X2_NARROW (qrshr_z0_z29_13, svuint32x2_t, svuint16_t, + z0_res = svqrshr_n_u16_u32_x2 (z29, 13), + z0_res = svqrshr_u16 (z29, 13)) + +/* +** qrshr_z5_z0_11: +** uqrshr z5\.h, {z0\.s - z1\.s}, #11 +** ret +*/ +TEST_X2_NARROW (qrshr_z5_z0_11, svuint32x2_t, svuint16_t, + z5 = svqrshr_n_u16_u32_x2 (z0, 11), + z5 = svqrshr_u16 (z0, 11)) + +/* +** qrshr_z22_z16_15: +** uqrshr z22\.h, {z16\.s - z17\.s}, #15 +** ret +*/ +TEST_X2_NARROW (qrshr_z22_z16_15, svuint32x2_t, svuint16_t, + z22 = svqrshr_n_u16_u32_x2 (z16, 15), + z22 = svqrshr_u16 (z16, 15)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_u16_x4.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_u16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..0065a416db2c072702e5149efab228a08275d93f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_u16_x4.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qrshr_z0_z0_1: +** uqrshr z0\.h, {z0\.d - z3\.d}, #1 +** ret +*/ +TEST_X4_NARROW (qrshr_z0_z0_1, svuint64x4_t, svuint16_t, + z0_res = svqrshr_n_u16_u64_x4 (z0, 1), + z0_res = svqrshr_u16 (z0, 1)) + +/* +** qrshr_z0_z4_64: +** uqrshr z0\.h, {z4\.d - z7\.d}, #64 +** ret +*/ +TEST_X4_NARROW (qrshr_z0_z4_64, svuint64x4_t, svuint16_t, + z0_res = svqrshr_n_u16_u64_x4 (z4, 64), + z0_res = svqrshr_u16 (z4, 64)) + +/* +** qrshr_z0_z21_33: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uqrshr z0\.h, [^\n]+, #33 +** ret +*/ +TEST_X4_NARROW (qrshr_z0_z21_33, svuint64x4_t, svuint16_t, + z0_res = svqrshr_n_u16_u64_x4 (z21, 33), + z0_res = svqrshr_u16 (z21, 33)) + +/* +** qrshr_z25_z26_12: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uqrshr z25\.h, [^\n]+, #12 +** ret +*/ +TEST_X4_NARROW (qrshr_z25_z26_12, svuint64x4_t, svuint16_t, + z25 = svqrshr_n_u16_u64_x4 (z26, 12), + z25 = svqrshr_u16 (z26, 12)) + +/* +** qrshr_z25_z0_32: +** uqrshr z25\.h, {z0\.d - z3\.d}, #32 +** ret +*/ +TEST_X4_NARROW (qrshr_z25_z0_32, svuint64x4_t, svuint16_t, + z25 = svqrshr_n_u16_u64_x4 (z0, 32), + z25 = svqrshr_u16 (z0, 32)) + +/* +** qrshr_z22_z16_63: +** uqrshr z22\.h, {z16\.d - z19\.d}, #63 +** ret +*/ +TEST_X4_NARROW (qrshr_z22_z16_63, svuint64x4_t, svuint16_t, + z22_res = svqrshr_n_u16_u64_x4 (z16, 63), + z22_res = svqrshr_u16 (z16, 63)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_u8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..6b73b3abc734cf4d6f551bdbcfd8a272c0e293fb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshr_u8_x4.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qrshr_z0_z0_1: +** uqrshr z0\.b, {z0\.s - z3\.s}, #1 +** ret +*/ +TEST_X4_NARROW (qrshr_z0_z0_1, svuint32x4_t, svuint8_t, + z0_res = svqrshr_n_u8_u32_x4 (z0, 1), + z0_res = svqrshr_u8 (z0, 1)) + +/* +** qrshr_z0_z4_32: +** uqrshr z0\.b, {z4\.s - z7\.s}, #32 +** ret +*/ +TEST_X4_NARROW (qrshr_z0_z4_32, svuint32x4_t, svuint8_t, + z0_res = svqrshr_n_u8_u32_x4 (z4, 32), + z0_res = svqrshr_u8 (z4, 32)) + +/* +** qrshr_z0_z21_2: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uqrshr z0\.b, [^\n]+, #2 +** ret +*/ +TEST_X4_NARROW (qrshr_z0_z21_2, svuint32x4_t, svuint8_t, + z0_res = svqrshr_n_u8_u32_x4 (z21, 2), + z0_res = svqrshr_u8 (z21, 2)) + +/* +** qrshr_z25_z26_12: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uqrshr z25\.b, [^\n]+, #12 +** ret +*/ +TEST_X4_NARROW (qrshr_z25_z26_12, svuint32x4_t, svuint8_t, + z25 = svqrshr_n_u8_u32_x4 (z26, 12), + z25 = svqrshr_u8 (z26, 12)) + +/* +** qrshr_z25_z0_16: +** uqrshr z25\.b, {z0\.s - z3\.s}, #16 +** ret +*/ +TEST_X4_NARROW (qrshr_z25_z0_16, svuint32x4_t, svuint8_t, + z25 = svqrshr_n_u8_u32_x4 (z0, 16), + z25 = svqrshr_u8 (z0, 16)) + +/* +** qrshr_z22_z16_31: +** uqrshr z22\.b, {z16\.s - z19\.s}, #31 +** ret +*/ +TEST_X4_NARROW (qrshr_z22_z16_31, svuint32x4_t, svuint8_t, + z22_res = svqrshr_n_u8_u32_x4 (z16, 31), + z22_res = svqrshr_u8 (z16, 31)) diff 
--git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_s16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..208b7e46e8af002079dd663d4d2500e1da113cdd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_s16_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qrshrn_z0_z0_1: +** sqrshrn z0\.h, {z0\.s - z1\.s}, #1 +** ret +*/ +TEST_X2_NARROW (qrshrn_z0_z0_1, svint32x2_t, svint16_t, + z0_res = svqrshrn_n_s16_s32_x2 (z0, 1), + z0_res = svqrshrn_s16 (z0, 1)) + +/* +** qrshrn_z0_z6_16: +** sqrshrn z0\.h, {z6\.s - z7\.s}, #16 +** ret +*/ +TEST_X2_NARROW (qrshrn_z0_z6_16, svint32x2_t, svint16_t, + z0_res = svqrshrn_n_s16_s32_x2 (z6, 16), + z0_res = svqrshrn_s16 (z6, 16)) + +/* +** qrshrn_z0_z29_13: +** mov [^\n]+ +** mov [^\n]+ +** sqrshrn z0\.h, [^\n]+, #13 +** ret +*/ +TEST_X2_NARROW (qrshrn_z0_z29_13, svint32x2_t, svint16_t, + z0_res = svqrshrn_n_s16_s32_x2 (z29, 13), + z0_res = svqrshrn_s16 (z29, 13)) + +/* +** qrshrn_z5_z0_11: +** sqrshrn z5\.h, {z0\.s - z1\.s}, #11 +** ret +*/ +TEST_X2_NARROW (qrshrn_z5_z0_11, svint32x2_t, svint16_t, + z5 = svqrshrn_n_s16_s32_x2 (z0, 11), + z5 = svqrshrn_s16 (z0, 11)) + +/* +** qrshrn_z22_z16_15: +** sqrshrn z22\.h, {z16\.s - z17\.s}, #15 +** ret +*/ +TEST_X2_NARROW (qrshrn_z22_z16_15, svint32x2_t, svint16_t, + z22 = svqrshrn_n_s16_s32_x2 (z16, 15), + z22 = svqrshrn_s16 (z16, 15)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_s16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..c3f84ce9e4d39ea3680359b2af98fe96dff0272f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_s16_x4.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qrshrn_z0_z0_1: +** sqrshrn z0\.h, {z0\.d - z3\.d}, #1 +** ret +*/ +TEST_X4_NARROW (qrshrn_z0_z0_1, svint64x4_t, svint16_t, + z0_res = svqrshrn_n_s16_s64_x4 (z0, 1), + z0_res = svqrshrn_s16 (z0, 1)) + +/* +** qrshrn_z0_z4_64: +** sqrshrn z0\.h, {z4\.d - z7\.d}, #64 +** ret +*/ +TEST_X4_NARROW (qrshrn_z0_z4_64, svint64x4_t, svint16_t, + z0_res = svqrshrn_n_s16_s64_x4 (z4, 64), + z0_res = svqrshrn_s16 (z4, 64)) + +/* +** qrshrn_z0_z21_33: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqrshrn z0\.h, [^\n]+, #33 +** ret +*/ +TEST_X4_NARROW (qrshrn_z0_z21_33, svint64x4_t, svint16_t, + z0_res = svqrshrn_n_s16_s64_x4 (z21, 33), + z0_res = svqrshrn_s16 (z21, 33)) + +/* +** qrshrn_z25_z26_12: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqrshrn z25\.h, [^\n]+, #12 +** ret +*/ +TEST_X4_NARROW (qrshrn_z25_z26_12, svint64x4_t, svint16_t, + z25 = svqrshrn_n_s16_s64_x4 (z26, 12), + z25 = svqrshrn_s16 (z26, 12)) + +/* +** qrshrn_z25_z0_32: +** sqrshrn z25\.h, {z0\.d - z3\.d}, #32 +** ret +*/ +TEST_X4_NARROW (qrshrn_z25_z0_32, svint64x4_t, svint16_t, + z25 = svqrshrn_n_s16_s64_x4 (z0, 32), + z25 = svqrshrn_s16 (z0, 32)) + +/* +** qrshrn_z22_z16_63: +** sqrshrn z22\.h, {z16\.d - z19\.d}, #63 +** ret +*/ +TEST_X4_NARROW (qrshrn_z22_z16_63, svint64x4_t, svint16_t, + z22_res = svqrshrn_n_s16_s64_x4 (z16, 63), + z22_res = svqrshrn_s16 (z16, 63)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_s8_x4.c new file mode 100644 index 
0000000000000000000000000000000000000000..c2fde5258de2c8afacacf7a5c9ce017f0128147a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_s8_x4.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qrshrn_z0_z0_1: +** sqrshrn z0\.b, {z0\.s - z3\.s}, #1 +** ret +*/ +TEST_X4_NARROW (qrshrn_z0_z0_1, svint32x4_t, svint8_t, + z0_res = svqrshrn_n_s8_s32_x4 (z0, 1), + z0_res = svqrshrn_s8 (z0, 1)) + +/* +** qrshrn_z0_z4_32: +** sqrshrn z0\.b, {z4\.s - z7\.s}, #32 +** ret +*/ +TEST_X4_NARROW (qrshrn_z0_z4_32, svint32x4_t, svint8_t, + z0_res = svqrshrn_n_s8_s32_x4 (z4, 32), + z0_res = svqrshrn_s8 (z4, 32)) + +/* +** qrshrn_z0_z21_2: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqrshrn z0\.b, [^\n]+, #2 +** ret +*/ +TEST_X4_NARROW (qrshrn_z0_z21_2, svint32x4_t, svint8_t, + z0_res = svqrshrn_n_s8_s32_x4 (z21, 2), + z0_res = svqrshrn_s8 (z21, 2)) + +/* +** qrshrn_z25_z26_12: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqrshrn z25\.b, [^\n]+, #12 +** ret +*/ +TEST_X4_NARROW (qrshrn_z25_z26_12, svint32x4_t, svint8_t, + z25 = svqrshrn_n_s8_s32_x4 (z26, 12), + z25 = svqrshrn_s8 (z26, 12)) + +/* +** qrshrn_z25_z0_16: +** sqrshrn z25\.b, {z0\.s - z3\.s}, #16 +** ret +*/ +TEST_X4_NARROW (qrshrn_z25_z0_16, svint32x4_t, svint8_t, + z25 = svqrshrn_n_s8_s32_x4 (z0, 16), + z25 = svqrshrn_s8 (z0, 16)) + +/* +** qrshrn_z22_z16_31: +** sqrshrn z22\.b, {z16\.s - z19\.s}, #31 +** ret +*/ +TEST_X4_NARROW (qrshrn_z22_z16_31, svint32x4_t, svint8_t, + z22_res = svqrshrn_n_s8_s32_x4 (z16, 31), + z22_res = svqrshrn_s8 (z16, 31)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_u16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..66029c6dd6605b4bae0f73e5baf51ee4a041aa11 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_u16_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qrshrn_z0_z0_1: +** uqrshrn z0\.h, {z0\.s - z1\.s}, #1 +** ret +*/ +TEST_X2_NARROW (qrshrn_z0_z0_1, svuint32x2_t, svuint16_t, + z0_res = svqrshrn_n_u16_u32_x2 (z0, 1), + z0_res = svqrshrn_u16 (z0, 1)) + +/* +** qrshrn_z0_z6_16: +** uqrshrn z0\.h, {z6\.s - z7\.s}, #16 +** ret +*/ +TEST_X2_NARROW (qrshrn_z0_z6_16, svuint32x2_t, svuint16_t, + z0_res = svqrshrn_n_u16_u32_x2 (z6, 16), + z0_res = svqrshrn_u16 (z6, 16)) + +/* +** qrshrn_z0_z29_13: +** mov [^\n]+ +** mov [^\n]+ +** uqrshrn z0\.h, [^\n]+, #13 +** ret +*/ +TEST_X2_NARROW (qrshrn_z0_z29_13, svuint32x2_t, svuint16_t, + z0_res = svqrshrn_n_u16_u32_x2 (z29, 13), + z0_res = svqrshrn_u16 (z29, 13)) + +/* +** qrshrn_z5_z0_11: +** uqrshrn z5\.h, {z0\.s - z1\.s}, #11 +** ret +*/ +TEST_X2_NARROW (qrshrn_z5_z0_11, svuint32x2_t, svuint16_t, + z5 = svqrshrn_n_u16_u32_x2 (z0, 11), + z5 = svqrshrn_u16 (z0, 11)) + +/* +** qrshrn_z22_z16_15: +** uqrshrn z22\.h, {z16\.s - z17\.s}, #15 +** ret +*/ +TEST_X2_NARROW (qrshrn_z22_z16_15, svuint32x2_t, svuint16_t, + z22 = svqrshrn_n_u16_u32_x2 (z16, 15), + z22 = svqrshrn_u16 (z16, 15)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_u16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..6680a9053cfdc57ef893598d094421f30dd3785f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_u16_x4.c @@ -0,0 +1,65 @@ +/* { 
dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qrshrn_z0_z0_1: +** uqrshrn z0\.h, {z0\.d - z3\.d}, #1 +** ret +*/ +TEST_X4_NARROW (qrshrn_z0_z0_1, svuint64x4_t, svuint16_t, + z0_res = svqrshrn_n_u16_u64_x4 (z0, 1), + z0_res = svqrshrn_u16 (z0, 1)) + +/* +** qrshrn_z0_z4_64: +** uqrshrn z0\.h, {z4\.d - z7\.d}, #64 +** ret +*/ +TEST_X4_NARROW (qrshrn_z0_z4_64, svuint64x4_t, svuint16_t, + z0_res = svqrshrn_n_u16_u64_x4 (z4, 64), + z0_res = svqrshrn_u16 (z4, 64)) + +/* +** qrshrn_z0_z21_33: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uqrshrn z0\.h, [^\n]+, #33 +** ret +*/ +TEST_X4_NARROW (qrshrn_z0_z21_33, svuint64x4_t, svuint16_t, + z0_res = svqrshrn_n_u16_u64_x4 (z21, 33), + z0_res = svqrshrn_u16 (z21, 33)) + +/* +** qrshrn_z25_z26_12: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uqrshrn z25\.h, [^\n]+, #12 +** ret +*/ +TEST_X4_NARROW (qrshrn_z25_z26_12, svuint64x4_t, svuint16_t, + z25 = svqrshrn_n_u16_u64_x4 (z26, 12), + z25 = svqrshrn_u16 (z26, 12)) + +/* +** qrshrn_z25_z0_32: +** uqrshrn z25\.h, {z0\.d - z3\.d}, #32 +** ret +*/ +TEST_X4_NARROW (qrshrn_z25_z0_32, svuint64x4_t, svuint16_t, + z25 = svqrshrn_n_u16_u64_x4 (z0, 32), + z25 = svqrshrn_u16 (z0, 32)) + +/* +** qrshrn_z22_z16_63: +** uqrshrn z22\.h, {z16\.d - z19\.d}, #63 +** ret +*/ +TEST_X4_NARROW (qrshrn_z22_z16_63, svuint64x4_t, svuint16_t, + z22_res = svqrshrn_n_u16_u64_x4 (z16, 63), + z22_res = svqrshrn_u16 (z16, 63)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_u8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..7755f95f12a889bfb85345e1d7d81aa2f3b3d4c9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrn_u8_x4.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qrshrn_z0_z0_1: +** uqrshrn z0\.b, {z0\.s - z3\.s}, #1 +** ret +*/ +TEST_X4_NARROW (qrshrn_z0_z0_1, svuint32x4_t, svuint8_t, + z0_res = svqrshrn_n_u8_u32_x4 (z0, 1), + z0_res = svqrshrn_u8 (z0, 1)) + +/* +** qrshrn_z0_z4_32: +** uqrshrn z0\.b, {z4\.s - z7\.s}, #32 +** ret +*/ +TEST_X4_NARROW (qrshrn_z0_z4_32, svuint32x4_t, svuint8_t, + z0_res = svqrshrn_n_u8_u32_x4 (z4, 32), + z0_res = svqrshrn_u8 (z4, 32)) + +/* +** qrshrn_z0_z21_2: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uqrshrn z0\.b, [^\n]+, #2 +** ret +*/ +TEST_X4_NARROW (qrshrn_z0_z21_2, svuint32x4_t, svuint8_t, + z0_res = svqrshrn_n_u8_u32_x4 (z21, 2), + z0_res = svqrshrn_u8 (z21, 2)) + +/* +** qrshrn_z25_z26_12: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uqrshrn z25\.b, [^\n]+, #12 +** ret +*/ +TEST_X4_NARROW (qrshrn_z25_z26_12, svuint32x4_t, svuint8_t, + z25 = svqrshrn_n_u8_u32_x4 (z26, 12), + z25 = svqrshrn_u8 (z26, 12)) + +/* +** qrshrn_z25_z0_16: +** uqrshrn z25\.b, {z0\.s - z3\.s}, #16 +** ret +*/ +TEST_X4_NARROW (qrshrn_z25_z0_16, svuint32x4_t, svuint8_t, + z25 = svqrshrn_n_u8_u32_x4 (z0, 16), + z25 = svqrshrn_u8 (z0, 16)) + +/* +** qrshrn_z22_z16_31: +** uqrshrn z22\.b, {z16\.s - z19\.s}, #31 +** ret +*/ +TEST_X4_NARROW (qrshrn_z22_z16_31, svuint32x4_t, svuint8_t, + z22_res = svqrshrn_n_u8_u32_x4 (z16, 31), + z22_res = svqrshrn_u8 (z16, 31)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshru_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshru_u16_x2.c new file mode 100644 index 
0000000000000000000000000000000000000000..872ce7a82ec53d49d953784cecdd3c59dd4f7f86 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshru_u16_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qrshru_z0_z0_1: +** sqrshru z0\.h, {z0\.s - z1\.s}, #1 +** ret +*/ +TEST_X2_NARROW (qrshru_z0_z0_1, svint32x2_t, svuint16_t, + z0_res = svqrshru_n_u16_s32_x2 (z0, 1), + z0_res = svqrshru_u16 (z0, 1)) + +/* +** qrshru_z0_z6_16: +** sqrshru z0\.h, {z6\.s - z7\.s}, #16 +** ret +*/ +TEST_X2_NARROW (qrshru_z0_z6_16, svint32x2_t, svuint16_t, + z0_res = svqrshru_n_u16_s32_x2 (z6, 16), + z0_res = svqrshru_u16 (z6, 16)) + +/* +** qrshru_z0_z29_13: +** mov [^\n]+ +** mov [^\n]+ +** sqrshru z0\.h, [^\n]+, #13 +** ret +*/ +TEST_X2_NARROW (qrshru_z0_z29_13, svint32x2_t, svuint16_t, + z0_res = svqrshru_n_u16_s32_x2 (z29, 13), + z0_res = svqrshru_u16 (z29, 13)) + +/* +** qrshru_z5_z0_11: +** sqrshru z5\.h, {z0\.s - z1\.s}, #11 +** ret +*/ +TEST_X2_NARROW (qrshru_z5_z0_11, svint32x2_t, svuint16_t, + z5 = svqrshru_n_u16_s32_x2 (z0, 11), + z5 = svqrshru_u16 (z0, 11)) + +/* +** qrshru_z22_z16_15: +** sqrshru z22\.h, {z16\.s - z17\.s}, #15 +** ret +*/ +TEST_X2_NARROW (qrshru_z22_z16_15, svint32x2_t, svuint16_t, + z22 = svqrshru_n_u16_s32_x2 (z16, 15), + z22 = svqrshru_u16 (z16, 15)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshru_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshru_u16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..dc830b2578fdbfbe5091e8f75ec4491021eeded0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshru_u16_x4.c @@ -0,0 +1,65 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** qrshru_z0_z0_1: +** sqrshru z0\.h, {z0\.d - z3\.d}, #1 +** ret +*/ +TEST_X4_NARROW (qrshru_z0_z0_1, svint64x4_t, svuint16_t, + z0_res = svqrshru_n_u16_s64_x4 (z0, 1), + z0_res = svqrshru_u16 (z0, 1)) + +/* +** qrshru_z0_z4_64: +** sqrshru z0\.h, {z4\.d - z7\.d}, #64 +** ret +*/ +TEST_X4_NARROW (qrshru_z0_z4_64, svint64x4_t, svuint16_t, + z0_res = svqrshru_n_u16_s64_x4 (z4, 64), + z0_res = svqrshru_u16 (z4, 64)) + +/* +** qrshru_z0_z21_33: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqrshru z0\.h, [^\n]+, #33 +** ret +*/ +TEST_X4_NARROW (qrshru_z0_z21_33, svint64x4_t, svuint16_t, + z0_res = svqrshru_n_u16_s64_x4 (z21, 33), + z0_res = svqrshru_u16 (z21, 33)) + +/* +** qrshru_z25_z26_12: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sqrshru z25\.h, [^\n]+, #12 +** ret +*/ +TEST_X4_NARROW (qrshru_z25_z26_12, svint64x4_t, svuint16_t, + z25 = svqrshru_n_u16_s64_x4 (z26, 12), + z25 = svqrshru_u16 (z26, 12)) + +/* +** qrshru_z25_z0_32: +** sqrshru z25\.h, {z0\.d - z3\.d}, #32 +** ret +*/ +TEST_X4_NARROW (qrshru_z25_z0_32, svint64x4_t, svuint16_t, + z25 = svqrshru_n_u16_s64_x4 (z0, 32), + z25 = svqrshru_u16 (z0, 32)) + +/* +** qrshru_z22_z16_63: +** sqrshru z22\.h, {z16\.d - z19\.d}, #63 +** ret +*/ +TEST_X4_NARROW (qrshru_z22_z16_63, svint64x4_t, svuint16_t, + z22_res = svqrshru_n_u16_s64_x4 (z16, 63), + z22_res = svqrshru_u16 (z16, 63)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshru_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshru_u8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..3adaa189389c4b7056c3228a6fbeb92b826f11e0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshru_u8_x4.c 
@@ -0,0 +1,65 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshru_z0_z0_1:
+**	sqrshru	z0\.b, {z0\.s - z3\.s}, #1
+**	ret
+*/
+TEST_X4_NARROW (qrshru_z0_z0_1, svint32x4_t, svuint8_t,
+		z0_res = svqrshru_n_u8_s32_x4 (z0, 1),
+		z0_res = svqrshru_u8 (z0, 1))
+
+/*
+** qrshru_z0_z4_32:
+**	sqrshru	z0\.b, {z4\.s - z7\.s}, #32
+**	ret
+*/
+TEST_X4_NARROW (qrshru_z0_z4_32, svint32x4_t, svuint8_t,
+		z0_res = svqrshru_n_u8_s32_x4 (z4, 32),
+		z0_res = svqrshru_u8 (z4, 32))
+
+/*
+** qrshru_z0_z21_2:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sqrshru	z0\.b, [^\n]+, #2
+**	ret
+*/
+TEST_X4_NARROW (qrshru_z0_z21_2, svint32x4_t, svuint8_t,
+		z0_res = svqrshru_n_u8_s32_x4 (z21, 2),
+		z0_res = svqrshru_u8 (z21, 2))
+
+/*
+** qrshru_z25_z26_12:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sqrshru	z25\.b, [^\n]+, #12
+**	ret
+*/
+TEST_X4_NARROW (qrshru_z25_z26_12, svint32x4_t, svuint8_t,
+		z25 = svqrshru_n_u8_s32_x4 (z26, 12),
+		z25 = svqrshru_u8 (z26, 12))
+
+/*
+** qrshru_z25_z0_16:
+**	sqrshru	z25\.b, {z0\.s - z3\.s}, #16
+**	ret
+*/
+TEST_X4_NARROW (qrshru_z25_z0_16, svint32x4_t, svuint8_t,
+		z25 = svqrshru_n_u8_s32_x4 (z0, 16),
+		z25 = svqrshru_u8 (z0, 16))
+
+/*
+** qrshru_z22_z16_31:
+**	sqrshru	z22\.b, {z16\.s - z19\.s}, #31
+**	ret
+*/
+TEST_X4_NARROW (qrshru_z22_z16_31, svint32x4_t, svuint8_t,
+		z22_res = svqrshru_n_u8_s32_x4 (z16, 31),
+		z22_res = svqrshru_u8 (z16, 31))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrun_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrun_u16_x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..a6dfa236e3b1a9f90c9ecf5071fa9ddee0240c2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrun_u16_x2.c
@@ -0,0 +1,50 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshrun_z0_z0_1:
+**	sqrshrun	z0\.h, {z0\.s - z1\.s}, #1
+**	ret
+*/
+TEST_X2_NARROW (qrshrun_z0_z0_1, svint32x2_t, svuint16_t,
+		z0_res = svqrshrun_n_u16_s32_x2 (z0, 1),
+		z0_res = svqrshrun_u16 (z0, 1))
+
+/*
+** qrshrun_z0_z6_16:
+**	sqrshrun	z0\.h, {z6\.s - z7\.s}, #16
+**	ret
+*/
+TEST_X2_NARROW (qrshrun_z0_z6_16, svint32x2_t, svuint16_t,
+		z0_res = svqrshrun_n_u16_s32_x2 (z6, 16),
+		z0_res = svqrshrun_u16 (z6, 16))
+
+/*
+** qrshrun_z0_z29_13:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sqrshrun	z0\.h, [^\n]+, #13
+**	ret
+*/
+TEST_X2_NARROW (qrshrun_z0_z29_13, svint32x2_t, svuint16_t,
+		z0_res = svqrshrun_n_u16_s32_x2 (z29, 13),
+		z0_res = svqrshrun_u16 (z29, 13))
+
+/*
+** qrshrun_z5_z0_11:
+**	sqrshrun	z5\.h, {z0\.s - z1\.s}, #11
+**	ret
+*/
+TEST_X2_NARROW (qrshrun_z5_z0_11, svint32x2_t, svuint16_t,
+		z5 = svqrshrun_n_u16_s32_x2 (z0, 11),
+		z5 = svqrshrun_u16 (z0, 11))
+
+/*
+** qrshrun_z22_z16_15:
+**	sqrshrun	z22\.h, {z16\.s - z17\.s}, #15
+**	ret
+*/
+TEST_X2_NARROW (qrshrun_z22_z16_15, svint32x2_t, svuint16_t,
+		z22 = svqrshrun_n_u16_s32_x2 (z16, 15),
+		z22 = svqrshrun_u16 (z16, 15))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrun_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrun_u16_x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..4226f7fe024715bd0c743ede29d7ae4b51b37719
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrun_u16_x4.c
@@ -0,0 +1,65 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshrun_z0_z0_1:
+**	sqrshrun	z0\.h, {z0\.d - z3\.d}, #1
+**	ret
+*/
+TEST_X4_NARROW (qrshrun_z0_z0_1, svint64x4_t, svuint16_t,
+		z0_res = svqrshrun_n_u16_s64_x4 (z0, 1),
+		z0_res = svqrshrun_u16 (z0, 1))
+
+/*
+** qrshrun_z0_z4_64:
+**	sqrshrun	z0\.h, {z4\.d - z7\.d}, #64
+**	ret
+*/
+TEST_X4_NARROW (qrshrun_z0_z4_64, svint64x4_t, svuint16_t,
+		z0_res = svqrshrun_n_u16_s64_x4 (z4, 64),
+		z0_res = svqrshrun_u16 (z4, 64))
+
+/*
+** qrshrun_z0_z21_33:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sqrshrun	z0\.h, [^\n]+, #33
+**	ret
+*/
+TEST_X4_NARROW (qrshrun_z0_z21_33, svint64x4_t, svuint16_t,
+		z0_res = svqrshrun_n_u16_s64_x4 (z21, 33),
+		z0_res = svqrshrun_u16 (z21, 33))
+
+/*
+** qrshrun_z25_z26_12:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sqrshrun	z25\.h, [^\n]+, #12
+**	ret
+*/
+TEST_X4_NARROW (qrshrun_z25_z26_12, svint64x4_t, svuint16_t,
+		z25 = svqrshrun_n_u16_s64_x4 (z26, 12),
+		z25 = svqrshrun_u16 (z26, 12))
+
+/*
+** qrshrun_z25_z0_32:
+**	sqrshrun	z25\.h, {z0\.d - z3\.d}, #32
+**	ret
+*/
+TEST_X4_NARROW (qrshrun_z25_z0_32, svint64x4_t, svuint16_t,
+		z25 = svqrshrun_n_u16_s64_x4 (z0, 32),
+		z25 = svqrshrun_u16 (z0, 32))
+
+/*
+** qrshrun_z22_z16_63:
+**	sqrshrun	z22\.h, {z16\.d - z19\.d}, #63
+**	ret
+*/
+TEST_X4_NARROW (qrshrun_z22_z16_63, svint64x4_t, svuint16_t,
+		z22_res = svqrshrun_n_u16_s64_x4 (z16, 63),
+		z22_res = svqrshrun_u16 (z16, 63))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrun_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrun_u8_x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..6a752014d23668f2195f2e30e9090cf6b2f11161
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/qrshrun_u8_x4.c
@@ -0,0 +1,65 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** qrshrun_z0_z0_1:
+**	sqrshrun	z0\.b, {z0\.s - z3\.s}, #1
+**	ret
+*/
+TEST_X4_NARROW (qrshrun_z0_z0_1, svint32x4_t, svuint8_t,
+		z0_res = svqrshrun_n_u8_s32_x4 (z0, 1),
+		z0_res = svqrshrun_u8 (z0, 1))
+
+/*
+** qrshrun_z0_z4_32:
+**	sqrshrun	z0\.b, {z4\.s - z7\.s}, #32
+**	ret
+*/
+TEST_X4_NARROW (qrshrun_z0_z4_32, svint32x4_t, svuint8_t,
+		z0_res = svqrshrun_n_u8_s32_x4 (z4, 32),
+		z0_res = svqrshrun_u8 (z4, 32))
+
+/*
+** qrshrun_z0_z21_2:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sqrshrun	z0\.b, [^\n]+, #2
+**	ret
+*/
+TEST_X4_NARROW (qrshrun_z0_z21_2, svint32x4_t, svuint8_t,
+		z0_res = svqrshrun_n_u8_s32_x4 (z21, 2),
+		z0_res = svqrshrun_u8 (z21, 2))
+
+/*
+** qrshrun_z25_z26_12:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sqrshrun	z25\.b, [^\n]+, #12
+**	ret
+*/
+TEST_X4_NARROW (qrshrun_z25_z26_12, svint32x4_t, svuint8_t,
+		z25 = svqrshrun_n_u8_s32_x4 (z26, 12),
+		z25 = svqrshrun_u8 (z26, 12))
+
+/*
+** qrshrun_z25_z0_16:
+**	sqrshrun	z25\.b, {z0\.s - z3\.s}, #16
+**	ret
+*/
+TEST_X4_NARROW (qrshrun_z25_z0_16, svint32x4_t, svuint8_t,
+		z25 = svqrshrun_n_u8_s32_x4 (z0, 16),
+		z25 = svqrshrun_u8 (z0, 16))
+
+/*
+** qrshrun_z22_z16_31:
+**	sqrshrun	z22\.b, {z16\.s - z19\.s}, #31
+**	ret
+*/
+TEST_X4_NARROW (qrshrun_z22_z16_31, svint32x4_t, svuint8_t,
+		z22_res = svqrshrun_n_u8_s32_x4 (z16, 31),
+		z22_res = svqrshrun_u8 (z16, 31))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za16_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za16_vg2.c
new file mode 100644
index 0000000000000000000000000000000000000000..a24b8486549c7ff11ea21ad5355fddc12a952c16
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za16_vg2.c
@@ -0,0 +1,140 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za16_s16_z0_0_0:
+**	mov	(w1[2-5]), (?:wzr|#?0)
+**	mova	{z0\.h - z1\.h}, za0h\.h\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z0_0_0, svint16x2_t,
+		 z0 = svread_hor_za16_s16_vg2 (0, 0),
+		 z0 = svread_hor_za16_s16_vg2 (0, 0))
+
+/*
+** read_za16_u16_z4_1_1:
+**	mov	(w1[2-5]), #?1
+**	mova	{z4\.h - z5\.h}, za1h\.h\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z4_1_1, svuint16x2_t,
+		 z4 = svread_hor_za16_u16_vg2 (1, 1),
+		 z4 = svread_hor_za16_u16_vg2 (1, 1))
+
+/*
+** read_za16_f16_z28_0_w11:
+**	mov	(w1[2-5]), w11
+**	mova	{z28\.h - z29\.h}, za0h\.h\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_f16_z28_0_w11, svfloat16x2_t,
+		 z28 = svread_hor_za16_f16_vg2 (0, w11),
+		 z28 = svread_hor_za16_f16_vg2 (0, w11))
+
+/*
+** read_za16_bf16_z0_1_w12:
+**	mova	{z0\.h - z1\.h}, za1h\.h\[w12, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_bf16_z0_1_w12, svbfloat16x2_t,
+		 z0 = svread_hor_za16_bf16_vg2 (1, w12),
+		 z0 = svread_hor_za16_bf16_vg2 (1, w12))
+
+/*
+** read_za16_u16_z18_0_w15:
+**	mova	{z18\.h - z19\.h}, za0h\.h\[w15, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z18_0_w15, svuint16x2_t,
+		 z18 = svread_hor_za16_u16_vg2 (0, w15),
+		 z18 = svread_hor_za16_u16_vg2 (0, w15))
+
+/*
+** read_za16_s16_z23_1_w12p6:
+**	mova	{[^\n]+}, za1h\.h\[w12, 6:7\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z23_1_w12p6, svint16x2_t,
+		 z23 = svread_hor_za16_s16_vg2 (1, w12 + 6),
+		 z23 = svread_hor_za16_s16_vg2 (1, w12 + 6))
+
+/*
+** read_za16_f16_z4_0_w12p1:
+**	add	(w[0-9]+), w12, #?1
+**	mova	{z4\.h - z5\.h}, za0h\.h\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_f16_z4_0_w12p1, svfloat16x2_t,
+		 z4 = svread_hor_za16_f16_vg2 (0, w12 + 1),
+		 z4 = svread_hor_za16_f16_vg2 (0, w12 + 1))
+
+/*
+** read_za16_s16_z28_1_w12p2:
+**	mova	{z28\.h - z29\.h}, za1h\.h\[w12, 2:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z28_1_w12p2, svint16x2_t,
+		 z28 = svread_hor_za16_s16_vg2 (1, w12 + 2),
+		 z28 = svread_hor_za16_s16_vg2 (1, w12 + 2))
+
+/*
+** read_za16_u16_z0_0_w15p3:
+**	add	(w[0-9]+), w15, #?3
+**	mova	{z0\.h - z1\.h}, za0h\.h\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z0_0_w15p3, svuint16x2_t,
+		 z0 = svread_hor_za16_u16_vg2 (0, w15 + 3),
+		 z0 = svread_hor_za16_u16_vg2 (0, w15 + 3))
+
+/*
+** read_za16_bf16_z4_1_w15p4:
+**	mova	{z4\.h - z5\.h}, za1h\.h\[w15, 4:5\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_bf16_z4_1_w15p4, svbfloat16x2_t,
+		 z4 = svread_hor_za16_bf16_vg2 (1, w15 + 4),
+		 z4 = svread_hor_za16_bf16_vg2 (1, w15 + 4))
+
+/*
+** read_za16_u16_z28_0_w12p7:
+**	add	(w[0-9]+), w12, #?7
+**	mova	{z28\.h - z29\.h}, za0h\.h\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z28_0_w12p7, svuint16x2_t,
+		 z28 = svread_hor_za16_u16_vg2 (0, w12 + 7),
+		 z28 = svread_hor_za16_u16_vg2 (0, w12 + 7))
+
+/*
+** read_za16_s16_z0_1_w15p8:
+**	add	(w[0-9]+), w15, #?8
+**	mova	{z0\.h - z1\.h}, za1h\.h\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z0_1_w15p8, svint16x2_t,
+		 z0 = svread_hor_za16_s16_vg2 (1, w15 + 8),
+		 z0 = svread_hor_za16_s16_vg2 (1, w15 + 8))
+
+/*
+** read_za16_u16_z4_0_w12m1:
+**	sub	(w[0-9]+), w12, #?1
+**	mova	{z4\.h - z5\.h}, za0h\.h\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z4_0_w12m1, svuint16x2_t,
+		 z4 = svread_hor_za16_u16_vg2 (0, w12 - 1),
+		 z4 = svread_hor_za16_u16_vg2 (0, w12 - 1))
+
+/*
+** read_za16_u16_z18_1_w16:
+**	mov	(w1[2-5]), w16
+**	mova	{z18\.h - z19\.h}, za1h\.h\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z18_1_w16, svuint16x2_t,
+		 z18 = svread_hor_za16_u16_vg2 (1, w16),
+		 z18 = svread_hor_za16_u16_vg2 (1, w16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za16_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za16_vg4.c
new file mode 100644
index 0000000000000000000000000000000000000000..c6c8d142f3e97e0599d758a9bdbe533baa2dc6c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za16_vg4.c
@@ -0,0 +1,138 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za16_s16_z0_0_0:
+**	mov	(w1[2-5]), (?:wzr|#?0)
+**	mova	{z0\.h - z3\.h}, za0h\.h\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z0_0_0, svint16x4_t,
+		 z0 = svread_hor_za16_s16_vg4 (0, 0),
+		 z0 = svread_hor_za16_s16_vg4 (0, 0))
+
+/*
+** read_za16_u16_z4_1_1:
+**	mov	(w1[2-5]), #?1
+**	mova	{z4\.h - z7\.h}, za1h\.h\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z4_1_1, svuint16x4_t,
+		 z4 = svread_hor_za16_u16_vg4 (1, 1),
+		 z4 = svread_hor_za16_u16_vg4 (1, 1))
+
+/*
+** read_za16_f16_z28_0_w11:
+**	mov	(w1[2-5]), w11
+**	mova	{z28\.h - z31\.h}, za0h\.h\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_f16_z28_0_w11, svfloat16x4_t,
+		 z28 = svread_hor_za16_f16_vg4 (0, w11),
+		 z28 = svread_hor_za16_f16_vg4 (0, w11))
+
+/*
+** read_za16_s16_z0_1_w12:
+**	mova	{z0\.h - z3\.h}, za1h\.h\[w12, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z0_1_w12, svint16x4_t,
+		 z0 = svread_hor_za16_s16_vg4 (1, w12),
+		 z0 = svread_hor_za16_s16_vg4 (1, w12))
+
+/*
+** read_za16_u16_z18_0_w15:
+**	mova	{[^\n]+}, za0h\.h\[w15, 0:3\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z18_0_w15, svuint16x4_t,
+		 z18 = svread_hor_za16_u16_vg4 (0, w15),
+		 z18 = svread_hor_za16_u16_vg4 (0, w15))
+
+/*
+** read_za16_bf16_z23_1_w12p4:
+**	mova	{[^\n]+}, za1h\.h\[w12, 4:7\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_bf16_z23_1_w12p4, svbfloat16x4_t,
+		 z23 = svread_hor_za16_bf16_vg4 (1, w12 + 4),
+		 z23 = svread_hor_za16_bf16_vg4 (1, w12 + 4))
+
+/*
+** read_za16_u16_z4_0_w12p1:
+**	add	(w[0-9]+), w12, #?1
+**	mova	{z4\.h - z7\.h}, za0h\.h\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z4_0_w12p1, svuint16x4_t,
+		 z4 = svread_hor_za16_u16_vg4 (0, w12 + 1),
+		 z4 = svread_hor_za16_u16_vg4 (0, w12 + 1))
+
+/*
+** read_za16_s16_z28_1_w12p2:
+**	add	(w[0-9]+), w12, #?2
+**	mova	{z28\.h - z31\.h}, za1h\.h\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z28_1_w12p2, svint16x4_t,
+		 z28 = svread_hor_za16_s16_vg4 (1, w12 + 2),
+		 z28 = svread_hor_za16_s16_vg4 (1, w12 + 2))
+
+/*
+** read_za16_f16_z0_0_w15p3:
+**	add	(w[0-9]+), w15, #?3
+**	mova	{z0\.h - z3\.h}, za0h\.h\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_f16_z0_0_w15p3, svfloat16x4_t,
+		 z0 = svread_hor_za16_f16_vg4 (0, w15 + 3),
+		 z0 = svread_hor_za16_f16_vg4 (0, w15 + 3))
+
+/*
+** read_za16_u16_z28_1_w12p6:
+**	add	(w[0-9]+), w12, #?6
+**	mova	{z28\.h - z31\.h}, za1h\.h\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z28_1_w12p6, svuint16x4_t,
+		 z28 = svread_hor_za16_u16_vg4 (1, w12 + 6),
+		 z28 = svread_hor_za16_u16_vg4 (1, w12 + 6))
+
+/*
+** read_za16_s16_z0_0_w15p8:
+**	add	(w[0-9]+), w15, #?8
+**	mova	{z0\.h - z3\.h}, za0h\.h\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z0_0_w15p8, svint16x4_t,
+		 z0 = svread_hor_za16_s16_vg4 (0, w15 + 8),
+		 z0 = svread_hor_za16_s16_vg4 (0, w15 + 8))
+
+/*
+** read_za16_bf16_z4_1_w12m1:
+**	sub	(w[0-9]+), w12, #?1
+**	mova	{z4\.h - z7\.h}, za1h\.h\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_bf16_z4_1_w12m1, svbfloat16x4_t,
+		 z4 = svread_hor_za16_bf16_vg4 (1, w12 - 1),
+		 z4 = svread_hor_za16_bf16_vg4 (1, w12 - 1))
+
+/*
+** read_za16_u16_z28_0_w16:
+**	mov	(w1[2-5]), w16
+**	mova	{z28\.h - z31\.h}, za0h\.h\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z28_0_w16, svuint16x4_t,
+		 z28 = svread_hor_za16_u16_vg4 (0, w16),
+		 z28 = svread_hor_za16_u16_vg4 (0, w16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za32_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za32_vg2.c
new file mode 100644
index 0000000000000000000000000000000000000000..ce2ac70f9a19cce7bf13a9a35eccacfe1f47e80a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za32_vg2.c
@@ -0,0 +1,112 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za32_s32_z0_0_0:
+**	mov	(w1[2-5]), (?:wzr|#?0)
+**	mova	{z0\.s - z1\.s}, za0h\.s\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z0_0_0, svint32x2_t,
+		 z0 = svread_hor_za32_s32_vg2 (0, 0),
+		 z0 = svread_hor_za32_s32_vg2 (0, 0))
+
+/*
+** read_za32_u32_z4_1_1:
+**	mov	(w1[2-5]), #?1
+**	mova	{z4\.s - z5\.s}, za1h\.s\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z4_1_1, svuint32x2_t,
+		 z4 = svread_hor_za32_u32_vg2 (1, 1),
+		 z4 = svread_hor_za32_u32_vg2 (1, 1))
+
+/*
+** read_za32_f32_z28_2_w11:
+**	mov	(w1[2-5]), w11
+**	mova	{z28\.s - z29\.s}, za2h\.s\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z28_2_w11, svfloat32x2_t,
+		 z28 = svread_hor_za32_f32_vg2 (2, w11),
+		 z28 = svread_hor_za32_f32_vg2 (2, w11))
+
+/*
+** read_za32_f32_z0_3_w12:
+**	mova	{z0\.s - z1\.s}, za3h\.s\[w12, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z0_3_w12, svfloat32x2_t,
+		 z0 = svread_hor_za32_f32_vg2 (3, w12),
+		 z0 = svread_hor_za32_f32_vg2 (3, w12))
+
+/*
+** read_za32_u32_z18_0_w15:
+**	mova	{z18\.s - z19\.s}, za0h\.s\[w15, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z18_0_w15, svuint32x2_t,
+		 z18 = svread_hor_za32_u32_vg2 (0, w15),
+		 z18 = svread_hor_za32_u32_vg2 (0, w15))
+
+/*
+** read_za32_s32_z23_1_w12p2:
+**	mova	{[^\n]+}, za1h\.s\[w12, 2:3\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z23_1_w12p2, svint32x2_t,
+		 z23 = svread_hor_za32_s32_vg2 (1, w12 + 2),
+		 z23 = svread_hor_za32_s32_vg2 (1, w12 + 2))
+
+/*
+** read_za32_f32_z4_2_w12p1:
+**	add	(w[0-9]+), w12, #?1
+**	mova	{z4\.s - z5\.s}, za2h\.s\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z4_2_w12p1, svfloat32x2_t,
+		 z4 = svread_hor_za32_f32_vg2 (2, w12 + 1),
+		 z4 = svread_hor_za32_f32_vg2 (2, w12 + 1))
+
+/*
+** read_za32_u32_z0_3_w15p3:
+**	add	(w[0-9]+), w15, #?3
+**	mova	{z0\.s - z1\.s}, za3h\.s\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z0_3_w15p3, svuint32x2_t,
+		 z0 = svread_hor_za32_u32_vg2 (3, w15 + 3),
+		 z0 = svread_hor_za32_u32_vg2 (3, w15 + 3))
+
+/*
+** read_za32_s32_z0_1_w15p4:
+**	add	(w[0-9]+), w15, #?4
+**	mova	{z0\.s - z1\.s}, za1h\.s\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z0_1_w15p4, svint32x2_t,
+		 z0 = svread_hor_za32_s32_vg2 (1, w15 + 4),
+		 z0 = svread_hor_za32_s32_vg2 (1, w15 + 4))
+
+/*
+** read_za32_u32_z4_3_w12m1:
+**	sub	(w[0-9]+), w12, #?1
+**	mova	{z4\.s - z5\.s}, za3h\.s\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z4_3_w12m1, svuint32x2_t,
+		 z4 = svread_hor_za32_u32_vg2 (3, w12 - 1),
+		 z4 = svread_hor_za32_u32_vg2 (3, w12 - 1))
+
+/*
+** read_za32_u32_z18_1_w16:
+**	mov	(w1[2-5]), w16
+**	mova	{z18\.s - z19\.s}, za1h\.s\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z18_1_w16, svuint32x2_t,
+		 z18 = svread_hor_za32_u32_vg2 (1, w16),
+		 z18 = svread_hor_za32_u32_vg2 (1, w16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za32_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za32_vg4.c
new file mode 100644
index 0000000000000000000000000000000000000000..2e8eb8dd94f141c6fc93630f87fd14b2befc9d77
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za32_vg4.c
@@ -0,0 +1,129 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za32_s32_z0_0_0:
+**	mov	(w1[2-5]), (?:wzr|#?0)
+**	mova	{z0\.s - z3\.s}, za0h\.s\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z0_0_0, svint32x4_t,
+		 z0 = svread_hor_za32_s32_vg4 (0, 0),
+		 z0 = svread_hor_za32_s32_vg4 (0, 0))
+
+/*
+** read_za32_u32_z4_1_1:
+**	mov	(w1[2-5]), #?1
+**	mova	{z4\.s - z7\.s}, za1h\.s\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z4_1_1, svuint32x4_t,
+		 z4 = svread_hor_za32_u32_vg4 (1, 1),
+		 z4 = svread_hor_za32_u32_vg4 (1, 1))
+
+/*
+** read_za32_f32_z28_2_w11:
+**	mov	(w1[2-5]), w11
+**	mova	{z28\.s - z31\.s}, za2h\.s\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z28_2_w11, svfloat32x4_t,
+		 z28 = svread_hor_za32_f32_vg4 (2, w11),
+		 z28 = svread_hor_za32_f32_vg4 (2, w11))
+
+/*
+** read_za32_s32_z0_3_w12:
+**	mova	{z0\.s - z3\.s}, za3h\.s\[w12, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z0_3_w12, svint32x4_t,
+		 z0 = svread_hor_za32_s32_vg4 (3, w12),
+		 z0 = svread_hor_za32_s32_vg4 (3, w12))
+
+/*
+** read_za32_u32_z18_0_w15:
+**	mova	{[^\n]+}, za0h\.s\[w15, 0:3\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z18_0_w15, svuint32x4_t,
+		 z18 = svread_hor_za32_u32_vg4 (0, w15),
+		 z18 = svread_hor_za32_u32_vg4 (0, w15))
+
+/*
+** read_za32_f32_z23_1_w12p4:
+**	add	(w[0-9]+), w12, #?4
+**	mova	{[^\n]+}, za1h\.s\[\1, 0:3\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z23_1_w12p4, svfloat32x4_t,
+		 z23 = svread_hor_za32_f32_vg4 (1, w12 + 4),
+		 z23 = svread_hor_za32_f32_vg4 (1, w12 + 4))
+
+/*
+** read_za32_u32_z4_2_w12p1:
+**	add	(w[0-9]+), w12, #?1
+**	mova	{z4\.s - z7\.s}, za2h\.s\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z4_2_w12p1, svuint32x4_t,
+		 z4 = svread_hor_za32_u32_vg4 (2, w12 + 1),
+		 z4 = svread_hor_za32_u32_vg4 (2, w12 + 1))
+
+/*
+** read_za32_s32_z28_3_w12p2:
+**	add	(w[0-9]+), w12, #?2
+**	mova	{z28\.s - z31\.s}, za3h\.s\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z28_3_w12p2, svint32x4_t,
+		 z28 = svread_hor_za32_s32_vg4 (3, w12 + 2),
+		 z28 = svread_hor_za32_s32_vg4 (3, w12 + 2))
+
+/*
+** read_za32_f32_z0_0_w15p3:
+**	add	(w[0-9]+), w15, #?3
+**	mova	{z0\.s - z3\.s}, za0h\.s\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z0_0_w15p3, svfloat32x4_t,
+		 z0 = svread_hor_za32_f32_vg4 (0, w15 + 3),
+		 z0 = svread_hor_za32_f32_vg4 (0, w15 + 3))
+
+/*
+** read_za32_u32_z28_1_w12p4:
+**	add	(w[0-9]+), w12, #?4
+**	mova	{z28\.s - z31\.s}, za1h\.s\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z28_1_w12p4, svuint32x4_t,
+		 z28 = svread_hor_za32_u32_vg4 (1, w12 + 4),
+		 z28 = svread_hor_za32_u32_vg4 (1, w12 + 4))
+
+/*
+** read_za32_f32_z4_2_w12m1:
+**	sub	(w[0-9]+), w12, #?1
+**	mova	{z4\.s - z7\.s}, za2h\.s\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z4_2_w12m1, svfloat32x4_t,
+		 z4 = svread_hor_za32_f32_vg4 (2, w12 - 1),
+		 z4 = svread_hor_za32_f32_vg4 (2, w12 - 1))
+
+/*
+** read_za32_u32_z28_3_w16:
+**	mov	(w1[2-5]), w16
+**	mova	{z28\.s - z31\.s}, za3h\.s\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z28_3_w16, svuint32x4_t,
+		 z28 = svread_hor_za32_u32_vg4 (3, w16),
+		 z28 = svread_hor_za32_u32_vg4 (3, w16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za64_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za64_vg2.c
new file mode 100644
index 0000000000000000000000000000000000000000..d18468ea841a787098f9977ae4c9f531b91995e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za64_vg2.c
@@ -0,0 +1,113 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za64_s64_z0_0_0:
+**	mov	(w1[2-5]), (?:wzr|#?0)
+**	mova	{z0\.d - z1\.d}, za0h\.d\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z0_0_0, svint64x2_t,
+		 z0 = svread_hor_za64_s64_vg2 (0, 0),
+		 z0 = svread_hor_za64_s64_vg2 (0, 0))
+
+/*
+** read_za64_u64_z4_1_1:
+**	mov	(w1[2-5]), #?1
+**	mova	{z4\.d - z5\.d}, za1h\.d\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z4_1_1, svuint64x2_t,
+		 z4 = svread_hor_za64_u64_vg2 (1, 1),
+		 z4 = svread_hor_za64_u64_vg2 (1, 1))
+
+/*
+** read_za64_f64_z28_2_w11:
+**	mov	(w1[2-5]), w11
+**	mova	{z28\.d - z29\.d}, za2h\.d\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z28_2_w11, svfloat64x2_t,
+		 z28 = svread_hor_za64_f64_vg2 (2, w11),
+		 z28 = svread_hor_za64_f64_vg2 (2, w11))
+
+/*
+** read_za64_f64_z0_3_w12:
+**	mova	{z0\.d - z1\.d}, za3h\.d\[w12, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z0_3_w12, svfloat64x2_t,
+		 z0 = svread_hor_za64_f64_vg2 (3, w12),
+		 z0 = svread_hor_za64_f64_vg2 (3, w12))
+
+/*
+** read_za64_u64_z18_4_w15:
+**	mova	{z18\.d - z19\.d}, za4h\.d\[w15, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z18_4_w15, svuint64x2_t,
+		 z18 = svread_hor_za64_u64_vg2 (4, w15),
+		 z18 = svread_hor_za64_u64_vg2 (4, w15))
+
+/*
+** read_za64_s64_z23_5_w12p2:
+**	add	(w[0-9]+), w12, #?2
+**	mova	{[^\n]+}, za5h\.d\[\1, 0:1\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z23_5_w12p2, svint64x2_t,
+		 z23 = svread_hor_za64_s64_vg2 (5, w12 + 2),
+		 z23 = svread_hor_za64_s64_vg2 (5, w12 + 2))
+
+/*
+** read_za64_f64_z4_6_w12p1:
+**	add	(w[0-9]+), w12, #?1
+**	mova	{z4\.d - z5\.d}, za6h\.d\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z4_6_w12p1, svfloat64x2_t,
+		 z4 = svread_hor_za64_f64_vg2 (6, w12 + 1),
+		 z4 = svread_hor_za64_f64_vg2 (6, w12 + 1))
+
+/*
+** read_za64_u64_z0_7_w15p3:
+**	add	(w[0-9]+), w15, #?3
+**	mova	{z0\.d - z1\.d}, za7h\.d\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z0_7_w15p3, svuint64x2_t,
+		 z0 = svread_hor_za64_u64_vg2 (7, w15 + 3),
+		 z0 = svread_hor_za64_u64_vg2 (7, w15 + 3))
+
+/*
+** read_za64_s64_z0_1_w15p4:
+**	add	(w[0-9]+), w15, #?4
+**	mova	{z0\.d - z1\.d}, za1h\.d\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z0_1_w15p4, svint64x2_t,
+		 z0 = svread_hor_za64_s64_vg2 (1, w15 + 4),
+		 z0 = svread_hor_za64_s64_vg2 (1, w15 + 4))
+
+/*
+** read_za64_u64_z4_3_w12m1:
+**	sub	(w[0-9]+), w12, #?1
+**	mova	{z4\.d - z5\.d}, za3h\.d\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z4_3_w12m1, svuint64x2_t,
+		 z4 = svread_hor_za64_u64_vg2 (3, w12 - 1),
+		 z4 = svread_hor_za64_u64_vg2 (3, w12 - 1))
+
+/*
+** read_za64_u64_z18_1_w16:
+**	mov	(w1[2-5]), w16
+**	mova	{z18\.d - z19\.d}, za1h\.d\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z18_1_w16, svuint64x2_t,
+		 z18 = svread_hor_za64_u64_vg2 (1, w16),
+		 z18 = svread_hor_za64_u64_vg2 (1, w16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za64_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za64_vg4.c
new file mode 100644
index 0000000000000000000000000000000000000000..1dff7013c917d7d29bbfa11de946da74f090f8e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za64_vg4.c
@@ -0,0 +1,129 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za64_s64_z0_0_0:
+**	mov	(w1[2-5]), (?:wzr|#?0)
+**	mova	{z0\.d - z3\.d}, za0h\.d\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z0_0_0, svint64x4_t,
+		 z0 = svread_hor_za64_s64_vg4 (0, 0),
+		 z0 = svread_hor_za64_s64_vg4 (0, 0))
+
+/*
+** read_za64_u64_z4_1_1:
+**	mov	(w1[2-5]), #?1
+**	mova	{z4\.d - z7\.d}, za1h\.d\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z4_1_1, svuint64x4_t,
+		 z4 = svread_hor_za64_u64_vg4 (1, 1),
+		 z4 = svread_hor_za64_u64_vg4 (1, 1))
+
+/*
+** read_za64_f64_z28_2_w11:
+**	mov	(w1[2-5]), w11
+**	mova	{z28\.d - z31\.d}, za2h\.d\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z28_2_w11, svfloat64x4_t,
+		 z28 = svread_hor_za64_f64_vg4 (2, w11),
+		 z28 = svread_hor_za64_f64_vg4 (2, w11))
+
+/*
+** read_za64_s64_z0_3_w12:
+**	mova	{z0\.d - z3\.d}, za3h\.d\[w12, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z0_3_w12, svint64x4_t,
+		 z0 = svread_hor_za64_s64_vg4 (3, w12),
+		 z0 = svread_hor_za64_s64_vg4 (3, w12))
+
+/*
+** read_za64_u64_z18_4_w15:
+**	mova	{[^\n]+}, za4h\.d\[w15, 0:3\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z18_4_w15, svuint64x4_t,
+		 z18 = svread_hor_za64_u64_vg4 (4, w15),
+		 z18 = svread_hor_za64_u64_vg4 (4, w15))
+
+/*
+** read_za64_f64_z23_5_w12p4:
+**	add	(w[0-9]+), w12, #?4
+**	mova	{[^\n]+}, za5h\.d\[\1, 0:3\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z23_5_w12p4, svfloat64x4_t,
+		 z23 = svread_hor_za64_f64_vg4 (5, w12 + 4),
+		 z23 = svread_hor_za64_f64_vg4 (5, w12 + 4))
+
+/*
+** read_za64_u64_z4_6_w12p1:
+**	add	(w[0-9]+), w12, #?1
+**	mova	{z4\.d - z7\.d}, za6h\.d\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z4_6_w12p1, svuint64x4_t,
+		 z4 = svread_hor_za64_u64_vg4 (6, w12 + 1),
+		 z4 = svread_hor_za64_u64_vg4 (6, w12 + 1))
+
+/*
+** read_za64_s64_z28_7_w12p2:
+**	add	(w[0-9]+), w12, #?2
+**	mova	{z28\.d - z31\.d}, za7h\.d\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z28_7_w12p2, svint64x4_t,
+		 z28 = svread_hor_za64_s64_vg4 (7, w12 + 2),
+		 z28 = svread_hor_za64_s64_vg4 (7, w12 + 2))
+
+/*
+** read_za64_f64_z0_0_w15p3:
+**	add	(w[0-9]+), w15, #?3
+**	mova	{z0\.d - z3\.d}, za0h\.d\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z0_0_w15p3, svfloat64x4_t,
+		 z0 = svread_hor_za64_f64_vg4 (0, w15 + 3),
+		 z0 = svread_hor_za64_f64_vg4 (0, w15 + 3))
+
+/*
+** read_za64_u64_z28_1_w12p4:
+**	add	(w[0-9]+), w12, #?4
+**	mova	{z28\.d - z31\.d}, za1h\.d\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z28_1_w12p4, svuint64x4_t,
+		 z28 = svread_hor_za64_u64_vg4 (1, w12 + 4),
+		 z28 = svread_hor_za64_u64_vg4 (1, w12 + 4))
+
+/*
+** read_za64_f64_z4_2_w12m1:
+**	sub	(w[0-9]+), w12, #?1
+**	mova	{z4\.d - z7\.d}, za2h\.d\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z4_2_w12m1, svfloat64x4_t,
+		 z4 = svread_hor_za64_f64_vg4 (2, w12 - 1),
+		 z4 = svread_hor_za64_f64_vg4 (2, w12 - 1))
+
+/*
+** read_za64_u64_z28_3_w16:
+**	mov	(w1[2-5]), w16
+**	mova	{z28\.d - z31\.d}, za3h\.d\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z28_3_w16, svuint64x4_t,
+		 z28 = svread_hor_za64_u64_vg4 (3, w16),
+		 z28 = svread_hor_za64_u64_vg4 (3, w16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg2.c
new file mode 100644
index 0000000000000000000000000000000000000000..ec31a68b46ece4030c28c3c3b6dcbf365b5e816b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg2.c
@@ -0,0 +1,140 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za8_s8_z0_0_0:
+**	mov	(w1[2-5]), (?:wzr|#?0)
+**	mova	{z0\.b - z1\.b}, za0h\.b\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_0, svint8x2_t,
+		 z0 = svread_hor_za8_s8_vg2 (0, 0),
+		 z0 = svread_hor_za8_s8_vg2 (0, 0))
+
+/*
+** read_za8_u8_z4_0_1:
+**	mov	(w1[2-5]), #?1
+**	mova	{z4\.b - z5\.b}, za0h\.b\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_1, svuint8x2_t,
+		 z4 = svread_hor_za8_u8_vg2 (0, 1),
+		 z4 = svread_hor_za8_u8_vg2 (0, 1))
+
+/*
+** read_za8_s8_z28_0_w11:
+**	mov	(w1[2-5]), w11
+**	mova	{z28\.b - z29\.b}, za0h\.b\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z28_0_w11, svint8x2_t,
+		 z28 = svread_hor_za8_s8_vg2 (0, w11),
+		 z28 = svread_hor_za8_s8_vg2 (0, w11))
+
+/*
+** read_za8_s8_z0_0_w12:
+**	mova	{z0\.b - z1\.b}, za0h\.b\[w12, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_w12, svint8x2_t,
+		 z0 = svread_hor_za8_s8_vg2 (0, w12),
+		 z0 = svread_hor_za8_s8_vg2 (0, w12))
+
+/*
+** read_za8_u8_z18_0_w15:
+**	mova	{z18\.b - z19\.b}, za0h\.b\[w15, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z18_0_w15, svuint8x2_t,
+		 z18 = svread_hor_za8_u8_vg2 (0, w15),
+		 z18 = svread_hor_za8_u8_vg2 (0, w15))
+
+/*
+** read_za8_s8_z23_0_w12p14:
+**	mova	{[^\n]+}, za0h\.b\[w12, 14:15\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z23_0_w12p14, svint8x2_t,
+		 z23 = svread_hor_za8_s8_vg2 (0, w12 + 14),
+		 z23 = svread_hor_za8_s8_vg2 (0, w12 + 14))
+
+/*
+** read_za8_u8_z4_0_w12p1:
+**	add	(w[0-9]+), w12, #?1
+**	mova	{z4\.b - z5\.b}, za0h\.b\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w12p1, svuint8x2_t,
+		 z4 = svread_hor_za8_u8_vg2 (0, w12 + 1),
+		 z4 = svread_hor_za8_u8_vg2 (0, w12 + 1))
+
+/*
+** read_za8_s8_z28_0_w12p2:
+**	mova	{z28\.b - z29\.b}, za0h\.b\[w12, 2:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z28_0_w12p2, svint8x2_t,
+		 z28 = svread_hor_za8_s8_vg2 (0, w12 + 2),
+		 z28 = svread_hor_za8_s8_vg2 (0, w12 + 2))
+
+/*
+** read_za8_u8_z0_0_w15p3:
+**	add	(w[0-9]+), w15, #?3
+**	mova	{z0\.b - z1\.b}, za0h\.b\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z0_0_w15p3, svuint8x2_t,
+		 z0 = svread_hor_za8_u8_vg2 (0, w15 + 3),
+		 z0 = svread_hor_za8_u8_vg2 (0, w15 + 3))
+
+/*
+** read_za8_u8_z4_0_w15p12:
+**	mova	{z4\.b - z5\.b}, za0h\.b\[w15, 12:13\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w15p12, svuint8x2_t,
+		 z4 = svread_hor_za8_u8_vg2 (0, w15 + 12),
+		 z4 = svread_hor_za8_u8_vg2 (0, w15 + 12))
+
+/*
+** read_za8_u8_z28_0_w12p15:
+**	add	(w[0-9]+), w12, #?15
+**	mova	{z28\.b - z29\.b}, za0h\.b\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z28_0_w12p15, svuint8x2_t,
+		 z28 = svread_hor_za8_u8_vg2 (0, w12 + 15),
+		 z28 = svread_hor_za8_u8_vg2 (0, w12 + 15))
+
+/*
+** read_za8_s8_z0_0_w15p16:
+**	add	(w[0-9]+), w15, #?16
+**	mova	{z0\.b - z1\.b}, za0h\.b\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_w15p16, svint8x2_t,
+		 z0 = svread_hor_za8_s8_vg2 (0, w15 + 16),
+		 z0 = svread_hor_za8_s8_vg2 (0, w15 + 16))
+
+/*
+** read_za8_u8_z4_0_w12m1:
+**	sub	(w[0-9]+), w12, #?1
+**	mova	{z4\.b - z5\.b}, za0h\.b\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x2_t,
+		 z4 = svread_hor_za8_u8_vg2 (0, w12 - 1),
+		 z4 = svread_hor_za8_u8_vg2 (0, w12 - 1))
+
+/*
+** read_za8_u8_z18_0_w16:
+**	mov	(w1[2-5]), w16
+**	mova	{z18\.b - z19\.b}, za0h\.b\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z18_0_w16, svuint8x2_t,
+		 z18 = svread_hor_za8_u8_vg2 (0, w16),
+		 z18 = svread_hor_za8_u8_vg2 (0, w16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg4.c
new file mode 100644
index 0000000000000000000000000000000000000000..261cbead44283e2c0e6f92beb57f5dbeaa3f455f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg4.c
@@ -0,0 +1,156 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za8_s8_z0_0_0:
+**	mov	(w1[2-5]), (?:wzr|#?0)
+**	mova	{z0\.b - z3\.b}, za0h\.b\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_0, svint8x4_t,
+		 z0 = svread_hor_za8_s8_vg4 (0, 0),
+		 z0 = svread_hor_za8_s8_vg4 (0, 0))
+
+/*
+** read_za8_u8_z4_0_1:
+**	mov	(w1[2-5]), #?1
+**	mova	{z4\.b - z7\.b}, za0h\.b\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_1, svuint8x4_t,
+		 z4 = svread_hor_za8_u8_vg4 (0, 1),
+		 z4 = svread_hor_za8_u8_vg4 (0, 1))
+
+/*
+** read_za8_s8_z28_0_w11:
+**	mov	(w1[2-5]), w11
+**	mova	{z28\.b - z31\.b}, za0h\.b\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z28_0_w11, svint8x4_t,
+		 z28 = svread_hor_za8_s8_vg4 (0, w11),
+		 z28 = svread_hor_za8_s8_vg4 (0, w11))
+
+/*
+** read_za8_s8_z0_0_w12:
+**	mova	{z0\.b - z3\.b}, za0h\.b\[w12, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_w12, svint8x4_t,
+		 z0 = svread_hor_za8_s8_vg4 (0, w12),
+		 z0 = svread_hor_za8_s8_vg4 (0, w12))
+
+/*
+** read_za8_u8_z18_0_w15:
+**	mova	{[^\n]+}, za0h\.b\[w15, 0:3\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z18_0_w15, svuint8x4_t,
+		 z18 = svread_hor_za8_u8_vg4 (0, w15),
+		 z18 = svread_hor_za8_u8_vg4 (0, w15))
+
+/*
+** read_za8_s8_z23_0_w12p12:
+**	mova	{[^\n]+}, za0h\.b\[w12, 12:15\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z23_0_w12p12, svint8x4_t,
+		 z23 = svread_hor_za8_s8_vg4 (0, w12 + 12),
+		 z23 = svread_hor_za8_s8_vg4 (0, w12 + 12))
+
+/*
+** read_za8_u8_z4_0_w12p1:
+**	add	(w[0-9]+), w12, #?1
+**	mova	{z4\.b - z7\.b}, za0h\.b\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w12p1, svuint8x4_t,
+		 z4 = svread_hor_za8_u8_vg4 (0, w12 + 1),
+		 z4 = svread_hor_za8_u8_vg4 (0, w12 + 1))
+
+/*
+** read_za8_s8_z28_0_w12p2:
+**	add	(w[0-9]+), w12, #?2
+**	mova	{z28\.b - z31\.b}, za0h\.b\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z28_0_w12p2, svint8x4_t,
+		 z28 = svread_hor_za8_s8_vg4 (0, w12 + 2),
+		 z28 = svread_hor_za8_s8_vg4 (0, w12 + 2))
+
+/*
+** read_za8_u8_z0_0_w15p3:
+**	add	(w[0-9]+), w15, #?3
+**	mova	{z0\.b - z3\.b}, za0h\.b\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z0_0_w15p3, svuint8x4_t,
+		 z0 = svread_hor_za8_u8_vg4 (0, w15 + 3),
+		 z0 = svread_hor_za8_u8_vg4 (0, w15 + 3))
+
+/*
+** read_za8_u8_z0_0_w12p4:
+**	mova	{z0\.b - z3\.b}, za0h\.b\[w12, 4:7\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z0_0_w12p4, svuint8x4_t,
+		 z0 = svread_hor_za8_u8_vg4 (0, w12 + 4),
+		 z0 = svread_hor_za8_u8_vg4 (0, w12 + 4))
+
+/*
+** read_za8_u8_z4_0_w15p12:
+**	mova	{z4\.b - z7\.b}, za0h\.b\[w15, 12:15\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w15p12, svuint8x4_t,
+		 z4 = svread_hor_za8_u8_vg4 (0, w15 + 12),
+		 z4 = svread_hor_za8_u8_vg4 (0, w15 + 12))
+
+/*
+** read_za8_u8_z28_0_w12p14:
+**	add	(w[0-9]+), w12, #?14
+**	mova	{z28\.b - z31\.b}, za0h\.b\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z28_0_w12p14, svuint8x4_t,
+		 z28 = svread_hor_za8_u8_vg4 (0, w12 + 14),
+		 z28 = svread_hor_za8_u8_vg4 (0, w12 + 14))
+
+/*
+** read_za8_s8_z0_0_w15p16:
+**	add	(w[0-9]+), w15, #?16
+**	mova	{z0\.b - z3\.b}, za0h\.b\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_w15p16, svint8x4_t,
+		 z0 = svread_hor_za8_s8_vg4 (0, w15 + 16),
+		 z0 = svread_hor_za8_s8_vg4 (0, w15 + 16))
+
+/*
+** read_za8_u8_z4_0_w12m1:
+**	sub	(w[0-9]+), w12, #?1
+**	mova	{z4\.b - z7\.b}, za0h\.b\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x4_t,
+		 z4 = svread_hor_za8_u8_vg4 (0, w12 - 1),
+		 z4 = svread_hor_za8_u8_vg4 (0, w12 - 1))
+
+/*
+** read_za8_u8_z28_0_w16:
+**	mov	(w1[2-5]), w16
+**	mova	{z28\.b - z31\.b}, za0h\.b\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z28_0_w16, svuint8x4_t,
+		 z28 = svread_hor_za8_u8_vg4 (0, w16),
+		 z28 = svread_hor_za8_u8_vg4 (0, w16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za16_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za16_vg2.c
new file mode 100644
index 0000000000000000000000000000000000000000..a71d4d0c8cd425679b42fad5c1b79845f2e302c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za16_vg2.c
@@ -0,0 +1,140 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za16_s16_z0_0_0:
+**	mov	(w1[2-5]), (?:wzr|#?0)
+**	mova	{z0\.h - z1\.h}, za0v\.h\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z0_0_0, svint16x2_t,
+		 z0 = svread_ver_za16_s16_vg2 (0, 0),
+		 z0 = svread_ver_za16_s16_vg2 (0, 0))
+
+/*
+** read_za16_u16_z4_1_1:
+**	mov	(w1[2-5]), #?1
+**	mova	{z4\.h - z5\.h}, za1v\.h\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z4_1_1, svuint16x2_t,
+		 z4 = svread_ver_za16_u16_vg2 (1, 1),
+		 z4 = svread_ver_za16_u16_vg2 (1, 1))
+
+/*
+** read_za16_f16_z28_0_w11:
+**	mov	(w1[2-5]), w11
+**	mova	{z28\.h - z29\.h}, za0v\.h\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_f16_z28_0_w11, svfloat16x2_t,
+		 z28 = svread_ver_za16_f16_vg2 (0, w11),
+		 z28 = svread_ver_za16_f16_vg2 (0, w11))
+
+/*
+** read_za16_bf16_z0_1_w12:
+**	mova	{z0\.h - z1\.h}, za1v\.h\[w12, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_bf16_z0_1_w12, svbfloat16x2_t,
+		 z0 = svread_ver_za16_bf16_vg2 (1, w12),
+		 z0 = svread_ver_za16_bf16_vg2 (1, w12))
+
+/*
+** read_za16_u16_z18_0_w15:
+**	mova	{z18\.h - z19\.h}, za0v\.h\[w15, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z18_0_w15, svuint16x2_t,
+		 z18 = svread_ver_za16_u16_vg2 (0, w15),
+		 z18 = svread_ver_za16_u16_vg2 (0, w15))
+
+/*
+** read_za16_s16_z23_1_w12p6:
+**	mova	{[^\n]+}, za1v\.h\[w12, 6:7\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z23_1_w12p6, svint16x2_t,
+		 z23 = svread_ver_za16_s16_vg2 (1, w12 + 6),
+		 z23 = svread_ver_za16_s16_vg2 (1, w12 + 6))
+
+/*
+** read_za16_f16_z4_0_w12p1:
+**	add	(w[0-9]+), w12, #?1
+**	mova	{z4\.h - z5\.h}, za0v\.h\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_f16_z4_0_w12p1, svfloat16x2_t,
+		 z4 = svread_ver_za16_f16_vg2 (0, w12 + 1),
+		 z4 = svread_ver_za16_f16_vg2 (0, w12 + 1))
+
+/*
+** read_za16_s16_z28_1_w12p2:
+**	mova	{z28\.h - z29\.h}, za1v\.h\[w12, 2:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z28_1_w12p2, svint16x2_t,
+		 z28 = svread_ver_za16_s16_vg2 (1, w12 + 2),
+		 z28 = svread_ver_za16_s16_vg2 (1, w12 + 2))
+
+/*
+** read_za16_u16_z0_0_w15p3:
+**	add	(w[0-9]+), w15, #?3
+**	mova	{z0\.h - z1\.h}, za0v\.h\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z0_0_w15p3, svuint16x2_t,
+		 z0 = svread_ver_za16_u16_vg2 (0, w15 + 3),
+		 z0 = svread_ver_za16_u16_vg2 (0, w15 + 3))
+
+/*
+** read_za16_bf16_z4_1_w15p4:
+**	mova	{z4\.h - z5\.h}, za1v\.h\[w15, 4:5\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_bf16_z4_1_w15p4, svbfloat16x2_t,
+		 z4 = svread_ver_za16_bf16_vg2 (1, w15 + 4),
+		 z4 = svread_ver_za16_bf16_vg2 (1, w15 + 4))
+
+/*
+** read_za16_u16_z28_0_w12p7:
+**	add	(w[0-9]+), w12, #?7
+**	mova	{z28\.h - z29\.h}, za0v\.h\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z28_0_w12p7, svuint16x2_t,
+		 z28 = svread_ver_za16_u16_vg2 (0, w12 + 7),
+		 z28 = svread_ver_za16_u16_vg2 (0, w12 + 7))
+
+/*
+** read_za16_s16_z0_1_w15p8:
+**	add	(w[0-9]+), w15, #?8
+**	mova	{z0\.h - z1\.h}, za1v\.h\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z0_1_w15p8, svint16x2_t,
+		 z0 = svread_ver_za16_s16_vg2 (1, w15 + 8),
+		 z0 = svread_ver_za16_s16_vg2 (1, w15 + 8))
+
+/*
+** read_za16_u16_z4_0_w12m1:
+**	sub	(w[0-9]+), w12, #?1
+**	mova	{z4\.h - z5\.h}, za0v\.h\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z4_0_w12m1, svuint16x2_t,
+		 z4 = svread_ver_za16_u16_vg2 (0, w12 - 1),
+		 z4 = svread_ver_za16_u16_vg2 (0, w12 - 1))
+
+/*
+** read_za16_u16_z18_1_w16:
+**	mov	(w1[2-5]), w16
+**	mova	{z18\.h - z19\.h}, za1v\.h\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z18_1_w16, svuint16x2_t,
+		 z18 = svread_ver_za16_u16_vg2 (1, w16),
+		 z18 = svread_ver_za16_u16_vg2 (1, w16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za16_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za16_vg4.c
new file mode 100644
index 0000000000000000000000000000000000000000..792351df808e1921b27766e7213f4cb5093a014d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za16_vg4.c
@@ -0,0 +1,138 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za16_s16_z0_0_0:
+**	mov	(w1[2-5]), (?:wzr|#?0)
+**	mova	{z0\.h - z3\.h}, za0v\.h\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z0_0_0, svint16x4_t,
+		 z0 = svread_ver_za16_s16_vg4 (0, 0),
+		 z0 = svread_ver_za16_s16_vg4 (0, 0))
+
+/*
+** read_za16_u16_z4_1_1:
+**	mov	(w1[2-5]), #?1
+**	mova	{z4\.h - z7\.h}, za1v\.h\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z4_1_1, svuint16x4_t,
+		 z4 = svread_ver_za16_u16_vg4 (1, 1),
+		 z4 = svread_ver_za16_u16_vg4 (1, 1))
+
+/*
+** read_za16_f16_z28_0_w11:
+**	mov	(w1[2-5]), w11
+**	mova	{z28\.h - z31\.h}, za0v\.h\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_f16_z28_0_w11, svfloat16x4_t,
+		 z28 = svread_ver_za16_f16_vg4 (0, w11),
+		 z28 = svread_ver_za16_f16_vg4 (0, w11))
+
+/*
+** read_za16_s16_z0_1_w12:
+**	mova	{z0\.h - z3\.h}, za1v\.h\[w12, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z0_1_w12, svint16x4_t,
+		 z0 = svread_ver_za16_s16_vg4 (1, w12),
+		 z0 = svread_ver_za16_s16_vg4 (1, w12))
+
+/*
+** read_za16_u16_z18_0_w15:
+**	mova	{[^\n]+}, za0v\.h\[w15, 0:3\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z18_0_w15, svuint16x4_t,
+		 z18 = svread_ver_za16_u16_vg4 (0, w15),
+		 z18 = svread_ver_za16_u16_vg4 (0, w15))
+
+/*
+** read_za16_bf16_z23_1_w12p4:
+**	mova	{[^\n]+}, za1v\.h\[w12, 4:7\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_bf16_z23_1_w12p4, svbfloat16x4_t,
+		 z23 = svread_ver_za16_bf16_vg4 (1, w12 + 4),
+		 z23 = svread_ver_za16_bf16_vg4 (1, w12 + 4))
+
+/*
+** read_za16_u16_z4_0_w12p1:
+**	add	(w[0-9]+), w12, #?1
+**	mova	{z4\.h - z7\.h}, za0v\.h\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z4_0_w12p1, svuint16x4_t,
+		 z4 = svread_ver_za16_u16_vg4 (0, w12 + 1),
+		 z4 = svread_ver_za16_u16_vg4 (0, w12 + 1))
+
+/*
+** read_za16_s16_z28_1_w12p2:
+**	add	(w[0-9]+), w12, #?2
+**	mova	{z28\.h - z31\.h}, za1v\.h\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z28_1_w12p2, svint16x4_t,
+		 z28 = svread_ver_za16_s16_vg4 (1, w12 + 2),
+		 z28 = svread_ver_za16_s16_vg4 (1, w12 + 2))
+
+/*
+** read_za16_f16_z0_0_w15p3:
+**	add	(w[0-9]+), w15, #?3
+**	mova	{z0\.h - z3\.h}, za0v\.h\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_f16_z0_0_w15p3, svfloat16x4_t,
+		 z0 = svread_ver_za16_f16_vg4 (0, w15 + 3),
+		 z0 = svread_ver_za16_f16_vg4 (0, w15 + 3))
+
+/*
+** read_za16_u16_z28_1_w12p6:
+**	add	(w[0-9]+), w12, #?6
+**	mova	{z28\.h - z31\.h}, za1v\.h\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z28_1_w12p6, svuint16x4_t,
+		 z28 = svread_ver_za16_u16_vg4 (1, w12 + 6),
+		 z28 = svread_ver_za16_u16_vg4 (1, w12 + 6))
+
+/*
+** read_za16_s16_z0_0_w15p8:
+**	add	(w[0-9]+), w15, #?8
+**	mova	{z0\.h - z3\.h}, za0v\.h\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_s16_z0_0_w15p8, svint16x4_t,
+		 z0 = svread_ver_za16_s16_vg4 (0, w15 + 8),
+		 z0 = svread_ver_za16_s16_vg4 (0, w15 + 8))
+
+/*
+** read_za16_bf16_z4_1_w12m1:
+**	sub	(w[0-9]+), w12, #?1
+**	mova	{z4\.h - z7\.h}, za1v\.h\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_bf16_z4_1_w12m1, svbfloat16x4_t,
+		 z4 = svread_ver_za16_bf16_vg4 (1, w12 - 1),
+		 z4 = svread_ver_za16_bf16_vg4 (1, w12 - 1))
+
+/*
+** read_za16_u16_z28_0_w16:
+**	mov	(w1[2-5]), w16
+**	mova	{z28\.h - z31\.h}, za0v\.h\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za16_u16_z28_0_w16, svuint16x4_t,
+		 z28 = svread_ver_za16_u16_vg4 (0, w16),
+		 z28 = svread_ver_za16_u16_vg4 (0, w16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za32_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za32_vg2.c
new file mode 100644
index 0000000000000000000000000000000000000000..85fc7e2a7e19638daca2dbaec94b6210dbc71462
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za32_vg2.c
@@ -0,0 +1,112 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za32_s32_z0_0_0:
+**	mov	(w1[2-5]), (?:wzr|#?0)
+**	mova	{z0\.s - z1\.s}, za0v\.s\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z0_0_0, svint32x2_t,
+		 z0 = svread_ver_za32_s32_vg2 (0, 0),
+		 z0 = svread_ver_za32_s32_vg2 (0, 0))
+
+/*
+** read_za32_u32_z4_1_1:
+**	mov	(w1[2-5]), #?1
+**	mova	{z4\.s - z5\.s}, za1v\.s\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z4_1_1, svuint32x2_t,
+		 z4 = svread_ver_za32_u32_vg2 (1, 1),
+		 z4 = svread_ver_za32_u32_vg2 (1, 1))
+
+/*
+** read_za32_f32_z28_2_w11:
+**	mov	(w1[2-5]), w11
+**	mova	{z28\.s - z29\.s}, za2v\.s\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z28_2_w11, svfloat32x2_t,
+		 z28 = svread_ver_za32_f32_vg2 (2, w11),
+		 z28 = svread_ver_za32_f32_vg2 (2, w11))
+
+/*
+** read_za32_f32_z0_3_w12:
+**	mova	{z0\.s - z1\.s}, za3v\.s\[w12, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z0_3_w12, svfloat32x2_t,
+		 z0 = svread_ver_za32_f32_vg2 (3, w12),
+		 z0 = svread_ver_za32_f32_vg2 (3, w12))
+
+/*
+** read_za32_u32_z18_0_w15:
+**	mova	{z18\.s - z19\.s}, za0v\.s\[w15, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z18_0_w15, svuint32x2_t,
+		 z18 = svread_ver_za32_u32_vg2 (0, w15),
+		 z18 = svread_ver_za32_u32_vg2 (0, w15))
+
+/*
+** read_za32_s32_z23_1_w12p2:
+**	mova	{[^\n]+}, za1v\.s\[w12, 2:3\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z23_1_w12p2, svint32x2_t,
+		 z23 = svread_ver_za32_s32_vg2 (1, w12 + 2),
+		 z23 = svread_ver_za32_s32_vg2 (1, w12 + 2))
+
+/*
+** read_za32_f32_z4_2_w12p1:
+**	add	(w[0-9]+), w12, #?1
+**	mova	{z4\.s - z5\.s}, za2v\.s\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z4_2_w12p1, svfloat32x2_t,
+		 z4 = svread_ver_za32_f32_vg2 (2, w12 + 1),
+		 z4 = svread_ver_za32_f32_vg2 (2, w12 + 1))
+
+/*
+** read_za32_u32_z0_3_w15p3:
+**	add	(w[0-9]+), w15, #?3
+**	mova	{z0\.s - z1\.s}, za3v\.s\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z0_3_w15p3, svuint32x2_t,
+		 z0 = svread_ver_za32_u32_vg2 (3, w15 + 3),
+		 z0 = svread_ver_za32_u32_vg2 (3, w15 + 3))
+
+/*
+** read_za32_s32_z0_1_w15p4:
+**	add	(w[0-9]+), w15, #?4
+**	mova	{z0\.s - z1\.s}, za1v\.s\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z0_1_w15p4, svint32x2_t,
+		 z0 = svread_ver_za32_s32_vg2 (1, w15 + 4),
+		 z0 = svread_ver_za32_s32_vg2 (1, w15 + 4))
+
+/*
+** read_za32_u32_z4_3_w12m1:
+**	sub	(w[0-9]+), w12, #?1
+**	mova	{z4\.s - z5\.s}, za3v\.s\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z4_3_w12m1, svuint32x2_t,
+		 z4 = svread_ver_za32_u32_vg2 (3, w12 - 1),
+		 z4 = svread_ver_za32_u32_vg2 (3, w12 - 1))
+
+/*
+** read_za32_u32_z18_1_w16:
+**	mov	(w1[2-5]), w16
+**	mova	{z18\.s - z19\.s}, za1v\.s\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z18_1_w16, svuint32x2_t,
+		 z18 = svread_ver_za32_u32_vg2 (1, w16),
+		 z18 = svread_ver_za32_u32_vg2 (1, w16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za32_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za32_vg4.c
new file mode 100644
index 0000000000000000000000000000000000000000..a7924ae93340663d65ed4947824bb48de2a91fdc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za32_vg4.c
@@ -0,0 +1,129 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za32_s32_z0_0_0:
+**	mov	(w1[2-5]), (?:wzr|#?0)
+**	mova	{z0\.s - z3\.s}, za0v\.s\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z0_0_0, svint32x4_t,
+		 z0 = svread_ver_za32_s32_vg4 (0, 0),
+		 z0 = svread_ver_za32_s32_vg4 (0, 0))
+
+/*
+** read_za32_u32_z4_1_1:
+**	mov	(w1[2-5]), #?1
+**	mova	{z4\.s - z7\.s}, za1v\.s\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z4_1_1, svuint32x4_t,
+		 z4 = svread_ver_za32_u32_vg4 (1, 1),
+		 z4 = svread_ver_za32_u32_vg4 (1, 1))
+
+/*
+** read_za32_f32_z28_2_w11:
+**	mov	(w1[2-5]), w11
+**	mova	{z28\.s - z31\.s}, za2v\.s\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z28_2_w11, svfloat32x4_t,
+		 z28 = svread_ver_za32_f32_vg4 (2, w11),
+		 z28 = svread_ver_za32_f32_vg4 (2, w11))
+
+/*
+** read_za32_s32_z0_3_w12:
+**	mova	{z0\.s - z3\.s}, za3v\.s\[w12, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z0_3_w12, svint32x4_t,
+		 z0 = svread_ver_za32_s32_vg4 (3, w12),
+		 z0 = svread_ver_za32_s32_vg4 (3, w12))
+
+/*
+** read_za32_u32_z18_0_w15:
+**	mova	{[^\n]+}, za0v\.s\[w15, 0:3\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z18_0_w15, svuint32x4_t,
+		 z18 = svread_ver_za32_u32_vg4 (0, w15),
+		 z18 = svread_ver_za32_u32_vg4 (0, w15))
+
+/*
+** read_za32_f32_z23_1_w12p4:
+**	add	(w[0-9]+), w12, #?4
+**	mova	{[^\n]+}, za1v\.s\[\1, 0:3\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z23_1_w12p4, svfloat32x4_t,
+		 z23 = svread_ver_za32_f32_vg4 (1, w12 + 4),
+		 z23 = svread_ver_za32_f32_vg4 (1, w12 + 4))
+
+/*
+** read_za32_u32_z4_2_w12p1:
+**	add	(w[0-9]+), w12, #?1
+**	mova	{z4\.s - z7\.s}, za2v\.s\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z4_2_w12p1, svuint32x4_t,
+		 z4 = svread_ver_za32_u32_vg4 (2, w12 + 1),
+		 z4 = svread_ver_za32_u32_vg4 (2, w12 + 1))
+
+/*
+** read_za32_s32_z28_3_w12p2:
+**	add	(w[0-9]+), w12, #?2
+**	mova	{z28\.s - z31\.s}, za3v\.s\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_s32_z28_3_w12p2, svint32x4_t,
+		 z28 = svread_ver_za32_s32_vg4 (3, w12 + 2),
+		 z28 = svread_ver_za32_s32_vg4 (3, w12 + 2))
+
+/*
+** read_za32_f32_z0_0_w15p3:
+**	add	(w[0-9]+), w15, #?3
+**	mova	{z0\.s - z3\.s}, za0v\.s\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z0_0_w15p3, svfloat32x4_t,
+		 z0 = svread_ver_za32_f32_vg4 (0, w15 + 3),
+		 z0 = svread_ver_za32_f32_vg4 (0, w15 + 3))
+
+/*
+** read_za32_u32_z28_1_w12p4:
+**	add	(w[0-9]+), w12, #?4
+**	mova	{z28\.s - z31\.s}, za1v\.s\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z28_1_w12p4, svuint32x4_t,
+		 z28 = svread_ver_za32_u32_vg4 (1, w12 + 4),
+		 z28 = svread_ver_za32_u32_vg4 (1, w12 + 4))
+
+/*
+** read_za32_f32_z4_2_w12m1:
+**	sub	(w[0-9]+), w12, #?1
+**	mova	{z4\.s - z7\.s}, za2v\.s\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_f32_z4_2_w12m1, svfloat32x4_t,
+		 z4 = svread_ver_za32_f32_vg4 (2, w12 - 1),
+		 z4 = svread_ver_za32_f32_vg4 (2, w12 - 1))
+
+/*
+** read_za32_u32_z28_3_w16:
+**	mov	(w1[2-5]), w16
+**	mova	{z28\.s - z31\.s}, za3v\.s\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za32_u32_z28_3_w16, svuint32x4_t,
+		 z28 = svread_ver_za32_u32_vg4 (3, w16),
+		 z28 = svread_ver_za32_u32_vg4 (3, w16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za64_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za64_vg2.c
new file mode 100644
index 0000000000000000000000000000000000000000..6636eb129586929ec21c604211de7b007101ccd6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za64_vg2.c
@@ -0,0 +1,113 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za64_s64_z0_0_0:
+**	mov	(w1[2-5]), (?:wzr|#?0)
+**	mova	{z0\.d - z1\.d}, za0v\.d\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z0_0_0, svint64x2_t,
+		 z0 = svread_ver_za64_s64_vg2 (0, 0),
+		 z0 = svread_ver_za64_s64_vg2 (0, 0))
+
+/*
+** read_za64_u64_z4_1_1:
+**	mov	(w1[2-5]), #?1
+**	mova	{z4\.d - z5\.d}, za1v\.d\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z4_1_1, svuint64x2_t,
+		 z4 = svread_ver_za64_u64_vg2 (1, 1),
+		 z4 = svread_ver_za64_u64_vg2 (1, 1))
+
+/*
+** read_za64_f64_z28_2_w11:
+**	mov	(w1[2-5]), w11
+**	mova	{z28\.d - z29\.d}, za2v\.d\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z28_2_w11, svfloat64x2_t,
+		 z28 = svread_ver_za64_f64_vg2 (2, w11),
+		 z28 = svread_ver_za64_f64_vg2 (2, w11))
+
+/*
+** read_za64_f64_z0_3_w12:
+**	mova	{z0\.d - z1\.d}, za3v\.d\[w12, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z0_3_w12, svfloat64x2_t,
+		 z0 = svread_ver_za64_f64_vg2 (3, w12),
+		 z0 = svread_ver_za64_f64_vg2 (3, w12))
+
+/*
+** read_za64_u64_z18_4_w15:
+**	mova	{z18\.d - z19\.d}, za4v\.d\[w15, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z18_4_w15, svuint64x2_t,
+		 z18 = svread_ver_za64_u64_vg2 (4, w15),
+		 z18 = svread_ver_za64_u64_vg2 (4, w15))
+
+/*
+** read_za64_s64_z23_5_w12p2:
+**	add	(w[0-9]+), w12, #?2
+**	mova	{[^\n]+}, za5v\.d\[\1, 0:1\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z23_5_w12p2, svint64x2_t,
+		 z23 = svread_ver_za64_s64_vg2 (5, w12 + 2),
+		 z23 = svread_ver_za64_s64_vg2 (5, w12 + 2))
+
+/*
+** read_za64_f64_z4_6_w12p1:
+**	add	(w[0-9]+), w12, #?1
+**	mova	{z4\.d - z5\.d}, za6v\.d\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z4_6_w12p1, svfloat64x2_t,
+		 z4 = svread_ver_za64_f64_vg2 (6, w12 + 1),
+		 z4 = svread_ver_za64_f64_vg2 (6, w12 + 1))
+
+/*
+** read_za64_u64_z0_7_w15p3:
+**	add	(w[0-9]+), w15, #?3
+**	mova	{z0\.d - z1\.d}, za7v\.d\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z0_7_w15p3, svuint64x2_t,
+		 z0 = svread_ver_za64_u64_vg2 (7, w15 + 3),
+		 z0 = svread_ver_za64_u64_vg2 (7, w15 + 3))
+
+/*
+** read_za64_s64_z0_1_w15p4:
+**	add	(w[0-9]+), w15, #?4
+**	mova	{z0\.d - z1\.d}, za1v\.d\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z0_1_w15p4, svint64x2_t,
+		 z0 = svread_ver_za64_s64_vg2 (1, w15 + 4),
+		 z0 = svread_ver_za64_s64_vg2 (1, w15 + 4))
+
+/*
+** read_za64_u64_z4_3_w12m1:
+**	sub	(w[0-9]+), w12, #?1
+**	mova	{z4\.d - z5\.d}, za3v\.d\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z4_3_w12m1, svuint64x2_t,
+		 z4 = svread_ver_za64_u64_vg2 (3, w12 - 1),
+		 z4 = svread_ver_za64_u64_vg2 (3, w12 - 1))
+
+/*
+** read_za64_u64_z18_1_w16:
+**	mov	(w1[2-5]), w16
+**	mova	{z18\.d - z19\.d}, za1v\.d\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z18_1_w16, svuint64x2_t,
+		 z18 = svread_ver_za64_u64_vg2 (1, w16),
+		 z18 = svread_ver_za64_u64_vg2 (1, w16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za64_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za64_vg4.c
new file mode 100644
index 0000000000000000000000000000000000000000..2531f47769f56e991a1f97074498a9808f937c2b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za64_vg4.c
@@ -0,0 +1,129 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za64_s64_z0_0_0:
+**	mov	(w1[2-5]), (?:wzr|#?0)
+**	mova	{z0\.d - z3\.d}, za0v\.d\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z0_0_0, svint64x4_t,
+		 z0 = svread_ver_za64_s64_vg4 (0, 0),
+		 z0 = svread_ver_za64_s64_vg4 (0, 0))
+
+/*
+** read_za64_u64_z4_1_1:
+**	mov	(w1[2-5]), #?1
+**	mova	{z4\.d - z7\.d}, za1v\.d\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z4_1_1, svuint64x4_t,
+		 z4 = svread_ver_za64_u64_vg4 (1, 1),
+		 z4 = svread_ver_za64_u64_vg4 (1, 1))
+
+/*
+** read_za64_f64_z28_2_w11:
+**	mov	(w1[2-5]), w11
+**	mova	{z28\.d - z31\.d}, za2v\.d\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z28_2_w11, svfloat64x4_t,
+		 z28 = svread_ver_za64_f64_vg4 (2, w11),
+		 z28 = svread_ver_za64_f64_vg4 (2, w11))
+
+/*
+** read_za64_s64_z0_3_w12:
+**	mova	{z0\.d - z3\.d}, za3v\.d\[w12, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z0_3_w12, svint64x4_t,
+		 z0 = svread_ver_za64_s64_vg4 (3, w12),
+		 z0 = svread_ver_za64_s64_vg4 (3, w12))
+
+/*
+** read_za64_u64_z18_4_w15:
+**	mova	{[^\n]+}, za4v\.d\[w15, 0:3\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z18_4_w15, svuint64x4_t,
+		 z18 = svread_ver_za64_u64_vg4 (4, w15),
+		 z18 = svread_ver_za64_u64_vg4 (4, w15))
+
+/*
+** read_za64_f64_z23_5_w12p4:
+**	add	(w[0-9]+), w12, #?4
+**	mova	{[^\n]+}, za5v\.d\[\1, 0:3\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z23_5_w12p4, svfloat64x4_t,
+		 z23 = svread_ver_za64_f64_vg4 (5, w12 + 4),
+		 z23 = svread_ver_za64_f64_vg4 (5, w12 + 4))
+
+/*
+** read_za64_u64_z4_6_w12p1:
+**	add	(w[0-9]+), w12, #?1
+**	mova	{z4\.d - z7\.d}, za6v\.d\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z4_6_w12p1, svuint64x4_t,
+		 z4 = svread_ver_za64_u64_vg4 (6, w12 + 1),
+		 z4 = svread_ver_za64_u64_vg4 (6, w12 + 1))
+
+/*
+** read_za64_s64_z28_7_w12p2:
+**	add	(w[0-9]+), w12, #?2
+**	mova	{z28\.d - z31\.d}, za7v\.d\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_s64_z28_7_w12p2, svint64x4_t,
+		 z28 = svread_ver_za64_s64_vg4 (7, w12 + 2),
+		 z28 = svread_ver_za64_s64_vg4 (7, w12 + 2))
+
+/*
+** read_za64_f64_z0_0_w15p3:
+**	add	(w[0-9]+), w15, #?3
+**	mova	{z0\.d - z3\.d}, za0v\.d\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z0_0_w15p3, svfloat64x4_t,
+		 z0 = svread_ver_za64_f64_vg4 (0, w15 + 3),
+		 z0 = svread_ver_za64_f64_vg4 (0, w15 + 3))
+
+/*
+** read_za64_u64_z28_1_w12p4:
+**	add	(w[0-9]+), w12, #?4
+**	mova	{z28\.d - z31\.d}, za1v\.d\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z28_1_w12p4, svuint64x4_t,
+		 z28 = svread_ver_za64_u64_vg4 (1, w12 + 4),
+		 z28 = svread_ver_za64_u64_vg4 (1, w12 + 4))
+
+/*
+** read_za64_f64_z4_2_w12m1:
+**	sub	(w[0-9]+), w12, #?1
+**	mova	{z4\.d - z7\.d}, za2v\.d\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_f64_z4_2_w12m1, svfloat64x4_t,
+		 z4 = svread_ver_za64_f64_vg4 (2, w12 - 1),
+		 z4 = svread_ver_za64_f64_vg4 (2, w12 - 1))
+
+/*
+** read_za64_u64_z28_3_w16:
+**	mov	(w1[2-5]), w16
+**	mova	{z28\.d - z31\.d}, za3v\.d\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za64_u64_z28_3_w16, svuint64x4_t,
+		 z28 = svread_ver_za64_u64_vg4 (3, w16),
+		 z28 = svread_ver_za64_u64_vg4 (3, w16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg2.c
new file mode 100644
index 0000000000000000000000000000000000000000..55970616ba8de297ec7828389ce69ed363d5aa1f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg2.c
@@ -0,0 +1,140 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za8_s8_z0_0_0:
+**	mov	(w1[2-5]), (?:wzr|#?0)
+**	mova	{z0\.b - z1\.b}, za0v\.b\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_0, svint8x2_t,
+		 z0 = svread_ver_za8_s8_vg2 (0, 0),
+		 z0 = svread_ver_za8_s8_vg2 (0, 0))
+
+/*
+** read_za8_u8_z4_0_1:
+**	mov	(w1[2-5]), #?1
+**	mova	{z4\.b - z5\.b}, za0v\.b\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_1, svuint8x2_t,
+		 z4 = svread_ver_za8_u8_vg2 (0, 1),
+		 z4 = svread_ver_za8_u8_vg2 (0, 1))
+
+/*
+** read_za8_s8_z28_0_w11:
+**	mov	(w1[2-5]), w11
+**	mova	{z28\.b - z29\.b}, za0v\.b\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z28_0_w11, svint8x2_t,
+		 z28 = svread_ver_za8_s8_vg2 (0, w11),
+		 z28 = svread_ver_za8_s8_vg2 (0, w11))
+
+/*
+** read_za8_s8_z0_0_w12:
+**	mova	{z0\.b - z1\.b}, za0v\.b\[w12, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_w12, svint8x2_t,
+		 z0 = svread_ver_za8_s8_vg2 (0, w12),
+		 z0 = svread_ver_za8_s8_vg2 (0, w12))
+
+/*
+** read_za8_u8_z18_0_w15:
+**	mova	{z18\.b - z19\.b}, za0v\.b\[w15, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z18_0_w15, svuint8x2_t,
+		 z18 = svread_ver_za8_u8_vg2 (0, w15),
+		 z18 = svread_ver_za8_u8_vg2 (0, w15))
+
+/*
+** read_za8_s8_z23_0_w12p14:
+**	mova	{[^\n]+}, za0v\.b\[w12, 14:15\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z23_0_w12p14, svint8x2_t,
+		 z23 = svread_ver_za8_s8_vg2 (0, w12 + 14),
+		 z23 = svread_ver_za8_s8_vg2 (0, w12 + 14))
+
+/*
+** read_za8_u8_z4_0_w12p1:
+**	add	(w[0-9]+), w12, #?1
+**	mova	{z4\.b - z5\.b}, za0v\.b\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w12p1, svuint8x2_t,
+		 z4 = svread_ver_za8_u8_vg2 (0, w12 + 1),
+		 z4 = svread_ver_za8_u8_vg2 (0, w12 + 1))
+
+/*
+** read_za8_s8_z28_0_w12p2:
+**	mova	{z28\.b - z29\.b}, za0v\.b\[w12, 2:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z28_0_w12p2, svint8x2_t,
+		 z28 = svread_ver_za8_s8_vg2 (0, w12 + 2),
+		 z28 = svread_ver_za8_s8_vg2 (0, w12 + 2))
+
+/*
+** read_za8_u8_z0_0_w15p3:
+**	add	(w[0-9]+), w15, #?3
+**	mova	{z0\.b - z1\.b}, za0v\.b\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z0_0_w15p3, svuint8x2_t,
+		 z0 = svread_ver_za8_u8_vg2 (0, w15 + 3),
+		 z0 = svread_ver_za8_u8_vg2 (0, w15 + 3))
+
+/*
+** read_za8_u8_z4_0_w15p12:
+**	mova	{z4\.b - z5\.b}, za0v\.b\[w15, 12:13\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w15p12, svuint8x2_t,
+		 z4 = svread_ver_za8_u8_vg2 (0, w15 + 12),
+		 z4 = svread_ver_za8_u8_vg2 (0, w15 + 12))
+
+/*
+** read_za8_u8_z28_0_w12p15:
+**	add	(w[0-9]+), w12, #?15
+**	mova	{z28\.b - z29\.b}, za0v\.b\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z28_0_w12p15, svuint8x2_t,
+		 z28 = svread_ver_za8_u8_vg2 (0, w12 + 15),
+		 z28 = svread_ver_za8_u8_vg2 (0, w12 + 15))
+
+/*
+** read_za8_s8_z0_0_w15p16:
+**	add	(w[0-9]+), w15, #?16
+**	mova	{z0\.b - z1\.b}, za0v\.b\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_w15p16, svint8x2_t,
+		 z0 = svread_ver_za8_s8_vg2 (0, w15 + 16),
+		 z0 = svread_ver_za8_s8_vg2 (0, w15 + 16))
+
+/*
+** read_za8_u8_z4_0_w12m1:
+**	sub	(w[0-9]+), w12, #?1
+**	mova	{z4\.b - z5\.b}, za0v\.b\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x2_t,
+		 z4 = svread_ver_za8_u8_vg2 (0, w12 - 1),
+		 z4 = svread_ver_za8_u8_vg2 (0, w12 - 1))
+
+/*
+** read_za8_u8_z18_0_w16:
+**	mov	(w1[2-5]), w16
+**	mova	{z18\.b - z19\.b}, za0v\.b\[\1, 0:1\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z18_0_w16, svuint8x2_t,
+		 z18 = svread_ver_za8_u8_vg2 (0, w16),
+		 z18 = svread_ver_za8_u8_vg2 (0, w16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg4.c
new file mode 100644
index 0000000000000000000000000000000000000000..6fd8a976d4f08e1ab8ed02935ba730b22fc6ad46
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg4.c
@@ -0,0 +1,156 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_za8_s8_z0_0_0:
+**	mov	(w1[2-5]), (?:wzr|#?0)
+**	mova	{z0\.b - z3\.b}, za0v\.b\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_0, svint8x4_t,
+		 z0 = svread_ver_za8_s8_vg4 (0, 0),
+		 z0 = svread_ver_za8_s8_vg4 (0, 0))
+
+/*
+** read_za8_u8_z4_0_1:
+**	mov	(w1[2-5]), #?1
+**	mova	{z4\.b - z7\.b}, za0v\.b\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_1, svuint8x4_t,
+		 z4 = svread_ver_za8_u8_vg4 (0, 1),
+		 z4 = svread_ver_za8_u8_vg4 (0, 1))
+
+/*
+** read_za8_s8_z28_0_w11:
+**	mov	(w1[2-5]), w11
+**	mova	{z28\.b - z31\.b}, za0v\.b\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z28_0_w11, svint8x4_t,
+		 z28 = svread_ver_za8_s8_vg4 (0, w11),
+		 z28 = svread_ver_za8_s8_vg4 (0, w11))
+
+/*
+** read_za8_s8_z0_0_w12:
+**	mova	{z0\.b - z3\.b}, za0v\.b\[w12, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_w12, svint8x4_t,
+		 z0 = svread_ver_za8_s8_vg4 (0, w12),
+		 z0 = svread_ver_za8_s8_vg4 (0, w12))
+
+/*
+** read_za8_u8_z18_0_w15:
+**	mova	{[^\n]+}, za0v\.b\[w15, 0:3\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z18_0_w15, svuint8x4_t,
+		 z18 = svread_ver_za8_u8_vg4 (0, w15),
+		 z18 = svread_ver_za8_u8_vg4 (0, w15))
+
+/*
+** read_za8_s8_z23_0_w12p12:
+**	mova	{[^\n]+}, za0v\.b\[w12, 12:15\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z23_0_w12p12, svint8x4_t,
+		 z23 = svread_ver_za8_s8_vg4 (0, w12 + 12),
+		 z23 = svread_ver_za8_s8_vg4 (0, w12 + 12))
+
+/*
+** read_za8_u8_z4_0_w12p1:
+**	add	(w[0-9]+), w12, #?1
+**	mova	{z4\.b - z7\.b}, za0v\.b\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w12p1, svuint8x4_t,
+		 z4 = svread_ver_za8_u8_vg4 (0, w12 + 1),
+		 z4 = svread_ver_za8_u8_vg4 (0, w12 + 1))
+
+/*
+** read_za8_s8_z28_0_w12p2:
+**	add	(w[0-9]+), w12, #?2
+**	mova	{z28\.b - z31\.b}, za0v\.b\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z28_0_w12p2, svint8x4_t,
+		 z28 = svread_ver_za8_s8_vg4 (0, w12 + 2),
+		 z28 = svread_ver_za8_s8_vg4 (0, w12 + 2))
+
+/*
+** read_za8_u8_z0_0_w15p3:
+**	add	(w[0-9]+), w15, #?3
+**	mova	{z0\.b - z3\.b}, za0v\.b\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z0_0_w15p3, svuint8x4_t,
+		 z0 = svread_ver_za8_u8_vg4 (0, w15 + 3),
+		 z0 = svread_ver_za8_u8_vg4 (0, w15 + 3))
+
+/*
+** read_za8_u8_z0_0_w12p4:
+**	mova	{z0\.b - z3\.b}, za0v\.b\[w12, 4:7\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z0_0_w12p4, svuint8x4_t,
+		 z0 = svread_ver_za8_u8_vg4 (0, w12 + 4),
+		 z0 = svread_ver_za8_u8_vg4 (0, w12 + 4))
+
+/*
+** read_za8_u8_z4_0_w15p12:
+**	mova	{z4\.b - z7\.b}, za0v\.b\[w15, 12:15\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w15p12, svuint8x4_t,
+		 z4 = svread_ver_za8_u8_vg4 (0, w15 + 12),
+		 z4 = svread_ver_za8_u8_vg4 (0, w15 + 12))
+
+/*
+** read_za8_u8_z28_0_w12p14:
+**	add	(w[0-9]+), w12, #?14
+**	mova	{z28\.b - z31\.b}, za0v\.b\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z28_0_w12p14, svuint8x4_t,
+		 z28 = svread_ver_za8_u8_vg4 (0, w12 + 14),
+		 z28 = svread_ver_za8_u8_vg4 (0, w12 + 14))
+
+/*
+** read_za8_s8_z0_0_w15p16:
+**	add	(w[0-9]+), w15, #?16
+**	mova	{z0\.b - z3\.b}, za0v\.b\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_s8_z0_0_w15p16, svint8x4_t,
+		 z0 = svread_ver_za8_s8_vg4 (0, w15 + 16),
+		 z0 = svread_ver_za8_s8_vg4 (0, w15 + 16))
+
+/*
+** read_za8_u8_z4_0_w12m1:
+**	sub	(w[0-9]+), w12, #?1
+**	mova	{z4\.b - z7\.b}, za0v\.b\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x4_t,
+		 z4 = svread_ver_za8_u8_vg4 (0, w12 - 1),
+		 z4 = svread_ver_za8_u8_vg4 (0, w12 - 1))
+
+/*
+** read_za8_u8_z28_0_w16:
+**	mov	(w1[2-5]), w16
+**	mova	{z28\.b - z31\.b}, za0v\.b\[\1, 0:3\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_za8_u8_z28_0_w16, svuint8x4_t,
+		 z28 = svread_ver_za8_u8_vg4 (0, w16),
+		 z28 = svread_ver_za8_u8_vg4 (0, w16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za16_vg1x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..f8a9316396bdaf12b75a40c6ce4c786ca3cf0ee7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za16_vg1x2.c
@@ -0,0 +1,122 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_0_z0, svfloat16x2_t,
+		 z0 = svread_za16_f16_vg1x2 (0),
+		 z0 = svread_za16_f16_vg1x2 (0))
+
+/*
+** read_w0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w0_z0, svint16x2_t,
+		 z0 = svread_za16_s16_vg1x2 (w0),
+		 z0 = svread_za16_s16_vg1x2 (w0))
+
+/*
+** read_w7_z0:
+**	mov	(w8|w9|w10|w11), w7
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w7_z0, svuint16x2_t,
+		 z0 = svread_za16_u16_vg1x2 (w7),
+		 z0 = svread_za16_u16_vg1x2 (w7))
+
+/*
+** read_w8_z0:
+**	mova	{z0\.d - z1\.d}, za\.d\[w8, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z0, svbfloat16x2_t,
+		 z0 = svread_za16_bf16_vg1x2 (w8),
+		 z0 = svread_za16_bf16_vg1x2 (w8))
+
+/*
+** read_w11_z0:
+**	mova	{z0\.d - z1\.d}, za\.d\[w11, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w11_z0, svint16x2_t,
+		 z0 = svread_za16_s16_vg1x2 (w11),
+		 z0 = svread_za16_s16_vg1x2 (w11))
+
+
+/*
+** read_w12_z0:
+**	mov	(w8|w9|w10|w11), w12
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w12_z0, svuint16x2_t,
+		 z0 = svread_za16_u16_vg1x2 (w12),
+		 z0 = svread_za16_u16_vg1x2 (w12))
+
+/*
+** read_w8p7_z0:
+**	mova	{z0\.d - z1\.d}, za\.d\[w8, 7, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8p7_z0, svfloat16x2_t,
+		 z0 = svread_za16_f16_vg1x2 (w8 + 7),
+		 z0 = svread_za16_f16_vg1x2 (w8 + 7))
+
+/*
+** read_w8p8_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8p8_z0, svint16x2_t,
+		 z0 = svread_za16_s16_vg1x2 (w8 + 8),
+		 z0 = svread_za16_s16_vg1x2 (w8 + 8))
+
+/*
+** read_w8m1_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8m1_z0, svuint16x2_t,
+		 z0 = svread_za16_u16_vg1x2 (w8 - 1),
+		 z0 = svread_za16_u16_vg1x2 (w8 - 1))
+
+/*
+** read_w8_z18:
+**	mova	{z18\.d - z19\.d}, za\.d\[w8, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z18, svfloat16x2_t,
+		 z18 = svread_za16_f16_vg1x2 (w8),
+		 z18 = svread_za16_f16_vg1x2 (w8))
+
+/* Leave the assembler to check for correctness for misaligned registers.  */
+
+/*
+** read_w8_z23:
+**	mova	[^\n]+, za\.d\[w8, 0, vgx2\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z23, svint16x2_t,
+		 z23 = svread_za16_s16_vg1x2 (w8),
+		 z23 = svread_za16_s16_vg1x2 (w8))
+
+/*
+** read_w8_z28:
+**	mova	{z28\.d - z29\.d}, za\.d\[w8, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z28, svbfloat16x2_t,
+		 z28 = svread_za16_bf16_vg1x2 (w8),
+		 z28 = svread_za16_bf16_vg1x2 (w8))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za16_vg1x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..c983f8b0924d0704e87a95a55358d62c58d76e5d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za16_vg1x4.c
@@ -0,0 +1,137 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_0_z0, svint16x4_t,
+		 z0 = svread_za16_s16_vg1x4 (0),
+		 z0 = svread_za16_s16_vg1x4 (0))
+
+/*
+** read_w0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w0_z0, svuint16x4_t,
+		 z0 = svread_za16_u16_vg1x4 (w0),
+		 z0 = svread_za16_u16_vg1x4 (w0))
+
+/*
+** read_w7_z0:
+**	mov	(w8|w9|w10|w11), w7
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w7_z0, svfloat16x4_t,
+		 z0 = svread_za16_f16_vg1x4 (w7),
+		 z0 = svread_za16_f16_vg1x4 (w7))
+
+/*
+** read_w8_z0:
+**	mova	{z0\.d - z3\.d}, za\.d\[w8, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z0, svint16x4_t,
+		 z0 = svread_za16_s16_vg1x4 (w8),
+		 z0 = svread_za16_s16_vg1x4 (w8))
+
+/*
+** read_w11_z0:
+**	mova	{z0\.d - z3\.d}, za\.d\[w11, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w11_z0, svuint16x4_t,
+		 z0 = svread_za16_u16_vg1x4 (w11),
+		 z0 = svread_za16_u16_vg1x4 (w11))
+
+
+/*
+** read_w12_z0:
+**	mov	(w8|w9|w10|w11), w12
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w12_z0, svbfloat16x4_t,
+		 z0 = svread_za16_bf16_vg1x4 (w12),
+		 z0 = svread_za16_bf16_vg1x4 (w12))
+
+/*
+** read_w8p7_z0:
+**	mova	{z0\.d - z3\.d}, za\.d\[w8, 7, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8p7_z0, svint16x4_t,
+		 z0 = svread_za16_s16_vg1x4 (w8 + 7),
+		 z0 = svread_za16_s16_vg1x4 (w8 + 7))
+
+/*
+** read_w8p8_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8p8_z0, svuint16x4_t,
+		 z0 = svread_za16_u16_vg1x4 (w8 + 8),
+		 z0 = svread_za16_u16_vg1x4 (w8 + 8))
+
+/*
+** read_w8m1_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8m1_z0, svfloat16x4_t,
+		 z0 = svread_za16_f16_vg1x4 (w8 - 1),
+		 z0 = svread_za16_f16_vg1x4 (w8 - 1))
+
+/*
+** read_w8_z4:
+**	mova	{z4\.d - z7\.d}, za\.d\[w8, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z4, svint16x4_t,
+		 z4 = svread_za16_s16_vg1x4 (w8),
+		 z4 = svread_za16_s16_vg1x4 (w8))
+
+/* Leave the assembler to check for correctness for misaligned registers.  */
+
+/*
+** read_w8_z18:
+**	mova	[^\n]+, za\.d\[w8, 0, vgx4\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z18, svuint16x4_t,
+		 z18 = svread_za16_u16_vg1x4 (w8),
+		 z18 = svread_za16_u16_vg1x4 (w8))
+
+/*
+** read_w8_z23:
+**	mova	[^\n]+, za\.d\[w8, 0, vgx4\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z23, svbfloat16x4_t,
+		 z23 = svread_za16_bf16_vg1x4 (w8),
+		 z23 = svread_za16_bf16_vg1x4 (w8))
+
+/*
+** read_w8_z28:
+**	mova	{z28\.d - z31\.d}, za\.d\[w8, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z28, svint16x4_t,
+		 z28 = svread_za16_s16_vg1x4 (w8),
+		 z28 = svread_za16_s16_vg1x4 (w8))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za32_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za32_vg1x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..667f482250db3f56d4b3044127dd36f9f7a259c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za32_vg1x2.c
@@ -0,0 +1,122 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_0_z0, svfloat32x2_t,
+		 z0 = svread_za32_f32_vg1x2 (0),
+		 z0 = svread_za32_f32_vg1x2 (0))
+
+/*
+** read_w0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w0_z0, svint32x2_t,
+		 z0 = svread_za32_s32_vg1x2 (w0),
+		 z0 = svread_za32_s32_vg1x2 (w0))
+
+/*
+** read_w7_z0:
+**	mov	(w8|w9|w10|w11), w7
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w7_z0, svuint32x2_t,
+		 z0 = svread_za32_u32_vg1x2 (w7),
+		 z0 = svread_za32_u32_vg1x2 (w7))
+
+/*
+** read_w8_z0:
+**	mova	{z0\.d - z1\.d}, za\.d\[w8, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z0, svfloat32x2_t,
+		 z0 = svread_za32_f32_vg1x2 (w8),
+		 z0 = svread_za32_f32_vg1x2 (w8))
+
+/*
+** read_w11_z0:
+**	mova	{z0\.d - z1\.d}, za\.d\[w11, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w11_z0, svint32x2_t,
+		 z0 = svread_za32_s32_vg1x2 (w11),
+		 z0 = svread_za32_s32_vg1x2 (w11))
+
+
+/*
+** read_w12_z0:
+**	mov	(w8|w9|w10|w11), w12
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w12_z0, svuint32x2_t,
+		 z0 = svread_za32_u32_vg1x2 (w12),
+		 z0 = svread_za32_u32_vg1x2 (w12))
+
+/*
+** read_w8p7_z0:
+**	mova	{z0\.d - z1\.d}, za\.d\[w8, 7, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8p7_z0, svfloat32x2_t,
+		 z0 = svread_za32_f32_vg1x2 (w8 + 7),
+		 z0 = svread_za32_f32_vg1x2 (w8 + 7))
+
+/*
+** read_w8p8_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8p8_z0, svint32x2_t,
+		 z0 = svread_za32_s32_vg1x2 (w8 + 8),
+		 z0 = svread_za32_s32_vg1x2 (w8 + 8))
+
+/*
+** read_w8m1_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8m1_z0, svuint32x2_t,
+		 z0 = svread_za32_u32_vg1x2 (w8 - 1),
+		 z0 = svread_za32_u32_vg1x2 (w8 - 1))
+
+/*
+** read_w8_z18:
+**	mova	{z18\.d - z19\.d}, za\.d\[w8, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z18, svfloat32x2_t,
+		 z18 = svread_za32_f32_vg1x2 (w8),
+		 z18 = svread_za32_f32_vg1x2 (w8))
+
+/* Leave the assembler to check for correctness for misaligned registers.  */
+
+/*
+** read_w8_z23:
+**	mova	[^\n]+, za\.d\[w8, 0, vgx2\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z23, svint32x2_t,
+		 z23 = svread_za32_s32_vg1x2 (w8),
+		 z23 = svread_za32_s32_vg1x2 (w8))
+
+/*
+** read_w8_z28:
+**	mova	{z28\.d - z29\.d}, za\.d\[w8, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z28, svuint32x2_t,
+		 z28 = svread_za32_u32_vg1x2 (w8),
+		 z28 = svread_za32_u32_vg1x2 (w8))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za32_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za32_vg1x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..c2579e2e983858ccfd11cb469efb5fa96d88a6bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za32_vg1x4.c
@@ -0,0 +1,137 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_0_z0, svint32x4_t,
+		 z0 = svread_za32_s32_vg1x4 (0),
+		 z0 = svread_za32_s32_vg1x4 (0))
+
+/*
+** read_w0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w0_z0, svuint32x4_t,
+		 z0 = svread_za32_u32_vg1x4 (w0),
+		 z0 = svread_za32_u32_vg1x4 (w0))
+
+/*
+** read_w7_z0:
+**	mov	(w8|w9|w10|w11), w7
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w7_z0, svfloat32x4_t,
+		 z0 = svread_za32_f32_vg1x4 (w7),
+		 z0 = svread_za32_f32_vg1x4 (w7))
+
+/*
+** read_w8_z0:
+**	mova	{z0\.d - z3\.d}, za\.d\[w8, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z0, svint32x4_t,
+		 z0 = svread_za32_s32_vg1x4 (w8),
+		 z0 = svread_za32_s32_vg1x4 (w8))
+
+/*
+** read_w11_z0:
+**	mova	{z0\.d - z3\.d}, za\.d\[w11, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w11_z0, svuint32x4_t,
+		 z0 = svread_za32_u32_vg1x4 (w11),
+		 z0 = svread_za32_u32_vg1x4 (w11))
+
+
+/*
+** read_w12_z0:
+**	mov	(w8|w9|w10|w11), w12
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w12_z0, svfloat32x4_t,
+		 z0 = svread_za32_f32_vg1x4 (w12),
+		 z0 = svread_za32_f32_vg1x4 (w12))
+
+/*
+** read_w8p7_z0:
+**	mova	{z0\.d - z3\.d}, za\.d\[w8, 7, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8p7_z0, svint32x4_t,
+		 z0 = svread_za32_s32_vg1x4 (w8 + 7),
+		 z0 = svread_za32_s32_vg1x4 (w8 + 7))
+
+/*
+** read_w8p8_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8p8_z0, svuint32x4_t,
+		 z0 = svread_za32_u32_vg1x4 (w8 + 8),
+		 z0 = svread_za32_u32_vg1x4 (w8 + 8))
+
+/*
+** read_w8m1_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8m1_z0, svfloat32x4_t,
+		 z0 = svread_za32_f32_vg1x4 (w8 - 1),
+		 z0 = svread_za32_f32_vg1x4 (w8 - 1))
+
+/*
+** read_w8_z4:
+**	mova	{z4\.d - z7\.d}, za\.d\[w8, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z4, svint32x4_t,
+		 z4 = svread_za32_s32_vg1x4 (w8),
+		 z4 = svread_za32_s32_vg1x4 (w8))
+
+/* Leave the assembler to check for correctness for misaligned registers.  */
+
+/*
+** read_w8_z18:
+**	mova	[^\n]+, za\.d\[w8, 0, vgx4\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z18, svuint32x4_t,
+		 z18 = svread_za32_u32_vg1x4 (w8),
+		 z18 = svread_za32_u32_vg1x4 (w8))
+
+/*
+** read_w8_z23:
+**	mova	[^\n]+, za\.d\[w8, 0, vgx4\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z23, svfloat32x4_t,
+		 z23 = svread_za32_f32_vg1x4 (w8),
+		 z23 = svread_za32_f32_vg1x4 (w8))
+
+/*
+** read_w8_z28:
+**	mova	{z28\.d - z31\.d}, za\.d\[w8, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z28, svint32x4_t,
+		 z28 = svread_za32_s32_vg1x4 (w8),
+		 z28 = svread_za32_s32_vg1x4 (w8))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za64_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za64_vg1x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..15ca800e10f4684919696bf15c334b824024ac85
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za64_vg1x2.c
@@ -0,0 +1,122 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_0_z0, svfloat64x2_t,
+		 z0 = svread_za64_f64_vg1x2 (0),
+		 z0 = svread_za64_f64_vg1x2 (0))
+
+/*
+** read_w0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w0_z0, svint64x2_t,
+		 z0 = svread_za64_s64_vg1x2 (w0),
+		 z0 = svread_za64_s64_vg1x2 (w0))
+
+/*
+** read_w7_z0:
+**	mov	(w8|w9|w10|w11), w7
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w7_z0, svuint64x2_t,
+		 z0 = svread_za64_u64_vg1x2 (w7),
+		 z0 = svread_za64_u64_vg1x2 (w7))
+
+/*
+** read_w8_z0:
+**	mova	{z0\.d - z1\.d}, za\.d\[w8, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z0, svfloat64x2_t,
+		 z0 = svread_za64_f64_vg1x2 (w8),
+		 z0 = svread_za64_f64_vg1x2 (w8))
+
+/*
+** read_w11_z0:
+**	mova	{z0\.d - z1\.d}, za\.d\[w11, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w11_z0, svint64x2_t,
+		 z0 = svread_za64_s64_vg1x2 (w11),
+		 z0 = svread_za64_s64_vg1x2 (w11))
+
+
+/*
+** read_w12_z0:
+**	mov	(w8|w9|w10|w11), w12
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w12_z0, svuint64x2_t,
+		 z0 = svread_za64_u64_vg1x2 (w12),
+		 z0 = svread_za64_u64_vg1x2 (w12))
+
+/*
+** read_w8p7_z0:
+**	mova	{z0\.d - z1\.d}, za\.d\[w8, 7, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8p7_z0, svfloat64x2_t,
+		 z0 = svread_za64_f64_vg1x2 (w8 + 7),
+		 z0 = svread_za64_f64_vg1x2 (w8 + 7))
+
+/*
+** read_w8p8_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8p8_z0, svint64x2_t,
+		 z0 = svread_za64_s64_vg1x2 (w8 + 8),
+		 z0 = svread_za64_s64_vg1x2 (w8 + 8))
+
+/*
+** read_w8m1_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8m1_z0, svuint64x2_t,
+		 z0 = svread_za64_u64_vg1x2 (w8 - 1),
+		 z0 = svread_za64_u64_vg1x2 (w8 - 1))
+
+/*
+** read_w8_z18:
+**	mova	{z18\.d - z19\.d}, za\.d\[w8, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z18, svfloat64x2_t,
+		 z18 = svread_za64_f64_vg1x2 (w8),
+		 z18 = svread_za64_f64_vg1x2 (w8))
+
+/* Leave the assembler to check for correctness for misaligned registers.  */
+
+/*
+** read_w8_z23:
+**	mova	[^\n]+, za\.d\[w8, 0, vgx2\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z23, svint64x2_t,
+		 z23 = svread_za64_s64_vg1x2 (w8),
+		 z23 = svread_za64_s64_vg1x2 (w8))
+
+/*
+** read_w8_z28:
+**	mova	{z28\.d - z29\.d}, za\.d\[w8, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z28, svuint64x2_t,
+		 z28 = svread_za64_u64_vg1x2 (w8),
+		 z28 = svread_za64_u64_vg1x2 (w8))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za64_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za64_vg1x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..5b7684e9b26442d36ae67e06684dbc08c772d062
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za64_vg1x4.c
@@ -0,0 +1,137 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_0_z0, svint64x4_t,
+		 z0 = svread_za64_s64_vg1x4 (0),
+		 z0 = svread_za64_s64_vg1x4 (0))
+
+/*
+** read_w0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w0_z0, svuint64x4_t,
+		 z0 = svread_za64_u64_vg1x4 (w0),
+		 z0 = svread_za64_u64_vg1x4 (w0))
+
+/*
+** read_w7_z0:
+**	mov	(w8|w9|w10|w11), w7
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w7_z0, svfloat64x4_t,
+		 z0 = svread_za64_f64_vg1x4 (w7),
+		 z0 = svread_za64_f64_vg1x4 (w7))
+
+/*
+** read_w8_z0:
+**	mova	{z0\.d - z3\.d}, za\.d\[w8, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z0, svint64x4_t,
+		 z0 = svread_za64_s64_vg1x4 (w8),
+		 z0 = svread_za64_s64_vg1x4 (w8))
+
+/*
+** read_w11_z0:
+**	mova	{z0\.d - z3\.d}, za\.d\[w11, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w11_z0, svuint64x4_t,
+		 z0 = svread_za64_u64_vg1x4 (w11),
+		 z0 = svread_za64_u64_vg1x4 (w11))
+
+
+/*
+** read_w12_z0:
+**	mov	(w8|w9|w10|w11), w12
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w12_z0, svfloat64x4_t,
+		 z0 = svread_za64_f64_vg1x4 (w12),
+		 z0 = svread_za64_f64_vg1x4 (w12))
+
+/*
+** read_w8p7_z0:
+**	mova	{z0\.d - z3\.d}, za\.d\[w8, 7, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8p7_z0, svint64x4_t,
+		 z0 = svread_za64_s64_vg1x4 (w8 + 7),
+		 z0 = svread_za64_s64_vg1x4 (w8 + 7))
+
+/*
+** read_w8p8_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8p8_z0, svuint64x4_t,
+		 z0 = svread_za64_u64_vg1x4 (w8 + 8),
+		 z0 = svread_za64_u64_vg1x4 (w8 + 8))
+
+/*
+** read_w8m1_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8m1_z0, svfloat64x4_t,
+		 z0 = svread_za64_f64_vg1x4 (w8 - 1),
+		 z0 = svread_za64_f64_vg1x4 (w8 - 1))
+
+/*
+** read_w8_z4:
+**	mova	{z4\.d - z7\.d}, za\.d\[w8, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z4, svint64x4_t,
+		 z4 = svread_za64_s64_vg1x4 (w8),
+		 z4 = svread_za64_s64_vg1x4 (w8))
+
+/* Leave the assembler to check for correctness for misaligned registers.  */
+
+/*
+** read_w8_z18:
+**	mova	[^\n]+, za\.d\[w8, 0, vgx4\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z18, svuint64x4_t,
+		 z18 = svread_za64_u64_vg1x4 (w8),
+		 z18 = svread_za64_u64_vg1x4 (w8))
+
+/*
+** read_w8_z23:
+**	mova	[^\n]+, za\.d\[w8, 0, vgx4\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z23, svfloat64x4_t,
+		 z23 = svread_za64_f64_vg1x4 (w8),
+		 z23 = svread_za64_f64_vg1x4 (w8))
+
+/*
+** read_w8_z28:
+**	mova	{z28\.d - z31\.d}, za\.d\[w8, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z28, svint64x4_t,
+		 z28 = svread_za64_s64_vg1x4 (w8),
+		 z28 = svread_za64_s64_vg1x4 (w8))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..9b151abf4faef2483adae9e1ce523cd955ee2580
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x2.c
@@ -0,0 +1,122 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_0_z0, svint8x2_t,
+		 z0 = svread_za8_s8_vg1x2 (0),
+		 z0 = svread_za8_s8_vg1x2 (0))
+
+/*
+** read_w0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w0_z0, svint8x2_t,
+		 z0 = svread_za8_s8_vg1x2 (w0),
+		 z0 = svread_za8_s8_vg1x2 (w0))
+
+/*
+** read_w7_z0:
+**	mov	(w8|w9|w10|w11), w7
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w7_z0, svuint8x2_t,
+		 z0 = svread_za8_u8_vg1x2 (w7),
+		 z0 = svread_za8_u8_vg1x2 (w7))
+
+/*
+** read_w8_z0:
+**	mova	{z0\.d - z1\.d}, za\.d\[w8, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z0, svint8x2_t,
+		 z0 = svread_za8_s8_vg1x2 (w8),
+		 z0 = svread_za8_s8_vg1x2 (w8))
+
+/*
+** read_w11_z0:
+**	mova	{z0\.d - z1\.d}, za\.d\[w11, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w11_z0, svint8x2_t,
+		 z0 = svread_za8_s8_vg1x2 (w11),
+		 z0 = svread_za8_s8_vg1x2 (w11))
+
+
+/*
+** read_w12_z0:
+**	mov	(w8|w9|w10|w11), w12
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w12_z0, svuint8x2_t,
+		 z0 = svread_za8_u8_vg1x2 (w12),
+		 z0 = svread_za8_u8_vg1x2 (w12))
+
+/*
+** read_w8p7_z0:
+**	mova	{z0\.d - z1\.d}, za\.d\[w8, 7, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8p7_z0, svint8x2_t,
+		 z0 = svread_za8_s8_vg1x2 (w8 + 7),
+		 z0 = svread_za8_s8_vg1x2 (w8 + 7))
+
+/*
+** read_w8p8_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8p8_z0, svint8x2_t,
+		 z0 = svread_za8_s8_vg1x2 (w8 + 8),
+		 z0 = svread_za8_s8_vg1x2 (w8 + 8))
+
+/*
+** read_w8m1_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	mova	{z0\.d - z1\.d}, za\.d\[\1, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8m1_z0, svuint8x2_t,
+		 z0 = svread_za8_u8_vg1x2 (w8 - 1),
+		 z0 = svread_za8_u8_vg1x2 (w8 - 1))
+
+/*
+** read_w8_z18:
+**	mova	{z18\.d - z19\.d}, za\.d\[w8, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z18, svuint8x2_t,
+		 z18 = svread_za8_u8_vg1x2 (w8),
+		 z18 = svread_za8_u8_vg1x2 (w8))
+
+/* Leave the assembler to check for correctness for misaligned registers.  */
+
+/*
+** read_w8_z23:
+**	mova	[^\n]+, za\.d\[w8, 0, vgx2\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z23, svint8x2_t,
+		 z23 = svread_za8_s8_vg1x2 (w8),
+		 z23 = svread_za8_s8_vg1x2 (w8))
+
+/*
+** read_w8_z28:
+**	mova	{z28\.d - z29\.d}, za\.d\[w8, 0, vgx2\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z28, svuint8x2_t,
+		 z28 = svread_za8_u8_vg1x2 (w8),
+		 z28 = svread_za8_u8_vg1x2 (w8))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..80c81dde097d664181ba6d97624db81e99bc57e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x4.c
@@ -0,0 +1,137 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** read_0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_0_z0, svint8x4_t,
+		 z0 = svread_za8_s8_vg1x4 (0),
+		 z0 = svread_za8_s8_vg1x4 (0))
+
+/*
+** read_w0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w0_z0, svuint8x4_t,
+		 z0 = svread_za8_u8_vg1x4 (w0),
+		 z0 = svread_za8_u8_vg1x4 (w0))
+
+/*
+** read_w7_z0:
+**	mov	(w8|w9|w10|w11), w7
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w7_z0, svint8x4_t,
+		 z0 = svread_za8_s8_vg1x4 (w7),
+		 z0 = svread_za8_s8_vg1x4 (w7))
+
+/*
+** read_w8_z0:
+**	mova	{z0\.d - z3\.d}, za\.d\[w8, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z0, svint8x4_t,
+		 z0 = svread_za8_s8_vg1x4 (w8),
+		 z0 = svread_za8_s8_vg1x4 (w8))
+
+/*
+** read_w11_z0:
+**	mova	{z0\.d - z3\.d}, za\.d\[w11, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w11_z0, svuint8x4_t,
+		 z0 = svread_za8_u8_vg1x4 (w11),
+		 z0 = svread_za8_u8_vg1x4 (w11))
+
+
+/*
+** read_w12_z0:
+**	mov	(w8|w9|w10|w11), w12
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w12_z0, svint8x4_t,
+		 z0 = svread_za8_s8_vg1x4 (w12),
+		 z0 = svread_za8_s8_vg1x4 (w12))
+
+/*
+** read_w8p7_z0:
+**	mova	{z0\.d - z3\.d}, za\.d\[w8, 7, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8p7_z0, svint8x4_t,
+		 z0 = svread_za8_s8_vg1x4 (w8 + 7),
+		 z0 = svread_za8_s8_vg1x4 (w8 + 7))
+
+/*
+** read_w8p8_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8p8_z0, svuint8x4_t,
+		 z0 = svread_za8_u8_vg1x4 (w8 + 8),
+		 z0 = svread_za8_u8_vg1x4 (w8 + 8))
+
+/*
+** read_w8m1_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	mova	{z0\.d - z3\.d}, za\.d\[\1, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8m1_z0, svint8x4_t,
+		 z0 = svread_za8_s8_vg1x4 (w8 - 1),
+		 z0 = svread_za8_s8_vg1x4 (w8 - 1))
+
+/*
+** read_w8_z4:
+**	mova	{z4\.d - z7\.d}, za\.d\[w8, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z4, svint8x4_t,
+		 z4 = svread_za8_s8_vg1x4 (w8),
+		 z4 = svread_za8_s8_vg1x4 (w8))
+
+/* Leave the assembler to check for correctness for misaligned registers.  */
+
+/*
+** read_w8_z18:
+**	mova	[^\n]+, za\.d\[w8, 0, vgx4\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z18, svuint8x4_t,
+		 z18 = svread_za8_u8_vg1x4 (w8),
+		 z18 = svread_za8_u8_vg1x4 (w8))
+
+/*
+** read_w8_z23:
+**	mova	[^\n]+, za\.d\[w8, 0, vgx4\]
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z23, svuint8x4_t,
+		 z23 = svread_za8_u8_vg1x4 (w8),
+		 z23 = svread_za8_u8_vg1x4 (w8))
+
+/*
+** read_w8_z28:
+**	mova	{z28\.d - z31\.d}, za\.d\[w8, 0, vgx4\]
+**	ret
+*/
+TEST_READ_ZA_XN (read_w8_z28, svint8x4_t,
+		 z28 = svread_za8_s8_vg1x4 (w8),
+		 z28 = svread_za8_s8_vg1x4 (w8))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rinta_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rinta_s32_x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..48017f0e6beae36235665ab8a1e3d3bcc3b9a0cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rinta_s32_x2.c
@@ -0,0 +1,61 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rinta_z0_z0:
+**	frinta	{z0\.s - z1\.s}, {z0\.s - z1\.s}
+**	ret
+*/
+TEST_XN (rinta_z0_z0, svfloat32x2_t, z0,
+	 svrinta_f32_x2 (z0),
+	 svrinta (z0))
+
+/*
+** rinta_z0_z4:
+**	frinta	{z0\.s - z1\.s}, {z4\.s - z5\.s}
+**	ret
+*/
+TEST_XN (rinta_z0_z4, svfloat32x2_t, z0,
+	 svrinta_f32_x2 (z4),
+	 svrinta (z4))
+
+/*
+** rinta_z4_z18:
+**	frinta	{z4\.s - z5\.s}, {z18\.s - z19\.s}
+**	ret
+*/
+TEST_XN (rinta_z4_z18, svfloat32x2_t, z4,
+	 svrinta_f32_x2 (z18),
+	 svrinta (z18))
+
+/*
+** rinta_z18_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	frinta	{z18\.s - z19\.s}, [^\n]+
+**	ret
+*/
+TEST_XN (rinta_z18_z23, svfloat32x2_t, z18,
+	 svrinta_f32_x2 (z23),
+	 svrinta (z23))
+
+/*
+** rinta_z23_z28:
+**	frinta	[^\n]+, {z28\.s - z29\.s}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (rinta_z23_z28, svfloat32x2_t, z23,
+	 svrinta_f32_x2 (z28),
+	 svrinta (z28))
+
+/*
+** rinta_z28_z0:
+**	frinta	{z28\.s - z29\.s}, {z0\.s - z1\.s}
+**	ret
+*/
+TEST_XN (rinta_z28_z0, svfloat32x2_t, z28,
+	 svrinta_f32_x2 (z0),
+	 svrinta (z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rinta_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rinta_s32_x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..94e3a06084109a6949fae45341f0ebf8fd7f182a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rinta_s32_x4.c
@@ -0,0 +1,73 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rinta_z0_z0:
+**	frinta	{z0\.s - z3\.s}, {z0\.s - z3\.s}
+**	ret
+*/
+TEST_XN (rinta_z0_z0, svfloat32x4_t, z0,
+	 svrinta_f32_x4 (z0),
+	 svrinta (z0))
+
+/*
+** rinta_z0_z4:
+**	frinta	{z0\.s - z3\.s}, {z4\.s - z7\.s}
+**	ret
+*/
+TEST_XN (rinta_z0_z4, svfloat32x4_t, z0,
+	 svrinta_f32_x4 (z4),
+	 svrinta (z4))
+
+/*
+** rinta_z4_z18:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	frinta	{z4\.s - z7\.s}, [^\n]+
+**	ret
+*/
+TEST_XN (rinta_z4_z18, svfloat32x4_t, z4,
+	 svrinta_f32_x4 (z18),
+	 svrinta (z18))
+
+/*
+** rinta_z18_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	frinta	{z[^\n]+}, {z.*}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (rinta_z18_z23, svfloat32x4_t, z18,
+	 svrinta_f32_x4 (z23),
+	 svrinta (z23))
+
+/*
+** rinta_z23_z28:
+**	frinta	[^\n]+, {z28\.s - z31\.s}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (rinta_z23_z28, svfloat32x4_t, z23,
+	 svrinta_f32_x4 (z28),
+	 svrinta (z28))
+
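+/* For reference: svrinta rounds each element to the nearest integral
+   value, with ties away from zero (FRINTA), and the x4 form handles a
+   whole tuple at once.  An illustrative (hypothetical) use:
+
+     svfloat32x4_t round_all (svfloat32x4_t x) __arm_streaming
+     {
+       return svrinta (x);	// or svrinta_f32_x4 (x)
+     }
+*/
+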
+/*
+** rinta_z28_z0:
+**	frinta	{z28\.s - z31\.s}, {z0\.s - z3\.s}
+**	ret
+*/
+TEST_XN (rinta_z28_z0, svfloat32x4_t, z28,
+	 svrinta_f32_x4 (z0),
+	 svrinta (z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintm_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintm_u32_x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..db41ef2285002f0e101fb6ee674a8c66450e6bc8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintm_u32_x2.c
@@ -0,0 +1,61 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rintm_z0_z0:
+**	frintm	{z0\.s - z1\.s}, {z0\.s - z1\.s}
+**	ret
+*/
+TEST_XN (rintm_z0_z0, svfloat32x2_t, z0,
+	 svrintm_f32_x2 (z0),
+	 svrintm (z0))
+
+/*
+** rintm_z0_z4:
+**	frintm	{z0\.s - z1\.s}, {z4\.s - z5\.s}
+**	ret
+*/
+TEST_XN (rintm_z0_z4, svfloat32x2_t, z0,
+	 svrintm_f32_x2 (z4),
+	 svrintm (z4))
+
+/*
+** rintm_z4_z18:
+**	frintm	{z4\.s - z5\.s}, {z18\.s - z19\.s}
+**	ret
+*/
+TEST_XN (rintm_z4_z18, svfloat32x2_t, z4,
+	 svrintm_f32_x2 (z18),
+	 svrintm (z18))
+
+/*
+** rintm_z18_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	frintm	{z18\.s - z19\.s}, [^\n]+
+**	ret
+*/
+TEST_XN (rintm_z18_z23, svfloat32x2_t, z18,
+	 svrintm_f32_x2 (z23),
+	 svrintm (z23))
+
+/*
+** rintm_z23_z28:
+**	frintm	[^\n]+, {z28\.s - z29\.s}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (rintm_z23_z28, svfloat32x2_t, z23,
+	 svrintm_f32_x2 (z28),
+	 svrintm (z28))
+
+/*
+** rintm_z28_z0:
+**	frintm	{z28\.s - z29\.s}, {z0\.s - z1\.s}
+**	ret
+*/
+TEST_XN (rintm_z28_z0, svfloat32x2_t, z28,
+	 svrintm_f32_x2 (z0),
+	 svrintm (z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintm_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintm_u32_x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..be1efe3326eadc62bf504cfa5d51d9f45bcb8933
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintm_u32_x4.c
@@ -0,0 +1,73 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rintm_z0_z0:
+**	frintm	{z0\.s - z3\.s}, {z0\.s - z3\.s}
+**	ret
+*/
+TEST_XN (rintm_z0_z0, svfloat32x4_t, z0,
+	 svrintm_f32_x4 (z0),
+	 svrintm (z0))
+
+/*
+** rintm_z0_z4:
+**	frintm	{z0\.s - z3\.s}, {z4\.s - z7\.s}
+**	ret
+*/
+TEST_XN (rintm_z0_z4, svfloat32x4_t, z0,
+	 svrintm_f32_x4 (z4),
+	 svrintm (z4))
+
+/*
+** rintm_z4_z18:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	frintm	{z4\.s - z7\.s}, [^\n]+
+**	ret
+*/
+TEST_XN (rintm_z4_z18, svfloat32x4_t, z4,
+	 svrintm_f32_x4 (z18),
+	 svrintm (z18))
+
+/*
+** rintm_z18_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	frintm	{z[^\n]+}, {z.*}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (rintm_z18_z23, svfloat32x4_t, z18,
+	 svrintm_f32_x4 (z23),
+	 svrintm (z23))
+
+/*
+** rintm_z23_z28:
+**	frintm	[^\n]+, {z28\.s - z31\.s}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (rintm_z23_z28, svfloat32x4_t, z23,
+	 svrintm_f32_x4 (z28),
+	 svrintm (z28))
+
+/*
+** rintm_z28_z0:
+**	frintm	{z28\.s - z31\.s}, {z0\.s - z3\.s}
+**	ret
+*/
+TEST_XN (rintm_z28_z0, svfloat32x4_t, z28,
+	 svrintm_f32_x4 (z0),
+	 svrintm (z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintn_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintn_u32_x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..c54541b77870e6366d52dfa4da03b89e56d2feda
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintn_u32_x2.c
@@ -0,0 +1,61 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rintn_z0_z0:
+**	frintn	{z0\.s - z1\.s}, {z0\.s - z1\.s}
+**	ret
+*/
+TEST_XN (rintn_z0_z0, svfloat32x2_t, z0,
+	 svrintn_f32_x2 (z0),
+	 svrintn (z0))
+
+/*
+** rintn_z0_z4:
+**	frintn	{z0\.s - z1\.s}, {z4\.s - z5\.s}
+**	ret
+*/
+TEST_XN (rintn_z0_z4, svfloat32x2_t, z0,
+	 svrintn_f32_x2 (z4),
+	 svrintn (z4))
+
+/*
+** rintn_z4_z18:
+**	frintn	{z4\.s - z5\.s}, {z18\.s - z19\.s}
+**	ret
+*/
+TEST_XN (rintn_z4_z18, svfloat32x2_t, z4,
+	 svrintn_f32_x2 (z18),
+	 svrintn (z18))
+
+/*
+** rintn_z18_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	frintn	{z18\.s - z19\.s}, [^\n]+
+**	ret
+*/
+TEST_XN (rintn_z18_z23, svfloat32x2_t, z18,
+	 svrintn_f32_x2 (z23),
+	 svrintn (z23))
+
+/*
+** rintn_z23_z28:
+**	frintn	[^\n]+, {z28\.s - z29\.s}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (rintn_z23_z28, svfloat32x2_t, z23,
+	 svrintn_f32_x2 (z28),
+	 svrintn (z28))
+
+/*
+** rintn_z28_z0:
+**	frintn	{z28\.s - z29\.s}, {z0\.s - z1\.s}
+**	ret
+*/
+TEST_XN (rintn_z28_z0, svfloat32x2_t, z28,
+	 svrintn_f32_x2 (z0),
+	 svrintn (z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintn_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintn_u32_x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..ff6e55bbff01d2dea7b401993dd3a6abbc960c65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintn_u32_x4.c
@@ -0,0 +1,73 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rintn_z0_z0:
+**	frintn	{z0\.s - z3\.s}, {z0\.s - z3\.s}
+**	ret
+*/
+TEST_XN (rintn_z0_z0, svfloat32x4_t, z0,
+	 svrintn_f32_x4 (z0),
+	 svrintn (z0))
+
+/*
+** rintn_z0_z4:
+**	frintn	{z0\.s - z3\.s}, {z4\.s - z7\.s}
+**	ret
+*/
+TEST_XN (rintn_z0_z4, svfloat32x4_t, z0,
+	 svrintn_f32_x4 (z4),
+	 svrintn (z4))
+
+/*
+** rintn_z4_z18:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	frintn	{z4\.s - z7\.s}, [^\n]+
+**	ret
+*/
+TEST_XN (rintn_z4_z18, svfloat32x4_t, z4,
+	 svrintn_f32_x4 (z18),
+	 svrintn (z18))
+
+/*
+** rintn_z18_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	frintn	{z[^\n]+}, {z.*}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (rintn_z18_z23, svfloat32x4_t, z18,
+	 svrintn_f32_x4 (z23),
+	 svrintn (z23))
+
+/*
+** rintn_z23_z28:
+**	frintn	[^\n]+, {z28\.s - z31\.s}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (rintn_z23_z28, svfloat32x4_t, z23,
+	 svrintn_f32_x4 (z28),
+	 svrintn (z28))
+
+/*
+** rintn_z28_z0:
+**	frintn	{z28\.s - z31\.s}, {z0\.s - z3\.s}
+**	ret
+*/
+TEST_XN (rintn_z28_z0, svfloat32x4_t, z28,
+	 svrintn_f32_x4 (z0),
+	 svrintn (z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintp_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintp_u32_x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..722c0854e7bd35ceddf78d0910a6247e976fa351
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintp_u32_x2.c
@@ -0,0 +1,61 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rintp_z0_z0:
+**	frintp	{z0\.s - z1\.s}, {z0\.s - z1\.s}
+**	ret
+*/
+TEST_XN (rintp_z0_z0, svfloat32x2_t, z0,
+	 svrintp_f32_x2 (z0),
+	 svrintp (z0))
+
+/*
+** rintp_z0_z4:
+**	frintp	{z0\.s - z1\.s}, {z4\.s - z5\.s}
+**	ret
+*/
+TEST_XN (rintp_z0_z4, svfloat32x2_t, z0,
+	 svrintp_f32_x2 (z4),
+	 svrintp (z4))
+
+/*
+** rintp_z4_z18:
+**	frintp	{z4\.s - z5\.s}, {z18\.s - z19\.s}
+**	ret
+*/
+TEST_XN (rintp_z4_z18, svfloat32x2_t, z4,
+	 svrintp_f32_x2 (z18),
+	 svrintp (z18))
+
+/*
+** rintp_z18_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	frintp	{z18\.s - z19\.s}, [^\n]+
+**	ret
+*/
+TEST_XN (rintp_z18_z23, svfloat32x2_t, z18,
+	 svrintp_f32_x2 (z23),
+	 svrintp (z23))
+
+/*
+** rintp_z23_z28:
+**	frintp	[^\n]+, {z28\.s - z29\.s}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (rintp_z23_z28, svfloat32x2_t, z23,
+	 svrintp_f32_x2 (z28),
+	 svrintp (z28))
+
+/*
+** rintp_z28_z0:
+**	frintp	{z28\.s - z29\.s}, {z0\.s - z1\.s}
+**	ret
+*/
+TEST_XN (rintp_z28_z0, svfloat32x2_t, z28,
+	 svrintp_f32_x2 (z0),
+	 svrintp (z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintp_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintp_u32_x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..7c182575de724977cb8e88eb2eaef45774000312
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rintp_u32_x4.c
@@ -0,0 +1,73 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rintp_z0_z0:
+**	frintp	{z0\.s - z3\.s}, {z0\.s - z3\.s}
+**	ret
+*/
+TEST_XN (rintp_z0_z0, svfloat32x4_t, z0,
+	 svrintp_f32_x4 (z0),
+	 svrintp (z0))
+
+/*
+** rintp_z0_z4:
+**	frintp	{z0\.s - z3\.s}, {z4\.s - z7\.s}
+**	ret
+*/
+TEST_XN (rintp_z0_z4, svfloat32x4_t, z0,
+	 svrintp_f32_x4 (z4),
+	 svrintp (z4))
+
+/*
+** rintp_z4_z18:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	frintp	{z4\.s - z7\.s}, [^\n]+
+**	ret
+*/
+TEST_XN (rintp_z4_z18, svfloat32x4_t, z4,
+	 svrintp_f32_x4 (z18),
+	 svrintp (z18))
+
+/*
+** rintp_z18_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	frintp	{z[^\n]+}, {z.*}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (rintp_z18_z23, svfloat32x4_t, z18,
+	 svrintp_f32_x4 (z23),
+	 svrintp (z23))
+
+/*
+** rintp_z23_z28:
+**	frintp	[^\n]+, {z28\.s - z31\.s}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (rintp_z23_z28, svfloat32x4_t, z23,
+	 svrintp_f32_x4 (z28),
+	 svrintp (z28))
+
+/*
+** rintp_z28_z0:
+**	frintp	{z28\.s - z31\.s}, {z0\.s - z3\.s}
+**	ret
+*/
+TEST_XN (rintp_z28_z0, svfloat32x4_t, z28,
+	 svrintp_f32_x4 (z0),
+	 svrintp (z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s16_x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..28fe768421e3833b45f86393064ab5c3ef247546
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s16_x2.c
@@ -0,0 +1,207 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+**	srshl	{z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_XN (rshl_z0_z0_z4, svint16x2_t, z0,
+	 svrshl_s16_x2 (z0, z4),
+	 svrshl (z0, z4))
+
+/*
+** rshl_z0_z4_z0:
+**	srshl	{z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_XN (rshl_z0_z4_z0, svint16x2_t, z0,
+	 svrshl_s16_x2 (z4, z0),
+	 svrshl (z4, z0))
+
+/*
+** rshl_z0_z4_z28:
+** (
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	srshl	[^\n]+, {z28\.h - z29\.h}
+** |
+**	srshl	[^\n]+, {z28\.h - z29\.h}
+**	mov	[^\n]+
+**	mov	[^\n]+
+** )
+**	ret
+*/
+TEST_XN (rshl_z0_z4_z28, svint16x2_t, z0,
+	 svrshl_s16_x2 (z4, z28),
+	 svrshl (z4, z28))
+
+/*
+** rshl_z18_z18_z4:
+**	srshl	{z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_XN (rshl_z18_z18_z4, svint16x2_t, z18,
+	 svrshl_s16_x2 (z18, z4),
+	 svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z18:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	srshl	[^\n]+, {z18\.h - z19\.h}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (rshl_z23_z23_z18, svint16x2_t, z23,
+	 svrshl_s16_x2 (z23, z18),
+	 svrshl (z23, z18))
+
+/*
+** rshl_z28_z28_z0:
+**	srshl	{z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_XN (rshl_z28_z28_z0, svint16x2_t, z28,
+	 svrshl_s16_x2 (z28, z0),
+	 svrshl (z28, z0))
+
+/*
+** rshl_z0_z0_z18:
+**	srshl	{z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h}
+**	ret
+*/
+TEST_XN (rshl_z0_z0_z18, svint16x2_t, z0,
+	 svrshl_s16_x2 (z0, z18),
+	 svrshl (z0, z18))
+
+/*
+** rshl_z4_z4_z23:
+** (
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	srshl	{z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+** |
+**	srshl	{z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+** )
+**	ret
+*/
+TEST_XN (rshl_z4_z4_z23, svint16x2_t, z4,
+	 svrshl_s16_x2 (z4, z23),
+	 svrshl (z4, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+**	srshl	{z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svint16x2_t, svint16_t, z24,
+		svrshl_single_s16_x2 (z24, z0),
+		svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	srshl	{z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** |
+**	srshl	{z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h
+**	mov	[^\n]+
+**	mov	[^\n]+
+** )
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svint16x2_t, svint16_t, z24,
+		svrshl_single_s16_x2 (z28, z0),
+		svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+** (
+**	mov	z24\.d, z1\.d
+**	mov	z25\.d, z2\.d
+** |
+**	mov	z25\.d, z2\.d
+**	mov	z24\.d, z1\.d
+** )
+**	srshl	{z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svint16x2_t, svint16_t, z24,
+		svrshl_single_s16_x2 (z1, z0),
+		svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+**	srshl	{z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** (
+**	mov	z1\.d, z24\.d
+**	mov	z2\.d, z25\.d
+** |
+**	mov	z2\.d, z25\.d
+**	mov	z1\.d, z24\.d
+** )
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svint16x2_t, svint16_t, z1,
+		svrshl_single_s16_x2 (z24, z0),
+		svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	srshl	({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svint16x2_t, svint16_t, z1,
+		svrshl_single_s16_x2 (z1, z0),
+		svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+**	srshl	{z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svint16x2_t, svint16_t, z18,
+		svrshl_single_s16_x2 (z18, z0),
+		svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+**	...
+**	srshl	({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+**	...
+**	ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svint16x2_t, svint16_t,
+			z0_res = svrshl_single_s16_x2 (z1, z0),
+			z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+**	...
+**	srshl	{z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h
+**	...
+**	ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svint16x2_t, svint16_t,
+		    z0 = svrshl_single_s16_x2 (z0, z15),
+		    z0 = svrshl (z0, z15))
+
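+/* Note: the single-vector forms encode the shared operand in a 4-bit
+   register field, so z0-z15 can be used directly (as in
+   rshl_single_z0_z0_z15 above), while z16 and up must first be copied
+   to a low register; the test below expects that extra "mov".  */
+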
+/*
+** rshl_single_z24_z24_z16:
+**	mov	(z[0-7])\.d, z16\.d
+**	srshl	{z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svint16x2_t, svint16_t, z24,
+		svrshl_single_s16_x2 (z24, z16),
+		svrshl (z24, z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s16_x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..afa3ba92f98e67ae82ee65721882012240960133
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s16_x4.c
@@ -0,0 +1,249 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+**	srshl	{z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+**	ret
+*/
+TEST_XN (rshl_z0_z0_z4, svint16x4_t, z0,
+	 svrshl_s16_x4 (z0, z4),
+	 svrshl (z0, z4))
+
+/*
+** rshl_z0_z4_z0:
+**	srshl	{z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+**	ret
+*/
+TEST_XN (rshl_z0_z4_z0, svint16x4_t, z0,
+	 svrshl_s16_x4 (z4, z0),
+	 svrshl (z4, z0))
+
+/*
+** rshl_z0_z4_z28:
+** (
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	srshl	[^\n]+, {z28\.h - z31\.h}
+** |
+**	srshl	[^\n]+, {z28\.h - z31\.h}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+** )
+**	ret
+*/
+TEST_XN (rshl_z0_z4_z28, svint16x4_t, z0,
+	 svrshl_s16_x4 (z4, z28),
+	 svrshl (z4, z28))
+
+/*
+** rshl_z18_z18_z4:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	srshl	[^\n]+, {z4\.h - z7\.h}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (rshl_z18_z18_z4, svint16x4_t, z18,
+	 svrshl_s16_x4 (z18, z4),
+	 svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z28:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	srshl	[^\n]+, {z28\.h - z31\.h}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (rshl_z23_z23_z28, svint16x4_t, z23,
+	 svrshl_s16_x4 (z23, z28),
+	 svrshl (z23, z28))
+
+/*
+** rshl_z28_z28_z0:
+**	srshl	{z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h}
+**	ret
+*/
+TEST_XN (rshl_z28_z28_z0, svint16x4_t, z28,
+	 svrshl_s16_x4 (z28, z0),
+	 svrshl (z28, z0))
+
+/*
+** rshl_z0_z0_z18:
+** (
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	srshl	{z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** |
+**	srshl	{z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+** )
+**	ret
+*/
+TEST_XN (rshl_z0_z0_z18, svint16x4_t, z0,
+	 svrshl_s16_x4 (z0, z18),
+	 svrshl (z0, z18))
+
+/*
+** rshl_z4_z4_z23:
+** (
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	srshl	{z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** |
+**	srshl	{z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+** )
+**	ret
+*/
+TEST_XN (rshl_z4_z4_z23, svint16x4_t, z4,
+	 svrshl_s16_x4 (z4, z23),
+	 svrshl (z4, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+**	srshl	{z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svint16x4_t, svint16_t, z24,
+		svrshl_single_s16_x4 (z24, z0),
+		svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	srshl	{z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** |
+**	srshl	{z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+** )
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svint16x4_t, svint16_t, z24,
+		svrshl_single_s16_x4 (z28, z0),
+		svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	srshl	{z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svint16x4_t, svint16_t, z24,
+		svrshl_single_s16_x4 (z1, z0),
+		svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+**	srshl	{z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svint16x4_t, svint16_t, z1,
+		svrshl_single_s16_x4 (z24, z0),
+		svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	srshl	({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svint16x4_t, svint16_t, z1,
+		svrshl_single_s16_x4 (z1, z0),
+		svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	srshl	[^\n]+, z0\.h
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svint16x4_t, svint16_t, z18,
+		svrshl_single_s16_x4 (z18, z0),
+		svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+**	...
+**	srshl	({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+**	...
+**	ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svint16x4_t, svint16_t,
+			z0_res = svrshl_single_s16_x4 (z1, z0),
+			z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+**	...
+**	srshl	{z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h
+**	...
+**	ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svint16x4_t, svint16_t,
+		    z0 = svrshl_single_s16_x4 (z0, z15),
+		    z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+**	mov	(z[0-7])\.d, z16\.d
+**	srshl	{z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svint16x4_t, svint16_t, z24,
+		svrshl_single_s16_x4 (z24, z16),
+		svrshl (z24, z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s32_x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..e67d14f76bbf5b6bc25e50663cb94bfb1c663bc8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s32_x2.c
@@ -0,0 +1,207 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+**	srshl	{z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+**	ret
+*/
+TEST_XN (rshl_z0_z0_z4, svint32x2_t, z0,
+	 svrshl_s32_x2 (z0, z4),
+	 svrshl (z0, z4))
+
+/*
+** rshl_z0_z4_z0:
+**	srshl	{z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+**	ret
+*/
+TEST_XN (rshl_z0_z4_z0, svint32x2_t, z0,
+	 svrshl_s32_x2 (z4, z0),
+	 svrshl (z4, z0))
+
+/*
+** rshl_z0_z4_z28:
+** (
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	srshl	[^\n]+, {z28\.s - z29\.s}
+** |
+**	srshl	[^\n]+, {z28\.s - z29\.s}
+**	mov	[^\n]+
+**	mov	[^\n]+
+** )
+**	ret
+*/
+TEST_XN (rshl_z0_z4_z28, svint32x2_t, z0,
+	 svrshl_s32_x2 (z4, z28),
+	 svrshl (z4, z28))
+
+/*
+** rshl_z18_z18_z4:
+**	srshl	{z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s}
+**	ret
+*/
+TEST_XN (rshl_z18_z18_z4, svint32x2_t, z18,
+	 svrshl_s32_x2 (z18, z4),
+	 svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z18:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	srshl	[^\n]+, {z18\.s - z19\.s}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (rshl_z23_z23_z18, svint32x2_t, z23,
+	 svrshl_s32_x2 (z23, z18),
+	 svrshl (z23, z18))
+
+/*
+** rshl_z28_z28_z0:
+**	srshl	{z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s}
+**	ret
+*/
+TEST_XN (rshl_z28_z28_z0, svint32x2_t, z28,
+	 svrshl_s32_x2 (z28, z0),
+	 svrshl (z28, z0))
+
+/*
+** rshl_z0_z0_z18:
+**	srshl	{z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s}
+**	ret
+*/
+TEST_XN (rshl_z0_z0_z18, svint32x2_t, z0,
+	 svrshl_s32_x2 (z0, z18),
+	 svrshl (z0, z18))
+
+/*
+** rshl_z4_z4_z23:
+** (
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	srshl	{z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+** |
+**	srshl	{z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+** )
+**	ret
+*/
+TEST_XN (rshl_z4_z4_z23, svint32x2_t, z4,
+	 svrshl_s32_x2 (z4, z23),
+	 svrshl (z4, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+**	srshl	{z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svint32x2_t, svint32_t, z24,
+		svrshl_single_s32_x2 (z24, z0),
+		svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	srshl	{z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** |
+**	srshl	{z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s
+**	mov	[^\n]+
+**	mov	[^\n]+
+** )
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svint32x2_t, svint32_t, z24,
+		svrshl_single_s32_x2 (z28, z0),
+		svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+** (
+**	mov	z24\.d, z1\.d
+**	mov	z25\.d, z2\.d
+** |
+**	mov	z25\.d, z2\.d
+**	mov	z24\.d, z1\.d
+** )
+**	srshl	{z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svint32x2_t, svint32_t, z24,
+		svrshl_single_s32_x2 (z1, z0),
+		svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+**	srshl	{z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** (
+**	mov	z1\.d, z24\.d
+**	mov	z2\.d, z25\.d
+** |
+**	mov	z2\.d, z25\.d
+**	mov	z1\.d, z24\.d
+** )
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svint32x2_t, svint32_t, z1,
+		svrshl_single_s32_x2 (z24, z0),
+		svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	srshl	({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svint32x2_t, svint32_t, z1,
+		svrshl_single_s32_x2 (z1, z0),
+		svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+**	srshl	{z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svint32x2_t, svint32_t, z18,
+		svrshl_single_s32_x2 (z18, z0),
+		svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+**	...
+**	srshl	({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+**	...
+**	ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svint32x2_t, svint32_t,
+			z0_res = svrshl_single_s32_x2 (z1, z0),
+			z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+**	...
+**	srshl	{z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s
+**	...
+**	ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svint32x2_t, svint32_t,
+		    z0 = svrshl_single_s32_x2 (z0, z15),
+		    z0 = svrshl (z0, z15))
+
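+/* For reference: svrshl shifts each element left by the signed,
+   per-element amount in the single vector operand, with negative
+   amounts giving a rounding shift right.  An illustrative
+   (hypothetical) use:
+
+     svint32x2_t scale (svint32x2_t x, svint32_t amount) __arm_streaming
+     {
+       return svrshl (x, amount);	// or svrshl_single_s32_x2
+     }
+*/
+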
+** ret +*/ +TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svint32x2_t, svint32_t, + z0 = svrshl_single_s32_x2 (z0, z15), + z0 = svrshl (z0, z15)) + +/* +** rshl_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** srshl {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z24_z16, svint32x2_t, svint32_t, z24, + svrshl_single_s32_x2 (z24, z16), + svrshl (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..676d9ebc0995e4e72c1f5b1c52d79b5afe6752aa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s32_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** rshl_z0_z0_z4: +** srshl {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (rshl_z0_z0_z4, svint32x4_t, z0, + svrshl_s32_x4 (z0, z4), + svrshl (z0, z4)) + +/* +** rshl_z0_z4_z0: +** srshl {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (rshl_z0_z4_z0, svint32x4_t, z0, + svrshl_s32_x4 (z4, z0), + svrshl (z4, z0)) + +/* +** rshl_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl [^\n]+, {z28\.s - z31\.s} +** | +** srshl [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (rshl_z0_z4_z28, svint32x4_t, z0, + svrshl_s32_x4 (z4, z28), + svrshl (z4, z28)) + +/* +** rshl_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl [^\n]+, {z4\.s - z7\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (rshl_z18_z18_z4, svint32x4_t, z18, + svrshl_s32_x4 (z18, z4), + svrshl (z18, z4)) + +/* +** rshl_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (rshl_z23_z23_z28, svint32x4_t, z23, + svrshl_s32_x4 (z23, z28), + svrshl (z23, z28)) + +/* +** rshl_z28_z28_z0: +** srshl {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (rshl_z28_z28_z0, svint32x4_t, z28, + svrshl_s32_x4 (z28, z0), + svrshl (z28, z0)) + +/* +** rshl_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** | +** srshl {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (rshl_z0_z0_z18, svint32x4_t, z0, + svrshl_s32_x4 (z0, z18), + svrshl (z0, z18)) + +/* +** rshl_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** | +** srshl {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (rshl_z4_z4_z23, svint32x4_t, z4, + svrshl_s32_x4 (z4, z23), + svrshl (z4, z23)) + +/* +** rshl_single_z24_z24_z0: +** srshl {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z24_z0, svint32x4_t, svint32_t, z24, + svrshl_single_s32_x4 (z24, z0), + svrshl (z24, z0)) + +/* +** rshl_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** | +** srshl {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov 
[^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z28_z0, svint32x4_t, svint32_t, z24, + svrshl_single_s32_x4 (z28, z0), + svrshl (z28, z0)) + +/* +** rshl_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z1_z0, svint32x4_t, svint32_t, z24, + svrshl_single_s32_x4 (z1, z0), + svrshl (z1, z0)) + +/* +** rshl_single_z1_z24_z0: +** srshl {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (rshl_single_z1_z24_z0, svint32x4_t, svint32_t, z1, + svrshl_single_s32_x4 (z24, z0), + svrshl (z24, z0)) + +/* +** rshl_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (rshl_single_z1_z1_z0, svint32x4_t, svint32_t, z1, + svrshl_single_s32_x4 (z1, z0), + svrshl (z1, z0)) + +/* +** rshl_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl [^\n]+, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (rshl_single_z18_z18_z0, svint32x4_t, svint32_t, z18, + svrshl_single_s32_x4 (z18, z0), + svrshl (z18, z0)) + +/* +** rshl_single_awkward: +** ... +** srshl ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svint32x4_t, svint32_t, + z0_res = svrshl_single_s32_x4 (z1, z0), + z0_res = svrshl (z1, z0)) + +/* +** rshl_single_z0_z0_z15: +** ... +** srshl {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svint32x4_t, svint32_t, + z0 = svrshl_single_s32_x4 (z0, z15), + z0 = svrshl (z0, z15)) + +/* +** rshl_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** srshl {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z24_z16, svint32x4_t, svint32_t, z24, + svrshl_single_s32_x4 (z24, z16), + svrshl (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..ce12ebd3d6790b79b85f4506209dbd612b341635 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s64_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** rshl_z0_z0_z4: +** srshl {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (rshl_z0_z0_z4, svint64x2_t, z0, + svrshl_s64_x2 (z0, z4), + svrshl (z0, z4)) + +/* +** rshl_z0_z4_z0: +** srshl {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (rshl_z0_z4_z0, svint64x2_t, z0, + svrshl_s64_x2 (z4, z0), + svrshl (z4, z0)) + +/* +** rshl_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** srshl [^\n]+, {z28\.d - z29\.d} +** | +** srshl [^\n]+, {z28\.d - z29\.d} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (rshl_z0_z4_z28, svint64x2_t, z0, + svrshl_s64_x2 (z4, z28), + svrshl (z4, z28)) + +/* +** rshl_z18_z18_z4: +** srshl {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (rshl_z18_z18_z4, svint64x2_t, z18, + svrshl_s64_x2 (z18, z4), + svrshl (z18, z4)) + +/* +** rshl_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** srshl [^\n]+, {z18\.d - z19\.d} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN 
(rshl_z23_z23_z18, svint64x2_t, z23, + svrshl_s64_x2 (z23, z18), + svrshl (z23, z18)) + +/* +** rshl_z28_z28_z0: +** srshl {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_XN (rshl_z28_z28_z0, svint64x2_t, z28, + svrshl_s64_x2 (z28, z0), + svrshl (z28, z0)) + +/* +** rshl_z0_z0_z18: +** srshl {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d} +** ret +*/ +TEST_XN (rshl_z0_z0_z18, svint64x2_t, z0, + svrshl_s64_x2 (z0, z18), + svrshl (z0, z18)) + +/* +** rshl_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** srshl {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** | +** srshl {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (rshl_z4_z4_z23, svint64x2_t, z4, + svrshl_s64_x2 (z4, z23), + svrshl (z4, z23)) + +/* +** rshl_single_z24_z24_z0: +** srshl {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z24_z0, svint64x2_t, svint64_t, z24, + svrshl_single_s64_x2 (z24, z0), + svrshl (z24, z0)) + +/* +** rshl_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** srshl {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** | +** srshl {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z28_z0, svint64x2_t, svint64_t, z24, + svrshl_single_s64_x2 (z28, z0), + svrshl (z28, z0)) + +/* +** rshl_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** srshl {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z1_z0, svint64x2_t, svint64_t, z24, + svrshl_single_s64_x2 (z1, z0), + svrshl (z1, z0)) + +/* +** rshl_single_z1_z24_z0: +** srshl {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (rshl_single_z1_z24_z0, svint64x2_t, svint64_t, z1, + svrshl_single_s64_x2 (z24, z0), + svrshl (z24, z0)) + +/* +** rshl_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** srshl ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (rshl_single_z1_z1_z0, svint64x2_t, svint64_t, z1, + svrshl_single_s64_x2 (z1, z0), + svrshl (z1, z0)) + +/* +** rshl_single_z18_z18_z0: +** srshl {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (rshl_single_z18_z18_z0, svint64x2_t, svint64_t, z18, + svrshl_single_s64_x2 (z18, z0), + svrshl (z18, z0)) + +/* +** rshl_single_awkward: +** ... +** srshl ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svint64x2_t, svint64_t, + z0_res = svrshl_single_s64_x2 (z1, z0), + z0_res = svrshl (z1, z0)) + +/* +** rshl_single_z0_z0_z15: +** ... +** srshl {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svint64x2_t, svint64_t, + z0 = svrshl_single_s64_x2 (z0, z15), + z0 = svrshl (z0, z15)) + +/* +** rshl_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** srshl {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z24_z16, svint64x2_t, svint64_t, z24, + svrshl_single_s64_x2 (z24, z16), + svrshl (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..9494302c382b7e49ea8af07041484e95cd356c69 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s64_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** rshl_z0_z0_z4: +** srshl {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (rshl_z0_z0_z4, svint64x4_t, z0, + svrshl_s64_x4 (z0, z4), + svrshl (z0, z4)) + +/* +** rshl_z0_z4_z0: +** srshl {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (rshl_z0_z4_z0, svint64x4_t, z0, + svrshl_s64_x4 (z4, z0), + svrshl (z4, z0)) + +/* +** rshl_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl [^\n]+, {z28\.d - z31\.d} +** | +** srshl [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (rshl_z0_z4_z28, svint64x4_t, z0, + svrshl_s64_x4 (z4, z28), + svrshl (z4, z28)) + +/* +** rshl_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl [^\n]+, {z4\.d - z7\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (rshl_z18_z18_z4, svint64x4_t, z18, + svrshl_s64_x4 (z18, z4), + svrshl (z18, z4)) + +/* +** rshl_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (rshl_z23_z23_z28, svint64x4_t, z23, + svrshl_s64_x4 (z23, z28), + svrshl (z23, z28)) + +/* +** rshl_z28_z28_z0: +** srshl {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (rshl_z28_z28_z0, svint64x4_t, z28, + svrshl_s64_x4 (z28, z0), + svrshl (z28, z0)) + +/* +** rshl_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+ +** | +** srshl {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (rshl_z0_z0_z18, svint64x4_t, z0, + svrshl_s64_x4 (z0, z18), + svrshl (z0, z18)) + +/* +** rshl_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** | +** srshl {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (rshl_z4_z4_z23, svint64x4_t, z4, + svrshl_s64_x4 (z4, z23), + svrshl (z4, z23)) + +/* +** rshl_single_z24_z24_z0: +** srshl {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z24_z0, svint64x4_t, svint64_t, z24, + svrshl_single_s64_x4 (z24, z0), + svrshl (z24, z0)) + +/* +** rshl_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** | +** srshl {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov 
[^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z28_z0, svint64x4_t, svint64_t, z24, + svrshl_single_s64_x4 (z28, z0), + svrshl (z28, z0)) + +/* +** rshl_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z1_z0, svint64x4_t, svint64_t, z24, + svrshl_single_s64_x4 (z1, z0), + svrshl (z1, z0)) + +/* +** rshl_single_z1_z24_z0: +** srshl {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (rshl_single_z1_z24_z0, svint64x4_t, svint64_t, z1, + svrshl_single_s64_x4 (z24, z0), + svrshl (z24, z0)) + +/* +** rshl_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (rshl_single_z1_z1_z0, svint64x4_t, svint64_t, z1, + svrshl_single_s64_x4 (z1, z0), + svrshl (z1, z0)) + +/* +** rshl_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl [^\n]+, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (rshl_single_z18_z18_z0, svint64x4_t, svint64_t, z18, + svrshl_single_s64_x4 (z18, z0), + svrshl (z18, z0)) + +/* +** rshl_single_awkward: +** ... +** srshl ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svint64x4_t, svint64_t, + z0_res = svrshl_single_s64_x4 (z1, z0), + z0_res = svrshl (z1, z0)) + +/* +** rshl_single_z0_z0_z15: +** ... +** srshl {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svint64x4_t, svint64_t, + z0 = svrshl_single_s64_x4 (z0, z15), + z0 = svrshl (z0, z15)) + +/* +** rshl_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** srshl {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z24_z16, svint64x4_t, svint64_t, z24, + svrshl_single_s64_x4 (z24, z16), + svrshl (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..49e7b9fd37c029b8d20e0e30323c491b582bed7d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s8_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** rshl_z0_z0_z4: +** srshl {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (rshl_z0_z0_z4, svint8x2_t, z0, + svrshl_s8_x2 (z0, z4), + svrshl (z0, z4)) + +/* +** rshl_z0_z4_z0: +** srshl {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (rshl_z0_z4_z0, svint8x2_t, z0, + svrshl_s8_x2 (z4, z0), + svrshl (z4, z0)) + +/* +** rshl_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** srshl [^\n]+, {z28\.b - z29\.b} +** | +** srshl [^\n]+, {z28\.b - z29\.b} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (rshl_z0_z4_z28, svint8x2_t, z0, + svrshl_s8_x2 (z4, z28), + svrshl (z4, z28)) + +/* +** rshl_z18_z18_z4: +** srshl {z18\.b - z19\.b}, {z18\.b - z19\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (rshl_z18_z18_z4, svint8x2_t, z18, + svrshl_s8_x2 (z18, z4), + svrshl (z18, z4)) + +/* +** rshl_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** srshl [^\n]+, {z18\.b - z19\.b} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (rshl_z23_z23_z18, 
svint8x2_t, z23, + svrshl_s8_x2 (z23, z18), + svrshl (z23, z18)) + +/* +** rshl_z28_z28_z0: +** srshl {z28\.b - z29\.b}, {z28\.b - z29\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_XN (rshl_z28_z28_z0, svint8x2_t, z28, + svrshl_s8_x2 (z28, z0), + svrshl (z28, z0)) + +/* +** rshl_z0_z0_z18: +** srshl {z0\.b - z1\.b}, {z0\.b - z1\.b}, {z18\.b - z19\.b} +** ret +*/ +TEST_XN (rshl_z0_z0_z18, svint8x2_t, z0, + svrshl_s8_x2 (z0, z18), + svrshl (z0, z18)) + +/* +** rshl_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** srshl {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+ +** | +** srshl {z4\.b - z5\.b}, {z4\.b - z5\.b}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (rshl_z4_z4_z23, svint8x2_t, z4, + svrshl_s8_x2 (z4, z23), + svrshl (z4, z23)) + +/* +** rshl_single_z24_z24_z0: +** srshl {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z24_z0, svint8x2_t, svint8_t, z24, + svrshl_single_s8_x2 (z24, z0), + svrshl (z24, z0)) + +/* +** rshl_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** srshl {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** | +** srshl {z28\.b - z29\.b}, {z28\.b - z29\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z28_z0, svint8x2_t, svint8_t, z24, + svrshl_single_s8_x2 (z28, z0), + svrshl (z28, z0)) + +/* +** rshl_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** srshl {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z1_z0, svint8x2_t, svint8_t, z24, + svrshl_single_s8_x2 (z1, z0), + svrshl (z1, z0)) + +/* +** rshl_single_z1_z24_z0: +** srshl {z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (rshl_single_z1_z24_z0, svint8x2_t, svint8_t, z1, + svrshl_single_s8_x2 (z24, z0), + svrshl (z24, z0)) + +/* +** rshl_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** srshl ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (rshl_single_z1_z1_z0, svint8x2_t, svint8_t, z1, + svrshl_single_s8_x2 (z1, z0), + svrshl (z1, z0)) + +/* +** rshl_single_z18_z18_z0: +** srshl {z18\.b - z19\.b}, {z18\.b - z19\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (rshl_single_z18_z18_z0, svint8x2_t, svint8_t, z18, + svrshl_single_s8_x2 (z18, z0), + svrshl (z18, z0)) + +/* +** rshl_single_awkward: +** ... +** srshl ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svint8x2_t, svint8_t, + z0_res = svrshl_single_s8_x2 (z1, z0), + z0_res = svrshl (z1, z0)) + +/* +** rshl_single_z0_z0_z15: +** ... +** srshl {z0\.b - z1\.b}, {z0\.b - z1\.b}, z15\.b +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svint8x2_t, svint8_t, + z0 = svrshl_single_s8_x2 (z0, z15), + z0 = svrshl (z0, z15)) + +/* +** rshl_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** srshl {z24\.b - z25\.b}, {z24\.b - z25\.b}, \1\.b +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z24_z16, svint8x2_t, svint8_t, z24, + svrshl_single_s8_x2 (z24, z16), + svrshl (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..ee33999ec0de359bab99a365f4c07193cfb39f2e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_s8_x4.c @@ -0,0 +1,249 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** rshl_z0_z0_z4: +** srshl {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (rshl_z0_z0_z4, svint8x4_t, z0, + svrshl_s8_x4 (z0, z4), + svrshl (z0, z4)) + +/* +** rshl_z0_z4_z0: +** srshl {z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (rshl_z0_z4_z0, svint8x4_t, z0, + svrshl_s8_x4 (z4, z0), + svrshl (z4, z0)) + +/* +** rshl_z0_z4_z28: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl [^\n]+, {z28\.b - z31\.b} +** | +** srshl [^\n]+, {z28\.b - z31\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (rshl_z0_z4_z28, svint8x4_t, z0, + svrshl_s8_x4 (z4, z28), + svrshl (z4, z28)) + +/* +** rshl_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl [^\n]+, {z4\.b - z7\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (rshl_z18_z18_z4, svint8x4_t, z18, + svrshl_s8_x4 (z18, z4), + svrshl (z18, z4)) + +/* +** rshl_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl [^\n]+, {z28\.b - z31\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (rshl_z23_z23_z28, svint8x4_t, z23, + svrshl_s8_x4 (z23, z28), + svrshl (z23, z28)) + +/* +** rshl_z28_z28_z0: +** srshl {z28\.b - z31\.b}, {z28\.b - z31\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_XN (rshl_z28_z28_z0, svint8x4_t, z28, + svrshl_s8_x4 (z28, z0), + svrshl (z28, z0)) + +/* +** rshl_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+ +** | +** srshl {z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (rshl_z0_z0_z18, svint8x4_t, z0, + svrshl_s8_x4 (z0, z18), + svrshl (z0, z18)) + +/* +** rshl_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+ +** | +** srshl {z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (rshl_z4_z4_z23, svint8x4_t, z4, + svrshl_s8_x4 (z4, z23), + svrshl (z4, z23)) + +/* +** rshl_single_z24_z24_z0: +** srshl {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z24_z0, svint8x4_t, svint8_t, z24, + svrshl_single_s8_x4 (z24, z0), + svrshl (z24, z0)) + +/* +** rshl_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** | +** srshl {z28\.b - z31\.b}, {z28\.b - z31\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ 
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svint8x4_t, svint8_t, z24, + svrshl_single_s8_x4 (z28, z0), + svrshl (z28, z0)) + +/* +** rshl_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z1_z0, svint8x4_t, svint8_t, z24, + svrshl_single_s8_x4 (z1, z0), + svrshl (z1, z0)) + +/* +** rshl_single_z1_z24_z0: +** srshl {z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (rshl_single_z1_z24_z0, svint8x4_t, svint8_t, z1, + svrshl_single_s8_x4 (z24, z0), + svrshl (z24, z0)) + +/* +** rshl_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl ({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (rshl_single_z1_z1_z0, svint8x4_t, svint8_t, z1, + svrshl_single_s8_x4 (z1, z0), + svrshl (z1, z0)) + +/* +** rshl_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** srshl [^\n]+, z0\.b +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (rshl_single_z18_z18_z0, svint8x4_t, svint8_t, z18, + svrshl_single_s8_x4 (z18, z0), + svrshl (z18, z0)) + +/* +** rshl_single_awkward: +** ... +** srshl ({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svint8x4_t, svint8_t, + z0_res = svrshl_single_s8_x4 (z1, z0), + z0_res = svrshl (z1, z0)) + +/* +** rshl_single_z0_z0_z15: +** ... +** srshl {z0\.b - z3\.b}, {z0\.b - z3\.b}, z15\.b +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svint8x4_t, svint8_t, + z0 = svrshl_single_s8_x4 (z0, z15), + z0 = svrshl (z0, z15)) + +/* +** rshl_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** srshl {z24\.b - z27\.b}, {z24\.b - z27\.b}, \1\.b +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z24_z16, svint8x4_t, svint8_t, z24, + svrshl_single_s8_x4 (z24, z16), + svrshl (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..89fc1ec034b54dee50a1539b4ac241505ddc02d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u16_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** rshl_z0_z0_z4: +** urshl {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_DUAL_XN (rshl_z0_z0_z4, svuint16x2_t, svint16x2_t, z0, + svrshl_u16_x2 (z0, z4), + svrshl (z0, z4)) + +/* +** rshl_z4_z4_z0: +** urshl {z4\.h - z5\.h}, {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_DUAL_XN (rshl_z4_z4_z0, svint16x2_t, svuint16x2_t, z4, + svrshl_u16_x2 (z4, z0), + svrshl (z4, z0)) + +/* +** rshl_z0_z28_z4: +** ( +** mov [^\n]+ +** mov [^\n]+ +** urshl [^\n]+, {z4\.h - z5\.h} +** | +** urshl [^\n]+, {z4\.h - z5\.h} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_DUAL_XN (rshl_z0_z28_z4, svuint16x2_t, svint16x2_t, z0, + svrshl_u16_x2 (z28, z4), + svrshl (z28, z4)) + +/* +** rshl_z18_z18_z4: +** urshl {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_DUAL_XN (rshl_z18_z18_z4, svuint16x2_t, svint16x2_t, z18, + svrshl_u16_x2 (z18, z4), + svrshl (z18, z4)) + +/* +** rshl_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** urshl [^\n]+, {z18\.h - z19\.h} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ 
+TEST_DUAL_XN (rshl_z23_z23_z18, svint16x2_t, svuint16x2_t, z23, + svrshl_u16_x2 (z23, z18), + svrshl (z23, z18)) + +/* +** rshl_z28_z28_z4: +** urshl {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_DUAL_XN (rshl_z28_z28_z4, svuint16x2_t, svint16x2_t, z28, + svrshl_u16_x2 (z28, z4), + svrshl (z28, z4)) + +/* +** rshl_z4_z4_z18: +** urshl {z4\.h - z5\.h}, {z4\.h - z5\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_DUAL_XN (rshl_z4_z4_z18, svint16x2_t, svuint16x2_t, z4, + svrshl_u16_x2 (z4, z18), + svrshl (z4, z18)) + +/* +** rshl_z28_z28_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** urshl {z28\.h - z29\.h}, {z28\.h - z29\.h}, [^\n]+ +** | +** urshl {z28\.h - z29\.h}, {z28\.h - z29\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_DUAL_XN (rshl_z28_z28_z23, svuint16x2_t, svint16x2_t, z28, + svrshl_u16_x2 (z28, z23), + svrshl (z28, z23)) + +/* +** rshl_single_z24_z24_z0: +** urshl {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z24_z0, svuint16x2_t, svint16_t, z24, + svrshl_single_u16_x2 (z24, z0), + svrshl (z24, z0)) + +/* +** rshl_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** urshl {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** | +** urshl {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z28_z0, svuint16x2_t, svint16_t, z24, + svrshl_single_u16_x2 (z28, z0), + svrshl (z28, z0)) + +/* +** rshl_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** urshl {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z1_z0, svuint16x2_t, svint16_t, z24, + svrshl_single_u16_x2 (z1, z0), + svrshl (z1, z0)) + +/* +** rshl_single_z1_z24_z0: +** urshl {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (rshl_single_z1_z24_z0, svuint16x2_t, svint16_t, z1, + svrshl_single_u16_x2 (z24, z0), + svrshl (z24, z0)) + +/* +** rshl_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** urshl ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (rshl_single_z1_z1_z0, svuint16x2_t, svint16_t, z1, + svrshl_single_u16_x2 (z1, z0), + svrshl (z1, z0)) + +/* +** rshl_single_z18_z18_z0: +** urshl {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (rshl_single_z18_z18_z0, svuint16x2_t, svint16_t, z18, + svrshl_single_u16_x2 (z18, z0), + svrshl (z18, z0)) + +/* +** rshl_single_awkward: +** ... +** urshl ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svuint16x2_t, svint16_t, + z0_res = svrshl_single_u16_x2 (z1, z0), + z0_res = svrshl (z1, z0)) + +/* +** rshl_single_z0_z0_z15: +** ... +** urshl {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svuint16x2_t, svint16_t, + z0 = svrshl_single_u16_x2 (z0, z15), + z0 = svrshl (z0, z15)) + +/* +** rshl_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** urshl {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z24_z16, svuint16x2_t, svint16_t, z24, + svrshl_single_u16_x2 (z24, z16), + svrshl (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..b9976b291c3d9e522b58711a340619f4c3984485 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u16_x4.c @@ -0,0 +1,228 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** rshl_z0_z0_z4: +** urshl {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_DUAL_XN (rshl_z0_z0_z4, svuint16x4_t, svint16x4_t, z0, + svrshl_u16_x4 (z0, z4), + svrshl (z0, z4)) + +/* +** rshl_z4_z4_z0: +** urshl {z4\.h - z7\.h}, {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_DUAL_XN (rshl_z4_z4_z0, svint16x4_t, svuint16x4_t, z4, + svrshl_u16_x4 (z4, z0), + svrshl (z4, z0)) + +/* +** rshl_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** urshl [^\n]+, {z4\.h - z7\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (rshl_z18_z18_z4, svuint16x4_t, svint16x4_t, z18, + svrshl_u16_x4 (z18, z4), + svrshl (z18, z4)) + +/* +** rshl_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** urshl [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (rshl_z23_z23_z28, svint16x4_t, svuint16x4_t, z23, + svrshl_u16_x4 (z23, z28), + svrshl (z23, z28)) + +/* +** rshl_z28_z28_z4: +** urshl {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_DUAL_XN (rshl_z28_z28_z4, svuint16x4_t, svint16x4_t, z28, + svrshl_u16_x4 (z28, z4), + svrshl (z28, z4)) + +/* +** rshl_z4_z4_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** urshl {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** | +** urshl {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_DUAL_XN (rshl_z4_z4_z18, svint16x4_t, svuint16x4_t, z4, + svrshl_u16_x4 (z4, z18), + svrshl (z4, z18)) + +/* +** rshl_z0_z0_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** urshl {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** | +** urshl {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_DUAL_XN (rshl_z0_z0_z23, svuint16x4_t, svint16x4_t, z0, + svrshl_u16_x4 (z0, z23), + svrshl (z0, z23)) + +/* +** rshl_single_z24_z24_z0: +** urshl {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z24_z0, svuint16x4_t, svint16_t, z24, + svrshl_single_u16_x4 (z24, z0), + svrshl (z24, z0)) + +/* +** rshl_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** urshl {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** | +** urshl {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z28_z0, svuint16x4_t, svint16_t, z24, + svrshl_single_u16_x4 (z28, z0), + svrshl (z28, z0)) + +/* +** rshl_single_z24_z1_z0: +** mov [^\n]+ +** mov 
[^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** urshl {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z1_z0, svuint16x4_t, svint16_t, z24, + svrshl_single_u16_x4 (z1, z0), + svrshl (z1, z0)) + +/* +** rshl_single_z1_z24_z0: +** urshl {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (rshl_single_z1_z24_z0, svuint16x4_t, svint16_t, z1, + svrshl_single_u16_x4 (z24, z0), + svrshl (z24, z0)) + +/* +** rshl_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** urshl ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (rshl_single_z1_z1_z0, svuint16x4_t, svint16_t, z1, + svrshl_single_u16_x4 (z1, z0), + svrshl (z1, z0)) + +/* +** rshl_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** urshl [^\n]+, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (rshl_single_z18_z18_z0, svuint16x4_t, svint16_t, z18, + svrshl_single_u16_x4 (z18, z0), + svrshl (z18, z0)) + +/* +** rshl_single_awkward: +** ... +** urshl ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svuint16x4_t, svint16_t, + z0_res = svrshl_single_u16_x4 (z1, z0), + z0_res = svrshl (z1, z0)) + +/* +** rshl_single_z0_z0_z15: +** ... +** urshl {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svuint16x4_t, svint16_t, + z0 = svrshl_single_u16_x4 (z0, z15), + z0 = svrshl (z0, z15)) + +/* +** rshl_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** urshl {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z24_z16, svuint16x4_t, svint16_t, z24, + svrshl_single_u16_x4 (z24, z16), + svrshl (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..431e608d6cf07610ff5b50d462bcbd919bc6b507 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u32_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** rshl_z0_z0_z4: +** urshl {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_DUAL_XN (rshl_z0_z0_z4, svuint32x2_t, svint32x2_t, z0, + svrshl_u32_x2 (z0, z4), + svrshl (z0, z4)) + +/* +** rshl_z4_z4_z0: +** urshl {z4\.s - z5\.s}, {z4\.s - z5\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_DUAL_XN (rshl_z4_z4_z0, svint32x2_t, svuint32x2_t, z4, + svrshl_u32_x2 (z4, z0), + svrshl (z4, z0)) + +/* +** rshl_z0_z28_z4: +** ( +** mov [^\n]+ +** mov [^\n]+ +** urshl [^\n]+, {z4\.s - z5\.s} +** | +** urshl [^\n]+, {z4\.s - z5\.s} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_DUAL_XN (rshl_z0_z28_z4, svuint32x2_t, svint32x2_t, z0, + svrshl_u32_x2 (z28, z4), + svrshl (z28, z4)) + +/* +** rshl_z18_z18_z4: +** urshl {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_DUAL_XN (rshl_z18_z18_z4, svuint32x2_t, svint32x2_t, z18, + svrshl_u32_x2 (z18, z4), + svrshl (z18, z4)) + +/* +** rshl_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** urshl [^\n]+, {z18\.s - z19\.s} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (rshl_z23_z23_z18, svint32x2_t, svuint32x2_t, z23, + svrshl_u32_x2 (z23, z18), + svrshl (z23, z18)) + +/* +** rshl_z28_z28_z4: +** 
urshl {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_DUAL_XN (rshl_z28_z28_z4, svuint32x2_t, svint32x2_t, z28, + svrshl_u32_x2 (z28, z4), + svrshl (z28, z4)) + +/* +** rshl_z4_z4_z18: +** urshl {z4\.s - z5\.s}, {z4\.s - z5\.s}, {z18\.s - z19\.s} +** ret +*/ +TEST_DUAL_XN (rshl_z4_z4_z18, svint32x2_t, svuint32x2_t, z4, + svrshl_u32_x2 (z4, z18), + svrshl (z4, z18)) + +/* +** rshl_z28_z28_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** urshl {z28\.s - z29\.s}, {z28\.s - z29\.s}, [^\n]+ +** | +** urshl {z28\.s - z29\.s}, {z28\.s - z29\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_DUAL_XN (rshl_z28_z28_z23, svuint32x2_t, svint32x2_t, z28, + svrshl_u32_x2 (z28, z23), + svrshl (z28, z23)) + +/* +** rshl_single_z24_z24_z0: +** urshl {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z24_z0, svuint32x2_t, svint32_t, z24, + svrshl_single_u32_x2 (z24, z0), + svrshl (z24, z0)) + +/* +** rshl_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** urshl {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** | +** urshl {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z28_z0, svuint32x2_t, svint32_t, z24, + svrshl_single_u32_x2 (z28, z0), + svrshl (z28, z0)) + +/* +** rshl_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** urshl {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z1_z0, svuint32x2_t, svint32_t, z24, + svrshl_single_u32_x2 (z1, z0), + svrshl (z1, z0)) + +/* +** rshl_single_z1_z24_z0: +** urshl {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (rshl_single_z1_z24_z0, svuint32x2_t, svint32_t, z1, + svrshl_single_u32_x2 (z24, z0), + svrshl (z24, z0)) + +/* +** rshl_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** urshl ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (rshl_single_z1_z1_z0, svuint32x2_t, svint32_t, z1, + svrshl_single_u32_x2 (z1, z0), + svrshl (z1, z0)) + +/* +** rshl_single_z18_z18_z0: +** urshl {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (rshl_single_z18_z18_z0, svuint32x2_t, svint32_t, z18, + svrshl_single_u32_x2 (z18, z0), + svrshl (z18, z0)) + +/* +** rshl_single_awkward: +** ... +** urshl ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svuint32x2_t, svint32_t, + z0_res = svrshl_single_u32_x2 (z1, z0), + z0_res = svrshl (z1, z0)) + +/* +** rshl_single_z0_z0_z15: +** ... +** urshl {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svuint32x2_t, svint32_t, + z0 = svrshl_single_u32_x2 (z0, z15), + z0 = svrshl (z0, z15)) + +/* +** rshl_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** urshl {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z24_z16, svuint32x2_t, svint32_t, z24, + svrshl_single_u32_x2 (z24, z16), + svrshl (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..6f069736a80c2c6691e8d5a61e21b9c6233a4f39 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u32_x4.c @@ -0,0 +1,228 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** rshl_z0_z0_z4: +** urshl {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_DUAL_XN (rshl_z0_z0_z4, svuint32x4_t, svint32x4_t, z0, + svrshl_u32_x4 (z0, z4), + svrshl (z0, z4)) + +/* +** rshl_z4_z4_z0: +** urshl {z4\.s - z7\.s}, {z4\.s - z7\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_DUAL_XN (rshl_z4_z4_z0, svint32x4_t, svuint32x4_t, z4, + svrshl_u32_x4 (z4, z0), + svrshl (z4, z0)) + +/* +** rshl_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** urshl [^\n]+, {z4\.s - z7\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (rshl_z18_z18_z4, svuint32x4_t, svint32x4_t, z18, + svrshl_u32_x4 (z18, z4), + svrshl (z18, z4)) + +/* +** rshl_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** urshl [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (rshl_z23_z23_z28, svint32x4_t, svuint32x4_t, z23, + svrshl_u32_x4 (z23, z28), + svrshl (z23, z28)) + +/* +** rshl_z28_z28_z4: +** urshl {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_DUAL_XN (rshl_z28_z28_z4, svuint32x4_t, svint32x4_t, z28, + svrshl_u32_x4 (z28, z4), + svrshl (z28, z4)) + +/* +** rshl_z4_z4_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** urshl {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** | +** urshl {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_DUAL_XN (rshl_z4_z4_z18, svint32x4_t, svuint32x4_t, z4, + svrshl_u32_x4 (z4, z18), + svrshl (z4, z18)) + +/* +** rshl_z0_z0_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** urshl {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** | +** urshl {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_DUAL_XN (rshl_z0_z0_z23, svuint32x4_t, svint32x4_t, z0, + svrshl_u32_x4 (z0, z23), + svrshl (z0, z23)) + +/* +** rshl_single_z24_z24_z0: +** urshl {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z24_z0, svuint32x4_t, svint32_t, z24, + svrshl_single_u32_x4 (z24, z0), + svrshl (z24, z0)) + +/* +** rshl_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** urshl {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** | +** urshl {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z28_z0, svuint32x4_t, svint32_t, z24, + svrshl_single_u32_x4 (z28, z0), + svrshl (z28, z0)) + +/* +** rshl_single_z24_z1_z0: +** mov [^\n]+ +** mov 
[^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** urshl {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z1_z0, svuint32x4_t, svint32_t, z24, + svrshl_single_u32_x4 (z1, z0), + svrshl (z1, z0)) + +/* +** rshl_single_z1_z24_z0: +** urshl {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (rshl_single_z1_z24_z0, svuint32x4_t, svint32_t, z1, + svrshl_single_u32_x4 (z24, z0), + svrshl (z24, z0)) + +/* +** rshl_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** urshl ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (rshl_single_z1_z1_z0, svuint32x4_t, svint32_t, z1, + svrshl_single_u32_x4 (z1, z0), + svrshl (z1, z0)) + +/* +** rshl_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** urshl [^\n]+, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (rshl_single_z18_z18_z0, svuint32x4_t, svint32_t, z18, + svrshl_single_u32_x4 (z18, z0), + svrshl (z18, z0)) + +/* +** rshl_single_awkward: +** ... +** urshl ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svuint32x4_t, svint32_t, + z0_res = svrshl_single_u32_x4 (z1, z0), + z0_res = svrshl (z1, z0)) + +/* +** rshl_single_z0_z0_z15: +** ... +** urshl {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svuint32x4_t, svint32_t, + z0 = svrshl_single_u32_x4 (z0, z15), + z0 = svrshl (z0, z15)) + +/* +** rshl_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** urshl {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z24_z16, svuint32x4_t, svint32_t, z24, + svrshl_single_u32_x4 (z24, z16), + svrshl (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..b9014161e2e631d489060f18acf85d7f006c1f54 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u64_x2.c @@ -0,0 +1,207 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** rshl_z0_z0_z4: +** urshl {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_DUAL_XN (rshl_z0_z0_z4, svuint64x2_t, svint64x2_t, z0, + svrshl_u64_x2 (z0, z4), + svrshl (z0, z4)) + +/* +** rshl_z4_z4_z0: +** urshl {z4\.d - z5\.d}, {z4\.d - z5\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_DUAL_XN (rshl_z4_z4_z0, svint64x2_t, svuint64x2_t, z4, + svrshl_u64_x2 (z4, z0), + svrshl (z4, z0)) + +/* +** rshl_z0_z28_z4: +** ( +** mov [^\n]+ +** mov [^\n]+ +** urshl [^\n]+, {z4\.d - z5\.d} +** | +** urshl [^\n]+, {z4\.d - z5\.d} +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_DUAL_XN (rshl_z0_z28_z4, svuint64x2_t, svint64x2_t, z0, + svrshl_u64_x2 (z28, z4), + svrshl (z28, z4)) + +/* +** rshl_z18_z18_z4: +** urshl {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_DUAL_XN (rshl_z18_z18_z4, svuint64x2_t, svint64x2_t, z18, + svrshl_u64_x2 (z18, z4), + svrshl (z18, z4)) + +/* +** rshl_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** urshl [^\n]+, {z18\.d - z19\.d} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (rshl_z23_z23_z18, svint64x2_t, svuint64x2_t, z23, + svrshl_u64_x2 (z23, z18), + svrshl (z23, z18)) + +/* +** rshl_z28_z28_z4: +** 
urshl {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_DUAL_XN (rshl_z28_z28_z4, svuint64x2_t, svint64x2_t, z28, + svrshl_u64_x2 (z28, z4), + svrshl (z28, z4)) + +/* +** rshl_z4_z4_z18: +** urshl {z4\.d - z5\.d}, {z4\.d - z5\.d}, {z18\.d - z19\.d} +** ret +*/ +TEST_DUAL_XN (rshl_z4_z4_z18, svint64x2_t, svuint64x2_t, z4, + svrshl_u64_x2 (z4, z18), + svrshl (z4, z18)) + +/* +** rshl_z28_z28_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** urshl {z28\.d - z29\.d}, {z28\.d - z29\.d}, [^\n]+ +** | +** urshl {z28\.d - z29\.d}, {z28\.d - z29\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_DUAL_XN (rshl_z28_z28_z23, svuint64x2_t, svint64x2_t, z28, + svrshl_u64_x2 (z28, z23), + svrshl (z28, z23)) + +/* +** rshl_single_z24_z24_z0: +** urshl {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z24_z0, svuint64x2_t, svint64_t, z24, + svrshl_single_u64_x2 (z24, z0), + svrshl (z24, z0)) + +/* +** rshl_single_z24_z28_z0: +** ( +** mov [^\n]+ +** mov [^\n]+ +** urshl {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** | +** urshl {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z28_z0, svuint64x2_t, svint64_t, z24, + svrshl_single_u64_x2 (z28, z0), + svrshl (z28, z0)) + +/* +** rshl_single_z24_z1_z0: +** ( +** mov z24\.d, z1\.d +** mov z25\.d, z2\.d +** | +** mov z25\.d, z2\.d +** mov z24\.d, z1\.d +** ) +** urshl {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (rshl_single_z24_z1_z0, svuint64x2_t, svint64_t, z24, + svrshl_single_u64_x2 (z1, z0), + svrshl (z1, z0)) + +/* +** rshl_single_z1_z24_z0: +** urshl {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ( +** mov z1\.d, z24\.d +** mov z2\.d, z25\.d +** | +** mov z2\.d, z25\.d +** mov z1\.d, z24\.d +** ) +** ret +*/ +TEST_XN_SINGLE (rshl_single_z1_z24_z0, svuint64x2_t, svint64_t, z1, + svrshl_single_u64_x2 (z24, z0), + svrshl (z24, z0)) + +/* +** rshl_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** urshl ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (rshl_single_z1_z1_z0, svuint64x2_t, svint64_t, z1, + svrshl_single_u64_x2 (z1, z0), + svrshl (z1, z0)) + +/* +** rshl_single_z18_z18_z0: +** urshl {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (rshl_single_z18_z18_z0, svuint64x2_t, svint64_t, z18, + svrshl_single_u64_x2 (z18, z0), + svrshl (z18, z0)) + +/* +** rshl_single_awkward: +** ... +** urshl ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d +** ... +** ret +*/ +TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svuint64x2_t, svint64_t, + z0_res = svrshl_single_u64_x2 (z1, z0), + z0_res = svrshl (z1, z0)) + +/* +** rshl_single_z0_z0_z15: +** ... +** urshl {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d +** ... 
+**	ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svuint64x2_t, svint64_t,
+ z0 = svrshl_single_u64_x2 (z0, z15),
+ z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+**	mov	(z[0-7])\.d, z16\.d
+**	urshl	{z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svuint64x2_t, svint64_t, z24,
+ svrshl_single_u64_x2 (z24, z16),
+ svrshl (z24, z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u64_x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..7bb99a4467db1f60511a9aca069eba1487974a58
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u64_x4.c
@@ -0,0 +1,228 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+**	urshl	{z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+**	ret
+*/
+TEST_DUAL_XN (rshl_z0_z0_z4, svuint64x4_t, svint64x4_t, z0,
+ svrshl_u64_x4 (z0, z4),
+ svrshl (z0, z4))
+
+/*
+** rshl_z4_z4_z0:
+**	urshl	{z4\.d - z7\.d}, {z4\.d - z7\.d}, {z0\.d - z3\.d}
+**	ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z0, svint64x4_t, svuint64x4_t, z4,
+ svrshl_u64_x4 (z4, z0),
+ svrshl (z4, z0))
+
+/*
+** rshl_z18_z18_z4:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	[^\n]+, {z4\.d - z7\.d}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_DUAL_XN (rshl_z18_z18_z4, svuint64x4_t, svint64x4_t, z18,
+ svrshl_u64_x4 (z18, z4),
+ svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z28:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	[^\n]+, {z28\.d - z31\.d}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_DUAL_XN (rshl_z23_z23_z28, svint64x4_t, svuint64x4_t, z23,
+ svrshl_u64_x4 (z23, z28),
+ svrshl (z23, z28))
+
+/*
+** rshl_z28_z28_z4:
+**	urshl	{z28\.d - z31\.d}, {z28\.d - z31\.d}, {z4\.d - z7\.d}
+**	ret
+*/
+TEST_DUAL_XN (rshl_z28_z28_z4, svuint64x4_t, svint64x4_t, z28,
+ svrshl_u64_x4 (z28, z4),
+ svrshl (z28, z4))
+
+/*
+** rshl_z4_z4_z18:
+** (
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	{z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** |
+**	urshl	{z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+** )
+**	ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z18, svint64x4_t, svuint64x4_t, z4,
+ svrshl_u64_x4 (z4, z18),
+ svrshl (z4, z18))
+
+/*
+** rshl_z0_z0_z23:
+** (
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	{z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** |
+**	urshl	{z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+** )
+**	ret
+*/
+TEST_DUAL_XN (rshl_z0_z0_z23, svuint64x4_t, svint64x4_t, z0,
+ svrshl_u64_x4 (z0, z23),
+ svrshl (z0, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+**	urshl	{z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svuint64x4_t, svint64_t, z24,
+ svrshl_single_u64_x4 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	{z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** |
+**	urshl	{z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+** )
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svuint64x4_t, svint64_t, z24,
+ svrshl_single_u64_x4 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	{z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svuint64x4_t, svint64_t, z24,
+ svrshl_single_u64_x4 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+**	urshl	{z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svuint64x4_t, svint64_t, z1,
+ svrshl_single_u64_x4 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svuint64x4_t, svint64_t, z1,
+ svrshl_single_u64_x4 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	[^\n]+, z0\.d
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svuint64x4_t, svint64_t, z18,
+ svrshl_single_u64_x4 (z18, z0),
+ svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+**	...
+**	urshl	({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+**	...
+**	ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svuint64x4_t, svint64_t,
+ z0_res = svrshl_single_u64_x4 (z1, z0),
+ z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+**	...
+**	urshl	{z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d
+**	...
+**	ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svuint64x4_t, svint64_t,
+ z0 = svrshl_single_u64_x4 (z0, z15),
+ z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+**	mov	(z[0-7])\.d, z16\.d
+**	urshl	{z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svuint64x4_t, svint64_t, z24,
+ svrshl_single_u64_x4 (z24, z16),
+ svrshl (z24, z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u8_x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..e96c41f8555963b128737ac380abbec5ca4e646f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u8_x2.c
@@ -0,0 +1,207 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+**	urshl	{z0\.b - z1\.b}, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+**	ret
+*/
+TEST_DUAL_XN (rshl_z0_z0_z4, svuint8x2_t, svint8x2_t, z0,
+ svrshl_u8_x2 (z0, z4),
+ svrshl (z0, z4))
+
+/*
+** rshl_z4_z4_z0:
+**	urshl	{z4\.b - z5\.b}, {z4\.b - z5\.b}, {z0\.b - z1\.b}
+**	ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z0, svint8x2_t, svuint8x2_t, z4,
+ svrshl_u8_x2 (z4, z0),
+ svrshl (z4, z0))
+
+/*
+** rshl_z0_z28_z4:
+** (
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	[^\n]+, {z4\.b - z5\.b}
+** |
+**	urshl	[^\n]+, {z4\.b - z5\.b}
+**	mov	[^\n]+
+**	mov	[^\n]+
+** )
+**	ret
+*/
+TEST_DUAL_XN (rshl_z0_z28_z4, svuint8x2_t, svint8x2_t, z0,
+ svrshl_u8_x2 (z28, z4),
+ svrshl (z28, z4))
+
+/*
+** rshl_z18_z18_z4:
+**	urshl	{z18\.b - z19\.b}, {z18\.b - z19\.b}, {z4\.b - z5\.b}
+**	ret
+*/
+TEST_DUAL_XN (rshl_z18_z18_z4, svuint8x2_t, svint8x2_t, z18,
+ svrshl_u8_x2 (z18, z4),
+ svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z18:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	[^\n]+, {z18\.b - z19\.b}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_DUAL_XN (rshl_z23_z23_z18, svint8x2_t, svuint8x2_t, z23,
+ svrshl_u8_x2 (z23, z18),
+ svrshl (z23, z18))
+
+/*
+** rshl_z28_z28_z4:
+**	urshl	{z28\.b - z29\.b}, {z28\.b - z29\.b}, {z4\.b - z5\.b}
+**	ret
+*/
+TEST_DUAL_XN (rshl_z28_z28_z4, svuint8x2_t, svint8x2_t, z28,
+ svrshl_u8_x2 (z28, z4),
+ svrshl (z28, z4))
+
+/*
+** rshl_z4_z4_z18:
+**	urshl	{z4\.b - z5\.b}, {z4\.b - z5\.b}, {z18\.b - z19\.b}
+**	ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z18, svint8x2_t, svuint8x2_t, z4,
+ svrshl_u8_x2 (z4, z18),
+ svrshl (z4, z18))
+
+/*
+** rshl_z28_z28_z23:
+** (
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	{z28\.b - z29\.b}, {z28\.b - z29\.b}, [^\n]+
+** |
+**	urshl	{z28\.b - z29\.b}, {z28\.b - z29\.b}, [^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+** )
+**	ret
+*/
+TEST_DUAL_XN (rshl_z28_z28_z23, svuint8x2_t, svint8x2_t, z28,
+ svrshl_u8_x2 (z28, z23),
+ svrshl (z28, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+**	urshl	{z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svuint8x2_t, svint8_t, z24,
+ svrshl_single_u8_x2 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	{z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** |
+**	urshl	{z28\.b - z29\.b}, {z28\.b - z29\.b}, z0\.b
+**	mov	[^\n]+
+**	mov	[^\n]+
+** )
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svuint8x2_t, svint8_t, z24,
+ svrshl_single_u8_x2 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+** (
+**	mov	z24\.d, z1\.d
+**	mov	z25\.d, z2\.d
+** |
+**	mov	z25\.d, z2\.d
+**	mov	z24\.d, z1\.d
+** )
+**	urshl	{z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svuint8x2_t, svint8_t, z24,
+ svrshl_single_u8_x2 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+**	urshl	{z24\.b - z25\.b}, {z24\.b - z25\.b}, z0\.b
+** (
+**	mov	z1\.d, z24\.d
+**	mov	z2\.d, z25\.d
+** |
+**	mov	z2\.d, z25\.d
+**	mov	z1\.d, z24\.d
+** )
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svuint8x2_t, svint8_t, z1,
+ svrshl_single_u8_x2 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svuint8x2_t, svint8_t, z1,
+ svrshl_single_u8_x2 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+**	urshl	{z18\.b - z19\.b}, {z18\.b - z19\.b}, z0\.b
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svuint8x2_t, svint8_t, z18,
+ svrshl_single_u8_x2 (z18, z0),
+ svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+**	...
+**	urshl	({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b
+**	...
+**	ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svuint8x2_t, svint8_t,
+ z0_res = svrshl_single_u8_x2 (z1, z0),
+ z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+**	...
+**	urshl	{z0\.b - z1\.b}, {z0\.b - z1\.b}, z15\.b
+**	...
+**	ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svuint8x2_t, svint8_t,
+ z0 = svrshl_single_u8_x2 (z0, z15),
+ z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+**	mov	(z[0-7])\.d, z16\.d
+**	urshl	{z24\.b - z25\.b}, {z24\.b - z25\.b}, \1\.b
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svuint8x2_t, svint8_t, z24,
+ svrshl_single_u8_x2 (z24, z16),
+ svrshl (z24, z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u8_x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..b0968f6ce6af6e9113599c7fb78d422d5fef119f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/rshl_u8_x4.c
@@ -0,0 +1,228 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** rshl_z0_z0_z4:
+**	urshl	{z0\.b - z3\.b}, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+**	ret
+*/
+TEST_DUAL_XN (rshl_z0_z0_z4, svuint8x4_t, svint8x4_t, z0,
+ svrshl_u8_x4 (z0, z4),
+ svrshl (z0, z4))
+
+/*
+** rshl_z4_z4_z0:
+**	urshl	{z4\.b - z7\.b}, {z4\.b - z7\.b}, {z0\.b - z3\.b}
+**	ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z0, svint8x4_t, svuint8x4_t, z4,
+ svrshl_u8_x4 (z4, z0),
+ svrshl (z4, z0))
+
+/*
+** rshl_z18_z18_z4:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	[^\n]+, {z4\.b - z7\.b}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_DUAL_XN (rshl_z18_z18_z4, svuint8x4_t, svint8x4_t, z18,
+ svrshl_u8_x4 (z18, z4),
+ svrshl (z18, z4))
+
+/*
+** rshl_z23_z23_z28:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	[^\n]+, {z28\.b - z31\.b}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_DUAL_XN (rshl_z23_z23_z28, svint8x4_t, svuint8x4_t, z23,
+ svrshl_u8_x4 (z23, z28),
+ svrshl (z23, z28))
+
+/*
+** rshl_z28_z28_z4:
+**	urshl	{z28\.b - z31\.b}, {z28\.b - z31\.b}, {z4\.b - z7\.b}
+**	ret
+*/
+TEST_DUAL_XN (rshl_z28_z28_z4, svuint8x4_t, svint8x4_t, z28,
+ svrshl_u8_x4 (z28, z4),
+ svrshl (z28, z4))
+
+/*
+** rshl_z4_z4_z18:
+** (
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	{z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+
+** |
+**	urshl	{z4\.b - z7\.b}, {z4\.b - z7\.b}, [^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+** )
+**	ret
+*/
+TEST_DUAL_XN (rshl_z4_z4_z18, svint8x4_t, svuint8x4_t, z4,
+ svrshl_u8_x4 (z4, z18),
+ svrshl (z4, z18))
+
+/*
+** rshl_z0_z0_z23:
+** (
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	{z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+
+** |
+**	urshl	{z0\.b - z3\.b}, {z0\.b - z3\.b}, [^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+** )
+**	ret
+*/
+TEST_DUAL_XN (rshl_z0_z0_z23, svuint8x4_t, svint8x4_t, z0,
+ svrshl_u8_x4 (z0, z23),
+ svrshl (z0, z23))
+
+/*
+** rshl_single_z24_z24_z0:
+**	urshl	{z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z0, svuint8x4_t, svint8_t, z24,
+ svrshl_single_u8_x4 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z24_z28_z0:
+** (
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	{z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+** |
+**	urshl	{z28\.b - z31\.b}, {z28\.b - z31\.b}, z0\.b
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+** )
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z28_z0, svuint8x4_t, svint8_t, z24,
+ svrshl_single_u8_x4 (z28, z0),
+ svrshl (z28, z0))
+
+/*
+** rshl_single_z24_z1_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	{z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z1_z0, svuint8x4_t, svint8_t, z24,
+ svrshl_single_u8_x4 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z1_z24_z0:
+**	urshl	{z24\.b - z27\.b}, {z24\.b - z27\.b}, z0\.b
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z24_z0, svuint8x4_t, svint8_t, z1,
+ svrshl_single_u8_x4 (z24, z0),
+ svrshl (z24, z0))
+
+/*
+** rshl_single_z1_z1_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	({z[0-9]+\.b - z[0-9]+\.b}), \1, z0\.b
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z1_z1_z0, svuint8x4_t, svint8_t, z1,
+ svrshl_single_u8_x4 (z1, z0),
+ svrshl (z1, z0))
+
+/*
+** rshl_single_z18_z18_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	urshl	[^\n]+, z0\.b
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z18_z18_z0, svuint8x4_t, svint8_t, z18,
+ svrshl_single_u8_x4 (z18, z0),
+ svrshl (z18, z0))
+
+/*
+** rshl_single_awkward:
+**	...
+**	urshl	({z[0-9]+\.b - z[0-9]+\.b}), \1, z[0-9]+\.b
+**	...
+**	ret
+*/
+TEST_XN_SINGLE_AWKWARD (rshl_single_awkward, svuint8x4_t, svint8_t,
+ z0_res = svrshl_single_u8_x4 (z1, z0),
+ z0_res = svrshl (z1, z0))
+
+/*
+** rshl_single_z0_z0_z15:
+**	...
+**	urshl	{z0\.b - z3\.b}, {z0\.b - z3\.b}, z15\.b
+**	...
+**	ret
+*/
+TEST_XN_SINGLE_Z15 (rshl_single_z0_z0_z15, svuint8x4_t, svint8_t,
+ z0 = svrshl_single_u8_x4 (z0, z15),
+ z0 = svrshl (z0, z15))
+
+/*
+** rshl_single_z24_z24_z16:
+**	mov	(z[0-7])\.d, z16\.d
+**	urshl	{z24\.b - z27\.b}, {z24\.b - z27\.b}, \1\.b
+**	ret
+*/
+TEST_XN_SINGLE (rshl_single_z24_z24_z16, svuint8x4_t, svint8_t, z24,
+ svrshl_single_u8_x4 (z24, z16),
+ svrshl (z24, z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_bf16_x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..055bee2757694f115e98a34f18cf0c4646f4f982
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_bf16_x2.c
@@ -0,0 +1,92 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+**	mov	p([0-9]+)\.b, p0\.b
+**	sel	{z0\.h - z1\.h}, pn\1, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svbfloat16x2_t, z0,
+ svsel_bf16_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+**	mov	p([0-9]+)\.b, p7\.b
+**	sel	{z0\.h - z1\.h}, pn\1, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svbfloat16x2_t, z0,
+ svsel_bf16_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+**	sel	{z0\.h - z1\.h}, pn8, {z4\.h - z5\.h}, {z28\.h - z29\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svbfloat16x2_t, z0,
+ svsel_bf16_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+**	sel	{z4\.h - z5\.h}, pn8, {z18\.h - z19\.h}, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svbfloat16x2_t, z4,
+ svsel_bf16_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+**	sel	{z18\.h - z19\.h}, pn15, {z28\.h - z29\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svbfloat16x2_t, z18,
+ svsel_bf16_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+**	sel	{z18\.h - z19\.h}, pn8, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svbfloat16x2_t, z18,
+ svsel_bf16_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+**	sel	[^\n]+, pn15, {z0\.h - z1\.h}, {z18\.h - z19\.h}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svbfloat16x2_t, z23,
+ svsel_bf16_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z0\.h - z1\.h}, pn15, {[^}]+}, {z28\.h - z29\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svbfloat16x2_t, z0,
+ svsel_bf16_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z0\.h - z1\.h}, pn8, {z28\.h - z29\.h}, {[^}]+}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svbfloat16x2_t, z0,
+ svsel_bf16_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_bf16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_bf16_x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..8c0d53aef5c5e62cf59f4909ebcd02fe53b9cee8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_bf16_x4.c
@@ -0,0 +1,92 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+**	mov	p([0-9]+)\.b, p0\.b
+**	sel	{z0\.h - z3\.h}, pn\1, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svbfloat16x4_t, z0,
+ svsel_bf16_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+**	mov	p([0-9]+)\.b, p7\.b
+**	sel	{z0\.h - z3\.h}, pn\1, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svbfloat16x4_t, z0,
+ svsel_bf16_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+**	sel	{z0\.h - z3\.h}, pn8, {z4\.h - z7\.h}, {z28\.h - z31\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svbfloat16x4_t, z0,
+ svsel_bf16_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z4\.h - z7\.h}, pn8, {[^}]+}, {z0\.h - z3\.h}
+**	ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svbfloat16x4_t, z4,
+ svsel_bf16_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+**	sel	{[^}]+}, pn15, {z28\.h - z31\.h}, {z4\.h - z7\.h}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svbfloat16x4_t, z18,
+ svsel_bf16_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{[^}]+}, pn8, {[^}]+}, {z4\.h - z7\.h}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svbfloat16x4_t, z18,
+ svsel_bf16_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	[^\n]+, pn15, {z0\.h - z3\.h}, {[^}]+}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svbfloat16x4_t, z23,
+ svsel_bf16_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f16_x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..617f8bb1143af5737ebdee5a8cc5968cb0d0ceeb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f16_x2.c
@@ -0,0 +1,92 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+**	mov	p([0-9]+)\.b, p0\.b
+**	sel	{z0\.h - z1\.h}, pn\1, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svfloat16x2_t, z0,
+ svsel_f16_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+**	mov	p([0-9]+)\.b, p7\.b
+**	sel	{z0\.h - z1\.h}, pn\1, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svfloat16x2_t, z0,
+ svsel_f16_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+**	sel	{z0\.h - z1\.h}, pn8, {z4\.h - z5\.h}, {z28\.h - z29\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svfloat16x2_t, z0,
+ svsel_f16_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+**	sel	{z4\.h - z5\.h}, pn8, {z18\.h - z19\.h}, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svfloat16x2_t, z4,
+ svsel_f16_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+**	sel	{z18\.h - z19\.h}, pn15, {z28\.h - z29\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svfloat16x2_t, z18,
+ svsel_f16_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+**	sel	{z18\.h - z19\.h}, pn8, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svfloat16x2_t, z18,
+ svsel_f16_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+**	sel	[^\n]+, pn15, {z0\.h - z1\.h}, {z18\.h - z19\.h}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svfloat16x2_t, z23,
+ svsel_f16_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z0\.h - z1\.h}, pn15, {[^}]+}, {z28\.h - z29\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svfloat16x2_t, z0,
+ svsel_f16_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z0\.h - z1\.h}, pn8, {z28\.h - z29\.h}, {[^}]+}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svfloat16x2_t, z0,
+ svsel_f16_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f16_x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..5daa8cac0851dbb56ff6a796053ca598e7de107d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f16_x4.c
@@ -0,0 +1,92 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+**	mov	p([0-9]+)\.b, p0\.b
+**	sel	{z0\.h - z3\.h}, pn\1, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svfloat16x4_t, z0,
+ svsel_f16_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+**	mov	p([0-9]+)\.b, p7\.b
+**	sel	{z0\.h - z3\.h}, pn\1, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svfloat16x4_t, z0,
+ svsel_f16_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+**	sel	{z0\.h - z3\.h}, pn8, {z4\.h - z7\.h}, {z28\.h - z31\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svfloat16x4_t, z0,
+ svsel_f16_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z4\.h - z7\.h}, pn8, {[^}]+}, {z0\.h - z3\.h}
+**	ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svfloat16x4_t, z4,
+ svsel_f16_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+**	sel	{[^}]+}, pn15, {z28\.h - z31\.h}, {z4\.h - z7\.h}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svfloat16x4_t, z18,
+ svsel_f16_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{[^}]+}, pn8, {[^}]+}, {z4\.h - z7\.h}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svfloat16x4_t, z18,
+ svsel_f16_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	[^\n]+, pn15, {z0\.h - z3\.h}, {[^}]+}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svfloat16x4_t, z23,
+ svsel_f16_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f32_x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..a3bbb4f09cd001ab347e1da585be1d5db9a57bb9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f32_x2.c
@@ -0,0 +1,92 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+**	mov	p([0-9]+)\.b, p0\.b
+**	sel	{z0\.s - z1\.s}, pn\1, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+**	ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svfloat32x2_t, z0,
+ svsel_f32_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+**	mov	p([0-9]+)\.b, p7\.b
+**	sel	{z0\.s - z1\.s}, pn\1, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+**	ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svfloat32x2_t, z0,
+ svsel_f32_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+**	sel	{z0\.s - z1\.s}, pn8, {z4\.s - z5\.s}, {z28\.s - z29\.s}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svfloat32x2_t, z0,
+ svsel_f32_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+**	sel	{z4\.s - z5\.s}, pn8, {z18\.s - z19\.s}, {z0\.s - z1\.s}
+**	ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svfloat32x2_t, z4,
+ svsel_f32_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+**	sel	{z18\.s - z19\.s}, pn15, {z28\.s - z29\.s}, {z4\.s - z5\.s}
+**	ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svfloat32x2_t, z18,
+ svsel_f32_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+**	sel	{z18\.s - z19\.s}, pn8, {z18\.s - z19\.s}, {z4\.s - z5\.s}
+**	ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svfloat32x2_t, z18,
+ svsel_f32_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+**	sel	[^\n]+, pn15, {z0\.s - z1\.s}, {z18\.s - z19\.s}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svfloat32x2_t, z23,
+ svsel_f32_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z0\.s - z1\.s}, pn15, {[^}]+}, {z28\.s - z29\.s}
+**	ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svfloat32x2_t, z0,
+ svsel_f32_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z0\.s - z1\.s}, pn8, {z28\.s - z29\.s}, {[^}]+}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svfloat32x2_t, z0,
+ svsel_f32_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f32_x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..d0b4e45b91de77357bc6ee8859f2793d9c5f71e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f32_x4.c
@@ -0,0 +1,92 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+**	mov	p([0-9]+)\.b, p0\.b
+**	sel	{z0\.s - z3\.s}, pn\1, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+**	ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svfloat32x4_t, z0,
+ svsel_f32_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+**	mov	p([0-9]+)\.b, p7\.b
+**	sel	{z0\.s - z3\.s}, pn\1, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+**	ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svfloat32x4_t, z0,
+ svsel_f32_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+**	sel	{z0\.s - z3\.s}, pn8, {z4\.s - z7\.s}, {z28\.s - z31\.s}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svfloat32x4_t, z0,
+ svsel_f32_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z4\.s - z7\.s}, pn8, {[^}]+}, {z0\.s - z3\.s}
+**	ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svfloat32x4_t, z4,
+ svsel_f32_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+**	sel	{[^}]+}, pn15, {z28\.s - z31\.s}, {z4\.s - z7\.s}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svfloat32x4_t, z18,
+ svsel_f32_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{[^}]+}, pn8, {[^}]+}, {z4\.s - z7\.s}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svfloat32x4_t, z18,
+ svsel_f32_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	[^\n]+, pn15, {z0\.s - z3\.s}, {[^}]+}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svfloat32x4_t, z23,
+ svsel_f32_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f64_x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..678a410a6ab7366387a3794127797698370affda
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f64_x2.c
@@ -0,0 +1,92 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+**	mov	p([0-9]+)\.b, p0\.b
+**	sel	{z0\.d - z1\.d}, pn\1, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+**	ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svfloat64x2_t, z0,
+ svsel_f64_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+**	mov	p([0-9]+)\.b, p7\.b
+**	sel	{z0\.d - z1\.d}, pn\1, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+**	ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svfloat64x2_t, z0,
+ svsel_f64_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+**	sel	{z0\.d - z1\.d}, pn8, {z4\.d - z5\.d}, {z28\.d - z29\.d}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svfloat64x2_t, z0,
+ svsel_f64_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+**	sel	{z4\.d - z5\.d}, pn8, {z18\.d - z19\.d}, {z0\.d - z1\.d}
+**	ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svfloat64x2_t, z4,
+ svsel_f64_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+**	sel	{z18\.d - z19\.d}, pn15, {z28\.d - z29\.d}, {z4\.d - z5\.d}
+**	ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svfloat64x2_t, z18,
+ svsel_f64_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+**	sel	{z18\.d - z19\.d}, pn8, {z18\.d - z19\.d}, {z4\.d - z5\.d}
+**	ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svfloat64x2_t, z18,
+ svsel_f64_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+**	sel	[^\n]+, pn15, {z0\.d - z1\.d}, {z18\.d - z19\.d}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svfloat64x2_t, z23,
+ svsel_f64_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z0\.d - z1\.d}, pn15, {[^}]+}, {z28\.d - z29\.d}
+**	ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svfloat64x2_t, z0,
+ svsel_f64_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z0\.d - z1\.d}, pn8, {z28\.d - z29\.d}, {[^}]+}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svfloat64x2_t, z0,
+ svsel_f64_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f64_x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..08352f05d5d923d78f96729404911c980e28c1de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_f64_x4.c
@@ -0,0 +1,92 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+**	mov	p([0-9]+)\.b, p0\.b
+**	sel	{z0\.d - z3\.d}, pn\1, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+**	ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svfloat64x4_t, z0,
+ svsel_f64_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+**	mov	p([0-9]+)\.b, p7\.b
+**	sel	{z0\.d - z3\.d}, pn\1, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+**	ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svfloat64x4_t, z0,
+ svsel_f64_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+**	sel	{z0\.d - z3\.d}, pn8, {z4\.d - z7\.d}, {z28\.d - z31\.d}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svfloat64x4_t, z0,
+ svsel_f64_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z4\.d - z7\.d}, pn8, {[^}]+}, {z0\.d - z3\.d}
+**	ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svfloat64x4_t, z4,
+ svsel_f64_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+**	sel	{[^}]+}, pn15, {z28\.d - z31\.d}, {z4\.d - z7\.d}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svfloat64x4_t, z18,
+ svsel_f64_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{[^}]+}, pn8, {[^}]+}, {z4\.d - z7\.d}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svfloat64x4_t, z18,
+ svsel_f64_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	[^\n]+, pn15, {z0\.d - z3\.d}, {[^}]+}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svfloat64x4_t, z23,
+ svsel_f64_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s16_x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..14f506618cf577c6b6b26d0f5b1ab82cf49d2855
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s16_x2.c
@@ -0,0 +1,92 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+**	mov	p([0-9]+)\.b, p0\.b
+**	sel	{z0\.h - z1\.h}, pn\1, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svint16x2_t, z0,
+ svsel_s16_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+**	mov	p([0-9]+)\.b, p7\.b
+**	sel	{z0\.h - z1\.h}, pn\1, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svint16x2_t, z0,
+ svsel_s16_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+**	sel	{z0\.h - z1\.h}, pn8, {z4\.h - z5\.h}, {z28\.h - z29\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svint16x2_t, z0,
+ svsel_s16_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+**	sel	{z4\.h - z5\.h}, pn8, {z18\.h - z19\.h}, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svint16x2_t, z4,
+ svsel_s16_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+**	sel	{z18\.h - z19\.h}, pn15, {z28\.h - z29\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svint16x2_t, z18,
+ svsel_s16_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+**	sel	{z18\.h - z19\.h}, pn8, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svint16x2_t, z18,
+ svsel_s16_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+**	sel	[^\n]+, pn15, {z0\.h - z1\.h}, {z18\.h - z19\.h}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svint16x2_t, z23,
+ svsel_s16_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z0\.h - z1\.h}, pn15, {[^}]+}, {z28\.h - z29\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svint16x2_t, z0,
+ svsel_s16_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z0\.h - z1\.h}, pn8, {z28\.h - z29\.h}, {[^}]+}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svint16x2_t, z0,
+ svsel_s16_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s16_x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..1aa89e38b32a11b2245b0fca6acb57e5c57b86ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s16_x4.c
@@ -0,0 +1,92 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+**	mov	p([0-9]+)\.b, p0\.b
+**	sel	{z0\.h - z3\.h}, pn\1, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svint16x4_t, z0,
+ svsel_s16_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+**	mov	p([0-9]+)\.b, p7\.b
+**	sel	{z0\.h - z3\.h}, pn\1, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svint16x4_t, z0,
+ svsel_s16_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+**	sel	{z0\.h - z3\.h}, pn8, {z4\.h - z7\.h}, {z28\.h - z31\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svint16x4_t, z0,
+ svsel_s16_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z4\.h - z7\.h}, pn8, {[^}]+}, {z0\.h - z3\.h}
+**	ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svint16x4_t, z4,
+ svsel_s16_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+**	sel	{[^}]+}, pn15, {z28\.h - z31\.h}, {z4\.h - z7\.h}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svint16x4_t, z18,
+ svsel_s16_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{[^}]+}, pn8, {[^}]+}, {z4\.h - z7\.h}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svint16x4_t, z18,
+ svsel_s16_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	[^\n]+, pn15, {z0\.h - z3\.h}, {[^}]+}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svint16x4_t, z23,
+ svsel_s16_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s32_x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..dd36c30b29fd9a29fbdfbf41b6825e364b3eee1d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s32_x2.c
@@ -0,0 +1,92 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+**	mov	p([0-9]+)\.b, p0\.b
+**	sel	{z0\.s - z1\.s}, pn\1, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+**	ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svint32x2_t, z0,
+ svsel_s32_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+**	mov	p([0-9]+)\.b, p7\.b
+**	sel	{z0\.s - z1\.s}, pn\1, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+**	ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svint32x2_t, z0,
+ svsel_s32_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+**	sel	{z0\.s - z1\.s}, pn8, {z4\.s - z5\.s}, {z28\.s - z29\.s}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svint32x2_t, z0,
+ svsel_s32_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+**	sel	{z4\.s - z5\.s}, pn8, {z18\.s - z19\.s}, {z0\.s - z1\.s}
+**	ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svint32x2_t, z4,
+ svsel_s32_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+**	sel	{z18\.s - z19\.s}, pn15, {z28\.s - z29\.s}, {z4\.s - z5\.s}
+**	ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svint32x2_t, z18,
+ svsel_s32_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+**	sel	{z18\.s - z19\.s}, pn8, {z18\.s - z19\.s}, {z4\.s - z5\.s}
+**	ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svint32x2_t, z18,
+ svsel_s32_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+**	sel	[^\n]+, pn15, {z0\.s - z1\.s}, {z18\.s - z19\.s}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svint32x2_t, z23,
+ svsel_s32_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z0\.s - z1\.s}, pn15, {[^}]+}, {z28\.s - z29\.s}
+**	ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svint32x2_t, z0,
+ svsel_s32_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z0\.s - z1\.s}, pn8, {z28\.s - z29\.s}, {[^}]+}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svint32x2_t, z0,
+ svsel_s32_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s32_x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..343a081b3e39baa03633ecb6fe6a54c3781ade7c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s32_x4.c
@@ -0,0 +1,92 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+**	mov	p([0-9]+)\.b, p0\.b
+**	sel	{z0\.s - z3\.s}, pn\1, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+**	ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svint32x4_t, z0,
+ svsel_s32_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+**	mov	p([0-9]+)\.b, p7\.b
+**	sel	{z0\.s - z3\.s}, pn\1, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+**	ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svint32x4_t, z0,
+ svsel_s32_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+**	sel	{z0\.s - z3\.s}, pn8, {z4\.s - z7\.s}, {z28\.s - z31\.s}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svint32x4_t, z0,
+ svsel_s32_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z4\.s - z7\.s}, pn8, {[^}]+}, {z0\.s - z3\.s}
+**	ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svint32x4_t, z4,
+ svsel_s32_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+**	sel	{[^}]+}, pn15, {z28\.s - z31\.s}, {z4\.s - z7\.s}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svint32x4_t, z18,
+ svsel_s32_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{[^}]+}, pn8, {[^}]+}, {z4\.s - z7\.s}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svint32x4_t, z18,
+ svsel_s32_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	[^\n]+, pn15, {z0\.s - z3\.s}, {[^}]+}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svint32x4_t, z23,
+ svsel_s32_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s64_x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..f286b0e2046123cba25e99aa75aece1df2490df2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s64_x2.c
@@ -0,0 +1,92 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+**	mov	p([0-9]+)\.b, p0\.b
+**	sel	{z0\.d - z1\.d}, pn\1, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+**	ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svint64x2_t, z0,
+ svsel_s64_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+**	mov	p([0-9]+)\.b, p7\.b
+**	sel	{z0\.d - z1\.d}, pn\1, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+**	ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svint64x2_t, z0,
+ svsel_s64_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+**	sel	{z0\.d - z1\.d}, pn8, {z4\.d - z5\.d}, {z28\.d - z29\.d}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svint64x2_t, z0,
+ svsel_s64_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+**	sel	{z4\.d - z5\.d}, pn8, {z18\.d - z19\.d}, {z0\.d - z1\.d}
+**	ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svint64x2_t, z4,
+ svsel_s64_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+**	sel	{z18\.d - z19\.d}, pn15, {z28\.d - z29\.d}, {z4\.d - z5\.d}
+**	ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svint64x2_t, z18,
+ svsel_s64_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+**	sel	{z18\.d - z19\.d}, pn8, {z18\.d - z19\.d}, {z4\.d - z5\.d}
+**	ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svint64x2_t, z18,
+ svsel_s64_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+**	sel	[^\n]+, pn15, {z0\.d - z1\.d}, {z18\.d - z19\.d}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svint64x2_t, z23,
+ svsel_s64_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z0\.d - z1\.d}, pn15, {[^}]+}, {z28\.d - z29\.d}
+**	ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svint64x2_t, z0,
+ svsel_s64_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z0\.d - z1\.d}, pn8, {z28\.d - z29\.d}, {[^}]+}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svint64x2_t, z0,
+ svsel_s64_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s64_x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..fbf554e486296774563a5b707462ad7fb4069414
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s64_x4.c
@@ -0,0 +1,92 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+**	mov	p([0-9]+)\.b, p0\.b
+**	sel	{z0\.d - z3\.d}, pn\1, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+**	ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svint64x4_t, z0,
+ svsel_s64_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+**	mov	p([0-9]+)\.b, p7\.b
+**	sel	{z0\.d - z3\.d}, pn\1, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+**	ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svint64x4_t, z0,
+ svsel_s64_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+**	sel	{z0\.d - z3\.d}, pn8, {z4\.d - z7\.d}, {z28\.d - z31\.d}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svint64x4_t, z0,
+ svsel_s64_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z4\.d - z7\.d}, pn8, {[^}]+}, {z0\.d - z3\.d}
+**	ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svint64x4_t, z4,
+ svsel_s64_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+**	sel	{[^}]+}, pn15, {z28\.d - z31\.d}, {z4\.d - z7\.d}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svint64x4_t, z18,
+ svsel_s64_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{[^}]+}, pn8, {[^}]+}, {z4\.d - z7\.d}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svint64x4_t, z18,
+ svsel_s64_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	[^\n]+, pn15, {z0\.d - z3\.d}, {[^}]+}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svint64x4_t, z23,
+ svsel_s64_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s8_x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..42d89ff32965caee444f7b0a0a93d43d63571b1f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s8_x2.c
@@ -0,0 +1,92 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+**	mov	p([0-9]+)\.b, p0\.b
+**	sel	{z0\.b - z1\.b}, pn\1, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+**	ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svint8x2_t, z0,
+ svsel_s8_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+**	mov	p([0-9]+)\.b, p7\.b
+**	sel	{z0\.b - z1\.b}, pn\1, {z0\.b - z1\.b}, {z4\.b - z5\.b}
+**	ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svint8x2_t, z0,
+ svsel_s8_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+**	sel	{z0\.b - z1\.b}, pn8, {z4\.b - z5\.b}, {z28\.b - z29\.b}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svint8x2_t, z0,
+ svsel_s8_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+**	sel	{z4\.b - z5\.b}, pn8, {z18\.b - z19\.b}, {z0\.b - z1\.b}
+**	ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svint8x2_t, z4,
+ svsel_s8_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+**	sel	{z18\.b - z19\.b}, pn15, {z28\.b - z29\.b}, {z4\.b - z5\.b}
+**	ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svint8x2_t, z18,
+ svsel_s8_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+**	sel	{z18\.b - z19\.b}, pn8, {z18\.b - z19\.b}, {z4\.b - z5\.b}
+**	ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svint8x2_t, z18,
+ svsel_s8_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+**	sel	[^\n]+, pn15, {z0\.b - z1\.b}, {z18\.b - z19\.b}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svint8x2_t, z23,
+ svsel_s8_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z0\.b - z1\.b}, pn15, {[^}]+}, {z28\.b - z29\.b}
+**	ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svint8x2_t, z0,
+ svsel_s8_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z0\.b - z1\.b}, pn8, {z28\.b - z29\.b}, {[^}]+}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svint8x2_t, z0,
+ svsel_s8_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s8_x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..c6635f1d066c9b12b899d05d8596da35f969d70a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_s8_x4.c
@@ -0,0 +1,92 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+**	mov	p([0-9]+)\.b, p0\.b
+**	sel	{z0\.b - z3\.b}, pn\1, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+**	ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svint8x4_t, z0,
+ svsel_s8_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+**	mov	p([0-9]+)\.b, p7\.b
+**	sel	{z0\.b - z3\.b}, pn\1, {z0\.b - z3\.b}, {z4\.b - z7\.b}
+**	ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svint8x4_t, z0,
+ svsel_s8_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+**	sel	{z0\.b - z3\.b}, pn8, {z4\.b - z7\.b}, {z28\.b - z31\.b}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svint8x4_t, z0,
+ svsel_s8_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z4\.b - z7\.b}, pn8, {[^}]+}, {z0\.b - z3\.b}
+**	ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svint8x4_t, z4,
+ svsel_s8_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+**	sel	{[^}]+}, pn15, {z28\.b - z31\.b}, {z4\.b - z7\.b}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svint8x4_t, z18,
+ svsel_s8_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{[^}]+}, pn8, {[^}]+}, {z4\.b - z7\.b}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svint8x4_t, z18,
+ svsel_s8_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	[^\n]+, pn15, {z0\.b - z3\.b}, {[^}]+}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svint8x4_t, z23,
+ svsel_s8_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u16_x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..20cd848fbc88d25d6f420beaefed0864c06517e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u16_x2.c
@@ -0,0 +1,92 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+**	mov	p([0-9]+)\.b, p0\.b
+**	sel	{z0\.h - z1\.h}, pn\1, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svuint16x2_t, z0,
+ svsel_u16_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+**	mov	p([0-9]+)\.b, p7\.b
+**	sel	{z0\.h - z1\.h}, pn\1, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svuint16x2_t, z0,
+ svsel_u16_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+**	sel	{z0\.h - z1\.h}, pn8, {z4\.h - z5\.h}, {z28\.h - z29\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svuint16x2_t, z0,
+ svsel_u16_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+**	sel	{z4\.h - z5\.h}, pn8, {z18\.h - z19\.h}, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svuint16x2_t, z4,
+ svsel_u16_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+**	sel	{z18\.h - z19\.h}, pn15, {z28\.h - z29\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svuint16x2_t, z18,
+ svsel_u16_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+**	sel	{z18\.h - z19\.h}, pn8, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svuint16x2_t, z18,
+ svsel_u16_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+**	sel	[^\n]+, pn15, {z0\.h - z1\.h}, {z18\.h - z19\.h}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svuint16x2_t, z23,
+ svsel_u16_x2 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
+
+/*
+** sel_z0_pn15_z23_z28:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z0\.h - z1\.h}, pn15, {[^}]+}, {z28\.h - z29\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn15_z23_z28, svuint16x2_t, z0,
+ svsel_u16_x2 (pn15, z23, z28),
+ svsel (pn15, z23, z28))
+
+/*
+** sel_z0_pn8_z28_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z0\.h - z1\.h}, pn8, {z28\.h - z29\.h}, {[^}]+}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z28_z23, svuint16x2_t, z0,
+ svsel_u16_x2 (pn8, z28, z23),
+ svsel (pn8, z28, z23))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u16_x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..f5ba4fd682319f7eae54927896f0c1edde3ee7ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u16_x4.c
@@ -0,0 +1,92 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+**	mov	p([0-9]+)\.b, p0\.b
+**	sel	{z0\.h - z3\.h}, pn\1, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svuint16x4_t, z0,
+ svsel_u16_x4 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+**	mov	p([0-9]+)\.b, p7\.b
+**	sel	{z0\.h - z3\.h}, pn\1, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svuint16x4_t, z0,
+ svsel_u16_x4 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+**	sel	{z0\.h - z3\.h}, pn8, {z4\.h - z7\.h}, {z28\.h - z31\.h}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svuint16x4_t, z0,
+ svsel_u16_x4 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{z4\.h - z7\.h}, pn8, {[^}]+}, {z0\.h - z3\.h}
+**	ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svuint16x4_t, z4,
+ svsel_u16_x4 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+**	sel	{[^}]+}, pn15, {z28\.h - z31\.h}, {z4\.h - z7\.h}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svuint16x4_t, z18,
+ svsel_u16_x4 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	{[^}]+}, pn8, {[^}]+}, {z4\.h - z7\.h}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svuint16x4_t, z18,
+ svsel_u16_x4 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	sel	[^\n]+, pn15, {z0\.h - z3\.h}, {[^}]+}
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_XN (sel_z23_pn15_z0_z18, svuint16x4_t, z23,
+ svsel_u16_x4 (pn15, z0, z18),
+ svsel (pn15, z0, z18))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u32_x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..2b736e6f8d2fa6e73cc4776f52786a9a480c9dae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u32_x2.c
@@ -0,0 +1,92 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** sel_z0_pn0_z0_z4:
+**	mov	p([0-9]+)\.b, p0\.b
+**	sel	{z0\.s - z1\.s}, pn\1, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+**	ret
+*/
+TEST_XN (sel_z0_pn0_z0_z4, svuint32x2_t, z0,
+ svsel_u32_x2 (pn0, z0, z4),
+ svsel (pn0, z0, z4))
+
+/*
+** sel_z0_pn7_z0_z4:
+**	mov	p([0-9]+)\.b, p7\.b
+**	sel	{z0\.s - z1\.s}, pn\1, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+**	ret
+*/
+TEST_XN (sel_z0_pn7_z0_z4, svuint32x2_t, z0,
+ svsel_u32_x2 (pn7, z0, z4),
+ svsel (pn7, z0, z4))
+
+/*
+** sel_z0_pn8_z4_z28:
+**	sel	{z0\.s - z1\.s}, pn8, {z4\.s - z5\.s}, {z28\.s - z29\.s}
+**	ret
+*/
+TEST_XN (sel_z0_pn8_z4_z28, svuint32x2_t, z0,
+ svsel_u32_x2 (pn8, z4, z28),
+ svsel (pn8, z4, z28))
+
+/*
+** sel_z4_pn8_z18_z0:
+**	sel	{z4\.s - z5\.s}, pn8, {z18\.s - z19\.s}, {z0\.s - z1\.s}
+**	ret
+*/
+TEST_XN (sel_z4_pn8_z18_z0, svuint32x2_t, z4,
+ svsel_u32_x2 (pn8, z18, z0),
+ svsel (pn8, z18, z0))
+
+/*
+** sel_z18_pn15_z28_z4:
+**	sel	{z18\.s - z19\.s}, pn15, {z28\.s - z29\.s}, {z4\.s - z5\.s}
+**	ret
+*/
+TEST_XN (sel_z18_pn15_z28_z4, svuint32x2_t, z18,
+ svsel_u32_x2 (pn15, z28, z4),
+ svsel (pn15, z28, z4))
+
+/*
+** sel_z18_pn8_z18_z4:
+**	sel	{z18\.s - z19\.s}, pn8, {z18\.s - z19\.s}, {z4\.s - z5\.s}
+**	ret
+*/
+TEST_XN (sel_z18_pn8_z18_z4, svuint32x2_t, z18,
+ svsel_u32_x2 (pn8, z18, z4),
+ svsel (pn8, z18, z4))
+
+/*
+** sel_z23_pn15_z0_z18: +** sel [^\n]+, pn15, {z0\.s - z1\.s}, {z18\.s - z19\.s} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (sel_z23_pn15_z0_z18, svuint32x2_t, z23, + svsel_u32_x2 (pn15, z0, z18), + svsel (pn15, z0, z18)) + +/* +** sel_z0_pn15_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** sel {z0\.s - z1\.s}, pn15, {[^}]+}, {z28\.s - z29\.s} +** ret +*/ +TEST_XN (sel_z0_pn15_z23_z28, svuint32x2_t, z0, + svsel_u32_x2 (pn15, z23, z28), + svsel (pn15, z23, z28)) + +/* +** sel_z0_pn8_z28_z23: +** mov [^\n]+ +** mov [^\n]+ +** sel {z0\.s - z1\.s}, pn8, {z28\.s - z29\.s}, {[^}]+} +** ret +*/ +TEST_XN (sel_z0_pn8_z28_z23, svuint32x2_t, z0, + svsel_u32_x2 (pn8, z28, z23), + svsel (pn8, z28, z23)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..4783cf7a7557e4ebf1cf29455f7a9dec511c4fe9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u32_x4.c @@ -0,0 +1,92 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** sel_z0_pn0_z0_z4: +** mov p([0-9]+)\.b, p0\.b +** sel {z0\.s - z3\.s}, pn\1, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (sel_z0_pn0_z0_z4, svuint32x4_t, z0, + svsel_u32_x4 (pn0, z0, z4), + svsel (pn0, z0, z4)) + +/* +** sel_z0_pn7_z0_z4: +** mov p([0-9]+)\.b, p7\.b +** sel {z0\.s - z3\.s}, pn\1, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (sel_z0_pn7_z0_z4, svuint32x4_t, z0, + svsel_u32_x4 (pn7, z0, z4), + svsel (pn7, z0, z4)) + +/* +** sel_z0_pn8_z4_z28: +** sel {z0\.s - z3\.s}, pn8, {z4\.s - z7\.s}, {z28\.s - z31\.s} +** ret +*/ +TEST_XN (sel_z0_pn8_z4_z28, svuint32x4_t, z0, + svsel_u32_x4 (pn8, z4, z28), + svsel (pn8, z4, z28)) + +/* +** sel_z4_pn8_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sel {z4\.s - z7\.s}, pn8, {[^}]+}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (sel_z4_pn8_z18_z0, svuint32x4_t, z4, + svsel_u32_x4 (pn8, z18, z0), + svsel (pn8, z18, z0)) + +/* +** sel_z18_pn15_z28_z4: +** sel {[^}]+}, pn15, {z28\.s - z31\.s}, {z4\.s - z7\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (sel_z18_pn15_z28_z4, svuint32x4_t, z18, + svsel_u32_x4 (pn15, z28, z4), + svsel (pn15, z28, z4)) + +/* +** sel_z18_pn8_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sel {[^}]+}, pn8, {[^}]+}, {z4\.s - z7\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (sel_z18_pn8_z18_z4, svuint32x4_t, z18, + svsel_u32_x4 (pn8, z18, z4), + svsel (pn8, z18, z4)) + +/* +** sel_z23_pn15_z0_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sel [^\n]+, pn15, {z0\.s - z3\.s}, {[^}]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (sel_z23_pn15_z0_z18, svuint32x4_t, z23, + svsel_u32_x4 (pn15, z0, z18), + svsel (pn15, z0, z18)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..2e039e23bb08c3ccd5c21be124152b23ab67c7fb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u64_x2.c @@ -0,0 +1,92 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** sel_z0_pn0_z0_z4: +** mov p([0-9]+)\.b, p0\.b +** sel {z0\.d - z1\.d}, pn\1, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN 
(sel_z0_pn0_z0_z4, svuint64x2_t, z0, + svsel_u64_x2 (pn0, z0, z4), + svsel (pn0, z0, z4)) + +/* +** sel_z0_pn7_z0_z4: +** mov p([0-9]+)\.b, p7\.b +** sel {z0\.d - z1\.d}, pn\1, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (sel_z0_pn7_z0_z4, svuint64x2_t, z0, + svsel_u64_x2 (pn7, z0, z4), + svsel (pn7, z0, z4)) + +/* +** sel_z0_pn8_z4_z28: +** sel {z0\.d - z1\.d}, pn8, {z4\.d - z5\.d}, {z28\.d - z29\.d} +** ret +*/ +TEST_XN (sel_z0_pn8_z4_z28, svuint64x2_t, z0, + svsel_u64_x2 (pn8, z4, z28), + svsel (pn8, z4, z28)) + +/* +** sel_z4_pn8_z18_z0: +** sel {z4\.d - z5\.d}, pn8, {z18\.d - z19\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_XN (sel_z4_pn8_z18_z0, svuint64x2_t, z4, + svsel_u64_x2 (pn8, z18, z0), + svsel (pn8, z18, z0)) + +/* +** sel_z18_pn15_z28_z4: +** sel {z18\.d - z19\.d}, pn15, {z28\.d - z29\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (sel_z18_pn15_z28_z4, svuint64x2_t, z18, + svsel_u64_x2 (pn15, z28, z4), + svsel (pn15, z28, z4)) + +/* +** sel_z18_pn8_z18_z4: +** sel {z18\.d - z19\.d}, pn8, {z18\.d - z19\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (sel_z18_pn8_z18_z4, svuint64x2_t, z18, + svsel_u64_x2 (pn8, z18, z4), + svsel (pn8, z18, z4)) + +/* +** sel_z23_pn15_z0_z18: +** sel [^\n]+, pn15, {z0\.d - z1\.d}, {z18\.d - z19\.d} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (sel_z23_pn15_z0_z18, svuint64x2_t, z23, + svsel_u64_x2 (pn15, z0, z18), + svsel (pn15, z0, z18)) + +/* +** sel_z0_pn15_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** sel {z0\.d - z1\.d}, pn15, {[^}]+}, {z28\.d - z29\.d} +** ret +*/ +TEST_XN (sel_z0_pn15_z23_z28, svuint64x2_t, z0, + svsel_u64_x2 (pn15, z23, z28), + svsel (pn15, z23, z28)) + +/* +** sel_z0_pn8_z28_z23: +** mov [^\n]+ +** mov [^\n]+ +** sel {z0\.d - z1\.d}, pn8, {z28\.d - z29\.d}, {[^}]+} +** ret +*/ +TEST_XN (sel_z0_pn8_z28_z23, svuint64x2_t, z0, + svsel_u64_x2 (pn8, z28, z23), + svsel (pn8, z28, z23)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..28a92d4e02a368c7c6cc5e5fefc6861c5487bc17 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u64_x4.c @@ -0,0 +1,92 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** sel_z0_pn0_z0_z4: +** mov p([0-9]+)\.b, p0\.b +** sel {z0\.d - z3\.d}, pn\1, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (sel_z0_pn0_z0_z4, svuint64x4_t, z0, + svsel_u64_x4 (pn0, z0, z4), + svsel (pn0, z0, z4)) + +/* +** sel_z0_pn7_z0_z4: +** mov p([0-9]+)\.b, p7\.b +** sel {z0\.d - z3\.d}, pn\1, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (sel_z0_pn7_z0_z4, svuint64x4_t, z0, + svsel_u64_x4 (pn7, z0, z4), + svsel (pn7, z0, z4)) + +/* +** sel_z0_pn8_z4_z28: +** sel {z0\.d - z3\.d}, pn8, {z4\.d - z7\.d}, {z28\.d - z31\.d} +** ret +*/ +TEST_XN (sel_z0_pn8_z4_z28, svuint64x4_t, z0, + svsel_u64_x4 (pn8, z4, z28), + svsel (pn8, z4, z28)) + +/* +** sel_z4_pn8_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sel {z4\.d - z7\.d}, pn8, {[^}]+}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (sel_z4_pn8_z18_z0, svuint64x4_t, z4, + svsel_u64_x4 (pn8, z18, z0), + svsel (pn8, z18, z0)) + +/* +** sel_z18_pn15_z28_z4: +** sel {[^}]+}, pn15, {z28\.d - z31\.d}, {z4\.d - z7\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (sel_z18_pn15_z28_z4, svuint64x4_t, z18, + svsel_u64_x4 (pn15, z28, z4), + svsel (pn15, z28, z4)) + +/* +** sel_z18_pn8_z18_z4: +** mov 
[^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sel {[^}]+}, pn8, {[^}]+}, {z4\.d - z7\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (sel_z18_pn8_z18_z4, svuint64x4_t, z18, + svsel_u64_x4 (pn8, z18, z4), + svsel (pn8, z18, z4)) + +/* +** sel_z23_pn15_z0_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sel [^\n]+, pn15, {z0\.d - z3\.d}, {[^}]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (sel_z23_pn15_z0_z18, svuint64x4_t, z23, + svsel_u64_x4 (pn15, z0, z18), + svsel (pn15, z0, z18)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..edcd1e92fa8a4f23d87ef946cade218610c2efe9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u8_x2.c @@ -0,0 +1,92 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** sel_z0_pn0_z0_z4: +** mov p([0-9]+)\.b, p0\.b +** sel {z0\.b - z1\.b}, pn\1, {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (sel_z0_pn0_z0_z4, svuint8x2_t, z0, + svsel_u8_x2 (pn0, z0, z4), + svsel (pn0, z0, z4)) + +/* +** sel_z0_pn7_z0_z4: +** mov p([0-9]+)\.b, p7\.b +** sel {z0\.b - z1\.b}, pn\1, {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (sel_z0_pn7_z0_z4, svuint8x2_t, z0, + svsel_u8_x2 (pn7, z0, z4), + svsel (pn7, z0, z4)) + +/* +** sel_z0_pn8_z4_z28: +** sel {z0\.b - z1\.b}, pn8, {z4\.b - z5\.b}, {z28\.b - z29\.b} +** ret +*/ +TEST_XN (sel_z0_pn8_z4_z28, svuint8x2_t, z0, + svsel_u8_x2 (pn8, z4, z28), + svsel (pn8, z4, z28)) + +/* +** sel_z4_pn8_z18_z0: +** sel {z4\.b - z5\.b}, pn8, {z18\.b - z19\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_XN (sel_z4_pn8_z18_z0, svuint8x2_t, z4, + svsel_u8_x2 (pn8, z18, z0), + svsel (pn8, z18, z0)) + +/* +** sel_z18_pn15_z28_z4: +** sel {z18\.b - z19\.b}, pn15, {z28\.b - z29\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (sel_z18_pn15_z28_z4, svuint8x2_t, z18, + svsel_u8_x2 (pn15, z28, z4), + svsel (pn15, z28, z4)) + +/* +** sel_z18_pn8_z18_z4: +** sel {z18\.b - z19\.b}, pn8, {z18\.b - z19\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_XN (sel_z18_pn8_z18_z4, svuint8x2_t, z18, + svsel_u8_x2 (pn8, z18, z4), + svsel (pn8, z18, z4)) + +/* +** sel_z23_pn15_z0_z18: +** sel [^\n]+, pn15, {z0\.b - z1\.b}, {z18\.b - z19\.b} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (sel_z23_pn15_z0_z18, svuint8x2_t, z23, + svsel_u8_x2 (pn15, z0, z18), + svsel (pn15, z0, z18)) + +/* +** sel_z0_pn15_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** sel {z0\.b - z1\.b}, pn15, {[^}]+}, {z28\.b - z29\.b} +** ret +*/ +TEST_XN (sel_z0_pn15_z23_z28, svuint8x2_t, z0, + svsel_u8_x2 (pn15, z23, z28), + svsel (pn15, z23, z28)) + +/* +** sel_z0_pn8_z28_z23: +** mov [^\n]+ +** mov [^\n]+ +** sel {z0\.b - z1\.b}, pn8, {z28\.b - z29\.b}, {[^}]+} +** ret +*/ +TEST_XN (sel_z0_pn8_z28_z23, svuint8x2_t, z0, + svsel_u8_x2 (pn8, z28, z23), + svsel (pn8, z28, z23)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..3a4557420ab527a19edb728080d02e6bf6e1958a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_u8_x4.c @@ -0,0 +1,92 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** sel_z0_pn0_z0_z4: +** mov p([0-9]+)\.b, p0\.b +** sel {z0\.b - z3\.b}, 
pn\1, {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (sel_z0_pn0_z0_z4, svuint8x4_t, z0, + svsel_u8_x4 (pn0, z0, z4), + svsel (pn0, z0, z4)) + +/* +** sel_z0_pn7_z0_z4: +** mov p([0-9]+)\.b, p7\.b +** sel {z0\.b - z3\.b}, pn\1, {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (sel_z0_pn7_z0_z4, svuint8x4_t, z0, + svsel_u8_x4 (pn7, z0, z4), + svsel (pn7, z0, z4)) + +/* +** sel_z0_pn8_z4_z28: +** sel {z0\.b - z3\.b}, pn8, {z4\.b - z7\.b}, {z28\.b - z31\.b} +** ret +*/ +TEST_XN (sel_z0_pn8_z4_z28, svuint8x4_t, z0, + svsel_u8_x4 (pn8, z4, z28), + svsel (pn8, z4, z28)) + +/* +** sel_z4_pn8_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sel {z4\.b - z7\.b}, pn8, {[^}]+}, {z0\.b - z3\.b} +** ret +*/ +TEST_XN (sel_z4_pn8_z18_z0, svuint8x4_t, z4, + svsel_u8_x4 (pn8, z18, z0), + svsel (pn8, z18, z0)) + +/* +** sel_z18_pn15_z28_z4: +** sel {[^}]+}, pn15, {z28\.b - z31\.b}, {z4\.b - z7\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (sel_z18_pn15_z28_z4, svuint8x4_t, z18, + svsel_u8_x4 (pn15, z28, z4), + svsel (pn15, z28, z4)) + +/* +** sel_z18_pn8_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sel {[^}]+}, pn8, {[^}]+}, {z4\.b - z7\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (sel_z18_pn8_z18_z4, svuint8x4_t, z18, + svsel_u8_x4 (pn8, z18, z4), + svsel (pn8, z18, z4)) + +/* +** sel_z23_pn15_z0_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sel [^\n]+, pn15, {z0\.b - z3\.b}, {[^}]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (sel_z23_pn15_z0_z18, svuint8x4_t, z23, + svsel_u8_x4 (pn15, z0, z18), + svsel (pn15, z0, z18)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_bf16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..904135b9084328d15f7cb4586eb44d2fcd82fce2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_bf16_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_bf16_base: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_base, svbfloat16x2_t, bfloat16_t, + svst1_bf16_x2 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_bf16_index: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_index, svbfloat16x2_t, bfloat16_t, + svst1_bf16_x2 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_bf16_1: +** incb x0 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_1, svbfloat16x2_t, bfloat16_t, + svst1_bf16_x2 (pn8, x0 + svcnth (), z0), + svst1 (pn8, x0 + svcnth (), z0)) + +/* +** st1_bf16_2: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_2, svbfloat16x2_t, bfloat16_t, + svst1_bf16_x2 (pn8, x0 + svcnth () * 2, z0), + svst1 (pn8, x0 + svcnth () * 2, z0)) + +/* +** st1_bf16_14: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_14, svbfloat16x2_t, bfloat16_t, + svst1_bf16_x2 (pn8, x0 + svcnth () * 14, z0), + svst1 (pn8, x0 + svcnth () * 14, z0)) + +/* Moving the constant into a register would also be OK. 
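+   As a sketch of that alternative (x3 here is just an illustrative
+   scratch register; the allocator could pick any), the sequence
+     cnth x3, all, mul #16
+     st1h {z0.h - z1.h}, pn8, [x0, x3, lsl #1]
+   forms the same address via the register-offset mode that the
+   st1_bf16_index test above already matches.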
*/ +/* +** st1_bf16_16: +** incb x0, all, mul #16 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_16, svbfloat16x2_t, bfloat16_t, + svst1_bf16_x2 (pn8, x0 + svcnth () * 16, z0), + svst1 (pn8, x0 + svcnth () * 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_bf16_m1: +** decb x0 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_m1, svbfloat16x2_t, bfloat16_t, + svst1_bf16_x2 (pn8, x0 - svcnth (), z0), + svst1 (pn8, x0 - svcnth (), z0)) + +/* +** st1_bf16_m2: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_m2, svbfloat16x2_t, bfloat16_t, + svst1_bf16_x2 (pn8, x0 - svcnth () * 2, z0), + svst1 (pn8, x0 - svcnth () * 2, z0)) + +/* +** st1_bf16_m16: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_m16, svbfloat16x2_t, bfloat16_t, + svst1_bf16_x2 (pn8, x0 - svcnth () * 16, z0), + svst1 (pn8, x0 - svcnth () * 16, z0)) + +/* +** st1_bf16_m18: +** addvl (x[0-9]+), x0, #-18 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_m18, svbfloat16x2_t, bfloat16_t, + svst1_bf16_x2 (pn8, x0 - svcnth () * 18, z0), + svst1 (pn8, x0 - svcnth () * 18, z0)) + +/* +** st1_bf16_z17: +** mov [^\n]+ +** mov [^\n]+ +** st1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_z17, svbfloat16x2_t, bfloat16_t, + svst1_bf16_x2 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_bf16_z22: +** st1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_z22, svbfloat16x2_t, bfloat16_t, + svst1_bf16_x2 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_bf16_z28: +** st1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_z28, svbfloat16x2_t, bfloat16_t, + svst1_bf16_x2 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_bf16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_pn0, svbfloat16x2_t, bfloat16_t, + svst1_bf16_x2 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_bf16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_pn7, svbfloat16x2_t, bfloat16_t, + svst1_bf16_x2 (pn7, x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_bf16_pn15: +** st1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_pn15, svbfloat16x2_t, bfloat16_t, + svst1_bf16_x2 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_bf16_0: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_0, svbfloat16x2_t, bfloat16_t, + svst1_vnum_bf16_x2 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_vnum_bf16_1: +** incb x0 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_1, svbfloat16x2_t, bfloat16_t, + svst1_vnum_bf16_x2 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* +** st1_vnum_bf16_2: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_2, svbfloat16x2_t, bfloat16_t, + svst1_vnum_bf16_x2 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* +** st1_vnum_bf16_14: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_14, svbfloat16x2_t, bfloat16_t, + svst1_vnum_bf16_x2 (pn8, x0, 14, z0), + svst1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_bf16_16: +** incb x0, all, mul #16 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_16, svbfloat16x2_t, bfloat16_t, + svst1_vnum_bf16_x2 (pn8, x0, 16, z0), + svst1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_bf16_m1: +** decb x0 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_m1, svbfloat16x2_t, bfloat16_t, + svst1_vnum_bf16_x2 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* +** st1_vnum_bf16_m2: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_m2, svbfloat16x2_t, bfloat16_t, + svst1_vnum_bf16_x2 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* +** st1_vnum_bf16_m16: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_m16, svbfloat16x2_t, bfloat16_t, + svst1_vnum_bf16_x2 (pn8, x0, -16, z0), + svst1_vnum (pn8, x0, -16, z0)) + +/* +** st1_vnum_bf16_m18: +** addvl (x[0-9]+), x0, #-18 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_m18, svbfloat16x2_t, bfloat16_t, + svst1_vnum_bf16_x2 (pn8, x0, -18, z0), + svst1_vnum (pn8, x0, -18, z0)) + +/* +** st1_vnum_bf16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_x1, svbfloat16x2_t, bfloat16_t, + svst1_vnum_bf16_x2 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_bf16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_bf16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..2563b72e63cc343b1638c58d88f036ca2d3dbe87 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_bf16_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_bf16_base: +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_base, svbfloat16x4_t, bfloat16_t, + svst1_bf16_x4 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_bf16_index: +** st1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_index, svbfloat16x4_t, bfloat16_t, + svst1_bf16_x4 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_bf16_1: +** incb x0 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_1, svbfloat16x4_t, bfloat16_t, + svst1_bf16_x4 (pn8, x0 + svcnth (), z0), + svst1 (pn8, x0 + svcnth (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_bf16_2: +** incb x0, all, mul #2 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_2, svbfloat16x4_t, bfloat16_t, + svst1_bf16_x4 (pn8, x0 + svcnth () * 2, z0), + svst1 (pn8, x0 + svcnth () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_bf16_3: +** incb x0, all, mul #3 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_3, svbfloat16x4_t, bfloat16_t, + svst1_bf16_x4 (pn8, x0 + svcnth () * 3, z0), + svst1 (pn8, x0 + svcnth () * 3, z0)) + +/* +** st1_bf16_4: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_4, svbfloat16x4_t, bfloat16_t, + svst1_bf16_x4 (pn8, x0 + svcnth () * 4, z0), + svst1 (pn8, x0 + svcnth () * 4, z0)) + +/* +** st1_bf16_28: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_28, svbfloat16x4_t, bfloat16_t, + svst1_bf16_x4 (pn8, x0 + svcnth () * 28, z0), + svst1 (pn8, x0 + svcnth () * 28, z0)) + +/* +** st1_bf16_32: +** [^{]* +** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_32, svbfloat16x4_t, bfloat16_t, + svst1_bf16_x4 (pn8, x0 + svcnth () * 32, z0), + svst1 (pn8, x0 + svcnth () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_bf16_m1: +** decb x0 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_m1, svbfloat16x4_t, bfloat16_t, + svst1_bf16_x4 (pn8, x0 - svcnth (), z0), + svst1 (pn8, x0 - svcnth (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_bf16_m2: +** decb x0, all, mul #2 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_m2, svbfloat16x4_t, bfloat16_t, + svst1_bf16_x4 (pn8, x0 - svcnth () * 2, z0), + svst1 (pn8, x0 - svcnth () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
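+   For example (x1 below is merely illustrative), an addvl-based
+   sequence such as
+     addvl x1, x0, #-3
+     st1h {z0.h - z3.h}, pn8, [x1]
+   computes the same address without modifying x0, and -3 stays
+   well within addvl's immediate range of [-32, 31].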
*/ +/* +** st1_bf16_m3: +** decb x0, all, mul #3 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_m3, svbfloat16x4_t, bfloat16_t, + svst1_bf16_x4 (pn8, x0 - svcnth () * 3, z0), + svst1 (pn8, x0 - svcnth () * 3, z0)) + +/* +** st1_bf16_m4: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_m4, svbfloat16x4_t, bfloat16_t, + svst1_bf16_x4 (pn8, x0 - svcnth () * 4, z0), + svst1 (pn8, x0 - svcnth () * 4, z0)) + +/* +** st1_bf16_m32: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_m32, svbfloat16x4_t, bfloat16_t, + svst1_bf16_x4 (pn8, x0 - svcnth () * 32, z0), + svst1 (pn8, x0 - svcnth () * 32, z0)) + +/* +** st1_bf16_m36: +** [^{]* +** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_m36, svbfloat16x4_t, bfloat16_t, + svst1_bf16_x4 (pn8, x0 - svcnth () * 36, z0), + svst1 (pn8, x0 - svcnth () * 36, z0)) + +/* +** st1_bf16_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_z17, svbfloat16x4_t, bfloat16_t, + svst1_bf16_x4 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_bf16_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_z22, svbfloat16x4_t, bfloat16_t, + svst1_bf16_x4 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_bf16_z28: +** st1h {z28\.h - z31\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_z28, svbfloat16x4_t, bfloat16_t, + svst1_bf16_x4 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_bf16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1h {z0\.h - z3\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_pn0, svbfloat16x4_t, bfloat16_t, + svst1_bf16_x4 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_bf16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1h {z0\.h - z3\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_pn7, svbfloat16x4_t, bfloat16_t, + svst1_bf16_x4 (pn7, x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_bf16_pn15: +** st1h {z0\.h - z3\.h}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_bf16_pn15, svbfloat16x4_t, bfloat16_t, + svst1_bf16_x4 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_bf16_0: +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_0, svbfloat16x4_t, bfloat16_t, + svst1_vnum_bf16_x4 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_bf16_1: +** incb x0 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_1, svbfloat16x4_t, bfloat16_t, + svst1_vnum_bf16_x4 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_bf16_2: +** incb x0, all, mul #2 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_2, svbfloat16x4_t, bfloat16_t, + svst1_vnum_bf16_x4 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_vnum_bf16_3: +** incb x0, all, mul #3 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_3, svbfloat16x4_t, bfloat16_t, + svst1_vnum_bf16_x4 (pn8, x0, 3, z0), + svst1_vnum (pn8, x0, 3, z0)) + +/* +** st1_vnum_bf16_4: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_4, svbfloat16x4_t, bfloat16_t, + svst1_vnum_bf16_x4 (pn8, x0, 4, z0), + svst1_vnum (pn8, x0, 4, z0)) + +/* +** st1_vnum_bf16_28: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_28, svbfloat16x4_t, bfloat16_t, + svst1_vnum_bf16_x4 (pn8, x0, 28, z0), + svst1_vnum (pn8, x0, 28, z0)) + +/* +** st1_vnum_bf16_32: +** [^{]* +** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_32, svbfloat16x4_t, bfloat16_t, + svst1_vnum_bf16_x4 (pn8, x0, 32, z0), + svst1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_bf16_m1: +** decb x0 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_m1, svbfloat16x4_t, bfloat16_t, + svst1_vnum_bf16_x4 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_bf16_m2: +** decb x0, all, mul #2 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_m2, svbfloat16x4_t, bfloat16_t, + svst1_vnum_bf16_x4 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_bf16_m3: +** decb x0, all, mul #3 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_m3, svbfloat16x4_t, bfloat16_t, + svst1_vnum_bf16_x4 (pn8, x0, -3, z0), + svst1_vnum (pn8, x0, -3, z0)) + +/* +** st1_vnum_bf16_m4: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_m4, svbfloat16x4_t, bfloat16_t, + svst1_vnum_bf16_x4 (pn8, x0, -4, z0), + svst1_vnum (pn8, x0, -4, z0)) + +/* +** st1_vnum_bf16_m32: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_m32, svbfloat16x4_t, bfloat16_t, + svst1_vnum_bf16_x4 (pn8, x0, -32, z0), + svst1_vnum (pn8, x0, -32, z0)) + +/* +** st1_vnum_bf16_m36: +** [^{]* +** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_m36, svbfloat16x4_t, bfloat16_t, + svst1_vnum_bf16_x4 (pn8, x0, -36, z0), + svst1_vnum (pn8, x0, -36, z0)) + +/* +** st1_vnum_bf16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1h {z0\.h - z3\.h}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1h {z0\.h - z3\.h}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_bf16_x1, svbfloat16x4_t, bfloat16_t, + svst1_vnum_bf16_x4 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..5b0c12b26cf4301db1437c92c453412852002d02 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f16_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_f16_base: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_base, svfloat16x2_t, float16_t, + svst1_f16_x2 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_f16_index: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_index, svfloat16x2_t, float16_t, + svst1_f16_x2 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f16_1: +** incb x0 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_1, svfloat16x2_t, float16_t, + svst1_f16_x2 (pn8, x0 + svcnth (), z0), + svst1 (pn8, x0 + svcnth (), z0)) + +/* +** st1_f16_2: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_2, svfloat16x2_t, float16_t, + svst1_f16_x2 (pn8, x0 + svcnth () * 2, z0), + svst1 (pn8, x0 + svcnth () * 2, z0)) + +/* +** st1_f16_14: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_14, svfloat16x2_t, float16_t, + svst1_f16_x2 (pn8, x0 + svcnth () * 14, z0), + svst1 (pn8, x0 + svcnth () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f16_16: +** incb x0, all, mul #16 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_16, svfloat16x2_t, float16_t, + svst1_f16_x2 (pn8, x0 + svcnth () * 16, z0), + svst1 (pn8, x0 + svcnth () * 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f16_m1: +** decb x0 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_m1, svfloat16x2_t, float16_t, + svst1_f16_x2 (pn8, x0 - svcnth (), z0), + svst1 (pn8, x0 - svcnth (), z0)) + +/* +** st1_f16_m2: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_m2, svfloat16x2_t, float16_t, + svst1_f16_x2 (pn8, x0 - svcnth () * 2, z0), + svst1 (pn8, x0 - svcnth () * 2, z0)) + +/* +** st1_f16_m16: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_m16, svfloat16x2_t, float16_t, + svst1_f16_x2 (pn8, x0 - svcnth () * 16, z0), + svst1 (pn8, x0 - svcnth () * 16, z0)) + +/* +** st1_f16_m18: +** addvl (x[0-9]+), x0, #-18 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_m18, svfloat16x2_t, float16_t, + svst1_f16_x2 (pn8, x0 - svcnth () * 18, z0), + svst1 (pn8, x0 - svcnth () * 18, z0)) + +/* +** st1_f16_z17: +** mov [^\n]+ +** mov [^\n]+ +** st1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_z17, svfloat16x2_t, float16_t, + svst1_f16_x2 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_f16_z22: +** st1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_z22, svfloat16x2_t, float16_t, + svst1_f16_x2 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_f16_z28: +** st1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_z28, svfloat16x2_t, float16_t, + svst1_f16_x2 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_f16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_pn0, svfloat16x2_t, float16_t, + svst1_f16_x2 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_f16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_pn7, svfloat16x2_t, float16_t, + svst1_f16_x2 (pn7, 
x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_f16_pn15: +** st1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_pn15, svfloat16x2_t, float16_t, + svst1_f16_x2 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_f16_0: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_0, svfloat16x2_t, float16_t, + svst1_vnum_f16_x2 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f16_1: +** incb x0 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_1, svfloat16x2_t, float16_t, + svst1_vnum_f16_x2 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* +** st1_vnum_f16_2: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_2, svfloat16x2_t, float16_t, + svst1_vnum_f16_x2 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* +** st1_vnum_f16_14: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_14, svfloat16x2_t, float16_t, + svst1_vnum_f16_x2 (pn8, x0, 14, z0), + svst1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f16_16: +** incb x0, all, mul #16 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_16, svfloat16x2_t, float16_t, + svst1_vnum_f16_x2 (pn8, x0, 16, z0), + svst1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f16_m1: +** decb x0 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_m1, svfloat16x2_t, float16_t, + svst1_vnum_f16_x2 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* +** st1_vnum_f16_m2: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_m2, svfloat16x2_t, float16_t, + svst1_vnum_f16_x2 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* +** st1_vnum_f16_m16: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_m16, svfloat16x2_t, float16_t, + svst1_vnum_f16_x2 (pn8, x0, -16, z0), + svst1_vnum (pn8, x0, -16, z0)) + +/* +** st1_vnum_f16_m18: +** addvl (x[0-9]+), x0, #-18 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_m18, svfloat16x2_t, float16_t, + svst1_vnum_f16_x2 (pn8, x0, -18, z0), + svst1_vnum (pn8, x0, -18, z0)) + +/* +** st1_vnum_f16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_x1, svfloat16x2_t, float16_t, + svst1_vnum_f16_x2 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..da0617cd97db4bb7b63783c791e56b0ed7641225 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f16_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_f16_base: +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_base, svfloat16x4_t, float16_t, + svst1_f16_x4 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_f16_index: +** st1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_index, svfloat16x4_t, float16_t, + svst1_f16_x4 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f16_1: +** incb x0 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_1, svfloat16x4_t, float16_t, + svst1_f16_x4 (pn8, x0 + svcnth (), z0), + svst1 (pn8, x0 + svcnth (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f16_2: +** incb x0, all, mul #2 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_2, svfloat16x4_t, float16_t, + svst1_f16_x4 (pn8, x0 + svcnth () * 2, z0), + svst1 (pn8, x0 + svcnth () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f16_3: +** incb x0, all, mul #3 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_3, svfloat16x4_t, float16_t, + svst1_f16_x4 (pn8, x0 + svcnth () * 3, z0), + svst1 (pn8, x0 + svcnth () * 3, z0)) + +/* +** st1_f16_4: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_4, svfloat16x4_t, float16_t, + svst1_f16_x4 (pn8, x0 + svcnth () * 4, z0), + svst1 (pn8, x0 + svcnth () * 4, z0)) + +/* +** st1_f16_28: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_28, svfloat16x4_t, float16_t, + svst1_f16_x4 (pn8, x0 + svcnth () * 28, z0), + svst1 (pn8, x0 + svcnth () * 28, z0)) + +/* +** st1_f16_32: +** [^{]* +** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_32, svfloat16x4_t, float16_t, + svst1_f16_x4 (pn8, x0 + svcnth () * 32, z0), + svst1 (pn8, x0 + svcnth () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f16_m1: +** decb x0 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_m1, svfloat16x4_t, float16_t, + svst1_f16_x4 (pn8, x0 - svcnth (), z0), + svst1 (pn8, x0 - svcnth (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f16_m2: +** decb x0, all, mul #2 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_m2, svfloat16x4_t, float16_t, + svst1_f16_x4 (pn8, x0 - svcnth () * 2, z0), + svst1 (pn8, x0 - svcnth () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_f16_m3: +** decb x0, all, mul #3 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_m3, svfloat16x4_t, float16_t, + svst1_f16_x4 (pn8, x0 - svcnth () * 3, z0), + svst1 (pn8, x0 - svcnth () * 3, z0)) + +/* +** st1_f16_m4: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_m4, svfloat16x4_t, float16_t, + svst1_f16_x4 (pn8, x0 - svcnth () * 4, z0), + svst1 (pn8, x0 - svcnth () * 4, z0)) + +/* +** st1_f16_m32: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_m32, svfloat16x4_t, float16_t, + svst1_f16_x4 (pn8, x0 - svcnth () * 32, z0), + svst1 (pn8, x0 - svcnth () * 32, z0)) + +/* +** st1_f16_m36: +** [^{]* +** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_m36, svfloat16x4_t, float16_t, + svst1_f16_x4 (pn8, x0 - svcnth () * 36, z0), + svst1 (pn8, x0 - svcnth () * 36, z0)) + +/* +** st1_f16_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_z17, svfloat16x4_t, float16_t, + svst1_f16_x4 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_f16_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_z22, svfloat16x4_t, float16_t, + svst1_f16_x4 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_f16_z28: +** st1h {z28\.h - z31\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_z28, svfloat16x4_t, float16_t, + svst1_f16_x4 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_f16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1h {z0\.h - z3\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_pn0, svfloat16x4_t, float16_t, + svst1_f16_x4 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_f16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1h {z0\.h - z3\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_pn7, svfloat16x4_t, float16_t, + svst1_f16_x4 (pn7, x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_f16_pn15: +** st1h {z0\.h - z3\.h}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f16_pn15, svfloat16x4_t, float16_t, + svst1_f16_x4 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_f16_0: +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_0, svfloat16x4_t, float16_t, + svst1_vnum_f16_x4 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f16_1: +** incb x0 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_1, svfloat16x4_t, float16_t, + svst1_vnum_f16_x4 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f16_2: +** incb x0, all, mul #2 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_2, svfloat16x4_t, float16_t, + svst1_vnum_f16_x4 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_vnum_f16_3: +** incb x0, all, mul #3 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_3, svfloat16x4_t, float16_t, + svst1_vnum_f16_x4 (pn8, x0, 3, z0), + svst1_vnum (pn8, x0, 3, z0)) + +/* +** st1_vnum_f16_4: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_4, svfloat16x4_t, float16_t, + svst1_vnum_f16_x4 (pn8, x0, 4, z0), + svst1_vnum (pn8, x0, 4, z0)) + +/* +** st1_vnum_f16_28: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_28, svfloat16x4_t, float16_t, + svst1_vnum_f16_x4 (pn8, x0, 28, z0), + svst1_vnum (pn8, x0, 28, z0)) + +/* +** st1_vnum_f16_32: +** [^{]* +** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_32, svfloat16x4_t, float16_t, + svst1_vnum_f16_x4 (pn8, x0, 32, z0), + svst1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f16_m1: +** decb x0 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_m1, svfloat16x4_t, float16_t, + svst1_vnum_f16_x4 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f16_m2: +** decb x0, all, mul #2 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_m2, svfloat16x4_t, float16_t, + svst1_vnum_f16_x4 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f16_m3: +** decb x0, all, mul #3 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_m3, svfloat16x4_t, float16_t, + svst1_vnum_f16_x4 (pn8, x0, -3, z0), + svst1_vnum (pn8, x0, -3, z0)) + +/* +** st1_vnum_f16_m4: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_m4, svfloat16x4_t, float16_t, + svst1_vnum_f16_x4 (pn8, x0, -4, z0), + svst1_vnum (pn8, x0, -4, z0)) + +/* +** st1_vnum_f16_m32: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_m32, svfloat16x4_t, float16_t, + svst1_vnum_f16_x4 (pn8, x0, -32, z0), + svst1_vnum (pn8, x0, -32, z0)) + +/* +** st1_vnum_f16_m36: +** [^{]* +** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_m36, svfloat16x4_t, float16_t, + svst1_vnum_f16_x4 (pn8, x0, -36, z0), + svst1_vnum (pn8, x0, -36, z0)) + +/* +** st1_vnum_f16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1h {z0\.h - z3\.h}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1h {z0\.h - z3\.h}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f16_x1, svfloat16x4_t, float16_t, + svst1_vnum_f16_x4 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..f0114b61e6855939f88eb6fff1c813dce420fa54 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f32_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_f32_base: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_base, svfloat32x2_t, float32_t, + svst1_f32_x2 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_f32_index: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_index, svfloat32x2_t, float32_t, + svst1_f32_x2 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f32_1: +** incb x0 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_1, svfloat32x2_t, float32_t, + svst1_f32_x2 (pn8, x0 + svcntw (), z0), + svst1 (pn8, x0 + svcntw (), z0)) + +/* +** st1_f32_2: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_2, svfloat32x2_t, float32_t, + svst1_f32_x2 (pn8, x0 + svcntw () * 2, z0), + svst1 (pn8, x0 + svcntw () * 2, z0)) + +/* +** st1_f32_14: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_14, svfloat32x2_t, float32_t, + svst1_f32_x2 (pn8, x0 + svcntw () * 14, z0), + svst1 (pn8, x0 + svcntw () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f32_16: +** incb x0, all, mul #16 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_16, svfloat32x2_t, float32_t, + svst1_f32_x2 (pn8, x0 + svcntw () * 16, z0), + svst1 (pn8, x0 + svcntw () * 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f32_m1: +** decb x0 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_m1, svfloat32x2_t, float32_t, + svst1_f32_x2 (pn8, x0 - svcntw (), z0), + svst1 (pn8, x0 - svcntw (), z0)) + +/* +** st1_f32_m2: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_m2, svfloat32x2_t, float32_t, + svst1_f32_x2 (pn8, x0 - svcntw () * 2, z0), + svst1 (pn8, x0 - svcntw () * 2, z0)) + +/* +** st1_f32_m16: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_m16, svfloat32x2_t, float32_t, + svst1_f32_x2 (pn8, x0 - svcntw () * 16, z0), + svst1 (pn8, x0 - svcntw () * 16, z0)) + +/* +** st1_f32_m18: +** addvl (x[0-9]+), x0, #-18 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_m18, svfloat32x2_t, float32_t, + svst1_f32_x2 (pn8, x0 - svcntw () * 18, z0), + svst1 (pn8, x0 - svcntw () * 18, z0)) + +/* +** st1_f32_z17: +** mov [^\n]+ +** mov [^\n]+ +** st1w {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_z17, svfloat32x2_t, float32_t, + svst1_f32_x2 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_f32_z22: +** st1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_z22, svfloat32x2_t, float32_t, + svst1_f32_x2 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_f32_z28: +** st1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_z28, svfloat32x2_t, float32_t, + svst1_f32_x2 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_f32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_pn0, svfloat32x2_t, float32_t, + svst1_f32_x2 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_f32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_pn7, svfloat32x2_t, float32_t, + svst1_f32_x2 (pn7, 
x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_f32_pn15: +** st1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_pn15, svfloat32x2_t, float32_t, + svst1_f32_x2 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_f32_0: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_0, svfloat32x2_t, float32_t, + svst1_vnum_f32_x2 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f32_1: +** incb x0 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_1, svfloat32x2_t, float32_t, + svst1_vnum_f32_x2 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* +** st1_vnum_f32_2: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_2, svfloat32x2_t, float32_t, + svst1_vnum_f32_x2 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* +** st1_vnum_f32_14: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_14, svfloat32x2_t, float32_t, + svst1_vnum_f32_x2 (pn8, x0, 14, z0), + svst1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f32_16: +** incb x0, all, mul #16 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_16, svfloat32x2_t, float32_t, + svst1_vnum_f32_x2 (pn8, x0, 16, z0), + svst1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f32_m1: +** decb x0 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_m1, svfloat32x2_t, float32_t, + svst1_vnum_f32_x2 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* +** st1_vnum_f32_m2: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_m2, svfloat32x2_t, float32_t, + svst1_vnum_f32_x2 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* +** st1_vnum_f32_m16: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_m16, svfloat32x2_t, float32_t, + svst1_vnum_f32_x2 (pn8, x0, -16, z0), + svst1_vnum (pn8, x0, -16, z0)) + +/* +** st1_vnum_f32_m18: +** addvl (x[0-9]+), x0, #-18 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_m18, svfloat32x2_t, float32_t, + svst1_vnum_f32_x2 (pn8, x0, -18, z0), + svst1_vnum (pn8, x0, -18, z0)) + +/* +** st1_vnum_f32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_x1, svfloat32x2_t, float32_t, + svst1_vnum_f32_x2 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..f861a9a8a4c91422500012393d3105be1276f4bb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f32_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_f32_base: +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_base, svfloat32x4_t, float32_t, + svst1_f32_x4 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_f32_index: +** st1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_index, svfloat32x4_t, float32_t, + svst1_f32_x4 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f32_1: +** incb x0 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_1, svfloat32x4_t, float32_t, + svst1_f32_x4 (pn8, x0 + svcntw (), z0), + svst1 (pn8, x0 + svcntw (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f32_2: +** incb x0, all, mul #2 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_2, svfloat32x4_t, float32_t, + svst1_f32_x4 (pn8, x0 + svcntw () * 2, z0), + svst1 (pn8, x0 + svcntw () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f32_3: +** incb x0, all, mul #3 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_3, svfloat32x4_t, float32_t, + svst1_f32_x4 (pn8, x0 + svcntw () * 3, z0), + svst1 (pn8, x0 + svcntw () * 3, z0)) + +/* +** st1_f32_4: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_4, svfloat32x4_t, float32_t, + svst1_f32_x4 (pn8, x0 + svcntw () * 4, z0), + svst1 (pn8, x0 + svcntw () * 4, z0)) + +/* +** st1_f32_28: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_28, svfloat32x4_t, float32_t, + svst1_f32_x4 (pn8, x0 + svcntw () * 28, z0), + svst1 (pn8, x0 + svcntw () * 28, z0)) + +/* +** st1_f32_32: +** [^{]* +** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_32, svfloat32x4_t, float32_t, + svst1_f32_x4 (pn8, x0 + svcntw () * 32, z0), + svst1 (pn8, x0 + svcntw () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f32_m1: +** decb x0 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_m1, svfloat32x4_t, float32_t, + svst1_f32_x4 (pn8, x0 - svcntw (), z0), + svst1 (pn8, x0 - svcntw (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f32_m2: +** decb x0, all, mul #2 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_m2, svfloat32x4_t, float32_t, + svst1_f32_x4 (pn8, x0 - svcntw () * 2, z0), + svst1 (pn8, x0 - svcntw () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
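+   Concretely, something along these lines (x3 is an illustrative
+   scratch register) would be equally valid:
+     cntb x3, all, mul #3
+     sub x3, x0, x3
+     st1w {z0.s - z3.s}, pn8, [x3]
+   i.e. materialise the 3-VL byte offset and subtract it, much as
+   the st1_vnum_f32_x1 test's cntb/madd sequence does for a
+   runtime offset.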
*/ +/* +** st1_f32_m3: +** decb x0, all, mul #3 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_m3, svfloat32x4_t, float32_t, + svst1_f32_x4 (pn8, x0 - svcntw () * 3, z0), + svst1 (pn8, x0 - svcntw () * 3, z0)) + +/* +** st1_f32_m4: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_m4, svfloat32x4_t, float32_t, + svst1_f32_x4 (pn8, x0 - svcntw () * 4, z0), + svst1 (pn8, x0 - svcntw () * 4, z0)) + +/* +** st1_f32_m32: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_m32, svfloat32x4_t, float32_t, + svst1_f32_x4 (pn8, x0 - svcntw () * 32, z0), + svst1 (pn8, x0 - svcntw () * 32, z0)) + +/* +** st1_f32_m36: +** [^{]* +** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_m36, svfloat32x4_t, float32_t, + svst1_f32_x4 (pn8, x0 - svcntw () * 36, z0), + svst1 (pn8, x0 - svcntw () * 36, z0)) + +/* +** st1_f32_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1w {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_z17, svfloat32x4_t, float32_t, + svst1_f32_x4 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_f32_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1w {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_z22, svfloat32x4_t, float32_t, + svst1_f32_x4 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_f32_z28: +** st1w {z28\.s - z31\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_z28, svfloat32x4_t, float32_t, + svst1_f32_x4 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_f32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1w {z0\.s - z3\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_pn0, svfloat32x4_t, float32_t, + svst1_f32_x4 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_f32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1w {z0\.s - z3\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_pn7, svfloat32x4_t, float32_t, + svst1_f32_x4 (pn7, x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_f32_pn15: +** st1w {z0\.s - z3\.s}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f32_pn15, svfloat32x4_t, float32_t, + svst1_f32_x4 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_f32_0: +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_0, svfloat32x4_t, float32_t, + svst1_vnum_f32_x4 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f32_1: +** incb x0 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_1, svfloat32x4_t, float32_t, + svst1_vnum_f32_x4 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f32_2: +** incb x0, all, mul #2 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_2, svfloat32x4_t, float32_t, + svst1_vnum_f32_x4 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_vnum_f32_3: +** incb x0, all, mul #3 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_3, svfloat32x4_t, float32_t, + svst1_vnum_f32_x4 (pn8, x0, 3, z0), + svst1_vnum (pn8, x0, 3, z0)) + +/* +** st1_vnum_f32_4: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_4, svfloat32x4_t, float32_t, + svst1_vnum_f32_x4 (pn8, x0, 4, z0), + svst1_vnum (pn8, x0, 4, z0)) + +/* +** st1_vnum_f32_28: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_28, svfloat32x4_t, float32_t, + svst1_vnum_f32_x4 (pn8, x0, 28, z0), + svst1_vnum (pn8, x0, 28, z0)) + +/* +** st1_vnum_f32_32: +** [^{]* +** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_32, svfloat32x4_t, float32_t, + svst1_vnum_f32_x4 (pn8, x0, 32, z0), + svst1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f32_m1: +** decb x0 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_m1, svfloat32x4_t, float32_t, + svst1_vnum_f32_x4 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f32_m2: +** decb x0, all, mul #2 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_m2, svfloat32x4_t, float32_t, + svst1_vnum_f32_x4 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f32_m3: +** decb x0, all, mul #3 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_m3, svfloat32x4_t, float32_t, + svst1_vnum_f32_x4 (pn8, x0, -3, z0), + svst1_vnum (pn8, x0, -3, z0)) + +/* +** st1_vnum_f32_m4: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_m4, svfloat32x4_t, float32_t, + svst1_vnum_f32_x4 (pn8, x0, -4, z0), + svst1_vnum (pn8, x0, -4, z0)) + +/* +** st1_vnum_f32_m32: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_m32, svfloat32x4_t, float32_t, + svst1_vnum_f32_x4 (pn8, x0, -32, z0), + svst1_vnum (pn8, x0, -32, z0)) + +/* +** st1_vnum_f32_m36: +** [^{]* +** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_m36, svfloat32x4_t, float32_t, + svst1_vnum_f32_x4 (pn8, x0, -36, z0), + svst1_vnum (pn8, x0, -36, z0)) + +/* +** st1_vnum_f32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1w {z0\.s - z3\.s}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1w {z0\.s - z3\.s}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f32_x1, svfloat32x4_t, float32_t, + svst1_vnum_f32_x4 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..e373ea54bd1b209484b511c5efb4273486d5e7b3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f64_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_f64_base: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_base, svfloat64x2_t, float64_t, + svst1_f64_x2 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_f64_index: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_index, svfloat64x2_t, float64_t, + svst1_f64_x2 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f64_1: +** incb x0 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_1, svfloat64x2_t, float64_t, + svst1_f64_x2 (pn8, x0 + svcntd (), z0), + svst1 (pn8, x0 + svcntd (), z0)) + +/* +** st1_f64_2: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_2, svfloat64x2_t, float64_t, + svst1_f64_x2 (pn8, x0 + svcntd () * 2, z0), + svst1 (pn8, x0 + svcntd () * 2, z0)) + +/* +** st1_f64_14: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_14, svfloat64x2_t, float64_t, + svst1_f64_x2 (pn8, x0 + svcntd () * 14, z0), + svst1 (pn8, x0 + svcntd () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f64_16: +** incb x0, all, mul #16 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_16, svfloat64x2_t, float64_t, + svst1_f64_x2 (pn8, x0 + svcntd () * 16, z0), + svst1 (pn8, x0 + svcntd () * 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f64_m1: +** decb x0 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_m1, svfloat64x2_t, float64_t, + svst1_f64_x2 (pn8, x0 - svcntd (), z0), + svst1 (pn8, x0 - svcntd (), z0)) + +/* +** st1_f64_m2: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_m2, svfloat64x2_t, float64_t, + svst1_f64_x2 (pn8, x0 - svcntd () * 2, z0), + svst1 (pn8, x0 - svcntd () * 2, z0)) + +/* +** st1_f64_m16: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_m16, svfloat64x2_t, float64_t, + svst1_f64_x2 (pn8, x0 - svcntd () * 16, z0), + svst1 (pn8, x0 - svcntd () * 16, z0)) + +/* +** st1_f64_m18: +** addvl (x[0-9]+), x0, #-18 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_m18, svfloat64x2_t, float64_t, + svst1_f64_x2 (pn8, x0 - svcntd () * 18, z0), + svst1 (pn8, x0 - svcntd () * 18, z0)) + +/* +** st1_f64_z17: +** mov [^\n]+ +** mov [^\n]+ +** st1d {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_z17, svfloat64x2_t, float64_t, + svst1_f64_x2 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_f64_z22: +** st1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_z22, svfloat64x2_t, float64_t, + svst1_f64_x2 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_f64_z28: +** st1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_z28, svfloat64x2_t, float64_t, + svst1_f64_x2 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_f64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_pn0, svfloat64x2_t, float64_t, + svst1_f64_x2 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_f64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_pn7, svfloat64x2_t, float64_t, + svst1_f64_x2 (pn7, 
x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_f64_pn15: +** st1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_pn15, svfloat64x2_t, float64_t, + svst1_f64_x2 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_f64_0: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_0, svfloat64x2_t, float64_t, + svst1_vnum_f64_x2 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f64_1: +** incb x0 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_1, svfloat64x2_t, float64_t, + svst1_vnum_f64_x2 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* +** st1_vnum_f64_2: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_2, svfloat64x2_t, float64_t, + svst1_vnum_f64_x2 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* +** st1_vnum_f64_14: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_14, svfloat64x2_t, float64_t, + svst1_vnum_f64_x2 (pn8, x0, 14, z0), + svst1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f64_16: +** incb x0, all, mul #16 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_16, svfloat64x2_t, float64_t, + svst1_vnum_f64_x2 (pn8, x0, 16, z0), + svst1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f64_m1: +** decb x0 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_m1, svfloat64x2_t, float64_t, + svst1_vnum_f64_x2 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* +** st1_vnum_f64_m2: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_m2, svfloat64x2_t, float64_t, + svst1_vnum_f64_x2 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* +** st1_vnum_f64_m16: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_m16, svfloat64x2_t, float64_t, + svst1_vnum_f64_x2 (pn8, x0, -16, z0), + svst1_vnum (pn8, x0, -16, z0)) + +/* +** st1_vnum_f64_m18: +** addvl (x[0-9]+), x0, #-18 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_m18, svfloat64x2_t, float64_t, + svst1_vnum_f64_x2 (pn8, x0, -18, z0), + svst1_vnum (pn8, x0, -18, z0)) + +/* +** st1_vnum_f64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_x1, svfloat64x2_t, float64_t, + svst1_vnum_f64_x2 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..d7b218250d84b0bc316122c03a114c236d766dd1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_f64_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_f64_base: +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_base, svfloat64x4_t, float64_t, + svst1_f64_x4 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_f64_index: +** st1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_index, svfloat64x4_t, float64_t, + svst1_f64_x4 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f64_1: +** incb x0 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_1, svfloat64x4_t, float64_t, + svst1_f64_x4 (pn8, x0 + svcntd (), z0), + svst1 (pn8, x0 + svcntd (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f64_2: +** incb x0, all, mul #2 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_2, svfloat64x4_t, float64_t, + svst1_f64_x4 (pn8, x0 + svcntd () * 2, z0), + svst1 (pn8, x0 + svcntd () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f64_3: +** incb x0, all, mul #3 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_3, svfloat64x4_t, float64_t, + svst1_f64_x4 (pn8, x0 + svcntd () * 3, z0), + svst1 (pn8, x0 + svcntd () * 3, z0)) + +/* +** st1_f64_4: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_4, svfloat64x4_t, float64_t, + svst1_f64_x4 (pn8, x0 + svcntd () * 4, z0), + svst1 (pn8, x0 + svcntd () * 4, z0)) + +/* +** st1_f64_28: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_28, svfloat64x4_t, float64_t, + svst1_f64_x4 (pn8, x0 + svcntd () * 28, z0), + svst1 (pn8, x0 + svcntd () * 28, z0)) + +/* +** st1_f64_32: +** [^{]* +** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_32, svfloat64x4_t, float64_t, + svst1_f64_x4 (pn8, x0 + svcntd () * 32, z0), + svst1 (pn8, x0 + svcntd () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f64_m1: +** decb x0 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_m1, svfloat64x4_t, float64_t, + svst1_f64_x4 (pn8, x0 - svcntd (), z0), + svst1 (pn8, x0 - svcntd (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_f64_m2: +** decb x0, all, mul #2 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_m2, svfloat64x4_t, float64_t, + svst1_f64_x4 (pn8, x0 - svcntd () * 2, z0), + svst1 (pn8, x0 - svcntd () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_f64_m3: +** decb x0, all, mul #3 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_m3, svfloat64x4_t, float64_t, + svst1_f64_x4 (pn8, x0 - svcntd () * 3, z0), + svst1 (pn8, x0 - svcntd () * 3, z0)) + +/* +** st1_f64_m4: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_m4, svfloat64x4_t, float64_t, + svst1_f64_x4 (pn8, x0 - svcntd () * 4, z0), + svst1 (pn8, x0 - svcntd () * 4, z0)) + +/* +** st1_f64_m32: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_m32, svfloat64x4_t, float64_t, + svst1_f64_x4 (pn8, x0 - svcntd () * 32, z0), + svst1 (pn8, x0 - svcntd () * 32, z0)) + +/* +** st1_f64_m36: +** [^{]* +** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_m36, svfloat64x4_t, float64_t, + svst1_f64_x4 (pn8, x0 - svcntd () * 36, z0), + svst1 (pn8, x0 - svcntd () * 36, z0)) + +/* +** st1_f64_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1d {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_z17, svfloat64x4_t, float64_t, + svst1_f64_x4 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_f64_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1d {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_z22, svfloat64x4_t, float64_t, + svst1_f64_x4 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_f64_z28: +** st1d {z28\.d - z31\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_z28, svfloat64x4_t, float64_t, + svst1_f64_x4 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_f64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1d {z0\.d - z3\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_pn0, svfloat64x4_t, float64_t, + svst1_f64_x4 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_f64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1d {z0\.d - z3\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_pn7, svfloat64x4_t, float64_t, + svst1_f64_x4 (pn7, x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_f64_pn15: +** st1d {z0\.d - z3\.d}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_f64_pn15, svfloat64x4_t, float64_t, + svst1_f64_x4 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_f64_0: +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_0, svfloat64x4_t, float64_t, + svst1_vnum_f64_x4 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f64_1: +** incb x0 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_1, svfloat64x4_t, float64_t, + svst1_vnum_f64_x4 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f64_2: +** incb x0, all, mul #2 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_2, svfloat64x4_t, float64_t, + svst1_vnum_f64_x4 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_vnum_f64_3: +** incb x0, all, mul #3 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_3, svfloat64x4_t, float64_t, + svst1_vnum_f64_x4 (pn8, x0, 3, z0), + svst1_vnum (pn8, x0, 3, z0)) + +/* +** st1_vnum_f64_4: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_4, svfloat64x4_t, float64_t, + svst1_vnum_f64_x4 (pn8, x0, 4, z0), + svst1_vnum (pn8, x0, 4, z0)) + +/* +** st1_vnum_f64_28: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_28, svfloat64x4_t, float64_t, + svst1_vnum_f64_x4 (pn8, x0, 28, z0), + svst1_vnum (pn8, x0, 28, z0)) + +/* +** st1_vnum_f64_32: +** [^{]* +** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_32, svfloat64x4_t, float64_t, + svst1_vnum_f64_x4 (pn8, x0, 32, z0), + svst1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f64_m1: +** decb x0 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_m1, svfloat64x4_t, float64_t, + svst1_vnum_f64_x4 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f64_m2: +** decb x0, all, mul #2 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_m2, svfloat64x4_t, float64_t, + svst1_vnum_f64_x4 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_f64_m3: +** decb x0, all, mul #3 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_m3, svfloat64x4_t, float64_t, + svst1_vnum_f64_x4 (pn8, x0, -3, z0), + svst1_vnum (pn8, x0, -3, z0)) + +/* +** st1_vnum_f64_m4: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_m4, svfloat64x4_t, float64_t, + svst1_vnum_f64_x4 (pn8, x0, -4, z0), + svst1_vnum (pn8, x0, -4, z0)) + +/* +** st1_vnum_f64_m32: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_m32, svfloat64x4_t, float64_t, + svst1_vnum_f64_x4 (pn8, x0, -32, z0), + svst1_vnum (pn8, x0, -32, z0)) + +/* +** st1_vnum_f64_m36: +** [^{]* +** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_m36, svfloat64x4_t, float64_t, + svst1_vnum_f64_x4 (pn8, x0, -36, z0), + svst1_vnum (pn8, x0, -36, z0)) + +/* +** st1_vnum_f64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1d {z0\.d - z3\.d}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1d {z0\.d - z3\.d}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_f64_x1, svfloat64x4_t, float64_t, + svst1_vnum_f64_x4 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..623b09ba42500db0f7f20fb3faa64567d6d661a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s16_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_s16_base: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_base, svint16x2_t, int16_t, + svst1_s16_x2 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_s16_index: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_index, svint16x2_t, int16_t, + svst1_s16_x2 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s16_1: +** incb x0 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_1, svint16x2_t, int16_t, + svst1_s16_x2 (pn8, x0 + svcnth (), z0), + svst1 (pn8, x0 + svcnth (), z0)) + +/* +** st1_s16_2: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_2, svint16x2_t, int16_t, + svst1_s16_x2 (pn8, x0 + svcnth () * 2, z0), + svst1 (pn8, x0 + svcnth () * 2, z0)) + +/* +** st1_s16_14: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_14, svint16x2_t, int16_t, + svst1_s16_x2 (pn8, x0 + svcnth () * 14, z0), + svst1 (pn8, x0 + svcnth () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s16_16: +** incb x0, all, mul #16 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_16, svint16x2_t, int16_t, + svst1_s16_x2 (pn8, x0 + svcnth () * 16, z0), + svst1 (pn8, x0 + svcnth () * 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s16_m1: +** decb x0 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_m1, svint16x2_t, int16_t, + svst1_s16_x2 (pn8, x0 - svcnth (), z0), + svst1 (pn8, x0 - svcnth (), z0)) + +/* +** st1_s16_m2: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_m2, svint16x2_t, int16_t, + svst1_s16_x2 (pn8, x0 - svcnth () * 2, z0), + svst1 (pn8, x0 - svcnth () * 2, z0)) + +/* +** st1_s16_m16: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_m16, svint16x2_t, int16_t, + svst1_s16_x2 (pn8, x0 - svcnth () * 16, z0), + svst1 (pn8, x0 - svcnth () * 16, z0)) + +/* +** st1_s16_m18: +** addvl (x[0-9]+), x0, #-18 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_m18, svint16x2_t, int16_t, + svst1_s16_x2 (pn8, x0 - svcnth () * 18, z0), + svst1 (pn8, x0 - svcnth () * 18, z0)) + +/* +** st1_s16_z17: +** mov [^\n]+ +** mov [^\n]+ +** st1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_z17, svint16x2_t, int16_t, + svst1_s16_x2 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_s16_z22: +** st1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_z22, svint16x2_t, int16_t, + svst1_s16_x2 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_s16_z28: +** st1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_z28, svint16x2_t, int16_t, + svst1_s16_x2 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_s16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_pn0, svint16x2_t, int16_t, + svst1_s16_x2 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_s16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_pn7, svint16x2_t, int16_t, + svst1_s16_x2 (pn7, x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_s16_pn15: +** 
st1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_pn15, svint16x2_t, int16_t, + svst1_s16_x2 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_s16_0: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_0, svint16x2_t, int16_t, + svst1_vnum_s16_x2 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s16_1: +** incb x0 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_1, svint16x2_t, int16_t, + svst1_vnum_s16_x2 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* +** st1_vnum_s16_2: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_2, svint16x2_t, int16_t, + svst1_vnum_s16_x2 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* +** st1_vnum_s16_14: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_14, svint16x2_t, int16_t, + svst1_vnum_s16_x2 (pn8, x0, 14, z0), + svst1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s16_16: +** incb x0, all, mul #16 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_16, svint16x2_t, int16_t, + svst1_vnum_s16_x2 (pn8, x0, 16, z0), + svst1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s16_m1: +** decb x0 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_m1, svint16x2_t, int16_t, + svst1_vnum_s16_x2 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* +** st1_vnum_s16_m2: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_m2, svint16x2_t, int16_t, + svst1_vnum_s16_x2 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* +** st1_vnum_s16_m16: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_m16, svint16x2_t, int16_t, + svst1_vnum_s16_x2 (pn8, x0, -16, z0), + svst1_vnum (pn8, x0, -16, z0)) + +/* +** st1_vnum_s16_m18: +** addvl (x[0-9]+), x0, #-18 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_m18, svint16x2_t, int16_t, + svst1_vnum_s16_x2 (pn8, x0, -18, z0), + svst1_vnum (pn8, x0, -18, z0)) + +/* +** st1_vnum_s16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_x1, svint16x2_t, int16_t, + svst1_vnum_s16_x2 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..bf1611a2556cd6f2cba9f067ebfdd00ec2f151c8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s16_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_s16_base: +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_base, svint16x4_t, int16_t, + svst1_s16_x4 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_s16_index: +** st1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_index, svint16x4_t, int16_t, + svst1_s16_x4 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s16_1: +** incb x0 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_1, svint16x4_t, int16_t, + svst1_s16_x4 (pn8, x0 + svcnth (), z0), + svst1 (pn8, x0 + svcnth (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s16_2: +** incb x0, all, mul #2 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_2, svint16x4_t, int16_t, + svst1_s16_x4 (pn8, x0 + svcnth () * 2, z0), + svst1 (pn8, x0 + svcnth () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s16_3: +** incb x0, all, mul #3 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_3, svint16x4_t, int16_t, + svst1_s16_x4 (pn8, x0 + svcnth () * 3, z0), + svst1 (pn8, x0 + svcnth () * 3, z0)) + +/* +** st1_s16_4: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_4, svint16x4_t, int16_t, + svst1_s16_x4 (pn8, x0 + svcnth () * 4, z0), + svst1 (pn8, x0 + svcnth () * 4, z0)) + +/* +** st1_s16_28: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_28, svint16x4_t, int16_t, + svst1_s16_x4 (pn8, x0 + svcnth () * 28, z0), + svst1 (pn8, x0 + svcnth () * 28, z0)) + +/* +** st1_s16_32: +** [^{]* +** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_32, svint16x4_t, int16_t, + svst1_s16_x4 (pn8, x0 + svcnth () * 32, z0), + svst1 (pn8, x0 + svcnth () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s16_m1: +** decb x0 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_m1, svint16x4_t, int16_t, + svst1_s16_x4 (pn8, x0 - svcnth (), z0), + svst1 (pn8, x0 - svcnth (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s16_m2: +** decb x0, all, mul #2 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_m2, svint16x4_t, int16_t, + svst1_s16_x4 (pn8, x0 - svcnth () * 2, z0), + svst1 (pn8, x0 - svcnth () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_s16_m3: +** decb x0, all, mul #3 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_m3, svint16x4_t, int16_t, + svst1_s16_x4 (pn8, x0 - svcnth () * 3, z0), + svst1 (pn8, x0 - svcnth () * 3, z0)) + +/* +** st1_s16_m4: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_m4, svint16x4_t, int16_t, + svst1_s16_x4 (pn8, x0 - svcnth () * 4, z0), + svst1 (pn8, x0 - svcnth () * 4, z0)) + +/* +** st1_s16_m32: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_m32, svint16x4_t, int16_t, + svst1_s16_x4 (pn8, x0 - svcnth () * 32, z0), + svst1 (pn8, x0 - svcnth () * 32, z0)) + +/* +** st1_s16_m36: +** [^{]* +** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_m36, svint16x4_t, int16_t, + svst1_s16_x4 (pn8, x0 - svcnth () * 36, z0), + svst1 (pn8, x0 - svcnth () * 36, z0)) + +/* +** st1_s16_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_z17, svint16x4_t, int16_t, + svst1_s16_x4 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_s16_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_z22, svint16x4_t, int16_t, + svst1_s16_x4 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_s16_z28: +** st1h {z28\.h - z31\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_z28, svint16x4_t, int16_t, + svst1_s16_x4 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_s16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1h {z0\.h - z3\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_pn0, svint16x4_t, int16_t, + svst1_s16_x4 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_s16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1h {z0\.h - z3\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_pn7, svint16x4_t, int16_t, + svst1_s16_x4 (pn7, x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_s16_pn15: +** st1h {z0\.h - z3\.h}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s16_pn15, svint16x4_t, int16_t, + svst1_s16_x4 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_s16_0: +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_0, svint16x4_t, int16_t, + svst1_vnum_s16_x4 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s16_1: +** incb x0 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_1, svint16x4_t, int16_t, + svst1_vnum_s16_x4 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s16_2: +** incb x0, all, mul #2 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_2, svint16x4_t, int16_t, + svst1_vnum_s16_x4 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_vnum_s16_3: +** incb x0, all, mul #3 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_3, svint16x4_t, int16_t, + svst1_vnum_s16_x4 (pn8, x0, 3, z0), + svst1_vnum (pn8, x0, 3, z0)) + +/* +** st1_vnum_s16_4: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_4, svint16x4_t, int16_t, + svst1_vnum_s16_x4 (pn8, x0, 4, z0), + svst1_vnum (pn8, x0, 4, z0)) + +/* +** st1_vnum_s16_28: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_28, svint16x4_t, int16_t, + svst1_vnum_s16_x4 (pn8, x0, 28, z0), + svst1_vnum (pn8, x0, 28, z0)) + +/* +** st1_vnum_s16_32: +** [^{]* +** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_32, svint16x4_t, int16_t, + svst1_vnum_s16_x4 (pn8, x0, 32, z0), + svst1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s16_m1: +** decb x0 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_m1, svint16x4_t, int16_t, + svst1_vnum_s16_x4 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s16_m2: +** decb x0, all, mul #2 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_m2, svint16x4_t, int16_t, + svst1_vnum_s16_x4 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s16_m3: +** decb x0, all, mul #3 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_m3, svint16x4_t, int16_t, + svst1_vnum_s16_x4 (pn8, x0, -3, z0), + svst1_vnum (pn8, x0, -3, z0)) + +/* +** st1_vnum_s16_m4: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_m4, svint16x4_t, int16_t, + svst1_vnum_s16_x4 (pn8, x0, -4, z0), + svst1_vnum (pn8, x0, -4, z0)) + +/* +** st1_vnum_s16_m32: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_m32, svint16x4_t, int16_t, + svst1_vnum_s16_x4 (pn8, x0, -32, z0), + svst1_vnum (pn8, x0, -32, z0)) + +/* +** st1_vnum_s16_m36: +** [^{]* +** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_m36, svint16x4_t, int16_t, + svst1_vnum_s16_x4 (pn8, x0, -36, z0), + svst1_vnum (pn8, x0, -36, z0)) + +/* +** st1_vnum_s16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1h {z0\.h - z3\.h}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1h {z0\.h - z3\.h}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s16_x1, svint16x4_t, int16_t, + svst1_vnum_s16_x4 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..2bb81743170a5e3dc66914b96abc033de3699690 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s32_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_s32_base: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_base, svint32x2_t, int32_t, + svst1_s32_x2 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_s32_index: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_index, svint32x2_t, int32_t, + svst1_s32_x2 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s32_1: +** incb x0 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_1, svint32x2_t, int32_t, + svst1_s32_x2 (pn8, x0 + svcntw (), z0), + svst1 (pn8, x0 + svcntw (), z0)) + +/* +** st1_s32_2: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_2, svint32x2_t, int32_t, + svst1_s32_x2 (pn8, x0 + svcntw () * 2, z0), + svst1 (pn8, x0 + svcntw () * 2, z0)) + +/* +** st1_s32_14: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_14, svint32x2_t, int32_t, + svst1_s32_x2 (pn8, x0 + svcntw () * 14, z0), + svst1 (pn8, x0 + svcntw () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s32_16: +** incb x0, all, mul #16 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_16, svint32x2_t, int32_t, + svst1_s32_x2 (pn8, x0 + svcntw () * 16, z0), + svst1 (pn8, x0 + svcntw () * 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s32_m1: +** decb x0 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_m1, svint32x2_t, int32_t, + svst1_s32_x2 (pn8, x0 - svcntw (), z0), + svst1 (pn8, x0 - svcntw (), z0)) + +/* +** st1_s32_m2: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_m2, svint32x2_t, int32_t, + svst1_s32_x2 (pn8, x0 - svcntw () * 2, z0), + svst1 (pn8, x0 - svcntw () * 2, z0)) + +/* +** st1_s32_m16: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_m16, svint32x2_t, int32_t, + svst1_s32_x2 (pn8, x0 - svcntw () * 16, z0), + svst1 (pn8, x0 - svcntw () * 16, z0)) + +/* +** st1_s32_m18: +** addvl (x[0-9]+), x0, #-18 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_m18, svint32x2_t, int32_t, + svst1_s32_x2 (pn8, x0 - svcntw () * 18, z0), + svst1 (pn8, x0 - svcntw () * 18, z0)) + +/* +** st1_s32_z17: +** mov [^\n]+ +** mov [^\n]+ +** st1w {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_z17, svint32x2_t, int32_t, + svst1_s32_x2 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_s32_z22: +** st1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_z22, svint32x2_t, int32_t, + svst1_s32_x2 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_s32_z28: +** st1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_z28, svint32x2_t, int32_t, + svst1_s32_x2 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_s32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_pn0, svint32x2_t, int32_t, + svst1_s32_x2 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_s32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_pn7, svint32x2_t, int32_t, + svst1_s32_x2 (pn7, x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_s32_pn15: +** 
st1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_pn15, svint32x2_t, int32_t, + svst1_s32_x2 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_s32_0: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_0, svint32x2_t, int32_t, + svst1_vnum_s32_x2 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s32_1: +** incb x0 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_1, svint32x2_t, int32_t, + svst1_vnum_s32_x2 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* +** st1_vnum_s32_2: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_2, svint32x2_t, int32_t, + svst1_vnum_s32_x2 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* +** st1_vnum_s32_14: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_14, svint32x2_t, int32_t, + svst1_vnum_s32_x2 (pn8, x0, 14, z0), + svst1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s32_16: +** incb x0, all, mul #16 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_16, svint32x2_t, int32_t, + svst1_vnum_s32_x2 (pn8, x0, 16, z0), + svst1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s32_m1: +** decb x0 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_m1, svint32x2_t, int32_t, + svst1_vnum_s32_x2 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* +** st1_vnum_s32_m2: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_m2, svint32x2_t, int32_t, + svst1_vnum_s32_x2 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* +** st1_vnum_s32_m16: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_m16, svint32x2_t, int32_t, + svst1_vnum_s32_x2 (pn8, x0, -16, z0), + svst1_vnum (pn8, x0, -16, z0)) + +/* +** st1_vnum_s32_m18: +** addvl (x[0-9]+), x0, #-18 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_m18, svint32x2_t, int32_t, + svst1_vnum_s32_x2 (pn8, x0, -18, z0), + svst1_vnum (pn8, x0, -18, z0)) + +/* +** st1_vnum_s32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_x1, svint32x2_t, int32_t, + svst1_vnum_s32_x2 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..35e63a8be6530964fed8a4abcb94a65c92300f9c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s32_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_s32_base: +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_base, svint32x4_t, int32_t, + svst1_s32_x4 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_s32_index: +** st1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_index, svint32x4_t, int32_t, + svst1_s32_x4 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s32_1: +** incb x0 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_1, svint32x4_t, int32_t, + svst1_s32_x4 (pn8, x0 + svcntw (), z0), + svst1 (pn8, x0 + svcntw (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s32_2: +** incb x0, all, mul #2 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_2, svint32x4_t, int32_t, + svst1_s32_x4 (pn8, x0 + svcntw () * 2, z0), + svst1 (pn8, x0 + svcntw () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s32_3: +** incb x0, all, mul #3 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_3, svint32x4_t, int32_t, + svst1_s32_x4 (pn8, x0 + svcntw () * 3, z0), + svst1 (pn8, x0 + svcntw () * 3, z0)) + +/* +** st1_s32_4: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_4, svint32x4_t, int32_t, + svst1_s32_x4 (pn8, x0 + svcntw () * 4, z0), + svst1 (pn8, x0 + svcntw () * 4, z0)) + +/* +** st1_s32_28: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_28, svint32x4_t, int32_t, + svst1_s32_x4 (pn8, x0 + svcntw () * 28, z0), + svst1 (pn8, x0 + svcntw () * 28, z0)) + +/* +** st1_s32_32: +** [^{]* +** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_32, svint32x4_t, int32_t, + svst1_s32_x4 (pn8, x0 + svcntw () * 32, z0), + svst1 (pn8, x0 + svcntw () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s32_m1: +** decb x0 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_m1, svint32x4_t, int32_t, + svst1_s32_x4 (pn8, x0 - svcntw (), z0), + svst1 (pn8, x0 - svcntw (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s32_m2: +** decb x0, all, mul #2 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_m2, svint32x4_t, int32_t, + svst1_s32_x4 (pn8, x0 - svcntw () * 2, z0), + svst1 (pn8, x0 - svcntw () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_s32_m3: +** decb x0, all, mul #3 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_m3, svint32x4_t, int32_t, + svst1_s32_x4 (pn8, x0 - svcntw () * 3, z0), + svst1 (pn8, x0 - svcntw () * 3, z0)) + +/* +** st1_s32_m4: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_m4, svint32x4_t, int32_t, + svst1_s32_x4 (pn8, x0 - svcntw () * 4, z0), + svst1 (pn8, x0 - svcntw () * 4, z0)) + +/* +** st1_s32_m32: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_m32, svint32x4_t, int32_t, + svst1_s32_x4 (pn8, x0 - svcntw () * 32, z0), + svst1 (pn8, x0 - svcntw () * 32, z0)) + +/* +** st1_s32_m36: +** [^{]* +** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_m36, svint32x4_t, int32_t, + svst1_s32_x4 (pn8, x0 - svcntw () * 36, z0), + svst1 (pn8, x0 - svcntw () * 36, z0)) + +/* +** st1_s32_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1w {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_z17, svint32x4_t, int32_t, + svst1_s32_x4 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_s32_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1w {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_z22, svint32x4_t, int32_t, + svst1_s32_x4 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_s32_z28: +** st1w {z28\.s - z31\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_z28, svint32x4_t, int32_t, + svst1_s32_x4 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_s32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1w {z0\.s - z3\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_pn0, svint32x4_t, int32_t, + svst1_s32_x4 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_s32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1w {z0\.s - z3\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_pn7, svint32x4_t, int32_t, + svst1_s32_x4 (pn7, x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_s32_pn15: +** st1w {z0\.s - z3\.s}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s32_pn15, svint32x4_t, int32_t, + svst1_s32_x4 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_s32_0: +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_0, svint32x4_t, int32_t, + svst1_vnum_s32_x4 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s32_1: +** incb x0 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_1, svint32x4_t, int32_t, + svst1_vnum_s32_x4 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s32_2: +** incb x0, all, mul #2 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_2, svint32x4_t, int32_t, + svst1_vnum_s32_x4 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_vnum_s32_3: +** incb x0, all, mul #3 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_3, svint32x4_t, int32_t, + svst1_vnum_s32_x4 (pn8, x0, 3, z0), + svst1_vnum (pn8, x0, 3, z0)) + +/* +** st1_vnum_s32_4: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_4, svint32x4_t, int32_t, + svst1_vnum_s32_x4 (pn8, x0, 4, z0), + svst1_vnum (pn8, x0, 4, z0)) + +/* +** st1_vnum_s32_28: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_28, svint32x4_t, int32_t, + svst1_vnum_s32_x4 (pn8, x0, 28, z0), + svst1_vnum (pn8, x0, 28, z0)) + +/* +** st1_vnum_s32_32: +** [^{]* +** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_32, svint32x4_t, int32_t, + svst1_vnum_s32_x4 (pn8, x0, 32, z0), + svst1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s32_m1: +** decb x0 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_m1, svint32x4_t, int32_t, + svst1_vnum_s32_x4 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s32_m2: +** decb x0, all, mul #2 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_m2, svint32x4_t, int32_t, + svst1_vnum_s32_x4 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s32_m3: +** decb x0, all, mul #3 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_m3, svint32x4_t, int32_t, + svst1_vnum_s32_x4 (pn8, x0, -3, z0), + svst1_vnum (pn8, x0, -3, z0)) + +/* +** st1_vnum_s32_m4: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_m4, svint32x4_t, int32_t, + svst1_vnum_s32_x4 (pn8, x0, -4, z0), + svst1_vnum (pn8, x0, -4, z0)) + +/* +** st1_vnum_s32_m32: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_m32, svint32x4_t, int32_t, + svst1_vnum_s32_x4 (pn8, x0, -32, z0), + svst1_vnum (pn8, x0, -32, z0)) + +/* +** st1_vnum_s32_m36: +** [^{]* +** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_m36, svint32x4_t, int32_t, + svst1_vnum_s32_x4 (pn8, x0, -36, z0), + svst1_vnum (pn8, x0, -36, z0)) + +/* +** st1_vnum_s32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1w {z0\.s - z3\.s}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1w {z0\.s - z3\.s}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s32_x1, svint32x4_t, int32_t, + svst1_vnum_s32_x4 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..b3796793a7915900d9a5baeffabe87ad3b30dc9a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s64_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_s64_base: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_base, svint64x2_t, int64_t, + svst1_s64_x2 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_s64_index: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_index, svint64x2_t, int64_t, + svst1_s64_x2 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s64_1: +** incb x0 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_1, svint64x2_t, int64_t, + svst1_s64_x2 (pn8, x0 + svcntd (), z0), + svst1 (pn8, x0 + svcntd (), z0)) + +/* +** st1_s64_2: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_2, svint64x2_t, int64_t, + svst1_s64_x2 (pn8, x0 + svcntd () * 2, z0), + svst1 (pn8, x0 + svcntd () * 2, z0)) + +/* +** st1_s64_14: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_14, svint64x2_t, int64_t, + svst1_s64_x2 (pn8, x0 + svcntd () * 14, z0), + svst1 (pn8, x0 + svcntd () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s64_16: +** incb x0, all, mul #16 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_16, svint64x2_t, int64_t, + svst1_s64_x2 (pn8, x0 + svcntd () * 16, z0), + svst1 (pn8, x0 + svcntd () * 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s64_m1: +** decb x0 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_m1, svint64x2_t, int64_t, + svst1_s64_x2 (pn8, x0 - svcntd (), z0), + svst1 (pn8, x0 - svcntd (), z0)) + +/* +** st1_s64_m2: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_m2, svint64x2_t, int64_t, + svst1_s64_x2 (pn8, x0 - svcntd () * 2, z0), + svst1 (pn8, x0 - svcntd () * 2, z0)) + +/* +** st1_s64_m16: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_m16, svint64x2_t, int64_t, + svst1_s64_x2 (pn8, x0 - svcntd () * 16, z0), + svst1 (pn8, x0 - svcntd () * 16, z0)) + +/* +** st1_s64_m18: +** addvl (x[0-9]+), x0, #-18 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_m18, svint64x2_t, int64_t, + svst1_s64_x2 (pn8, x0 - svcntd () * 18, z0), + svst1 (pn8, x0 - svcntd () * 18, z0)) + +/* +** st1_s64_z17: +** mov [^\n]+ +** mov [^\n]+ +** st1d {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_z17, svint64x2_t, int64_t, + svst1_s64_x2 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_s64_z22: +** st1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_z22, svint64x2_t, int64_t, + svst1_s64_x2 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_s64_z28: +** st1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_z28, svint64x2_t, int64_t, + svst1_s64_x2 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_s64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_pn0, svint64x2_t, int64_t, + svst1_s64_x2 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_s64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_pn7, svint64x2_t, int64_t, + svst1_s64_x2 (pn7, x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_s64_pn15: +** 
st1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_pn15, svint64x2_t, int64_t, + svst1_s64_x2 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_s64_0: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_0, svint64x2_t, int64_t, + svst1_vnum_s64_x2 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s64_1: +** incb x0 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_1, svint64x2_t, int64_t, + svst1_vnum_s64_x2 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* +** st1_vnum_s64_2: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_2, svint64x2_t, int64_t, + svst1_vnum_s64_x2 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* +** st1_vnum_s64_14: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_14, svint64x2_t, int64_t, + svst1_vnum_s64_x2 (pn8, x0, 14, z0), + svst1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s64_16: +** incb x0, all, mul #16 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_16, svint64x2_t, int64_t, + svst1_vnum_s64_x2 (pn8, x0, 16, z0), + svst1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s64_m1: +** decb x0 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_m1, svint64x2_t, int64_t, + svst1_vnum_s64_x2 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* +** st1_vnum_s64_m2: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_m2, svint64x2_t, int64_t, + svst1_vnum_s64_x2 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* +** st1_vnum_s64_m16: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_m16, svint64x2_t, int64_t, + svst1_vnum_s64_x2 (pn8, x0, -16, z0), + svst1_vnum (pn8, x0, -16, z0)) + +/* +** st1_vnum_s64_m18: +** addvl (x[0-9]+), x0, #-18 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_m18, svint64x2_t, int64_t, + svst1_vnum_s64_x2 (pn8, x0, -18, z0), + svst1_vnum (pn8, x0, -18, z0)) + +/* +** st1_vnum_s64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_x1, svint64x2_t, int64_t, + svst1_vnum_s64_x2 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..359edf624acf9757b72558f295171f7012c5caa7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s64_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_s64_base: +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_base, svint64x4_t, int64_t, + svst1_s64_x4 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_s64_index: +** st1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_index, svint64x4_t, int64_t, + svst1_s64_x4 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s64_1: +** incb x0 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_1, svint64x4_t, int64_t, + svst1_s64_x4 (pn8, x0 + svcntd (), z0), + svst1 (pn8, x0 + svcntd (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s64_2: +** incb x0, all, mul #2 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_2, svint64x4_t, int64_t, + svst1_s64_x4 (pn8, x0 + svcntd () * 2, z0), + svst1 (pn8, x0 + svcntd () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s64_3: +** incb x0, all, mul #3 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_3, svint64x4_t, int64_t, + svst1_s64_x4 (pn8, x0 + svcntd () * 3, z0), + svst1 (pn8, x0 + svcntd () * 3, z0)) + +/* +** st1_s64_4: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_4, svint64x4_t, int64_t, + svst1_s64_x4 (pn8, x0 + svcntd () * 4, z0), + svst1 (pn8, x0 + svcntd () * 4, z0)) + +/* +** st1_s64_28: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_28, svint64x4_t, int64_t, + svst1_s64_x4 (pn8, x0 + svcntd () * 28, z0), + svst1 (pn8, x0 + svcntd () * 28, z0)) + +/* +** st1_s64_32: +** [^{]* +** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_32, svint64x4_t, int64_t, + svst1_s64_x4 (pn8, x0 + svcntd () * 32, z0), + svst1 (pn8, x0 + svcntd () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s64_m1: +** decb x0 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_m1, svint64x4_t, int64_t, + svst1_s64_x4 (pn8, x0 - svcntd (), z0), + svst1 (pn8, x0 - svcntd (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s64_m2: +** decb x0, all, mul #2 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_m2, svint64x4_t, int64_t, + svst1_s64_x4 (pn8, x0 - svcntd () * 2, z0), + svst1 (pn8, x0 - svcntd () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_s64_m3: +** decb x0, all, mul #3 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_m3, svint64x4_t, int64_t, + svst1_s64_x4 (pn8, x0 - svcntd () * 3, z0), + svst1 (pn8, x0 - svcntd () * 3, z0)) + +/* +** st1_s64_m4: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_m4, svint64x4_t, int64_t, + svst1_s64_x4 (pn8, x0 - svcntd () * 4, z0), + svst1 (pn8, x0 - svcntd () * 4, z0)) + +/* +** st1_s64_m32: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_m32, svint64x4_t, int64_t, + svst1_s64_x4 (pn8, x0 - svcntd () * 32, z0), + svst1 (pn8, x0 - svcntd () * 32, z0)) + +/* +** st1_s64_m36: +** [^{]* +** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_m36, svint64x4_t, int64_t, + svst1_s64_x4 (pn8, x0 - svcntd () * 36, z0), + svst1 (pn8, x0 - svcntd () * 36, z0)) + +/* +** st1_s64_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1d {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_z17, svint64x4_t, int64_t, + svst1_s64_x4 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_s64_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1d {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_z22, svint64x4_t, int64_t, + svst1_s64_x4 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_s64_z28: +** st1d {z28\.d - z31\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_z28, svint64x4_t, int64_t, + svst1_s64_x4 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_s64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1d {z0\.d - z3\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_pn0, svint64x4_t, int64_t, + svst1_s64_x4 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_s64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1d {z0\.d - z3\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_pn7, svint64x4_t, int64_t, + svst1_s64_x4 (pn7, x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_s64_pn15: +** st1d {z0\.d - z3\.d}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s64_pn15, svint64x4_t, int64_t, + svst1_s64_x4 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_s64_0: +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_0, svint64x4_t, int64_t, + svst1_vnum_s64_x4 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s64_1: +** incb x0 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_1, svint64x4_t, int64_t, + svst1_vnum_s64_x4 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s64_2: +** incb x0, all, mul #2 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_2, svint64x4_t, int64_t, + svst1_vnum_s64_x4 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_vnum_s64_3: +** incb x0, all, mul #3 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_3, svint64x4_t, int64_t, + svst1_vnum_s64_x4 (pn8, x0, 3, z0), + svst1_vnum (pn8, x0, 3, z0)) + +/* +** st1_vnum_s64_4: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_4, svint64x4_t, int64_t, + svst1_vnum_s64_x4 (pn8, x0, 4, z0), + svst1_vnum (pn8, x0, 4, z0)) + +/* +** st1_vnum_s64_28: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_28, svint64x4_t, int64_t, + svst1_vnum_s64_x4 (pn8, x0, 28, z0), + svst1_vnum (pn8, x0, 28, z0)) + +/* +** st1_vnum_s64_32: +** [^{]* +** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_32, svint64x4_t, int64_t, + svst1_vnum_s64_x4 (pn8, x0, 32, z0), + svst1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s64_m1: +** decb x0 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_m1, svint64x4_t, int64_t, + svst1_vnum_s64_x4 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s64_m2: +** decb x0, all, mul #2 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_m2, svint64x4_t, int64_t, + svst1_vnum_s64_x4 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s64_m3: +** decb x0, all, mul #3 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_m3, svint64x4_t, int64_t, + svst1_vnum_s64_x4 (pn8, x0, -3, z0), + svst1_vnum (pn8, x0, -3, z0)) + +/* +** st1_vnum_s64_m4: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_m4, svint64x4_t, int64_t, + svst1_vnum_s64_x4 (pn8, x0, -4, z0), + svst1_vnum (pn8, x0, -4, z0)) + +/* +** st1_vnum_s64_m32: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_m32, svint64x4_t, int64_t, + svst1_vnum_s64_x4 (pn8, x0, -32, z0), + svst1_vnum (pn8, x0, -32, z0)) + +/* +** st1_vnum_s64_m36: +** [^{]* +** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_m36, svint64x4_t, int64_t, + svst1_vnum_s64_x4 (pn8, x0, -36, z0), + svst1_vnum (pn8, x0, -36, z0)) + +/* +** st1_vnum_s64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1d {z0\.d - z3\.d}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1d {z0\.d - z3\.d}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s64_x1, svint64x4_t, int64_t, + svst1_vnum_s64_x4 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..8da1e5f5b6c2197f501a3322789d2681f1da757a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s8_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_s8_base: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_base, svint8x2_t, int8_t, + svst1_s8_x2 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_s8_index: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_index, svint8x2_t, int8_t, + svst1_s8_x2 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s8_1: +** incb x0 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_1, svint8x2_t, int8_t, + svst1_s8_x2 (pn8, x0 + svcntb (), z0), + svst1 (pn8, x0 + svcntb (), z0)) + +/* +** st1_s8_2: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_2, svint8x2_t, int8_t, + svst1_s8_x2 (pn8, x0 + svcntb () * 2, z0), + svst1 (pn8, x0 + svcntb () * 2, z0)) + +/* +** st1_s8_14: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_14, svint8x2_t, int8_t, + svst1_s8_x2 (pn8, x0 + svcntb () * 14, z0), + svst1 (pn8, x0 + svcntb () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s8_16: +** incb x0, all, mul #16 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_16, svint8x2_t, int8_t, + svst1_s8_x2 (pn8, x0 + svcntb () * 16, z0), + svst1 (pn8, x0 + svcntb () * 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s8_m1: +** decb x0 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_m1, svint8x2_t, int8_t, + svst1_s8_x2 (pn8, x0 - svcntb (), z0), + svst1 (pn8, x0 - svcntb (), z0)) + +/* +** st1_s8_m2: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_m2, svint8x2_t, int8_t, + svst1_s8_x2 (pn8, x0 - svcntb () * 2, z0), + svst1 (pn8, x0 - svcntb () * 2, z0)) + +/* +** st1_s8_m16: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_m16, svint8x2_t, int8_t, + svst1_s8_x2 (pn8, x0 - svcntb () * 16, z0), + svst1 (pn8, x0 - svcntb () * 16, z0)) + +/* +** st1_s8_m18: +** addvl (x[0-9]+), x0, #-18 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_m18, svint8x2_t, int8_t, + svst1_s8_x2 (pn8, x0 - svcntb () * 18, z0), + svst1 (pn8, x0 - svcntb () * 18, z0)) + +/* +** st1_s8_z17: +** mov [^\n]+ +** mov [^\n]+ +** st1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_z17, svint8x2_t, int8_t, + svst1_s8_x2 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_s8_z22: +** st1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_z22, svint8x2_t, int8_t, + svst1_s8_x2 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_s8_z28: +** st1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_z28, svint8x2_t, int8_t, + svst1_s8_x2 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_s8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_pn0, svint8x2_t, int8_t, + svst1_s8_x2 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_s8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_pn7, svint8x2_t, int8_t, + svst1_s8_x2 (pn7, x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_s8_pn15: +** st1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_pn15, 
svint8x2_t, int8_t, + svst1_s8_x2 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_s8_0: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_0, svint8x2_t, int8_t, + svst1_vnum_s8_x2 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s8_1: +** incb x0 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_1, svint8x2_t, int8_t, + svst1_vnum_s8_x2 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* +** st1_vnum_s8_2: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_2, svint8x2_t, int8_t, + svst1_vnum_s8_x2 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* +** st1_vnum_s8_14: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_14, svint8x2_t, int8_t, + svst1_vnum_s8_x2 (pn8, x0, 14, z0), + svst1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s8_16: +** incb x0, all, mul #16 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_16, svint8x2_t, int8_t, + svst1_vnum_s8_x2 (pn8, x0, 16, z0), + svst1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s8_m1: +** decb x0 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_m1, svint8x2_t, int8_t, + svst1_vnum_s8_x2 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* +** st1_vnum_s8_m2: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_m2, svint8x2_t, int8_t, + svst1_vnum_s8_x2 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* +** st1_vnum_s8_m16: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_m16, svint8x2_t, int8_t, + svst1_vnum_s8_x2 (pn8, x0, -16, z0), + svst1_vnum (pn8, x0, -16, z0)) + +/* +** st1_vnum_s8_m18: +** addvl (x[0-9]+), x0, #-18 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_m18, svint8x2_t, int8_t, + svst1_vnum_s8_x2 (pn8, x0, -18, z0), + svst1_vnum (pn8, x0, -18, z0)) + +/* +** st1_vnum_s8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_x1, svint8x2_t, int8_t, + svst1_vnum_s8_x2 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..66b8934911709249a69abbb1d520503164f2a9a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_s8_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_s8_base: +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_base, svint8x4_t, int8_t, + svst1_s8_x4 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_s8_index: +** st1b {z0\.b - z3\.b}, pn8, \[x0, x1\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_index, svint8x4_t, int8_t, + svst1_s8_x4 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_s8_1: +** incb x0 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_1, svint8x4_t, int8_t, + svst1_s8_x4 (pn8, x0 + svcntb (), z0), + svst1 (pn8, x0 + svcntb (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s8_2: +** incb x0, all, mul #2 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_2, svint8x4_t, int8_t, + svst1_s8_x4 (pn8, x0 + svcntb () * 2, z0), + svst1 (pn8, x0 + svcntb () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s8_3: +** incb x0, all, mul #3 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_3, svint8x4_t, int8_t, + svst1_s8_x4 (pn8, x0 + svcntb () * 3, z0), + svst1 (pn8, x0 + svcntb () * 3, z0)) + +/* +** st1_s8_4: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_4, svint8x4_t, int8_t, + svst1_s8_x4 (pn8, x0 + svcntb () * 4, z0), + svst1 (pn8, x0 + svcntb () * 4, z0)) + +/* +** st1_s8_28: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_28, svint8x4_t, int8_t, + svst1_s8_x4 (pn8, x0 + svcntb () * 28, z0), + svst1 (pn8, x0 + svcntb () * 28, z0)) + +/* +** st1_s8_32: +** [^{]* +** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_32, svint8x4_t, int8_t, + svst1_s8_x4 (pn8, x0 + svcntb () * 32, z0), + svst1 (pn8, x0 + svcntb () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s8_m1: +** decb x0 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_m1, svint8x4_t, int8_t, + svst1_s8_x4 (pn8, x0 - svcntb (), z0), + svst1 (pn8, x0 - svcntb (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_s8_m2: +** decb x0, all, mul #2 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_m2, svint8x4_t, int8_t, + svst1_s8_x4 (pn8, x0 - svcntb () * 2, z0), + svst1 (pn8, x0 - svcntb () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_s8_m3: +** decb x0, all, mul #3 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_m3, svint8x4_t, int8_t, + svst1_s8_x4 (pn8, x0 - svcntb () * 3, z0), + svst1 (pn8, x0 - svcntb () * 3, z0)) + +/* +** st1_s8_m4: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_m4, svint8x4_t, int8_t, + svst1_s8_x4 (pn8, x0 - svcntb () * 4, z0), + svst1 (pn8, x0 - svcntb () * 4, z0)) + +/* +** st1_s8_m32: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_m32, svint8x4_t, int8_t, + svst1_s8_x4 (pn8, x0 - svcntb () * 32, z0), + svst1 (pn8, x0 - svcntb () * 32, z0)) + +/* +** st1_s8_m36: +** [^{]* +** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_m36, svint8x4_t, int8_t, + svst1_s8_x4 (pn8, x0 - svcntb () * 36, z0), + svst1 (pn8, x0 - svcntb () * 36, z0)) + +/* +** st1_s8_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_z17, svint8x4_t, int8_t, + svst1_s8_x4 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_s8_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_z22, svint8x4_t, int8_t, + svst1_s8_x4 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_s8_z28: +** st1b {z28\.b - z31\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_z28, svint8x4_t, int8_t, + svst1_s8_x4 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_s8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1b {z0\.b - z3\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_pn0, svint8x4_t, int8_t, + svst1_s8_x4 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_s8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1b {z0\.b - z3\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_pn7, svint8x4_t, int8_t, + svst1_s8_x4 (pn7, x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_s8_pn15: +** st1b {z0\.b - z3\.b}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_s8_pn15, svint8x4_t, int8_t, + svst1_s8_x4 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_s8_0: +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_0, svint8x4_t, int8_t, + svst1_vnum_s8_x4 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s8_1: +** incb x0 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_1, svint8x4_t, int8_t, + svst1_vnum_s8_x4 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s8_2: +** incb x0, all, mul #2 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_2, svint8x4_t, int8_t, + svst1_vnum_s8_x4 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_vnum_s8_3: +** incb x0, all, mul #3 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_3, svint8x4_t, int8_t, + svst1_vnum_s8_x4 (pn8, x0, 3, z0), + svst1_vnum (pn8, x0, 3, z0)) + +/* +** st1_vnum_s8_4: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_4, svint8x4_t, int8_t, + svst1_vnum_s8_x4 (pn8, x0, 4, z0), + svst1_vnum (pn8, x0, 4, z0)) + +/* +** st1_vnum_s8_28: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_28, svint8x4_t, int8_t, + svst1_vnum_s8_x4 (pn8, x0, 28, z0), + svst1_vnum (pn8, x0, 28, z0)) + +/* +** st1_vnum_s8_32: +** [^{]* +** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_32, svint8x4_t, int8_t, + svst1_vnum_s8_x4 (pn8, x0, 32, z0), + svst1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s8_m1: +** decb x0 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_m1, svint8x4_t, int8_t, + svst1_vnum_s8_x4 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s8_m2: +** decb x0, all, mul #2 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_m2, svint8x4_t, int8_t, + svst1_vnum_s8_x4 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_s8_m3: +** decb x0, all, mul #3 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_m3, svint8x4_t, int8_t, + svst1_vnum_s8_x4 (pn8, x0, -3, z0), + svst1_vnum (pn8, x0, -3, z0)) + +/* +** st1_vnum_s8_m4: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_m4, svint8x4_t, int8_t, + svst1_vnum_s8_x4 (pn8, x0, -4, z0), + svst1_vnum (pn8, x0, -4, z0)) + +/* +** st1_vnum_s8_m32: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_m32, svint8x4_t, int8_t, + svst1_vnum_s8_x4 (pn8, x0, -32, z0), + svst1_vnum (pn8, x0, -32, z0)) + +/* +** st1_vnum_s8_m36: +** [^{]* +** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_m36, svint8x4_t, int8_t, + svst1_vnum_s8_x4 (pn8, x0, -36, z0), + svst1_vnum (pn8, x0, -36, z0)) + +/* +** st1_vnum_s8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1b {z0\.b - z3\.b}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1b {z0\.b - z3\.b}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_s8_x1, svint8x4_t, int8_t, + svst1_vnum_s8_x4 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..f9cfe0283a8692934bf2ea783609ee1b933fdc5a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u16_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_u16_base: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_base, svuint16x2_t, uint16_t, + svst1_u16_x2 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_u16_index: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_index, svuint16x2_t, uint16_t, + svst1_u16_x2 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u16_1: +** incb x0 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_1, svuint16x2_t, uint16_t, + svst1_u16_x2 (pn8, x0 + svcnth (), z0), + svst1 (pn8, x0 + svcnth (), z0)) + +/* +** st1_u16_2: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_2, svuint16x2_t, uint16_t, + svst1_u16_x2 (pn8, x0 + svcnth () * 2, z0), + svst1 (pn8, x0 + svcnth () * 2, z0)) + +/* +** st1_u16_14: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_14, svuint16x2_t, uint16_t, + svst1_u16_x2 (pn8, x0 + svcnth () * 14, z0), + svst1 (pn8, x0 + svcnth () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u16_16: +** incb x0, all, mul #16 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_16, svuint16x2_t, uint16_t, + svst1_u16_x2 (pn8, x0 + svcnth () * 16, z0), + svst1 (pn8, x0 + svcnth () * 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u16_m1: +** decb x0 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_m1, svuint16x2_t, uint16_t, + svst1_u16_x2 (pn8, x0 - svcnth (), z0), + svst1 (pn8, x0 - svcnth (), z0)) + +/* +** st1_u16_m2: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_m2, svuint16x2_t, uint16_t, + svst1_u16_x2 (pn8, x0 - svcnth () * 2, z0), + svst1 (pn8, x0 - svcnth () * 2, z0)) + +/* +** st1_u16_m16: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_m16, svuint16x2_t, uint16_t, + svst1_u16_x2 (pn8, x0 - svcnth () * 16, z0), + svst1 (pn8, x0 - svcnth () * 16, z0)) + +/* +** st1_u16_m18: +** addvl (x[0-9]+), x0, #-18 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_m18, svuint16x2_t, uint16_t, + svst1_u16_x2 (pn8, x0 - svcnth () * 18, z0), + svst1 (pn8, x0 - svcnth () * 18, z0)) + +/* +** st1_u16_z17: +** mov [^\n]+ +** mov [^\n]+ +** st1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_z17, svuint16x2_t, uint16_t, + svst1_u16_x2 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_u16_z22: +** st1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_z22, svuint16x2_t, uint16_t, + svst1_u16_x2 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_u16_z28: +** st1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_z28, svuint16x2_t, uint16_t, + svst1_u16_x2 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_u16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_pn0, svuint16x2_t, uint16_t, + svst1_u16_x2 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_u16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_pn7, svuint16x2_t, uint16_t, + svst1_u16_x2 (pn7, x0, z0), + svst1 (pn7, x0, z0)) 
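(For readers unfamiliar with the multi-vector forms these tests pair up — the explicit `svst1_u16_x2` and the overloaded `svst1` — a minimal caller might look like the sketch below. It is not part of the patch; the function name `copy_two_vectors` is hypothetical, and it assumes an SME2-capable toolchain where <arm_sme.h> declares the intrinsics and `__arm_streaming` marks a streaming function.)

    #include <stdint.h>
    #include <arm_sme.h>

    /* Hypothetical example, not from the patch: store two consecutive
       vectors of 16-bit elements at DST.  PNG is a predicate-as-counter
       that controls the active elements across both vectors.  */
    void copy_two_vectors (uint16_t *dst, svuint16x2_t data, svcount_t png)
        __arm_streaming
    {
      svst1_u16_x2 (png, dst, data);  /* overloaded form: svst1 (png, dst, data) */
    }
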
+ +/* +** st1_u16_pn15: +** st1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_pn15, svuint16x2_t, uint16_t, + svst1_u16_x2 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_u16_0: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_0, svuint16x2_t, uint16_t, + svst1_vnum_u16_x2 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u16_1: +** incb x0 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_1, svuint16x2_t, uint16_t, + svst1_vnum_u16_x2 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* +** st1_vnum_u16_2: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_2, svuint16x2_t, uint16_t, + svst1_vnum_u16_x2 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* +** st1_vnum_u16_14: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_14, svuint16x2_t, uint16_t, + svst1_vnum_u16_x2 (pn8, x0, 14, z0), + svst1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u16_16: +** incb x0, all, mul #16 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_16, svuint16x2_t, uint16_t, + svst1_vnum_u16_x2 (pn8, x0, 16, z0), + svst1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u16_m1: +** decb x0 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_m1, svuint16x2_t, uint16_t, + svst1_vnum_u16_x2 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* +** st1_vnum_u16_m2: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_m2, svuint16x2_t, uint16_t, + svst1_vnum_u16_x2 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* +** st1_vnum_u16_m16: +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_m16, svuint16x2_t, uint16_t, + svst1_vnum_u16_x2 (pn8, x0, -16, z0), + svst1_vnum (pn8, x0, -16, z0)) + +/* +** st1_vnum_u16_m18: +** addvl (x[0-9]+), x0, #-18 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_m18, svuint16x2_t, uint16_t, + svst1_vnum_u16_x2 (pn8, x0, -18, z0), + svst1_vnum (pn8, x0, -18, z0)) + +/* +** st1_vnum_u16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_x1, svuint16x2_t, uint16_t, + svst1_vnum_u16_x2 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..e0f6dd8398d99eedcae79b1486e2d34c2700b238 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u16_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_u16_base: +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_base, svuint16x4_t, uint16_t, + svst1_u16_x4 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_u16_index: +** st1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_index, svuint16x4_t, uint16_t, + svst1_u16_x4 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u16_1: +** incb x0 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_1, svuint16x4_t, uint16_t, + svst1_u16_x4 (pn8, x0 + svcnth (), z0), + svst1 (pn8, x0 + svcnth (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u16_2: +** incb x0, all, mul #2 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_2, svuint16x4_t, uint16_t, + svst1_u16_x4 (pn8, x0 + svcnth () * 2, z0), + svst1 (pn8, x0 + svcnth () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u16_3: +** incb x0, all, mul #3 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_3, svuint16x4_t, uint16_t, + svst1_u16_x4 (pn8, x0 + svcnth () * 3, z0), + svst1 (pn8, x0 + svcnth () * 3, z0)) + +/* +** st1_u16_4: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_4, svuint16x4_t, uint16_t, + svst1_u16_x4 (pn8, x0 + svcnth () * 4, z0), + svst1 (pn8, x0 + svcnth () * 4, z0)) + +/* +** st1_u16_28: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_28, svuint16x4_t, uint16_t, + svst1_u16_x4 (pn8, x0 + svcnth () * 28, z0), + svst1 (pn8, x0 + svcnth () * 28, z0)) + +/* +** st1_u16_32: +** [^{]* +** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_32, svuint16x4_t, uint16_t, + svst1_u16_x4 (pn8, x0 + svcnth () * 32, z0), + svst1 (pn8, x0 + svcnth () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u16_m1: +** decb x0 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_m1, svuint16x4_t, uint16_t, + svst1_u16_x4 (pn8, x0 - svcnth (), z0), + svst1 (pn8, x0 - svcnth (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u16_m2: +** decb x0, all, mul #2 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_m2, svuint16x4_t, uint16_t, + svst1_u16_x4 (pn8, x0 - svcnth () * 2, z0), + svst1 (pn8, x0 - svcnth () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_u16_m3: +** decb x0, all, mul #3 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_m3, svuint16x4_t, uint16_t, + svst1_u16_x4 (pn8, x0 - svcnth () * 3, z0), + svst1 (pn8, x0 - svcnth () * 3, z0)) + +/* +** st1_u16_m4: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_m4, svuint16x4_t, uint16_t, + svst1_u16_x4 (pn8, x0 - svcnth () * 4, z0), + svst1 (pn8, x0 - svcnth () * 4, z0)) + +/* +** st1_u16_m32: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_m32, svuint16x4_t, uint16_t, + svst1_u16_x4 (pn8, x0 - svcnth () * 32, z0), + svst1 (pn8, x0 - svcnth () * 32, z0)) + +/* +** st1_u16_m36: +** [^{]* +** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_m36, svuint16x4_t, uint16_t, + svst1_u16_x4 (pn8, x0 - svcnth () * 36, z0), + svst1 (pn8, x0 - svcnth () * 36, z0)) + +/* +** st1_u16_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_z17, svuint16x4_t, uint16_t, + svst1_u16_x4 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_u16_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_z22, svuint16x4_t, uint16_t, + svst1_u16_x4 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_u16_z28: +** st1h {z28\.h - z31\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_z28, svuint16x4_t, uint16_t, + svst1_u16_x4 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_u16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1h {z0\.h - z3\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_pn0, svuint16x4_t, uint16_t, + svst1_u16_x4 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_u16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1h {z0\.h - z3\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_pn7, svuint16x4_t, uint16_t, + svst1_u16_x4 (pn7, x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_u16_pn15: +** st1h {z0\.h - z3\.h}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u16_pn15, svuint16x4_t, uint16_t, + svst1_u16_x4 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_u16_0: +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_0, svuint16x4_t, uint16_t, + svst1_vnum_u16_x4 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u16_1: +** incb x0 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_1, svuint16x4_t, uint16_t, + svst1_vnum_u16_x4 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u16_2: +** incb x0, all, mul #2 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_2, svuint16x4_t, uint16_t, + svst1_vnum_u16_x4 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_vnum_u16_3: +** incb x0, all, mul #3 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_3, svuint16x4_t, uint16_t, + svst1_vnum_u16_x4 (pn8, x0, 3, z0), + svst1_vnum (pn8, x0, 3, z0)) + +/* +** st1_vnum_u16_4: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_4, svuint16x4_t, uint16_t, + svst1_vnum_u16_x4 (pn8, x0, 4, z0), + svst1_vnum (pn8, x0, 4, z0)) + +/* +** st1_vnum_u16_28: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_28, svuint16x4_t, uint16_t, + svst1_vnum_u16_x4 (pn8, x0, 28, z0), + svst1_vnum (pn8, x0, 28, z0)) + +/* +** st1_vnum_u16_32: +** [^{]* +** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_32, svuint16x4_t, uint16_t, + svst1_vnum_u16_x4 (pn8, x0, 32, z0), + svst1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u16_m1: +** decb x0 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_m1, svuint16x4_t, uint16_t, + svst1_vnum_u16_x4 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u16_m2: +** decb x0, all, mul #2 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_m2, svuint16x4_t, uint16_t, + svst1_vnum_u16_x4 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u16_m3: +** decb x0, all, mul #3 +** st1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_m3, svuint16x4_t, uint16_t, + svst1_vnum_u16_x4 (pn8, x0, -3, z0), + svst1_vnum (pn8, x0, -3, z0)) + +/* +** st1_vnum_u16_m4: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_m4, svuint16x4_t, uint16_t, + svst1_vnum_u16_x4 (pn8, x0, -4, z0), + svst1_vnum (pn8, x0, -4, z0)) + +/* +** st1_vnum_u16_m32: +** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_m32, svuint16x4_t, uint16_t, + svst1_vnum_u16_x4 (pn8, x0, -32, z0), + svst1_vnum (pn8, x0, -32, z0)) + +/* +** st1_vnum_u16_m36: +** [^{]* +** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_m36, svuint16x4_t, uint16_t, + svst1_vnum_u16_x4 (pn8, x0, -36, z0), + svst1_vnum (pn8, x0, -36, z0)) + +/* +** st1_vnum_u16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1h {z0\.h - z3\.h}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1h {z0\.h - z3\.h}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u16_x1, svuint16x4_t, uint16_t, + svst1_vnum_u16_x4 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..327ad664d5bbcf7dc2a4f36f6b508ccc2e33d2b4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u32_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_u32_base: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_base, svuint32x2_t, uint32_t, + svst1_u32_x2 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_u32_index: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_index, svuint32x2_t, uint32_t, + svst1_u32_x2 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u32_1: +** incb x0 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_1, svuint32x2_t, uint32_t, + svst1_u32_x2 (pn8, x0 + svcntw (), z0), + svst1 (pn8, x0 + svcntw (), z0)) + +/* +** st1_u32_2: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_2, svuint32x2_t, uint32_t, + svst1_u32_x2 (pn8, x0 + svcntw () * 2, z0), + svst1 (pn8, x0 + svcntw () * 2, z0)) + +/* +** st1_u32_14: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_14, svuint32x2_t, uint32_t, + svst1_u32_x2 (pn8, x0 + svcntw () * 14, z0), + svst1 (pn8, x0 + svcntw () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u32_16: +** incb x0, all, mul #16 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_16, svuint32x2_t, uint32_t, + svst1_u32_x2 (pn8, x0 + svcntw () * 16, z0), + svst1 (pn8, x0 + svcntw () * 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u32_m1: +** decb x0 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_m1, svuint32x2_t, uint32_t, + svst1_u32_x2 (pn8, x0 - svcntw (), z0), + svst1 (pn8, x0 - svcntw (), z0)) + +/* +** st1_u32_m2: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_m2, svuint32x2_t, uint32_t, + svst1_u32_x2 (pn8, x0 - svcntw () * 2, z0), + svst1 (pn8, x0 - svcntw () * 2, z0)) + +/* +** st1_u32_m16: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_m16, svuint32x2_t, uint32_t, + svst1_u32_x2 (pn8, x0 - svcntw () * 16, z0), + svst1 (pn8, x0 - svcntw () * 16, z0)) + +/* +** st1_u32_m18: +** addvl (x[0-9]+), x0, #-18 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_m18, svuint32x2_t, uint32_t, + svst1_u32_x2 (pn8, x0 - svcntw () * 18, z0), + svst1 (pn8, x0 - svcntw () * 18, z0)) + +/* +** st1_u32_z17: +** mov [^\n]+ +** mov [^\n]+ +** st1w {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_z17, svuint32x2_t, uint32_t, + svst1_u32_x2 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_u32_z22: +** st1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_z22, svuint32x2_t, uint32_t, + svst1_u32_x2 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_u32_z28: +** st1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_z28, svuint32x2_t, uint32_t, + svst1_u32_x2 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_u32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_pn0, svuint32x2_t, uint32_t, + svst1_u32_x2 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_u32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_pn7, svuint32x2_t, uint32_t, + svst1_u32_x2 (pn7, x0, z0), + svst1 (pn7, x0, z0)) 
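(The `_vnum` tests above measure their offset in whole vector registers, which is what the `#n, mul vl` addressing mode in the expected assembly expresses. A sketch of the corresponding source-level usage follows; the function name is hypothetical and the snippet is illustrative rather than part of the patch.)

    #include <stdint.h>
    #include <arm_sme.h>

    /* VNUM counts SVE vector registers, so this stores the pair starting
       two vector-lengths' worth of 32-bit elements past BASE, matching the
       "[x0, #2, mul vl]" form tested above.  */
    void store_two_vectors_ahead (uint32_t *base, svuint32x2_t data,
                                  svcount_t png)
        __arm_streaming
    {
      svst1_vnum_u32_x2 (png, base, 2, data);
    }
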
+ +/* +** st1_u32_pn15: +** st1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_pn15, svuint32x2_t, uint32_t, + svst1_u32_x2 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_u32_0: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_0, svuint32x2_t, uint32_t, + svst1_vnum_u32_x2 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u32_1: +** incb x0 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_1, svuint32x2_t, uint32_t, + svst1_vnum_u32_x2 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* +** st1_vnum_u32_2: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_2, svuint32x2_t, uint32_t, + svst1_vnum_u32_x2 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* +** st1_vnum_u32_14: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_14, svuint32x2_t, uint32_t, + svst1_vnum_u32_x2 (pn8, x0, 14, z0), + svst1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u32_16: +** incb x0, all, mul #16 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_16, svuint32x2_t, uint32_t, + svst1_vnum_u32_x2 (pn8, x0, 16, z0), + svst1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u32_m1: +** decb x0 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_m1, svuint32x2_t, uint32_t, + svst1_vnum_u32_x2 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* +** st1_vnum_u32_m2: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_m2, svuint32x2_t, uint32_t, + svst1_vnum_u32_x2 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* +** st1_vnum_u32_m16: +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_m16, svuint32x2_t, uint32_t, + svst1_vnum_u32_x2 (pn8, x0, -16, z0), + svst1_vnum (pn8, x0, -16, z0)) + +/* +** st1_vnum_u32_m18: +** addvl (x[0-9]+), x0, #-18 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_m18, svuint32x2_t, uint32_t, + svst1_vnum_u32_x2 (pn8, x0, -18, z0), + svst1_vnum (pn8, x0, -18, z0)) + +/* +** st1_vnum_u32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_x1, svuint32x2_t, uint32_t, + svst1_vnum_u32_x2 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..cdd276f45f0ab7dce997d5ba5f0e1f7f0812c84e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u32_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_u32_base: +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_base, svuint32x4_t, uint32_t, + svst1_u32_x4 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_u32_index: +** st1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_index, svuint32x4_t, uint32_t, + svst1_u32_x4 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u32_1: +** incb x0 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_1, svuint32x4_t, uint32_t, + svst1_u32_x4 (pn8, x0 + svcntw (), z0), + svst1 (pn8, x0 + svcntw (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u32_2: +** incb x0, all, mul #2 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_2, svuint32x4_t, uint32_t, + svst1_u32_x4 (pn8, x0 + svcntw () * 2, z0), + svst1 (pn8, x0 + svcntw () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u32_3: +** incb x0, all, mul #3 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_3, svuint32x4_t, uint32_t, + svst1_u32_x4 (pn8, x0 + svcntw () * 3, z0), + svst1 (pn8, x0 + svcntw () * 3, z0)) + +/* +** st1_u32_4: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_4, svuint32x4_t, uint32_t, + svst1_u32_x4 (pn8, x0 + svcntw () * 4, z0), + svst1 (pn8, x0 + svcntw () * 4, z0)) + +/* +** st1_u32_28: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_28, svuint32x4_t, uint32_t, + svst1_u32_x4 (pn8, x0 + svcntw () * 28, z0), + svst1 (pn8, x0 + svcntw () * 28, z0)) + +/* +** st1_u32_32: +** [^{]* +** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_32, svuint32x4_t, uint32_t, + svst1_u32_x4 (pn8, x0 + svcntw () * 32, z0), + svst1 (pn8, x0 + svcntw () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u32_m1: +** decb x0 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_m1, svuint32x4_t, uint32_t, + svst1_u32_x4 (pn8, x0 - svcntw (), z0), + svst1 (pn8, x0 - svcntw (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u32_m2: +** decb x0, all, mul #2 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_m2, svuint32x4_t, uint32_t, + svst1_u32_x4 (pn8, x0 - svcntw () * 2, z0), + svst1 (pn8, x0 - svcntw () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_u32_m3: +** decb x0, all, mul #3 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_m3, svuint32x4_t, uint32_t, + svst1_u32_x4 (pn8, x0 - svcntw () * 3, z0), + svst1 (pn8, x0 - svcntw () * 3, z0)) + +/* +** st1_u32_m4: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_m4, svuint32x4_t, uint32_t, + svst1_u32_x4 (pn8, x0 - svcntw () * 4, z0), + svst1 (pn8, x0 - svcntw () * 4, z0)) + +/* +** st1_u32_m32: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_m32, svuint32x4_t, uint32_t, + svst1_u32_x4 (pn8, x0 - svcntw () * 32, z0), + svst1 (pn8, x0 - svcntw () * 32, z0)) + +/* +** st1_u32_m36: +** [^{]* +** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_m36, svuint32x4_t, uint32_t, + svst1_u32_x4 (pn8, x0 - svcntw () * 36, z0), + svst1 (pn8, x0 - svcntw () * 36, z0)) + +/* +** st1_u32_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1w {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_z17, svuint32x4_t, uint32_t, + svst1_u32_x4 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_u32_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1w {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_z22, svuint32x4_t, uint32_t, + svst1_u32_x4 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_u32_z28: +** st1w {z28\.s - z31\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_z28, svuint32x4_t, uint32_t, + svst1_u32_x4 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_u32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1w {z0\.s - z3\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_pn0, svuint32x4_t, uint32_t, + svst1_u32_x4 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_u32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1w {z0\.s - z3\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_pn7, svuint32x4_t, uint32_t, + svst1_u32_x4 (pn7, x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_u32_pn15: +** st1w {z0\.s - z3\.s}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u32_pn15, svuint32x4_t, uint32_t, + svst1_u32_x4 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_u32_0: +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_0, svuint32x4_t, uint32_t, + svst1_vnum_u32_x4 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u32_1: +** incb x0 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_1, svuint32x4_t, uint32_t, + svst1_vnum_u32_x4 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u32_2: +** incb x0, all, mul #2 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_2, svuint32x4_t, uint32_t, + svst1_vnum_u32_x4 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_vnum_u32_3: +** incb x0, all, mul #3 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_3, svuint32x4_t, uint32_t, + svst1_vnum_u32_x4 (pn8, x0, 3, z0), + svst1_vnum (pn8, x0, 3, z0)) + +/* +** st1_vnum_u32_4: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_4, svuint32x4_t, uint32_t, + svst1_vnum_u32_x4 (pn8, x0, 4, z0), + svst1_vnum (pn8, x0, 4, z0)) + +/* +** st1_vnum_u32_28: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_28, svuint32x4_t, uint32_t, + svst1_vnum_u32_x4 (pn8, x0, 28, z0), + svst1_vnum (pn8, x0, 28, z0)) + +/* +** st1_vnum_u32_32: +** [^{]* +** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_32, svuint32x4_t, uint32_t, + svst1_vnum_u32_x4 (pn8, x0, 32, z0), + svst1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u32_m1: +** decb x0 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_m1, svuint32x4_t, uint32_t, + svst1_vnum_u32_x4 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u32_m2: +** decb x0, all, mul #2 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_m2, svuint32x4_t, uint32_t, + svst1_vnum_u32_x4 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u32_m3: +** decb x0, all, mul #3 +** st1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_m3, svuint32x4_t, uint32_t, + svst1_vnum_u32_x4 (pn8, x0, -3, z0), + svst1_vnum (pn8, x0, -3, z0)) + +/* +** st1_vnum_u32_m4: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_m4, svuint32x4_t, uint32_t, + svst1_vnum_u32_x4 (pn8, x0, -4, z0), + svst1_vnum (pn8, x0, -4, z0)) + +/* +** st1_vnum_u32_m32: +** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_m32, svuint32x4_t, uint32_t, + svst1_vnum_u32_x4 (pn8, x0, -32, z0), + svst1_vnum (pn8, x0, -32, z0)) + +/* +** st1_vnum_u32_m36: +** [^{]* +** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_m36, svuint32x4_t, uint32_t, + svst1_vnum_u32_x4 (pn8, x0, -36, z0), + svst1_vnum (pn8, x0, -36, z0)) + +/* +** st1_vnum_u32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1w {z0\.s - z3\.s}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1w {z0\.s - z3\.s}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u32_x1, svuint32x4_t, uint32_t, + svst1_vnum_u32_x4 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..91866261449f1515074deb258e0568a0e794f39b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u64_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_u64_base: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_base, svuint64x2_t, uint64_t, + svst1_u64_x2 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_u64_index: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_index, svuint64x2_t, uint64_t, + svst1_u64_x2 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u64_1: +** incb x0 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_1, svuint64x2_t, uint64_t, + svst1_u64_x2 (pn8, x0 + svcntd (), z0), + svst1 (pn8, x0 + svcntd (), z0)) + +/* +** st1_u64_2: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_2, svuint64x2_t, uint64_t, + svst1_u64_x2 (pn8, x0 + svcntd () * 2, z0), + svst1 (pn8, x0 + svcntd () * 2, z0)) + +/* +** st1_u64_14: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_14, svuint64x2_t, uint64_t, + svst1_u64_x2 (pn8, x0 + svcntd () * 14, z0), + svst1 (pn8, x0 + svcntd () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u64_16: +** incb x0, all, mul #16 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_16, svuint64x2_t, uint64_t, + svst1_u64_x2 (pn8, x0 + svcntd () * 16, z0), + svst1 (pn8, x0 + svcntd () * 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u64_m1: +** decb x0 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_m1, svuint64x2_t, uint64_t, + svst1_u64_x2 (pn8, x0 - svcntd (), z0), + svst1 (pn8, x0 - svcntd (), z0)) + +/* +** st1_u64_m2: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_m2, svuint64x2_t, uint64_t, + svst1_u64_x2 (pn8, x0 - svcntd () * 2, z0), + svst1 (pn8, x0 - svcntd () * 2, z0)) + +/* +** st1_u64_m16: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_m16, svuint64x2_t, uint64_t, + svst1_u64_x2 (pn8, x0 - svcntd () * 16, z0), + svst1 (pn8, x0 - svcntd () * 16, z0)) + +/* +** st1_u64_m18: +** addvl (x[0-9]+), x0, #-18 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_m18, svuint64x2_t, uint64_t, + svst1_u64_x2 (pn8, x0 - svcntd () * 18, z0), + svst1 (pn8, x0 - svcntd () * 18, z0)) + +/* +** st1_u64_z17: +** mov [^\n]+ +** mov [^\n]+ +** st1d {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_z17, svuint64x2_t, uint64_t, + svst1_u64_x2 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_u64_z22: +** st1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_z22, svuint64x2_t, uint64_t, + svst1_u64_x2 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_u64_z28: +** st1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_z28, svuint64x2_t, uint64_t, + svst1_u64_x2 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_u64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_pn0, svuint64x2_t, uint64_t, + svst1_u64_x2 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_u64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_pn7, svuint64x2_t, uint64_t, + svst1_u64_x2 (pn7, x0, z0), + svst1 (pn7, x0, z0)) 
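(The `pn0`/`pn7` tests above expect an extra `mov p(8-15).b, pN.b` because the multi-vector ST1D form only accepts a predicate-as-counter in p8-p15, while `pn8`-`pn15` need no copy. A caller that builds the predicate directly lets the register allocator choose a suitable register from the start; the sketch below is hypothetical and not part of the patch.)

    #include <stdint.h>
    #include <arm_sme.h>

    /* svptrue_c64 returns an all-true predicate-as-counter for 64-bit
       elements; nothing ties it to p0-p7, so no fix-up mov is needed.
       Hypothetical example, not from the patch.  */
    void store_all_elements (uint64_t *dst, svuint64x2_t data)
        __arm_streaming
    {
      svst1_u64_x2 (svptrue_c64 (), dst, data);
    }
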
+ +/* +** st1_u64_pn15: +** st1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_pn15, svuint64x2_t, uint64_t, + svst1_u64_x2 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_u64_0: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_0, svuint64x2_t, uint64_t, + svst1_vnum_u64_x2 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u64_1: +** incb x0 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_1, svuint64x2_t, uint64_t, + svst1_vnum_u64_x2 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* +** st1_vnum_u64_2: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_2, svuint64x2_t, uint64_t, + svst1_vnum_u64_x2 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* +** st1_vnum_u64_14: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_14, svuint64x2_t, uint64_t, + svst1_vnum_u64_x2 (pn8, x0, 14, z0), + svst1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u64_16: +** incb x0, all, mul #16 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_16, svuint64x2_t, uint64_t, + svst1_vnum_u64_x2 (pn8, x0, 16, z0), + svst1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u64_m1: +** decb x0 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_m1, svuint64x2_t, uint64_t, + svst1_vnum_u64_x2 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* +** st1_vnum_u64_m2: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_m2, svuint64x2_t, uint64_t, + svst1_vnum_u64_x2 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* +** st1_vnum_u64_m16: +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_m16, svuint64x2_t, uint64_t, + svst1_vnum_u64_x2 (pn8, x0, -16, z0), + svst1_vnum (pn8, x0, -16, z0)) + +/* +** st1_vnum_u64_m18: +** addvl (x[0-9]+), x0, #-18 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_m18, svuint64x2_t, uint64_t, + svst1_vnum_u64_x2 (pn8, x0, -18, z0), + svst1_vnum (pn8, x0, -18, z0)) + +/* +** st1_vnum_u64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_x1, svuint64x2_t, uint64_t, + svst1_vnum_u64_x2 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..829a5b5057039c1599155277e58c2d7c2e616058 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u64_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_u64_base: +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_base, svuint64x4_t, uint64_t, + svst1_u64_x4 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_u64_index: +** st1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_index, svuint64x4_t, uint64_t, + svst1_u64_x4 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u64_1: +** incb x0 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_1, svuint64x4_t, uint64_t, + svst1_u64_x4 (pn8, x0 + svcntd (), z0), + svst1 (pn8, x0 + svcntd (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u64_2: +** incb x0, all, mul #2 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_2, svuint64x4_t, uint64_t, + svst1_u64_x4 (pn8, x0 + svcntd () * 2, z0), + svst1 (pn8, x0 + svcntd () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u64_3: +** incb x0, all, mul #3 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_3, svuint64x4_t, uint64_t, + svst1_u64_x4 (pn8, x0 + svcntd () * 3, z0), + svst1 (pn8, x0 + svcntd () * 3, z0)) + +/* +** st1_u64_4: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_4, svuint64x4_t, uint64_t, + svst1_u64_x4 (pn8, x0 + svcntd () * 4, z0), + svst1 (pn8, x0 + svcntd () * 4, z0)) + +/* +** st1_u64_28: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_28, svuint64x4_t, uint64_t, + svst1_u64_x4 (pn8, x0 + svcntd () * 28, z0), + svst1 (pn8, x0 + svcntd () * 28, z0)) + +/* +** st1_u64_32: +** [^{]* +** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_32, svuint64x4_t, uint64_t, + svst1_u64_x4 (pn8, x0 + svcntd () * 32, z0), + svst1 (pn8, x0 + svcntd () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u64_m1: +** decb x0 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_m1, svuint64x4_t, uint64_t, + svst1_u64_x4 (pn8, x0 - svcntd (), z0), + svst1 (pn8, x0 - svcntd (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u64_m2: +** decb x0, all, mul #2 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_m2, svuint64x4_t, uint64_t, + svst1_u64_x4 (pn8, x0 - svcntd () * 2, z0), + svst1 (pn8, x0 - svcntd () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_u64_m3: +** decb x0, all, mul #3 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_m3, svuint64x4_t, uint64_t, + svst1_u64_x4 (pn8, x0 - svcntd () * 3, z0), + svst1 (pn8, x0 - svcntd () * 3, z0)) + +/* +** st1_u64_m4: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_m4, svuint64x4_t, uint64_t, + svst1_u64_x4 (pn8, x0 - svcntd () * 4, z0), + svst1 (pn8, x0 - svcntd () * 4, z0)) + +/* +** st1_u64_m32: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_m32, svuint64x4_t, uint64_t, + svst1_u64_x4 (pn8, x0 - svcntd () * 32, z0), + svst1 (pn8, x0 - svcntd () * 32, z0)) + +/* +** st1_u64_m36: +** [^{]* +** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_m36, svuint64x4_t, uint64_t, + svst1_u64_x4 (pn8, x0 - svcntd () * 36, z0), + svst1 (pn8, x0 - svcntd () * 36, z0)) + +/* +** st1_u64_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1d {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_z17, svuint64x4_t, uint64_t, + svst1_u64_x4 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_u64_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1d {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_z22, svuint64x4_t, uint64_t, + svst1_u64_x4 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_u64_z28: +** st1d {z28\.d - z31\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_z28, svuint64x4_t, uint64_t, + svst1_u64_x4 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_u64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1d {z0\.d - z3\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_pn0, svuint64x4_t, uint64_t, + svst1_u64_x4 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_u64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1d {z0\.d - z3\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_pn7, svuint64x4_t, uint64_t, + svst1_u64_x4 (pn7, x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_u64_pn15: +** st1d {z0\.d - z3\.d}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u64_pn15, svuint64x4_t, uint64_t, + svst1_u64_x4 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_u64_0: +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_0, svuint64x4_t, uint64_t, + svst1_vnum_u64_x4 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u64_1: +** incb x0 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_1, svuint64x4_t, uint64_t, + svst1_vnum_u64_x4 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u64_2: +** incb x0, all, mul #2 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_2, svuint64x4_t, uint64_t, + svst1_vnum_u64_x4 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_vnum_u64_3: +** incb x0, all, mul #3 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_3, svuint64x4_t, uint64_t, + svst1_vnum_u64_x4 (pn8, x0, 3, z0), + svst1_vnum (pn8, x0, 3, z0)) + +/* +** st1_vnum_u64_4: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_4, svuint64x4_t, uint64_t, + svst1_vnum_u64_x4 (pn8, x0, 4, z0), + svst1_vnum (pn8, x0, 4, z0)) + +/* +** st1_vnum_u64_28: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_28, svuint64x4_t, uint64_t, + svst1_vnum_u64_x4 (pn8, x0, 28, z0), + svst1_vnum (pn8, x0, 28, z0)) + +/* +** st1_vnum_u64_32: +** [^{]* +** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_32, svuint64x4_t, uint64_t, + svst1_vnum_u64_x4 (pn8, x0, 32, z0), + svst1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u64_m1: +** decb x0 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_m1, svuint64x4_t, uint64_t, + svst1_vnum_u64_x4 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u64_m2: +** decb x0, all, mul #2 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_m2, svuint64x4_t, uint64_t, + svst1_vnum_u64_x4 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u64_m3: +** decb x0, all, mul #3 +** st1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_m3, svuint64x4_t, uint64_t, + svst1_vnum_u64_x4 (pn8, x0, -3, z0), + svst1_vnum (pn8, x0, -3, z0)) + +/* +** st1_vnum_u64_m4: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_m4, svuint64x4_t, uint64_t, + svst1_vnum_u64_x4 (pn8, x0, -4, z0), + svst1_vnum (pn8, x0, -4, z0)) + +/* +** st1_vnum_u64_m32: +** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_m32, svuint64x4_t, uint64_t, + svst1_vnum_u64_x4 (pn8, x0, -32, z0), + svst1_vnum (pn8, x0, -32, z0)) + +/* +** st1_vnum_u64_m36: +** [^{]* +** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_m36, svuint64x4_t, uint64_t, + svst1_vnum_u64_x4 (pn8, x0, -36, z0), + svst1_vnum (pn8, x0, -36, z0)) + +/* +** st1_vnum_u64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1d {z0\.d - z3\.d}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1d {z0\.d - z3\.d}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u64_x1, svuint64x4_t, uint64_t, + svst1_vnum_u64_x4 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..74dae186c112f8c0252b7f609b2ccc8a5839a0a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u8_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_u8_base: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_base, svuint8x2_t, uint8_t, + svst1_u8_x2 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_u8_index: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_index, svuint8x2_t, uint8_t, + svst1_u8_x2 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u8_1: +** incb x0 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_1, svuint8x2_t, uint8_t, + svst1_u8_x2 (pn8, x0 + svcntb (), z0), + svst1 (pn8, x0 + svcntb (), z0)) + +/* +** st1_u8_2: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_2, svuint8x2_t, uint8_t, + svst1_u8_x2 (pn8, x0 + svcntb () * 2, z0), + svst1 (pn8, x0 + svcntb () * 2, z0)) + +/* +** st1_u8_14: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_14, svuint8x2_t, uint8_t, + svst1_u8_x2 (pn8, x0 + svcntb () * 14, z0), + svst1 (pn8, x0 + svcntb () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u8_16: +** incb x0, all, mul #16 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_16, svuint8x2_t, uint8_t, + svst1_u8_x2 (pn8, x0 + svcntb () * 16, z0), + svst1 (pn8, x0 + svcntb () * 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u8_m1: +** decb x0 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_m1, svuint8x2_t, uint8_t, + svst1_u8_x2 (pn8, x0 - svcntb (), z0), + svst1 (pn8, x0 - svcntb (), z0)) + +/* +** st1_u8_m2: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_m2, svuint8x2_t, uint8_t, + svst1_u8_x2 (pn8, x0 - svcntb () * 2, z0), + svst1 (pn8, x0 - svcntb () * 2, z0)) + +/* +** st1_u8_m16: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_m16, svuint8x2_t, uint8_t, + svst1_u8_x2 (pn8, x0 - svcntb () * 16, z0), + svst1 (pn8, x0 - svcntb () * 16, z0)) + +/* +** st1_u8_m18: +** addvl (x[0-9]+), x0, #-18 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_m18, svuint8x2_t, uint8_t, + svst1_u8_x2 (pn8, x0 - svcntb () * 18, z0), + svst1 (pn8, x0 - svcntb () * 18, z0)) + +/* +** st1_u8_z17: +** mov [^\n]+ +** mov [^\n]+ +** st1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_z17, svuint8x2_t, uint8_t, + svst1_u8_x2 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_u8_z22: +** st1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_z22, svuint8x2_t, uint8_t, + svst1_u8_x2 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_u8_z28: +** st1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_z28, svuint8x2_t, uint8_t, + svst1_u8_x2 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_u8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_pn0, svuint8x2_t, uint8_t, + svst1_u8_x2 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_u8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_pn7, svuint8x2_t, uint8_t, + svst1_u8_x2 (pn7, x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_u8_pn15: +** st1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\] +** ret +*/ 
+TEST_STORE_COUNT (st1_u8_pn15, svuint8x2_t, uint8_t, + svst1_u8_x2 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_u8_0: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_0, svuint8x2_t, uint8_t, + svst1_vnum_u8_x2 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u8_1: +** incb x0 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_1, svuint8x2_t, uint8_t, + svst1_vnum_u8_x2 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* +** st1_vnum_u8_2: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_2, svuint8x2_t, uint8_t, + svst1_vnum_u8_x2 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* +** st1_vnum_u8_14: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_14, svuint8x2_t, uint8_t, + svst1_vnum_u8_x2 (pn8, x0, 14, z0), + svst1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u8_16: +** incb x0, all, mul #16 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_16, svuint8x2_t, uint8_t, + svst1_vnum_u8_x2 (pn8, x0, 16, z0), + svst1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u8_m1: +** decb x0 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_m1, svuint8x2_t, uint8_t, + svst1_vnum_u8_x2 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* +** st1_vnum_u8_m2: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_m2, svuint8x2_t, uint8_t, + svst1_vnum_u8_x2 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* +** st1_vnum_u8_m16: +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_m16, svuint8x2_t, uint8_t, + svst1_vnum_u8_x2 (pn8, x0, -16, z0), + svst1_vnum (pn8, x0, -16, z0)) + +/* +** st1_vnum_u8_m18: +** addvl (x[0-9]+), x0, #-18 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_m18, svuint8x2_t, uint8_t, + svst1_vnum_u8_x2 (pn8, x0, -18, z0), + svst1_vnum (pn8, x0, -18, z0)) + +/* +** st1_vnum_u8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_x1, svuint8x2_t, uint8_t, + svst1_vnum_u8_x2 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..7d22d18fa32952899ac277ad5b3ca9d0c01b4004 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_u8_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** st1_u8_base: +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_base, svuint8x4_t, uint8_t, + svst1_u8_x4 (pn8, x0, z0), + svst1 (pn8, x0, z0)) + +/* +** st1_u8_index: +** st1b {z0\.b - z3\.b}, pn8, \[x0, x1\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_index, svuint8x4_t, uint8_t, + svst1_u8_x4 (pn8, x0 + x1, z0), + svst1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u8_1: +** incb x0 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_1, svuint8x4_t, uint8_t, + svst1_u8_x4 (pn8, x0 + svcntb (), z0), + svst1 (pn8, x0 + svcntb (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u8_2: +** incb x0, all, mul #2 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_2, svuint8x4_t, uint8_t, + svst1_u8_x4 (pn8, x0 + svcntb () * 2, z0), + svst1 (pn8, x0 + svcntb () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u8_3: +** incb x0, all, mul #3 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_3, svuint8x4_t, uint8_t, + svst1_u8_x4 (pn8, x0 + svcntb () * 3, z0), + svst1 (pn8, x0 + svcntb () * 3, z0)) + +/* +** st1_u8_4: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_4, svuint8x4_t, uint8_t, + svst1_u8_x4 (pn8, x0 + svcntb () * 4, z0), + svst1 (pn8, x0 + svcntb () * 4, z0)) + +/* +** st1_u8_28: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_28, svuint8x4_t, uint8_t, + svst1_u8_x4 (pn8, x0 + svcntb () * 28, z0), + svst1 (pn8, x0 + svcntb () * 28, z0)) + +/* +** st1_u8_32: +** [^{]* +** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_32, svuint8x4_t, uint8_t, + svst1_u8_x4 (pn8, x0 + svcntb () * 32, z0), + svst1 (pn8, x0 + svcntb () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u8_m1: +** decb x0 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_m1, svuint8x4_t, uint8_t, + svst1_u8_x4 (pn8, x0 - svcntb (), z0), + svst1 (pn8, x0 - svcntb (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_u8_m2: +** decb x0, all, mul #2 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_m2, svuint8x4_t, uint8_t, + svst1_u8_x4 (pn8, x0 - svcntb () * 2, z0), + svst1 (pn8, x0 - svcntb () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
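As in the u64 tests, an offset of -3 vectors is not a multiple of the vector count and so has no immediate encoding in the x4 form.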
*/ +/* +** st1_u8_m3: +** decb x0, all, mul #3 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_m3, svuint8x4_t, uint8_t, + svst1_u8_x4 (pn8, x0 - svcntb () * 3, z0), + svst1 (pn8, x0 - svcntb () * 3, z0)) + +/* +** st1_u8_m4: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_m4, svuint8x4_t, uint8_t, + svst1_u8_x4 (pn8, x0 - svcntb () * 4, z0), + svst1 (pn8, x0 - svcntb () * 4, z0)) + +/* +** st1_u8_m32: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_m32, svuint8x4_t, uint8_t, + svst1_u8_x4 (pn8, x0 - svcntb () * 32, z0), + svst1 (pn8, x0 - svcntb () * 32, z0)) + +/* +** st1_u8_m36: +** [^{]* +** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_m36, svuint8x4_t, uint8_t, + svst1_u8_x4 (pn8, x0 - svcntb () * 36, z0), + svst1 (pn8, x0 - svcntb () * 36, z0)) + +/* +** st1_u8_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_z17, svuint8x4_t, uint8_t, + svst1_u8_x4 (pn8, x0, z17), + svst1 (pn8, x0, z17)) + +/* +** st1_u8_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** st1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_z22, svuint8x4_t, uint8_t, + svst1_u8_x4 (pn8, x0, z22), + svst1 (pn8, x0, z22)) + +/* +** st1_u8_z28: +** st1b {z28\.b - z31\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_z28, svuint8x4_t, uint8_t, + svst1_u8_x4 (pn8, x0, z28), + svst1 (pn8, x0, z28)) + +/* +** st1_u8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** st1b {z0\.b - z3\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_pn0, svuint8x4_t, uint8_t, + svst1_u8_x4 (pn0, x0, z0), + svst1 (pn0, x0, z0)) + +/* +** st1_u8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** st1b {z0\.b - z3\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_pn7, svuint8x4_t, uint8_t, + svst1_u8_x4 (pn7, x0, z0), + svst1 (pn7, x0, z0)) + +/* +** st1_u8_pn15: +** st1b {z0\.b - z3\.b}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_u8_pn15, svuint8x4_t, uint8_t, + svst1_u8_x4 (pn15, x0, z0), + svst1 (pn15, x0, z0)) + +/* +** st1_vnum_u8_0: +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_0, svuint8x4_t, uint8_t, + svst1_vnum_u8_x4 (pn8, x0, 0, z0), + svst1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u8_1: +** incb x0 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_1, svuint8x4_t, uint8_t, + svst1_vnum_u8_x4 (pn8, x0, 1, z0), + svst1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u8_2: +** incb x0, all, mul #2 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_2, svuint8x4_t, uint8_t, + svst1_vnum_u8_x4 (pn8, x0, 2, z0), + svst1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** st1_vnum_u8_3: +** incb x0, all, mul #3 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_3, svuint8x4_t, uint8_t, + svst1_vnum_u8_x4 (pn8, x0, 3, z0), + svst1_vnum (pn8, x0, 3, z0)) + +/* +** st1_vnum_u8_4: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_4, svuint8x4_t, uint8_t, + svst1_vnum_u8_x4 (pn8, x0, 4, z0), + svst1_vnum (pn8, x0, 4, z0)) + +/* +** st1_vnum_u8_28: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_28, svuint8x4_t, uint8_t, + svst1_vnum_u8_x4 (pn8, x0, 28, z0), + svst1_vnum (pn8, x0, 28, z0)) + +/* +** st1_vnum_u8_32: +** [^{]* +** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_32, svuint8x4_t, uint8_t, + svst1_vnum_u8_x4 (pn8, x0, 32, z0), + svst1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u8_m1: +** decb x0 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_m1, svuint8x4_t, uint8_t, + svst1_vnum_u8_x4 (pn8, x0, -1, z0), + svst1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u8_m2: +** decb x0, all, mul #2 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_m2, svuint8x4_t, uint8_t, + svst1_vnum_u8_x4 (pn8, x0, -2, z0), + svst1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** st1_vnum_u8_m3: +** decb x0, all, mul #3 +** st1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_m3, svuint8x4_t, uint8_t, + svst1_vnum_u8_x4 (pn8, x0, -3, z0), + svst1_vnum (pn8, x0, -3, z0)) + +/* +** st1_vnum_u8_m4: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_m4, svuint8x4_t, uint8_t, + svst1_vnum_u8_x4 (pn8, x0, -4, z0), + svst1_vnum (pn8, x0, -4, z0)) + +/* +** st1_vnum_u8_m32: +** st1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_m32, svuint8x4_t, uint8_t, + svst1_vnum_u8_x4 (pn8, x0, -32, z0), + svst1_vnum (pn8, x0, -32, z0)) + +/* +** st1_vnum_u8_m36: +** [^{]* +** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_m36, svuint8x4_t, uint8_t, + svst1_vnum_u8_x4 (pn8, x0, -36, z0), + svst1_vnum (pn8, x0, -36, z0)) + +/* +** st1_vnum_u8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** st1b {z0\.b - z3\.b}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** st1b {z0\.b - z3\.b}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (st1_vnum_u8_x1, svuint8x4_t, uint8_t, + svst1_vnum_u8_x4 (pn8, x0, x1, z0), + svst1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_bf16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..d2a37180600ef0bc17e98d9854dacd440cb74556 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_bf16_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_bf16_base: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_base, svbfloat16x2_t, bfloat16_t, + svstnt1_bf16_x2 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_bf16_index: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_index, svbfloat16x2_t, bfloat16_t, + svstnt1_bf16_x2 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_bf16_1: +** incb x0 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_1, svbfloat16x2_t, bfloat16_t, + svstnt1_bf16_x2 (pn8, x0 + svcnth (), z0), + svstnt1 (pn8, x0 + svcnth (), z0)) + +/* +** stnt1_bf16_2: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_2, svbfloat16x2_t, bfloat16_t, + svstnt1_bf16_x2 (pn8, x0 + svcnth () * 2, z0), + svstnt1 (pn8, x0 + svcnth () * 2, z0)) + +/* +** stnt1_bf16_14: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_14, svbfloat16x2_t, bfloat16_t, + svstnt1_bf16_x2 (pn8, x0 + svcnth () * 14, z0), + svstnt1 (pn8, x0 + svcnth () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_bf16_16: +** incb x0, all, mul #16 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_16, svbfloat16x2_t, bfloat16_t, + svstnt1_bf16_x2 (pn8, x0 + svcnth () * 16, z0), + svstnt1 (pn8, x0 + svcnth () * 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_bf16_m1: +** decb x0 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_m1, svbfloat16x2_t, bfloat16_t, + svstnt1_bf16_x2 (pn8, x0 - svcnth (), z0), + svstnt1 (pn8, x0 - svcnth (), z0)) + +/* +** stnt1_bf16_m2: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_m2, svbfloat16x2_t, bfloat16_t, + svstnt1_bf16_x2 (pn8, x0 - svcnth () * 2, z0), + svstnt1 (pn8, x0 - svcnth () * 2, z0)) + +/* +** stnt1_bf16_m16: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_m16, svbfloat16x2_t, bfloat16_t, + svstnt1_bf16_x2 (pn8, x0 - svcnth () * 16, z0), + svstnt1 (pn8, x0 - svcnth () * 16, z0)) + +/* +** stnt1_bf16_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_m18, svbfloat16x2_t, bfloat16_t, + svstnt1_bf16_x2 (pn8, x0 - svcnth () * 18, z0), + svstnt1 (pn8, x0 - svcnth () * 18, z0)) + +/* +** stnt1_bf16_z17: +** mov [^\n]+ +** mov [^\n]+ +** stnt1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_z17, svbfloat16x2_t, bfloat16_t, + svstnt1_bf16_x2 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_bf16_z22: +** stnt1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_z22, svbfloat16x2_t, bfloat16_t, + svstnt1_bf16_x2 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_bf16_z28: +** stnt1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_z28, svbfloat16x2_t, bfloat16_t, + svstnt1_bf16_x2 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_bf16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_pn0, svbfloat16x2_t, bfloat16_t, + svstnt1_bf16_x2 (pn0, x0, z0), + svstnt1 
(pn0, x0, z0)) + +/* +** stnt1_bf16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_pn7, svbfloat16x2_t, bfloat16_t, + svstnt1_bf16_x2 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_bf16_pn15: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_pn15, svbfloat16x2_t, bfloat16_t, + svstnt1_bf16_x2 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_bf16_0: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_0, svbfloat16x2_t, bfloat16_t, + svstnt1_vnum_bf16_x2 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_bf16_1: +** incb x0 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_1, svbfloat16x2_t, bfloat16_t, + svstnt1_vnum_bf16_x2 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* +** stnt1_vnum_bf16_2: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_2, svbfloat16x2_t, bfloat16_t, + svstnt1_vnum_bf16_x2 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* +** stnt1_vnum_bf16_14: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_14, svbfloat16x2_t, bfloat16_t, + svstnt1_vnum_bf16_x2 (pn8, x0, 14, z0), + svstnt1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_bf16_16: +** incb x0, all, mul #16 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_16, svbfloat16x2_t, bfloat16_t, + svstnt1_vnum_bf16_x2 (pn8, x0, 16, z0), + svstnt1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. 
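The two-register form only encodes immediates that are multiples of 2 in [-16, 14] (compare the _14/_16 and _m16/_m18 tests), so vnum = -1 requires adjusting the base first.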
*/ +/* +** stnt1_vnum_bf16_m1: +** decb x0 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_m1, svbfloat16x2_t, bfloat16_t, + svstnt1_vnum_bf16_x2 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* +** stnt1_vnum_bf16_m2: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_m2, svbfloat16x2_t, bfloat16_t, + svstnt1_vnum_bf16_x2 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* +** stnt1_vnum_bf16_m16: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_m16, svbfloat16x2_t, bfloat16_t, + svstnt1_vnum_bf16_x2 (pn8, x0, -16, z0), + svstnt1_vnum (pn8, x0, -16, z0)) + +/* +** stnt1_vnum_bf16_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_m18, svbfloat16x2_t, bfloat16_t, + svstnt1_vnum_bf16_x2 (pn8, x0, -18, z0), + svstnt1_vnum (pn8, x0, -18, z0)) + +/* +** stnt1_vnum_bf16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_x1, svbfloat16x2_t, bfloat16_t, + svstnt1_vnum_bf16_x2 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_bf16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_bf16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..4db1e8f238e3c7de23d8778bb557e019dbe1ed9a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_bf16_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_bf16_base: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_base, svbfloat16x4_t, bfloat16_t, + svstnt1_bf16_x4 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_bf16_index: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_index, svbfloat16x4_t, bfloat16_t, + svstnt1_bf16_x4 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_bf16_1: +** incb x0 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_1, svbfloat16x4_t, bfloat16_t, + svstnt1_bf16_x4 (pn8, x0 + svcnth (), z0), + svstnt1 (pn8, x0 + svcnth (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_bf16_2: +** incb x0, all, mul #2 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_2, svbfloat16x4_t, bfloat16_t, + svstnt1_bf16_x4 (pn8, x0 + svcnth () * 2, z0), + svstnt1 (pn8, x0 + svcnth () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_bf16_3: +** incb x0, all, mul #3 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_3, svbfloat16x4_t, bfloat16_t, + svstnt1_bf16_x4 (pn8, x0 + svcnth () * 3, z0), + svstnt1 (pn8, x0 + svcnth () * 3, z0)) + +/* +** stnt1_bf16_4: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_4, svbfloat16x4_t, bfloat16_t, + svstnt1_bf16_x4 (pn8, x0 + svcnth () * 4, z0), + svstnt1 (pn8, x0 + svcnth () * 4, z0)) + +/* +** stnt1_bf16_28: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_28, svbfloat16x4_t, bfloat16_t, + svstnt1_bf16_x4 (pn8, x0 + svcnth () * 28, z0), + svstnt1 (pn8, x0 + svcnth () * 28, z0)) + +/* +** stnt1_bf16_32: +** [^{]* +** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_32, svbfloat16x4_t, bfloat16_t, + svstnt1_bf16_x4 (pn8, x0 + svcnth () * 32, z0), + svstnt1 (pn8, x0 + svcnth () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_bf16_m1: +** decb x0 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_m1, svbfloat16x4_t, bfloat16_t, + svstnt1_bf16_x4 (pn8, x0 - svcnth (), z0), + svstnt1 (pn8, x0 - svcnth (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_bf16_m2: +** decb x0, all, mul #2 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_m2, svbfloat16x4_t, bfloat16_t, + svstnt1_bf16_x4 (pn8, x0 - svcnth () * 2, z0), + svstnt1 (pn8, x0 - svcnth () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_bf16_m3: +** decb x0, all, mul #3 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_m3, svbfloat16x4_t, bfloat16_t, + svstnt1_bf16_x4 (pn8, x0 - svcnth () * 3, z0), + svstnt1 (pn8, x0 - svcnth () * 3, z0)) + +/* +** stnt1_bf16_m4: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_m4, svbfloat16x4_t, bfloat16_t, + svstnt1_bf16_x4 (pn8, x0 - svcnth () * 4, z0), + svstnt1 (pn8, x0 - svcnth () * 4, z0)) + +/* +** stnt1_bf16_m32: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_m32, svbfloat16x4_t, bfloat16_t, + svstnt1_bf16_x4 (pn8, x0 - svcnth () * 32, z0), + svstnt1 (pn8, x0 - svcnth () * 32, z0)) + +/* +** stnt1_bf16_m36: +** [^{]* +** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_m36, svbfloat16x4_t, bfloat16_t, + svstnt1_bf16_x4 (pn8, x0 - svcnth () * 36, z0), + svstnt1 (pn8, x0 - svcnth () * 36, z0)) + +/* +** stnt1_bf16_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_z17, svbfloat16x4_t, bfloat16_t, + svstnt1_bf16_x4 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_bf16_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_z22, svbfloat16x4_t, bfloat16_t, + svstnt1_bf16_x4 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_bf16_z28: +** stnt1h {z28\.h - z31\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_z28, svbfloat16x4_t, bfloat16_t, + svstnt1_bf16_x4 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_bf16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_pn0, svbfloat16x4_t, bfloat16_t, + svstnt1_bf16_x4 (pn0, 
x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_bf16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_pn7, svbfloat16x4_t, bfloat16_t, + svstnt1_bf16_x4 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_bf16_pn15: +** stnt1h {z0\.h - z3\.h}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_bf16_pn15, svbfloat16x4_t, bfloat16_t, + svstnt1_bf16_x4 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_bf16_0: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_0, svbfloat16x4_t, bfloat16_t, + svstnt1_vnum_bf16_x4 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_bf16_1: +** incb x0 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_1, svbfloat16x4_t, bfloat16_t, + svstnt1_vnum_bf16_x4 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_bf16_2: +** incb x0, all, mul #2 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_2, svbfloat16x4_t, bfloat16_t, + svstnt1_vnum_bf16_x4 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_bf16_3: +** incb x0, all, mul #3 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_3, svbfloat16x4_t, bfloat16_t, + svstnt1_vnum_bf16_x4 (pn8, x0, 3, z0), + svstnt1_vnum (pn8, x0, 3, z0)) + +/* +** stnt1_vnum_bf16_4: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_4, svbfloat16x4_t, bfloat16_t, + svstnt1_vnum_bf16_x4 (pn8, x0, 4, z0), + svstnt1_vnum (pn8, x0, 4, z0)) + +/* +** stnt1_vnum_bf16_28: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_28, svbfloat16x4_t, bfloat16_t, + svstnt1_vnum_bf16_x4 (pn8, x0, 28, z0), + svstnt1_vnum (pn8, x0, 28, z0)) + +/* +** stnt1_vnum_bf16_32: +** [^{]* +** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_32, svbfloat16x4_t, bfloat16_t, + svstnt1_vnum_bf16_x4 (pn8, x0, 32, z0), + svstnt1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_bf16_m1: +** decb x0 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_m1, svbfloat16x4_t, bfloat16_t, + svstnt1_vnum_bf16_x4 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_bf16_m2: +** decb x0, all, mul #2 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_m2, svbfloat16x4_t, bfloat16_t, + svstnt1_vnum_bf16_x4 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_bf16_m3: +** decb x0, all, mul #3 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_m3, svbfloat16x4_t, bfloat16_t, + svstnt1_vnum_bf16_x4 (pn8, x0, -3, z0), + svstnt1_vnum (pn8, x0, -3, z0)) + +/* +** stnt1_vnum_bf16_m4: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_m4, svbfloat16x4_t, bfloat16_t, + svstnt1_vnum_bf16_x4 (pn8, x0, -4, z0), + svstnt1_vnum (pn8, x0, -4, z0)) + +/* +** stnt1_vnum_bf16_m32: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_m32, svbfloat16x4_t, bfloat16_t, + svstnt1_vnum_bf16_x4 (pn8, x0, -32, z0), + svstnt1_vnum (pn8, x0, -32, z0)) + +/* +** stnt1_vnum_bf16_m36: +** [^{]* +** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_m36, svbfloat16x4_t, bfloat16_t, + svstnt1_vnum_bf16_x4 (pn8, x0, -36, z0), + svstnt1_vnum (pn8, x0, -36, z0)) + +/* +** stnt1_vnum_bf16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1h {z0\.h - z3\.h}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_bf16_x1, svbfloat16x4_t, bfloat16_t, + svstnt1_vnum_bf16_x4 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..c3bfab715e9e9170a6c334d05419ea63a1fb30bf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f16_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_f16_base: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_base, svfloat16x2_t, float16_t, + svstnt1_f16_x2 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_f16_index: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_index, svfloat16x2_t, float16_t, + svstnt1_f16_x2 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f16_1: +** incb x0 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_1, svfloat16x2_t, float16_t, + svstnt1_f16_x2 (pn8, x0 + svcnth (), z0), + svstnt1 (pn8, x0 + svcnth (), z0)) + +/* +** stnt1_f16_2: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_2, svfloat16x2_t, float16_t, + svstnt1_f16_x2 (pn8, x0 + svcnth () * 2, z0), + svstnt1 (pn8, x0 + svcnth () * 2, z0)) + +/* +** stnt1_f16_14: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_14, svfloat16x2_t, float16_t, + svstnt1_f16_x2 (pn8, x0 + svcnth () * 14, z0), + svstnt1 (pn8, x0 + svcnth () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f16_16: +** incb x0, all, mul #16 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_16, svfloat16x2_t, float16_t, + svstnt1_f16_x2 (pn8, x0 + svcnth () * 16, z0), + svstnt1 (pn8, x0 + svcnth () * 16, z0)) + +/* Moving the constant into a register would also be OK. 
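Odd offsets such as -1 fall outside the x2 immediate encodings.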
*/ +/* +** stnt1_f16_m1: +** decb x0 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_m1, svfloat16x2_t, float16_t, + svstnt1_f16_x2 (pn8, x0 - svcnth (), z0), + svstnt1 (pn8, x0 - svcnth (), z0)) + +/* +** stnt1_f16_m2: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_m2, svfloat16x2_t, float16_t, + svstnt1_f16_x2 (pn8, x0 - svcnth () * 2, z0), + svstnt1 (pn8, x0 - svcnth () * 2, z0)) + +/* +** stnt1_f16_m16: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_m16, svfloat16x2_t, float16_t, + svstnt1_f16_x2 (pn8, x0 - svcnth () * 16, z0), + svstnt1 (pn8, x0 - svcnth () * 16, z0)) + +/* +** stnt1_f16_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_m18, svfloat16x2_t, float16_t, + svstnt1_f16_x2 (pn8, x0 - svcnth () * 18, z0), + svstnt1 (pn8, x0 - svcnth () * 18, z0)) + +/* +** stnt1_f16_z17: +** mov [^\n]+ +** mov [^\n]+ +** stnt1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_z17, svfloat16x2_t, float16_t, + svstnt1_f16_x2 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_f16_z22: +** stnt1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_z22, svfloat16x2_t, float16_t, + svstnt1_f16_x2 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_f16_z28: +** stnt1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_z28, svfloat16x2_t, float16_t, + svstnt1_f16_x2 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_f16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_pn0, svfloat16x2_t, float16_t, + svstnt1_f16_x2 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_f16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_pn7, svfloat16x2_t, float16_t, + svstnt1_f16_x2 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_f16_pn15: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_pn15, svfloat16x2_t, float16_t, + svstnt1_f16_x2 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_f16_0: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_0, svfloat16x2_t, float16_t, + svstnt1_vnum_f16_x2 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f16_1: +** incb x0 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_1, svfloat16x2_t, float16_t, + svstnt1_vnum_f16_x2 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* +** stnt1_vnum_f16_2: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_2, svfloat16x2_t, float16_t, + svstnt1_vnum_f16_x2 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* +** stnt1_vnum_f16_14: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_14, svfloat16x2_t, float16_t, + svstnt1_vnum_f16_x2 (pn8, x0, 14, z0), + svstnt1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. 
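vnum = 16 is just beyond the maximum encodable immediate of 14 for the x2 form, hence the incb.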
*/ +/* +** stnt1_vnum_f16_16: +** incb x0, all, mul #16 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_16, svfloat16x2_t, float16_t, + svstnt1_vnum_f16_x2 (pn8, x0, 16, z0), + svstnt1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f16_m1: +** decb x0 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_m1, svfloat16x2_t, float16_t, + svstnt1_vnum_f16_x2 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* +** stnt1_vnum_f16_m2: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_m2, svfloat16x2_t, float16_t, + svstnt1_vnum_f16_x2 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* +** stnt1_vnum_f16_m16: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_m16, svfloat16x2_t, float16_t, + svstnt1_vnum_f16_x2 (pn8, x0, -16, z0), + svstnt1_vnum (pn8, x0, -16, z0)) + +/* +** stnt1_vnum_f16_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_m18, svfloat16x2_t, float16_t, + svstnt1_vnum_f16_x2 (pn8, x0, -18, z0), + svstnt1_vnum (pn8, x0, -18, z0)) + +/* +** stnt1_vnum_f16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_x1, svfloat16x2_t, float16_t, + svstnt1_vnum_f16_x2 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..6ccdeceda6df108f804191c4e755d24a8a583f28 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f16_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_f16_base: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_base, svfloat16x4_t, float16_t, + svstnt1_f16_x4 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_f16_index: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_index, svfloat16x4_t, float16_t, + svstnt1_f16_x4 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f16_1: +** incb x0 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_1, svfloat16x4_t, float16_t, + svstnt1_f16_x4 (pn8, x0 + svcnth (), z0), + svstnt1 (pn8, x0 + svcnth (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f16_2: +** incb x0, all, mul #2 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_2, svfloat16x4_t, float16_t, + svstnt1_f16_x4 (pn8, x0 + svcnth () * 2, z0), + svstnt1 (pn8, x0 + svcnth () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_f16_3: +** incb x0, all, mul #3 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_3, svfloat16x4_t, float16_t, + svstnt1_f16_x4 (pn8, x0 + svcnth () * 3, z0), + svstnt1 (pn8, x0 + svcnth () * 3, z0)) + +/* +** stnt1_f16_4: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_4, svfloat16x4_t, float16_t, + svstnt1_f16_x4 (pn8, x0 + svcnth () * 4, z0), + svstnt1 (pn8, x0 + svcnth () * 4, z0)) + +/* +** stnt1_f16_28: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_28, svfloat16x4_t, float16_t, + svstnt1_f16_x4 (pn8, x0 + svcnth () * 28, z0), + svstnt1 (pn8, x0 + svcnth () * 28, z0)) + +/* +** stnt1_f16_32: +** [^{]* +** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_32, svfloat16x4_t, float16_t, + svstnt1_f16_x4 (pn8, x0 + svcnth () * 32, z0), + svstnt1 (pn8, x0 + svcnth () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f16_m1: +** decb x0 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_m1, svfloat16x4_t, float16_t, + svstnt1_f16_x4 (pn8, x0 - svcnth (), z0), + svstnt1 (pn8, x0 - svcnth (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f16_m2: +** decb x0, all, mul #2 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_m2, svfloat16x4_t, float16_t, + svstnt1_f16_x4 (pn8, x0 - svcnth () * 2, z0), + svstnt1 (pn8, x0 - svcnth () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f16_m3: +** decb x0, all, mul #3 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_m3, svfloat16x4_t, float16_t, + svstnt1_f16_x4 (pn8, x0 - svcnth () * 3, z0), + svstnt1 (pn8, x0 - svcnth () * 3, z0)) + +/* +** stnt1_f16_m4: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_m4, svfloat16x4_t, float16_t, + svstnt1_f16_x4 (pn8, x0 - svcnth () * 4, z0), + svstnt1 (pn8, x0 - svcnth () * 4, z0)) + +/* +** stnt1_f16_m32: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_m32, svfloat16x4_t, float16_t, + svstnt1_f16_x4 (pn8, x0 - svcnth () * 32, z0), + svstnt1 (pn8, x0 - svcnth () * 32, z0)) + +/* +** stnt1_f16_m36: +** [^{]* +** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_m36, svfloat16x4_t, float16_t, + svstnt1_f16_x4 (pn8, x0 - svcnth () * 36, z0), + svstnt1 (pn8, x0 - svcnth () * 36, z0)) + +/* +** stnt1_f16_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_z17, svfloat16x4_t, float16_t, + svstnt1_f16_x4 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_f16_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_z22, svfloat16x4_t, float16_t, + svstnt1_f16_x4 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_f16_z28: +** stnt1h {z28\.h - z31\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_z28, svfloat16x4_t, float16_t, + svstnt1_f16_x4 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_f16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_pn0, svfloat16x4_t, float16_t, + svstnt1_f16_x4 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_f16_pn7: +** mov 
p([89]|1[0-5])\.b, p7\.b +** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_pn7, svfloat16x4_t, float16_t, + svstnt1_f16_x4 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_f16_pn15: +** stnt1h {z0\.h - z3\.h}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f16_pn15, svfloat16x4_t, float16_t, + svstnt1_f16_x4 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_f16_0: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_0, svfloat16x4_t, float16_t, + svstnt1_vnum_f16_x4 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f16_1: +** incb x0 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_1, svfloat16x4_t, float16_t, + svstnt1_vnum_f16_x4 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f16_2: +** incb x0, all, mul #2 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_2, svfloat16x4_t, float16_t, + svstnt1_vnum_f16_x4 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f16_3: +** incb x0, all, mul #3 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_3, svfloat16x4_t, float16_t, + svstnt1_vnum_f16_x4 (pn8, x0, 3, z0), + svstnt1_vnum (pn8, x0, 3, z0)) + +/* +** stnt1_vnum_f16_4: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_4, svfloat16x4_t, float16_t, + svstnt1_vnum_f16_x4 (pn8, x0, 4, z0), + svstnt1_vnum (pn8, x0, 4, z0)) + +/* +** stnt1_vnum_f16_28: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_28, svfloat16x4_t, float16_t, + svstnt1_vnum_f16_x4 (pn8, x0, 28, z0), + svstnt1_vnum (pn8, x0, 28, z0)) + +/* +** stnt1_vnum_f16_32: +** [^{]* +** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_32, svfloat16x4_t, float16_t, + svstnt1_vnum_f16_x4 (pn8, x0, 32, z0), + svstnt1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f16_m1: +** decb x0 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_m1, svfloat16x4_t, float16_t, + svstnt1_vnum_f16_x4 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f16_m2: +** decb x0, all, mul #2 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_m2, svfloat16x4_t, float16_t, + svstnt1_vnum_f16_x4 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_f16_m3: +** decb x0, all, mul #3 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_m3, svfloat16x4_t, float16_t, + svstnt1_vnum_f16_x4 (pn8, x0, -3, z0), + svstnt1_vnum (pn8, x0, -3, z0)) + +/* +** stnt1_vnum_f16_m4: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_m4, svfloat16x4_t, float16_t, + svstnt1_vnum_f16_x4 (pn8, x0, -4, z0), + svstnt1_vnum (pn8, x0, -4, z0)) + +/* +** stnt1_vnum_f16_m32: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_m32, svfloat16x4_t, float16_t, + svstnt1_vnum_f16_x4 (pn8, x0, -32, z0), + svstnt1_vnum (pn8, x0, -32, z0)) + +/* +** stnt1_vnum_f16_m36: +** [^{]* +** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_m36, svfloat16x4_t, float16_t, + svstnt1_vnum_f16_x4 (pn8, x0, -36, z0), + svstnt1_vnum (pn8, x0, -36, z0)) + +/* +** stnt1_vnum_f16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1h {z0\.h - z3\.h}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f16_x1, svfloat16x4_t, float16_t, + svstnt1_vnum_f16_x4 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..f9c5636e53253510124657249cbd098f7cb8a31e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f32_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_f32_base: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_base, svfloat32x2_t, float32_t, + svstnt1_f32_x2 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_f32_index: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_index, svfloat32x2_t, float32_t, + svstnt1_f32_x2 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f32_1: +** incb x0 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_1, svfloat32x2_t, float32_t, + svstnt1_f32_x2 (pn8, x0 + svcntw (), z0), + svstnt1 (pn8, x0 + svcntw (), z0)) + +/* +** stnt1_f32_2: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_2, svfloat32x2_t, float32_t, + svstnt1_f32_x2 (pn8, x0 + svcntw () * 2, z0), + svstnt1 (pn8, x0 + svcntw () * 2, z0)) + +/* +** stnt1_f32_14: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_14, svfloat32x2_t, float32_t, + svstnt1_f32_x2 (pn8, x0 + svcntw () * 14, z0), + svstnt1 (pn8, x0 + svcntw () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f32_16: +** incb x0, all, mul #16 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_16, svfloat32x2_t, float32_t, + svstnt1_f32_x2 (pn8, x0 + svcntw () * 16, z0), + svstnt1 (pn8, x0 + svcntw () * 16, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_f32_m1: +** decb x0 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_m1, svfloat32x2_t, float32_t, + svstnt1_f32_x2 (pn8, x0 - svcntw (), z0), + svstnt1 (pn8, x0 - svcntw (), z0)) + +/* +** stnt1_f32_m2: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_m2, svfloat32x2_t, float32_t, + svstnt1_f32_x2 (pn8, x0 - svcntw () * 2, z0), + svstnt1 (pn8, x0 - svcntw () * 2, z0)) + +/* +** stnt1_f32_m16: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_m16, svfloat32x2_t, float32_t, + svstnt1_f32_x2 (pn8, x0 - svcntw () * 16, z0), + svstnt1 (pn8, x0 - svcntw () * 16, z0)) + +/* +** stnt1_f32_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_m18, svfloat32x2_t, float32_t, + svstnt1_f32_x2 (pn8, x0 - svcntw () * 18, z0), + svstnt1 (pn8, x0 - svcntw () * 18, z0)) + +/* +** stnt1_f32_z17: +** mov [^\n]+ +** mov [^\n]+ +** stnt1w {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_z17, svfloat32x2_t, float32_t, + svstnt1_f32_x2 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_f32_z22: +** stnt1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_z22, svfloat32x2_t, float32_t, + svstnt1_f32_x2 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_f32_z28: +** stnt1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_z28, svfloat32x2_t, float32_t, + svstnt1_f32_x2 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_f32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_pn0, svfloat32x2_t, float32_t, + svstnt1_f32_x2 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_f32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_pn7, svfloat32x2_t, float32_t, + svstnt1_f32_x2 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_f32_pn15: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_pn15, svfloat32x2_t, float32_t, + svstnt1_f32_x2 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_f32_0: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_0, svfloat32x2_t, float32_t, + svstnt1_vnum_f32_x2 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f32_1: +** incb x0 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_1, svfloat32x2_t, float32_t, + svstnt1_vnum_f32_x2 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* +** stnt1_vnum_f32_2: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_2, svfloat32x2_t, float32_t, + svstnt1_vnum_f32_x2 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* +** stnt1_vnum_f32_14: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_14, svfloat32x2_t, float32_t, + svstnt1_vnum_f32_x2 (pn8, x0, 14, z0), + svstnt1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_f32_16: +** incb x0, all, mul #16 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_16, svfloat32x2_t, float32_t, + svstnt1_vnum_f32_x2 (pn8, x0, 16, z0), + svstnt1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f32_m1: +** decb x0 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_m1, svfloat32x2_t, float32_t, + svstnt1_vnum_f32_x2 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* +** stnt1_vnum_f32_m2: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_m2, svfloat32x2_t, float32_t, + svstnt1_vnum_f32_x2 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* +** stnt1_vnum_f32_m16: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_m16, svfloat32x2_t, float32_t, + svstnt1_vnum_f32_x2 (pn8, x0, -16, z0), + svstnt1_vnum (pn8, x0, -16, z0)) + +/* +** stnt1_vnum_f32_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_m18, svfloat32x2_t, float32_t, + svstnt1_vnum_f32_x2 (pn8, x0, -18, z0), + svstnt1_vnum (pn8, x0, -18, z0)) + +/* +** stnt1_vnum_f32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_x1, svfloat32x2_t, float32_t, + svstnt1_vnum_f32_x2 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..30a5ce4f19e7bc65e3dd0e5c462bc038610e063c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f32_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_f32_base: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_base, svfloat32x4_t, float32_t, + svstnt1_f32_x4 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_f32_index: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_index, svfloat32x4_t, float32_t, + svstnt1_f32_x4 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f32_1: +** incb x0 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_1, svfloat32x4_t, float32_t, + svstnt1_f32_x4 (pn8, x0 + svcntw (), z0), + svstnt1 (pn8, x0 + svcntw (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f32_2: +** incb x0, all, mul #2 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_2, svfloat32x4_t, float32_t, + svstnt1_f32_x4 (pn8, x0 + svcntw () * 2, z0), + svstnt1 (pn8, x0 + svcntw () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
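Again, 3 vectors is not a multiple of 4, so the x4 immediate form is unavailable.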
*/ +/* +** stnt1_f32_3: +** incb x0, all, mul #3 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_3, svfloat32x4_t, float32_t, + svstnt1_f32_x4 (pn8, x0 + svcntw () * 3, z0), + svstnt1 (pn8, x0 + svcntw () * 3, z0)) + +/* +** stnt1_f32_4: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_4, svfloat32x4_t, float32_t, + svstnt1_f32_x4 (pn8, x0 + svcntw () * 4, z0), + svstnt1 (pn8, x0 + svcntw () * 4, z0)) + +/* +** stnt1_f32_28: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_28, svfloat32x4_t, float32_t, + svstnt1_f32_x4 (pn8, x0 + svcntw () * 28, z0), + svstnt1 (pn8, x0 + svcntw () * 28, z0)) + +/* +** stnt1_f32_32: +** [^{]* +** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_32, svfloat32x4_t, float32_t, + svstnt1_f32_x4 (pn8, x0 + svcntw () * 32, z0), + svstnt1 (pn8, x0 + svcntw () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f32_m1: +** decb x0 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_m1, svfloat32x4_t, float32_t, + svstnt1_f32_x4 (pn8, x0 - svcntw (), z0), + svstnt1 (pn8, x0 - svcntw (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f32_m2: +** decb x0, all, mul #2 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_m2, svfloat32x4_t, float32_t, + svstnt1_f32_x4 (pn8, x0 - svcntw () * 2, z0), + svstnt1 (pn8, x0 - svcntw () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f32_m3: +** decb x0, all, mul #3 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_m3, svfloat32x4_t, float32_t, + svstnt1_f32_x4 (pn8, x0 - svcntw () * 3, z0), + svstnt1 (pn8, x0 - svcntw () * 3, z0)) + +/* +** stnt1_f32_m4: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_m4, svfloat32x4_t, float32_t, + svstnt1_f32_x4 (pn8, x0 - svcntw () * 4, z0), + svstnt1 (pn8, x0 - svcntw () * 4, z0)) + +/* +** stnt1_f32_m32: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_m32, svfloat32x4_t, float32_t, + svstnt1_f32_x4 (pn8, x0 - svcntw () * 32, z0), + svstnt1 (pn8, x0 - svcntw () * 32, z0)) + +/* +** stnt1_f32_m36: +** [^{]* +** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_m36, svfloat32x4_t, float32_t, + svstnt1_f32_x4 (pn8, x0 - svcntw () * 36, z0), + svstnt1 (pn8, x0 - svcntw () * 36, z0)) + +/* +** stnt1_f32_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1w {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_z17, svfloat32x4_t, float32_t, + svstnt1_f32_x4 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_f32_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1w {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_z22, svfloat32x4_t, float32_t, + svstnt1_f32_x4 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_f32_z28: +** stnt1w {z28\.s - z31\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_z28, svfloat32x4_t, float32_t, + svstnt1_f32_x4 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_f32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_pn0, svfloat32x4_t, float32_t, + svstnt1_f32_x4 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_f32_pn7: +** mov 
p([89]|1[0-5])\.b, p7\.b +** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_pn7, svfloat32x4_t, float32_t, + svstnt1_f32_x4 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_f32_pn15: +** stnt1w {z0\.s - z3\.s}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f32_pn15, svfloat32x4_t, float32_t, + svstnt1_f32_x4 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_f32_0: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_0, svfloat32x4_t, float32_t, + svstnt1_vnum_f32_x4 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f32_1: +** incb x0 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_1, svfloat32x4_t, float32_t, + svstnt1_vnum_f32_x4 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f32_2: +** incb x0, all, mul #2 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_2, svfloat32x4_t, float32_t, + svstnt1_vnum_f32_x4 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f32_3: +** incb x0, all, mul #3 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_3, svfloat32x4_t, float32_t, + svstnt1_vnum_f32_x4 (pn8, x0, 3, z0), + svstnt1_vnum (pn8, x0, 3, z0)) + +/* +** stnt1_vnum_f32_4: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_4, svfloat32x4_t, float32_t, + svstnt1_vnum_f32_x4 (pn8, x0, 4, z0), + svstnt1_vnum (pn8, x0, 4, z0)) + +/* +** stnt1_vnum_f32_28: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_28, svfloat32x4_t, float32_t, + svstnt1_vnum_f32_x4 (pn8, x0, 28, z0), + svstnt1_vnum (pn8, x0, 28, z0)) + +/* +** stnt1_vnum_f32_32: +** [^{]* +** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_32, svfloat32x4_t, float32_t, + svstnt1_vnum_f32_x4 (pn8, x0, 32, z0), + svstnt1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f32_m1: +** decb x0 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_m1, svfloat32x4_t, float32_t, + svstnt1_vnum_f32_x4 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f32_m2: +** decb x0, all, mul #2 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_m2, svfloat32x4_t, float32_t, + svstnt1_vnum_f32_x4 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_f32_m3: +** decb x0, all, mul #3 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_m3, svfloat32x4_t, float32_t, + svstnt1_vnum_f32_x4 (pn8, x0, -3, z0), + svstnt1_vnum (pn8, x0, -3, z0)) + +/* +** stnt1_vnum_f32_m4: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_m4, svfloat32x4_t, float32_t, + svstnt1_vnum_f32_x4 (pn8, x0, -4, z0), + svstnt1_vnum (pn8, x0, -4, z0)) + +/* +** stnt1_vnum_f32_m32: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_m32, svfloat32x4_t, float32_t, + svstnt1_vnum_f32_x4 (pn8, x0, -32, z0), + svstnt1_vnum (pn8, x0, -32, z0)) + +/* +** stnt1_vnum_f32_m36: +** [^{]* +** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_m36, svfloat32x4_t, float32_t, + svstnt1_vnum_f32_x4 (pn8, x0, -36, z0), + svstnt1_vnum (pn8, x0, -36, z0)) + +/* +** stnt1_vnum_f32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1w {z0\.s - z3\.s}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f32_x1, svfloat32x4_t, float32_t, + svstnt1_vnum_f32_x4 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..a58750298acc7dc6c075566c6ad31e74d6d24120 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f64_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_f64_base: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_base, svfloat64x2_t, float64_t, + svstnt1_f64_x2 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_f64_index: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_index, svfloat64x2_t, float64_t, + svstnt1_f64_x2 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f64_1: +** incb x0 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_1, svfloat64x2_t, float64_t, + svstnt1_f64_x2 (pn8, x0 + svcntd (), z0), + svstnt1 (pn8, x0 + svcntd (), z0)) + +/* +** stnt1_f64_2: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_2, svfloat64x2_t, float64_t, + svstnt1_f64_x2 (pn8, x0 + svcntd () * 2, z0), + svstnt1 (pn8, x0 + svcntd () * 2, z0)) + +/* +** stnt1_f64_14: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_14, svfloat64x2_t, float64_t, + svstnt1_f64_x2 (pn8, x0 + svcntd () * 14, z0), + svstnt1 (pn8, x0 + svcntd () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f64_16: +** incb x0, all, mul #16 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_16, svfloat64x2_t, float64_t, + svstnt1_f64_x2 (pn8, x0 + svcntd () * 16, z0), + svstnt1 (pn8, x0 + svcntd () * 16, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_f64_m1: +** decb x0 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_m1, svfloat64x2_t, float64_t, + svstnt1_f64_x2 (pn8, x0 - svcntd (), z0), + svstnt1 (pn8, x0 - svcntd (), z0)) + +/* +** stnt1_f64_m2: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_m2, svfloat64x2_t, float64_t, + svstnt1_f64_x2 (pn8, x0 - svcntd () * 2, z0), + svstnt1 (pn8, x0 - svcntd () * 2, z0)) + +/* +** stnt1_f64_m16: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_m16, svfloat64x2_t, float64_t, + svstnt1_f64_x2 (pn8, x0 - svcntd () * 16, z0), + svstnt1 (pn8, x0 - svcntd () * 16, z0)) + +/* +** stnt1_f64_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_m18, svfloat64x2_t, float64_t, + svstnt1_f64_x2 (pn8, x0 - svcntd () * 18, z0), + svstnt1 (pn8, x0 - svcntd () * 18, z0)) + +/* +** stnt1_f64_z17: +** mov [^\n]+ +** mov [^\n]+ +** stnt1d {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_z17, svfloat64x2_t, float64_t, + svstnt1_f64_x2 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_f64_z22: +** stnt1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_z22, svfloat64x2_t, float64_t, + svstnt1_f64_x2 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_f64_z28: +** stnt1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_z28, svfloat64x2_t, float64_t, + svstnt1_f64_x2 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_f64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_pn0, svfloat64x2_t, float64_t, + svstnt1_f64_x2 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_f64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_pn7, svfloat64x2_t, float64_t, + svstnt1_f64_x2 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_f64_pn15: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_pn15, svfloat64x2_t, float64_t, + svstnt1_f64_x2 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_f64_0: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_0, svfloat64x2_t, float64_t, + svstnt1_vnum_f64_x2 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f64_1: +** incb x0 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_1, svfloat64x2_t, float64_t, + svstnt1_vnum_f64_x2 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* +** stnt1_vnum_f64_2: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_2, svfloat64x2_t, float64_t, + svstnt1_vnum_f64_x2 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* +** stnt1_vnum_f64_14: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_14, svfloat64x2_t, float64_t, + svstnt1_vnum_f64_x2 (pn8, x0, 14, z0), + svstnt1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_f64_16: +** incb x0, all, mul #16 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_16, svfloat64x2_t, float64_t, + svstnt1_vnum_f64_x2 (pn8, x0, 16, z0), + svstnt1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f64_m1: +** decb x0 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_m1, svfloat64x2_t, float64_t, + svstnt1_vnum_f64_x2 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* +** stnt1_vnum_f64_m2: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_m2, svfloat64x2_t, float64_t, + svstnt1_vnum_f64_x2 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* +** stnt1_vnum_f64_m16: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_m16, svfloat64x2_t, float64_t, + svstnt1_vnum_f64_x2 (pn8, x0, -16, z0), + svstnt1_vnum (pn8, x0, -16, z0)) + +/* +** stnt1_vnum_f64_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_m18, svfloat64x2_t, float64_t, + svstnt1_vnum_f64_x2 (pn8, x0, -18, z0), + svstnt1_vnum (pn8, x0, -18, z0)) + +/* +** stnt1_vnum_f64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_x1, svfloat64x2_t, float64_t, + svstnt1_vnum_f64_x2 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..42bfc51354e69cf54016cbb4faa49407403147fb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_f64_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_f64_base: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_base, svfloat64x4_t, float64_t, + svstnt1_f64_x4 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_f64_index: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_index, svfloat64x4_t, float64_t, + svstnt1_f64_x4 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f64_1: +** incb x0 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_1, svfloat64x4_t, float64_t, + svstnt1_f64_x4 (pn8, x0 + svcntd (), z0), + svstnt1 (pn8, x0 + svcntd (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f64_2: +** incb x0, all, mul #2 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_2, svfloat64x4_t, float64_t, + svstnt1_f64_x4 (pn8, x0 + svcntd () * 2, z0), + svstnt1 (pn8, x0 + svcntd () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_f64_3: +** incb x0, all, mul #3 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_3, svfloat64x4_t, float64_t, + svstnt1_f64_x4 (pn8, x0 + svcntd () * 3, z0), + svstnt1 (pn8, x0 + svcntd () * 3, z0)) + +/* +** stnt1_f64_4: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_4, svfloat64x4_t, float64_t, + svstnt1_f64_x4 (pn8, x0 + svcntd () * 4, z0), + svstnt1 (pn8, x0 + svcntd () * 4, z0)) + +/* +** stnt1_f64_28: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_28, svfloat64x4_t, float64_t, + svstnt1_f64_x4 (pn8, x0 + svcntd () * 28, z0), + svstnt1 (pn8, x0 + svcntd () * 28, z0)) + +/* +** stnt1_f64_32: +** [^{]* +** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_32, svfloat64x4_t, float64_t, + svstnt1_f64_x4 (pn8, x0 + svcntd () * 32, z0), + svstnt1 (pn8, x0 + svcntd () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f64_m1: +** decb x0 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_m1, svfloat64x4_t, float64_t, + svstnt1_f64_x4 (pn8, x0 - svcntd (), z0), + svstnt1 (pn8, x0 - svcntd (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f64_m2: +** decb x0, all, mul #2 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_m2, svfloat64x4_t, float64_t, + svstnt1_f64_x4 (pn8, x0 - svcntd () * 2, z0), + svstnt1 (pn8, x0 - svcntd () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_f64_m3: +** decb x0, all, mul #3 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_m3, svfloat64x4_t, float64_t, + svstnt1_f64_x4 (pn8, x0 - svcntd () * 3, z0), + svstnt1 (pn8, x0 - svcntd () * 3, z0)) + +/* +** stnt1_f64_m4: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_m4, svfloat64x4_t, float64_t, + svstnt1_f64_x4 (pn8, x0 - svcntd () * 4, z0), + svstnt1 (pn8, x0 - svcntd () * 4, z0)) + +/* +** stnt1_f64_m32: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_m32, svfloat64x4_t, float64_t, + svstnt1_f64_x4 (pn8, x0 - svcntd () * 32, z0), + svstnt1 (pn8, x0 - svcntd () * 32, z0)) + +/* +** stnt1_f64_m36: +** [^{]* +** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_m36, svfloat64x4_t, float64_t, + svstnt1_f64_x4 (pn8, x0 - svcntd () * 36, z0), + svstnt1 (pn8, x0 - svcntd () * 36, z0)) + +/* +** stnt1_f64_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1d {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_z17, svfloat64x4_t, float64_t, + svstnt1_f64_x4 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_f64_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1d {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_z22, svfloat64x4_t, float64_t, + svstnt1_f64_x4 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_f64_z28: +** stnt1d {z28\.d - z31\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_z28, svfloat64x4_t, float64_t, + svstnt1_f64_x4 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_f64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_pn0, svfloat64x4_t, float64_t, + svstnt1_f64_x4 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_f64_pn7: +** mov 
p([89]|1[0-5])\.b, p7\.b +** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_pn7, svfloat64x4_t, float64_t, + svstnt1_f64_x4 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_f64_pn15: +** stnt1d {z0\.d - z3\.d}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_f64_pn15, svfloat64x4_t, float64_t, + svstnt1_f64_x4 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_f64_0: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_0, svfloat64x4_t, float64_t, + svstnt1_vnum_f64_x4 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f64_1: +** incb x0 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_1, svfloat64x4_t, float64_t, + svstnt1_vnum_f64_x4 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f64_2: +** incb x0, all, mul #2 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_2, svfloat64x4_t, float64_t, + svstnt1_vnum_f64_x4 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f64_3: +** incb x0, all, mul #3 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_3, svfloat64x4_t, float64_t, + svstnt1_vnum_f64_x4 (pn8, x0, 3, z0), + svstnt1_vnum (pn8, x0, 3, z0)) + +/* +** stnt1_vnum_f64_4: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_4, svfloat64x4_t, float64_t, + svstnt1_vnum_f64_x4 (pn8, x0, 4, z0), + svstnt1_vnum (pn8, x0, 4, z0)) + +/* +** stnt1_vnum_f64_28: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_28, svfloat64x4_t, float64_t, + svstnt1_vnum_f64_x4 (pn8, x0, 28, z0), + svstnt1_vnum (pn8, x0, 28, z0)) + +/* +** stnt1_vnum_f64_32: +** [^{]* +** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_32, svfloat64x4_t, float64_t, + svstnt1_vnum_f64_x4 (pn8, x0, 32, z0), + svstnt1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f64_m1: +** decb x0 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_m1, svfloat64x4_t, float64_t, + svstnt1_vnum_f64_x4 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_f64_m2: +** decb x0, all, mul #2 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_m2, svfloat64x4_t, float64_t, + svstnt1_vnum_f64_x4 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_f64_m3: +** decb x0, all, mul #3 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_m3, svfloat64x4_t, float64_t, + svstnt1_vnum_f64_x4 (pn8, x0, -3, z0), + svstnt1_vnum (pn8, x0, -3, z0)) + +/* +** stnt1_vnum_f64_m4: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_m4, svfloat64x4_t, float64_t, + svstnt1_vnum_f64_x4 (pn8, x0, -4, z0), + svstnt1_vnum (pn8, x0, -4, z0)) + +/* +** stnt1_vnum_f64_m32: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_m32, svfloat64x4_t, float64_t, + svstnt1_vnum_f64_x4 (pn8, x0, -32, z0), + svstnt1_vnum (pn8, x0, -32, z0)) + +/* +** stnt1_vnum_f64_m36: +** [^{]* +** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_m36, svfloat64x4_t, float64_t, + svstnt1_vnum_f64_x4 (pn8, x0, -36, z0), + svstnt1_vnum (pn8, x0, -36, z0)) + +/* +** stnt1_vnum_f64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1d {z0\.d - z3\.d}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_f64_x1, svfloat64x4_t, float64_t, + svstnt1_vnum_f64_x4 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..dcb8f89c9b2e7c192151063fe81f223d30f827d2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s16_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_s16_base: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_base, svint16x2_t, int16_t, + svstnt1_s16_x2 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_s16_index: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_index, svint16x2_t, int16_t, + svstnt1_s16_x2 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s16_1: +** incb x0 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_1, svint16x2_t, int16_t, + svstnt1_s16_x2 (pn8, x0 + svcnth (), z0), + svstnt1 (pn8, x0 + svcnth (), z0)) + +/* +** stnt1_s16_2: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_2, svint16x2_t, int16_t, + svstnt1_s16_x2 (pn8, x0 + svcnth () * 2, z0), + svstnt1 (pn8, x0 + svcnth () * 2, z0)) + +/* +** stnt1_s16_14: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_14, svint16x2_t, int16_t, + svstnt1_s16_x2 (pn8, x0 + svcnth () * 14, z0), + svstnt1 (pn8, x0 + svcnth () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s16_16: +** incb x0, all, mul #16 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_16, svint16x2_t, int16_t, + svstnt1_s16_x2 (pn8, x0 + svcnth () * 16, z0), + svstnt1 (pn8, x0 + svcnth () * 16, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_s16_m1: +** decb x0 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_m1, svint16x2_t, int16_t, + svstnt1_s16_x2 (pn8, x0 - svcnth (), z0), + svstnt1 (pn8, x0 - svcnth (), z0)) + +/* +** stnt1_s16_m2: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_m2, svint16x2_t, int16_t, + svstnt1_s16_x2 (pn8, x0 - svcnth () * 2, z0), + svstnt1 (pn8, x0 - svcnth () * 2, z0)) + +/* +** stnt1_s16_m16: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_m16, svint16x2_t, int16_t, + svstnt1_s16_x2 (pn8, x0 - svcnth () * 16, z0), + svstnt1 (pn8, x0 - svcnth () * 16, z0)) + +/* +** stnt1_s16_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_m18, svint16x2_t, int16_t, + svstnt1_s16_x2 (pn8, x0 - svcnth () * 18, z0), + svstnt1 (pn8, x0 - svcnth () * 18, z0)) + +/* +** stnt1_s16_z17: +** mov [^\n]+ +** mov [^\n]+ +** stnt1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_z17, svint16x2_t, int16_t, + svstnt1_s16_x2 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_s16_z22: +** stnt1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_z22, svint16x2_t, int16_t, + svstnt1_s16_x2 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_s16_z28: +** stnt1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_z28, svint16x2_t, int16_t, + svstnt1_s16_x2 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_s16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_pn0, svint16x2_t, int16_t, + svstnt1_s16_x2 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_s16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_pn7, svint16x2_t, int16_t, + svstnt1_s16_x2 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_s16_pn15: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_pn15, svint16x2_t, int16_t, + svstnt1_s16_x2 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_s16_0: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_0, svint16x2_t, int16_t, + svstnt1_vnum_s16_x2 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s16_1: +** incb x0 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_1, svint16x2_t, int16_t, + svstnt1_vnum_s16_x2 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* +** stnt1_vnum_s16_2: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_2, svint16x2_t, int16_t, + svstnt1_vnum_s16_x2 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* +** stnt1_vnum_s16_14: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_14, svint16x2_t, int16_t, + svstnt1_vnum_s16_x2 (pn8, x0, 14, z0), + svstnt1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_s16_16: +** incb x0, all, mul #16 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_16, svint16x2_t, int16_t, + svstnt1_vnum_s16_x2 (pn8, x0, 16, z0), + svstnt1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s16_m1: +** decb x0 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_m1, svint16x2_t, int16_t, + svstnt1_vnum_s16_x2 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* +** stnt1_vnum_s16_m2: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_m2, svint16x2_t, int16_t, + svstnt1_vnum_s16_x2 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* +** stnt1_vnum_s16_m16: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_m16, svint16x2_t, int16_t, + svstnt1_vnum_s16_x2 (pn8, x0, -16, z0), + svstnt1_vnum (pn8, x0, -16, z0)) + +/* +** stnt1_vnum_s16_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_m18, svint16x2_t, int16_t, + svstnt1_vnum_s16_x2 (pn8, x0, -18, z0), + svstnt1_vnum (pn8, x0, -18, z0)) + +/* +** stnt1_vnum_s16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_x1, svint16x2_t, int16_t, + svstnt1_vnum_s16_x2 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..87290a6b287bf3388ff9ac822dd1ff66b9d73151 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s16_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_s16_base: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_base, svint16x4_t, int16_t, + svstnt1_s16_x4 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_s16_index: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_index, svint16x4_t, int16_t, + svstnt1_s16_x4 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s16_1: +** incb x0 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_1, svint16x4_t, int16_t, + svstnt1_s16_x4 (pn8, x0 + svcnth (), z0), + svstnt1 (pn8, x0 + svcnth (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s16_2: +** incb x0, all, mul #2 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_2, svint16x4_t, int16_t, + svstnt1_s16_x4 (pn8, x0 + svcnth () * 2, z0), + svstnt1 (pn8, x0 + svcnth () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_s16_3: +** incb x0, all, mul #3 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_3, svint16x4_t, int16_t, + svstnt1_s16_x4 (pn8, x0 + svcnth () * 3, z0), + svstnt1 (pn8, x0 + svcnth () * 3, z0)) + +/* +** stnt1_s16_4: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_4, svint16x4_t, int16_t, + svstnt1_s16_x4 (pn8, x0 + svcnth () * 4, z0), + svstnt1 (pn8, x0 + svcnth () * 4, z0)) + +/* +** stnt1_s16_28: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_28, svint16x4_t, int16_t, + svstnt1_s16_x4 (pn8, x0 + svcnth () * 28, z0), + svstnt1 (pn8, x0 + svcnth () * 28, z0)) + +/* +** stnt1_s16_32: +** [^{]* +** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_32, svint16x4_t, int16_t, + svstnt1_s16_x4 (pn8, x0 + svcnth () * 32, z0), + svstnt1 (pn8, x0 + svcnth () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s16_m1: +** decb x0 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_m1, svint16x4_t, int16_t, + svstnt1_s16_x4 (pn8, x0 - svcnth (), z0), + svstnt1 (pn8, x0 - svcnth (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s16_m2: +** decb x0, all, mul #2 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_m2, svint16x4_t, int16_t, + svstnt1_s16_x4 (pn8, x0 - svcnth () * 2, z0), + svstnt1 (pn8, x0 - svcnth () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s16_m3: +** decb x0, all, mul #3 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_m3, svint16x4_t, int16_t, + svstnt1_s16_x4 (pn8, x0 - svcnth () * 3, z0), + svstnt1 (pn8, x0 - svcnth () * 3, z0)) + +/* +** stnt1_s16_m4: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_m4, svint16x4_t, int16_t, + svstnt1_s16_x4 (pn8, x0 - svcnth () * 4, z0), + svstnt1 (pn8, x0 - svcnth () * 4, z0)) + +/* +** stnt1_s16_m32: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_m32, svint16x4_t, int16_t, + svstnt1_s16_x4 (pn8, x0 - svcnth () * 32, z0), + svstnt1 (pn8, x0 - svcnth () * 32, z0)) + +/* +** stnt1_s16_m36: +** [^{]* +** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_m36, svint16x4_t, int16_t, + svstnt1_s16_x4 (pn8, x0 - svcnth () * 36, z0), + svstnt1 (pn8, x0 - svcnth () * 36, z0)) + +/* +** stnt1_s16_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_z17, svint16x4_t, int16_t, + svstnt1_s16_x4 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_s16_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_z22, svint16x4_t, int16_t, + svstnt1_s16_x4 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_s16_z28: +** stnt1h {z28\.h - z31\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_z28, svint16x4_t, int16_t, + svstnt1_s16_x4 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_s16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_pn0, svint16x4_t, int16_t, + svstnt1_s16_x4 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_s16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** stnt1h {z0\.h - z3\.h}, pn\1, 
\[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_pn7, svint16x4_t, int16_t, + svstnt1_s16_x4 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_s16_pn15: +** stnt1h {z0\.h - z3\.h}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s16_pn15, svint16x4_t, int16_t, + svstnt1_s16_x4 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_s16_0: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_0, svint16x4_t, int16_t, + svstnt1_vnum_s16_x4 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s16_1: +** incb x0 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_1, svint16x4_t, int16_t, + svstnt1_vnum_s16_x4 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s16_2: +** incb x0, all, mul #2 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_2, svint16x4_t, int16_t, + svstnt1_vnum_s16_x4 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s16_3: +** incb x0, all, mul #3 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_3, svint16x4_t, int16_t, + svstnt1_vnum_s16_x4 (pn8, x0, 3, z0), + svstnt1_vnum (pn8, x0, 3, z0)) + +/* +** stnt1_vnum_s16_4: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_4, svint16x4_t, int16_t, + svstnt1_vnum_s16_x4 (pn8, x0, 4, z0), + svstnt1_vnum (pn8, x0, 4, z0)) + +/* +** stnt1_vnum_s16_28: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_28, svint16x4_t, int16_t, + svstnt1_vnum_s16_x4 (pn8, x0, 28, z0), + svstnt1_vnum (pn8, x0, 28, z0)) + +/* +** stnt1_vnum_s16_32: +** [^{]* +** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_32, svint16x4_t, int16_t, + svstnt1_vnum_s16_x4 (pn8, x0, 32, z0), + svstnt1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s16_m1: +** decb x0 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_m1, svint16x4_t, int16_t, + svstnt1_vnum_s16_x4 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s16_m2: +** decb x0, all, mul #2 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_m2, svint16x4_t, int16_t, + svstnt1_vnum_s16_x4 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_s16_m3: +** decb x0, all, mul #3 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_m3, svint16x4_t, int16_t, + svstnt1_vnum_s16_x4 (pn8, x0, -3, z0), + svstnt1_vnum (pn8, x0, -3, z0)) + +/* +** stnt1_vnum_s16_m4: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_m4, svint16x4_t, int16_t, + svstnt1_vnum_s16_x4 (pn8, x0, -4, z0), + svstnt1_vnum (pn8, x0, -4, z0)) + +/* +** stnt1_vnum_s16_m32: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_m32, svint16x4_t, int16_t, + svstnt1_vnum_s16_x4 (pn8, x0, -32, z0), + svstnt1_vnum (pn8, x0, -32, z0)) + +/* +** stnt1_vnum_s16_m36: +** [^{]* +** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_m36, svint16x4_t, int16_t, + svstnt1_vnum_s16_x4 (pn8, x0, -36, z0), + svstnt1_vnum (pn8, x0, -36, z0)) + +/* +** stnt1_vnum_s16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1h {z0\.h - z3\.h}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s16_x1, svint16x4_t, int16_t, + svstnt1_vnum_s16_x4 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..3c1a0387c2caa785b6890602205e64d8f8031a43 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s32_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_s32_base: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_base, svint32x2_t, int32_t, + svstnt1_s32_x2 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_s32_index: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_index, svint32x2_t, int32_t, + svstnt1_s32_x2 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s32_1: +** incb x0 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_1, svint32x2_t, int32_t, + svstnt1_s32_x2 (pn8, x0 + svcntw (), z0), + svstnt1 (pn8, x0 + svcntw (), z0)) + +/* +** stnt1_s32_2: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_2, svint32x2_t, int32_t, + svstnt1_s32_x2 (pn8, x0 + svcntw () * 2, z0), + svstnt1 (pn8, x0 + svcntw () * 2, z0)) + +/* +** stnt1_s32_14: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_14, svint32x2_t, int32_t, + svstnt1_s32_x2 (pn8, x0 + svcntw () * 14, z0), + svstnt1 (pn8, x0 + svcntw () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s32_16: +** incb x0, all, mul #16 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_16, svint32x2_t, int32_t, + svstnt1_s32_x2 (pn8, x0 + svcntw () * 16, z0), + svstnt1 (pn8, x0 + svcntw () * 16, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_s32_m1: +** decb x0 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_m1, svint32x2_t, int32_t, + svstnt1_s32_x2 (pn8, x0 - svcntw (), z0), + svstnt1 (pn8, x0 - svcntw (), z0)) + +/* +** stnt1_s32_m2: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_m2, svint32x2_t, int32_t, + svstnt1_s32_x2 (pn8, x0 - svcntw () * 2, z0), + svstnt1 (pn8, x0 - svcntw () * 2, z0)) + +/* +** stnt1_s32_m16: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_m16, svint32x2_t, int32_t, + svstnt1_s32_x2 (pn8, x0 - svcntw () * 16, z0), + svstnt1 (pn8, x0 - svcntw () * 16, z0)) + +/* +** stnt1_s32_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_m18, svint32x2_t, int32_t, + svstnt1_s32_x2 (pn8, x0 - svcntw () * 18, z0), + svstnt1 (pn8, x0 - svcntw () * 18, z0)) + +/* +** stnt1_s32_z17: +** mov [^\n]+ +** mov [^\n]+ +** stnt1w {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_z17, svint32x2_t, int32_t, + svstnt1_s32_x2 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_s32_z22: +** stnt1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_z22, svint32x2_t, int32_t, + svstnt1_s32_x2 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_s32_z28: +** stnt1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_z28, svint32x2_t, int32_t, + svstnt1_s32_x2 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_s32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_pn0, svint32x2_t, int32_t, + svstnt1_s32_x2 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_s32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_pn7, svint32x2_t, int32_t, + svstnt1_s32_x2 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_s32_pn15: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_pn15, svint32x2_t, int32_t, + svstnt1_s32_x2 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_s32_0: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_0, svint32x2_t, int32_t, + svstnt1_vnum_s32_x2 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s32_1: +** incb x0 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_1, svint32x2_t, int32_t, + svstnt1_vnum_s32_x2 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* +** stnt1_vnum_s32_2: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_2, svint32x2_t, int32_t, + svstnt1_vnum_s32_x2 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* +** stnt1_vnum_s32_14: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_14, svint32x2_t, int32_t, + svstnt1_vnum_s32_x2 (pn8, x0, 14, z0), + svstnt1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_s32_16: +** incb x0, all, mul #16 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_16, svint32x2_t, int32_t, + svstnt1_vnum_s32_x2 (pn8, x0, 16, z0), + svstnt1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s32_m1: +** decb x0 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_m1, svint32x2_t, int32_t, + svstnt1_vnum_s32_x2 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* +** stnt1_vnum_s32_m2: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_m2, svint32x2_t, int32_t, + svstnt1_vnum_s32_x2 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* +** stnt1_vnum_s32_m16: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_m16, svint32x2_t, int32_t, + svstnt1_vnum_s32_x2 (pn8, x0, -16, z0), + svstnt1_vnum (pn8, x0, -16, z0)) + +/* +** stnt1_vnum_s32_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_m18, svint32x2_t, int32_t, + svstnt1_vnum_s32_x2 (pn8, x0, -18, z0), + svstnt1_vnum (pn8, x0, -18, z0)) + +/* +** stnt1_vnum_s32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_x1, svint32x2_t, int32_t, + svstnt1_vnum_s32_x2 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..d9a08b0ded45edc2d8b7c2f63b15929c096aa4ed --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s32_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_s32_base: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_base, svint32x4_t, int32_t, + svstnt1_s32_x4 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_s32_index: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_index, svint32x4_t, int32_t, + svstnt1_s32_x4 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s32_1: +** incb x0 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_1, svint32x4_t, int32_t, + svstnt1_s32_x4 (pn8, x0 + svcntw (), z0), + svstnt1 (pn8, x0 + svcntw (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s32_2: +** incb x0, all, mul #2 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_2, svint32x4_t, int32_t, + svstnt1_s32_x4 (pn8, x0 + svcntw () * 2, z0), + svstnt1 (pn8, x0 + svcntw () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_s32_3: +** incb x0, all, mul #3 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_3, svint32x4_t, int32_t, + svstnt1_s32_x4 (pn8, x0 + svcntw () * 3, z0), + svstnt1 (pn8, x0 + svcntw () * 3, z0)) + +/* +** stnt1_s32_4: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_4, svint32x4_t, int32_t, + svstnt1_s32_x4 (pn8, x0 + svcntw () * 4, z0), + svstnt1 (pn8, x0 + svcntw () * 4, z0)) + +/* +** stnt1_s32_28: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_28, svint32x4_t, int32_t, + svstnt1_s32_x4 (pn8, x0 + svcntw () * 28, z0), + svstnt1 (pn8, x0 + svcntw () * 28, z0)) + +/* +** stnt1_s32_32: +** [^{]* +** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_32, svint32x4_t, int32_t, + svstnt1_s32_x4 (pn8, x0 + svcntw () * 32, z0), + svstnt1 (pn8, x0 + svcntw () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s32_m1: +** decb x0 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_m1, svint32x4_t, int32_t, + svstnt1_s32_x4 (pn8, x0 - svcntw (), z0), + svstnt1 (pn8, x0 - svcntw (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s32_m2: +** decb x0, all, mul #2 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_m2, svint32x4_t, int32_t, + svstnt1_s32_x4 (pn8, x0 - svcntw () * 2, z0), + svstnt1 (pn8, x0 - svcntw () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s32_m3: +** decb x0, all, mul #3 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_m3, svint32x4_t, int32_t, + svstnt1_s32_x4 (pn8, x0 - svcntw () * 3, z0), + svstnt1 (pn8, x0 - svcntw () * 3, z0)) + +/* +** stnt1_s32_m4: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_m4, svint32x4_t, int32_t, + svstnt1_s32_x4 (pn8, x0 - svcntw () * 4, z0), + svstnt1 (pn8, x0 - svcntw () * 4, z0)) + +/* +** stnt1_s32_m32: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_m32, svint32x4_t, int32_t, + svstnt1_s32_x4 (pn8, x0 - svcntw () * 32, z0), + svstnt1 (pn8, x0 - svcntw () * 32, z0)) + +/* +** stnt1_s32_m36: +** [^{]* +** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_m36, svint32x4_t, int32_t, + svstnt1_s32_x4 (pn8, x0 - svcntw () * 36, z0), + svstnt1 (pn8, x0 - svcntw () * 36, z0)) + +/* +** stnt1_s32_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1w {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_z17, svint32x4_t, int32_t, + svstnt1_s32_x4 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_s32_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1w {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_z22, svint32x4_t, int32_t, + svstnt1_s32_x4 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_s32_z28: +** stnt1w {z28\.s - z31\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_z28, svint32x4_t, int32_t, + svstnt1_s32_x4 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_s32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_pn0, svint32x4_t, int32_t, + svstnt1_s32_x4 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_s32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** stnt1w {z0\.s - z3\.s}, pn\1, 
\[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_pn7, svint32x4_t, int32_t, + svstnt1_s32_x4 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_s32_pn15: +** stnt1w {z0\.s - z3\.s}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s32_pn15, svint32x4_t, int32_t, + svstnt1_s32_x4 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_s32_0: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_0, svint32x4_t, int32_t, + svstnt1_vnum_s32_x4 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s32_1: +** incb x0 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_1, svint32x4_t, int32_t, + svstnt1_vnum_s32_x4 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s32_2: +** incb x0, all, mul #2 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_2, svint32x4_t, int32_t, + svstnt1_vnum_s32_x4 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s32_3: +** incb x0, all, mul #3 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_3, svint32x4_t, int32_t, + svstnt1_vnum_s32_x4 (pn8, x0, 3, z0), + svstnt1_vnum (pn8, x0, 3, z0)) + +/* +** stnt1_vnum_s32_4: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_4, svint32x4_t, int32_t, + svstnt1_vnum_s32_x4 (pn8, x0, 4, z0), + svstnt1_vnum (pn8, x0, 4, z0)) + +/* +** stnt1_vnum_s32_28: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_28, svint32x4_t, int32_t, + svstnt1_vnum_s32_x4 (pn8, x0, 28, z0), + svstnt1_vnum (pn8, x0, 28, z0)) + +/* +** stnt1_vnum_s32_32: +** [^{]* +** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_32, svint32x4_t, int32_t, + svstnt1_vnum_s32_x4 (pn8, x0, 32, z0), + svstnt1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s32_m1: +** decb x0 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_m1, svint32x4_t, int32_t, + svstnt1_vnum_s32_x4 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s32_m2: +** decb x0, all, mul #2 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_m2, svint32x4_t, int32_t, + svstnt1_vnum_s32_x4 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_s32_m3: +** decb x0, all, mul #3 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_m3, svint32x4_t, int32_t, + svstnt1_vnum_s32_x4 (pn8, x0, -3, z0), + svstnt1_vnum (pn8, x0, -3, z0)) + +/* +** stnt1_vnum_s32_m4: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_m4, svint32x4_t, int32_t, + svstnt1_vnum_s32_x4 (pn8, x0, -4, z0), + svstnt1_vnum (pn8, x0, -4, z0)) + +/* +** stnt1_vnum_s32_m32: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_m32, svint32x4_t, int32_t, + svstnt1_vnum_s32_x4 (pn8, x0, -32, z0), + svstnt1_vnum (pn8, x0, -32, z0)) + +/* +** stnt1_vnum_s32_m36: +** [^{]* +** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_m36, svint32x4_t, int32_t, + svstnt1_vnum_s32_x4 (pn8, x0, -36, z0), + svstnt1_vnum (pn8, x0, -36, z0)) + +/* +** stnt1_vnum_s32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1w {z0\.s - z3\.s}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s32_x1, svint32x4_t, int32_t, + svstnt1_vnum_s32_x4 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..79668a44601a277fc8ff9be11dee9fe4b33633c8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s64_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_s64_base: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_base, svint64x2_t, int64_t, + svstnt1_s64_x2 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_s64_index: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_index, svint64x2_t, int64_t, + svstnt1_s64_x2 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s64_1: +** incb x0 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_1, svint64x2_t, int64_t, + svstnt1_s64_x2 (pn8, x0 + svcntd (), z0), + svstnt1 (pn8, x0 + svcntd (), z0)) + +/* +** stnt1_s64_2: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_2, svint64x2_t, int64_t, + svstnt1_s64_x2 (pn8, x0 + svcntd () * 2, z0), + svstnt1 (pn8, x0 + svcntd () * 2, z0)) + +/* +** stnt1_s64_14: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_14, svint64x2_t, int64_t, + svstnt1_s64_x2 (pn8, x0 + svcntd () * 14, z0), + svstnt1 (pn8, x0 + svcntd () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s64_16: +** incb x0, all, mul #16 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_16, svint64x2_t, int64_t, + svstnt1_s64_x2 (pn8, x0 + svcntd () * 16, z0), + svstnt1 (pn8, x0 + svcntd () * 16, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_s64_m1: +** decb x0 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_m1, svint64x2_t, int64_t, + svstnt1_s64_x2 (pn8, x0 - svcntd (), z0), + svstnt1 (pn8, x0 - svcntd (), z0)) + +/* +** stnt1_s64_m2: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_m2, svint64x2_t, int64_t, + svstnt1_s64_x2 (pn8, x0 - svcntd () * 2, z0), + svstnt1 (pn8, x0 - svcntd () * 2, z0)) + +/* +** stnt1_s64_m16: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_m16, svint64x2_t, int64_t, + svstnt1_s64_x2 (pn8, x0 - svcntd () * 16, z0), + svstnt1 (pn8, x0 - svcntd () * 16, z0)) + +/* +** stnt1_s64_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_m18, svint64x2_t, int64_t, + svstnt1_s64_x2 (pn8, x0 - svcntd () * 18, z0), + svstnt1 (pn8, x0 - svcntd () * 18, z0)) + +/* +** stnt1_s64_z17: +** mov [^\n]+ +** mov [^\n]+ +** stnt1d {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_z17, svint64x2_t, int64_t, + svstnt1_s64_x2 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_s64_z22: +** stnt1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_z22, svint64x2_t, int64_t, + svstnt1_s64_x2 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_s64_z28: +** stnt1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_z28, svint64x2_t, int64_t, + svstnt1_s64_x2 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_s64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_pn0, svint64x2_t, int64_t, + svstnt1_s64_x2 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_s64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_pn7, svint64x2_t, int64_t, + svstnt1_s64_x2 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_s64_pn15: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_pn15, svint64x2_t, int64_t, + svstnt1_s64_x2 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_s64_0: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_0, svint64x2_t, int64_t, + svstnt1_vnum_s64_x2 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s64_1: +** incb x0 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_1, svint64x2_t, int64_t, + svstnt1_vnum_s64_x2 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* +** stnt1_vnum_s64_2: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_2, svint64x2_t, int64_t, + svstnt1_vnum_s64_x2 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* +** stnt1_vnum_s64_14: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_14, svint64x2_t, int64_t, + svstnt1_vnum_s64_x2 (pn8, x0, 14, z0), + svstnt1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_s64_16: +** incb x0, all, mul #16 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_16, svint64x2_t, int64_t, + svstnt1_vnum_s64_x2 (pn8, x0, 16, z0), + svstnt1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s64_m1: +** decb x0 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_m1, svint64x2_t, int64_t, + svstnt1_vnum_s64_x2 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* +** stnt1_vnum_s64_m2: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_m2, svint64x2_t, int64_t, + svstnt1_vnum_s64_x2 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* +** stnt1_vnum_s64_m16: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_m16, svint64x2_t, int64_t, + svstnt1_vnum_s64_x2 (pn8, x0, -16, z0), + svstnt1_vnum (pn8, x0, -16, z0)) + +/* +** stnt1_vnum_s64_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_m18, svint64x2_t, int64_t, + svstnt1_vnum_s64_x2 (pn8, x0, -18, z0), + svstnt1_vnum (pn8, x0, -18, z0)) + +/* +** stnt1_vnum_s64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_x1, svint64x2_t, int64_t, + svstnt1_vnum_s64_x2 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..0e5f6b487868c66aa6119f35aa9a5972fc97268f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s64_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_s64_base: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_base, svint64x4_t, int64_t, + svstnt1_s64_x4 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_s64_index: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_index, svint64x4_t, int64_t, + svstnt1_s64_x4 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s64_1: +** incb x0 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_1, svint64x4_t, int64_t, + svstnt1_s64_x4 (pn8, x0 + svcntd (), z0), + svstnt1 (pn8, x0 + svcntd (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s64_2: +** incb x0, all, mul #2 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_2, svint64x4_t, int64_t, + svstnt1_s64_x4 (pn8, x0 + svcntd () * 2, z0), + svstnt1 (pn8, x0 + svcntd () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_s64_3: +** incb x0, all, mul #3 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_3, svint64x4_t, int64_t, + svstnt1_s64_x4 (pn8, x0 + svcntd () * 3, z0), + svstnt1 (pn8, x0 + svcntd () * 3, z0)) + +/* +** stnt1_s64_4: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_4, svint64x4_t, int64_t, + svstnt1_s64_x4 (pn8, x0 + svcntd () * 4, z0), + svstnt1 (pn8, x0 + svcntd () * 4, z0)) + +/* +** stnt1_s64_28: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_28, svint64x4_t, int64_t, + svstnt1_s64_x4 (pn8, x0 + svcntd () * 28, z0), + svstnt1 (pn8, x0 + svcntd () * 28, z0)) + +/* +** stnt1_s64_32: +** [^{]* +** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_32, svint64x4_t, int64_t, + svstnt1_s64_x4 (pn8, x0 + svcntd () * 32, z0), + svstnt1 (pn8, x0 + svcntd () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s64_m1: +** decb x0 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_m1, svint64x4_t, int64_t, + svstnt1_s64_x4 (pn8, x0 - svcntd (), z0), + svstnt1 (pn8, x0 - svcntd (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s64_m2: +** decb x0, all, mul #2 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_m2, svint64x4_t, int64_t, + svstnt1_s64_x4 (pn8, x0 - svcntd () * 2, z0), + svstnt1 (pn8, x0 - svcntd () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s64_m3: +** decb x0, all, mul #3 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_m3, svint64x4_t, int64_t, + svstnt1_s64_x4 (pn8, x0 - svcntd () * 3, z0), + svstnt1 (pn8, x0 - svcntd () * 3, z0)) + +/* +** stnt1_s64_m4: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_m4, svint64x4_t, int64_t, + svstnt1_s64_x4 (pn8, x0 - svcntd () * 4, z0), + svstnt1 (pn8, x0 - svcntd () * 4, z0)) + +/* +** stnt1_s64_m32: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_m32, svint64x4_t, int64_t, + svstnt1_s64_x4 (pn8, x0 - svcntd () * 32, z0), + svstnt1 (pn8, x0 - svcntd () * 32, z0)) + +/* +** stnt1_s64_m36: +** [^{]* +** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_m36, svint64x4_t, int64_t, + svstnt1_s64_x4 (pn8, x0 - svcntd () * 36, z0), + svstnt1 (pn8, x0 - svcntd () * 36, z0)) + +/* +** stnt1_s64_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1d {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_z17, svint64x4_t, int64_t, + svstnt1_s64_x4 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_s64_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1d {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_z22, svint64x4_t, int64_t, + svstnt1_s64_x4 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_s64_z28: +** stnt1d {z28\.d - z31\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_z28, svint64x4_t, int64_t, + svstnt1_s64_x4 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_s64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_pn0, svint64x4_t, int64_t, + svstnt1_s64_x4 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_s64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** stnt1d {z0\.d - z3\.d}, pn\1, 
\[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_pn7, svint64x4_t, int64_t, + svstnt1_s64_x4 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_s64_pn15: +** stnt1d {z0\.d - z3\.d}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s64_pn15, svint64x4_t, int64_t, + svstnt1_s64_x4 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_s64_0: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_0, svint64x4_t, int64_t, + svstnt1_vnum_s64_x4 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s64_1: +** incb x0 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_1, svint64x4_t, int64_t, + svstnt1_vnum_s64_x4 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s64_2: +** incb x0, all, mul #2 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_2, svint64x4_t, int64_t, + svstnt1_vnum_s64_x4 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s64_3: +** incb x0, all, mul #3 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_3, svint64x4_t, int64_t, + svstnt1_vnum_s64_x4 (pn8, x0, 3, z0), + svstnt1_vnum (pn8, x0, 3, z0)) + +/* +** stnt1_vnum_s64_4: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_4, svint64x4_t, int64_t, + svstnt1_vnum_s64_x4 (pn8, x0, 4, z0), + svstnt1_vnum (pn8, x0, 4, z0)) + +/* +** stnt1_vnum_s64_28: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_28, svint64x4_t, int64_t, + svstnt1_vnum_s64_x4 (pn8, x0, 28, z0), + svstnt1_vnum (pn8, x0, 28, z0)) + +/* +** stnt1_vnum_s64_32: +** [^{]* +** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_32, svint64x4_t, int64_t, + svstnt1_vnum_s64_x4 (pn8, x0, 32, z0), + svstnt1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s64_m1: +** decb x0 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_m1, svint64x4_t, int64_t, + svstnt1_vnum_s64_x4 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s64_m2: +** decb x0, all, mul #2 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_m2, svint64x4_t, int64_t, + svstnt1_vnum_s64_x4 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_s64_m3: +** decb x0, all, mul #3 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_m3, svint64x4_t, int64_t, + svstnt1_vnum_s64_x4 (pn8, x0, -3, z0), + svstnt1_vnum (pn8, x0, -3, z0)) + +/* +** stnt1_vnum_s64_m4: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_m4, svint64x4_t, int64_t, + svstnt1_vnum_s64_x4 (pn8, x0, -4, z0), + svstnt1_vnum (pn8, x0, -4, z0)) + +/* +** stnt1_vnum_s64_m32: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_m32, svint64x4_t, int64_t, + svstnt1_vnum_s64_x4 (pn8, x0, -32, z0), + svstnt1_vnum (pn8, x0, -32, z0)) + +/* +** stnt1_vnum_s64_m36: +** [^{]* +** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_m36, svint64x4_t, int64_t, + svstnt1_vnum_s64_x4 (pn8, x0, -36, z0), + svstnt1_vnum (pn8, x0, -36, z0)) + +/* +** stnt1_vnum_s64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1d {z0\.d - z3\.d}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s64_x1, svint64x4_t, int64_t, + svstnt1_vnum_s64_x4 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..5b243cda218e9628e0de1d4613740cf41cede68f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s8_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_s8_base: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_base, svint8x2_t, int8_t, + svstnt1_s8_x2 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_s8_index: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_index, svint8x2_t, int8_t, + svstnt1_s8_x2 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s8_1: +** incb x0 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_1, svint8x2_t, int8_t, + svstnt1_s8_x2 (pn8, x0 + svcntb (), z0), + svstnt1 (pn8, x0 + svcntb (), z0)) + +/* +** stnt1_s8_2: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_2, svint8x2_t, int8_t, + svstnt1_s8_x2 (pn8, x0 + svcntb () * 2, z0), + svstnt1 (pn8, x0 + svcntb () * 2, z0)) + +/* +** stnt1_s8_14: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_14, svint8x2_t, int8_t, + svstnt1_s8_x2 (pn8, x0 + svcntb () * 14, z0), + svstnt1 (pn8, x0 + svcntb () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s8_16: +** incb x0, all, mul #16 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_16, svint8x2_t, int8_t, + svstnt1_s8_x2 (pn8, x0 + svcntb () * 16, z0), + svstnt1 (pn8, x0 + svcntb () * 16, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_s8_m1: +** decb x0 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_m1, svint8x2_t, int8_t, + svstnt1_s8_x2 (pn8, x0 - svcntb (), z0), + svstnt1 (pn8, x0 - svcntb (), z0)) + +/* +** stnt1_s8_m2: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_m2, svint8x2_t, int8_t, + svstnt1_s8_x2 (pn8, x0 - svcntb () * 2, z0), + svstnt1 (pn8, x0 - svcntb () * 2, z0)) + +/* +** stnt1_s8_m16: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_m16, svint8x2_t, int8_t, + svstnt1_s8_x2 (pn8, x0 - svcntb () * 16, z0), + svstnt1 (pn8, x0 - svcntb () * 16, z0)) + +/* +** stnt1_s8_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_m18, svint8x2_t, int8_t, + svstnt1_s8_x2 (pn8, x0 - svcntb () * 18, z0), + svstnt1 (pn8, x0 - svcntb () * 18, z0)) + +/* +** stnt1_s8_z17: +** mov [^\n]+ +** mov [^\n]+ +** stnt1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_z17, svint8x2_t, int8_t, + svstnt1_s8_x2 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_s8_z22: +** stnt1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_z22, svint8x2_t, int8_t, + svstnt1_s8_x2 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_s8_z28: +** stnt1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_z28, svint8x2_t, int8_t, + svstnt1_s8_x2 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_s8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_pn0, svint8x2_t, int8_t, + svstnt1_s8_x2 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_s8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_pn7, svint8x2_t, int8_t, + svstnt1_s8_x2 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_s8_pn15: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_pn15, svint8x2_t, int8_t, + svstnt1_s8_x2 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_s8_0: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_0, svint8x2_t, int8_t, + svstnt1_vnum_s8_x2 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s8_1: +** incb x0 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_1, svint8x2_t, int8_t, + svstnt1_vnum_s8_x2 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* +** stnt1_vnum_s8_2: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_2, svint8x2_t, int8_t, + svstnt1_vnum_s8_x2 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* +** stnt1_vnum_s8_14: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_14, svint8x2_t, int8_t, + svstnt1_vnum_s8_x2 (pn8, x0, 14, z0), + svstnt1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s8_16: +** incb x0, all, mul #16 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_16, svint8x2_t, int8_t, + svstnt1_vnum_s8_x2 (pn8, x0, 16, z0), + svstnt1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_s8_m1: +** decb x0 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_m1, svint8x2_t, int8_t, + svstnt1_vnum_s8_x2 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* +** stnt1_vnum_s8_m2: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_m2, svint8x2_t, int8_t, + svstnt1_vnum_s8_x2 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* +** stnt1_vnum_s8_m16: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_m16, svint8x2_t, int8_t, + svstnt1_vnum_s8_x2 (pn8, x0, -16, z0), + svstnt1_vnum (pn8, x0, -16, z0)) + +/* +** stnt1_vnum_s8_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_m18, svint8x2_t, int8_t, + svstnt1_vnum_s8_x2 (pn8, x0, -18, z0), + svstnt1_vnum (pn8, x0, -18, z0)) + +/* +** stnt1_vnum_s8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_x1, svint8x2_t, int8_t, + svstnt1_vnum_s8_x2 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..f96615449e2513e327da8c4a2f75c509d7c76f67 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_s8_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_s8_base: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_base, svint8x4_t, int8_t, + svstnt1_s8_x4 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_s8_index: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_index, svint8x4_t, int8_t, + svstnt1_s8_x4 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s8_1: +** incb x0 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_1, svint8x4_t, int8_t, + svstnt1_s8_x4 (pn8, x0 + svcntb (), z0), + svstnt1 (pn8, x0 + svcntb (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s8_2: +** incb x0, all, mul #2 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_2, svint8x4_t, int8_t, + svstnt1_s8_x4 (pn8, x0 + svcntb () * 2, z0), + svstnt1 (pn8, x0 + svcntb () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_s8_3: +** incb x0, all, mul #3 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_3, svint8x4_t, int8_t, + svstnt1_s8_x4 (pn8, x0 + svcntb () * 3, z0), + svstnt1 (pn8, x0 + svcntb () * 3, z0)) + +/* +** stnt1_s8_4: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_4, svint8x4_t, int8_t, + svstnt1_s8_x4 (pn8, x0 + svcntb () * 4, z0), + svstnt1 (pn8, x0 + svcntb () * 4, z0)) + +/* +** stnt1_s8_28: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_28, svint8x4_t, int8_t, + svstnt1_s8_x4 (pn8, x0 + svcntb () * 28, z0), + svstnt1 (pn8, x0 + svcntb () * 28, z0)) + +/* +** stnt1_s8_32: +** [^{]* +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_32, svint8x4_t, int8_t, + svstnt1_s8_x4 (pn8, x0 + svcntb () * 32, z0), + svstnt1 (pn8, x0 + svcntb () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s8_m1: +** decb x0 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_m1, svint8x4_t, int8_t, + svstnt1_s8_x4 (pn8, x0 - svcntb (), z0), + svstnt1 (pn8, x0 - svcntb (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s8_m2: +** decb x0, all, mul #2 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_m2, svint8x4_t, int8_t, + svstnt1_s8_x4 (pn8, x0 - svcntb () * 2, z0), + svstnt1 (pn8, x0 - svcntb () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_s8_m3: +** decb x0, all, mul #3 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_m3, svint8x4_t, int8_t, + svstnt1_s8_x4 (pn8, x0 - svcntb () * 3, z0), + svstnt1 (pn8, x0 - svcntb () * 3, z0)) + +/* +** stnt1_s8_m4: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_m4, svint8x4_t, int8_t, + svstnt1_s8_x4 (pn8, x0 - svcntb () * 4, z0), + svstnt1 (pn8, x0 - svcntb () * 4, z0)) + +/* +** stnt1_s8_m32: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_m32, svint8x4_t, int8_t, + svstnt1_s8_x4 (pn8, x0 - svcntb () * 32, z0), + svstnt1 (pn8, x0 - svcntb () * 32, z0)) + +/* +** stnt1_s8_m36: +** [^{]* +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_m36, svint8x4_t, int8_t, + svstnt1_s8_x4 (pn8, x0 - svcntb () * 36, z0), + svstnt1 (pn8, x0 - svcntb () * 36, z0)) + +/* +** stnt1_s8_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_z17, svint8x4_t, int8_t, + svstnt1_s8_x4 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_s8_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_z22, svint8x4_t, int8_t, + svstnt1_s8_x4 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_s8_z28: +** stnt1b {z28\.b - z31\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_z28, svint8x4_t, int8_t, + svstnt1_s8_x4 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_s8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_pn0, svint8x4_t, int8_t, + svstnt1_s8_x4 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_s8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_pn7, 
svint8x4_t, int8_t, + svstnt1_s8_x4 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_s8_pn15: +** stnt1b {z0\.b - z3\.b}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_s8_pn15, svint8x4_t, int8_t, + svstnt1_s8_x4 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_s8_0: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_0, svint8x4_t, int8_t, + svstnt1_vnum_s8_x4 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s8_1: +** incb x0 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_1, svint8x4_t, int8_t, + svstnt1_vnum_s8_x4 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s8_2: +** incb x0, all, mul #2 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_2, svint8x4_t, int8_t, + svstnt1_vnum_s8_x4 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s8_3: +** incb x0, all, mul #3 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_3, svint8x4_t, int8_t, + svstnt1_vnum_s8_x4 (pn8, x0, 3, z0), + svstnt1_vnum (pn8, x0, 3, z0)) + +/* +** stnt1_vnum_s8_4: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_4, svint8x4_t, int8_t, + svstnt1_vnum_s8_x4 (pn8, x0, 4, z0), + svstnt1_vnum (pn8, x0, 4, z0)) + +/* +** stnt1_vnum_s8_28: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_28, svint8x4_t, int8_t, + svstnt1_vnum_s8_x4 (pn8, x0, 28, z0), + svstnt1_vnum (pn8, x0, 28, z0)) + +/* +** stnt1_vnum_s8_32: +** [^{]* +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_32, svint8x4_t, int8_t, + svstnt1_vnum_s8_x4 (pn8, x0, 32, z0), + svstnt1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s8_m1: +** decb x0 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_m1, svint8x4_t, int8_t, + svstnt1_vnum_s8_x4 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_s8_m2: +** decb x0, all, mul #2 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_m2, svint8x4_t, int8_t, + svstnt1_vnum_s8_x4 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_s8_m3: +** decb x0, all, mul #3 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_m3, svint8x4_t, int8_t, + svstnt1_vnum_s8_x4 (pn8, x0, -3, z0), + svstnt1_vnum (pn8, x0, -3, z0)) + +/* +** stnt1_vnum_s8_m4: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_m4, svint8x4_t, int8_t, + svstnt1_vnum_s8_x4 (pn8, x0, -4, z0), + svstnt1_vnum (pn8, x0, -4, z0)) + +/* +** stnt1_vnum_s8_m32: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_m32, svint8x4_t, int8_t, + svstnt1_vnum_s8_x4 (pn8, x0, -32, z0), + svstnt1_vnum (pn8, x0, -32, z0)) + +/* +** stnt1_vnum_s8_m36: +** [^{]* +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_m36, svint8x4_t, int8_t, + svstnt1_vnum_s8_x4 (pn8, x0, -36, z0), + svstnt1_vnum (pn8, x0, -36, z0)) + +/* +** stnt1_vnum_s8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1b {z0\.b - z3\.b}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_s8_x1, svint8x4_t, int8_t, + svstnt1_vnum_s8_x4 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..ec3387ede10e12ad07fac2213eedbcbe3692af7a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u16_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_u16_base: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_base, svuint16x2_t, uint16_t, + svstnt1_u16_x2 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_u16_index: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_index, svuint16x2_t, uint16_t, + svstnt1_u16_x2 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u16_1: +** incb x0 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_1, svuint16x2_t, uint16_t, + svstnt1_u16_x2 (pn8, x0 + svcnth (), z0), + svstnt1 (pn8, x0 + svcnth (), z0)) + +/* +** stnt1_u16_2: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_2, svuint16x2_t, uint16_t, + svstnt1_u16_x2 (pn8, x0 + svcnth () * 2, z0), + svstnt1 (pn8, x0 + svcnth () * 2, z0)) + +/* +** stnt1_u16_14: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_14, svuint16x2_t, uint16_t, + svstnt1_u16_x2 (pn8, x0 + svcnth () * 14, z0), + svstnt1 (pn8, x0 + svcnth () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u16_16: +** incb x0, all, mul #16 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_16, svuint16x2_t, uint16_t, + svstnt1_u16_x2 (pn8, x0 + svcnth () * 16, z0), + svstnt1 (pn8, x0 + svcnth () * 16, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_u16_m1: +** decb x0 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_m1, svuint16x2_t, uint16_t, + svstnt1_u16_x2 (pn8, x0 - svcnth (), z0), + svstnt1 (pn8, x0 - svcnth (), z0)) + +/* +** stnt1_u16_m2: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_m2, svuint16x2_t, uint16_t, + svstnt1_u16_x2 (pn8, x0 - svcnth () * 2, z0), + svstnt1 (pn8, x0 - svcnth () * 2, z0)) + +/* +** stnt1_u16_m16: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_m16, svuint16x2_t, uint16_t, + svstnt1_u16_x2 (pn8, x0 - svcnth () * 16, z0), + svstnt1 (pn8, x0 - svcnth () * 16, z0)) + +/* +** stnt1_u16_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_m18, svuint16x2_t, uint16_t, + svstnt1_u16_x2 (pn8, x0 - svcnth () * 18, z0), + svstnt1 (pn8, x0 - svcnth () * 18, z0)) + +/* +** stnt1_u16_z17: +** mov [^\n]+ +** mov [^\n]+ +** stnt1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_z17, svuint16x2_t, uint16_t, + svstnt1_u16_x2 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_u16_z22: +** stnt1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_z22, svuint16x2_t, uint16_t, + svstnt1_u16_x2 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_u16_z28: +** stnt1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_z28, svuint16x2_t, uint16_t, + svstnt1_u16_x2 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_u16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_pn0, svuint16x2_t, uint16_t, + svstnt1_u16_x2 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_u16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_pn7, svuint16x2_t, uint16_t, + svstnt1_u16_x2 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_u16_pn15: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_pn15, svuint16x2_t, uint16_t, + svstnt1_u16_x2 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_u16_0: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_0, svuint16x2_t, uint16_t, + svstnt1_vnum_u16_x2 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u16_1: +** incb x0 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_1, svuint16x2_t, uint16_t, + svstnt1_vnum_u16_x2 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* +** stnt1_vnum_u16_2: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_2, svuint16x2_t, uint16_t, + svstnt1_vnum_u16_x2 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* +** stnt1_vnum_u16_14: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_14, svuint16x2_t, uint16_t, + svstnt1_vnum_u16_x2 (pn8, x0, 14, z0), + svstnt1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_u16_16: +** incb x0, all, mul #16 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_16, svuint16x2_t, uint16_t, + svstnt1_vnum_u16_x2 (pn8, x0, 16, z0), + svstnt1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u16_m1: +** decb x0 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_m1, svuint16x2_t, uint16_t, + svstnt1_vnum_u16_x2 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* +** stnt1_vnum_u16_m2: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_m2, svuint16x2_t, uint16_t, + svstnt1_vnum_u16_x2 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* +** stnt1_vnum_u16_m16: +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_m16, svuint16x2_t, uint16_t, + svstnt1_vnum_u16_x2 (pn8, x0, -16, z0), + svstnt1_vnum (pn8, x0, -16, z0)) + +/* +** stnt1_vnum_u16_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_m18, svuint16x2_t, uint16_t, + svstnt1_vnum_u16_x2 (pn8, x0, -18, z0), + svstnt1_vnum (pn8, x0, -18, z0)) + +/* +** stnt1_vnum_u16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_x1, svuint16x2_t, uint16_t, + svstnt1_vnum_u16_x2 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..0ce35c0cd20104e1afad9ff302efa777c0ebf18e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u16_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_u16_base: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_base, svuint16x4_t, uint16_t, + svstnt1_u16_x4 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_u16_index: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_index, svuint16x4_t, uint16_t, + svstnt1_u16_x4 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u16_1: +** incb x0 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_1, svuint16x4_t, uint16_t, + svstnt1_u16_x4 (pn8, x0 + svcnth (), z0), + svstnt1 (pn8, x0 + svcnth (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u16_2: +** incb x0, all, mul #2 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_2, svuint16x4_t, uint16_t, + svstnt1_u16_x4 (pn8, x0 + svcnth () * 2, z0), + svstnt1 (pn8, x0 + svcnth () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_u16_3: +** incb x0, all, mul #3 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_3, svuint16x4_t, uint16_t, + svstnt1_u16_x4 (pn8, x0 + svcnth () * 3, z0), + svstnt1 (pn8, x0 + svcnth () * 3, z0)) + +/* +** stnt1_u16_4: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_4, svuint16x4_t, uint16_t, + svstnt1_u16_x4 (pn8, x0 + svcnth () * 4, z0), + svstnt1 (pn8, x0 + svcnth () * 4, z0)) + +/* +** stnt1_u16_28: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_28, svuint16x4_t, uint16_t, + svstnt1_u16_x4 (pn8, x0 + svcnth () * 28, z0), + svstnt1 (pn8, x0 + svcnth () * 28, z0)) + +/* +** stnt1_u16_32: +** [^{]* +** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_32, svuint16x4_t, uint16_t, + svstnt1_u16_x4 (pn8, x0 + svcnth () * 32, z0), + svstnt1 (pn8, x0 + svcnth () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u16_m1: +** decb x0 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_m1, svuint16x4_t, uint16_t, + svstnt1_u16_x4 (pn8, x0 - svcnth (), z0), + svstnt1 (pn8, x0 - svcnth (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u16_m2: +** decb x0, all, mul #2 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_m2, svuint16x4_t, uint16_t, + svstnt1_u16_x4 (pn8, x0 - svcnth () * 2, z0), + svstnt1 (pn8, x0 - svcnth () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u16_m3: +** decb x0, all, mul #3 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_m3, svuint16x4_t, uint16_t, + svstnt1_u16_x4 (pn8, x0 - svcnth () * 3, z0), + svstnt1 (pn8, x0 - svcnth () * 3, z0)) + +/* +** stnt1_u16_m4: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_m4, svuint16x4_t, uint16_t, + svstnt1_u16_x4 (pn8, x0 - svcnth () * 4, z0), + svstnt1 (pn8, x0 - svcnth () * 4, z0)) + +/* +** stnt1_u16_m32: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_m32, svuint16x4_t, uint16_t, + svstnt1_u16_x4 (pn8, x0 - svcnth () * 32, z0), + svstnt1 (pn8, x0 - svcnth () * 32, z0)) + +/* +** stnt1_u16_m36: +** [^{]* +** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_m36, svuint16x4_t, uint16_t, + svstnt1_u16_x4 (pn8, x0 - svcnth () * 36, z0), + svstnt1 (pn8, x0 - svcnth () * 36, z0)) + +/* +** stnt1_u16_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_z17, svuint16x4_t, uint16_t, + svstnt1_u16_x4 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_u16_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1h {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_z22, svuint16x4_t, uint16_t, + svstnt1_u16_x4 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_u16_z28: +** stnt1h {z28\.h - z31\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_z28, svuint16x4_t, uint16_t, + svstnt1_u16_x4 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_u16_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_pn0, svuint16x4_t, uint16_t, + svstnt1_u16_x4 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_u16_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** 
stnt1h {z0\.h - z3\.h}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_pn7, svuint16x4_t, uint16_t, + svstnt1_u16_x4 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_u16_pn15: +** stnt1h {z0\.h - z3\.h}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u16_pn15, svuint16x4_t, uint16_t, + svstnt1_u16_x4 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_u16_0: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_0, svuint16x4_t, uint16_t, + svstnt1_vnum_u16_x4 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u16_1: +** incb x0 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_1, svuint16x4_t, uint16_t, + svstnt1_vnum_u16_x4 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u16_2: +** incb x0, all, mul #2 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_2, svuint16x4_t, uint16_t, + svstnt1_vnum_u16_x4 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u16_3: +** incb x0, all, mul #3 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_3, svuint16x4_t, uint16_t, + svstnt1_vnum_u16_x4 (pn8, x0, 3, z0), + svstnt1_vnum (pn8, x0, 3, z0)) + +/* +** stnt1_vnum_u16_4: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_4, svuint16x4_t, uint16_t, + svstnt1_vnum_u16_x4 (pn8, x0, 4, z0), + svstnt1_vnum (pn8, x0, 4, z0)) + +/* +** stnt1_vnum_u16_28: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_28, svuint16x4_t, uint16_t, + svstnt1_vnum_u16_x4 (pn8, x0, 28, z0), + svstnt1_vnum (pn8, x0, 28, z0)) + +/* +** stnt1_vnum_u16_32: +** [^{]* +** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_32, svuint16x4_t, uint16_t, + svstnt1_vnum_u16_x4 (pn8, x0, 32, z0), + svstnt1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u16_m1: +** decb x0 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_m1, svuint16x4_t, uint16_t, + svstnt1_vnum_u16_x4 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u16_m2: +** decb x0, all, mul #2 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_m2, svuint16x4_t, uint16_t, + svstnt1_vnum_u16_x4 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_u16_m3: +** decb x0, all, mul #3 +** stnt1h {z0\.h - z3\.h}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_m3, svuint16x4_t, uint16_t, + svstnt1_vnum_u16_x4 (pn8, x0, -3, z0), + svstnt1_vnum (pn8, x0, -3, z0)) + +/* +** stnt1_vnum_u16_m4: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_m4, svuint16x4_t, uint16_t, + svstnt1_vnum_u16_x4 (pn8, x0, -4, z0), + svstnt1_vnum (pn8, x0, -4, z0)) + +/* +** stnt1_vnum_u16_m32: +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_m32, svuint16x4_t, uint16_t, + svstnt1_vnum_u16_x4 (pn8, x0, -32, z0), + svstnt1_vnum (pn8, x0, -32, z0)) + +/* +** stnt1_vnum_u16_m36: +** [^{]* +** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_m36, svuint16x4_t, uint16_t, + svstnt1_vnum_u16_x4 (pn8, x0, -36, z0), + svstnt1_vnum (pn8, x0, -36, z0)) + +/* +** stnt1_vnum_u16_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1h {z0\.h - z3\.h}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1h {z0\.h - z3\.h}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u16_x1, svuint16x4_t, uint16_t, + svstnt1_vnum_u16_x4 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..e185e6df07eca034a5630acda897d850cf698eaa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u32_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_u32_base: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_base, svuint32x2_t, uint32_t, + svstnt1_u32_x2 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_u32_index: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_index, svuint32x2_t, uint32_t, + svstnt1_u32_x2 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u32_1: +** incb x0 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_1, svuint32x2_t, uint32_t, + svstnt1_u32_x2 (pn8, x0 + svcntw (), z0), + svstnt1 (pn8, x0 + svcntw (), z0)) + +/* +** stnt1_u32_2: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_2, svuint32x2_t, uint32_t, + svstnt1_u32_x2 (pn8, x0 + svcntw () * 2, z0), + svstnt1 (pn8, x0 + svcntw () * 2, z0)) + +/* +** stnt1_u32_14: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_14, svuint32x2_t, uint32_t, + svstnt1_u32_x2 (pn8, x0 + svcntw () * 14, z0), + svstnt1 (pn8, x0 + svcntw () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u32_16: +** incb x0, all, mul #16 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_16, svuint32x2_t, uint32_t, + svstnt1_u32_x2 (pn8, x0 + svcntw () * 16, z0), + svstnt1 (pn8, x0 + svcntw () * 16, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_u32_m1: +** decb x0 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_m1, svuint32x2_t, uint32_t, + svstnt1_u32_x2 (pn8, x0 - svcntw (), z0), + svstnt1 (pn8, x0 - svcntw (), z0)) + +/* +** stnt1_u32_m2: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_m2, svuint32x2_t, uint32_t, + svstnt1_u32_x2 (pn8, x0 - svcntw () * 2, z0), + svstnt1 (pn8, x0 - svcntw () * 2, z0)) + +/* +** stnt1_u32_m16: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_m16, svuint32x2_t, uint32_t, + svstnt1_u32_x2 (pn8, x0 - svcntw () * 16, z0), + svstnt1 (pn8, x0 - svcntw () * 16, z0)) + +/* +** stnt1_u32_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_m18, svuint32x2_t, uint32_t, + svstnt1_u32_x2 (pn8, x0 - svcntw () * 18, z0), + svstnt1 (pn8, x0 - svcntw () * 18, z0)) + +/* +** stnt1_u32_z17: +** mov [^\n]+ +** mov [^\n]+ +** stnt1w {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_z17, svuint32x2_t, uint32_t, + svstnt1_u32_x2 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_u32_z22: +** stnt1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_z22, svuint32x2_t, uint32_t, + svstnt1_u32_x2 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_u32_z28: +** stnt1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_z28, svuint32x2_t, uint32_t, + svstnt1_u32_x2 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_u32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_pn0, svuint32x2_t, uint32_t, + svstnt1_u32_x2 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_u32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_pn7, svuint32x2_t, uint32_t, + svstnt1_u32_x2 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_u32_pn15: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_pn15, svuint32x2_t, uint32_t, + svstnt1_u32_x2 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_u32_0: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_0, svuint32x2_t, uint32_t, + svstnt1_vnum_u32_x2 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u32_1: +** incb x0 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_1, svuint32x2_t, uint32_t, + svstnt1_vnum_u32_x2 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* +** stnt1_vnum_u32_2: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_2, svuint32x2_t, uint32_t, + svstnt1_vnum_u32_x2 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* +** stnt1_vnum_u32_14: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_14, svuint32x2_t, uint32_t, + svstnt1_vnum_u32_x2 (pn8, x0, 14, z0), + svstnt1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_u32_16: +** incb x0, all, mul #16 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_16, svuint32x2_t, uint32_t, + svstnt1_vnum_u32_x2 (pn8, x0, 16, z0), + svstnt1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u32_m1: +** decb x0 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_m1, svuint32x2_t, uint32_t, + svstnt1_vnum_u32_x2 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* +** stnt1_vnum_u32_m2: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_m2, svuint32x2_t, uint32_t, + svstnt1_vnum_u32_x2 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* +** stnt1_vnum_u32_m16: +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_m16, svuint32x2_t, uint32_t, + svstnt1_vnum_u32_x2 (pn8, x0, -16, z0), + svstnt1_vnum (pn8, x0, -16, z0)) + +/* +** stnt1_vnum_u32_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_m18, svuint32x2_t, uint32_t, + svstnt1_vnum_u32_x2 (pn8, x0, -18, z0), + svstnt1_vnum (pn8, x0, -18, z0)) + +/* +** stnt1_vnum_u32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_x1, svuint32x2_t, uint32_t, + svstnt1_vnum_u32_x2 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..3c77f3ac48a7b1360a5a1678826bca36308391bb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u32_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_u32_base: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_base, svuint32x4_t, uint32_t, + svstnt1_u32_x4 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_u32_index: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_index, svuint32x4_t, uint32_t, + svstnt1_u32_x4 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u32_1: +** incb x0 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_1, svuint32x4_t, uint32_t, + svstnt1_u32_x4 (pn8, x0 + svcntw (), z0), + svstnt1 (pn8, x0 + svcntw (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u32_2: +** incb x0, all, mul #2 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_2, svuint32x4_t, uint32_t, + svstnt1_u32_x4 (pn8, x0 + svcntw () * 2, z0), + svstnt1 (pn8, x0 + svcntw () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_u32_3: +** incb x0, all, mul #3 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_3, svuint32x4_t, uint32_t, + svstnt1_u32_x4 (pn8, x0 + svcntw () * 3, z0), + svstnt1 (pn8, x0 + svcntw () * 3, z0)) + +/* +** stnt1_u32_4: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_4, svuint32x4_t, uint32_t, + svstnt1_u32_x4 (pn8, x0 + svcntw () * 4, z0), + svstnt1 (pn8, x0 + svcntw () * 4, z0)) + +/* +** stnt1_u32_28: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_28, svuint32x4_t, uint32_t, + svstnt1_u32_x4 (pn8, x0 + svcntw () * 28, z0), + svstnt1 (pn8, x0 + svcntw () * 28, z0)) + +/* +** stnt1_u32_32: +** [^{]* +** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_32, svuint32x4_t, uint32_t, + svstnt1_u32_x4 (pn8, x0 + svcntw () * 32, z0), + svstnt1 (pn8, x0 + svcntw () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u32_m1: +** decb x0 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_m1, svuint32x4_t, uint32_t, + svstnt1_u32_x4 (pn8, x0 - svcntw (), z0), + svstnt1 (pn8, x0 - svcntw (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u32_m2: +** decb x0, all, mul #2 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_m2, svuint32x4_t, uint32_t, + svstnt1_u32_x4 (pn8, x0 - svcntw () * 2, z0), + svstnt1 (pn8, x0 - svcntw () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u32_m3: +** decb x0, all, mul #3 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_m3, svuint32x4_t, uint32_t, + svstnt1_u32_x4 (pn8, x0 - svcntw () * 3, z0), + svstnt1 (pn8, x0 - svcntw () * 3, z0)) + +/* +** stnt1_u32_m4: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_m4, svuint32x4_t, uint32_t, + svstnt1_u32_x4 (pn8, x0 - svcntw () * 4, z0), + svstnt1 (pn8, x0 - svcntw () * 4, z0)) + +/* +** stnt1_u32_m32: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_m32, svuint32x4_t, uint32_t, + svstnt1_u32_x4 (pn8, x0 - svcntw () * 32, z0), + svstnt1 (pn8, x0 - svcntw () * 32, z0)) + +/* +** stnt1_u32_m36: +** [^{]* +** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_m36, svuint32x4_t, uint32_t, + svstnt1_u32_x4 (pn8, x0 - svcntw () * 36, z0), + svstnt1 (pn8, x0 - svcntw () * 36, z0)) + +/* +** stnt1_u32_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1w {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_z17, svuint32x4_t, uint32_t, + svstnt1_u32_x4 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_u32_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1w {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_z22, svuint32x4_t, uint32_t, + svstnt1_u32_x4 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_u32_z28: +** stnt1w {z28\.s - z31\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_z28, svuint32x4_t, uint32_t, + svstnt1_u32_x4 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_u32_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_pn0, svuint32x4_t, uint32_t, + svstnt1_u32_x4 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_u32_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** 
stnt1w {z0\.s - z3\.s}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_pn7, svuint32x4_t, uint32_t, + svstnt1_u32_x4 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_u32_pn15: +** stnt1w {z0\.s - z3\.s}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u32_pn15, svuint32x4_t, uint32_t, + svstnt1_u32_x4 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_u32_0: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_0, svuint32x4_t, uint32_t, + svstnt1_vnum_u32_x4 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u32_1: +** incb x0 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_1, svuint32x4_t, uint32_t, + svstnt1_vnum_u32_x4 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u32_2: +** incb x0, all, mul #2 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_2, svuint32x4_t, uint32_t, + svstnt1_vnum_u32_x4 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u32_3: +** incb x0, all, mul #3 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_3, svuint32x4_t, uint32_t, + svstnt1_vnum_u32_x4 (pn8, x0, 3, z0), + svstnt1_vnum (pn8, x0, 3, z0)) + +/* +** stnt1_vnum_u32_4: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_4, svuint32x4_t, uint32_t, + svstnt1_vnum_u32_x4 (pn8, x0, 4, z0), + svstnt1_vnum (pn8, x0, 4, z0)) + +/* +** stnt1_vnum_u32_28: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_28, svuint32x4_t, uint32_t, + svstnt1_vnum_u32_x4 (pn8, x0, 28, z0), + svstnt1_vnum (pn8, x0, 28, z0)) + +/* +** stnt1_vnum_u32_32: +** [^{]* +** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_32, svuint32x4_t, uint32_t, + svstnt1_vnum_u32_x4 (pn8, x0, 32, z0), + svstnt1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u32_m1: +** decb x0 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_m1, svuint32x4_t, uint32_t, + svstnt1_vnum_u32_x4 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u32_m2: +** decb x0, all, mul #2 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_m2, svuint32x4_t, uint32_t, + svstnt1_vnum_u32_x4 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_u32_m3: +** decb x0, all, mul #3 +** stnt1w {z0\.s - z3\.s}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_m3, svuint32x4_t, uint32_t, + svstnt1_vnum_u32_x4 (pn8, x0, -3, z0), + svstnt1_vnum (pn8, x0, -3, z0)) + +/* +** stnt1_vnum_u32_m4: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_m4, svuint32x4_t, uint32_t, + svstnt1_vnum_u32_x4 (pn8, x0, -4, z0), + svstnt1_vnum (pn8, x0, -4, z0)) + +/* +** stnt1_vnum_u32_m32: +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_m32, svuint32x4_t, uint32_t, + svstnt1_vnum_u32_x4 (pn8, x0, -32, z0), + svstnt1_vnum (pn8, x0, -32, z0)) + +/* +** stnt1_vnum_u32_m36: +** [^{]* +** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_m36, svuint32x4_t, uint32_t, + svstnt1_vnum_u32_x4 (pn8, x0, -36, z0), + svstnt1_vnum (pn8, x0, -36, z0)) + +/* +** stnt1_vnum_u32_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1w {z0\.s - z3\.s}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1w {z0\.s - z3\.s}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u32_x1, svuint32x4_t, uint32_t, + svstnt1_vnum_u32_x4 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..f23c484cafe12716725f518fb6af3e3b570a7ae9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u64_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_u64_base: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_base, svuint64x2_t, uint64_t, + svstnt1_u64_x2 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_u64_index: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_index, svuint64x2_t, uint64_t, + svstnt1_u64_x2 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u64_1: +** incb x0 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_1, svuint64x2_t, uint64_t, + svstnt1_u64_x2 (pn8, x0 + svcntd (), z0), + svstnt1 (pn8, x0 + svcntd (), z0)) + +/* +** stnt1_u64_2: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_2, svuint64x2_t, uint64_t, + svstnt1_u64_x2 (pn8, x0 + svcntd () * 2, z0), + svstnt1 (pn8, x0 + svcntd () * 2, z0)) + +/* +** stnt1_u64_14: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_14, svuint64x2_t, uint64_t, + svstnt1_u64_x2 (pn8, x0 + svcntd () * 14, z0), + svstnt1 (pn8, x0 + svcntd () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u64_16: +** incb x0, all, mul #16 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_16, svuint64x2_t, uint64_t, + svstnt1_u64_x2 (pn8, x0 + svcntd () * 16, z0), + svstnt1 (pn8, x0 + svcntd () * 16, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_u64_m1: +** decb x0 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_m1, svuint64x2_t, uint64_t, + svstnt1_u64_x2 (pn8, x0 - svcntd (), z0), + svstnt1 (pn8, x0 - svcntd (), z0)) + +/* +** stnt1_u64_m2: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_m2, svuint64x2_t, uint64_t, + svstnt1_u64_x2 (pn8, x0 - svcntd () * 2, z0), + svstnt1 (pn8, x0 - svcntd () * 2, z0)) + +/* +** stnt1_u64_m16: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_m16, svuint64x2_t, uint64_t, + svstnt1_u64_x2 (pn8, x0 - svcntd () * 16, z0), + svstnt1 (pn8, x0 - svcntd () * 16, z0)) + +/* +** stnt1_u64_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_m18, svuint64x2_t, uint64_t, + svstnt1_u64_x2 (pn8, x0 - svcntd () * 18, z0), + svstnt1 (pn8, x0 - svcntd () * 18, z0)) + +/* +** stnt1_u64_z17: +** mov [^\n]+ +** mov [^\n]+ +** stnt1d {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_z17, svuint64x2_t, uint64_t, + svstnt1_u64_x2 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_u64_z22: +** stnt1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_z22, svuint64x2_t, uint64_t, + svstnt1_u64_x2 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_u64_z28: +** stnt1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_z28, svuint64x2_t, uint64_t, + svstnt1_u64_x2 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_u64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_pn0, svuint64x2_t, uint64_t, + svstnt1_u64_x2 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_u64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_pn7, svuint64x2_t, uint64_t, + svstnt1_u64_x2 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_u64_pn15: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_pn15, svuint64x2_t, uint64_t, + svstnt1_u64_x2 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_u64_0: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_0, svuint64x2_t, uint64_t, + svstnt1_vnum_u64_x2 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u64_1: +** incb x0 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_1, svuint64x2_t, uint64_t, + svstnt1_vnum_u64_x2 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* +** stnt1_vnum_u64_2: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_2, svuint64x2_t, uint64_t, + svstnt1_vnum_u64_x2 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* +** stnt1_vnum_u64_14: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_14, svuint64x2_t, uint64_t, + svstnt1_vnum_u64_x2 (pn8, x0, 14, z0), + svstnt1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_u64_16: +** incb x0, all, mul #16 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_16, svuint64x2_t, uint64_t, + svstnt1_vnum_u64_x2 (pn8, x0, 16, z0), + svstnt1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u64_m1: +** decb x0 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_m1, svuint64x2_t, uint64_t, + svstnt1_vnum_u64_x2 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* +** stnt1_vnum_u64_m2: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_m2, svuint64x2_t, uint64_t, + svstnt1_vnum_u64_x2 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* +** stnt1_vnum_u64_m16: +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_m16, svuint64x2_t, uint64_t, + svstnt1_vnum_u64_x2 (pn8, x0, -16, z0), + svstnt1_vnum (pn8, x0, -16, z0)) + +/* +** stnt1_vnum_u64_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_m18, svuint64x2_t, uint64_t, + svstnt1_vnum_u64_x2 (pn8, x0, -18, z0), + svstnt1_vnum (pn8, x0, -18, z0)) + +/* +** stnt1_vnum_u64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_x1, svuint64x2_t, uint64_t, + svstnt1_vnum_u64_x2 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..aaef67edbea011ce8edd6322f6c86873ee77eea9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u64_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_u64_base: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_base, svuint64x4_t, uint64_t, + svstnt1_u64_x4 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_u64_index: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_index, svuint64x4_t, uint64_t, + svstnt1_u64_x4 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u64_1: +** incb x0 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_1, svuint64x4_t, uint64_t, + svstnt1_u64_x4 (pn8, x0 + svcntd (), z0), + svstnt1 (pn8, x0 + svcntd (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u64_2: +** incb x0, all, mul #2 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_2, svuint64x4_t, uint64_t, + svstnt1_u64_x4 (pn8, x0 + svcntd () * 2, z0), + svstnt1 (pn8, x0 + svcntd () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_u64_3: +** incb x0, all, mul #3 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_3, svuint64x4_t, uint64_t, + svstnt1_u64_x4 (pn8, x0 + svcntd () * 3, z0), + svstnt1 (pn8, x0 + svcntd () * 3, z0)) + +/* +** stnt1_u64_4: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_4, svuint64x4_t, uint64_t, + svstnt1_u64_x4 (pn8, x0 + svcntd () * 4, z0), + svstnt1 (pn8, x0 + svcntd () * 4, z0)) + +/* +** stnt1_u64_28: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_28, svuint64x4_t, uint64_t, + svstnt1_u64_x4 (pn8, x0 + svcntd () * 28, z0), + svstnt1 (pn8, x0 + svcntd () * 28, z0)) + +/* +** stnt1_u64_32: +** [^{]* +** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_32, svuint64x4_t, uint64_t, + svstnt1_u64_x4 (pn8, x0 + svcntd () * 32, z0), + svstnt1 (pn8, x0 + svcntd () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u64_m1: +** decb x0 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_m1, svuint64x4_t, uint64_t, + svstnt1_u64_x4 (pn8, x0 - svcntd (), z0), + svstnt1 (pn8, x0 - svcntd (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u64_m2: +** decb x0, all, mul #2 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_m2, svuint64x4_t, uint64_t, + svstnt1_u64_x4 (pn8, x0 - svcntd () * 2, z0), + svstnt1 (pn8, x0 - svcntd () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u64_m3: +** decb x0, all, mul #3 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_m3, svuint64x4_t, uint64_t, + svstnt1_u64_x4 (pn8, x0 - svcntd () * 3, z0), + svstnt1 (pn8, x0 - svcntd () * 3, z0)) + +/* +** stnt1_u64_m4: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_m4, svuint64x4_t, uint64_t, + svstnt1_u64_x4 (pn8, x0 - svcntd () * 4, z0), + svstnt1 (pn8, x0 - svcntd () * 4, z0)) + +/* +** stnt1_u64_m32: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_m32, svuint64x4_t, uint64_t, + svstnt1_u64_x4 (pn8, x0 - svcntd () * 32, z0), + svstnt1 (pn8, x0 - svcntd () * 32, z0)) + +/* +** stnt1_u64_m36: +** [^{]* +** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_m36, svuint64x4_t, uint64_t, + svstnt1_u64_x4 (pn8, x0 - svcntd () * 36, z0), + svstnt1 (pn8, x0 - svcntd () * 36, z0)) + +/* +** stnt1_u64_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1d {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_z17, svuint64x4_t, uint64_t, + svstnt1_u64_x4 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_u64_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1d {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_z22, svuint64x4_t, uint64_t, + svstnt1_u64_x4 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_u64_z28: +** stnt1d {z28\.d - z31\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_z28, svuint64x4_t, uint64_t, + svstnt1_u64_x4 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_u64_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_pn0, svuint64x4_t, uint64_t, + svstnt1_u64_x4 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_u64_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** 
stnt1d {z0\.d - z3\.d}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_pn7, svuint64x4_t, uint64_t, + svstnt1_u64_x4 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_u64_pn15: +** stnt1d {z0\.d - z3\.d}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u64_pn15, svuint64x4_t, uint64_t, + svstnt1_u64_x4 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_u64_0: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_0, svuint64x4_t, uint64_t, + svstnt1_vnum_u64_x4 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u64_1: +** incb x0 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_1, svuint64x4_t, uint64_t, + svstnt1_vnum_u64_x4 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u64_2: +** incb x0, all, mul #2 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_2, svuint64x4_t, uint64_t, + svstnt1_vnum_u64_x4 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u64_3: +** incb x0, all, mul #3 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_3, svuint64x4_t, uint64_t, + svstnt1_vnum_u64_x4 (pn8, x0, 3, z0), + svstnt1_vnum (pn8, x0, 3, z0)) + +/* +** stnt1_vnum_u64_4: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_4, svuint64x4_t, uint64_t, + svstnt1_vnum_u64_x4 (pn8, x0, 4, z0), + svstnt1_vnum (pn8, x0, 4, z0)) + +/* +** stnt1_vnum_u64_28: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_28, svuint64x4_t, uint64_t, + svstnt1_vnum_u64_x4 (pn8, x0, 28, z0), + svstnt1_vnum (pn8, x0, 28, z0)) + +/* +** stnt1_vnum_u64_32: +** [^{]* +** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_32, svuint64x4_t, uint64_t, + svstnt1_vnum_u64_x4 (pn8, x0, 32, z0), + svstnt1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u64_m1: +** decb x0 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_m1, svuint64x4_t, uint64_t, + svstnt1_vnum_u64_x4 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u64_m2: +** decb x0, all, mul #2 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_m2, svuint64x4_t, uint64_t, + svstnt1_vnum_u64_x4 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_u64_m3: +** decb x0, all, mul #3 +** stnt1d {z0\.d - z3\.d}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_m3, svuint64x4_t, uint64_t, + svstnt1_vnum_u64_x4 (pn8, x0, -3, z0), + svstnt1_vnum (pn8, x0, -3, z0)) + +/* +** stnt1_vnum_u64_m4: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_m4, svuint64x4_t, uint64_t, + svstnt1_vnum_u64_x4 (pn8, x0, -4, z0), + svstnt1_vnum (pn8, x0, -4, z0)) + +/* +** stnt1_vnum_u64_m32: +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_m32, svuint64x4_t, uint64_t, + svstnt1_vnum_u64_x4 (pn8, x0, -32, z0), + svstnt1_vnum (pn8, x0, -32, z0)) + +/* +** stnt1_vnum_u64_m36: +** [^{]* +** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_m36, svuint64x4_t, uint64_t, + svstnt1_vnum_u64_x4 (pn8, x0, -36, z0), + svstnt1_vnum (pn8, x0, -36, z0)) + +/* +** stnt1_vnum_u64_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1d {z0\.d - z3\.d}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1d {z0\.d - z3\.d}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u64_x1, svuint64x4_t, uint64_t, + svstnt1_vnum_u64_x4 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..5431aaa27a6f313d403e4c96fe14a7993754c4d7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u8_x2.c @@ -0,0 +1,262 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_u8_base: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_base, svuint8x2_t, uint8_t, + svstnt1_u8_x2 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_u8_index: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_index, svuint8x2_t, uint8_t, + svstnt1_u8_x2 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u8_1: +** incb x0 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_1, svuint8x2_t, uint8_t, + svstnt1_u8_x2 (pn8, x0 + svcntb (), z0), + svstnt1 (pn8, x0 + svcntb (), z0)) + +/* +** stnt1_u8_2: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_2, svuint8x2_t, uint8_t, + svstnt1_u8_x2 (pn8, x0 + svcntb () * 2, z0), + svstnt1 (pn8, x0 + svcntb () * 2, z0)) + +/* +** stnt1_u8_14: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_14, svuint8x2_t, uint8_t, + svstnt1_u8_x2 (pn8, x0 + svcntb () * 14, z0), + svstnt1 (pn8, x0 + svcntb () * 14, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u8_16: +** incb x0, all, mul #16 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_16, svuint8x2_t, uint8_t, + svstnt1_u8_x2 (pn8, x0 + svcntb () * 16, z0), + svstnt1 (pn8, x0 + svcntb () * 16, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_u8_m1: +** decb x0 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_m1, svuint8x2_t, uint8_t, + svstnt1_u8_x2 (pn8, x0 - svcntb (), z0), + svstnt1 (pn8, x0 - svcntb (), z0)) + +/* +** stnt1_u8_m2: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_m2, svuint8x2_t, uint8_t, + svstnt1_u8_x2 (pn8, x0 - svcntb () * 2, z0), + svstnt1 (pn8, x0 - svcntb () * 2, z0)) + +/* +** stnt1_u8_m16: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_m16, svuint8x2_t, uint8_t, + svstnt1_u8_x2 (pn8, x0 - svcntb () * 16, z0), + svstnt1 (pn8, x0 - svcntb () * 16, z0)) + +/* +** stnt1_u8_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_m18, svuint8x2_t, uint8_t, + svstnt1_u8_x2 (pn8, x0 - svcntb () * 18, z0), + svstnt1 (pn8, x0 - svcntb () * 18, z0)) + +/* +** stnt1_u8_z17: +** mov [^\n]+ +** mov [^\n]+ +** stnt1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_z17, svuint8x2_t, uint8_t, + svstnt1_u8_x2 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_u8_z22: +** stnt1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_z22, svuint8x2_t, uint8_t, + svstnt1_u8_x2 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_u8_z28: +** stnt1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_z28, svuint8x2_t, uint8_t, + svstnt1_u8_x2 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_u8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_pn0, svuint8x2_t, uint8_t, + svstnt1_u8_x2 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_u8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_pn7, svuint8x2_t, uint8_t, + svstnt1_u8_x2 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_u8_pn15: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_pn15, svuint8x2_t, uint8_t, + svstnt1_u8_x2 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_u8_0: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_0, svuint8x2_t, uint8_t, + svstnt1_vnum_u8_x2 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u8_1: +** incb x0 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_1, svuint8x2_t, uint8_t, + svstnt1_vnum_u8_x2 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* +** stnt1_vnum_u8_2: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_2, svuint8x2_t, uint8_t, + svstnt1_vnum_u8_x2 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* +** stnt1_vnum_u8_14: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_14, svuint8x2_t, uint8_t, + svstnt1_vnum_u8_x2 (pn8, x0, 14, z0), + svstnt1_vnum (pn8, x0, 14, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_u8_16: +** incb x0, all, mul #16 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_16, svuint8x2_t, uint8_t, + svstnt1_vnum_u8_x2 (pn8, x0, 16, z0), + svstnt1_vnum (pn8, x0, 16, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u8_m1: +** decb x0 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_m1, svuint8x2_t, uint8_t, + svstnt1_vnum_u8_x2 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* +** stnt1_vnum_u8_m2: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_m2, svuint8x2_t, uint8_t, + svstnt1_vnum_u8_x2 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* +** stnt1_vnum_u8_m16: +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_m16, svuint8x2_t, uint8_t, + svstnt1_vnum_u8_x2 (pn8, x0, -16, z0), + svstnt1_vnum (pn8, x0, -16, z0)) + +/* +** stnt1_vnum_u8_m18: +** addvl (x[0-9]+), x0, #-18 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_m18, svuint8x2_t, uint8_t, + svstnt1_vnum_u8_x2 (pn8, x0, -18, z0), + svstnt1_vnum (pn8, x0, -18, z0)) + +/* +** stnt1_vnum_u8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_x1, svuint8x2_t, uint8_t, + svstnt1_vnum_u8_x2 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..fcf3f634a71cf7a280b417764b6777330ba89dcc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_u8_x4.c @@ -0,0 +1,354 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + +#include "test_sme2_acle.h" + +/* +** stnt1_u8_base: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_base, svuint8x4_t, uint8_t, + svstnt1_u8_x4 (pn8, x0, z0), + svstnt1 (pn8, x0, z0)) + +/* +** stnt1_u8_index: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x1\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_index, svuint8x4_t, uint8_t, + svstnt1_u8_x4 (pn8, x0 + x1, z0), + svstnt1 (pn8, x0 + x1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u8_1: +** incb x0 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_1, svuint8x4_t, uint8_t, + svstnt1_u8_x4 (pn8, x0 + svcntb (), z0), + svstnt1 (pn8, x0 + svcntb (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u8_2: +** incb x0, all, mul #2 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_2, svuint8x4_t, uint8_t, + svstnt1_u8_x4 (pn8, x0 + svcntb () * 2, z0), + svstnt1 (pn8, x0 + svcntb () * 2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_u8_3: +** incb x0, all, mul #3 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_3, svuint8x4_t, uint8_t, + svstnt1_u8_x4 (pn8, x0 + svcntb () * 3, z0), + svstnt1 (pn8, x0 + svcntb () * 3, z0)) + +/* +** stnt1_u8_4: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_4, svuint8x4_t, uint8_t, + svstnt1_u8_x4 (pn8, x0 + svcntb () * 4, z0), + svstnt1 (pn8, x0 + svcntb () * 4, z0)) + +/* +** stnt1_u8_28: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_28, svuint8x4_t, uint8_t, + svstnt1_u8_x4 (pn8, x0 + svcntb () * 28, z0), + svstnt1 (pn8, x0 + svcntb () * 28, z0)) + +/* +** stnt1_u8_32: +** [^{]* +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_32, svuint8x4_t, uint8_t, + svstnt1_u8_x4 (pn8, x0 + svcntb () * 32, z0), + svstnt1 (pn8, x0 + svcntb () * 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u8_m1: +** decb x0 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_m1, svuint8x4_t, uint8_t, + svstnt1_u8_x4 (pn8, x0 - svcntb (), z0), + svstnt1 (pn8, x0 - svcntb (), z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u8_m2: +** decb x0, all, mul #2 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_m2, svuint8x4_t, uint8_t, + svstnt1_u8_x4 (pn8, x0 - svcntb () * 2, z0), + svstnt1 (pn8, x0 - svcntb () * 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_u8_m3: +** decb x0, all, mul #3 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_m3, svuint8x4_t, uint8_t, + svstnt1_u8_x4 (pn8, x0 - svcntb () * 3, z0), + svstnt1 (pn8, x0 - svcntb () * 3, z0)) + +/* +** stnt1_u8_m4: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_m4, svuint8x4_t, uint8_t, + svstnt1_u8_x4 (pn8, x0 - svcntb () * 4, z0), + svstnt1 (pn8, x0 - svcntb () * 4, z0)) + +/* +** stnt1_u8_m32: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_m32, svuint8x4_t, uint8_t, + svstnt1_u8_x4 (pn8, x0 - svcntb () * 32, z0), + svstnt1 (pn8, x0 - svcntb () * 32, z0)) + +/* +** stnt1_u8_m36: +** [^{]* +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_m36, svuint8x4_t, uint8_t, + svstnt1_u8_x4 (pn8, x0 - svcntb () * 36, z0), + svstnt1 (pn8, x0 - svcntb () * 36, z0)) + +/* +** stnt1_u8_z17: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_z17, svuint8x4_t, uint8_t, + svstnt1_u8_x4 (pn8, x0, z17), + svstnt1 (pn8, x0, z17)) + +/* +** stnt1_u8_z22: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** stnt1b {z[^\n]+}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_z22, svuint8x4_t, uint8_t, + svstnt1_u8_x4 (pn8, x0, z22), + svstnt1 (pn8, x0, z22)) + +/* +** stnt1_u8_z28: +** stnt1b {z28\.b - z31\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_z28, svuint8x4_t, uint8_t, + svstnt1_u8_x4 (pn8, x0, z28), + svstnt1 (pn8, x0, z28)) + +/* +** stnt1_u8_pn0: +** mov p([89]|1[0-5])\.b, p0\.b +** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_pn0, svuint8x4_t, uint8_t, + svstnt1_u8_x4 (pn0, x0, z0), + svstnt1 (pn0, x0, z0)) + +/* +** stnt1_u8_pn7: +** mov p([89]|1[0-5])\.b, p7\.b +** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\] +** ret +*/ 
+TEST_STORE_COUNT (stnt1_u8_pn7, svuint8x4_t, uint8_t, + svstnt1_u8_x4 (pn7, x0, z0), + svstnt1 (pn7, x0, z0)) + +/* +** stnt1_u8_pn15: +** stnt1b {z0\.b - z3\.b}, pn15, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_u8_pn15, svuint8x4_t, uint8_t, + svstnt1_u8_x4 (pn15, x0, z0), + svstnt1 (pn15, x0, z0)) + +/* +** stnt1_vnum_u8_0: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_0, svuint8x4_t, uint8_t, + svstnt1_vnum_u8_x4 (pn8, x0, 0, z0), + svstnt1_vnum (pn8, x0, 0, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u8_1: +** incb x0 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_1, svuint8x4_t, uint8_t, + svstnt1_vnum_u8_x4 (pn8, x0, 1, z0), + svstnt1_vnum (pn8, x0, 1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u8_2: +** incb x0, all, mul #2 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_2, svuint8x4_t, uint8_t, + svstnt1_vnum_u8_x4 (pn8, x0, 2, z0), + svstnt1_vnum (pn8, x0, 2, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u8_3: +** incb x0, all, mul #3 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_3, svuint8x4_t, uint8_t, + svstnt1_vnum_u8_x4 (pn8, x0, 3, z0), + svstnt1_vnum (pn8, x0, 3, z0)) + +/* +** stnt1_vnum_u8_4: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_4, svuint8x4_t, uint8_t, + svstnt1_vnum_u8_x4 (pn8, x0, 4, z0), + svstnt1_vnum (pn8, x0, 4, z0)) + +/* +** stnt1_vnum_u8_28: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_28, svuint8x4_t, uint8_t, + svstnt1_vnum_u8_x4 (pn8, x0, 28, z0), + svstnt1_vnum (pn8, x0, 28, z0)) + +/* +** stnt1_vnum_u8_32: +** [^{]* +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_32, svuint8x4_t, uint8_t, + svstnt1_vnum_u8_x4 (pn8, x0, 32, z0), + svstnt1_vnum (pn8, x0, 32, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u8_m1: +** decb x0 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_m1, svuint8x4_t, uint8_t, + svstnt1_vnum_u8_x4 (pn8, x0, -1, z0), + svstnt1_vnum (pn8, x0, -1, z0)) + +/* Moving the constant into a register would also be OK. */ +/* +** stnt1_vnum_u8_m2: +** decb x0, all, mul #2 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_m2, svuint8x4_t, uint8_t, + svstnt1_vnum_u8_x4 (pn8, x0, -2, z0), + svstnt1_vnum (pn8, x0, -2, z0)) + +/* Moving the constant into a register would also be OK. 
*/ +/* +** stnt1_vnum_u8_m3: +** decb x0, all, mul #3 +** stnt1b {z0\.b - z3\.b}, pn8, \[x0\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_m3, svuint8x4_t, uint8_t, + svstnt1_vnum_u8_x4 (pn8, x0, -3, z0), + svstnt1_vnum (pn8, x0, -3, z0)) + +/* +** stnt1_vnum_u8_m4: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_m4, svuint8x4_t, uint8_t, + svstnt1_vnum_u8_x4 (pn8, x0, -4, z0), + svstnt1_vnum (pn8, x0, -4, z0)) + +/* +** stnt1_vnum_u8_m32: +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_m32, svuint8x4_t, uint8_t, + svstnt1_vnum_u8_x4 (pn8, x0, -32, z0), + svstnt1_vnum (pn8, x0, -32, z0)) + +/* +** stnt1_vnum_u8_m36: +** [^{]* +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\] +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_m36, svuint8x4_t, uint8_t, + svstnt1_vnum_u8_x4 (pn8, x0, -36, z0), + svstnt1_vnum (pn8, x0, -36, z0)) + +/* +** stnt1_vnum_u8_x1: +** cntb (x[0-9]+) +** ( +** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 +** stnt1b {z0\.b - z3\.b}, pn8, \[\2\] +** | +** mul (x[0-9]+), (?:x1, \1|\1, x1) +** stnt1b {z0\.b - z3\.b}, pn8, \[x0, \3\] +** ) +** ret +*/ +TEST_STORE_COUNT (stnt1_vnum_u8_x1, svuint8x4_t, uint8_t, + svstnt1_vnum_u8_x4 (pn8, x0, x1, z0), + svstnt1_vnum (pn8, x0, x1, z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/str_zt.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/str_zt.c new file mode 100644 index 0000000000000000000000000000000000000000..c8ecacb10a01db4d98be08f28741f8ec06f17f5e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/str_zt.c @@ -0,0 +1,36 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#define STREAMING_COMPATIBLE +#define SHARED_ZT0 +#include "test_sme2_acle.h" + +/* +** str_zt0_x0: +** str zt0, \[x0\] +** ret +*/ +PROTO (str_zt0_x0, void, (char *x0)) { svstr_zt (0, x0); } + +/* +** str_zt0_x0p1: +** add (x[0-9]+), x0, #?1 +** str zt0, \[\1\] +** ret +*/ +PROTO (str_zt0_x0p1, void, (char *x0)) { svstr_zt (0, x0 + 1); } + +/* +** str_zt0_x0p64: +** add (x[0-9]+), x0, #?64 +** str zt0, \[\1\] +** ret +*/ +PROTO (str_zt0_x0p64, void, (char *x0)) { svstr_zt (0, x0 + 64); } + +/* +** str_zt0_x0_vl1: +** incb x0 +** str zt0, \[x0\] +** ret +*/ +PROTO (str_zt0_x0_vl1, void, (char *x0)) { svstr_zt (0, x0 + svcntb()); } diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za32_s32_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za32_s32_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..75b42055dd81cc660f8fa018a35adb7fc90693e2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za32_s32_vg1x2.c @@ -0,0 +1,180 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** sub_write_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_write_0_z0_z0, svint32x2_t, + svsub_write_za32_s32_vg1x2 (0, z0, z0), + svsub_write_za32_vg1x2 (0, z0, z0)) + +/* +** sub_write_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w0_z0_z0, svint32x2_t, + svsub_write_za32_s32_vg1x2 (w0, z0, z0), + svsub_write_za32_vg1x2 (w0, z0, z0)) + +/* +** sub_write_w8_z0_z4: +** sub za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z0_z4, svint32x2_t, + svsub_write_za32_s32_vg1x2 (w8, z0, z4), + 
svsub_write_za32_vg1x2 (w8, z0, z4)) + +/* +** sub_write_w8_z4_z18: +** sub za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z18\.s - z19\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z4_z18, svint32x2_t, + svsub_write_za32_s32_vg1x2 (w8, z4, z18), + svsub_write_za32_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** sub_write_w8_z23_z0: +** ... +** sub za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z23_z0, svint32x2_t, + svsub_write_za32_s32_vg1x2 (w8, z23, z0), + svsub_write_za32_vg1x2 (w8, z23, z0)) + +/* +** sub_write_w8_z18_z23: +** ... +** sub za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s}, [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_write_w8_z18_z23, svint32x2_t, + svsub_write_za32_s32_vg1x2 (w8, z18, z23), + svsub_write_za32_vg1x2 (w8, z18, z23)) + +/* +** sub_write_w8_z4_z28: +** sub za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z28\.s - z29\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z4_z28, svint32x2_t, + svsub_write_za32_s32_vg1x2 (w8, z4, z28), + svsub_write_za32_vg1x2 (w8, z4, z28)) + +/* +** sub_write_w8p7_z4_z0: +** sub za\.s\[w8, 7, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8p7_z4_z0, svint32x2_t, + svsub_write_za32_s32_vg1x2 (w8 + 7, z4, z0), + svsub_write_za32_vg1x2 (w8 + 7, z4, z0)) + +/* +** sub_write_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8p8_z4_z4, svint32x2_t, + svsub_write_za32_s32_vg1x2 (w8 + 8, z4, z4), + svsub_write_za32_vg1x2 (w8 + 8, z4, z4)) + +/* +** sub_write_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sub za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8m1_z4_z0, svint32x2_t, + svsub_write_za32_s32_vg1x2 (w8 - 1, z4, z0), + svsub_write_za32_vg1x2 (w8 - 1, z4, z0)) + +/* +** sub_write_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_0_z1_z0, svint32x2_t, svint32_t, + svsub_write_single_za32_s32_vg1x2 (0, z1, z0), + svsub_write_za32_vg1x2 (0, z1, z0)) + +/* +** sub_write_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w0_z1_z0, svint32x2_t, svint32_t, + svsub_write_single_za32_s32_vg1x2 (w0, z1, z0), + svsub_write_za32_vg1x2 (w0, z1, z0)) + +/* +** sub_write_single_w8_z1_z0: +** sub za\.s\[w8, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8_z1_z0, svint32x2_t, svint32_t, + svsub_write_single_za32_s32_vg1x2 (w8, z1, z0), + svsub_write_za32_vg1x2 (w8, z1, z0)) + +/* +** sub_write_single_w8p7_z1_z0: +** sub za\.s\[w8, 7, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8p7_z1_z0, svint32x2_t, svint32_t, + svsub_write_single_za32_s32_vg1x2 (w8 + 7, z1, z0), + svsub_write_za32_vg1x2 (w8 + 7, z1, z0)) + +/* +** sub_write_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8p8_z1_z0, svint32x2_t, svint32_t, + svsub_write_single_za32_s32_vg1x2 (w8 + 8, z1, z0), + svsub_write_za32_vg1x2 (w8 + 8, z1, z0)) + +/* +** sub_write_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** sub za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w0m1_z1_z0, svint32x2_t, svint32_t, + svsub_write_single_za32_s32_vg1x2 (w0 - 1, z1, z0), + 
svsub_write_za32_vg1x2 (w0 - 1, z1, z0)) + +/* +** sub_write_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** sub za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, z15\.s +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (sub_write_single_w8_z0_z15, svint32x2_t, svint32_t, + svsub_write_single_za32_s32_vg1x2 (w8, z0, z15), + svsub_write_za32_vg1x2 (w8, z0, z15)) + +/* +** sub_write_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** sub za\.s\[w8, 0, vgx2\], {z20\.s - z21\.s}, \1\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8_z20_z16, svint32x2_t, svint32_t, + svsub_write_single_za32_s32_vg1x2 (w8, z20, z16), + svsub_write_za32_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za32_s32_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za32_s32_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..9ef49ab07fc4c3eefd35704c413d3c35414b1587 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za32_s32_vg1x4.c @@ -0,0 +1,172 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** sub_write_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_write_0_z0_z0, svint32x4_t, + svsub_write_za32_s32_vg1x4 (0, z0, z0), + svsub_write_za32_vg1x4 (0, z0, z0)) + +/* +** sub_write_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w0_z0_z0, svint32x4_t, + svsub_write_za32_s32_vg1x4 (w0, z0, z0), + svsub_write_za32_vg1x4 (w0, z0, z0)) + +/* +** sub_write_w8_z0_z4: +** sub za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z0_z4, svint32x4_t, + svsub_write_za32_s32_vg1x4 (w8, z0, z4), + svsub_write_za32_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** sub_write_w8_z0_z18: +** ... +** sub za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_write_w8_z0_z18, svint32x4_t, + svsub_write_za32_s32_vg1x4 (w8, z0, z18), + svsub_write_za32_vg1x4 (w8, z0, z18)) + +/* +** sub_write_w8_z18_z28: +** ... +** sub za\.s\[w8, 0, vgx4\], [^\n]+, {z28\.s - z31\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z18_z28, svint32x4_t, + svsub_write_za32_s32_vg1x4 (w8, z18, z28), + svsub_write_za32_vg1x4 (w8, z18, z28)) + +/* +** sub_write_w8_z28_z23: +** ... 
+** sub za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}, [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_write_w8_z28_z23, svint32x4_t, + svsub_write_za32_s32_vg1x4 (w8, z28, z23), + svsub_write_za32_vg1x4 (w8, z28, z23)) + +/* +** sub_write_w8p7_z4_z0: +** sub za\.s\[w8, 7, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8p7_z4_z0, svint32x4_t, + svsub_write_za32_s32_vg1x4 (w8 + 7, z4, z0), + svsub_write_za32_vg1x4 (w8 + 7, z4, z0)) + +/* +** sub_write_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8p8_z4_z4, svint32x4_t, + svsub_write_za32_s32_vg1x4 (w8 + 8, z4, z4), + svsub_write_za32_vg1x4 (w8 + 8, z4, z4)) + +/* +** sub_write_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sub za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8m1_z4_z0, svint32x4_t, + svsub_write_za32_s32_vg1x4 (w8 - 1, z4, z0), + svsub_write_za32_vg1x4 (w8 - 1, z4, z0)) + +/* +** sub_write_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_0_z1_z0, svint32x4_t, svint32_t, + svsub_write_single_za32_s32_vg1x4 (0, z1, z0), + svsub_write_za32_vg1x4 (0, z1, z0)) + +/* +** sub_write_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w0_z1_z0, svint32x4_t, svint32_t, + svsub_write_single_za32_s32_vg1x4 (w0, z1, z0), + svsub_write_za32_vg1x4 (w0, z1, z0)) + +/* +** sub_write_single_w8_z1_z0: +** sub za\.s\[w8, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8_z1_z0, svint32x4_t, svint32_t, + svsub_write_single_za32_s32_vg1x4 (w8, z1, z0), + svsub_write_za32_vg1x4 (w8, z1, z0)) + +/* +** sub_write_single_w8p7_z1_z0: +** sub za\.s\[w8, 7, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8p7_z1_z0, svint32x4_t, svint32_t, + svsub_write_single_za32_s32_vg1x4 (w8 + 7, z1, z0), + svsub_write_za32_vg1x4 (w8 + 7, z1, z0)) + +/* +** sub_write_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8p8_z1_z0, svint32x4_t, svint32_t, + svsub_write_single_za32_s32_vg1x4 (w8 + 8, z1, z0), + svsub_write_za32_vg1x4 (w8 + 8, z1, z0)) + +/* +** sub_write_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** sub za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w0m1_z1_z0, svint32x4_t, svint32_t, + svsub_write_single_za32_s32_vg1x4 (w0 - 1, z1, z0), + svsub_write_za32_vg1x4 (w0 - 1, z1, z0)) + +/* +** sub_write_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** sub za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, z15\.s +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (sub_write_single_w8_z0_z15, svint32x4_t, svint32_t, + svsub_write_single_za32_s32_vg1x4 (w8, z0, z15), + svsub_write_za32_vg1x4 (w8, z0, z15)) + +/* +** sub_write_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** sub za\.s\[w8, 0, vgx4\], {z20\.s - z23\.s}, \1\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8_z20_z16, svint32x4_t, svint32_t, + svsub_write_single_za32_s32_vg1x4 (w8, z20, z16), + svsub_write_za32_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za32_u32_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za32_u32_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..c655e46231733a42e10607879c3cc47fc97934c4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za32_u32_vg1x2.c @@ -0,0 +1,180 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** sub_write_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_write_0_z0_z0, svuint32x2_t, + svsub_write_za32_u32_vg1x2 (0, z0, z0), + svsub_write_za32_vg1x2 (0, z0, z0)) + +/* +** sub_write_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w0_z0_z0, svuint32x2_t, + svsub_write_za32_u32_vg1x2 (w0, z0, z0), + svsub_write_za32_vg1x2 (w0, z0, z0)) + +/* +** sub_write_w8_z0_z4: +** sub za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z0_z4, svuint32x2_t, + svsub_write_za32_u32_vg1x2 (w8, z0, z4), + svsub_write_za32_vg1x2 (w8, z0, z4)) + +/* +** sub_write_w8_z4_z18: +** sub za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z18\.s - z19\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z4_z18, svuint32x2_t, + svsub_write_za32_u32_vg1x2 (w8, z4, z18), + svsub_write_za32_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** sub_write_w8_z23_z0: +** ... +** sub za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z23_z0, svuint32x2_t, + svsub_write_za32_u32_vg1x2 (w8, z23, z0), + svsub_write_za32_vg1x2 (w8, z23, z0)) + +/* +** sub_write_w8_z18_z23: +** ... 
+** sub za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s}, [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_write_w8_z18_z23, svuint32x2_t, + svsub_write_za32_u32_vg1x2 (w8, z18, z23), + svsub_write_za32_vg1x2 (w8, z18, z23)) + +/* +** sub_write_w8_z4_z28: +** sub za\.s\[w8, 0, vgx2\], {z4\.s - z5\.s}, {z28\.s - z29\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z4_z28, svuint32x2_t, + svsub_write_za32_u32_vg1x2 (w8, z4, z28), + svsub_write_za32_vg1x2 (w8, z4, z28)) + +/* +** sub_write_w8p7_z4_z0: +** sub za\.s\[w8, 7, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8p7_z4_z0, svuint32x2_t, + svsub_write_za32_u32_vg1x2 (w8 + 7, z4, z0), + svsub_write_za32_vg1x2 (w8 + 7, z4, z0)) + +/* +** sub_write_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8p8_z4_z4, svuint32x2_t, + svsub_write_za32_u32_vg1x2 (w8 + 8, z4, z4), + svsub_write_za32_vg1x2 (w8 + 8, z4, z4)) + +/* +** sub_write_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sub za\.s\[\1, 0, vgx2\], {z4\.s - z5\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8m1_z4_z0, svuint32x2_t, + svsub_write_za32_u32_vg1x2 (w8 - 1, z4, z0), + svsub_write_za32_vg1x2 (w8 - 1, z4, z0)) + +/* +** sub_write_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_0_z1_z0, svuint32x2_t, svuint32_t, + svsub_write_single_za32_u32_vg1x2 (0, z1, z0), + svsub_write_za32_vg1x2 (0, z1, z0)) + +/* +** sub_write_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w0_z1_z0, svuint32x2_t, svuint32_t, + svsub_write_single_za32_u32_vg1x2 (w0, z1, z0), + svsub_write_za32_vg1x2 (w0, z1, z0)) + +/* +** sub_write_single_w8_z1_z0: +** sub za\.s\[w8, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8_z1_z0, svuint32x2_t, svuint32_t, + svsub_write_single_za32_u32_vg1x2 (w8, z1, z0), + svsub_write_za32_vg1x2 (w8, z1, z0)) + +/* +** sub_write_single_w8p7_z1_z0: +** sub za\.s\[w8, 7, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8p7_z1_z0, svuint32x2_t, svuint32_t, + svsub_write_single_za32_u32_vg1x2 (w8 + 7, z1, z0), + svsub_write_za32_vg1x2 (w8 + 7, z1, z0)) + +/* +** sub_write_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8p8_z1_z0, svuint32x2_t, svuint32_t, + svsub_write_single_za32_u32_vg1x2 (w8 + 8, z1, z0), + svsub_write_za32_vg1x2 (w8 + 8, z1, z0)) + +/* +** sub_write_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** sub za\.s\[\1, 0, vgx2\], {z1\.s - z2\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w0m1_z1_z0, svuint32x2_t, svuint32_t, + svsub_write_single_za32_u32_vg1x2 (w0 - 1, z1, z0), + svsub_write_za32_vg1x2 (w0 - 1, z1, z0)) + +/* +** sub_write_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** sub za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s}, z15\.s +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (sub_write_single_w8_z0_z15, svuint32x2_t, svuint32_t, + svsub_write_single_za32_u32_vg1x2 (w8, z0, z15), + svsub_write_za32_vg1x2 (w8, z0, z15)) + +/* +** sub_write_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** sub za\.s\[w8, 0, vgx2\], {z20\.s - z21\.s}, \1\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8_z20_z16, svuint32x2_t, svuint32_t, + svsub_write_single_za32_u32_vg1x2 (w8, z20, z16), + svsub_write_za32_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za32_u32_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za32_u32_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..51f5a4d5a63f2617e38934911bda47d66db7a821 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za32_u32_vg1x4.c @@ -0,0 +1,172 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** sub_write_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_write_0_z0_z0, svuint32x4_t, + svsub_write_za32_u32_vg1x4 (0, z0, z0), + svsub_write_za32_vg1x4 (0, z0, z0)) + +/* +** sub_write_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w0_z0_z0, svuint32x4_t, + svsub_write_za32_u32_vg1x4 (w0, z0, z0), + svsub_write_za32_vg1x4 (w0, z0, z0)) + +/* +** sub_write_w8_z0_z4: +** sub za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z0_z4, svuint32x4_t, + svsub_write_za32_u32_vg1x4 (w8, z0, z4), + svsub_write_za32_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** sub_write_w8_z0_z18: +** ... +** sub za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_write_w8_z0_z18, svuint32x4_t, + svsub_write_za32_u32_vg1x4 (w8, z0, z18), + svsub_write_za32_vg1x4 (w8, z0, z18)) + +/* +** sub_write_w8_z18_z28: +** ... +** sub za\.s\[w8, 0, vgx4\], [^\n]+, {z28\.s - z31\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z18_z28, svuint32x4_t, + svsub_write_za32_u32_vg1x4 (w8, z18, z28), + svsub_write_za32_vg1x4 (w8, z18, z28)) + +/* +** sub_write_w8_z28_z23: +** ... 
+** sub za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}, [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_write_w8_z28_z23, svuint32x4_t, + svsub_write_za32_u32_vg1x4 (w8, z28, z23), + svsub_write_za32_vg1x4 (w8, z28, z23)) + +/* +** sub_write_w8p7_z4_z0: +** sub za\.s\[w8, 7, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8p7_z4_z0, svuint32x4_t, + svsub_write_za32_u32_vg1x4 (w8 + 7, z4, z0), + svsub_write_za32_vg1x4 (w8 + 7, z4, z0)) + +/* +** sub_write_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8p8_z4_z4, svuint32x4_t, + svsub_write_za32_u32_vg1x4 (w8 + 8, z4, z4), + svsub_write_za32_vg1x4 (w8 + 8, z4, z4)) + +/* +** sub_write_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sub za\.s\[\1, 0, vgx4\], {z4\.s - z7\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_write_w8m1_z4_z0, svuint32x4_t, + svsub_write_za32_u32_vg1x4 (w8 - 1, z4, z0), + svsub_write_za32_vg1x4 (w8 - 1, z4, z0)) + +/* +** sub_write_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_0_z1_z0, svuint32x4_t, svuint32_t, + svsub_write_single_za32_u32_vg1x4 (0, z1, z0), + svsub_write_za32_vg1x4 (0, z1, z0)) + +/* +** sub_write_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w0_z1_z0, svuint32x4_t, svuint32_t, + svsub_write_single_za32_u32_vg1x4 (w0, z1, z0), + svsub_write_za32_vg1x4 (w0, z1, z0)) + +/* +** sub_write_single_w8_z1_z0: +** sub za\.s\[w8, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8_z1_z0, svuint32x4_t, svuint32_t, + svsub_write_single_za32_u32_vg1x4 (w8, z1, z0), + svsub_write_za32_vg1x4 (w8, z1, z0)) + +/* +** sub_write_single_w8p7_z1_z0: +** sub za\.s\[w8, 7, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8p7_z1_z0, svuint32x4_t, svuint32_t, + svsub_write_single_za32_u32_vg1x4 (w8 + 7, z1, z0), + svsub_write_za32_vg1x4 (w8 + 7, z1, z0)) + +/* +** sub_write_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8p8_z1_z0, svuint32x4_t, svuint32_t, + svsub_write_single_za32_u32_vg1x4 (w8 + 8, z1, z0), + svsub_write_za32_vg1x4 (w8 + 8, z1, z0)) + +/* +** sub_write_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** sub za\.s\[\1, 0, vgx4\], {z1\.s - z4\.s}, z0\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w0m1_z1_z0, svuint32x4_t, svuint32_t, + svsub_write_single_za32_u32_vg1x4 (w0 - 1, z1, z0), + svsub_write_za32_vg1x4 (w0 - 1, z1, z0)) + +/* +** sub_write_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** sub za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s}, z15\.s +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (sub_write_single_w8_z0_z15, svuint32x4_t, svuint32_t, + svsub_write_single_za32_u32_vg1x4 (w8, z0, z15), + svsub_write_za32_vg1x4 (w8, z0, z15)) + +/* +** sub_write_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** sub za\.s\[w8, 0, vgx4\], {z20\.s - z23\.s}, \1\.s +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8_z20_z16, svuint32x4_t, svuint32_t, + svsub_write_single_za32_u32_vg1x4 (w8, z20, z16), + svsub_write_za32_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za64_s64_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za64_s64_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..db3ec8a6c0b8b253c88b0c199dc7597c656d05ba --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za64_s64_vg1x2.c @@ -0,0 +1,182 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** sub_write_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_write_0_z0_z0, svint64x2_t, + svsub_write_za64_s64_vg1x2 (0, z0, z0), + svsub_write_za64_vg1x2 (0, z0, z0)) + +/* +** sub_write_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w0_z0_z0, svint64x2_t, + svsub_write_za64_s64_vg1x2 (w0, z0, z0), + svsub_write_za64_vg1x2 (w0, z0, z0)) + +/* +** sub_write_w8_z0_z4: +** sub za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z0_z4, svint64x2_t, + svsub_write_za64_s64_vg1x2 (w8, z0, z4), + svsub_write_za64_vg1x2 (w8, z0, z4)) + +/* +** sub_write_w8_z4_z18: +** sub za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z18\.d - z19\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z4_z18, svint64x2_t, + svsub_write_za64_s64_vg1x2 (w8, z4, z18), + svsub_write_za64_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** sub_write_w8_z23_z0: +** ... +** sub za\.d\[w8, 0, vgx2\], [^\n]+, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z23_z0, svint64x2_t, + svsub_write_za64_s64_vg1x2 (w8, z23, z0), + svsub_write_za64_vg1x2 (w8, z23, z0)) + +/* +** sub_write_w8_z18_z23: +** ... 
+** sub za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}, [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_write_w8_z18_z23, svint64x2_t, + svsub_write_za64_s64_vg1x2 (w8, z18, z23), + svsub_write_za64_vg1x2 (w8, z18, z23)) + +/* +** sub_write_w8_z4_z28: +** sub za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z28\.d - z29\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z4_z28, svint64x2_t, + svsub_write_za64_s64_vg1x2 (w8, z4, z28), + svsub_write_za64_vg1x2 (w8, z4, z28)) + +/* +** sub_write_w8p7_z4_z0: +** sub za\.d\[w8, 7, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8p7_z4_z0, svint64x2_t, + svsub_write_za64_s64_vg1x2 (w8 + 7, z4, z0), + svsub_write_za64_vg1x2 (w8 + 7, z4, z0)) + +/* +** sub_write_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8p8_z4_z4, svint64x2_t, + svsub_write_za64_s64_vg1x2 (w8 + 8, z4, z4), + svsub_write_za64_vg1x2 (w8 + 8, z4, z4)) + +/* +** sub_write_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sub za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8m1_z4_z0, svint64x2_t, + svsub_write_za64_s64_vg1x2 (w8 - 1, z4, z0), + svsub_write_za64_vg1x2 (w8 - 1, z4, z0)) + +/* +** sub_write_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_0_z1_z0, svint64x2_t, svint64_t, + svsub_write_single_za64_s64_vg1x2 (0, z1, z0), + svsub_write_za64_vg1x2 (0, z1, z0)) + +/* +** sub_write_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w0_z1_z0, svint64x2_t, svint64_t, + svsub_write_single_za64_s64_vg1x2 (w0, z1, z0), + svsub_write_za64_vg1x2 (w0, z1, z0)) + +/* +** sub_write_single_w8_z1_z0: +** sub za\.d\[w8, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8_z1_z0, svint64x2_t, svint64_t, + svsub_write_single_za64_s64_vg1x2 (w8, z1, z0), + svsub_write_za64_vg1x2 (w8, z1, z0)) + +/* +** sub_write_single_w8p7_z1_z0: +** sub za\.d\[w8, 7, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8p7_z1_z0, svint64x2_t, svint64_t, + svsub_write_single_za64_s64_vg1x2 (w8 + 7, z1, z0), + svsub_write_za64_vg1x2 (w8 + 7, z1, z0)) + +/* +** sub_write_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8p8_z1_z0, svint64x2_t, svint64_t, + svsub_write_single_za64_s64_vg1x2 (w8 + 8, z1, z0), + svsub_write_za64_vg1x2 (w8 + 8, z1, z0)) + +/* +** sub_write_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** sub za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w0m1_z1_z0, svint64x2_t, svint64_t, + svsub_write_single_za64_s64_vg1x2 (w0 - 1, z1, z0), + svsub_write_za64_vg1x2 (w0 - 1, z1, z0)) + +/* +** sub_write_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** sub za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, z15\.d +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (sub_write_single_w8_z0_z15, svint64x2_t, svint64_t, + svsub_write_single_za64_s64_vg1x2 (w8, z0, z15), + svsub_write_za64_vg1x2 (w8, z0, z15)) + +/* +** sub_write_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** sub za\.d\[w8, 0, vgx2\], {z20\.d - z21\.d}, \1\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8_z20_z16, svint64x2_t, svint64_t, + svsub_write_single_za64_s64_vg1x2 (w8, z20, z16), + svsub_write_za64_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za64_s64_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za64_s64_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..13b2890b71cf6432308e13e0a452af87804ef685 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za64_s64_vg1x4.c @@ -0,0 +1,174 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** sub_write_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_write_0_z0_z0, svint64x4_t, + svsub_write_za64_s64_vg1x4 (0, z0, z0), + svsub_write_za64_vg1x4 (0, z0, z0)) + +/* +** sub_write_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w0_z0_z0, svint64x4_t, + svsub_write_za64_s64_vg1x4 (w0, z0, z0), + svsub_write_za64_vg1x4 (w0, z0, z0)) + +/* +** sub_write_w8_z0_z4: +** sub za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z0_z4, svint64x4_t, + svsub_write_za64_s64_vg1x4 (w8, z0, z4), + svsub_write_za64_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** sub_write_w8_z0_z18: +** ... +** sub za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_write_w8_z0_z18, svint64x4_t, + svsub_write_za64_s64_vg1x4 (w8, z0, z18), + svsub_write_za64_vg1x4 (w8, z0, z18)) + +/* +** sub_write_w8_z18_z28: +** ... +** sub za\.d\[w8, 0, vgx4\], [^\n]+, {z28\.d - z31\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z18_z28, svint64x4_t, + svsub_write_za64_s64_vg1x4 (w8, z18, z28), + svsub_write_za64_vg1x4 (w8, z18, z28)) + +/* +** sub_write_w8_z28_z23: +** ... 
+** sub za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}, [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_write_w8_z28_z23, svint64x4_t, + svsub_write_za64_s64_vg1x4 (w8, z28, z23), + svsub_write_za64_vg1x4 (w8, z28, z23)) + +/* +** sub_write_w8p7_z4_z0: +** sub za\.d\[w8, 7, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8p7_z4_z0, svint64x4_t, + svsub_write_za64_s64_vg1x4 (w8 + 7, z4, z0), + svsub_write_za64_vg1x4 (w8 + 7, z4, z0)) + +/* +** sub_write_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8p8_z4_z4, svint64x4_t, + svsub_write_za64_s64_vg1x4 (w8 + 8, z4, z4), + svsub_write_za64_vg1x4 (w8 + 8, z4, z4)) + +/* +** sub_write_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sub za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8m1_z4_z0, svint64x4_t, + svsub_write_za64_s64_vg1x4 (w8 - 1, z4, z0), + svsub_write_za64_vg1x4 (w8 - 1, z4, z0)) + +/* +** sub_write_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_0_z1_z0, svint64x4_t, svint64_t, + svsub_write_single_za64_s64_vg1x4 (0, z1, z0), + svsub_write_za64_vg1x4 (0, z1, z0)) + +/* +** sub_write_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w0_z1_z0, svint64x4_t, svint64_t, + svsub_write_single_za64_s64_vg1x4 (w0, z1, z0), + svsub_write_za64_vg1x4 (w0, z1, z0)) + +/* +** sub_write_single_w8_z1_z0: +** sub za\.d\[w8, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8_z1_z0, svint64x4_t, svint64_t, + svsub_write_single_za64_s64_vg1x4 (w8, z1, z0), + svsub_write_za64_vg1x4 (w8, z1, z0)) + +/* +** sub_write_single_w8p7_z1_z0: +** sub za\.d\[w8, 7, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8p7_z1_z0, svint64x4_t, svint64_t, + svsub_write_single_za64_s64_vg1x4 (w8 + 7, z1, z0), + svsub_write_za64_vg1x4 (w8 + 7, z1, z0)) + +/* +** sub_write_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8p8_z1_z0, svint64x4_t, svint64_t, + svsub_write_single_za64_s64_vg1x4 (w8 + 8, z1, z0), + svsub_write_za64_vg1x4 (w8 + 8, z1, z0)) + +/* +** sub_write_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** sub za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w0m1_z1_z0, svint64x4_t, svint64_t, + svsub_write_single_za64_s64_vg1x4 (w0 - 1, z1, z0), + svsub_write_za64_vg1x4 (w0 - 1, z1, z0)) + +/* +** sub_write_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** sub za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, z15\.d +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (sub_write_single_w8_z0_z15, svint64x4_t, svint64_t, + svsub_write_single_za64_s64_vg1x4 (w8, z0, z15), + svsub_write_za64_vg1x4 (w8, z0, z15)) + +/* +** sub_write_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** sub za\.d\[w8, 0, vgx4\], {z20\.d - z23\.d}, \1\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8_z20_z16, svint64x4_t, svint64_t, + svsub_write_single_za64_s64_vg1x4 (w8, z20, z16), + svsub_write_za64_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za64_u64_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za64_u64_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..32672b56ba29b2e42c460aff38cb34c01eae6224 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za64_u64_vg1x2.c @@ -0,0 +1,182 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** sub_write_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_write_0_z0_z0, svuint64x2_t, + svsub_write_za64_u64_vg1x2 (0, z0, z0), + svsub_write_za64_vg1x2 (0, z0, z0)) + +/* +** sub_write_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w0_z0_z0, svuint64x2_t, + svsub_write_za64_u64_vg1x2 (w0, z0, z0), + svsub_write_za64_vg1x2 (w0, z0, z0)) + +/* +** sub_write_w8_z0_z4: +** sub za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z0_z4, svuint64x2_t, + svsub_write_za64_u64_vg1x2 (w8, z0, z4), + svsub_write_za64_vg1x2 (w8, z0, z4)) + +/* +** sub_write_w8_z4_z18: +** sub za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z18\.d - z19\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z4_z18, svuint64x2_t, + svsub_write_za64_u64_vg1x2 (w8, z4, z18), + svsub_write_za64_vg1x2 (w8, z4, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** sub_write_w8_z23_z0: +** ... +** sub za\.d\[w8, 0, vgx2\], [^\n]+, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z23_z0, svuint64x2_t, + svsub_write_za64_u64_vg1x2 (w8, z23, z0), + svsub_write_za64_vg1x2 (w8, z23, z0)) + +/* +** sub_write_w8_z18_z23: +** ... 
+** sub za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}, [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_write_w8_z18_z23, svuint64x2_t, + svsub_write_za64_u64_vg1x2 (w8, z18, z23), + svsub_write_za64_vg1x2 (w8, z18, z23)) + +/* +** sub_write_w8_z4_z28: +** sub za\.d\[w8, 0, vgx2\], {z4\.d - z5\.d}, {z28\.d - z29\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z4_z28, svuint64x2_t, + svsub_write_za64_u64_vg1x2 (w8, z4, z28), + svsub_write_za64_vg1x2 (w8, z4, z28)) + +/* +** sub_write_w8p7_z4_z0: +** sub za\.d\[w8, 7, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8p7_z4_z0, svuint64x2_t, + svsub_write_za64_u64_vg1x2 (w8 + 7, z4, z0), + svsub_write_za64_vg1x2 (w8 + 7, z4, z0)) + +/* +** sub_write_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8p8_z4_z4, svuint64x2_t, + svsub_write_za64_u64_vg1x2 (w8 + 8, z4, z4), + svsub_write_za64_vg1x2 (w8 + 8, z4, z4)) + +/* +** sub_write_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sub za\.d\[\1, 0, vgx2\], {z4\.d - z5\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8m1_z4_z0, svuint64x2_t, + svsub_write_za64_u64_vg1x2 (w8 - 1, z4, z0), + svsub_write_za64_vg1x2 (w8 - 1, z4, z0)) + +/* +** sub_write_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_0_z1_z0, svuint64x2_t, svuint64_t, + svsub_write_single_za64_u64_vg1x2 (0, z1, z0), + svsub_write_za64_vg1x2 (0, z1, z0)) + +/* +** sub_write_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w0_z1_z0, svuint64x2_t, svuint64_t, + svsub_write_single_za64_u64_vg1x2 (w0, z1, z0), + svsub_write_za64_vg1x2 (w0, z1, z0)) + +/* +** sub_write_single_w8_z1_z0: +** sub za\.d\[w8, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8_z1_z0, svuint64x2_t, svuint64_t, + svsub_write_single_za64_u64_vg1x2 (w8, z1, z0), + svsub_write_za64_vg1x2 (w8, z1, z0)) + +/* +** sub_write_single_w8p7_z1_z0: +** sub za\.d\[w8, 7, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8p7_z1_z0, svuint64x2_t, svuint64_t, + svsub_write_single_za64_u64_vg1x2 (w8 + 7, z1, z0), + svsub_write_za64_vg1x2 (w8 + 7, z1, z0)) + +/* +** sub_write_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8p8_z1_z0, svuint64x2_t, svuint64_t, + svsub_write_single_za64_u64_vg1x2 (w8 + 8, z1, z0), + svsub_write_za64_vg1x2 (w8 + 8, z1, z0)) + +/* +** sub_write_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** sub za\.d\[\1, 0, vgx2\], {z1\.d - z2\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w0m1_z1_z0, svuint64x2_t, svuint64_t, + svsub_write_single_za64_u64_vg1x2 (w0 - 1, z1, z0), + svsub_write_za64_vg1x2 (w0 - 1, z1, z0)) + +/* +** sub_write_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** sub za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}, z15\.d +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (sub_write_single_w8_z0_z15, svuint64x2_t, svuint64_t, + svsub_write_single_za64_u64_vg1x2 (w8, z0, z15), + svsub_write_za64_vg1x2 (w8, z0, z15)) + +/* +** sub_write_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** sub za\.d\[w8, 0, vgx2\], {z20\.d - z21\.d}, \1\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8_z20_z16, svuint64x2_t, svuint64_t, + svsub_write_single_za64_u64_vg1x2 (w8, z20, z16), + svsub_write_za64_vg1x2 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za64_u64_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za64_u64_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..84b54104d85a299917aecc7b1cad4b8ff2f3122e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_write_za64_u64_vg1x4.c @@ -0,0 +1,174 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** sub_write_0_z0_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_write_0_z0_z0, svuint64x4_t, + svsub_write_za64_u64_vg1x4 (0, z0, z0), + svsub_write_za64_vg1x4 (0, z0, z0)) + +/* +** sub_write_w0_z0_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w0_z0_z0, svuint64x4_t, + svsub_write_za64_u64_vg1x4 (w0, z0, z0), + svsub_write_za64_vg1x4 (w0, z0, z0)) + +/* +** sub_write_w8_z0_z4: +** sub za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z0_z4, svuint64x4_t, + svsub_write_za64_u64_vg1x4 (w8, z0, z4), + svsub_write_za64_vg1x4 (w8, z0, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** sub_write_w8_z0_z18: +** ... +** sub za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_write_w8_z0_z18, svuint64x4_t, + svsub_write_za64_u64_vg1x4 (w8, z0, z18), + svsub_write_za64_vg1x4 (w8, z0, z18)) + +/* +** sub_write_w8_z18_z28: +** ... +** sub za\.d\[w8, 0, vgx4\], [^\n]+, {z28\.d - z31\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8_z18_z28, svuint64x4_t, + svsub_write_za64_u64_vg1x4 (w8, z18, z28), + svsub_write_za64_vg1x4 (w8, z18, z28)) + +/* +** sub_write_w8_z28_z23: +** ... 
+** sub za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}, [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_write_w8_z28_z23, svuint64x4_t, + svsub_write_za64_u64_vg1x4 (w8, z28, z23), + svsub_write_za64_vg1x4 (w8, z28, z23)) + +/* +** sub_write_w8p7_z4_z0: +** sub za\.d\[w8, 7, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8p7_z4_z0, svuint64x4_t, + svsub_write_za64_u64_vg1x4 (w8 + 7, z4, z0), + svsub_write_za64_vg1x4 (w8 + 7, z4, z0)) + +/* +** sub_write_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8p8_z4_z4, svuint64x4_t, + svsub_write_za64_u64_vg1x4 (w8 + 8, z4, z4), + svsub_write_za64_vg1x4 (w8 + 8, z4, z4)) + +/* +** sub_write_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sub za\.d\[\1, 0, vgx4\], {z4\.d - z7\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_write_w8m1_z4_z0, svuint64x4_t, + svsub_write_za64_u64_vg1x4 (w8 - 1, z4, z0), + svsub_write_za64_vg1x4 (w8 - 1, z4, z0)) + +/* +** sub_write_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_0_z1_z0, svuint64x4_t, svuint64_t, + svsub_write_single_za64_u64_vg1x4 (0, z1, z0), + svsub_write_za64_vg1x4 (0, z1, z0)) + +/* +** sub_write_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w0_z1_z0, svuint64x4_t, svuint64_t, + svsub_write_single_za64_u64_vg1x4 (w0, z1, z0), + svsub_write_za64_vg1x4 (w0, z1, z0)) + +/* +** sub_write_single_w8_z1_z0: +** sub za\.d\[w8, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8_z1_z0, svuint64x4_t, svuint64_t, + svsub_write_single_za64_u64_vg1x4 (w8, z1, z0), + svsub_write_za64_vg1x4 (w8, z1, z0)) + +/* +** sub_write_single_w8p7_z1_z0: +** sub za\.d\[w8, 7, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8p7_z1_z0, svuint64x4_t, svuint64_t, + svsub_write_single_za64_u64_vg1x4 (w8 + 7, z1, z0), + svsub_write_za64_vg1x4 (w8 + 7, z1, z0)) + +/* +** sub_write_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8p8_z1_z0, svuint64x4_t, svuint64_t, + svsub_write_single_za64_u64_vg1x4 (w8 + 8, z1, z0), + svsub_write_za64_vg1x4 (w8 + 8, z1, z0)) + +/* +** sub_write_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** sub za\.d\[\1, 0, vgx4\], {z1\.d - z4\.d}, z0\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w0m1_z1_z0, svuint64x4_t, svuint64_t, + svsub_write_single_za64_u64_vg1x4 (w0 - 1, z1, z0), + svsub_write_za64_vg1x4 (w0 - 1, z1, z0)) + +/* +** sub_write_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** sub za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}, z15\.d +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (sub_write_single_w8_z0_z15, svuint64x4_t, svuint64_t, + svsub_write_single_za64_u64_vg1x4 (w8, z0, z15), + svsub_write_za64_vg1x4 (w8, z0, z15)) + +/* +** sub_write_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** sub za\.d\[w8, 0, vgx4\], {z20\.d - z23\.d}, \1\.d +** ret +*/ +TEST_ZA_SINGLE (sub_write_single_w8_z20_z16, svuint64x4_t, svuint64_t, + svsub_write_single_za64_u64_vg1x4 (w8, z20, z16), + svsub_write_za64_vg1x4 (w8, z20, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_f32_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_f32_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..0d88996d5f5f5b9940afe9ecba0fb37d710f8bf3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_f32_vg1x2.c @@ -0,0 +1,122 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** sub_0_z0: +** mov (w8|w9|w10|w11), #?0 +** fsub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_0_z0, svfloat32x2_t, + svsub_za32_f32_vg1x2 (0, z0), + svsub_za32_vg1x2 (0, z0)) + +/* +** sub_w0_z0: +** mov (w8|w9|w10|w11), w0 +** fsub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w0_z0, svfloat32x2_t, + svsub_za32_f32_vg1x2 (w0, z0), + svsub_za32_vg1x2 (w0, z0)) + +/* +** sub_w7_z0: +** mov (w8|w9|w10|w11), w7 +** fsub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w7_z0, svfloat32x2_t, + svsub_za32_f32_vg1x2 (w7, z0), + svsub_za32_vg1x2 (w7, z0)) + +/* +** sub_w8_z0: +** fsub za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w8_z0, svfloat32x2_t, + svsub_za32_f32_vg1x2 (w8, z0), + svsub_za32_vg1x2 (w8, z0)) + +/* +** sub_w11_z0: +** fsub za\.s\[w11, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w11_z0, svfloat32x2_t, + svsub_za32_f32_vg1x2 (w11, z0), + svsub_za32_vg1x2 (w11, z0)) + + +/* +** sub_w12_z0: +** mov (w8|w9|w10|w11), w12 +** fsub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w12_z0, svfloat32x2_t, + svsub_za32_f32_vg1x2 (w12, z0), + svsub_za32_vg1x2 (w12, z0)) + +/* +** sub_w8p7_z0: +** fsub za\.s\[w8, 7, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w8p7_z0, svfloat32x2_t, + svsub_za32_f32_vg1x2 (w8 + 7, z0), + svsub_za32_vg1x2 (w8 + 7, z0)) + +/* +** sub_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fsub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w8p8_z0, svfloat32x2_t, + svsub_za32_f32_vg1x2 (w8 + 8, z0), + svsub_za32_vg1x2 (w8 + 8, z0)) + +/* +** sub_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fsub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w8m1_z0, svfloat32x2_t, + svsub_za32_f32_vg1x2 (w8 - 1, z0), + svsub_za32_vg1x2 (w8 - 1, z0)) + +/* +** sub_w8_z18: +** fsub za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s} +** ret +*/ +TEST_ZA_XN (sub_w8_z18, svfloat32x2_t, + svsub_za32_f32_vg1x2 (w8, z18), + svsub_za32_vg1x2 (w8, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. 
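Aside on the intrinsics exercised above (illustrative, not part of the patch): the sub_write forms take two multi-vector operands and write their difference into consecutive ZA slices. A minimal sketch of a call site, assuming only the ACLE conventions the tests already rely on (the function name is hypothetical):

```c
#include <arm_sme.h>

/* Illustrative only: za[slice + i] = zn_i - zm_i for i = 0, 1.
   Compiles to "sub za.s[wN, 0, vgx2], {...}, {...}" once the slice
   index is in w8-w11, as the tests above expect.  */
void
sub_write_pair (uint32_t slice, svuint32x2_t zn, svuint32x2_t zm)
  __arm_streaming __arm_inout("za")
{
  svsub_write_za32_u32_vg1x2 (slice, zn, zm);
}
```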
*/ + +/* +** sub_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** fsub za\.s\[w8, 0, vgx2\], [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_w8_z23, svfloat32x2_t, + svsub_za32_f32_vg1x2 (w8, z23), + svsub_za32_vg1x2 (w8, z23)) + +/* +** sub_w8_z28: +** fsub za\.s\[w8, 0, vgx2\], {z28\.s - z29\.s} +** ret +*/ +TEST_ZA_XN (sub_w8_z28, svfloat32x2_t, + svsub_za32_f32_vg1x2 (w8, z28), + svsub_za32_vg1x2 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_f32_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_f32_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..f2ad7ff7e75c0edb65928194cd65ae1517e4bc8e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_f32_vg1x4.c @@ -0,0 +1,137 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** sub_0_z0: +** mov (w8|w9|w10|w11), #?0 +** fsub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_0_z0, svfloat32x4_t, + svsub_za32_f32_vg1x4 (0, z0), + svsub_za32_vg1x4 (0, z0)) + +/* +** sub_w0_z0: +** mov (w8|w9|w10|w11), w0 +** fsub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w0_z0, svfloat32x4_t, + svsub_za32_f32_vg1x4 (w0, z0), + svsub_za32_vg1x4 (w0, z0)) + +/* +** sub_w7_z0: +** mov (w8|w9|w10|w11), w7 +** fsub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w7_z0, svfloat32x4_t, + svsub_za32_f32_vg1x4 (w7, z0), + svsub_za32_vg1x4 (w7, z0)) + +/* +** sub_w8_z0: +** fsub za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w8_z0, svfloat32x4_t, + svsub_za32_f32_vg1x4 (w8, z0), + svsub_za32_vg1x4 (w8, z0)) + +/* +** sub_w11_z0: +** fsub za\.s\[w11, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w11_z0, svfloat32x4_t, + svsub_za32_f32_vg1x4 (w11, z0), + svsub_za32_vg1x4 (w11, z0)) + + +/* +** sub_w12_z0: +** mov (w8|w9|w10|w11), w12 +** fsub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w12_z0, svfloat32x4_t, + svsub_za32_f32_vg1x4 (w12, z0), + svsub_za32_vg1x4 (w12, z0)) + +/* +** sub_w8p7_z0: +** fsub za\.s\[w8, 7, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w8p7_z0, svfloat32x4_t, + svsub_za32_f32_vg1x4 (w8 + 7, z0), + svsub_za32_vg1x4 (w8 + 7, z0)) + +/* +** sub_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** fsub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w8p8_z0, svfloat32x4_t, + svsub_za32_f32_vg1x4 (w8 + 8, z0), + svsub_za32_vg1x4 (w8 + 8, z0)) + +/* +** sub_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** fsub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w8m1_z0, svfloat32x4_t, + svsub_za32_f32_vg1x4 (w8 - 1, z0), + svsub_za32_vg1x4 (w8 - 1, z0)) + +/* +** sub_w8_z4: +** fsub za\.s\[w8, 0, vgx4\], {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (sub_w8_z4, svfloat32x4_t, + svsub_za32_f32_vg1x4 (w8, z4), + svsub_za32_vg1x4 (w8, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. 
*/ + +/* +** sub_w8_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fsub za\.s\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_w8_z18, svfloat32x4_t, + svsub_za32_f32_vg1x4 (w8, z18), + svsub_za32_vg1x4 (w8, z18)) + +/* +** sub_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fsub za\.s\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_w8_z23, svfloat32x4_t, + svsub_za32_f32_vg1x4 (w8, z23), + svsub_za32_vg1x4 (w8, z23)) + +/* +** sub_w8_z28: +** fsub za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s} +** ret +*/ +TEST_ZA_XN (sub_w8_z28, svfloat32x4_t, + svsub_za32_f32_vg1x4 (w8, z28), + svsub_za32_vg1x4 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_s32_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_s32_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..f941c43c0f6d253d334e2d4b8eaedc66954b18d4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_s32_vg1x2.c @@ -0,0 +1,122 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** sub_0_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_0_z0, svint32x2_t, + svsub_za32_s32_vg1x2 (0, z0), + svsub_za32_vg1x2 (0, z0)) + +/* +** sub_w0_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w0_z0, svint32x2_t, + svsub_za32_s32_vg1x2 (w0, z0), + svsub_za32_vg1x2 (w0, z0)) + +/* +** sub_w7_z0: +** mov (w8|w9|w10|w11), w7 +** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w7_z0, svint32x2_t, + svsub_za32_s32_vg1x2 (w7, z0), + svsub_za32_vg1x2 (w7, z0)) + +/* +** sub_w8_z0: +** sub za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w8_z0, svint32x2_t, + svsub_za32_s32_vg1x2 (w8, z0), + svsub_za32_vg1x2 (w8, z0)) + +/* +** sub_w11_z0: +** sub za\.s\[w11, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w11_z0, svint32x2_t, + svsub_za32_s32_vg1x2 (w11, z0), + svsub_za32_vg1x2 (w11, z0)) + + +/* +** sub_w12_z0: +** mov (w8|w9|w10|w11), w12 +** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w12_z0, svint32x2_t, + svsub_za32_s32_vg1x2 (w12, z0), + svsub_za32_vg1x2 (w12, z0)) + +/* +** sub_w8p7_z0: +** sub za\.s\[w8, 7, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w8p7_z0, svint32x2_t, + svsub_za32_s32_vg1x2 (w8 + 7, z0), + svsub_za32_vg1x2 (w8 + 7, z0)) + +/* +** sub_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w8p8_z0, svint32x2_t, + svsub_za32_s32_vg1x2 (w8 + 8, z0), + svsub_za32_vg1x2 (w8 + 8, z0)) + +/* +** sub_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w8m1_z0, svint32x2_t, + svsub_za32_s32_vg1x2 (w8 - 1, z0), + svsub_za32_vg1x2 (w8 - 1, z0)) + +/* +** sub_w8_z18: +** sub za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s} +** ret +*/ +TEST_ZA_XN (sub_w8_z18, svint32x2_t, + svsub_za32_s32_vg1x2 (w8, z18), + svsub_za32_vg1x2 (w8, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. 
*/ + +/* +** sub_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** sub za\.s\[w8, 0, vgx2\], [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_w8_z23, svint32x2_t, + svsub_za32_s32_vg1x2 (w8, z23), + svsub_za32_vg1x2 (w8, z23)) + +/* +** sub_w8_z28: +** sub za\.s\[w8, 0, vgx2\], {z28\.s - z29\.s} +** ret +*/ +TEST_ZA_XN (sub_w8_z28, svint32x2_t, + svsub_za32_s32_vg1x2 (w8, z28), + svsub_za32_vg1x2 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_s32_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_s32_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..798159c1cf5740cc688de5620e577194f8244f2c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_s32_vg1x4.c @@ -0,0 +1,137 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** sub_0_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_0_z0, svint32x4_t, + svsub_za32_s32_vg1x4 (0, z0), + svsub_za32_vg1x4 (0, z0)) + +/* +** sub_w0_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w0_z0, svint32x4_t, + svsub_za32_s32_vg1x4 (w0, z0), + svsub_za32_vg1x4 (w0, z0)) + +/* +** sub_w7_z0: +** mov (w8|w9|w10|w11), w7 +** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w7_z0, svint32x4_t, + svsub_za32_s32_vg1x4 (w7, z0), + svsub_za32_vg1x4 (w7, z0)) + +/* +** sub_w8_z0: +** sub za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w8_z0, svint32x4_t, + svsub_za32_s32_vg1x4 (w8, z0), + svsub_za32_vg1x4 (w8, z0)) + +/* +** sub_w11_z0: +** sub za\.s\[w11, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w11_z0, svint32x4_t, + svsub_za32_s32_vg1x4 (w11, z0), + svsub_za32_vg1x4 (w11, z0)) + + +/* +** sub_w12_z0: +** mov (w8|w9|w10|w11), w12 +** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w12_z0, svint32x4_t, + svsub_za32_s32_vg1x4 (w12, z0), + svsub_za32_vg1x4 (w12, z0)) + +/* +** sub_w8p7_z0: +** sub za\.s\[w8, 7, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w8p7_z0, svint32x4_t, + svsub_za32_s32_vg1x4 (w8 + 7, z0), + svsub_za32_vg1x4 (w8 + 7, z0)) + +/* +** sub_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w8p8_z0, svint32x4_t, + svsub_za32_s32_vg1x4 (w8 + 8, z0), + svsub_za32_vg1x4 (w8 + 8, z0)) + +/* +** sub_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w8m1_z0, svint32x4_t, + svsub_za32_s32_vg1x4 (w8 - 1, z0), + svsub_za32_vg1x4 (w8 - 1, z0)) + +/* +** sub_w8_z4: +** sub za\.s\[w8, 0, vgx4\], {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (sub_w8_z4, svint32x4_t, + svsub_za32_s32_vg1x4 (w8, z4), + svsub_za32_vg1x4 (w8, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. 
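By contrast with sub_write, the plain sub_za form tested in these files has no separate first source group: it subtracts the vector group from the current ZA contents, and the mnemonic (FSUB versus SUB) follows the element type. A small illustrative sketch under the same assumptions (hypothetical function name):

```c
#include <arm_sme.h>

/* Illustrative only: za[slice + i] -= zm_i.  With svfloat32x2_t this
   becomes FSUB; with the s32/u32 tuple types it becomes SUB, as the
   expected assembly in these files shows.  */
void
sub_from_za (uint32_t slice, svfloat32x2_t zm)
  __arm_streaming __arm_inout("za")
{
  svsub_za32_f32_vg1x2 (slice, zm);
}
```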
*/ + +/* +** sub_w8_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sub za\.s\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_w8_z18, svint32x4_t, + svsub_za32_s32_vg1x4 (w8, z18), + svsub_za32_vg1x4 (w8, z18)) + +/* +** sub_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sub za\.s\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_w8_z23, svint32x4_t, + svsub_za32_s32_vg1x4 (w8, z23), + svsub_za32_vg1x4 (w8, z23)) + +/* +** sub_w8_z28: +** sub za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s} +** ret +*/ +TEST_ZA_XN (sub_w8_z28, svint32x4_t, + svsub_za32_s32_vg1x4 (w8, z28), + svsub_za32_vg1x4 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_u32_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_u32_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..2defb790ea69b70158c46ca42aac24597e4920fa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_u32_vg1x2.c @@ -0,0 +1,122 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** sub_0_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_0_z0, svuint32x2_t, + svsub_za32_u32_vg1x2 (0, z0), + svsub_za32_vg1x2 (0, z0)) + +/* +** sub_w0_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w0_z0, svuint32x2_t, + svsub_za32_u32_vg1x2 (w0, z0), + svsub_za32_vg1x2 (w0, z0)) + +/* +** sub_w7_z0: +** mov (w8|w9|w10|w11), w7 +** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w7_z0, svuint32x2_t, + svsub_za32_u32_vg1x2 (w7, z0), + svsub_za32_vg1x2 (w7, z0)) + +/* +** sub_w8_z0: +** sub za\.s\[w8, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w8_z0, svuint32x2_t, + svsub_za32_u32_vg1x2 (w8, z0), + svsub_za32_vg1x2 (w8, z0)) + +/* +** sub_w11_z0: +** sub za\.s\[w11, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w11_z0, svuint32x2_t, + svsub_za32_u32_vg1x2 (w11, z0), + svsub_za32_vg1x2 (w11, z0)) + + +/* +** sub_w12_z0: +** mov (w8|w9|w10|w11), w12 +** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w12_z0, svuint32x2_t, + svsub_za32_u32_vg1x2 (w12, z0), + svsub_za32_vg1x2 (w12, z0)) + +/* +** sub_w8p7_z0: +** sub za\.s\[w8, 7, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w8p7_z0, svuint32x2_t, + svsub_za32_u32_vg1x2 (w8 + 7, z0), + svsub_za32_vg1x2 (w8 + 7, z0)) + +/* +** sub_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w8p8_z0, svuint32x2_t, + svsub_za32_u32_vg1x2 (w8 + 8, z0), + svsub_za32_vg1x2 (w8 + 8, z0)) + +/* +** sub_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sub za\.s\[\1, 0, vgx2\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (sub_w8m1_z0, svuint32x2_t, + svsub_za32_u32_vg1x2 (w8 - 1, z0), + svsub_za32_vg1x2 (w8 - 1, z0)) + +/* +** sub_w8_z18: +** sub za\.s\[w8, 0, vgx2\], {z18\.s - z19\.s} +** ret +*/ +TEST_ZA_XN (sub_w8_z18, svuint32x2_t, + svsub_za32_u32_vg1x2 (w8, z18), + svsub_za32_vg1x2 (w8, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. 
*/ + +/* +** sub_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** sub za\.s\[w8, 0, vgx2\], [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_w8_z23, svuint32x2_t, + svsub_za32_u32_vg1x2 (w8, z23), + svsub_za32_vg1x2 (w8, z23)) + +/* +** sub_w8_z28: +** sub za\.s\[w8, 0, vgx2\], {z28\.s - z29\.s} +** ret +*/ +TEST_ZA_XN (sub_w8_z28, svuint32x2_t, + svsub_za32_u32_vg1x2 (w8, z28), + svsub_za32_vg1x2 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_u32_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_u32_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..615f67e92a22a3a06a7301f76e2a046e6c766642 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za32_u32_vg1x4.c @@ -0,0 +1,137 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** sub_0_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_0_z0, svuint32x4_t, + svsub_za32_u32_vg1x4 (0, z0), + svsub_za32_vg1x4 (0, z0)) + +/* +** sub_w0_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w0_z0, svuint32x4_t, + svsub_za32_u32_vg1x4 (w0, z0), + svsub_za32_vg1x4 (w0, z0)) + +/* +** sub_w7_z0: +** mov (w8|w9|w10|w11), w7 +** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w7_z0, svuint32x4_t, + svsub_za32_u32_vg1x4 (w7, z0), + svsub_za32_vg1x4 (w7, z0)) + +/* +** sub_w8_z0: +** sub za\.s\[w8, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w8_z0, svuint32x4_t, + svsub_za32_u32_vg1x4 (w8, z0), + svsub_za32_vg1x4 (w8, z0)) + +/* +** sub_w11_z0: +** sub za\.s\[w11, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w11_z0, svuint32x4_t, + svsub_za32_u32_vg1x4 (w11, z0), + svsub_za32_vg1x4 (w11, z0)) + + +/* +** sub_w12_z0: +** mov (w8|w9|w10|w11), w12 +** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w12_z0, svuint32x4_t, + svsub_za32_u32_vg1x4 (w12, z0), + svsub_za32_vg1x4 (w12, z0)) + +/* +** sub_w8p7_z0: +** sub za\.s\[w8, 7, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w8p7_z0, svuint32x4_t, + svsub_za32_u32_vg1x4 (w8 + 7, z0), + svsub_za32_vg1x4 (w8 + 7, z0)) + +/* +** sub_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w8p8_z0, svuint32x4_t, + svsub_za32_u32_vg1x4 (w8 + 8, z0), + svsub_za32_vg1x4 (w8 + 8, z0)) + +/* +** sub_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sub za\.s\[\1, 0, vgx4\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (sub_w8m1_z0, svuint32x4_t, + svsub_za32_u32_vg1x4 (w8 - 1, z0), + svsub_za32_vg1x4 (w8 - 1, z0)) + +/* +** sub_w8_z4: +** sub za\.s\[w8, 0, vgx4\], {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (sub_w8_z4, svuint32x4_t, + svsub_za32_u32_vg1x4 (w8, z4), + svsub_za32_vg1x4 (w8, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. 

*/
+
+/*
+** sub_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sub za\.s\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z18, svuint32x4_t,
+            svsub_za32_u32_vg1x4 (w8, z18),
+            svsub_za32_vg1x4 (w8, z18))
+
+/*
+** sub_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** sub za\.s\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z23, svuint32x4_t,
+            svsub_za32_u32_vg1x4 (w8, z23),
+            svsub_za32_vg1x4 (w8, z23))
+
+/*
+** sub_w8_z28:
+** sub za\.s\[w8, 0, vgx4\], {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z28, svuint32x4_t,
+            svsub_za32_u32_vg1x4 (w8, z28),
+            svsub_za32_vg1x4 (w8, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_f64_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_f64_vg1x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..6f1364907707af1d7a66e498103b0190b9fd5bca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_f64_vg1x2.c
@@ -0,0 +1,124 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-f64f64"
+
+#include "test_sme2_acle.h"
+
+/*
+** sub_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fsub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_0_z0, svfloat64x2_t,
+            svsub_za64_f64_vg1x2 (0, z0),
+            svsub_za64_vg1x2 (0, z0))
+
+/*
+** sub_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** fsub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w0_z0, svfloat64x2_t,
+            svsub_za64_f64_vg1x2 (w0, z0),
+            svsub_za64_vg1x2 (w0, z0))
+
+/*
+** sub_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** fsub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w7_z0, svfloat64x2_t,
+            svsub_za64_f64_vg1x2 (w7, z0),
+            svsub_za64_vg1x2 (w7, z0))
+
+/*
+** sub_w8_z0:
+** fsub za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z0, svfloat64x2_t,
+            svsub_za64_f64_vg1x2 (w8, z0),
+            svsub_za64_vg1x2 (w8, z0))
+
+/*
+** sub_w11_z0:
+** fsub za\.d\[w11, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w11_z0, svfloat64x2_t,
+            svsub_za64_f64_vg1x2 (w11, z0),
+            svsub_za64_vg1x2 (w11, z0))
+
+
+/*
+** sub_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** fsub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w12_z0, svfloat64x2_t,
+            svsub_za64_f64_vg1x2 (w12, z0),
+            svsub_za64_vg1x2 (w12, z0))
+
+/*
+** sub_w8p7_z0:
+** fsub za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8p7_z0, svfloat64x2_t,
+            svsub_za64_f64_vg1x2 (w8 + 7, z0),
+            svsub_za64_vg1x2 (w8 + 7, z0))
+
+/*
+** sub_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fsub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8p8_z0, svfloat64x2_t,
+            svsub_za64_f64_vg1x2 (w8 + 8, z0),
+            svsub_za64_vg1x2 (w8 + 8, z0))
+
+/*
+** sub_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fsub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8m1_z0, svfloat64x2_t,
+            svsub_za64_f64_vg1x2 (w8 - 1, z0),
+            svsub_za64_vg1x2 (w8 - 1, z0))
+
+/*
+** sub_w8_z18:
+** fsub za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z18, svfloat64x2_t,
+            svsub_za64_f64_vg1x2 (w8, z18),
+            svsub_za64_vg1x2 (w8, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers.  */
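One detail worth calling out, since every file in the patch repeats it: each TEST_ZA_XN / TEST_ZA_SINGLE invocation spells the same operation twice, once through the fully suffixed intrinsic and once through the type-generic overload, so both spellings are checked against the same expected assembly. A reduced illustration (hypothetical wrapper, real intrinsic spellings from the tests):

```c
#include <arm_sme.h>

/* Both calls emit the same SUB; the overload resolves on zm's type.  */
void
sub_two_spellings (uint32_t slice, svuint32x2_t zm)
  __arm_streaming __arm_inout("za")
{
  svsub_za32_u32_vg1x2 (slice, zm);  /* explicit element type */
  svsub_za32_vg1x2 (slice, zm);      /* overloaded form */
}
```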
+
+/*
+** sub_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** fsub za\.d\[w8, 0, vgx2\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (sub_w8_z23, svfloat64x2_t,
+            svsub_za64_f64_vg1x2 (w8, z23),
+            svsub_za64_vg1x2 (w8, z23))
+
+/*
+** sub_w8_z28:
+** fsub za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z28, svfloat64x2_t,
+            svsub_za64_f64_vg1x2 (w8, z28),
+            svsub_za64_vg1x2 (w8, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_f64_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_f64_vg1x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..032122ddec5c91b3747b67eb820e7de4ea8d419b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_f64_vg1x4.c
@@ -0,0 +1,139 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-f64f64"
+
+#include "test_sme2_acle.h"
+
+/*
+** sub_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** fsub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_0_z0, svfloat64x4_t,
+            svsub_za64_f64_vg1x4 (0, z0),
+            svsub_za64_vg1x4 (0, z0))
+
+/*
+** sub_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** fsub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w0_z0, svfloat64x4_t,
+            svsub_za64_f64_vg1x4 (w0, z0),
+            svsub_za64_vg1x4 (w0, z0))
+
+/*
+** sub_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** fsub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w7_z0, svfloat64x4_t,
+            svsub_za64_f64_vg1x4 (w7, z0),
+            svsub_za64_vg1x4 (w7, z0))
+
+/*
+** sub_w8_z0:
+** fsub za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z0, svfloat64x4_t,
+            svsub_za64_f64_vg1x4 (w8, z0),
+            svsub_za64_vg1x4 (w8, z0))
+
+/*
+** sub_w11_z0:
+** fsub za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w11_z0, svfloat64x4_t,
+            svsub_za64_f64_vg1x4 (w11, z0),
+            svsub_za64_vg1x4 (w11, z0))
+
+
+/*
+** sub_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** fsub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w12_z0, svfloat64x4_t,
+            svsub_za64_f64_vg1x4 (w12, z0),
+            svsub_za64_vg1x4 (w12, z0))
+
+/*
+** sub_w8p7_z0:
+** fsub za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8p7_z0, svfloat64x4_t,
+            svsub_za64_f64_vg1x4 (w8 + 7, z0),
+            svsub_za64_vg1x4 (w8 + 7, z0))
+
+/*
+** sub_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** fsub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8p8_z0, svfloat64x4_t,
+            svsub_za64_f64_vg1x4 (w8 + 8, z0),
+            svsub_za64_vg1x4 (w8 + 8, z0))
+
+/*
+** sub_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** fsub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8m1_z0, svfloat64x4_t,
+            svsub_za64_f64_vg1x4 (w8 - 1, z0),
+            svsub_za64_vg1x4 (w8 - 1, z0))
+
+/*
+** sub_w8_z4:
+** fsub za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (sub_w8_z4, svfloat64x4_t,
+            svsub_za64_f64_vg1x4 (w8, z4),
+            svsub_za64_vg1x4 (w8, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers.
*/ + +/* +** sub_w8_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fsub za\.d\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_w8_z18, svfloat64x4_t, + svsub_za64_f64_vg1x4 (w8, z18), + svsub_za64_vg1x4 (w8, z18)) + +/* +** sub_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fsub za\.d\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_w8_z23, svfloat64x4_t, + svsub_za64_f64_vg1x4 (w8, z23), + svsub_za64_vg1x4 (w8, z23)) + +/* +** sub_w8_z28: +** fsub za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d} +** ret +*/ +TEST_ZA_XN (sub_w8_z28, svfloat64x4_t, + svsub_za64_f64_vg1x4 (w8, z28), + svsub_za64_vg1x4 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_s64_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_s64_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..72442746a03e31b9b923301b0d54d94e95dca435 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_s64_vg1x2.c @@ -0,0 +1,124 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** sub_0_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_0_z0, svint64x2_t, + svsub_za64_s64_vg1x2 (0, z0), + svsub_za64_vg1x2 (0, z0)) + +/* +** sub_w0_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_w0_z0, svint64x2_t, + svsub_za64_s64_vg1x2 (w0, z0), + svsub_za64_vg1x2 (w0, z0)) + +/* +** sub_w7_z0: +** mov (w8|w9|w10|w11), w7 +** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_w7_z0, svint64x2_t, + svsub_za64_s64_vg1x2 (w7, z0), + svsub_za64_vg1x2 (w7, z0)) + +/* +** sub_w8_z0: +** sub za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_w8_z0, svint64x2_t, + svsub_za64_s64_vg1x2 (w8, z0), + svsub_za64_vg1x2 (w8, z0)) + +/* +** sub_w11_z0: +** sub za\.d\[w11, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_w11_z0, svint64x2_t, + svsub_za64_s64_vg1x2 (w11, z0), + svsub_za64_vg1x2 (w11, z0)) + + +/* +** sub_w12_z0: +** mov (w8|w9|w10|w11), w12 +** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_w12_z0, svint64x2_t, + svsub_za64_s64_vg1x2 (w12, z0), + svsub_za64_vg1x2 (w12, z0)) + +/* +** sub_w8p7_z0: +** sub za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_w8p7_z0, svint64x2_t, + svsub_za64_s64_vg1x2 (w8 + 7, z0), + svsub_za64_vg1x2 (w8 + 7, z0)) + +/* +** sub_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_w8p8_z0, svint64x2_t, + svsub_za64_s64_vg1x2 (w8 + 8, z0), + svsub_za64_vg1x2 (w8 + 8, z0)) + +/* +** sub_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_w8m1_z0, svint64x2_t, + svsub_za64_s64_vg1x2 (w8 - 1, z0), + svsub_za64_vg1x2 (w8 - 1, z0)) + +/* +** sub_w8_z18: +** sub za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d} +** ret +*/ +TEST_ZA_XN (sub_w8_z18, svint64x2_t, + svsub_za64_s64_vg1x2 (w8, z18), + svsub_za64_vg1x2 (w8, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. 
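The za64 integer files begin with a target pragma because 64-bit integer ZA arithmetic belongs to FEAT_SME_I16I64 rather than to base SME2. Outside the harness, equivalent gating might look like the sketch below; it assumes SME2 itself is already enabled by the command-line options, as the sme2 testsuite directory arranges:

```c
#pragma GCC target "+sme-i16i64"

#include <arm_sme.h>

/* Illustrative only: without +sme-i16i64 this call is rejected.  */
void
sub64_from_za (uint32_t slice, svint64x2_t zm)
  __arm_streaming __arm_inout("za")
{
  svsub_za64_s64_vg1x2 (slice, zm);  /* sub za.d[..., vgx2], {...} */
}
```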
*/ + +/* +** sub_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** sub za\.d\[w8, 0, vgx2\], [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_w8_z23, svint64x2_t, + svsub_za64_s64_vg1x2 (w8, z23), + svsub_za64_vg1x2 (w8, z23)) + +/* +** sub_w8_z28: +** sub za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d} +** ret +*/ +TEST_ZA_XN (sub_w8_z28, svint64x2_t, + svsub_za64_s64_vg1x2 (w8, z28), + svsub_za64_vg1x2 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_s64_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_s64_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..57971ee7126efcff38a3dc7c4bf0e6009fe8bf1f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_s64_vg1x4.c @@ -0,0 +1,139 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** sub_0_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_0_z0, svint64x4_t, + svsub_za64_s64_vg1x4 (0, z0), + svsub_za64_vg1x4 (0, z0)) + +/* +** sub_w0_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_w0_z0, svint64x4_t, + svsub_za64_s64_vg1x4 (w0, z0), + svsub_za64_vg1x4 (w0, z0)) + +/* +** sub_w7_z0: +** mov (w8|w9|w10|w11), w7 +** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_w7_z0, svint64x4_t, + svsub_za64_s64_vg1x4 (w7, z0), + svsub_za64_vg1x4 (w7, z0)) + +/* +** sub_w8_z0: +** sub za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_w8_z0, svint64x4_t, + svsub_za64_s64_vg1x4 (w8, z0), + svsub_za64_vg1x4 (w8, z0)) + +/* +** sub_w11_z0: +** sub za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_w11_z0, svint64x4_t, + svsub_za64_s64_vg1x4 (w11, z0), + svsub_za64_vg1x4 (w11, z0)) + + +/* +** sub_w12_z0: +** mov (w8|w9|w10|w11), w12 +** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_w12_z0, svint64x4_t, + svsub_za64_s64_vg1x4 (w12, z0), + svsub_za64_vg1x4 (w12, z0)) + +/* +** sub_w8p7_z0: +** sub za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_w8p7_z0, svint64x4_t, + svsub_za64_s64_vg1x4 (w8 + 7, z0), + svsub_za64_vg1x4 (w8 + 7, z0)) + +/* +** sub_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_w8p8_z0, svint64x4_t, + svsub_za64_s64_vg1x4 (w8 + 8, z0), + svsub_za64_vg1x4 (w8 + 8, z0)) + +/* +** sub_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_w8m1_z0, svint64x4_t, + svsub_za64_s64_vg1x4 (w8 - 1, z0), + svsub_za64_vg1x4 (w8 - 1, z0)) + +/* +** sub_w8_z4: +** sub za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d} +** ret +*/ +TEST_ZA_XN (sub_w8_z4, svint64x4_t, + svsub_za64_s64_vg1x4 (w8, z4), + svsub_za64_vg1x4 (w8, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. 
*/ + +/* +** sub_w8_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sub za\.d\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_w8_z18, svint64x4_t, + svsub_za64_s64_vg1x4 (w8, z18), + svsub_za64_vg1x4 (w8, z18)) + +/* +** sub_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sub za\.d\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_w8_z23, svint64x4_t, + svsub_za64_s64_vg1x4 (w8, z23), + svsub_za64_vg1x4 (w8, z23)) + +/* +** sub_w8_z28: +** sub za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d} +** ret +*/ +TEST_ZA_XN (sub_w8_z28, svint64x4_t, + svsub_za64_s64_vg1x4 (w8, z28), + svsub_za64_vg1x4 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_u64_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_u64_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..968e252800ea6b464a2be132355be855f5066c28 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_u64_vg1x2.c @@ -0,0 +1,124 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** sub_0_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_0_z0, svuint64x2_t, + svsub_za64_u64_vg1x2 (0, z0), + svsub_za64_vg1x2 (0, z0)) + +/* +** sub_w0_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_w0_z0, svuint64x2_t, + svsub_za64_u64_vg1x2 (w0, z0), + svsub_za64_vg1x2 (w0, z0)) + +/* +** sub_w7_z0: +** mov (w8|w9|w10|w11), w7 +** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_w7_z0, svuint64x2_t, + svsub_za64_u64_vg1x2 (w7, z0), + svsub_za64_vg1x2 (w7, z0)) + +/* +** sub_w8_z0: +** sub za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_w8_z0, svuint64x2_t, + svsub_za64_u64_vg1x2 (w8, z0), + svsub_za64_vg1x2 (w8, z0)) + +/* +** sub_w11_z0: +** sub za\.d\[w11, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_w11_z0, svuint64x2_t, + svsub_za64_u64_vg1x2 (w11, z0), + svsub_za64_vg1x2 (w11, z0)) + + +/* +** sub_w12_z0: +** mov (w8|w9|w10|w11), w12 +** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_w12_z0, svuint64x2_t, + svsub_za64_u64_vg1x2 (w12, z0), + svsub_za64_vg1x2 (w12, z0)) + +/* +** sub_w8p7_z0: +** sub za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_w8p7_z0, svuint64x2_t, + svsub_za64_u64_vg1x2 (w8 + 7, z0), + svsub_za64_vg1x2 (w8 + 7, z0)) + +/* +** sub_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_w8p8_z0, svuint64x2_t, + svsub_za64_u64_vg1x2 (w8 + 8, z0), + svsub_za64_vg1x2 (w8 + 8, z0)) + +/* +** sub_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sub za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (sub_w8m1_z0, svuint64x2_t, + svsub_za64_u64_vg1x2 (w8 - 1, z0), + svsub_za64_vg1x2 (w8 - 1, z0)) + +/* +** sub_w8_z18: +** sub za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d} +** ret +*/ +TEST_ZA_XN (sub_w8_z18, svuint64x2_t, + svsub_za64_u64_vg1x2 (w8, z18), + svsub_za64_vg1x2 (w8, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. 
*/ + +/* +** sub_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** sub za\.d\[w8, 0, vgx2\], [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_w8_z23, svuint64x2_t, + svsub_za64_u64_vg1x2 (w8, z23), + svsub_za64_vg1x2 (w8, z23)) + +/* +** sub_w8_z28: +** sub za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d} +** ret +*/ +TEST_ZA_XN (sub_w8_z28, svuint64x2_t, + svsub_za64_u64_vg1x2 (w8, z28), + svsub_za64_vg1x2 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_u64_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_u64_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..e9a60599ceeb67012dac656163a2aebe37cb55f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za64_u64_vg1x4.c @@ -0,0 +1,139 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** sub_0_z0: +** mov (w8|w9|w10|w11), #?0 +** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_0_z0, svuint64x4_t, + svsub_za64_u64_vg1x4 (0, z0), + svsub_za64_vg1x4 (0, z0)) + +/* +** sub_w0_z0: +** mov (w8|w9|w10|w11), w0 +** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_w0_z0, svuint64x4_t, + svsub_za64_u64_vg1x4 (w0, z0), + svsub_za64_vg1x4 (w0, z0)) + +/* +** sub_w7_z0: +** mov (w8|w9|w10|w11), w7 +** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_w7_z0, svuint64x4_t, + svsub_za64_u64_vg1x4 (w7, z0), + svsub_za64_vg1x4 (w7, z0)) + +/* +** sub_w8_z0: +** sub za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_w8_z0, svuint64x4_t, + svsub_za64_u64_vg1x4 (w8, z0), + svsub_za64_vg1x4 (w8, z0)) + +/* +** sub_w11_z0: +** sub za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_w11_z0, svuint64x4_t, + svsub_za64_u64_vg1x4 (w11, z0), + svsub_za64_vg1x4 (w11, z0)) + + +/* +** sub_w12_z0: +** mov (w8|w9|w10|w11), w12 +** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_w12_z0, svuint64x4_t, + svsub_za64_u64_vg1x4 (w12, z0), + svsub_za64_vg1x4 (w12, z0)) + +/* +** sub_w8p7_z0: +** sub za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_w8p7_z0, svuint64x4_t, + svsub_za64_u64_vg1x4 (w8 + 7, z0), + svsub_za64_vg1x4 (w8 + 7, z0)) + +/* +** sub_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_w8p8_z0, svuint64x4_t, + svsub_za64_u64_vg1x4 (w8 + 8, z0), + svsub_za64_vg1x4 (w8 + 8, z0)) + +/* +** sub_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** sub za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (sub_w8m1_z0, svuint64x4_t, + svsub_za64_u64_vg1x4 (w8 - 1, z0), + svsub_za64_vg1x4 (w8 - 1, z0)) + +/* +** sub_w8_z4: +** sub za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d} +** ret +*/ +TEST_ZA_XN (sub_w8_z4, svuint64x4_t, + svsub_za64_u64_vg1x4 (w8, z4), + svsub_za64_vg1x4 (w8, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. 
*/ + +/* +** sub_w8_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sub za\.d\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_w8_z18, svuint64x4_t, + svsub_za64_u64_vg1x4 (w8, z18), + svsub_za64_vg1x4 (w8, z18)) + +/* +** sub_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sub za\.d\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (sub_w8_z23, svuint64x4_t, + svsub_za64_u64_vg1x4 (w8, z23), + svsub_za64_vg1x4 (w8, z23)) + +/* +** sub_w8_z28: +** sub za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d} +** ret +*/ +TEST_ZA_XN (sub_w8_z28, svuint64x4_t, + svsub_za64_u64_vg1x4 (w8, z28), + svsub_za64_vg1x4 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sudot_lane_za32_s8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sudot_lane_za32_s8_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..0bf00bfdc1d6dd8c76b8a893122d798fce37eec7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sudot_lane_za32_s8_vg1x2.c @@ -0,0 +1,102 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** sudot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** sudot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (sudot_lane_0_z0_z4_0, svint8x2_t, svuint8_t, + svsudot_lane_za32_s8_vg1x2 (0, z0, z4, 0), + svsudot_lane_za32_vg1x2 (0, z0, z4, 0)) + +/* +** sudot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** sudot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (sudot_lane_w0_z0_z7_1, svint8x2_t, svuint8_t, + svsudot_lane_za32_s8_vg1x2 (w0, z0, z7, 1), + svsudot_lane_za32_vg1x2 (w0, z0, z7, 1)) + +/* +** sudot_lane_w8_z28_z4_2: +** sudot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (sudot_lane_w8_z28_z4_2, svint8x2_t, svuint8_t, + svsudot_lane_za32_s8_vg1x2 (w8, z28, z4, 2), + svsudot_lane_za32_vg1x2 (w8, z28, z4, 2)) + +/* +** sudot_lane_w8p7_z0_z4_3: +** sudot za\.s\[w8, 7, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (sudot_lane_w8p7_z0_z4_3, svint8x2_t, svuint8_t, + svsudot_lane_za32_s8_vg1x2 (w8 + 7, z0, z4, 3), + svsudot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3)) + +/* +** sudot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** sudot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (sudot_lane_w8p8_z0_z4_0, svint8x2_t, svuint8_t, + svsudot_lane_za32_s8_vg1x2 (w8 + 8, z0, z4, 0), + svsudot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0)) + +/* +** sudot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** sudot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (sudot_lane_w0m1_z0_z4_1, svint8x2_t, svuint8_t, + svsudot_lane_za32_s8_vg1x2 (w0 - 1, z0, z4, 1), + svsudot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1)) + +/* +** sudot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** sudot za\.s\[w8, 0, vgx2\], {z4\.b - z5\.b}, z15\.b\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (sudot_lane_w8_z4_z15_2, svint8x2_t, svuint8_t, + svsudot_lane_za32_s8_vg1x2 (w8, z4, z15, 2), + svsudot_lane_za32_vg1x2 (w8, z4, z15, 2)) + +/* +** sudot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** sudot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, \1\.b\[3\] +** ret +*/ +TEST_ZA_LANE (sudot_lane_w8_z28_z16_3, svint8x2_t, svuint8_t, + svsudot_lane_za32_s8_vg1x2 (w8, z28, z16, 3), + svsudot_lane_za32_vg1x2 (w8, z28, z16, 3)) + +/* +** sudot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** sudot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.b\[0\] +** ret +*/ +TEST_ZA_LANE (sudot_lane_w8_z17_z7_0, svint8x2_t, svuint8_t, + svsudot_lane_za32_s8_vg1x2 (w8, z17, z7, 0), + svsudot_lane_za32_vg1x2 (w8, z17, z7, 0)) + +/* +** sudot_lane_w8_z22_z4_1: +** sudot za\.s\[w8, 0, vgx2\], {z22\.b - z23\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (sudot_lane_w8_z22_z4_1, svint8x2_t, svuint8_t, + svsudot_lane_za32_s8_vg1x2 (w8, z22, z4, 1), + svsudot_lane_za32_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sudot_lane_za32_s8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sudot_lane_za32_s8_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..f6330406421fad6e59bb5807c8a00ca98095666c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sudot_lane_za32_s8_vg1x4.c @@ -0,0 +1,108 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** sudot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** sudot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (sudot_lane_0_z0_z4_0, svint8x4_t, svuint8_t, + svsudot_lane_za32_s8_vg1x4 (0, z0, z4, 0), + svsudot_lane_za32_vg1x4 (0, z0, z4, 0)) + +/* +** sudot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** sudot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (sudot_lane_w0_z0_z7_1, svint8x4_t, svuint8_t, + svsudot_lane_za32_s8_vg1x4 (w0, z0, z7, 1), + svsudot_lane_za32_vg1x4 (w0, z0, z7, 1)) + +/* +** sudot_lane_w8_z28_z4_2: +** sudot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (sudot_lane_w8_z28_z4_2, svint8x4_t, svuint8_t, + svsudot_lane_za32_s8_vg1x4 (w8, z28, z4, 2), + svsudot_lane_za32_vg1x4 (w8, z28, z4, 2)) + +/* +** sudot_lane_w8p7_z0_z4_3: +** sudot za\.s\[w8, 7, vgx4\], {z0\.b - z3\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (sudot_lane_w8p7_z0_z4_3, svint8x4_t, svuint8_t, + svsudot_lane_za32_s8_vg1x4 (w8 + 7, z0, z4, 3), + svsudot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3)) + +/* +** sudot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** sudot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (sudot_lane_w8p8_z0_z4_0, svint8x4_t, svuint8_t, + svsudot_lane_za32_s8_vg1x4 (w8 + 8, z0, z4, 0), + svsudot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0)) + +/* +** sudot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** sudot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (sudot_lane_w0m1_z0_z4_1, svint8x4_t, svuint8_t, + svsudot_lane_za32_s8_vg1x4 (w0 - 1, z0, z4, 1), + svsudot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1)) + +/* +** sudot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** sudot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, z15\.b\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (sudot_lane_w8_z4_z15_2, svint8x4_t, svuint8_t, + svsudot_lane_za32_s8_vg1x4 (w8, z4, z15, 2), + svsudot_lane_za32_vg1x4 (w8, z4, z15, 2)) + +/* +** sudot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** sudot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, \1\.b\[3\] +** ret +*/ +TEST_ZA_LANE (sudot_lane_w8_z28_z16_3, svint8x4_t, svuint8_t, + svsudot_lane_za32_s8_vg1x4 (w8, z28, z16, 3), + svsudot_lane_za32_vg1x4 (w8, z28, z16, 3)) + +/* +** sudot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sudot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.b\[0\] +** ret +*/ +TEST_ZA_LANE (sudot_lane_w8_z17_z7_0, svint8x4_t, svuint8_t, + svsudot_lane_za32_s8_vg1x4 (w8, z17, z7, 0), + svsudot_lane_za32_vg1x4 (w8, z17, z7, 0)) + +/* +** sudot_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** sudot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (sudot_lane_w8_z22_z4_1, svint8x4_t, svuint8_t, + svsudot_lane_za32_s8_vg1x4 (w8, z22, z4, 1), + svsudot_lane_za32_vg1x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sudot_za32_s8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sudot_za32_s8_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..0077bac1f3de6386eea0bb5b8fea5f44cecaa09f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sudot_za32_s8_vg1x2.c @@ -0,0 +1,243 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_0_z0_z4: +** mov (w8|w9|w10|w11), #?0 +** usdot za\.s\[\1, 0, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z4, svint8x2_t, + svsudot_za32_s8_vg1x2 (0, z0, svreinterpret_u8 (z4)), + svsudot_za32_vg1x2 (0, z0, svreinterpret_u8 (z4))) + +/* +** dot_w0_z0_z4: +** mov (w8|w9|w10|w11), w0 +** usdot za\.s\[\1, 0, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z4, svint8x2_t, + svsudot_za32_s8_vg1x2 (w0, z0, svreinterpret_u8 (z4)), + svsudot_za32_vg1x2 (w0, z0, svreinterpret_u8 (z4))) + +/* +** dot_w8_z0_z18: +** usdot za\.s\[w8, 0, vgx2\], {z18\.b - z19\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z18, svint8x2_t, + svsudot_za32_s8_vg1x2 (w8, z0, svreinterpret_u8 (z18)), + svsudot_za32_vg1x2 (w8, z0, svreinterpret_u8 (z18))) + +/* +** dot_w8_z4_z18: +** usdot za\.s\[w8, 0, vgx2\], {z18\.b - z19\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z18, svint8x2_t, + svsudot_za32_s8_vg1x2 (w8, z4, svreinterpret_u8 (z18)), + svsudot_za32_vg1x2 (w8, z4, svreinterpret_u8 (z18))) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z23: +** ... +** usdot za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svint8x2_t, + svsudot_za32_s8_vg1x2 (w8, z0, svreinterpret_u8 (z23)), + svsudot_za32_vg1x2 (w8, z0, svreinterpret_u8 (z23))) + +/* +** dot_w8_z23_z0: +** ... 
+** usdot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svint8x2_t, + svsudot_za32_s8_vg1x2 (w8, z23, svreinterpret_u8 (z0)), + svsudot_za32_vg1x2 (w8, z23, svreinterpret_u8 (z0))) + +/* +** dot_w8_z18_z28: +** usdot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, {z18\.b - z19\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z28, svint8x2_t, + svsudot_za32_s8_vg1x2 (w8, z18, svreinterpret_u8 (z28)), + svsudot_za32_vg1x2 (w8, z18, svreinterpret_u8 (z28))) + +/* +** dot_w8_z28_z4: +** usdot za\.s\[w8, 0, vgx2\], {z4\.b - z5\.b}, {z28\.b - z29\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z4, svint8x2_t, + svsudot_za32_s8_vg1x2 (w8, z28, svreinterpret_u8 (z4)), + svsudot_za32_vg1x2 (w8, z28, svreinterpret_u8 (z4))) + +/* +** dot_w8p1_z4_z0: +** usdot za\.s\[w8, 1, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svint8x2_t, + svsudot_za32_s8_vg1x2 (w8 + 1, z4, svreinterpret_u8 (z0)), + svsudot_za32_vg1x2 (w8 + 1, z4, svreinterpret_u8 (z0))) + +/* +** dot_w8p2_z4_z0: +** usdot za\.s\[w8, 2, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svint8x2_t, + svsudot_za32_s8_vg1x2 (w8 + 2, z4, svreinterpret_u8 (z0)), + svsudot_za32_vg1x2 (w8 + 2, z4, svreinterpret_u8 (z0))) + +/* +** dot_w11p4_z4_z0: +** usdot za\.s\[w11, 4, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svint8x2_t, + svsudot_za32_s8_vg1x2 (w11 + 4, z4, svreinterpret_u8 (z0)), + svsudot_za32_vg1x2 (w11 + 4, z4, svreinterpret_u8 (z0))) + +/* +** dot_w8p7_z4_z0: +** usdot za\.s\[w8, 7, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svint8x2_t, + svsudot_za32_s8_vg1x2 (w8 + 7, z4, svreinterpret_u8 (z0)), + svsudot_za32_vg1x2 (w8 + 7, z4, svreinterpret_u8 (z0))) + +/* +** dot_w8p8_z0_z4: +** add (w8|w9|w10|w11), w8, #?8 +** usdot za\.s\[\1, 0, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z0_z4, svint8x2_t, + svsudot_za32_s8_vg1x2 (w8 + 8, z0, svreinterpret_u8 (z4)), + svsudot_za32_vg1x2 (w8 + 8, z0, svreinterpret_u8 (z4))) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** usdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svint8x2_t, + svsudot_za32_s8_vg1x2 (w8 - 1, z4, svreinterpret_u8 (z0)), + svsudot_za32_vg1x2 (w8 - 1, z4, svreinterpret_u8 (z0))) + +/* +** dot_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** sudot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svint8x2_t, svint8_t, + svsudot_single_za32_s8_vg1x2 (0, z1, svreinterpret_u8 (z0)), + svsudot_za32_vg1x2 (0, z1, svreinterpret_u8 (z0))) + +/* +** dot_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** sudot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svint8x2_t, svint8_t, + svsudot_single_za32_s8_vg1x2 (w0, z1, svreinterpret_u8 (z0)), + svsudot_za32_vg1x2 (w0, z1, svreinterpret_u8 (z0))) + +/* +** dot_single_w8_z1_z0: +** sudot za\.s\[w8, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svint8x2_t, svint8_t, + svsudot_single_za32_s8_vg1x2 (w8, z1, svreinterpret_u8 (z0)), + svsudot_za32_vg1x2 (w8, z1, svreinterpret_u8 (z0))) + +/* +** dot_single_w8p1_z1_z0: +** sudot za\.s\[w8, 1, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svint8x2_t, svint8_t, + svsudot_single_za32_s8_vg1x2 (w8 + 1, z1, svreinterpret_u8 (z0)), + svsudot_za32_vg1x2 (w8 + 1, z1, svreinterpret_u8 
(z0))) + +/* +** dot_single_w8p2_z20_z0: +** sudot za\.s\[w8, 2, vgx2\], {z20\.b - z21\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svint8x2_t, svint8_t, + svsudot_single_za32_s8_vg1x2 (w8 + 2, z20, svreinterpret_u8 (z0)), + svsudot_za32_vg1x2 (w8 + 2, z20, svreinterpret_u8 (z0))) + +/* +** dot_single_w11p4_z27_z0: +** sudot za\.s\[w11, 4, vgx2\], {z27\.b - z28\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svint8x2_t, svint8_t, + svsudot_single_za32_s8_vg1x2 (w11 + 4, z27, svreinterpret_u8 (z0)), + svsudot_za32_vg1x2 (w11 + 4, z27, svreinterpret_u8 (z0))) + +/* +** dot_single_w8p7_z1_z0: +** sudot za\.s\[w8, 7, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svint8x2_t, svint8_t, + svsudot_single_za32_s8_vg1x2 (w8 + 7, z1, svreinterpret_u8 (z0)), + svsudot_za32_vg1x2 (w8 + 7, z1, svreinterpret_u8 (z0))) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sudot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svint8x2_t, svint8_t, + svsudot_single_za32_s8_vg1x2 (w8 + 8, z1, svreinterpret_u8 (z0)), + svsudot_za32_vg1x2 (w8 + 8, z1, svreinterpret_u8 (z0))) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** sudot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svint8x2_t, svint8_t, + svsudot_single_za32_s8_vg1x2 (w0 - 1, z1, svreinterpret_u8 (z0)), + svsudot_za32_vg1x2 (w0 - 1, z1, svreinterpret_u8 (z0))) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** sudot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svint8x2_t, svint8_t, + svsudot_single_za32_s8_vg1x2 (w8, z0, svreinterpret_u8 (z15)), + svsudot_za32_vg1x2 (w8, z0, svreinterpret_u8 (z15))) + +/* +** dot_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** sudot za\.s\[w8, 0, vgx2\], {z20\.b - z21\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svint8x2_t, svint8_t, + svsudot_single_za32_s8_vg1x2 (w8, z20, svreinterpret_u8 (z16)), + svsudot_za32_vg1x2 (w8, z20, svreinterpret_u8 (z16))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sudot_za32_s8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sudot_za32_s8_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..788ea01b8995c894ee2bdd00b53b78a017bc4b29 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sudot_za32_s8_vg1x4.c @@ -0,0 +1,254 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_0_z0_z4: +** mov (w8|w9|w10|w11), #?0 +** usdot za\.s\[\1, 0, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z4, svint8x4_t, + svsudot_za32_s8_vg1x4 (0, z0, svreinterpret_u8 (z4)), + svsudot_za32_vg1x4 (0, z0, svreinterpret_u8 (z4))) + +/* +** dot_w0_z0_z4: +** mov (w8|w9|w10|w11), w0 +** usdot za\.s\[\1, 0, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z4, svint8x4_t, + svsudot_za32_s8_vg1x4 (w0, z0, svreinterpret_u8 (z4)), + svsudot_za32_vg1x4 (w0, z0, svreinterpret_u8 (z4))) + +/* +** dot_w8_z4_z0: +** usdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z0, svint8x4_t, + svsudot_za32_s8_vg1x4 (w8, z4, svreinterpret_u8 (z0)), + svsudot_za32_vg1x4 (w8, z4, svreinterpret_u8 (z0))) + +/* Leave the assembler to check for correctness for misaligned registers. 
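Both z18 and z23 are misaligned for the vgx4 forms, so the patterns below skip the set-up moves and only check that the final usdot uses some aligned tuple.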
*/ + +/* +** dot_w8_z0_z18: +** ... +** usdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z18, svint8x4_t, + svsudot_za32_s8_vg1x4 (w8, z0, svreinterpret_u8 (z18)), + svsudot_za32_vg1x4 (w8, z0, svreinterpret_u8 (z18))) + +/* +** dot_w8_z18_z0: +** ... +** usdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z0, svint8x4_t, + svsudot_za32_s8_vg1x4 (w8, z18, svreinterpret_u8 (z0)), + svsudot_za32_vg1x4 (w8, z18, svreinterpret_u8 (z0))) + +/* +** dot_w8_z0_z23: +** ... +** usdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svint8x4_t, + svsudot_za32_s8_vg1x4 (w8, z0, svreinterpret_u8 (z23)), + svsudot_za32_vg1x4 (w8, z0, svreinterpret_u8 (z23))) + +/* +** dot_w8_z23_z0: +** ... +** usdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svint8x4_t, + svsudot_za32_s8_vg1x4 (w8, z23, svreinterpret_u8 (z0)), + svsudot_za32_vg1x4 (w8, z23, svreinterpret_u8 (z0))) + +/* +** dot_w8_z4_z28: +** usdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z28, svint8x4_t, + svsudot_za32_s8_vg1x4 (w8, z4, svreinterpret_u8 (z28)), + svsudot_za32_vg1x4 (w8, z4, svreinterpret_u8 (z28))) + +/* +** dot_w8_z28_z0: +** usdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, {z28\.b - z31\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z0, svint8x4_t, + svsudot_za32_s8_vg1x4 (w8, z28, svreinterpret_u8 (z0)), + svsudot_za32_vg1x4 (w8, z28, svreinterpret_u8 (z0))) + +/* +** dot_w8p1_z4_z0: +** usdot za\.s\[w8, 1, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svint8x4_t, + svsudot_za32_s8_vg1x4 (w8 + 1, z4, svreinterpret_u8 (z0)), + svsudot_za32_vg1x4 (w8 + 1, z4, svreinterpret_u8 (z0))) + +/* +** dot_w8p2_z4_z0: +** usdot za\.s\[w8, 2, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svint8x4_t, + svsudot_za32_s8_vg1x4 (w8 + 2, z4, svreinterpret_u8 (z0)), + svsudot_za32_vg1x4 (w8 + 2, z4, svreinterpret_u8 (z0))) + +/* +** dot_w11p4_z4_z0: +** usdot za\.s\[w11, 4, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svint8x4_t, + svsudot_za32_s8_vg1x4 (w11 + 4, z4, svreinterpret_u8 (z0)), + svsudot_za32_vg1x4 (w11 + 4, z4, svreinterpret_u8 (z0))) + +/* +** dot_w8p7_z4_z0: +** usdot za\.s\[w8, 7, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svint8x4_t, + svsudot_za32_s8_vg1x4 (w8 + 7, z4, svreinterpret_u8 (z0)), + svsudot_za32_vg1x4 (w8 + 7, z4, svreinterpret_u8 (z0))) + +/* +** dot_w8p8_z0_z4: +** add (w8|w9|w10|w11), w8, #?8 +** usdot za\.s\[\1, 0, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z0_z4, svint8x4_t, + svsudot_za32_s8_vg1x4 (w8 + 8, z0, svreinterpret_u8 (z4)), + svsudot_za32_vg1x4 (w8 + 8, z0, svreinterpret_u8 (z4))) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** usdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svint8x4_t, + svsudot_za32_s8_vg1x4 (w8 - 1, z4, svreinterpret_u8 (z0)), + svsudot_za32_vg1x4 (w8 - 1, z4, svreinterpret_u8 (z0))) + +/* +** dot_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** sudot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svint8x4_t, svint8_t, + svsudot_single_za32_s8_vg1x4 (0, z1, svreinterpret_u8 (z0)), + svsudot_za32_vg1x4 (0, z1, svreinterpret_u8 (z0))) + +/* +** dot_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** sudot 
za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svint8x4_t, svint8_t, + svsudot_single_za32_s8_vg1x4 (w0, z1, svreinterpret_u8 (z0)), + svsudot_za32_vg1x4 (w0, z1, svreinterpret_u8 (z0))) + +/* +** dot_single_w8_z1_z0: +** sudot za\.s\[w8, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svint8x4_t, svint8_t, + svsudot_single_za32_s8_vg1x4 (w8, z1, svreinterpret_u8 (z0)), + svsudot_za32_vg1x4 (w8, z1, svreinterpret_u8 (z0))) + +/* +** dot_single_w8p1_z1_z0: +** sudot za\.s\[w8, 1, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svint8x4_t, svint8_t, + svsudot_single_za32_s8_vg1x4 (w8 + 1, z1, svreinterpret_u8 (z0)), + svsudot_za32_vg1x4 (w8 + 1, z1, svreinterpret_u8 (z0))) + +/* +** dot_single_w8p4_z20_z0: +** sudot za\.s\[w8, 4, vgx4\], {z20\.b - z23\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p4_z20_z0, svint8x4_t, svint8_t, + svsudot_single_za32_s8_vg1x4 (w8 + 4, z20, svreinterpret_u8 (z0)), + svsudot_za32_vg1x4 (w8 + 4, z20, svreinterpret_u8 (z0))) + +/* +** dot_single_w8p6_z27_z0: +** sudot za\.s\[w8, 6, vgx4\], {z27\.b - z30\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p6_z27_z0, svint8x4_t, svint8_t, + svsudot_single_za32_s8_vg1x4 (w8 + 6, z27, svreinterpret_u8 (z0)), + svsudot_za32_vg1x4 (w8 + 6, z27, svreinterpret_u8 (z0))) + +/* +** dot_single_w8p7_z1_z0: +** sudot za\.s\[w8, 7, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svint8x4_t, svint8_t, + svsudot_single_za32_s8_vg1x4 (w8 + 7, z1, svreinterpret_u8 (z0)), + svsudot_za32_vg1x4 (w8 + 7, z1, svreinterpret_u8 (z0))) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** sudot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svint8x4_t, svint8_t, + svsudot_single_za32_s8_vg1x4 (w8 + 8, z1, svreinterpret_u8 (z0)), + svsudot_za32_vg1x4 (w8 + 8, z1, svreinterpret_u8 (z0))) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** sudot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svint8x4_t, svint8_t, + svsudot_single_za32_s8_vg1x4 (w0 - 1, z1, svreinterpret_u8 (z0)), + svsudot_za32_vg1x4 (w0 - 1, z1, svreinterpret_u8 (z0))) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** sudot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svint8x4_t, svint8_t, + svsudot_single_za32_s8_vg1x4 (w8, z0, svreinterpret_u8 (z15)), + svsudot_za32_vg1x4 (w8, z0, svreinterpret_u8 (z15))) + +/* +** dot_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** sudot za\.s\[w8, 0, vgx4\], {z20\.b - z23\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svint8x4_t, svint8_t, + svsudot_single_za32_s8_vg1x4 (w8, z20, svreinterpret_u8 (z16)), + svsudot_za32_vg1x4 (w8, z20, svreinterpret_u8 (z16))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/suvdot_lane_za32_s8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/suvdot_lane_za32_s8_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..5ab1c3f63c0f96adbee093243dd9488781f82a61 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/suvdot_lane_za32_s8_vg1x4.c @@ -0,0 +1,108 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** suvdot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** suvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (suvdot_lane_0_z0_z4_0, svint8x4_t, svuint8_t, + svsuvdot_lane_za32_s8_vg1x4 (0, z0, z4, 0), + svsuvdot_lane_za32_vg1x4 (0, z0, z4, 0)) + +/* +** suvdot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** suvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (suvdot_lane_w0_z0_z7_1, svint8x4_t, svuint8_t, + svsuvdot_lane_za32_s8_vg1x4 (w0, z0, z7, 1), + svsuvdot_lane_za32_vg1x4 (w0, z0, z7, 1)) + +/* +** suvdot_lane_w8_z28_z4_2: +** suvdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (suvdot_lane_w8_z28_z4_2, svint8x4_t, svuint8_t, + svsuvdot_lane_za32_s8_vg1x4 (w8, z28, z4, 2), + svsuvdot_lane_za32_vg1x4 (w8, z28, z4, 2)) + +/* +** suvdot_lane_w8p7_z0_z4_3: +** suvdot za\.s\[w8, 7, vgx4\], {z0\.b - z3\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (suvdot_lane_w8p7_z0_z4_3, svint8x4_t, svuint8_t, + svsuvdot_lane_za32_s8_vg1x4 (w8 + 7, z0, z4, 3), + svsuvdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3)) + +/* +** suvdot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** suvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (suvdot_lane_w8p8_z0_z4_0, svint8x4_t, svuint8_t, + svsuvdot_lane_za32_s8_vg1x4 (w8 + 8, z0, z4, 0), + svsuvdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0)) + +/* +** suvdot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** suvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (suvdot_lane_w0m1_z0_z4_1, svint8x4_t, svuint8_t, + svsuvdot_lane_za32_s8_vg1x4 (w0 - 1, z0, z4, 1), + svsuvdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1)) + +/* +** suvdot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** suvdot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, z15\.b\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (suvdot_lane_w8_z4_z15_2, svint8x4_t, svuint8_t, + svsuvdot_lane_za32_s8_vg1x4 (w8, z4, z15, 2), + svsuvdot_lane_za32_vg1x4 (w8, z4, z15, 2)) + +/* +** suvdot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** suvdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, \1\.b\[3\] +** ret +*/ +TEST_ZA_LANE (suvdot_lane_w8_z28_z16_3, svint8x4_t, svuint8_t, + svsuvdot_lane_za32_s8_vg1x4 (w8, z28, z16, 3), + svsuvdot_lane_za32_vg1x4 (w8, z28, z16, 3)) + +/* +** suvdot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** suvdot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.b\[0\] +** ret +*/ +TEST_ZA_LANE (suvdot_lane_w8_z17_z7_0, svint8x4_t, svuint8_t, + svsuvdot_lane_za32_s8_vg1x4 (w8, z17, z7, 0), + svsuvdot_lane_za32_vg1x4 (w8, z17, z7, 0)) + +/* +** suvdot_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** suvdot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (suvdot_lane_w8_z22_z4_1, svint8x4_t, svuint8_t, + svsuvdot_lane_za32_s8_vg1x4 (w8, z22, z4, 1), + svsuvdot_lane_za32_vg1x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/test_sme2_acle.h b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/test_sme2_acle.h new file mode 100644 index 0000000000000000000000000000000000000000..8b982caf4384eb19c94ca225bdeaca0e26181348 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/test_sme2_acle.h @@ -0,0 +1,124 @@ +#ifndef TEST_SME2_ACLE_H +#define TEST_SME2_ACLE_H 1 + +#include "../../sme/acle-asm/test_sme_acle.h" + +#define TEST_ZA_X1(NAME, ZTYPE, CODE1, CODE2) \ + PROTO (NAME, void, (int w0)) \ + { \ + register int w7 __asm ("w7"); \ + register int w8 __asm ("w8"); \ + register int w9 __asm ("w9"); \ + register int w10 __asm ("w10"); \ + register int w11 __asm ("w11"); \ + register int w12 __asm ("w12"); \ + register ZTYPE z0 __asm ("z0"); \ + register ZTYPE z3 __asm ("z3"); \ + register ZTYPE z7 __asm ("z7"); \ + register ZTYPE z16 __asm ("z16"); \ + register ZTYPE z23 __asm ("z23"); \ + register ZTYPE z31 __asm ("z31"); \ + __asm volatile ("" : "=r" (w7), "=r" (w8), "=r" (w9), \ + "=r" (w10), "=r" (w11), "=r" (w12), \ + "=w" (z0), "=w" (z3), "=w" (z7), \ + "=w" (z16), "=w" (z23), "=w" (z31)); \ + INVOKE (CODE1, CODE2); \ + } + +#define TEST_ZA_XN(NAME, TTYPE, CODE1, CODE2) \ + PROTO (NAME, void, (int w0)) \ + { \ + register int w7 __asm ("w7"); \ + register int w8 __asm ("w8"); \ + register int w11 __asm ("w11"); \ + register int w12 __asm ("w12"); \ + register int w15 __asm ("w15"); \ + register int w16 __asm ("w16"); \ + register TTYPE z0 __asm ("z0"); \ + register TTYPE z4 __asm ("z4"); \ + register TTYPE z18 __asm ("z18"); \ + register TTYPE z23 __asm ("z23"); \ + register TTYPE z28 __asm ("z28"); \ + __asm volatile ("" : "=r" (w7), "=r" (w8), "=r" (w11), \ + "=r" (w12), "=r" (w15), "=r" (w16), \ + "=w" (z0), "=w" (z4), "=w" (z18), \ + "=w" (z23), "=w" (z28)); \ + INVOKE (CODE1, CODE2); \ + } + +#define TEST_READ_ZA_XN(NAME, TTYPE, CODE1, CODE2) \ + PROTO (NAME, void, (int w0)) \ + { \ + register int w7 __asm ("w7"); \ + register int w8 __asm ("w8"); \ + register int w11 __asm ("w11"); \ + register int w12 __asm ("w12"); \ + register int w15 __asm ("w15"); \ + register int w16 __asm ("w16"); \ + register TTYPE z0 __asm ("z0"); \ + register TTYPE z4 __asm ("z4"); \ + register TTYPE z18 __asm ("z18"); \ + register TTYPE z23 __asm ("z23"); \ + register TTYPE z28 __asm ("z28"); \ + 
__asm volatile ("" : "=r" (w7), "=r" (w8), "=r" (w11), \ + "=r" (w12), "=r" (w15), "=r" (w16)); \ + INVOKE (CODE1, CODE2); \ + __asm volatile ("" :: "w" (z0), "w" (z4), "w" (z18), \ + "w" (z23), "w" (z28)); \ + } + +#define TEST_ZA_SINGLE(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ + PROTO (NAME, void, (int w0)) \ + { \ + register int w8 __asm ("w8"); \ + register int w11 __asm ("w11"); \ + register ZTYPE z0 __asm ("z0"); \ + register TTYPE z1 __asm ("z1"); \ + register ZTYPE z16 __asm ("z16"); \ + register TTYPE z20 __asm ("z20"); \ + register TTYPE z27 __asm ("z27"); \ + __asm volatile ("" : "=r" (w8), "=r" (w11), "=w" (z0), \ + "=w" (z1), "=w" (z16), "=w" (z20), \ + "=w" (z27)); \ + INVOKE (CODE1, CODE2); \ + } + +#define TEST_ZA_SINGLE_Z15(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ + PROTO (NAME, void, (int w0)) \ + { \ + register int w8 __asm ("w8"); \ + register TTYPE z0 __asm ("z0"); \ + register ZTYPE z15 __asm ("z15"); \ + __asm volatile ("" : "=r" (w8), "=w" (z0), "=w" (z15)); \ + INVOKE (CODE1, CODE2); \ + } + +#define TEST_ZA_LANE(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ + PROTO (NAME, void, (int w0)) \ + { \ + register int w8 __asm ("w8"); \ + register int w11 __asm ("w11"); \ + register TTYPE z0 __asm ("z0"); \ + register ZTYPE z4 __asm ("z4"); \ + register ZTYPE z7 __asm ("z7"); \ + register ZTYPE z16 __asm ("z16"); \ + register TTYPE z17 __asm ("z17"); \ + register TTYPE z22 __asm ("z22"); \ + register TTYPE z28 __asm ("z28"); \ + __asm volatile ("" : "=r" (w8), "=r" (w11), "=w" (z0), \ + "=w" (z4), "=w" (z7), "=w" (z16), \ + "=w" (z17), "=w" (z22), "=w" (z28)); \ + INVOKE (CODE1, CODE2); \ + } + +#define TEST_ZA_LANE_Z15(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ + PROTO (NAME, void, (int w0)) \ + { \ + register int w8 __asm ("w8"); \ + register TTYPE z4 __asm ("z4"); \ + register ZTYPE z15 __asm ("z15"); \ + __asm volatile ("" : "=r" (w8), "=w" (z4), "=w" (z15)); \ + INVOKE (CODE1, CODE2); \ + } + +#endif diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..c835cda8d5da276369220e04a140b93a3c7ccfde --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s16_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** unpk_z0_z4: +** sunpk {z0\.s - z1\.s}, z4\.h +** ret +*/ +TEST_DUAL_XN (unpk_z0_z4, svint32x2_t, svint16_t, z0, + svunpk_s32_s16_x2 (z4), + svunpk_s32 (z4)) + +/* +** unpk_z4_z0: +** sunpk {z4\.s - z5\.s}, z0\.h +** ret +*/ +TEST_DUAL_XN (unpk_z4_z0, svint16_t, svint32x2_t, z4, + svunpk_s32_s16_x2 (z0), + svunpk_s32 (z0)) + +/* +** unpk_z18_z23: +** sunpk {z18\.s - z19\.s}, z23\.h +** ret +*/ +TEST_DUAL_XN (unpk_z18_z23, svint32x2_t, svint16_t, z18, + svunpk_s32_s16_x2 (z23), + svunpk_s32 (z23)) + +/* +** unpk_z23_z28: +** sunpk [^\n]+, z28\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z23_z28, svint16_t, svint32x2_t, z23, + svunpk_s32_s16_x2 (z28), + svunpk_s32 (z28)) + +/* +** unpk_z28_z4: +** sunpk {z28\.s - z29\.s}, z4\.h +** ret +*/ +TEST_DUAL_XN (unpk_z28_z4, svint32x2_t, svint16_t, z28, + svunpk_s32_s16_x2 (z4), + svunpk_s32 (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..eb195c4274d383994b748901ad64787ea0945954 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s16_x4.c @@ -0,0 +1,76 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** unpk_z0_z4: +** sunpk {z0\.s - z3\.s}, {z4\.h - z5\.h} +** ret +*/ +TEST_DUAL_XN (unpk_z0_z4, svint32x4_t, svint16x2_t, z0, + svunpk_s32_s16_x4 (z4), + svunpk_s32 (z4)) + +/* +** unpk_z4_z0: +** sunpk {z4\.s - z7\.s}, {z0\.h - z1\.h} +** ret +*/ +TEST_DUAL_XN (unpk_z4_z0, svint16x2_t, svint32x4_t, z4, + svunpk_s32_s16_x4 (z0), + svunpk_s32 (z0)) + +/* +** unpk_z4_z18: +** sunpk {z4\.s - z7\.s}, {z18\.h - z19\.h} +** ret +*/ +TEST_DUAL_XN (unpk_z4_z18, svint16x2_t, svint32x4_t, z4, + svunpk_s32_s16_x4 (z18), + svunpk_s32 (z18)) + +/* +** unpk_z28_z23: +** mov [^\n]+ +** mov [^\n]+ +** sunpk {z28\.s - z31\.s}, [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z28_z23, svint32x4_t, svint16x2_t, z28, + svunpk_s32_s16_x4 (z23), + svunpk_s32 (z23)) + +/* +** unpk_z23_z28: +** sunpk [^\n]+, {z28\.h - z29\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z23_z28, svint16x2_t, svint32x4_t, z23, + svunpk_s32_s16_x4 (z28), + svunpk_s32 (z28)) + +/* +** unpk_z23_z18: +** sunpk {z[^\n]+}, {z18\.h - z19\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z23_z18, svint16x2_t, svint32x4_t, z23, + svunpk_s32_s16_x4 (z18), + svunpk_s32 (z18)) + +/* +** unpk_z28_z4: +** sunpk {z28\.s - z31\.s}, {z4\.h - z5\.h} +** ret +*/ +TEST_DUAL_XN (unpk_z28_z4, svint32x4_t, svint16x2_t, z28, + svunpk_s32_s16_x4 (z4), + svunpk_s32 (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..9a5043be303b3dc53b8041e955fbbe9ccbdf13a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s32_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** unpk_z0_z4: +** sunpk {z0\.d - z1\.d}, z4\.s +** ret +*/ +TEST_DUAL_XN (unpk_z0_z4, svint64x2_t, svint32_t, z0, + svunpk_s64_s32_x2 (z4), + svunpk_s64 (z4)) + +/* +** unpk_z4_z0: +** sunpk {z4\.d - z5\.d}, z0\.s +** ret +*/ +TEST_DUAL_XN (unpk_z4_z0, svint32_t, svint64x2_t, z4, + svunpk_s64_s32_x2 (z0), + svunpk_s64 (z0)) + +/* +** unpk_z18_z23: +** sunpk {z18\.d - z19\.d}, z23\.s +** ret +*/ +TEST_DUAL_XN (unpk_z18_z23, svint64x2_t, svint32_t, z18, + svunpk_s64_s32_x2 (z23), + svunpk_s64 (z23)) + +/* +** unpk_z23_z28: +** sunpk [^\n]+, z28\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z23_z28, svint32_t, svint64x2_t, z23, + svunpk_s64_s32_x2 (z28), + svunpk_s64 (z28)) + +/* +** unpk_z28_z4: +** sunpk {z28\.d - z29\.d}, z4\.s +** ret +*/ +TEST_DUAL_XN (unpk_z28_z4, svint64x2_t, svint32_t, z28, + svunpk_s64_s32_x2 (z4), + svunpk_s64 (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..6e15de1fbf0e2aaaa44c55e97703cc390bacc14c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s32_x4.c @@ -0,0 +1,76 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** unpk_z0_z4: +** sunpk {z0\.d - z3\.d}, {z4\.s - z5\.s} +** ret +*/ +TEST_DUAL_XN (unpk_z0_z4, svint64x4_t, svint32x2_t, z0, + svunpk_s64_s32_x4 (z4), + svunpk_s64 (z4)) + +/* +** 
unpk_z4_z0: +** sunpk {z4\.d - z7\.d}, {z0\.s - z1\.s} +** ret +*/ +TEST_DUAL_XN (unpk_z4_z0, svint32x2_t, svint64x4_t, z4, + svunpk_s64_s32_x4 (z0), + svunpk_s64 (z0)) + +/* +** unpk_z4_z18: +** sunpk {z4\.d - z7\.d}, {z18\.s - z19\.s} +** ret +*/ +TEST_DUAL_XN (unpk_z4_z18, svint32x2_t, svint64x4_t, z4, + svunpk_s64_s32_x4 (z18), + svunpk_s64 (z18)) + +/* +** unpk_z28_z23: +** mov [^\n]+ +** mov [^\n]+ +** sunpk {z28\.d - z31\.d}, [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z28_z23, svint64x4_t, svint32x2_t, z28, + svunpk_s64_s32_x4 (z23), + svunpk_s64 (z23)) + +/* +** unpk_z23_z28: +** sunpk [^\n]+, {z28\.s - z29\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z23_z28, svint32x2_t, svint64x4_t, z23, + svunpk_s64_s32_x4 (z28), + svunpk_s64 (z28)) + +/* +** unpk_z23_z18: +** sunpk {z[^\n]+}, {z18\.s - z19\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z23_z18, svint32x2_t, svint64x4_t, z23, + svunpk_s64_s32_x4 (z18), + svunpk_s64 (z18)) + +/* +** unpk_z28_z4: +** sunpk {z28\.d - z31\.d}, {z4\.s - z5\.s} +** ret +*/ +TEST_DUAL_XN (unpk_z28_z4, svint64x4_t, svint32x2_t, z28, + svunpk_s64_s32_x4 (z4), + svunpk_s64 (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..dc87f4810ae1efad66029b9b9435dd3ef458d39e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s8_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** unpk_z0_z4: +** sunpk {z0\.h - z1\.h}, z4\.b +** ret +*/ +TEST_DUAL_XN (unpk_z0_z4, svint16x2_t, svint8_t, z0, + svunpk_s16_s8_x2 (z4), + svunpk_s16 (z4)) + +/* +** unpk_z4_z0: +** sunpk {z4\.h - z5\.h}, z0\.b +** ret +*/ +TEST_DUAL_XN (unpk_z4_z0, svint8_t, svint16x2_t, z4, + svunpk_s16_s8_x2 (z0), + svunpk_s16 (z0)) + +/* +** unpk_z18_z23: +** sunpk {z18\.h - z19\.h}, z23\.b +** ret +*/ +TEST_DUAL_XN (unpk_z18_z23, svint16x2_t, svint8_t, z18, + svunpk_s16_s8_x2 (z23), + svunpk_s16 (z23)) + +/* +** unpk_z23_z28: +** sunpk [^\n]+, z28\.b +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z23_z28, svint8_t, svint16x2_t, z23, + svunpk_s16_s8_x2 (z28), + svunpk_s16 (z28)) + +/* +** unpk_z28_z4: +** sunpk {z28\.h - z29\.h}, z4\.b +** ret +*/ +TEST_DUAL_XN (unpk_z28_z4, svint16x2_t, svint8_t, z28, + svunpk_s16_s8_x2 (z4), + svunpk_s16 (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..3bace220417a1377b90fb26917231ac77cd16836 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_s8_x4.c @@ -0,0 +1,76 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** unpk_z0_z4: +** sunpk {z0\.h - z3\.h}, {z4\.b - z5\.b} +** ret +*/ +TEST_DUAL_XN (unpk_z0_z4, svint16x4_t, svint8x2_t, z0, + svunpk_s16_s8_x4 (z4), + svunpk_s16 (z4)) + +/* +** unpk_z4_z0: +** sunpk {z4\.h - z7\.h}, {z0\.b - z1\.b} +** ret +*/ +TEST_DUAL_XN (unpk_z4_z0, svint8x2_t, svint16x4_t, z4, + svunpk_s16_s8_x4 (z0), + svunpk_s16 (z0)) + +/* +** unpk_z4_z18: +** sunpk {z4\.h - z7\.h}, {z18\.b - z19\.b} +** ret +*/ +TEST_DUAL_XN (unpk_z4_z18, svint8x2_t, svint16x4_t, z4, + svunpk_s16_s8_x4 (z18), + svunpk_s16 (z18)) + +/* +** unpk_z28_z23: +** mov [^\n]+ 
+** mov [^\n]+ +** sunpk {z28\.h - z31\.h}, [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z28_z23, svint16x4_t, svint8x2_t, z28, + svunpk_s16_s8_x4 (z23), + svunpk_s16 (z23)) + +/* +** unpk_z23_z28: +** sunpk [^\n]+, {z28\.b - z29\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z23_z28, svint8x2_t, svint16x4_t, z23, + svunpk_s16_s8_x4 (z28), + svunpk_s16 (z28)) + +/* +** unpk_z23_z18: +** sunpk {z[^\n]+}, {z18\.b - z19\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z23_z18, svint8x2_t, svint16x4_t, z23, + svunpk_s16_s8_x4 (z18), + svunpk_s16 (z18)) + +/* +** unpk_z28_z4: +** sunpk {z28\.h - z31\.h}, {z4\.b - z5\.b} +** ret +*/ +TEST_DUAL_XN (unpk_z28_z4, svint16x4_t, svint8x2_t, z28, + svunpk_s16_s8_x4 (z4), + svunpk_s16 (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..ff37b17b3d70da7ffb1b843b98f81d3de73f82ca --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u16_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** unpk_z0_z4: +** uunpk {z0\.s - z1\.s}, z4\.h +** ret +*/ +TEST_DUAL_XN (unpk_z0_z4, svuint32x2_t, svuint16_t, z0, + svunpk_u32_u16_x2 (z4), + svunpk_u32 (z4)) + +/* +** unpk_z4_z0: +** uunpk {z4\.s - z5\.s}, z0\.h +** ret +*/ +TEST_DUAL_XN (unpk_z4_z0, svuint16_t, svuint32x2_t, z4, + svunpk_u32_u16_x2 (z0), + svunpk_u32 (z0)) + +/* +** unpk_z18_z23: +** uunpk {z18\.s - z19\.s}, z23\.h +** ret +*/ +TEST_DUAL_XN (unpk_z18_z23, svuint32x2_t, svuint16_t, z18, + svunpk_u32_u16_x2 (z23), + svunpk_u32 (z23)) + +/* +** unpk_z23_z28: +** uunpk [^\n]+, z28\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z23_z28, svuint16_t, svuint32x2_t, z23, + svunpk_u32_u16_x2 (z28), + svunpk_u32 (z28)) + +/* +** unpk_z28_z4: +** uunpk {z28\.s - z29\.s}, z4\.h +** ret +*/ +TEST_DUAL_XN (unpk_z28_z4, svuint32x2_t, svuint16_t, z28, + svunpk_u32_u16_x2 (z4), + svunpk_u32 (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..ced14af86217d95b608267ac53c6bfc5e69b5225 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u16_x4.c @@ -0,0 +1,76 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** unpk_z0_z4: +** uunpk {z0\.s - z3\.s}, {z4\.h - z5\.h} +** ret +*/ +TEST_DUAL_XN (unpk_z0_z4, svuint32x4_t, svuint16x2_t, z0, + svunpk_u32_u16_x4 (z4), + svunpk_u32 (z4)) + +/* +** unpk_z4_z0: +** uunpk {z4\.s - z7\.s}, {z0\.h - z1\.h} +** ret +*/ +TEST_DUAL_XN (unpk_z4_z0, svuint16x2_t, svuint32x4_t, z4, + svunpk_u32_u16_x4 (z0), + svunpk_u32 (z0)) + +/* +** unpk_z4_z18: +** uunpk {z4\.s - z7\.s}, {z18\.h - z19\.h} +** ret +*/ +TEST_DUAL_XN (unpk_z4_z18, svuint16x2_t, svuint32x4_t, z4, + svunpk_u32_u16_x4 (z18), + svunpk_u32 (z18)) + +/* +** unpk_z28_z23: +** mov [^\n]+ +** mov [^\n]+ +** uunpk {z28\.s - z31\.s}, [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z28_z23, svuint32x4_t, svuint16x2_t, z28, + svunpk_u32_u16_x4 (z23), + svunpk_u32 (z23)) + +/* +** unpk_z23_z28: +** uunpk [^\n]+, {z28\.h - z29\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z23_z28, svuint16x2_t, svuint32x4_t, 
z23, + svunpk_u32_u16_x4 (z28), + svunpk_u32 (z28)) + +/* +** unpk_z23_z18: +** uunpk {z[^\n]+}, {z18\.h - z19\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z23_z18, svuint16x2_t, svuint32x4_t, z23, + svunpk_u32_u16_x4 (z18), + svunpk_u32 (z18)) + +/* +** unpk_z28_z4: +** uunpk {z28\.s - z31\.s}, {z4\.h - z5\.h} +** ret +*/ +TEST_DUAL_XN (unpk_z28_z4, svuint32x4_t, svuint16x2_t, z28, + svunpk_u32_u16_x4 (z4), + svunpk_u32 (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..48ad1d93e0ce95a3e9381544e95ed3ab65a117e5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u32_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** unpk_z0_z4: +** uunpk {z0\.d - z1\.d}, z4\.s +** ret +*/ +TEST_DUAL_XN (unpk_z0_z4, svuint64x2_t, svuint32_t, z0, + svunpk_u64_u32_x2 (z4), + svunpk_u64 (z4)) + +/* +** unpk_z4_z0: +** uunpk {z4\.d - z5\.d}, z0\.s +** ret +*/ +TEST_DUAL_XN (unpk_z4_z0, svuint32_t, svuint64x2_t, z4, + svunpk_u64_u32_x2 (z0), + svunpk_u64 (z0)) + +/* +** unpk_z18_z23: +** uunpk {z18\.d - z19\.d}, z23\.s +** ret +*/ +TEST_DUAL_XN (unpk_z18_z23, svuint64x2_t, svuint32_t, z18, + svunpk_u64_u32_x2 (z23), + svunpk_u64 (z23)) + +/* +** unpk_z23_z28: +** uunpk [^\n]+, z28\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z23_z28, svuint32_t, svuint64x2_t, z23, + svunpk_u64_u32_x2 (z28), + svunpk_u64 (z28)) + +/* +** unpk_z28_z4: +** uunpk {z28\.d - z29\.d}, z4\.s +** ret +*/ +TEST_DUAL_XN (unpk_z28_z4, svuint64x2_t, svuint32_t, z28, + svunpk_u64_u32_x2 (z4), + svunpk_u64 (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..1f68172abaa6f689c1983cfdb567be8a94ad2b4e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u32_x4.c @@ -0,0 +1,76 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** unpk_z0_z4: +** uunpk {z0\.d - z3\.d}, {z4\.s - z5\.s} +** ret +*/ +TEST_DUAL_XN (unpk_z0_z4, svuint64x4_t, svuint32x2_t, z0, + svunpk_u64_u32_x4 (z4), + svunpk_u64 (z4)) + +/* +** unpk_z4_z0: +** uunpk {z4\.d - z7\.d}, {z0\.s - z1\.s} +** ret +*/ +TEST_DUAL_XN (unpk_z4_z0, svuint32x2_t, svuint64x4_t, z4, + svunpk_u64_u32_x4 (z0), + svunpk_u64 (z0)) + +/* +** unpk_z4_z18: +** uunpk {z4\.d - z7\.d}, {z18\.s - z19\.s} +** ret +*/ +TEST_DUAL_XN (unpk_z4_z18, svuint32x2_t, svuint64x4_t, z4, + svunpk_u64_u32_x4 (z18), + svunpk_u64 (z18)) + +/* +** unpk_z28_z23: +** mov [^\n]+ +** mov [^\n]+ +** uunpk {z28\.d - z31\.d}, [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z28_z23, svuint64x4_t, svuint32x2_t, z28, + svunpk_u64_u32_x4 (z23), + svunpk_u64 (z23)) + +/* +** unpk_z23_z28: +** uunpk [^\n]+, {z28\.s - z29\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z23_z28, svuint32x2_t, svuint64x4_t, z23, + svunpk_u64_u32_x4 (z28), + svunpk_u64 (z28)) + +/* +** unpk_z23_z18: +** uunpk {z[^\n]+}, {z18\.s - z19\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z23_z18, svuint32x2_t, svuint64x4_t, z23, + svunpk_u64_u32_x4 (z18), + svunpk_u64 (z18)) + +/* +** unpk_z28_z4: +** uunpk {z28\.d - 
z31\.d}, {z4\.s - z5\.s} +** ret +*/ +TEST_DUAL_XN (unpk_z28_z4, svuint64x4_t, svuint32x2_t, z28, + svunpk_u64_u32_x4 (z4), + svunpk_u64 (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..2e6dc4746b1a80affc873c1655fc2311f361b9e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u8_x2.c @@ -0,0 +1,50 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** unpk_z0_z4: +** uunpk {z0\.h - z1\.h}, z4\.b +** ret +*/ +TEST_DUAL_XN (unpk_z0_z4, svuint16x2_t, svuint8_t, z0, + svunpk_u16_u8_x2 (z4), + svunpk_u16 (z4)) + +/* +** unpk_z4_z0: +** uunpk {z4\.h - z5\.h}, z0\.b +** ret +*/ +TEST_DUAL_XN (unpk_z4_z0, svuint8_t, svuint16x2_t, z4, + svunpk_u16_u8_x2 (z0), + svunpk_u16 (z0)) + +/* +** unpk_z18_z23: +** uunpk {z18\.h - z19\.h}, z23\.b +** ret +*/ +TEST_DUAL_XN (unpk_z18_z23, svuint16x2_t, svuint8_t, z18, + svunpk_u16_u8_x2 (z23), + svunpk_u16 (z23)) + +/* +** unpk_z23_z28: +** uunpk [^\n]+, z28\.b +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z23_z28, svuint8_t, svuint16x2_t, z23, + svunpk_u16_u8_x2 (z28), + svunpk_u16 (z28)) + +/* +** unpk_z28_z4: +** uunpk {z28\.h - z29\.h}, z4\.b +** ret +*/ +TEST_DUAL_XN (unpk_z28_z4, svuint16x2_t, svuint8_t, z28, + svunpk_u16_u8_x2 (z4), + svunpk_u16 (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..6aa9c3d262131c2432608db4660b3e2f53ca18be --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/unpk_u8_x4.c @@ -0,0 +1,76 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** unpk_z0_z4: +** uunpk {z0\.h - z3\.h}, {z4\.b - z5\.b} +** ret +*/ +TEST_DUAL_XN (unpk_z0_z4, svuint16x4_t, svuint8x2_t, z0, + svunpk_u16_u8_x4 (z4), + svunpk_u16 (z4)) + +/* +** unpk_z4_z0: +** uunpk {z4\.h - z7\.h}, {z0\.b - z1\.b} +** ret +*/ +TEST_DUAL_XN (unpk_z4_z0, svuint8x2_t, svuint16x4_t, z4, + svunpk_u16_u8_x4 (z0), + svunpk_u16 (z0)) + +/* +** unpk_z4_z18: +** uunpk {z4\.h - z7\.h}, {z18\.b - z19\.b} +** ret +*/ +TEST_DUAL_XN (unpk_z4_z18, svuint8x2_t, svuint16x4_t, z4, + svunpk_u16_u8_x4 (z18), + svunpk_u16 (z18)) + +/* +** unpk_z28_z23: +** mov [^\n]+ +** mov [^\n]+ +** uunpk {z28\.h - z31\.h}, [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z28_z23, svuint16x4_t, svuint8x2_t, z28, + svunpk_u16_u8_x4 (z23), + svunpk_u16 (z23)) + +/* +** unpk_z23_z28: +** uunpk [^\n]+, {z28\.b - z29\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z23_z28, svuint8x2_t, svuint16x4_t, z23, + svunpk_u16_u8_x4 (z28), + svunpk_u16 (z28)) + +/* +** unpk_z23_z18: +** uunpk {z[^\n]+}, {z18\.b - z19\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_DUAL_XN (unpk_z23_z18, svuint8x2_t, svuint16x4_t, z23, + svunpk_u16_u8_x4 (z18), + svunpk_u16 (z18)) + +/* +** unpk_z28_z4: +** uunpk {z28\.h - z31\.h}, {z4\.b - z5\.b} +** ret +*/ +TEST_DUAL_XN (unpk_z28_z4, svuint16x4_t, svuint8x2_t, z28, + svunpk_u16_u8_x4 (z4), + svunpk_u16 (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usdot_lane_za32_u8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usdot_lane_za32_u8_vg1x2.c new file mode 100644 index 
0000000000000000000000000000000000000000..38bedd2d1325103e25ff20afa7f83f681ad3e43e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usdot_lane_za32_u8_vg1x2.c @@ -0,0 +1,102 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** usdot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** usdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (usdot_lane_0_z0_z4_0, svuint8x2_t, svint8_t, + svusdot_lane_za32_u8_vg1x2 (0, z0, z4, 0), + svusdot_lane_za32_vg1x2 (0, z0, z4, 0)) + +/* +** usdot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** usdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (usdot_lane_w0_z0_z7_1, svuint8x2_t, svint8_t, + svusdot_lane_za32_u8_vg1x2 (w0, z0, z7, 1), + svusdot_lane_za32_vg1x2 (w0, z0, z7, 1)) + +/* +** usdot_lane_w8_z28_z4_2: +** usdot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (usdot_lane_w8_z28_z4_2, svuint8x2_t, svint8_t, + svusdot_lane_za32_u8_vg1x2 (w8, z28, z4, 2), + svusdot_lane_za32_vg1x2 (w8, z28, z4, 2)) + +/* +** usdot_lane_w8p7_z0_z4_3: +** usdot za\.s\[w8, 7, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (usdot_lane_w8p7_z0_z4_3, svuint8x2_t, svint8_t, + svusdot_lane_za32_u8_vg1x2 (w8 + 7, z0, z4, 3), + svusdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3)) + +/* +** usdot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** usdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (usdot_lane_w8p8_z0_z4_0, svuint8x2_t, svint8_t, + svusdot_lane_za32_u8_vg1x2 (w8 + 8, z0, z4, 0), + svusdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0)) + +/* +** usdot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** usdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (usdot_lane_w0m1_z0_z4_1, svuint8x2_t, svint8_t, + svusdot_lane_za32_u8_vg1x2 (w0 - 1, z0, z4, 1), + svusdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1)) + +/* +** usdot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** usdot za\.s\[w8, 0, vgx2\], {z4\.b - z5\.b}, z15\.b\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (usdot_lane_w8_z4_z15_2, svuint8x2_t, svint8_t, + svusdot_lane_za32_u8_vg1x2 (w8, z4, z15, 2), + svusdot_lane_za32_vg1x2 (w8, z4, z15, 2)) + +/* +** usdot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** usdot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, \1\.b\[3\] +** ret +*/ +TEST_ZA_LANE (usdot_lane_w8_z28_z16_3, svuint8x2_t, svint8_t, + svusdot_lane_za32_u8_vg1x2 (w8, z28, z16, 3), + svusdot_lane_za32_vg1x2 (w8, z28, z16, 3)) + +/* +** usdot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** usdot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.b\[0\] +** ret +*/ +TEST_ZA_LANE (usdot_lane_w8_z17_z7_0, svuint8x2_t, svint8_t, + svusdot_lane_za32_u8_vg1x2 (w8, z17, z7, 0), + svusdot_lane_za32_vg1x2 (w8, z17, z7, 0)) + +/* +** usdot_lane_w8_z22_z4_1: +** usdot za\.s\[w8, 0, vgx2\], {z22\.b - z23\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (usdot_lane_w8_z22_z4_1, svuint8x2_t, svint8_t, + svusdot_lane_za32_u8_vg1x2 (w8, z22, z4, 1), + svusdot_lane_za32_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usdot_lane_za32_u8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usdot_lane_za32_u8_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..b087bccf5e55177d44590ecbb7382ab9662fa7c3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usdot_lane_za32_u8_vg1x4.c @@ -0,0 +1,108 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** usdot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** usdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (usdot_lane_0_z0_z4_0, svuint8x4_t, svint8_t, + svusdot_lane_za32_u8_vg1x4 (0, z0, z4, 0), + svusdot_lane_za32_vg1x4 (0, z0, z4, 0)) + +/* +** usdot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** usdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (usdot_lane_w0_z0_z7_1, svuint8x4_t, svint8_t, + svusdot_lane_za32_u8_vg1x4 (w0, z0, z7, 1), + svusdot_lane_za32_vg1x4 (w0, z0, z7, 1)) + +/* +** usdot_lane_w8_z28_z4_2: +** usdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (usdot_lane_w8_z28_z4_2, svuint8x4_t, svint8_t, + svusdot_lane_za32_u8_vg1x4 (w8, z28, z4, 2), + svusdot_lane_za32_vg1x4 (w8, z28, z4, 2)) + +/* +** usdot_lane_w8p7_z0_z4_3: +** usdot za\.s\[w8, 7, vgx4\], {z0\.b - z3\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (usdot_lane_w8p7_z0_z4_3, svuint8x4_t, svint8_t, + svusdot_lane_za32_u8_vg1x4 (w8 + 7, z0, z4, 3), + svusdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3)) + +/* +** usdot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** usdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (usdot_lane_w8p8_z0_z4_0, svuint8x4_t, svint8_t, + svusdot_lane_za32_u8_vg1x4 (w8 + 8, z0, z4, 0), + svusdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0)) + +/* +** usdot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** usdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (usdot_lane_w0m1_z0_z4_1, svuint8x4_t, svint8_t, + svusdot_lane_za32_u8_vg1x4 (w0 - 1, z0, z4, 1), + svusdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1)) + +/* +** usdot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** usdot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, z15\.b\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (usdot_lane_w8_z4_z15_2, svuint8x4_t, svint8_t, + svusdot_lane_za32_u8_vg1x4 (w8, z4, z15, 2), + svusdot_lane_za32_vg1x4 (w8, z4, z15, 2)) + +/* +** usdot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** usdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, \1\.b\[3\] +** ret +*/ +TEST_ZA_LANE (usdot_lane_w8_z28_z16_3, svuint8x4_t, svint8_t, + svusdot_lane_za32_u8_vg1x4 (w8, z28, z16, 3), + svusdot_lane_za32_vg1x4 (w8, z28, z16, 3)) + +/* +** usdot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** usdot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.b\[0\] +** ret +*/ +TEST_ZA_LANE (usdot_lane_w8_z17_z7_0, svuint8x4_t, svint8_t, + svusdot_lane_za32_u8_vg1x4 (w8, z17, z7, 0), + svusdot_lane_za32_vg1x4 (w8, z17, z7, 0)) + +/* +** usdot_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** usdot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (usdot_lane_w8_z22_z4_1, svuint8x4_t, svint8_t, + svusdot_lane_za32_u8_vg1x4 (w8, z22, z4, 1), + svusdot_lane_za32_vg1x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usdot_za32_u8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usdot_za32_u8_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..14189ed6f19b8fd7c2e35e97b3009aa677aa9821 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usdot_za32_u8_vg1x2.c @@ -0,0 +1,243 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_0_z0_z4: +** mov (w8|w9|w10|w11), #?0 +** usdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z4, svuint8x2_t, + svusdot_za32_u8_vg1x2 (0, z0, svreinterpret_s8 (z4)), + svusdot_za32_vg1x2 (0, z0, svreinterpret_s8 (z4))) + +/* +** dot_w0_z0_z4: +** mov (w8|w9|w10|w11), w0 +** usdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z4, svuint8x2_t, + svusdot_za32_u8_vg1x2 (w0, z0, svreinterpret_s8 (z4)), + svusdot_za32_vg1x2 (w0, z0, svreinterpret_s8 (z4))) + +/* +** dot_w8_z0_z18: +** usdot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, {z18\.b - z19\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z18, svuint8x2_t, + svusdot_za32_u8_vg1x2 (w8, z0, svreinterpret_s8 (z18)), + svusdot_za32_vg1x2 (w8, z0, svreinterpret_s8 (z18))) + +/* +** dot_w8_z4_z18: +** usdot za\.s\[w8, 0, vgx2\], {z4\.b - z5\.b}, {z18\.b - z19\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z18, svuint8x2_t, + svusdot_za32_u8_vg1x2 (w8, z4, svreinterpret_s8 (z18)), + svusdot_za32_vg1x2 (w8, z4, svreinterpret_s8 (z18))) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z23: +** ... +** usdot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svuint8x2_t, + svusdot_za32_u8_vg1x2 (w8, z0, svreinterpret_s8 (z23)), + svusdot_za32_vg1x2 (w8, z0, svreinterpret_s8 (z23))) + +/* +** dot_w8_z23_z0: +** ... 
+** usdot za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svuint8x2_t, + svusdot_za32_u8_vg1x2 (w8, z23, svreinterpret_s8 (z0)), + svusdot_za32_vg1x2 (w8, z23, svreinterpret_s8 (z0))) + +/* +** dot_w8_z18_z28: +** usdot za\.s\[w8, 0, vgx2\], {z18\.b - z19\.b}, {z28\.b - z29\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z28, svuint8x2_t, + svusdot_za32_u8_vg1x2 (w8, z18, svreinterpret_s8 (z28)), + svusdot_za32_vg1x2 (w8, z18, svreinterpret_s8 (z28))) + +/* +** dot_w8_z28_z4: +** usdot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z4, svuint8x2_t, + svusdot_za32_u8_vg1x2 (w8, z28, svreinterpret_s8 (z4)), + svusdot_za32_vg1x2 (w8, z28, svreinterpret_s8 (z4))) + +/* +** dot_w8p1_z4_z0: +** usdot za\.s\[w8, 1, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svuint8x2_t, + svusdot_za32_u8_vg1x2 (w8 + 1, z4, svreinterpret_s8 (z0)), + svusdot_za32_vg1x2 (w8 + 1, z4, svreinterpret_s8 (z0))) + +/* +** dot_w8p2_z4_z0: +** usdot za\.s\[w8, 2, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svuint8x2_t, + svusdot_za32_u8_vg1x2 (w8 + 2, z4, svreinterpret_s8 (z0)), + svusdot_za32_vg1x2 (w8 + 2, z4, svreinterpret_s8 (z0))) + +/* +** dot_w11p4_z4_z0: +** usdot za\.s\[w11, 4, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svuint8x2_t, + svusdot_za32_u8_vg1x2 (w11 + 4, z4, svreinterpret_s8 (z0)), + svusdot_za32_vg1x2 (w11 + 4, z4, svreinterpret_s8 (z0))) + +/* +** dot_w8p7_z4_z0: +** usdot za\.s\[w8, 7, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svuint8x2_t, + svusdot_za32_u8_vg1x2 (w8 + 7, z4, svreinterpret_s8 (z0)), + svusdot_za32_vg1x2 (w8 + 7, z4, svreinterpret_s8 (z0))) + +/* +** dot_w8p8_z0_z4: +** add (w8|w9|w10|w11), w8, #?8 +** usdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z0_z4, svuint8x2_t, + svusdot_za32_u8_vg1x2 (w8 + 8, z0, svreinterpret_s8 (z4)), + svusdot_za32_vg1x2 (w8 + 8, z0, svreinterpret_s8 (z4))) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** usdot za\.s\[\1, 0, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svuint8x2_t, + svusdot_za32_u8_vg1x2 (w8 - 1, z4, svreinterpret_s8 (z0)), + svusdot_za32_vg1x2 (w8 - 1, z4, svreinterpret_s8 (z0))) + +/* +** dot_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** usdot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svuint8x2_t, svuint8_t, + svusdot_single_za32_u8_vg1x2 (0, z1, svreinterpret_s8 (z0)), + svusdot_za32_vg1x2 (0, z1, svreinterpret_s8 (z0))) + +/* +** dot_single_w0_z1_z0: +** mov (w8|w9|w10|w11), w0 +** usdot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svuint8x2_t, svuint8_t, + svusdot_single_za32_u8_vg1x2 (w0, z1, svreinterpret_s8 (z0)), + svusdot_za32_vg1x2 (w0, z1, svreinterpret_s8 (z0))) + +/* +** dot_single_w8_z1_z0: +** usdot za\.s\[w8, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svuint8x2_t, svuint8_t, + svusdot_single_za32_u8_vg1x2 (w8, z1, svreinterpret_s8 (z0)), + svusdot_za32_vg1x2 (w8, z1, svreinterpret_s8 (z0))) + +/* +** dot_single_w8p1_z1_z0: +** usdot za\.s\[w8, 1, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svuint8x2_t, svuint8_t, + svusdot_single_za32_u8_vg1x2 (w8 + 1, z1, svreinterpret_s8 (z0)), + svusdot_za32_vg1x2 (w8 + 1, z1, 
svreinterpret_s8 (z0))) + +/* +** dot_single_w8p2_z20_z0: +** usdot za\.s\[w8, 2, vgx2\], {z20\.b - z21\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svuint8x2_t, svuint8_t, + svusdot_single_za32_u8_vg1x2 (w8 + 2, z20, svreinterpret_s8 (z0)), + svusdot_za32_vg1x2 (w8 + 2, z20, svreinterpret_s8 (z0))) + +/* +** dot_single_w11p4_z27_z0: +** usdot za\.s\[w11, 4, vgx2\], {z27\.b - z28\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svuint8x2_t, svuint8_t, + svusdot_single_za32_u8_vg1x2 (w11 + 4, z27, svreinterpret_s8 (z0)), + svusdot_za32_vg1x2 (w11 + 4, z27, svreinterpret_s8 (z0))) + +/* +** dot_single_w8p7_z1_z0: +** usdot za\.s\[w8, 7, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svuint8x2_t, svuint8_t, + svusdot_single_za32_u8_vg1x2 (w8 + 7, z1, svreinterpret_s8 (z0)), + svusdot_za32_vg1x2 (w8 + 7, z1, svreinterpret_s8 (z0))) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** usdot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svuint8x2_t, svuint8_t, + svusdot_single_za32_u8_vg1x2 (w8 + 8, z1, svreinterpret_s8 (z0)), + svusdot_za32_vg1x2 (w8 + 8, z1, svreinterpret_s8 (z0))) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** usdot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svuint8x2_t, svuint8_t, + svusdot_single_za32_u8_vg1x2 (w0 - 1, z1, svreinterpret_s8 (z0)), + svusdot_za32_vg1x2 (w0 - 1, z1, svreinterpret_s8 (z0))) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** usdot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svuint8x2_t, svuint8_t, + svusdot_single_za32_u8_vg1x2 (w8, z0, svreinterpret_s8 (z15)), + svusdot_za32_vg1x2 (w8, z0, svreinterpret_s8 (z15))) + +/* +** dot_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** usdot za\.s\[w8, 0, vgx2\], {z20\.b - z21\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svuint8x2_t, svuint8_t, + svusdot_single_za32_u8_vg1x2 (w8, z20, svreinterpret_s8 (z16)), + svusdot_za32_vg1x2 (w8, z20, svreinterpret_s8 (z16))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usdot_za32_u8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usdot_za32_u8_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..6e9ae0334e93c5c7886413c0760dbab4dfc281b0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usdot_za32_u8_vg1x4.c @@ -0,0 +1,254 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** dot_0_z0_z4: +** mov (w8|w9|w10|w11), #?0 +** usdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z4, svuint8x4_t, + svusdot_za32_u8_vg1x4 (0, z0, svreinterpret_s8 (z4)), + svusdot_za32_vg1x4 (0, z0, svreinterpret_s8 (z4))) + +/* +** dot_w0_z0_z4: +** mov (w8|w9|w10|w11), w0 +** usdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z4, svuint8x4_t, + svusdot_za32_u8_vg1x4 (w0, z0, svreinterpret_s8 (z4)), + svusdot_za32_vg1x4 (w0, z0, svreinterpret_s8 (z4))) + +/* +** dot_w8_z4_z0: +** usdot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z0, svuint8x4_t, + svusdot_za32_u8_vg1x4 (w8, z4, svreinterpret_s8 (z0)), + svusdot_za32_vg1x4 (w8, z4, svreinterpret_s8 (z0))) + +/* Leave the assembler to check for correctness for 
misaligned registers. */ + +/* +** dot_w8_z0_z18: +** ... +** usdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z18, svuint8x4_t, + svusdot_za32_u8_vg1x4 (w8, z0, svreinterpret_s8 (z18)), + svusdot_za32_vg1x4 (w8, z0, svreinterpret_s8 (z18))) + +/* +** dot_w8_z18_z0: +** ... +** usdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z0, svuint8x4_t, + svusdot_za32_u8_vg1x4 (w8, z18, svreinterpret_s8 (z0)), + svusdot_za32_vg1x4 (w8, z18, svreinterpret_s8 (z0))) + +/* +** dot_w8_z0_z23: +** ... +** usdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svuint8x4_t, + svusdot_za32_u8_vg1x4 (w8, z0, svreinterpret_s8 (z23)), + svusdot_za32_vg1x4 (w8, z0, svreinterpret_s8 (z23))) + +/* +** dot_w8_z23_z0: +** ... +** usdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svuint8x4_t, + svusdot_za32_u8_vg1x4 (w8, z23, svreinterpret_s8 (z0)), + svusdot_za32_vg1x4 (w8, z23, svreinterpret_s8 (z0))) + +/* +** dot_w8_z4_z28: +** usdot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, {z28\.b - z31\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z28, svuint8x4_t, + svusdot_za32_u8_vg1x4 (w8, z4, svreinterpret_s8 (z28)), + svusdot_za32_vg1x4 (w8, z4, svreinterpret_s8 (z28))) + +/* +** dot_w8_z28_z0: +** usdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z0, svuint8x4_t, + svusdot_za32_u8_vg1x4 (w8, z28, svreinterpret_s8 (z0)), + svusdot_za32_vg1x4 (w8, z28, svreinterpret_s8 (z0))) + +/* +** dot_w8p1_z4_z0: +** usdot za\.s\[w8, 1, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svuint8x4_t, + svusdot_za32_u8_vg1x4 (w8 + 1, z4, svreinterpret_s8 (z0)), + svusdot_za32_vg1x4 (w8 + 1, z4, svreinterpret_s8 (z0))) + +/* +** dot_w8p2_z4_z0: +** usdot za\.s\[w8, 2, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svuint8x4_t, + svusdot_za32_u8_vg1x4 (w8 + 2, z4, svreinterpret_s8 (z0)), + svusdot_za32_vg1x4 (w8 + 2, z4, svreinterpret_s8 (z0))) + +/* +** dot_w11p4_z4_z0: +** usdot za\.s\[w11, 4, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svuint8x4_t, + svusdot_za32_u8_vg1x4 (w11 + 4, z4, svreinterpret_s8 (z0)), + svusdot_za32_vg1x4 (w11 + 4, z4, svreinterpret_s8 (z0))) + +/* +** dot_w8p7_z4_z0: +** usdot za\.s\[w8, 7, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svuint8x4_t, + svusdot_za32_u8_vg1x4 (w8 + 7, z4, svreinterpret_s8 (z0)), + svusdot_za32_vg1x4 (w8 + 7, z4, svreinterpret_s8 (z0))) + +/* +** dot_w8p8_z0_z4: +** add (w8|w9|w10|w11), w8, #?8 +** usdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z0_z4, svuint8x4_t, + svusdot_za32_u8_vg1x4 (w8 + 8, z0, svreinterpret_s8 (z4)), + svusdot_za32_vg1x4 (w8 + 8, z0, svreinterpret_s8 (z4))) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** usdot za\.s\[\1, 0, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svuint8x4_t, + svusdot_za32_u8_vg1x4 (w8 - 1, z4, svreinterpret_s8 (z0)), + svusdot_za32_vg1x4 (w8 - 1, z4, svreinterpret_s8 (z0))) + +/* +** dot_single_0_z1_z0: +** mov (w8|w9|w10|w11), #?0 +** usdot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svuint8x4_t, svuint8_t, + svusdot_single_za32_u8_vg1x4 (0, z1, svreinterpret_s8 (z0)), + svusdot_za32_vg1x4 (0, z1, svreinterpret_s8 (z0))) + +/* +** dot_single_w0_z1_z0: +** 
mov (w8|w9|w10|w11), w0 +** usdot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svuint8x4_t, svuint8_t, + svusdot_single_za32_u8_vg1x4 (w0, z1, svreinterpret_s8 (z0)), + svusdot_za32_vg1x4 (w0, z1, svreinterpret_s8 (z0))) + +/* +** dot_single_w8_z1_z0: +** usdot za\.s\[w8, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svuint8x4_t, svuint8_t, + svusdot_single_za32_u8_vg1x4 (w8, z1, svreinterpret_s8 (z0)), + svusdot_za32_vg1x4 (w8, z1, svreinterpret_s8 (z0))) + +/* +** dot_single_w8p1_z1_z0: +** usdot za\.s\[w8, 1, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svuint8x4_t, svuint8_t, + svusdot_single_za32_u8_vg1x4 (w8 + 1, z1, svreinterpret_s8 (z0)), + svusdot_za32_vg1x4 (w8 + 1, z1, svreinterpret_s8 (z0))) + +/* +** dot_single_w8p4_z20_z0: +** usdot za\.s\[w8, 4, vgx4\], {z20\.b - z23\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p4_z20_z0, svuint8x4_t, svuint8_t, + svusdot_single_za32_u8_vg1x4 (w8 + 4, z20, svreinterpret_s8 (z0)), + svusdot_za32_vg1x4 (w8 + 4, z20, svreinterpret_s8 (z0))) + +/* +** dot_single_w8p6_z27_z0: +** usdot za\.s\[w8, 6, vgx4\], {z27\.b - z30\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p6_z27_z0, svuint8x4_t, svuint8_t, + svusdot_single_za32_u8_vg1x4 (w8 + 6, z27, svreinterpret_s8 (z0)), + svusdot_za32_vg1x4 (w8 + 6, z27, svreinterpret_s8 (z0))) + +/* +** dot_single_w8p7_z1_z0: +** usdot za\.s\[w8, 7, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svuint8x4_t, svuint8_t, + svusdot_single_za32_u8_vg1x4 (w8 + 7, z1, svreinterpret_s8 (z0)), + svusdot_za32_vg1x4 (w8 + 7, z1, svreinterpret_s8 (z0))) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** usdot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svuint8x4_t, svuint8_t, + svusdot_single_za32_u8_vg1x4 (w8 + 8, z1, svreinterpret_s8 (z0)), + svusdot_za32_vg1x4 (w8 + 8, z1, svreinterpret_s8 (z0))) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** usdot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svuint8x4_t, svuint8_t, + svusdot_single_za32_u8_vg1x4 (w0 - 1, z1, svreinterpret_s8 (z0)), + svusdot_za32_vg1x4 (w0 - 1, z1, svreinterpret_s8 (z0))) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** usdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svuint8x4_t, svuint8_t, + svusdot_single_za32_u8_vg1x4 (w8, z0, svreinterpret_s8 (z15)), + svusdot_za32_vg1x4 (w8, z0, svreinterpret_s8 (z15))) + +/* +** dot_single_w8_z20_z16: +** mov (z[0-7]).d, z16.d +** usdot za\.s\[w8, 0, vgx4\], {z20\.b - z23\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svuint8x4_t, svuint8_t, + svusdot_single_za32_u8_vg1x4 (w8, z20, svreinterpret_s8 (z16)), + svusdot_za32_vg1x4 (w8, z20, svreinterpret_s8 (z16))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usvdot_lane_za32_u8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usvdot_lane_za32_u8_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..62938dc7927b17ee3503e46ca2964a727b607726 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/usvdot_lane_za32_u8_vg1x4.c @@ -0,0 +1,108 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** usvdot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** usvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (usvdot_lane_0_z0_z4_0, svuint8x4_t, svint8_t, + svusvdot_lane_za32_u8_vg1x4 (0, z0, z4, 0), + svusvdot_lane_za32_vg1x4 (0, z0, z4, 0)) + +/* +** usvdot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** usvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (usvdot_lane_w0_z0_z7_1, svuint8x4_t, svint8_t, + svusvdot_lane_za32_u8_vg1x4 (w0, z0, z7, 1), + svusvdot_lane_za32_vg1x4 (w0, z0, z7, 1)) + +/* +** usvdot_lane_w8_z28_z4_2: +** usvdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (usvdot_lane_w8_z28_z4_2, svuint8x4_t, svint8_t, + svusvdot_lane_za32_u8_vg1x4 (w8, z28, z4, 2), + svusvdot_lane_za32_vg1x4 (w8, z28, z4, 2)) + +/* +** usvdot_lane_w8p7_z0_z4_3: +** usvdot za\.s\[w8, 7, vgx4\], {z0\.b - z3\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (usvdot_lane_w8p7_z0_z4_3, svuint8x4_t, svint8_t, + svusvdot_lane_za32_u8_vg1x4 (w8 + 7, z0, z4, 3), + svusvdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3)) + +/* +** usvdot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** usvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (usvdot_lane_w8p8_z0_z4_0, svuint8x4_t, svint8_t, + svusvdot_lane_za32_u8_vg1x4 (w8 + 8, z0, z4, 0), + svusvdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0)) + +/* +** usvdot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** usvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (usvdot_lane_w0m1_z0_z4_1, svuint8x4_t, svint8_t, + svusvdot_lane_za32_u8_vg1x4 (w0 - 1, z0, z4, 1), + svusvdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1)) + +/* +** usvdot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** usvdot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, z15\.b\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (usvdot_lane_w8_z4_z15_2, svuint8x4_t, svint8_t, + svusvdot_lane_za32_u8_vg1x4 (w8, z4, z15, 2), + svusvdot_lane_za32_vg1x4 (w8, z4, z15, 2)) + +/* +** usvdot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** usvdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, \1\.b\[3\] +** ret +*/ +TEST_ZA_LANE (usvdot_lane_w8_z28_z16_3, svuint8x4_t, svint8_t, + svusvdot_lane_za32_u8_vg1x4 (w8, z28, z16, 3), + svusvdot_lane_za32_vg1x4 (w8, z28, z16, 3)) + +/* +** usvdot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** usvdot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.b\[0\] +** ret +*/ +TEST_ZA_LANE (usvdot_lane_w8_z17_z7_0, svuint8x4_t, svint8_t, + svusvdot_lane_za32_u8_vg1x4 (w8, z17, z7, 0), + svusvdot_lane_za32_vg1x4 (w8, z17, z7, 0)) + +/* +** usvdot_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** usvdot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (usvdot_lane_w8_z22_z4_1, svuint8x4_t, svint8_t, + svusvdot_lane_za32_u8_vg1x4 (w8, z22, z4, 1), + svusvdot_lane_za32_vg1x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_bf16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..17b952eeae9d8cdc0651d2026b771b2a17270dfc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_bf16_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.h - z1\.h}, z0\.h, z1\.h +** ret +*/ +TEST_XN (uzp_z0_z0, svbfloat16x2_t, z0, + svuzp_bf16_x2 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.h - z1\.h}, z4\.h, z5\.h +** ret +*/ +TEST_XN (uzp_z0_z4, svbfloat16x2_t, z0, + svuzp_bf16_x2 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** uzp {z4\.h - z5\.h}, z18\.h, z19\.h +** ret +*/ +TEST_XN (uzp_z4_z18, svbfloat16x2_t, z4, + svuzp_bf16_x2 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** uzp {z18\.h - z19\.h}, z23\.h, z24\.h +** ret +*/ +TEST_XN (uzp_z18_z23, svbfloat16x2_t, z18, + svuzp_bf16_x2 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, z28\.h, z29\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svbfloat16x2_t, z23, + svuzp_bf16_x2 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.h - z29\.h}, z0\.h, z1\.h +** ret +*/ +TEST_XN (uzp_z28_z0, svbfloat16x2_t, z28, + svuzp_bf16_x2 (z0), + svuzp (z0)) + +/* +** uzp_z28_z0_z23: +** uzp {z28\.h - z29\.h}, z0\.h, z23\.h +** ret +*/ +TEST_XN (uzp_z28_z0_z23, svbfloat16x2_t, z28, + svuzp_bf16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzp_z28_z5_z19: +** uzp {z28\.h - z29\.h}, z5\.h, z19\.h +** ret +*/ +TEST_XN (uzp_z28_z5_z19, svbfloat16x2_t, z28, + svuzp_bf16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_bf16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_bf16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..bbdb1dfaf61cb6efc7fbe790369c63b3e133bee5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_bf16_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.h - z3\.h}, {z0\.h 
- z3\.h} +** ret +*/ +TEST_XN (uzp_z0_z0, svbfloat16x4_t, z0, + svuzp_bf16_x4 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (uzp_z0_z4, svbfloat16x4_t, z0, + svuzp_bf16_x4 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.h - z7\.h}, [^\n]+ +** ret +*/ +TEST_XN (uzp_z4_z18, svbfloat16x4_t, z4, + svuzp_bf16_x4 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z18_z23, svbfloat16x4_t, z18, + svuzp_bf16_x4 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svbfloat16x4_t, z23, + svuzp_bf16_x4 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (uzp_z28_z0, svbfloat16x4_t, z28, + svuzp_bf16_x4 (z0), + svuzp (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..a4361ed1a9e188970b1053811986352e25f6d3f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f16_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.h - z1\.h}, z0\.h, z1\.h +** ret +*/ +TEST_XN (uzp_z0_z0, svfloat16x2_t, z0, + svuzp_f16_x2 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.h - z1\.h}, z4\.h, z5\.h +** ret +*/ +TEST_XN (uzp_z0_z4, svfloat16x2_t, z0, + svuzp_f16_x2 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** uzp {z4\.h - z5\.h}, z18\.h, z19\.h +** ret +*/ +TEST_XN (uzp_z4_z18, svfloat16x2_t, z4, + svuzp_f16_x2 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** uzp {z18\.h - z19\.h}, z23\.h, z24\.h +** ret +*/ +TEST_XN (uzp_z18_z23, svfloat16x2_t, z18, + svuzp_f16_x2 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, z28\.h, z29\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svfloat16x2_t, z23, + svuzp_f16_x2 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.h - z29\.h}, z0\.h, z1\.h +** ret +*/ +TEST_XN (uzp_z28_z0, svfloat16x2_t, z28, + svuzp_f16_x2 (z0), + svuzp (z0)) + +/* +** uzp_z28_z0_z23: +** uzp {z28\.h - z29\.h}, z0\.h, z23\.h +** ret +*/ +TEST_XN (uzp_z28_z0_z23, svfloat16x2_t, z28, + svuzp_f16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzp_z28_z5_z19: +** uzp {z28\.h - z29\.h}, z5\.h, z19\.h +** ret +*/ +TEST_XN (uzp_z28_z5_z19, svfloat16x2_t, z28, + svuzp_f16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..a7abeda4179d4854ed2d0fdb08b3439052be7e83 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f16_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (uzp_z0_z0, svfloat16x4_t, z0, + svuzp_f16_x4 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: 
+** uzp {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (uzp_z0_z4, svfloat16x4_t, z0, + svuzp_f16_x4 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.h - z7\.h}, [^\n]+ +** ret +*/ +TEST_XN (uzp_z4_z18, svfloat16x4_t, z4, + svuzp_f16_x4 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z18_z23, svfloat16x4_t, z18, + svuzp_f16_x4 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svfloat16x4_t, z23, + svuzp_f16_x4 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (uzp_z28_z0, svfloat16x4_t, z28, + svuzp_f16_x4 (z0), + svuzp (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..dbc91650943b54cb046c36af049e76adc5c40aa5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f32_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.s - z1\.s}, z0\.s, z1\.s +** ret +*/ +TEST_XN (uzp_z0_z0, svfloat32x2_t, z0, + svuzp_f32_x2 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.s - z1\.s}, z4\.s, z5\.s +** ret +*/ +TEST_XN (uzp_z0_z4, svfloat32x2_t, z0, + svuzp_f32_x2 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** uzp {z4\.s - z5\.s}, z18\.s, z19\.s +** ret +*/ +TEST_XN (uzp_z4_z18, svfloat32x2_t, z4, + svuzp_f32_x2 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** uzp {z18\.s - z19\.s}, z23\.s, z24\.s +** ret +*/ +TEST_XN (uzp_z18_z23, svfloat32x2_t, z18, + svuzp_f32_x2 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, z28\.s, z29\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svfloat32x2_t, z23, + svuzp_f32_x2 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.s - z29\.s}, z0\.s, z1\.s +** ret +*/ +TEST_XN (uzp_z28_z0, svfloat32x2_t, z28, + svuzp_f32_x2 (z0), + svuzp (z0)) + +/* +** uzp_z28_z0_z23: +** uzp {z28\.s - z29\.s}, z0\.s, z23\.s +** ret +*/ +TEST_XN (uzp_z28_z0_z23, svfloat32x2_t, z28, + svuzp_f32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzp_z28_z5_z19: +** uzp {z28\.s - z29\.s}, z5\.s, z19\.s +** ret +*/ +TEST_XN (uzp_z28_z5_z19, svfloat32x2_t, z28, + svuzp_f32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..3c42d3ca9f8acb228e5258cb7f1f8337f1871fe7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f32_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.s - z3\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (uzp_z0_z0, svfloat32x4_t, z0, + svuzp_f32_x4 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (uzp_z0_z4, svfloat32x4_t, z0, + svuzp_f32_x4 (z4), + svuzp (z4)) + 
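(Editorial aside, not part of the patch itself: the svuzp_*_x4 tests in
this area exercise the compiler's handling of SME2 register-tuple
alignment.  The four-vector forms of UZP take their register lists in
aligned quadruples -- z0-z3, z4-z7, ..., z28-z31 -- so when an argument
tuple such as z18 starts at a misaligned register, the compiler must
first copy it into an aligned quadruple, which is why the uzp_z4_z18
test immediately below matches four MOVs before the UZP.  A hypothetical
assembly shape that those regexps would accept, with the scratch
registers chosen purely for illustration:

	mov	z24.d, z18.d
	mov	z25.d, z19.d
	mov	z26.d, z20.d
	mov	z27.d, z21.d
	uzp	{z4.s - z7.s}, {z24.s - z27.s}
	ret

The [^\n]+ patterns deliberately leave the scratch registers
unconstrained, since register allocation is free to pick any aligned
quadruple for the copy.)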
+/* +** uzp_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.s - z7\.s}, [^\n]+ +** ret +*/ +TEST_XN (uzp_z4_z18, svfloat32x4_t, z4, + svuzp_f32_x4 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z18_z23, svfloat32x4_t, z18, + svuzp_f32_x4 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svfloat32x4_t, z23, + svuzp_f32_x4 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.s - z31\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (uzp_z28_z0, svfloat32x4_t, z28, + svuzp_f32_x4 (z0), + svuzp (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..c893d31f760d252beb7f51d52ee9eec8dd2d70a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f64_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.d - z1\.d}, z0\.d, z1\.d +** ret +*/ +TEST_XN (uzp_z0_z0, svfloat64x2_t, z0, + svuzp_f64_x2 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.d - z1\.d}, z4\.d, z5\.d +** ret +*/ +TEST_XN (uzp_z0_z4, svfloat64x2_t, z0, + svuzp_f64_x2 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** uzp {z4\.d - z5\.d}, z18\.d, z19\.d +** ret +*/ +TEST_XN (uzp_z4_z18, svfloat64x2_t, z4, + svuzp_f64_x2 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** uzp {z18\.d - z19\.d}, z23\.d, z24\.d +** ret +*/ +TEST_XN (uzp_z18_z23, svfloat64x2_t, z18, + svuzp_f64_x2 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, z28\.d, z29\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svfloat64x2_t, z23, + svuzp_f64_x2 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.d - z29\.d}, z0\.d, z1\.d +** ret +*/ +TEST_XN (uzp_z28_z0, svfloat64x2_t, z28, + svuzp_f64_x2 (z0), + svuzp (z0)) + +/* +** uzp_z28_z0_z23: +** uzp {z28\.d - z29\.d}, z0\.d, z23\.d +** ret +*/ +TEST_XN (uzp_z28_z0_z23, svfloat64x2_t, z28, + svuzp_f64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzp_z28_z5_z19: +** uzp {z28\.d - z29\.d}, z5\.d, z19\.d +** ret +*/ +TEST_XN (uzp_z28_z5_z19, svfloat64x2_t, z28, + svuzp_f64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..076a9a894276adaa537133055fb9aae81fd5fce5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f64_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.d - z3\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (uzp_z0_z0, svfloat64x4_t, z0, + svuzp_f64_x4 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (uzp_z0_z4, svfloat64x4_t, z0, + svuzp_f64_x4 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.d - z7\.d}, [^\n]+ +** ret +*/ 
+TEST_XN (uzp_z4_z18, svfloat64x4_t, z4, + svuzp_f64_x4 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z18_z23, svfloat64x4_t, z18, + svuzp_f64_x4 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svfloat64x4_t, z23, + svuzp_f64_x4 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.d - z31\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (uzp_z28_z0, svfloat64x4_t, z28, + svuzp_f64_x4 (z0), + svuzp (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..54607d45b84706159b75a7c5715b40230f755dd6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s16_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.h - z1\.h}, z0\.h, z1\.h +** ret +*/ +TEST_XN (uzp_z0_z0, svint16x2_t, z0, + svuzp_s16_x2 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.h - z1\.h}, z4\.h, z5\.h +** ret +*/ +TEST_XN (uzp_z0_z4, svint16x2_t, z0, + svuzp_s16_x2 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** uzp {z4\.h - z5\.h}, z18\.h, z19\.h +** ret +*/ +TEST_XN (uzp_z4_z18, svint16x2_t, z4, + svuzp_s16_x2 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** uzp {z18\.h - z19\.h}, z23\.h, z24\.h +** ret +*/ +TEST_XN (uzp_z18_z23, svint16x2_t, z18, + svuzp_s16_x2 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, z28\.h, z29\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svint16x2_t, z23, + svuzp_s16_x2 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.h - z29\.h}, z0\.h, z1\.h +** ret +*/ +TEST_XN (uzp_z28_z0, svint16x2_t, z28, + svuzp_s16_x2 (z0), + svuzp (z0)) + +/* +** uzp_z28_z0_z23: +** uzp {z28\.h - z29\.h}, z0\.h, z23\.h +** ret +*/ +TEST_XN (uzp_z28_z0_z23, svint16x2_t, z28, + svuzp_s16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzp_z28_z5_z19: +** uzp {z28\.h - z29\.h}, z5\.h, z19\.h +** ret +*/ +TEST_XN (uzp_z28_z5_z19, svint16x2_t, z28, + svuzp_s16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..8c4af1edaf511416e839301deaf392238bc8d642 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s16_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (uzp_z0_z0, svint16x4_t, z0, + svuzp_s16_x4 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (uzp_z0_z4, svint16x4_t, z0, + svuzp_s16_x4 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.h - z7\.h}, [^\n]+ +** ret +*/ +TEST_XN (uzp_z4_z18, svint16x4_t, z4, + svuzp_s16_x4 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov 
[^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z18_z23, svint16x4_t, z18, + svuzp_s16_x4 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svint16x4_t, z23, + svuzp_s16_x4 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (uzp_z28_z0, svint16x4_t, z28, + svuzp_s16_x4 (z0), + svuzp (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..9b1a81f1fdec831ae5a117057f97301ffde9a46b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s32_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.s - z1\.s}, z0\.s, z1\.s +** ret +*/ +TEST_XN (uzp_z0_z0, svint32x2_t, z0, + svuzp_s32_x2 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.s - z1\.s}, z4\.s, z5\.s +** ret +*/ +TEST_XN (uzp_z0_z4, svint32x2_t, z0, + svuzp_s32_x2 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** uzp {z4\.s - z5\.s}, z18\.s, z19\.s +** ret +*/ +TEST_XN (uzp_z4_z18, svint32x2_t, z4, + svuzp_s32_x2 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** uzp {z18\.s - z19\.s}, z23\.s, z24\.s +** ret +*/ +TEST_XN (uzp_z18_z23, svint32x2_t, z18, + svuzp_s32_x2 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, z28\.s, z29\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svint32x2_t, z23, + svuzp_s32_x2 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.s - z29\.s}, z0\.s, z1\.s +** ret +*/ +TEST_XN (uzp_z28_z0, svint32x2_t, z28, + svuzp_s32_x2 (z0), + svuzp (z0)) + +/* +** uzp_z28_z0_z23: +** uzp {z28\.s - z29\.s}, z0\.s, z23\.s +** ret +*/ +TEST_XN (uzp_z28_z0_z23, svint32x2_t, z28, + svuzp_s32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzp_z28_z5_z19: +** uzp {z28\.s - z29\.s}, z5\.s, z19\.s +** ret +*/ +TEST_XN (uzp_z28_z5_z19, svint32x2_t, z28, + svuzp_s32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..0f1ec0e7718644ec9b2f4bfc0fc0ebf765b30c7f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s32_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.s - z3\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (uzp_z0_z0, svint32x4_t, z0, + svuzp_s32_x4 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (uzp_z0_z4, svint32x4_t, z0, + svuzp_s32_x4 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.s - z7\.s}, [^\n]+ +** ret +*/ +TEST_XN (uzp_z4_z18, svint32x4_t, z4, + svuzp_s32_x4 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z18_z23, svint32x4_t, z18, + 
svuzp_s32_x4 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svint32x4_t, z23, + svuzp_s32_x4 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.s - z31\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (uzp_z28_z0, svint32x4_t, z28, + svuzp_s32_x4 (z0), + svuzp (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..91527ce31079a081de23def9543907e1cfad7594 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s64_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.d - z1\.d}, z0\.d, z1\.d +** ret +*/ +TEST_XN (uzp_z0_z0, svint64x2_t, z0, + svuzp_s64_x2 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.d - z1\.d}, z4\.d, z5\.d +** ret +*/ +TEST_XN (uzp_z0_z4, svint64x2_t, z0, + svuzp_s64_x2 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** uzp {z4\.d - z5\.d}, z18\.d, z19\.d +** ret +*/ +TEST_XN (uzp_z4_z18, svint64x2_t, z4, + svuzp_s64_x2 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** uzp {z18\.d - z19\.d}, z23\.d, z24\.d +** ret +*/ +TEST_XN (uzp_z18_z23, svint64x2_t, z18, + svuzp_s64_x2 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, z28\.d, z29\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svint64x2_t, z23, + svuzp_s64_x2 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.d - z29\.d}, z0\.d, z1\.d +** ret +*/ +TEST_XN (uzp_z28_z0, svint64x2_t, z28, + svuzp_s64_x2 (z0), + svuzp (z0)) + +/* +** uzp_z28_z0_z23: +** uzp {z28\.d - z29\.d}, z0\.d, z23\.d +** ret +*/ +TEST_XN (uzp_z28_z0_z23, svint64x2_t, z28, + svuzp_s64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzp_z28_z5_z19: +** uzp {z28\.d - z29\.d}, z5\.d, z19\.d +** ret +*/ +TEST_XN (uzp_z28_z5_z19, svint64x2_t, z28, + svuzp_s64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..b28ed72d4d80b64fb046a16165bba8402842f7c3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s64_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.d - z3\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (uzp_z0_z0, svint64x4_t, z0, + svuzp_s64_x4 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (uzp_z0_z4, svint64x4_t, z0, + svuzp_s64_x4 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.d - z7\.d}, [^\n]+ +** ret +*/ +TEST_XN (uzp_z4_z18, svint64x4_t, z4, + svuzp_s64_x4 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z18_z23, svint64x4_t, z18, + svuzp_s64_x4 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret 
+*/ +TEST_XN (uzp_z23_z28, svint64x4_t, z23, + svuzp_s64_x4 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.d - z31\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (uzp_z28_z0, svint64x4_t, z28, + svuzp_s64_x4 (z0), + svuzp (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..406d227272e738cf153b651cae3158305c99c1a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s8_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.b - z1\.b}, z0\.b, z1\.b +** ret +*/ +TEST_XN (uzp_z0_z0, svint8x2_t, z0, + svuzp_s8_x2 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.b - z1\.b}, z4\.b, z5\.b +** ret +*/ +TEST_XN (uzp_z0_z4, svint8x2_t, z0, + svuzp_s8_x2 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** uzp {z4\.b - z5\.b}, z18\.b, z19\.b +** ret +*/ +TEST_XN (uzp_z4_z18, svint8x2_t, z4, + svuzp_s8_x2 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** uzp {z18\.b - z19\.b}, z23\.b, z24\.b +** ret +*/ +TEST_XN (uzp_z18_z23, svint8x2_t, z18, + svuzp_s8_x2 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, z28\.b, z29\.b +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svint8x2_t, z23, + svuzp_s8_x2 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.b - z29\.b}, z0\.b, z1\.b +** ret +*/ +TEST_XN (uzp_z28_z0, svint8x2_t, z28, + svuzp_s8_x2 (z0), + svuzp (z0)) + +/* +** uzp_z28_z0_z23: +** uzp {z28\.b - z29\.b}, z0\.b, z23\.b +** ret +*/ +TEST_XN (uzp_z28_z0_z23, svint8x2_t, z28, + svuzp_s8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzp_z28_z5_z19: +** uzp {z28\.b - z29\.b}, z5\.b, z19\.b +** ret +*/ +TEST_XN (uzp_z28_z5_z19, svint8x2_t, z28, + svuzp_s8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..d29bbd5c4d7a144eb03d4c3435f59f60c155fce6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s8_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_XN (uzp_z0_z0, svint8x4_t, z0, + svuzp_s8_x4 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (uzp_z0_z4, svint8x4_t, z0, + svuzp_s8_x4 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.b - z7\.b}, [^\n]+ +** ret +*/ +TEST_XN (uzp_z4_z18, svint8x4_t, z4, + svuzp_s8_x4 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z18_z23, svint8x4_t, z18, + svuzp_s8_x4 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, {z28\.b - z31\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svint8x4_t, z23, + svuzp_s8_x4 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.b - z31\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_XN (uzp_z28_z0, 
svint8x4_t, z28, + svuzp_s8_x4 (z0), + svuzp (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..9990a5982e15b87bd643eeb3e3267f92ea5bb6fa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u16_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.h - z1\.h}, z0\.h, z1\.h +** ret +*/ +TEST_XN (uzp_z0_z0, svuint16x2_t, z0, + svuzp_u16_x2 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.h - z1\.h}, z4\.h, z5\.h +** ret +*/ +TEST_XN (uzp_z0_z4, svuint16x2_t, z0, + svuzp_u16_x2 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** uzp {z4\.h - z5\.h}, z18\.h, z19\.h +** ret +*/ +TEST_XN (uzp_z4_z18, svuint16x2_t, z4, + svuzp_u16_x2 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** uzp {z18\.h - z19\.h}, z23\.h, z24\.h +** ret +*/ +TEST_XN (uzp_z18_z23, svuint16x2_t, z18, + svuzp_u16_x2 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, z28\.h, z29\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svuint16x2_t, z23, + svuzp_u16_x2 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.h - z29\.h}, z0\.h, z1\.h +** ret +*/ +TEST_XN (uzp_z28_z0, svuint16x2_t, z28, + svuzp_u16_x2 (z0), + svuzp (z0)) + +/* +** uzp_z28_z0_z23: +** uzp {z28\.h - z29\.h}, z0\.h, z23\.h +** ret +*/ +TEST_XN (uzp_z28_z0_z23, svuint16x2_t, z28, + svuzp_u16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzp_z28_z5_z19: +** uzp {z28\.h - z29\.h}, z5\.h, z19\.h +** ret +*/ +TEST_XN (uzp_z28_z5_z19, svuint16x2_t, z28, + svuzp_u16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..5e72ba831dd52d328830ff0e421105ba9e9a4425 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u16_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (uzp_z0_z0, svuint16x4_t, z0, + svuzp_u16_x4 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (uzp_z0_z4, svuint16x4_t, z0, + svuzp_u16_x4 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.h - z7\.h}, [^\n]+ +** ret +*/ +TEST_XN (uzp_z4_z18, svuint16x4_t, z4, + svuzp_u16_x4 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z18_z23, svuint16x4_t, z18, + svuzp_u16_x4 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svuint16x4_t, z23, + svuzp_u16_x4 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (uzp_z28_z0, svuint16x4_t, z28, + svuzp_u16_x4 (z0), + svuzp (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u32_x2.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..4bdcaa34f5d79a0fe71d184f345faff5e04738ad --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u32_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.s - z1\.s}, z0\.s, z1\.s +** ret +*/ +TEST_XN (uzp_z0_z0, svuint32x2_t, z0, + svuzp_u32_x2 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.s - z1\.s}, z4\.s, z5\.s +** ret +*/ +TEST_XN (uzp_z0_z4, svuint32x2_t, z0, + svuzp_u32_x2 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** uzp {z4\.s - z5\.s}, z18\.s, z19\.s +** ret +*/ +TEST_XN (uzp_z4_z18, svuint32x2_t, z4, + svuzp_u32_x2 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** uzp {z18\.s - z19\.s}, z23\.s, z24\.s +** ret +*/ +TEST_XN (uzp_z18_z23, svuint32x2_t, z18, + svuzp_u32_x2 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, z28\.s, z29\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svuint32x2_t, z23, + svuzp_u32_x2 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.s - z29\.s}, z0\.s, z1\.s +** ret +*/ +TEST_XN (uzp_z28_z0, svuint32x2_t, z28, + svuzp_u32_x2 (z0), + svuzp (z0)) + +/* +** uzp_z28_z0_z23: +** uzp {z28\.s - z29\.s}, z0\.s, z23\.s +** ret +*/ +TEST_XN (uzp_z28_z0_z23, svuint32x2_t, z28, + svuzp_u32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzp_z28_z5_z19: +** uzp {z28\.s - z29\.s}, z5\.s, z19\.s +** ret +*/ +TEST_XN (uzp_z28_z5_z19, svuint32x2_t, z28, + svuzp_u32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..ede11a98bc01c234279a39e10630454c5fb091f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u32_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.s - z3\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (uzp_z0_z0, svuint32x4_t, z0, + svuzp_u32_x4 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (uzp_z0_z4, svuint32x4_t, z0, + svuzp_u32_x4 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.s - z7\.s}, [^\n]+ +** ret +*/ +TEST_XN (uzp_z4_z18, svuint32x4_t, z4, + svuzp_u32_x4 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z18_z23, svuint32x4_t, z18, + svuzp_u32_x4 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svuint32x4_t, z23, + svuzp_u32_x4 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.s - z31\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (uzp_z28_z0, svuint32x4_t, z28, + svuzp_u32_x4 (z0), + svuzp (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u64_x2.c new file mode 100644 index 
0000000000000000000000000000000000000000..95b69d1f711276a2fcdcd629390f7cd21d9f3dd2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u64_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.d - z1\.d}, z0\.d, z1\.d +** ret +*/ +TEST_XN (uzp_z0_z0, svuint64x2_t, z0, + svuzp_u64_x2 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.d - z1\.d}, z4\.d, z5\.d +** ret +*/ +TEST_XN (uzp_z0_z4, svuint64x2_t, z0, + svuzp_u64_x2 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** uzp {z4\.d - z5\.d}, z18\.d, z19\.d +** ret +*/ +TEST_XN (uzp_z4_z18, svuint64x2_t, z4, + svuzp_u64_x2 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** uzp {z18\.d - z19\.d}, z23\.d, z24\.d +** ret +*/ +TEST_XN (uzp_z18_z23, svuint64x2_t, z18, + svuzp_u64_x2 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, z28\.d, z29\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svuint64x2_t, z23, + svuzp_u64_x2 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.d - z29\.d}, z0\.d, z1\.d +** ret +*/ +TEST_XN (uzp_z28_z0, svuint64x2_t, z28, + svuzp_u64_x2 (z0), + svuzp (z0)) + +/* +** uzp_z28_z0_z23: +** uzp {z28\.d - z29\.d}, z0\.d, z23\.d +** ret +*/ +TEST_XN (uzp_z28_z0_z23, svuint64x2_t, z28, + svuzp_u64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzp_z28_z5_z19: +** uzp {z28\.d - z29\.d}, z5\.d, z19\.d +** ret +*/ +TEST_XN (uzp_z28_z5_z19, svuint64x2_t, z28, + svuzp_u64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..a5a72347c18943ebfc5960f59bb8afb98de71050 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u64_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.d - z3\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (uzp_z0_z0, svuint64x4_t, z0, + svuzp_u64_x4 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (uzp_z0_z4, svuint64x4_t, z0, + svuzp_u64_x4 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.d - z7\.d}, [^\n]+ +** ret +*/ +TEST_XN (uzp_z4_z18, svuint64x4_t, z4, + svuzp_u64_x4 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z18_z23, svuint64x4_t, z18, + svuzp_u64_x4 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svuint64x4_t, z23, + svuzp_u64_x4 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.d - z31\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (uzp_z28_z0, svuint64x4_t, z28, + svuzp_u64_x4 (z0), + svuzp (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..52002920d2360dbac65bb6c36dba862fa36b95db --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u8_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.b - z1\.b}, z0\.b, z1\.b +** ret +*/ +TEST_XN (uzp_z0_z0, svuint8x2_t, z0, + svuzp_u8_x2 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.b - z1\.b}, z4\.b, z5\.b +** ret +*/ +TEST_XN (uzp_z0_z4, svuint8x2_t, z0, + svuzp_u8_x2 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** uzp {z4\.b - z5\.b}, z18\.b, z19\.b +** ret +*/ +TEST_XN (uzp_z4_z18, svuint8x2_t, z4, + svuzp_u8_x2 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** uzp {z18\.b - z19\.b}, z23\.b, z24\.b +** ret +*/ +TEST_XN (uzp_z18_z23, svuint8x2_t, z18, + svuzp_u8_x2 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, z28\.b, z29\.b +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svuint8x2_t, z23, + svuzp_u8_x2 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.b - z29\.b}, z0\.b, z1\.b +** ret +*/ +TEST_XN (uzp_z28_z0, svuint8x2_t, z28, + svuzp_u8_x2 (z0), + svuzp (z0)) + +/* +** uzp_z28_z0_z23: +** uzp {z28\.b - z29\.b}, z0\.b, z23\.b +** ret +*/ +TEST_XN (uzp_z28_z0_z23, svuint8x2_t, z28, + svuzp_u8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzp_z28_z5_z19: +** uzp {z28\.b - z29\.b}, z5\.b, z19\.b +** ret +*/ +TEST_XN (uzp_z28_z5_z19, svuint8x2_t, z28, + svuzp_u8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..59240fad07658ce92325feb74f9494d802edbed0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u8_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzp_z0_z0: +** uzp {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_XN (uzp_z0_z0, svuint8x4_t, z0, + svuzp_u8_x4 (z0), + svuzp (z0)) + +/* +** uzp_z0_z4: +** uzp {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (uzp_z0_z4, svuint8x4_t, z0, + svuzp_u8_x4 (z4), + svuzp (z4)) + +/* +** uzp_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.b - z7\.b}, [^\n]+ +** ret +*/ +TEST_XN (uzp_z4_z18, svuint8x4_t, z4, + svuzp_u8_x4 (z18), + svuzp (z18)) + +/* +** uzp_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z18_z23, svuint8x4_t, z18, + svuzp_u8_x4 (z23), + svuzp (z23)) + +/* +** uzp_z23_z28: +** uzp [^\n]+, {z28\.b - z31\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzp_z23_z28, svuint8x4_t, z23, + svuzp_u8_x4 (z28), + svuzp (z28)) + +/* +** uzp_z28_z0: +** uzp {z28\.b - z31\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_XN (uzp_z28_z0, svuint8x4_t, z28, + svuzp_u8_x4 (z0), + svuzp (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_bf16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..6d9ae624deee11b719526e9f2326dd4a8126822f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_bf16_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include 
"test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z0_z0, svbfloat16x2_t, z0, + svuzpq_bf16_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (uzpq_z0_z4, svbfloat16x2_t, z0, + svuzpq_bf16_x2 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** uzp {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z4_z18, svbfloat16x2_t, z4, + svuzpq_bf16_x2 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** uzp {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (uzpq_z18_z23, svbfloat16x2_t, z18, + svuzpq_bf16_x2 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svbfloat16x2_t, z23, + svuzpq_bf16_x2 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z28_z0, svbfloat16x2_t, z28, + svuzpq_bf16_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z28_z0_z23: +** uzp {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (uzpq_z28_z0_z23, svbfloat16x2_t, z28, + svuzpq_bf16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzpq_z28_z5_z19: +** uzp {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z28_z5_z19, svbfloat16x2_t, z28, + svuzpq_bf16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_bf16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_bf16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..5a3c32f4363ca91ac78eb239a842d15a2fc5cf99 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_bf16_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z0_z0, svbfloat16x4_t, z0, + svuzpq_bf16_x4 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (uzpq_z0_z4, svbfloat16x4_t, z0, + svuzpq_bf16_x4 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (uzpq_z4_z18, svbfloat16x4_t, z4, + svuzpq_bf16_x4 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z18_z23, svbfloat16x4_t, z18, + svuzpq_bf16_x4 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svbfloat16x4_t, z23, + svuzpq_bf16_x4 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z28_z0, svbfloat16x4_t, z28, + svuzpq_bf16_x4 (z0), + svuzpq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..e8add862b47ba6a75bdf6beaeeb4b890a4d90318 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f16_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** 
uzpq_z0_z0: +** uzp {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z0_z0, svfloat16x2_t, z0, + svuzpq_f16_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (uzpq_z0_z4, svfloat16x2_t, z0, + svuzpq_f16_x2 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** uzp {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z4_z18, svfloat16x2_t, z4, + svuzpq_f16_x2 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** uzp {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (uzpq_z18_z23, svfloat16x2_t, z18, + svuzpq_f16_x2 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svfloat16x2_t, z23, + svuzpq_f16_x2 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z28_z0, svfloat16x2_t, z28, + svuzpq_f16_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z28_z0_z23: +** uzp {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (uzpq_z28_z0_z23, svfloat16x2_t, z28, + svuzpq_f16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzpq_z28_z5_z19: +** uzp {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z28_z5_z19, svfloat16x2_t, z28, + svuzpq_f16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..d01f1e048c0efdb7dea7f43d63dc0686ac84d5ca --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f16_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z0_z0, svfloat16x4_t, z0, + svuzpq_f16_x4 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (uzpq_z0_z4, svfloat16x4_t, z0, + svuzpq_f16_x4 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (uzpq_z4_z18, svfloat16x4_t, z4, + svuzpq_f16_x4 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z18_z23, svfloat16x4_t, z18, + svuzpq_f16_x4 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svfloat16x4_t, z23, + svuzpq_f16_x4 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z28_z0, svfloat16x4_t, z28, + svuzpq_f16_x4 (z0), + svuzpq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..d4487f45082bc2ba701df51eaaeeb2f27a414924 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f32_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ 
+TEST_XN (uzpq_z0_z0, svfloat32x2_t, z0, + svuzpq_f32_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (uzpq_z0_z4, svfloat32x2_t, z0, + svuzpq_f32_x2 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** uzp {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z4_z18, svfloat32x2_t, z4, + svuzpq_f32_x2 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** uzp {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (uzpq_z18_z23, svfloat32x2_t, z18, + svuzpq_f32_x2 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svfloat32x2_t, z23, + svuzpq_f32_x2 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z28_z0, svfloat32x2_t, z28, + svuzpq_f32_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z28_z0_z23: +** uzp {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (uzpq_z28_z0_z23, svfloat32x2_t, z28, + svuzpq_f32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzpq_z28_z5_z19: +** uzp {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z28_z5_z19, svfloat32x2_t, z28, + svuzpq_f32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..998294ef2ef651d945ee85f19ecd7c0af966d13f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f32_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z0_z0, svfloat32x4_t, z0, + svuzpq_f32_x4 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (uzpq_z0_z4, svfloat32x4_t, z0, + svuzpq_f32_x4 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (uzpq_z4_z18, svfloat32x4_t, z4, + svuzpq_f32_x4 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z18_z23, svfloat32x4_t, z18, + svuzpq_f32_x4 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svfloat32x4_t, z23, + svuzpq_f32_x4 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z28_z0, svfloat32x4_t, z28, + svuzpq_f32_x4 (z0), + svuzpq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..8f7bc9ff23aa8e40d3bb4f59d5bb0268b350ac81 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f64_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z0_z0, svfloat64x2_t, z0, + svuzpq_f64_x2 (z0), + 
svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (uzpq_z0_z4, svfloat64x2_t, z0, + svuzpq_f64_x2 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** uzp {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z4_z18, svfloat64x2_t, z4, + svuzpq_f64_x2 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** uzp {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (uzpq_z18_z23, svfloat64x2_t, z18, + svuzpq_f64_x2 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svfloat64x2_t, z23, + svuzpq_f64_x2 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z28_z0, svfloat64x2_t, z28, + svuzpq_f64_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z28_z0_z23: +** uzp {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (uzpq_z28_z0_z23, svfloat64x2_t, z28, + svuzpq_f64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzpq_z28_z5_z19: +** uzp {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z28_z5_z19, svfloat64x2_t, z28, + svuzpq_f64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..dc7b7bf56bb8ccfc1468b8b95c2e6c24fd22a276 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f64_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z0_z0, svfloat64x4_t, z0, + svuzpq_f64_x4 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (uzpq_z0_z4, svfloat64x4_t, z0, + svuzpq_f64_x4 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (uzpq_z4_z18, svfloat64x4_t, z4, + svuzpq_f64_x4 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z18_z23, svfloat64x4_t, z18, + svuzpq_f64_x4 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svfloat64x4_t, z23, + svuzpq_f64_x4 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z28_z0, svfloat64x4_t, z28, + svuzpq_f64_x4 (z0), + svuzpq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..2cfe7afcfe116ed98b9feec0f18cb1ee203e9d6c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s16_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z0_z0, svint16x2_t, z0, + svuzpq_s16_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z1\.q}, z4\.q, 
z5\.q +** ret +*/ +TEST_XN (uzpq_z0_z4, svint16x2_t, z0, + svuzpq_s16_x2 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** uzp {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z4_z18, svint16x2_t, z4, + svuzpq_s16_x2 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** uzp {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (uzpq_z18_z23, svint16x2_t, z18, + svuzpq_s16_x2 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svint16x2_t, z23, + svuzpq_s16_x2 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z28_z0, svint16x2_t, z28, + svuzpq_s16_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z28_z0_z23: +** uzp {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (uzpq_z28_z0_z23, svint16x2_t, z28, + svuzpq_s16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzpq_z28_z5_z19: +** uzp {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z28_z5_z19, svint16x2_t, z28, + svuzpq_s16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..0cedde8264dd492c07e3b78d737ad2b2995c785b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s16_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z0_z0, svint16x4_t, z0, + svuzpq_s16_x4 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (uzpq_z0_z4, svint16x4_t, z0, + svuzpq_s16_x4 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (uzpq_z4_z18, svint16x4_t, z4, + svuzpq_s16_x4 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z18_z23, svint16x4_t, z18, + svuzpq_s16_x4 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svint16x4_t, z23, + svuzpq_s16_x4 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z28_z0, svint16x4_t, z28, + svuzpq_s16_x4 (z0), + svuzpq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..bd583bb5d847123b732b2f900201712276756d90 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s32_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z0_z0, svint32x2_t, z0, + svuzpq_s32_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (uzpq_z0_z4, svint32x2_t, z0, + svuzpq_s32_x2 (z4), + svuzpq (z4)) + 
+/* +** uzpq_z4_z18: +** uzp {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z4_z18, svint32x2_t, z4, + svuzpq_s32_x2 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** uzp {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (uzpq_z18_z23, svint32x2_t, z18, + svuzpq_s32_x2 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svint32x2_t, z23, + svuzpq_s32_x2 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z28_z0, svint32x2_t, z28, + svuzpq_s32_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z28_z0_z23: +** uzp {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (uzpq_z28_z0_z23, svint32x2_t, z28, + svuzpq_s32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzpq_z28_z5_z19: +** uzp {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z28_z5_z19, svint32x2_t, z28, + svuzpq_s32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..787ffed867920ef5e77844021a2a4fda82109f9d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s32_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z0_z0, svint32x4_t, z0, + svuzpq_s32_x4 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (uzpq_z0_z4, svint32x4_t, z0, + svuzpq_s32_x4 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (uzpq_z4_z18, svint32x4_t, z4, + svuzpq_s32_x4 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z18_z23, svint32x4_t, z18, + svuzpq_s32_x4 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svint32x4_t, z23, + svuzpq_s32_x4 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z28_z0, svint32x4_t, z28, + svuzpq_s32_x4 (z0), + svuzpq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..4b2aa57345c78c97a8d310cdfbc606a9077a9df9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s64_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z0_z0, svint64x2_t, z0, + svuzpq_s64_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (uzpq_z0_z4, svint64x2_t, z0, + svuzpq_s64_x2 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** uzp {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z4_z18, 
svint64x2_t, z4, + svuzpq_s64_x2 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** uzp {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (uzpq_z18_z23, svint64x2_t, z18, + svuzpq_s64_x2 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svint64x2_t, z23, + svuzpq_s64_x2 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z28_z0, svint64x2_t, z28, + svuzpq_s64_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z28_z0_z23: +** uzp {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (uzpq_z28_z0_z23, svint64x2_t, z28, + svuzpq_s64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzpq_z28_z5_z19: +** uzp {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z28_z5_z19, svint64x2_t, z28, + svuzpq_s64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..838697a4a701fa758467e2f720fa48e3ca95c5a4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s64_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z0_z0, svint64x4_t, z0, + svuzpq_s64_x4 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (uzpq_z0_z4, svint64x4_t, z0, + svuzpq_s64_x4 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (uzpq_z4_z18, svint64x4_t, z4, + svuzpq_s64_x4 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z18_z23, svint64x4_t, z18, + svuzpq_s64_x4 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svint64x4_t, z23, + svuzpq_s64_x4 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z28_z0, svint64x4_t, z28, + svuzpq_s64_x4 (z0), + svuzpq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..fe5098716c47065378530ae94dc6d001f6e702a6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s8_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z0_z0, svint8x2_t, z0, + svuzpq_s8_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (uzpq_z0_z4, svint8x2_t, z0, + svuzpq_s8_x2 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** uzp {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z4_z18, svint8x2_t, z4, + svuzpq_s8_x2 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** uzp {z18\.q - z19\.q}, 
z23\.q, z24\.q +** ret +*/ +TEST_XN (uzpq_z18_z23, svint8x2_t, z18, + svuzpq_s8_x2 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svint8x2_t, z23, + svuzpq_s8_x2 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z28_z0, svint8x2_t, z28, + svuzpq_s8_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z28_z0_z23: +** uzp {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (uzpq_z28_z0_z23, svint8x2_t, z28, + svuzpq_s8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzpq_z28_z5_z19: +** uzp {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z28_z5_z19, svint8x2_t, z28, + svuzpq_s8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..592cdc168eb74b8a4c899ffbe7b2108e271e63ce --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s8_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z0_z0, svint8x4_t, z0, + svuzpq_s8_x4 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (uzpq_z0_z4, svint8x4_t, z0, + svuzpq_s8_x4 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (uzpq_z4_z18, svint8x4_t, z4, + svuzpq_s8_x4 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z18_z23, svint8x4_t, z18, + svuzpq_s8_x4 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svint8x4_t, z23, + svuzpq_s8_x4 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z28_z0, svint8x4_t, z28, + svuzpq_s8_x4 (z0), + svuzpq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..69a7aa64474118107643bf14eb1211120b5b9e19 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u16_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z0_z0, svuint16x2_t, z0, + svuzpq_u16_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (uzpq_z0_z4, svuint16x2_t, z0, + svuzpq_u16_x2 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** uzp {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z4_z18, svuint16x2_t, z4, + svuzpq_u16_x2 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** uzp {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (uzpq_z18_z23, svuint16x2_t, z18, + svuzpq_u16_x2 (z23), + svuzpq (z23)) + +/* +** 
uzpq_z23_z28: +** uzp [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svuint16x2_t, z23, + svuzpq_u16_x2 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z28_z0, svuint16x2_t, z28, + svuzpq_u16_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z28_z0_z23: +** uzp {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (uzpq_z28_z0_z23, svuint16x2_t, z28, + svuzpq_u16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzpq_z28_z5_z19: +** uzp {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z28_z5_z19, svuint16x2_t, z28, + svuzpq_u16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..e448d62059e181d08501c9cc5481a7058bacb367 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u16_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z0_z0, svuint16x4_t, z0, + svuzpq_u16_x4 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (uzpq_z0_z4, svuint16x4_t, z0, + svuzpq_u16_x4 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (uzpq_z4_z18, svuint16x4_t, z4, + svuzpq_u16_x4 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z18_z23, svuint16x4_t, z18, + svuzpq_u16_x4 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svuint16x4_t, z23, + svuzpq_u16_x4 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z28_z0, svuint16x4_t, z28, + svuzpq_u16_x4 (z0), + svuzpq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..94a89c80bf214ec0eb0b4b6e4cae0120452c4478 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u32_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z0_z0, svuint32x2_t, z0, + svuzpq_u32_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (uzpq_z0_z4, svuint32x2_t, z0, + svuzpq_u32_x2 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** uzp {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z4_z18, svuint32x2_t, z4, + svuzpq_u32_x2 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** uzp {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (uzpq_z18_z23, svuint32x2_t, z18, + svuzpq_u32_x2 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret 
+*/ +TEST_XN (uzpq_z23_z28, svuint32x2_t, z23, + svuzpq_u32_x2 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z28_z0, svuint32x2_t, z28, + svuzpq_u32_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z28_z0_z23: +** uzp {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (uzpq_z28_z0_z23, svuint32x2_t, z28, + svuzpq_u32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzpq_z28_z5_z19: +** uzp {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z28_z5_z19, svuint32x2_t, z28, + svuzpq_u32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..efb71c75f4dbe5c7ae7d82340220761832d29109 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u32_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z0_z0, svuint32x4_t, z0, + svuzpq_u32_x4 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (uzpq_z0_z4, svuint32x4_t, z0, + svuzpq_u32_x4 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (uzpq_z4_z18, svuint32x4_t, z4, + svuzpq_u32_x4 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z18_z23, svuint32x4_t, z18, + svuzpq_u32_x4 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svuint32x4_t, z23, + svuzpq_u32_x4 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z28_z0, svuint32x4_t, z28, + svuzpq_u32_x4 (z0), + svuzpq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..779906cacc9a6395d9a2d926ce911bf344b0b3f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u64_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z0_z0, svuint64x2_t, z0, + svuzpq_u64_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (uzpq_z0_z4, svuint64x2_t, z0, + svuzpq_u64_x2 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** uzp {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z4_z18, svuint64x2_t, z4, + svuzpq_u64_x2 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** uzp {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (uzpq_z18_z23, svuint64x2_t, z18, + svuzpq_u64_x2 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svuint64x2_t, z23, + svuzpq_u64_x2 (z28), + svuzpq 
(z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z28_z0, svuint64x2_t, z28, + svuzpq_u64_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z28_z0_z23: +** uzp {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (uzpq_z28_z0_z23, svuint64x2_t, z28, + svuzpq_u64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzpq_z28_z5_z19: +** uzp {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z28_z5_z19, svuint64x2_t, z28, + svuzpq_u64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..039f42ef791ae1b38976b16e76691927d04c8828 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u64_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z0_z0, svuint64x4_t, z0, + svuzpq_u64_x4 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (uzpq_z0_z4, svuint64x4_t, z0, + svuzpq_u64_x4 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (uzpq_z4_z18, svuint64x4_t, z4, + svuzpq_u64_x4 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z18_z23, svuint64x4_t, z18, + svuzpq_u64_x4 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svuint64x4_t, z23, + svuzpq_u64_x4 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z28_z0, svuint64x4_t, z28, + svuzpq_u64_x4 (z0), + svuzpq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..04fe9e6dbfe79771cc12d7e90eaf4ebe93ab2f3c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u8_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (uzpq_z0_z0, svuint8x2_t, z0, + svuzpq_u8_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (uzpq_z0_z4, svuint8x2_t, z0, + svuzpq_u8_x2 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** uzp {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z4_z18, svuint8x2_t, z4, + svuzpq_u8_x2 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** uzp {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (uzpq_z18_z23, svuint8x2_t, z18, + svuzpq_u8_x2 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svuint8x2_t, z23, + svuzpq_u8_x2 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN 
(uzpq_z28_z0, svuint8x2_t, z28, + svuzpq_u8_x2 (z0), + svuzpq (z0)) + +/* +** uzpq_z28_z0_z23: +** uzp {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (uzpq_z28_z0_z23, svuint8x2_t, z28, + svuzpq_u8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** uzpq_z28_z5_z19: +** uzp {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (uzpq_z28_z5_z19, svuint8x2_t, z28, + svuzpq_u8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..133d95aff44b8e9fe9caa1e13ba9307148f85d7a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u8_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** uzpq_z0_z0: +** uzp {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z0_z0, svuint8x4_t, z0, + svuzpq_u8_x4 (z0), + svuzpq (z0)) + +/* +** uzpq_z0_z4: +** uzp {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (uzpq_z0_z4, svuint8x4_t, z0, + svuzpq_u8_x4 (z4), + svuzpq (z4)) + +/* +** uzpq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (uzpq_z4_z18, svuint8x4_t, z4, + svuzpq_u8_x4 (z18), + svuzpq (z18)) + +/* +** uzpq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uzp {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z18_z23, svuint8x4_t, z18, + svuzpq_u8_x4 (z23), + svuzpq (z23)) + +/* +** uzpq_z23_z28: +** uzp [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (uzpq_z23_z28, svuint8x4_t, z23, + svuzpq_u8_x4 (z28), + svuzpq (z28)) + +/* +** uzpq_z28_z0: +** uzp {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (uzpq_z28_z0, svuint8x4_t, z28, + svuzpq_u8_x4 (z0), + svuzpq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_bf16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_bf16_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..9edf823850ac5d2e3889651efb69dd896f89223c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_bf16_vg1x2.c @@ -0,0 +1,102 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** vdot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** bfvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_0_z0_z4_0, svbfloat16x2_t, svbfloat16_t, + svvdot_lane_za32_bf16_vg1x2 (0, z0, z4, 0), + svvdot_lane_za32_vg1x2 (0, z0, z4, 0)) + +/* +** vdot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** bfvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w0_z0_z7_1, svbfloat16x2_t, svbfloat16_t, + svvdot_lane_za32_bf16_vg1x2 (w0, z0, z7, 1), + svvdot_lane_za32_vg1x2 (w0, z0, z7, 1)) + +/* +** vdot_lane_w8_z28_z4_2: +** bfvdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z28_z4_2, svbfloat16x2_t, svbfloat16_t, + svvdot_lane_za32_bf16_vg1x2 (w8, z28, z4, 2), + svvdot_lane_za32_vg1x2 (w8, z28, z4, 2)) + +/* +** vdot_lane_w8p7_z0_z4_3: +** bfvdot za\.s\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] 
+** ret +*/ +TEST_ZA_LANE (vdot_lane_w8p7_z0_z4_3, svbfloat16x2_t, svbfloat16_t, + svvdot_lane_za32_bf16_vg1x2 (w8 + 7, z0, z4, 3), + svvdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3)) + +/* +** vdot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** bfvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8p8_z0_z4_0, svbfloat16x2_t, svbfloat16_t, + svvdot_lane_za32_bf16_vg1x2 (w8 + 8, z0, z4, 0), + svvdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0)) + +/* +** vdot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** bfvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w0m1_z0_z4_1, svbfloat16x2_t, svbfloat16_t, + svvdot_lane_za32_bf16_vg1x2 (w0 - 1, z0, z4, 1), + svvdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1)) + +/* +** vdot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! +** bfvdot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (vdot_lane_w8_z4_z15_2, svbfloat16x2_t, svbfloat16_t, + svvdot_lane_za32_bf16_vg1x2 (w8, z4, z15, 2), + svvdot_lane_za32_vg1x2 (w8, z4, z15, 2)) + +/* +** vdot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** bfvdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[3\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z28_z16_3, svbfloat16x2_t, svbfloat16_t, + svvdot_lane_za32_bf16_vg1x2 (w8, z28, z16, 3), + svvdot_lane_za32_vg1x2 (w8, z28, z16, 3)) + +/* +** vdot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** bfvdot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z17_z7_0, svbfloat16x2_t, svbfloat16_t, + svvdot_lane_za32_bf16_vg1x2 (w8, z17, z7, 0), + svvdot_lane_za32_vg1x2 (w8, z17, z7, 0)) + +/* +** vdot_lane_w8_z22_z4_1: +** bfvdot za\.s\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z22_z4_1, svbfloat16x2_t, svbfloat16_t, + svvdot_lane_za32_bf16_vg1x2 (w8, z22, z4, 1), + svvdot_lane_za32_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_f16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_f16_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..75acf9f650c1128f239dac43dbea4f05d00b83e3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_f16_vg1x2.c @@ -0,0 +1,102 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** vdot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** fvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_0_z0_z4_0, svfloat16x2_t, svfloat16_t, + svvdot_lane_za32_f16_vg1x2 (0, z0, z4, 0), + svvdot_lane_za32_vg1x2 (0, z0, z4, 0)) + +/* +** vdot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** fvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w0_z0_z7_1, svfloat16x2_t, svfloat16_t, + svvdot_lane_za32_f16_vg1x2 (w0, z0, z7, 1), + svvdot_lane_za32_vg1x2 (w0, z0, z7, 1)) + +/* +** vdot_lane_w8_z28_z4_2: +** fvdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z28_z4_2, svfloat16x2_t, svfloat16_t, + svvdot_lane_za32_f16_vg1x2 (w8, z28, z4, 2), + svvdot_lane_za32_vg1x2 (w8, z28, z4, 2)) + +/* +** vdot_lane_w8p7_z0_z4_3: +** fvdot za\.s\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8p7_z0_z4_3, svfloat16x2_t, svfloat16_t, + svvdot_lane_za32_f16_vg1x2 (w8 + 7, z0, z4, 3), + svvdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3)) + +/* 
+** vdot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** fvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8p8_z0_z4_0, svfloat16x2_t, svfloat16_t, + svvdot_lane_za32_f16_vg1x2 (w8 + 8, z0, z4, 0), + svvdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0)) + +/* +** vdot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** fvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w0m1_z0_z4_1, svfloat16x2_t, svfloat16_t, + svvdot_lane_za32_f16_vg1x2 (w0 - 1, z0, z4, 1), + svvdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1)) + +/* +** vdot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! +** fvdot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (vdot_lane_w8_z4_z15_2, svfloat16x2_t, svfloat16_t, + svvdot_lane_za32_f16_vg1x2 (w8, z4, z15, 2), + svvdot_lane_za32_vg1x2 (w8, z4, z15, 2)) + +/* +** vdot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** fvdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[3\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z28_z16_3, svfloat16x2_t, svfloat16_t, + svvdot_lane_za32_f16_vg1x2 (w8, z28, z16, 3), + svvdot_lane_za32_vg1x2 (w8, z28, z16, 3)) + +/* +** vdot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** fvdot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z17_z7_0, svfloat16x2_t, svfloat16_t, + svvdot_lane_za32_f16_vg1x2 (w8, z17, z7, 0), + svvdot_lane_za32_vg1x2 (w8, z17, z7, 0)) + +/* +** vdot_lane_w8_z22_z4_1: +** fvdot za\.s\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z22_z4_1, svfloat16x2_t, svfloat16_t, + svvdot_lane_za32_f16_vg1x2 (w8, z22, z4, 1), + svvdot_lane_za32_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_s16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_s16_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..cbb0c66bedec451cdffef7f9df55aad73bee20cb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_s16_vg1x2.c @@ -0,0 +1,102 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** vdot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** svdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_0_z0_z4_0, svint16x2_t, svint16_t, + svvdot_lane_za32_s16_vg1x2 (0, z0, z4, 0), + svvdot_lane_za32_vg1x2 (0, z0, z4, 0)) + +/* +** vdot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** svdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w0_z0_z7_1, svint16x2_t, svint16_t, + svvdot_lane_za32_s16_vg1x2 (w0, z0, z7, 1), + svvdot_lane_za32_vg1x2 (w0, z0, z7, 1)) + +/* +** vdot_lane_w8_z28_z4_2: +** svdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z28_z4_2, svint16x2_t, svint16_t, + svvdot_lane_za32_s16_vg1x2 (w8, z28, z4, 2), + svvdot_lane_za32_vg1x2 (w8, z28, z4, 2)) + +/* +** vdot_lane_w8p7_z0_z4_3: +** svdot za\.s\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8p7_z0_z4_3, svint16x2_t, svint16_t, + svvdot_lane_za32_s16_vg1x2 (w8 + 7, z0, z4, 3), + svvdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3)) + +/* +** vdot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** svdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8p8_z0_z4_0, svint16x2_t, svint16_t, + svvdot_lane_za32_s16_vg1x2 
(w8 + 8, z0, z4, 0), + svvdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0)) + +/* +** vdot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** svdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w0m1_z0_z4_1, svint16x2_t, svint16_t, + svvdot_lane_za32_s16_vg1x2 (w0 - 1, z0, z4, 1), + svvdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1)) + +/* +** vdot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! +** svdot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (vdot_lane_w8_z4_z15_2, svint16x2_t, svint16_t, + svvdot_lane_za32_s16_vg1x2 (w8, z4, z15, 2), + svvdot_lane_za32_vg1x2 (w8, z4, z15, 2)) + +/* +** vdot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** svdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[3\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z28_z16_3, svint16x2_t, svint16_t, + svvdot_lane_za32_s16_vg1x2 (w8, z28, z16, 3), + svvdot_lane_za32_vg1x2 (w8, z28, z16, 3)) + +/* +** vdot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** svdot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z17_z7_0, svint16x2_t, svint16_t, + svvdot_lane_za32_s16_vg1x2 (w8, z17, z7, 0), + svvdot_lane_za32_vg1x2 (w8, z17, z7, 0)) + +/* +** vdot_lane_w8_z22_z4_1: +** svdot za\.s\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z22_z4_1, svint16x2_t, svint16_t, + svvdot_lane_za32_s16_vg1x2 (w8, z22, z4, 1), + svvdot_lane_za32_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_s8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_s8_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..c43a3d05d896bced2a8c1bd8be86bbfb8bc02be0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_s8_vg1x4.c @@ -0,0 +1,108 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** vdot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** svdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_0_z0_z4_0, svint8x4_t, svint8_t, + svvdot_lane_za32_s8_vg1x4 (0, z0, z4, 0), + svvdot_lane_za32_vg1x4 (0, z0, z4, 0)) + +/* +** vdot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** svdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w0_z0_z7_1, svint8x4_t, svint8_t, + svvdot_lane_za32_s8_vg1x4 (w0, z0, z7, 1), + svvdot_lane_za32_vg1x4 (w0, z0, z7, 1)) + +/* +** vdot_lane_w8_z28_z4_2: +** svdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z28_z4_2, svint8x4_t, svint8_t, + svvdot_lane_za32_s8_vg1x4 (w8, z28, z4, 2), + svvdot_lane_za32_vg1x4 (w8, z28, z4, 2)) + +/* +** vdot_lane_w8p7_z0_z4_3: +** svdot za\.s\[w8, 7, vgx4\], {z0\.b - z3\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8p7_z0_z4_3, svint8x4_t, svint8_t, + svvdot_lane_za32_s8_vg1x4 (w8 + 7, z0, z4, 3), + svvdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3)) + +/* +** vdot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** svdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8p8_z0_z4_0, svint8x4_t, svint8_t, + svvdot_lane_za32_s8_vg1x4 (w8 + 8, z0, z4, 0), + svvdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0)) + +/* +** vdot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** svdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w0m1_z0_z4_1, svint8x4_t, svint8_t, 
+ svvdot_lane_za32_s8_vg1x4 (w0 - 1, z0, z4, 1), + svvdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1)) + +/* +** vdot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! +** svdot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, z15\.b\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (vdot_lane_w8_z4_z15_2, svint8x4_t, svint8_t, + svvdot_lane_za32_s8_vg1x4 (w8, z4, z15, 2), + svvdot_lane_za32_vg1x4 (w8, z4, z15, 2)) + +/* +** vdot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** svdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, \1\.b\[3\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z28_z16_3, svint8x4_t, svint8_t, + svvdot_lane_za32_s8_vg1x4 (w8, z28, z16, 3), + svvdot_lane_za32_vg1x4 (w8, z28, z16, 3)) + +/* +** vdot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** svdot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.b\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z17_z7_0, svint8x4_t, svint8_t, + svvdot_lane_za32_s8_vg1x4 (w8, z17, z7, 0), + svvdot_lane_za32_vg1x4 (w8, z17, z7, 0)) + +/* +** vdot_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** svdot za\.s\[w8, 0, vgx4\], [^\n]+, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z22_z4_1, svint8x4_t, svint8_t, + svvdot_lane_za32_s8_vg1x4 (w8, z22, z4, 1), + svvdot_lane_za32_vg1x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_u16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_u16_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..1694a7d127beff303f491fcfd5c4f7b50c733df5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_u16_vg1x2.c @@ -0,0 +1,102 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** vdot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** uvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_0_z0_z4_0, svuint16x2_t, svuint16_t, + svvdot_lane_za32_u16_vg1x2 (0, z0, z4, 0), + svvdot_lane_za32_vg1x2 (0, z0, z4, 0)) + +/* +** vdot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** uvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w0_z0_z7_1, svuint16x2_t, svuint16_t, + svvdot_lane_za32_u16_vg1x2 (w0, z0, z7, 1), + svvdot_lane_za32_vg1x2 (w0, z0, z7, 1)) + +/* +** vdot_lane_w8_z28_z4_2: +** uvdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z28_z4_2, svuint16x2_t, svuint16_t, + svvdot_lane_za32_u16_vg1x2 (w8, z28, z4, 2), + svvdot_lane_za32_vg1x2 (w8, z28, z4, 2)) + +/* +** vdot_lane_w8p7_z0_z4_3: +** uvdot za\.s\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8p7_z0_z4_3, svuint16x2_t, svuint16_t, + svvdot_lane_za32_u16_vg1x2 (w8 + 7, z0, z4, 3), + svvdot_lane_za32_vg1x2 (w8 + 7, z0, z4, 3)) + +/* +** vdot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** uvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8p8_z0_z4_0, svuint16x2_t, svuint16_t, + svvdot_lane_za32_u16_vg1x2 (w8 + 8, z0, z4, 0), + svvdot_lane_za32_vg1x2 (w8 + 8, z0, z4, 0)) + +/* +** vdot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** uvdot za\.s\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w0m1_z0_z4_1, svuint16x2_t, svuint16_t, + svvdot_lane_za32_u16_vg1x2 (w0 - 1, z0, z4, 1), + svvdot_lane_za32_vg1x2 (w0 - 1, z0, z4, 1)) + +/* +** vdot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** uvdot za\.s\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (vdot_lane_w8_z4_z15_2, svuint16x2_t, svuint16_t, + svvdot_lane_za32_u16_vg1x2 (w8, z4, z15, 2), + svvdot_lane_za32_vg1x2 (w8, z4, z15, 2)) + +/* +** vdot_lane_w8_z28_z16_3: +** mov (z[0-7]).d, z16.d +** uvdot za\.s\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[3\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z28_z16_3, svuint16x2_t, svuint16_t, + svvdot_lane_za32_u16_vg1x2 (w8, z28, z16, 3), + svvdot_lane_za32_vg1x2 (w8, z28, z16, 3)) + +/* +** vdot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** uvdot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z17_z7_0, svuint16x2_t, svuint16_t, + svvdot_lane_za32_u16_vg1x2 (w8, z17, z7, 0), + svvdot_lane_za32_vg1x2 (w8, z17, z7, 0)) + +/* +** vdot_lane_w8_z22_z4_1: +** uvdot za\.s\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z22_z4_1, svuint16x2_t, svuint16_t, + svvdot_lane_za32_u16_vg1x2 (w8, z22, z4, 1), + svvdot_lane_za32_vg1x2 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_u8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_u8_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..d37ef5cd48ec6ca051402195634742acc72376b1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za32_u8_vg1x4.c @@ -0,0 +1,108 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** vdot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** uvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_0_z0_z4_0, svuint8x4_t, svuint8_t, + svvdot_lane_za32_u8_vg1x4 (0, z0, z4, 0), + svvdot_lane_za32_vg1x4 (0, z0, z4, 0)) + +/* +** vdot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** uvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w0_z0_z7_1, svuint8x4_t, svuint8_t, + svvdot_lane_za32_u8_vg1x4 (w0, z0, z7, 1), + svvdot_lane_za32_vg1x4 (w0, z0, z7, 1)) + +/* +** vdot_lane_w8_z28_z4_2: +** uvdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z28_z4_2, svuint8x4_t, svuint8_t, + svvdot_lane_za32_u8_vg1x4 (w8, z28, z4, 2), + svvdot_lane_za32_vg1x4 (w8, z28, z4, 2)) + +/* +** vdot_lane_w8p7_z0_z4_3: +** uvdot za\.s\[w8, 7, vgx4\], {z0\.b - z3\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8p7_z0_z4_3, svuint8x4_t, svuint8_t, + svvdot_lane_za32_u8_vg1x4 (w8 + 7, z0, z4, 3), + svvdot_lane_za32_vg1x4 (w8 + 7, z0, z4, 3)) + +/* +** vdot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** uvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8p8_z0_z4_0, svuint8x4_t, svuint8_t, + svvdot_lane_za32_u8_vg1x4 (w8 + 8, z0, z4, 0), + svvdot_lane_za32_vg1x4 (w8 + 8, z0, z4, 0)) + +/* +** vdot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** uvdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w0m1_z0_z4_1, svuint8x4_t, svuint8_t, + svvdot_lane_za32_u8_vg1x4 (w0 - 1, z0, z4, 1), + svvdot_lane_za32_vg1x4 (w0 - 1, z0, z4, 1)) + +/* +** vdot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+**	uvdot	za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, z15\.b\[2\]
+**	ldr	d15, \[sp\], #?16
+**	ret
+*/
+TEST_ZA_LANE_Z15 (vdot_lane_w8_z4_z15_2, svuint8x4_t, svuint8_t,
+		   svvdot_lane_za32_u8_vg1x4 (w8, z4, z15, 2),
+		   svvdot_lane_za32_vg1x4 (w8, z4, z15, 2))
+
+/*
+** vdot_lane_w8_z28_z16_3:
+**	mov	(z[0-7]).d, z16.d
+**	uvdot	za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, \1\.b\[3\]
+**	ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z28_z16_3, svuint8x4_t, svuint8_t,
+	      svvdot_lane_za32_u8_vg1x4 (w8, z28, z16, 3),
+	      svvdot_lane_za32_vg1x4 (w8, z28, z16, 3))
+
+/*
+** vdot_lane_w8_z17_z7_0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	uvdot	za\.s\[w8, 0, vgx4\], [^\n]+, z7\.b\[0\]
+**	ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z17_z7_0, svuint8x4_t, svuint8_t,
+	      svvdot_lane_za32_u8_vg1x4 (w8, z17, z7, 0),
+	      svvdot_lane_za32_vg1x4 (w8, z17, z7, 0))
+
+/*
+** vdot_lane_w8_z22_z4_1:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	uvdot	za\.s\[w8, 0, vgx4\], [^\n]+, z4\.b\[1\]
+**	ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z22_z4_1, svuint8x4_t, svuint8_t,
+	      svvdot_lane_za32_u8_vg1x4 (w8, z22, z4, 1),
+	      svvdot_lane_za32_vg1x4 (w8, z22, z4, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za64_s16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za64_s16_vg1x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..972d56f741503d07f66b0f8233dd95737ae4d407
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za64_s16_vg1x4.c
@@ -0,0 +1,110 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+sme-i16i64"
+
+#include "test_sme2_acle.h"
+
+/*
+** vdot_lane_0_z0_z4_0:
+**	mov	(w8|w9|w10|w11), #?0
+**	svdot	za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+**	ret
+*/
+TEST_ZA_LANE (vdot_lane_0_z0_z4_0, svint16x4_t, svint16_t,
+	      svvdot_lane_za64_s16_vg1x4 (0, z0, z4, 0),
+	      svvdot_lane_za64_vg1x4 (0, z0, z4, 0))
+
+/*
+** vdot_lane_w0_z0_z7_1:
+**	mov	(w8|w9|w10|w11), w0
+**	svdot	za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+**	ret
+*/
+TEST_ZA_LANE (vdot_lane_w0_z0_z7_1, svint16x4_t, svint16_t,
+	      svvdot_lane_za64_s16_vg1x4 (w0, z0, z7, 1),
+	      svvdot_lane_za64_vg1x4 (w0, z0, z7, 1))
+
+/*
+** vdot_lane_w8_z28_z4_0:
+**	svdot	za\.d\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[0\]
+**	ret
+*/
+TEST_ZA_LANE (vdot_lane_w8_z28_z4_0, svint16x4_t, svint16_t,
+	      svvdot_lane_za64_s16_vg1x4 (w8, z28, z4, 0),
+	      svvdot_lane_za64_vg1x4 (w8, z28, z4, 0))
+
+/*
+** vdot_lane_w8p7_z0_z4_1:
+**	svdot	za\.d\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\]
+**	ret
+*/
+TEST_ZA_LANE (vdot_lane_w8p7_z0_z4_1, svint16x4_t, svint16_t,
+	      svvdot_lane_za64_s16_vg1x4 (w8 + 7, z0, z4, 1),
+	      svvdot_lane_za64_vg1x4 (w8 + 7, z0, z4, 1))
+
+/*
+** vdot_lane_w8p8_z0_z4_0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	svdot	za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+**	ret
+*/
+TEST_ZA_LANE (vdot_lane_w8p8_z0_z4_0, svint16x4_t, svint16_t,
+	      svvdot_lane_za64_s16_vg1x4 (w8 + 8, z0, z4, 0),
+	      svvdot_lane_za64_vg1x4 (w8 + 8, z0, z4, 0))
+
+/*
+** vdot_lane_w0m1_z0_z4_1:
+**	sub	(w8|w9|w10|w11), w0, #?1
+**	svdot	za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\]
+**	ret
+*/
+TEST_ZA_LANE (vdot_lane_w0m1_z0_z4_1, svint16x4_t, svint16_t,
+	      svvdot_lane_za64_s16_vg1x4 (w0 - 1, z0, z4, 1),
+	      svvdot_lane_za64_vg1x4 (w0 - 1, z0, z4, 1))
+
+/*
+** vdot_lane_w8_z4_z15_0:
+**	str	d15, \[sp, #?-16\]!
+** svdot za\.d\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[0\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (vdot_lane_w8_z4_z15_0, svint16x4_t, svint16_t, + svvdot_lane_za64_s16_vg1x4 (w8, z4, z15, 0), + svvdot_lane_za64_vg1x4 (w8, z4, z15, 0)) + +/* +** vdot_lane_w8_z28_z16_1: +** mov (z[0-7]).d, z16.d +** svdot za\.d\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z28_z16_1, svint16x4_t, svint16_t, + svvdot_lane_za64_s16_vg1x4 (w8, z28, z16, 1), + svvdot_lane_za64_vg1x4 (w8, z28, z16, 1)) + +/* +** vdot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** svdot za\.d\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z17_z7_0, svint16x4_t, svint16_t, + svvdot_lane_za64_s16_vg1x4 (w8, z17, z7, 0), + svvdot_lane_za64_vg1x4 (w8, z17, z7, 0)) + +/* +** vdot_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** svdot za\.d\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z22_z4_1, svint16x4_t, svint16_t, + svvdot_lane_za64_s16_vg1x4 (w8, z22, z4, 1), + svvdot_lane_za64_vg1x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za64_u16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za64_u16_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..601593838045108e86c989eefdfe479785321c99 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za64_u16_vg1x4.c @@ -0,0 +1,110 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#pragma GCC target "+sme-i16i64" + +#include "test_sme2_acle.h" + +/* +** vdot_lane_0_z0_z4_0: +** mov (w8|w9|w10|w11), #?0 +** uvdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_0_z0_z4_0, svuint16x4_t, svuint16_t, + svvdot_lane_za64_u16_vg1x4 (0, z0, z4, 0), + svvdot_lane_za64_vg1x4 (0, z0, z4, 0)) + +/* +** vdot_lane_w0_z0_z7_1: +** mov (w8|w9|w10|w11), w0 +** uvdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w0_z0_z7_1, svuint16x4_t, svuint16_t, + svvdot_lane_za64_u16_vg1x4 (w0, z0, z7, 1), + svvdot_lane_za64_vg1x4 (w0, z0, z7, 1)) + +/* +** vdot_lane_w8_z28_z4_0: +** uvdot za\.d\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z28_z4_0, svuint16x4_t, svuint16_t, + svvdot_lane_za64_u16_vg1x4 (w8, z28, z4, 0), + svvdot_lane_za64_vg1x4 (w8, z28, z4, 0)) + +/* +** vdot_lane_w8p7_z0_z4_1: +** uvdot za\.d\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8p7_z0_z4_1, svuint16x4_t, svuint16_t, + svvdot_lane_za64_u16_vg1x4 (w8 + 7, z0, z4, 1), + svvdot_lane_za64_vg1x4 (w8 + 7, z0, z4, 1)) + +/* +** vdot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** uvdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8p8_z0_z4_0, svuint16x4_t, svuint16_t, + svvdot_lane_za64_u16_vg1x4 (w8 + 8, z0, z4, 0), + svvdot_lane_za64_vg1x4 (w8 + 8, z0, z4, 0)) + +/* +** vdot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** uvdot za\.d\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w0m1_z0_z4_1, svuint16x4_t, svuint16_t, + svvdot_lane_za64_u16_vg1x4 (w0 - 1, z0, z4, 1), + svvdot_lane_za64_vg1x4 (w0 - 1, z0, z4, 1)) + +/* +** vdot_lane_w8_z4_z15_0: +** str d15, \[sp, #?-16\]! 
+** uvdot za\.d\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[0\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (vdot_lane_w8_z4_z15_0, svuint16x4_t, svuint16_t, + svvdot_lane_za64_u16_vg1x4 (w8, z4, z15, 0), + svvdot_lane_za64_vg1x4 (w8, z4, z15, 0)) + +/* +** vdot_lane_w8_z28_z16_1: +** mov (z[0-7]).d, z16.d +** uvdot za\.d\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z28_z16_1, svuint16x4_t, svuint16_t, + svvdot_lane_za64_u16_vg1x4 (w8, z28, z16, 1), + svvdot_lane_za64_vg1x4 (w8, z28, z16, 1)) + +/* +** vdot_lane_w8_z17_z7_0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uvdot za\.d\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z17_z7_0, svuint16x4_t, svuint16_t, + svvdot_lane_za64_u16_vg1x4 (w8, z17, z7, 0), + svvdot_lane_za64_vg1x4 (w8, z17, z7, 0)) + +/* +** vdot_lane_w8_z22_z4_1: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** uvdot za\.d\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\] +** ret +*/ +TEST_ZA_LANE (vdot_lane_w8_z22_z4_1, svuint16x4_t, svuint16_t, + svvdot_lane_za64_u16_vg1x4 (w8, z22, z4, 1), + svvdot_lane_za64_vg1x4 (w8, z22, z4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_b16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_b16.c new file mode 100644 index 0000000000000000000000000000000000000000..1e186226f0c0e9011567818849d8fea4715d592c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_b16.c @@ -0,0 +1,119 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilege_p1_rr_s64: +** whilege {p[0-9]+\.h, p[0-9]+\.h}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p1_rr_s64, int64_t, + p1 = svwhilege_b16_s64_x2 (x0, x1), + p1 = svwhilege_b16_x2 (x0, x1)) + +/* +** whilege_p4_rr_s64: +** whilege {p4\.h, p5\.h}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_rr_s64, int64_t, + p4 = svwhilege_b16_s64_x2 (x0, x1), + p4 = svwhilege_b16_x2 (x0, x1)) + +/* +** whilege_p9_rr_s64: +** whilege {p[0-9]+\.h, p[0-9]+\.h}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p9_rr_s64, int64_t, + p9 = svwhilege_b16_s64_x2 (x0, x1), + p9 = svwhilege_b16_x2 (x0, x1)) + +/* +** whilege_p14_rr_s64: +** whilege {p14\.h, p15\.h}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p14_rr_s64, int64_t, + p14 = svwhilege_b16_s64_x2 (x0, x1), + p14 = svwhilege_b16_x2 (x0, x1)) + +/* +** whilege_p4_0r_s64: +** whilege {p4\.h, p5\.h}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_0r_s64, int64_t, + p4 = svwhilege_b16_x2 ((int64_t) 0, x1), + p4 = svwhilege_b16_s64_x2 (0, x1)) + +/* +** whilege_p4_5r_s64: +** mov (x[0-9]+), #?5 +** whilege {p4\.h, p5\.h}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_5r_s64, int64_t, + p4 = svwhilege_b16_x2 ((int64_t) 5, x1), + p4 = svwhilege_b16_s64_x2 (5, x1)) + +/* +** whilege_p4_r0_s64: +** whilege {p4\.h, p5\.h}, x0, xzr +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_r0_s64, int64_t, + p4 = svwhilege_b16_x2 (x0, (int64_t) 0), + p4 = svwhilege_b16_s64_x2 (x0, 0)) + +/* +** whilege_p14_r5_s64: +** mov (x[0-9]+), #?5 +** whilege {p14\.h, p15\.h}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p14_r5_s64, int64_t, + p14 = svwhilege_b16_x2 (x0, (int64_t) 5), + p14 = svwhilege_b16_s64_x2 (x0, 5)) + +/* +** whilege_p4_rr_u64: +** whilehs {p4\.h, p5\.h}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_rr_u64, uint64_t, + p4 = svwhilege_b16_u64_x2 (x0, x1), + p4 = svwhilege_b16_x2 (x0, x1)) + 
+/* +** whilege_p4_0r_u64: +** whilehs {p4\.h, p5\.h}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_0r_u64, uint64_t, + p4 = svwhilege_b16_x2 ((uint64_t) 0, x1), + p4 = svwhilege_b16_u64_x2 (0, x1)) + +/* +** whilege_p4_5r_u64: +** mov (x[0-9]+), #?5 +** whilehs {p4\.h, p5\.h}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_5r_u64, uint64_t, + p4 = svwhilege_b16_x2 ((uint64_t) 5, x1), + p4 = svwhilege_b16_u64_x2 (5, x1)) + +/* +** whilege_p4_r5_u64: +** mov (x[0-9]+), #?5 +** whilehs {p4\.h, p5\.h}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_r5_u64, uint64_t, + p4 = svwhilege_b16_x2 (x0, (uint64_t) 5), + p4 = svwhilege_b16_u64_x2 (x0, 5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_b32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_b32.c new file mode 100644 index 0000000000000000000000000000000000000000..fc6cb42ba632860cd09ea4e6f4fa18c5707cb74b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_b32.c @@ -0,0 +1,119 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilege_p1_rr_s64: +** whilege {p[0-9]+\.s, p[0-9]+\.s}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p1_rr_s64, int64_t, + p1 = svwhilege_b32_s64_x2 (x0, x1), + p1 = svwhilege_b32_x2 (x0, x1)) + +/* +** whilege_p4_rr_s64: +** whilege {p4\.s, p5\.s}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_rr_s64, int64_t, + p4 = svwhilege_b32_s64_x2 (x0, x1), + p4 = svwhilege_b32_x2 (x0, x1)) + +/* +** whilege_p9_rr_s64: +** whilege {p[0-9]+\.s, p[0-9]+\.s}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p9_rr_s64, int64_t, + p9 = svwhilege_b32_s64_x2 (x0, x1), + p9 = svwhilege_b32_x2 (x0, x1)) + +/* +** whilege_p14_rr_s64: +** whilege {p14\.s, p15\.s}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p14_rr_s64, int64_t, + p14 = svwhilege_b32_s64_x2 (x0, x1), + p14 = svwhilege_b32_x2 (x0, x1)) + +/* +** whilege_p4_0r_s64: +** whilege {p4\.s, p5\.s}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_0r_s64, int64_t, + p4 = svwhilege_b32_x2 ((int64_t) 0, x1), + p4 = svwhilege_b32_s64_x2 (0, x1)) + +/* +** whilege_p4_5r_s64: +** mov (x[0-9]+), #?5 +** whilege {p4\.s, p5\.s}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_5r_s64, int64_t, + p4 = svwhilege_b32_x2 ((int64_t) 5, x1), + p4 = svwhilege_b32_s64_x2 (5, x1)) + +/* +** whilege_p4_r0_s64: +** whilege {p4\.s, p5\.s}, x0, xzr +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_r0_s64, int64_t, + p4 = svwhilege_b32_x2 (x0, (int64_t) 0), + p4 = svwhilege_b32_s64_x2 (x0, 0)) + +/* +** whilege_p14_r5_s64: +** mov (x[0-9]+), #?5 +** whilege {p14\.s, p15\.s}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p14_r5_s64, int64_t, + p14 = svwhilege_b32_x2 (x0, (int64_t) 5), + p14 = svwhilege_b32_s64_x2 (x0, 5)) + +/* +** whilege_p4_rr_u64: +** whilehs {p4\.s, p5\.s}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_rr_u64, uint64_t, + p4 = svwhilege_b32_u64_x2 (x0, x1), + p4 = svwhilege_b32_x2 (x0, x1)) + +/* +** whilege_p4_0r_u64: +** whilehs {p4\.s, p5\.s}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_0r_u64, uint64_t, + p4 = svwhilege_b32_x2 ((uint64_t) 0, x1), + p4 = svwhilege_b32_u64_x2 (0, x1)) + +/* +** whilege_p4_5r_u64: +** mov (x[0-9]+), #?5 +** whilehs {p4\.s, p5\.s}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_5r_u64, uint64_t, + p4 = svwhilege_b32_x2 ((uint64_t) 5, x1), + p4 = svwhilege_b32_u64_x2 (5, x1)) + +/* +** whilege_p4_r5_u64: +** mov (x[0-9]+), #?5 +** whilehs {p4\.s, 
p5\.s}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_r5_u64, uint64_t, + p4 = svwhilege_b32_x2 (x0, (uint64_t) 5), + p4 = svwhilege_b32_u64_x2 (x0, 5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_b64.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_b64.c new file mode 100644 index 0000000000000000000000000000000000000000..ecb8631ffc42cb79410dd705b7eca4fecab3eb35 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_b64.c @@ -0,0 +1,119 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilege_p1_rr_s64: +** whilege {p[0-9]+\.d, p[0-9]+\.d}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p1_rr_s64, int64_t, + p1 = svwhilege_b64_s64_x2 (x0, x1), + p1 = svwhilege_b64_x2 (x0, x1)) + +/* +** whilege_p4_rr_s64: +** whilege {p4\.d, p5\.d}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_rr_s64, int64_t, + p4 = svwhilege_b64_s64_x2 (x0, x1), + p4 = svwhilege_b64_x2 (x0, x1)) + +/* +** whilege_p9_rr_s64: +** whilege {p[0-9]+\.d, p[0-9]+\.d}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p9_rr_s64, int64_t, + p9 = svwhilege_b64_s64_x2 (x0, x1), + p9 = svwhilege_b64_x2 (x0, x1)) + +/* +** whilege_p14_rr_s64: +** whilege {p14\.d, p15\.d}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p14_rr_s64, int64_t, + p14 = svwhilege_b64_s64_x2 (x0, x1), + p14 = svwhilege_b64_x2 (x0, x1)) + +/* +** whilege_p4_0r_s64: +** whilege {p4\.d, p5\.d}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_0r_s64, int64_t, + p4 = svwhilege_b64_x2 ((int64_t) 0, x1), + p4 = svwhilege_b64_s64_x2 (0, x1)) + +/* +** whilege_p4_5r_s64: +** mov (x[0-9]+), #?5 +** whilege {p4\.d, p5\.d}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_5r_s64, int64_t, + p4 = svwhilege_b64_x2 ((int64_t) 5, x1), + p4 = svwhilege_b64_s64_x2 (5, x1)) + +/* +** whilege_p4_r0_s64: +** whilege {p4\.d, p5\.d}, x0, xzr +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_r0_s64, int64_t, + p4 = svwhilege_b64_x2 (x0, (int64_t) 0), + p4 = svwhilege_b64_s64_x2 (x0, 0)) + +/* +** whilege_p14_r5_s64: +** mov (x[0-9]+), #?5 +** whilege {p14\.d, p15\.d}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p14_r5_s64, int64_t, + p14 = svwhilege_b64_x2 (x0, (int64_t) 5), + p14 = svwhilege_b64_s64_x2 (x0, 5)) + +/* +** whilege_p4_rr_u64: +** whilehs {p4\.d, p5\.d}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_rr_u64, uint64_t, + p4 = svwhilege_b64_u64_x2 (x0, x1), + p4 = svwhilege_b64_x2 (x0, x1)) + +/* +** whilege_p4_0r_u64: +** whilehs {p4\.d, p5\.d}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_0r_u64, uint64_t, + p4 = svwhilege_b64_x2 ((uint64_t) 0, x1), + p4 = svwhilege_b64_u64_x2 (0, x1)) + +/* +** whilege_p4_5r_u64: +** mov (x[0-9]+), #?5 +** whilehs {p4\.d, p5\.d}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_5r_u64, uint64_t, + p4 = svwhilege_b64_x2 ((uint64_t) 5, x1), + p4 = svwhilege_b64_u64_x2 (5, x1)) + +/* +** whilege_p4_r5_u64: +** mov (x[0-9]+), #?5 +** whilehs {p4\.d, p5\.d}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_r5_u64, uint64_t, + p4 = svwhilege_b64_x2 (x0, (uint64_t) 5), + p4 = svwhilege_b64_u64_x2 (x0, 5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_b8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_b8.c new file mode 100644 index 0000000000000000000000000000000000000000..96e79975286ea58af6607e1fa23b054d9412ca91 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_b8.c @@ -0,0 +1,119 @@ +/* { 
dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilege_p1_rr_s64: +** whilege {p[0-9]+\.b, p[0-9]+\.b}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p1_rr_s64, int64_t, + p1 = svwhilege_b8_s64_x2 (x0, x1), + p1 = svwhilege_b8_x2 (x0, x1)) + +/* +** whilege_p4_rr_s64: +** whilege {p4\.b, p5\.b}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_rr_s64, int64_t, + p4 = svwhilege_b8_s64_x2 (x0, x1), + p4 = svwhilege_b8_x2 (x0, x1)) + +/* +** whilege_p9_rr_s64: +** whilege {p[0-9]+\.b, p[0-9]+\.b}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p9_rr_s64, int64_t, + p9 = svwhilege_b8_s64_x2 (x0, x1), + p9 = svwhilege_b8_x2 (x0, x1)) + +/* +** whilege_p14_rr_s64: +** whilege {p14\.b, p15\.b}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p14_rr_s64, int64_t, + p14 = svwhilege_b8_s64_x2 (x0, x1), + p14 = svwhilege_b8_x2 (x0, x1)) + +/* +** whilege_p4_0r_s64: +** whilege {p4\.b, p5\.b}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_0r_s64, int64_t, + p4 = svwhilege_b8_x2 ((int64_t) 0, x1), + p4 = svwhilege_b8_s64_x2 (0, x1)) + +/* +** whilege_p4_5r_s64: +** mov (x[0-9]+), #?5 +** whilege {p4\.b, p5\.b}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_5r_s64, int64_t, + p4 = svwhilege_b8_x2 ((int64_t) 5, x1), + p4 = svwhilege_b8_s64_x2 (5, x1)) + +/* +** whilege_p4_r0_s64: +** whilege {p4\.b, p5\.b}, x0, xzr +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_r0_s64, int64_t, + p4 = svwhilege_b8_x2 (x0, (int64_t) 0), + p4 = svwhilege_b8_s64_x2 (x0, 0)) + +/* +** whilege_p14_r5_s64: +** mov (x[0-9]+), #?5 +** whilege {p14\.b, p15\.b}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p14_r5_s64, int64_t, + p14 = svwhilege_b8_x2 (x0, (int64_t) 5), + p14 = svwhilege_b8_s64_x2 (x0, 5)) + +/* +** whilege_p4_rr_u64: +** whilehs {p4\.b, p5\.b}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_rr_u64, uint64_t, + p4 = svwhilege_b8_u64_x2 (x0, x1), + p4 = svwhilege_b8_x2 (x0, x1)) + +/* +** whilege_p4_0r_u64: +** whilehs {p4\.b, p5\.b}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_0r_u64, uint64_t, + p4 = svwhilege_b8_x2 ((uint64_t) 0, x1), + p4 = svwhilege_b8_u64_x2 (0, x1)) + +/* +** whilege_p4_5r_u64: +** mov (x[0-9]+), #?5 +** whilehs {p4\.b, p5\.b}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_5r_u64, uint64_t, + p4 = svwhilege_b8_x2 ((uint64_t) 5, x1), + p4 = svwhilege_b8_u64_x2 (5, x1)) + +/* +** whilege_p4_r5_u64: +** mov (x[0-9]+), #?5 +** whilehs {p4\.b, p5\.b}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilege_p4_r5_u64, uint64_t, + p4 = svwhilege_b8_x2 (x0, (uint64_t) 5), + p4 = svwhilege_b8_u64_x2 (x0, 5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_c16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_c16.c new file mode 100644 index 0000000000000000000000000000000000000000..9bf9a3d5a16b17e3bf834f8c5ad9c7f6f121305e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_c16.c @@ -0,0 +1,117 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilege_pn0_rr_2_s64: +** whilege pn[0-9]+\.h, x0, x1, vlx2 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilege_pn0_rr_2_s64, int64_t, + pn0 = svwhilege_c16_s64 (x0, x1, 2), + pn0 = svwhilege_c16 (x0, x1, 2)) + +/* +** whilege_pn7_rr_4_s64: +** whilege pn[0-9]+\.h, x0, x1, vlx4 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilege_pn7_rr_4_s64, int64_t, + pn7 = svwhilege_c16_s64 (x0, x1, 4), + pn7 = svwhilege_c16 (x0, x1, 
4)) + +/* +** whilege_pn8_rr_2_s64: +** whilege pn8\.h, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_rr_2_s64, int64_t, + pn8 = svwhilege_c16_s64 (x0, x1, 2), + pn8 = svwhilege_c16 (x0, x1, 2)) + +/* +** whilege_pn15_rr_4_s64: +** whilege pn15\.h, x0, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn15_rr_4_s64, int64_t, + pn15 = svwhilege_c16_s64 (x0, x1, 4), + pn15 = svwhilege_c16 (x0, x1, 4)) + +/* +** whilege_pn8_0r_2_s64: +** whilege pn8\.h, xzr, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_0r_2_s64, int64_t, + pn8 = svwhilege_c16 ((int64_t) 0, x1, 2), + pn8 = svwhilege_c16_s64 (0, x1, 2)) + +/* +** whilege_pn8_5r_4_s64: +** mov (x[0-9]+), #?5 +** whilege pn8\.h, \1, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_5r_4_s64, int64_t, + pn8 = svwhilege_c16 ((int64_t) 5, x1, 4), + pn8 = svwhilege_c16_s64 (5, x1, 4)) + +/* +** whilege_pn8_r0_2_s64: +** whilege pn8\.h, x0, xzr, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_r0_2_s64, int64_t, + pn8 = svwhilege_c16 (x0, (int64_t) 0, 2), + pn8 = svwhilege_c16_s64 (x0, 0, 2)) + +/* +** whilege_pn15_r5_4_s64: +** mov (x[0-9]+), #?5 +** whilege pn15\.h, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn15_r5_4_s64, int64_t, + pn15 = svwhilege_c16 (x0, (int64_t) 5, 4), + pn15 = svwhilege_c16_s64 (x0, 5, 4)) + +/* +** whilege_pn8_rr_2_u64: +** whilehs pn8\.h, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_rr_2_u64, uint64_t, + pn8 = svwhilege_c16_u64 (x0, x1, 2), + pn8 = svwhilege_c16 (x0, x1, 2)) + +/* +** whilege_pn8_0r_4_u64: +** whilehs pn8\.h, xzr, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_0r_4_u64, uint64_t, + pn8 = svwhilege_c16 ((uint64_t) 0, x1, 4), + pn8 = svwhilege_c16_u64 (0, x1, 4)) + +/* +** whilege_pn8_5r_2_u64: +** mov (x[0-9]+), #?5 +** whilehs pn8\.h, \1, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_5r_2_u64, uint64_t, + pn8 = svwhilege_c16 ((uint64_t) 5, x1, 2), + pn8 = svwhilege_c16_u64 (5, x1, 2)) + +/* +** whilege_pn8_r5_4_u64: +** mov (x[0-9]+), #?5 +** whilehs pn8\.h, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_r5_4_u64, uint64_t, + pn8 = svwhilege_c16 (x0, (uint64_t) 5, 4), + pn8 = svwhilege_c16_u64 (x0, 5, 4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_c32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_c32.c new file mode 100644 index 0000000000000000000000000000000000000000..8c098e0004260b978eee0c161139c0b7a5aca0b3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_c32.c @@ -0,0 +1,117 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilege_pn0_rr_2_s64: +** whilege pn[0-9]+\.s, x0, x1, vlx2 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilege_pn0_rr_2_s64, int64_t, + pn0 = svwhilege_c32_s64 (x0, x1, 2), + pn0 = svwhilege_c32 (x0, x1, 2)) + +/* +** whilege_pn7_rr_4_s64: +** whilege pn[0-9]+\.s, x0, x1, vlx4 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilege_pn7_rr_4_s64, int64_t, + pn7 = svwhilege_c32_s64 (x0, x1, 4), + pn7 = svwhilege_c32 (x0, x1, 4)) + +/* +** whilege_pn8_rr_2_s64: +** whilege pn8\.s, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_rr_2_s64, int64_t, + pn8 = svwhilege_c32_s64 (x0, x1, 2), + pn8 = svwhilege_c32 (x0, x1, 2)) + +/* +** whilege_pn15_rr_4_s64: +** whilege pn15\.s, x0, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn15_rr_4_s64, int64_t, + pn15 = svwhilege_c32_s64 (x0, x1, 4), + pn15 = svwhilege_c32 (x0, x1, 4)) + +/* +** whilege_pn8_0r_2_s64: +** whilege pn8\.s, xzr, x1, vlx2 +** ret +*/ 
+TEST_COMPARE_S_C (whilege_pn8_0r_2_s64, int64_t, + pn8 = svwhilege_c32 ((int64_t) 0, x1, 2), + pn8 = svwhilege_c32_s64 (0, x1, 2)) + +/* +** whilege_pn8_5r_4_s64: +** mov (x[0-9]+), #?5 +** whilege pn8\.s, \1, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_5r_4_s64, int64_t, + pn8 = svwhilege_c32 ((int64_t) 5, x1, 4), + pn8 = svwhilege_c32_s64 (5, x1, 4)) + +/* +** whilege_pn8_r0_2_s64: +** whilege pn8\.s, x0, xzr, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_r0_2_s64, int64_t, + pn8 = svwhilege_c32 (x0, (int64_t) 0, 2), + pn8 = svwhilege_c32_s64 (x0, 0, 2)) + +/* +** whilege_pn15_r5_4_s64: +** mov (x[0-9]+), #?5 +** whilege pn15\.s, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn15_r5_4_s64, int64_t, + pn15 = svwhilege_c32 (x0, (int64_t) 5, 4), + pn15 = svwhilege_c32_s64 (x0, 5, 4)) + +/* +** whilege_pn8_rr_2_u64: +** whilehs pn8\.s, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_rr_2_u64, uint64_t, + pn8 = svwhilege_c32_u64 (x0, x1, 2), + pn8 = svwhilege_c32 (x0, x1, 2)) + +/* +** whilege_pn8_0r_4_u64: +** whilehs pn8\.s, xzr, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_0r_4_u64, uint64_t, + pn8 = svwhilege_c32 ((uint64_t) 0, x1, 4), + pn8 = svwhilege_c32_u64 (0, x1, 4)) + +/* +** whilege_pn8_5r_2_u64: +** mov (x[0-9]+), #?5 +** whilehs pn8\.s, \1, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_5r_2_u64, uint64_t, + pn8 = svwhilege_c32 ((uint64_t) 5, x1, 2), + pn8 = svwhilege_c32_u64 (5, x1, 2)) + +/* +** whilege_pn8_r5_4_u64: +** mov (x[0-9]+), #?5 +** whilehs pn8\.s, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_r5_4_u64, uint64_t, + pn8 = svwhilege_c32 (x0, (uint64_t) 5, 4), + pn8 = svwhilege_c32_u64 (x0, 5, 4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_c64.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_c64.c new file mode 100644 index 0000000000000000000000000000000000000000..a1a98d6ac36bd5b7cd75766c5f8ca25ed664d219 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_c64.c @@ -0,0 +1,117 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilege_pn0_rr_2_s64: +** whilege pn[0-9]+\.d, x0, x1, vlx2 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilege_pn0_rr_2_s64, int64_t, + pn0 = svwhilege_c64_s64 (x0, x1, 2), + pn0 = svwhilege_c64 (x0, x1, 2)) + +/* +** whilege_pn7_rr_4_s64: +** whilege pn[0-9]+\.d, x0, x1, vlx4 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilege_pn7_rr_4_s64, int64_t, + pn7 = svwhilege_c64_s64 (x0, x1, 4), + pn7 = svwhilege_c64 (x0, x1, 4)) + +/* +** whilege_pn8_rr_2_s64: +** whilege pn8\.d, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_rr_2_s64, int64_t, + pn8 = svwhilege_c64_s64 (x0, x1, 2), + pn8 = svwhilege_c64 (x0, x1, 2)) + +/* +** whilege_pn15_rr_4_s64: +** whilege pn15\.d, x0, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn15_rr_4_s64, int64_t, + pn15 = svwhilege_c64_s64 (x0, x1, 4), + pn15 = svwhilege_c64 (x0, x1, 4)) + +/* +** whilege_pn8_0r_2_s64: +** whilege pn8\.d, xzr, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_0r_2_s64, int64_t, + pn8 = svwhilege_c64 ((int64_t) 0, x1, 2), + pn8 = svwhilege_c64_s64 (0, x1, 2)) + +/* +** whilege_pn8_5r_4_s64: +** mov (x[0-9]+), #?5 +** whilege pn8\.d, \1, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_5r_4_s64, int64_t, + pn8 = svwhilege_c64 ((int64_t) 5, x1, 4), + pn8 = svwhilege_c64_s64 (5, x1, 4)) + +/* +** whilege_pn8_r0_2_s64: +** whilege pn8\.d, x0, xzr, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_r0_2_s64, 
int64_t, + pn8 = svwhilege_c64 (x0, (int64_t) 0, 2), + pn8 = svwhilege_c64_s64 (x0, 0, 2)) + +/* +** whilege_pn15_r5_4_s64: +** mov (x[0-9]+), #?5 +** whilege pn15\.d, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn15_r5_4_s64, int64_t, + pn15 = svwhilege_c64 (x0, (int64_t) 5, 4), + pn15 = svwhilege_c64_s64 (x0, 5, 4)) + +/* +** whilege_pn8_rr_2_u64: +** whilehs pn8\.d, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_rr_2_u64, uint64_t, + pn8 = svwhilege_c64_u64 (x0, x1, 2), + pn8 = svwhilege_c64 (x0, x1, 2)) + +/* +** whilege_pn8_0r_4_u64: +** whilehs pn8\.d, xzr, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_0r_4_u64, uint64_t, + pn8 = svwhilege_c64 ((uint64_t) 0, x1, 4), + pn8 = svwhilege_c64_u64 (0, x1, 4)) + +/* +** whilege_pn8_5r_2_u64: +** mov (x[0-9]+), #?5 +** whilehs pn8\.d, \1, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_5r_2_u64, uint64_t, + pn8 = svwhilege_c64 ((uint64_t) 5, x1, 2), + pn8 = svwhilege_c64_u64 (5, x1, 2)) + +/* +** whilege_pn8_r5_4_u64: +** mov (x[0-9]+), #?5 +** whilehs pn8\.d, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_r5_4_u64, uint64_t, + pn8 = svwhilege_c64 (x0, (uint64_t) 5, 4), + pn8 = svwhilege_c64_u64 (x0, 5, 4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_c8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_c8.c new file mode 100644 index 0000000000000000000000000000000000000000..1d52f8bb6b141460b5ef25d1024d2c9f33bcbd71 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilege_c8.c @@ -0,0 +1,117 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilege_pn0_rr_2_s64: +** whilege pn[0-9]+\.b, x0, x1, vlx2 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilege_pn0_rr_2_s64, int64_t, + pn0 = svwhilege_c8_s64 (x0, x1, 2), + pn0 = svwhilege_c8 (x0, x1, 2)) + +/* +** whilege_pn7_rr_4_s64: +** whilege pn[0-9]+\.b, x0, x1, vlx4 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilege_pn7_rr_4_s64, int64_t, + pn7 = svwhilege_c8_s64 (x0, x1, 4), + pn7 = svwhilege_c8 (x0, x1, 4)) + +/* +** whilege_pn8_rr_2_s64: +** whilege pn8\.b, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_rr_2_s64, int64_t, + pn8 = svwhilege_c8_s64 (x0, x1, 2), + pn8 = svwhilege_c8 (x0, x1, 2)) + +/* +** whilege_pn15_rr_4_s64: +** whilege pn15\.b, x0, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn15_rr_4_s64, int64_t, + pn15 = svwhilege_c8_s64 (x0, x1, 4), + pn15 = svwhilege_c8 (x0, x1, 4)) + +/* +** whilege_pn8_0r_2_s64: +** whilege pn8\.b, xzr, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_0r_2_s64, int64_t, + pn8 = svwhilege_c8 ((int64_t) 0, x1, 2), + pn8 = svwhilege_c8_s64 (0, x1, 2)) + +/* +** whilege_pn8_5r_4_s64: +** mov (x[0-9]+), #?5 +** whilege pn8\.b, \1, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_5r_4_s64, int64_t, + pn8 = svwhilege_c8 ((int64_t) 5, x1, 4), + pn8 = svwhilege_c8_s64 (5, x1, 4)) + +/* +** whilege_pn8_r0_2_s64: +** whilege pn8\.b, x0, xzr, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_r0_2_s64, int64_t, + pn8 = svwhilege_c8 (x0, (int64_t) 0, 2), + pn8 = svwhilege_c8_s64 (x0, 0, 2)) + +/* +** whilege_pn15_r5_4_s64: +** mov (x[0-9]+), #?5 +** whilege pn15\.b, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn15_r5_4_s64, int64_t, + pn15 = svwhilege_c8 (x0, (int64_t) 5, 4), + pn15 = svwhilege_c8_s64 (x0, 5, 4)) + +/* +** whilege_pn8_rr_2_u64: +** whilehs pn8\.b, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_rr_2_u64, uint64_t, + pn8 = svwhilege_c8_u64 (x0, x1, 2), + pn8 = 
svwhilege_c8 (x0, x1, 2)) + +/* +** whilege_pn8_0r_4_u64: +** whilehs pn8\.b, xzr, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_0r_4_u64, uint64_t, + pn8 = svwhilege_c8 ((uint64_t) 0, x1, 4), + pn8 = svwhilege_c8_u64 (0, x1, 4)) + +/* +** whilege_pn8_5r_2_u64: +** mov (x[0-9]+), #?5 +** whilehs pn8\.b, \1, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_5r_2_u64, uint64_t, + pn8 = svwhilege_c8 ((uint64_t) 5, x1, 2), + pn8 = svwhilege_c8_u64 (5, x1, 2)) + +/* +** whilege_pn8_r5_4_u64: +** mov (x[0-9]+), #?5 +** whilehs pn8\.b, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilege_pn8_r5_4_u64, uint64_t, + pn8 = svwhilege_c8 (x0, (uint64_t) 5, 4), + pn8 = svwhilege_c8_u64 (x0, 5, 4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_b16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_b16.c new file mode 100644 index 0000000000000000000000000000000000000000..eee8417e692ff38c8b04dfb296d820b5753c8ef5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_b16.c @@ -0,0 +1,119 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilegt_p1_rr_s64: +** whilegt {p[0-9]+\.h, p[0-9]+\.h}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p1_rr_s64, int64_t, + p1 = svwhilegt_b16_s64_x2 (x0, x1), + p1 = svwhilegt_b16_x2 (x0, x1)) + +/* +** whilegt_p4_rr_s64: +** whilegt {p4\.h, p5\.h}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_rr_s64, int64_t, + p4 = svwhilegt_b16_s64_x2 (x0, x1), + p4 = svwhilegt_b16_x2 (x0, x1)) + +/* +** whilegt_p9_rr_s64: +** whilegt {p[0-9]+\.h, p[0-9]+\.h}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p9_rr_s64, int64_t, + p9 = svwhilegt_b16_s64_x2 (x0, x1), + p9 = svwhilegt_b16_x2 (x0, x1)) + +/* +** whilegt_p14_rr_s64: +** whilegt {p14\.h, p15\.h}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p14_rr_s64, int64_t, + p14 = svwhilegt_b16_s64_x2 (x0, x1), + p14 = svwhilegt_b16_x2 (x0, x1)) + +/* +** whilegt_p4_0r_s64: +** whilegt {p4\.h, p5\.h}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_0r_s64, int64_t, + p4 = svwhilegt_b16_x2 ((int64_t) 0, x1), + p4 = svwhilegt_b16_s64_x2 (0, x1)) + +/* +** whilegt_p4_5r_s64: +** mov (x[0-9]+), #?5 +** whilegt {p4\.h, p5\.h}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_5r_s64, int64_t, + p4 = svwhilegt_b16_x2 ((int64_t) 5, x1), + p4 = svwhilegt_b16_s64_x2 (5, x1)) + +/* +** whilegt_p4_r0_s64: +** whilegt {p4\.h, p5\.h}, x0, xzr +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_r0_s64, int64_t, + p4 = svwhilegt_b16_x2 (x0, (int64_t) 0), + p4 = svwhilegt_b16_s64_x2 (x0, 0)) + +/* +** whilegt_p14_r5_s64: +** mov (x[0-9]+), #?5 +** whilegt {p14\.h, p15\.h}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p14_r5_s64, int64_t, + p14 = svwhilegt_b16_x2 (x0, (int64_t) 5), + p14 = svwhilegt_b16_s64_x2 (x0, 5)) + +/* +** whilegt_p4_rr_u64: +** whilehi {p4\.h, p5\.h}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_rr_u64, uint64_t, + p4 = svwhilegt_b16_u64_x2 (x0, x1), + p4 = svwhilegt_b16_x2 (x0, x1)) + +/* +** whilegt_p4_0r_u64: +** whilehi {p4\.h, p5\.h}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_0r_u64, uint64_t, + p4 = svwhilegt_b16_x2 ((uint64_t) 0, x1), + p4 = svwhilegt_b16_u64_x2 (0, x1)) + +/* +** whilegt_p4_5r_u64: +** mov (x[0-9]+), #?5 +** whilehi {p4\.h, p5\.h}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_5r_u64, uint64_t, + p4 = svwhilegt_b16_x2 ((uint64_t) 5, x1), + p4 = svwhilegt_b16_u64_x2 (5, x1)) + +/* +** whilegt_p4_r5_u64: +** mov 
(x[0-9]+), #?5 +** whilehi {p4\.h, p5\.h}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_r5_u64, uint64_t, + p4 = svwhilegt_b16_x2 (x0, (uint64_t) 5), + p4 = svwhilegt_b16_u64_x2 (x0, 5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_b32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_b32.c new file mode 100644 index 0000000000000000000000000000000000000000..d82ad0e5ab11b708ea4c30e4a491933d32302589 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_b32.c @@ -0,0 +1,119 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilegt_p1_rr_s64: +** whilegt {p[0-9]+\.s, p[0-9]+\.s}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p1_rr_s64, int64_t, + p1 = svwhilegt_b32_s64_x2 (x0, x1), + p1 = svwhilegt_b32_x2 (x0, x1)) + +/* +** whilegt_p4_rr_s64: +** whilegt {p4\.s, p5\.s}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_rr_s64, int64_t, + p4 = svwhilegt_b32_s64_x2 (x0, x1), + p4 = svwhilegt_b32_x2 (x0, x1)) + +/* +** whilegt_p9_rr_s64: +** whilegt {p[0-9]+\.s, p[0-9]+\.s}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p9_rr_s64, int64_t, + p9 = svwhilegt_b32_s64_x2 (x0, x1), + p9 = svwhilegt_b32_x2 (x0, x1)) + +/* +** whilegt_p14_rr_s64: +** whilegt {p14\.s, p15\.s}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p14_rr_s64, int64_t, + p14 = svwhilegt_b32_s64_x2 (x0, x1), + p14 = svwhilegt_b32_x2 (x0, x1)) + +/* +** whilegt_p4_0r_s64: +** whilegt {p4\.s, p5\.s}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_0r_s64, int64_t, + p4 = svwhilegt_b32_x2 ((int64_t) 0, x1), + p4 = svwhilegt_b32_s64_x2 (0, x1)) + +/* +** whilegt_p4_5r_s64: +** mov (x[0-9]+), #?5 +** whilegt {p4\.s, p5\.s}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_5r_s64, int64_t, + p4 = svwhilegt_b32_x2 ((int64_t) 5, x1), + p4 = svwhilegt_b32_s64_x2 (5, x1)) + +/* +** whilegt_p4_r0_s64: +** whilegt {p4\.s, p5\.s}, x0, xzr +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_r0_s64, int64_t, + p4 = svwhilegt_b32_x2 (x0, (int64_t) 0), + p4 = svwhilegt_b32_s64_x2 (x0, 0)) + +/* +** whilegt_p14_r5_s64: +** mov (x[0-9]+), #?5 +** whilegt {p14\.s, p15\.s}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p14_r5_s64, int64_t, + p14 = svwhilegt_b32_x2 (x0, (int64_t) 5), + p14 = svwhilegt_b32_s64_x2 (x0, 5)) + +/* +** whilegt_p4_rr_u64: +** whilehi {p4\.s, p5\.s}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_rr_u64, uint64_t, + p4 = svwhilegt_b32_u64_x2 (x0, x1), + p4 = svwhilegt_b32_x2 (x0, x1)) + +/* +** whilegt_p4_0r_u64: +** whilehi {p4\.s, p5\.s}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_0r_u64, uint64_t, + p4 = svwhilegt_b32_x2 ((uint64_t) 0, x1), + p4 = svwhilegt_b32_u64_x2 (0, x1)) + +/* +** whilegt_p4_5r_u64: +** mov (x[0-9]+), #?5 +** whilehi {p4\.s, p5\.s}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_5r_u64, uint64_t, + p4 = svwhilegt_b32_x2 ((uint64_t) 5, x1), + p4 = svwhilegt_b32_u64_x2 (5, x1)) + +/* +** whilegt_p4_r5_u64: +** mov (x[0-9]+), #?5 +** whilehi {p4\.s, p5\.s}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_r5_u64, uint64_t, + p4 = svwhilegt_b32_x2 (x0, (uint64_t) 5), + p4 = svwhilegt_b32_u64_x2 (x0, 5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_b64.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_b64.c new file mode 100644 index 0000000000000000000000000000000000000000..b25fa79a1452e16e0cf95abde29d9d4005c0d9df --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_b64.c @@ -0,0 +1,119 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilegt_p1_rr_s64: +** whilegt {p[0-9]+\.d, p[0-9]+\.d}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p1_rr_s64, int64_t, + p1 = svwhilegt_b64_s64_x2 (x0, x1), + p1 = svwhilegt_b64_x2 (x0, x1)) + +/* +** whilegt_p4_rr_s64: +** whilegt {p4\.d, p5\.d}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_rr_s64, int64_t, + p4 = svwhilegt_b64_s64_x2 (x0, x1), + p4 = svwhilegt_b64_x2 (x0, x1)) + +/* +** whilegt_p9_rr_s64: +** whilegt {p[0-9]+\.d, p[0-9]+\.d}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p9_rr_s64, int64_t, + p9 = svwhilegt_b64_s64_x2 (x0, x1), + p9 = svwhilegt_b64_x2 (x0, x1)) + +/* +** whilegt_p14_rr_s64: +** whilegt {p14\.d, p15\.d}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p14_rr_s64, int64_t, + p14 = svwhilegt_b64_s64_x2 (x0, x1), + p14 = svwhilegt_b64_x2 (x0, x1)) + +/* +** whilegt_p4_0r_s64: +** whilegt {p4\.d, p5\.d}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_0r_s64, int64_t, + p4 = svwhilegt_b64_x2 ((int64_t) 0, x1), + p4 = svwhilegt_b64_s64_x2 (0, x1)) + +/* +** whilegt_p4_5r_s64: +** mov (x[0-9]+), #?5 +** whilegt {p4\.d, p5\.d}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_5r_s64, int64_t, + p4 = svwhilegt_b64_x2 ((int64_t) 5, x1), + p4 = svwhilegt_b64_s64_x2 (5, x1)) + +/* +** whilegt_p4_r0_s64: +** whilegt {p4\.d, p5\.d}, x0, xzr +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_r0_s64, int64_t, + p4 = svwhilegt_b64_x2 (x0, (int64_t) 0), + p4 = svwhilegt_b64_s64_x2 (x0, 0)) + +/* +** whilegt_p14_r5_s64: +** mov (x[0-9]+), #?5 +** whilegt {p14\.d, p15\.d}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p14_r5_s64, int64_t, + p14 = svwhilegt_b64_x2 (x0, (int64_t) 5), + p14 = svwhilegt_b64_s64_x2 (x0, 5)) + +/* +** whilegt_p4_rr_u64: +** whilehi {p4\.d, p5\.d}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_rr_u64, uint64_t, + p4 = svwhilegt_b64_u64_x2 (x0, x1), + p4 = svwhilegt_b64_x2 (x0, x1)) + +/* +** whilegt_p4_0r_u64: +** whilehi {p4\.d, p5\.d}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_0r_u64, uint64_t, + p4 = svwhilegt_b64_x2 ((uint64_t) 0, x1), + p4 = svwhilegt_b64_u64_x2 (0, x1)) + +/* +** whilegt_p4_5r_u64: +** mov (x[0-9]+), #?5 +** whilehi {p4\.d, p5\.d}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_5r_u64, uint64_t, + p4 = svwhilegt_b64_x2 ((uint64_t) 5, x1), + p4 = svwhilegt_b64_u64_x2 (5, x1)) + +/* +** whilegt_p4_r5_u64: +** mov (x[0-9]+), #?5 +** whilehi {p4\.d, p5\.d}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_r5_u64, uint64_t, + p4 = svwhilegt_b64_x2 (x0, (uint64_t) 5), + p4 = svwhilegt_b64_u64_x2 (x0, 5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_b8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_b8.c new file mode 100644 index 0000000000000000000000000000000000000000..cfc4246e348f99e94d32694dfb8620efcf06ef21 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_b8.c @@ -0,0 +1,119 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilegt_p1_rr_s64: +** whilegt {p[0-9]+\.b, p[0-9]+\.b}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p1_rr_s64, int64_t, + p1 = svwhilegt_b8_s64_x2 (x0, x1), + p1 = svwhilegt_b8_x2 (x0, x1)) + +/* +** whilegt_p4_rr_s64: +** whilegt {p4\.b, p5\.b}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 
(whilegt_p4_rr_s64, int64_t, + p4 = svwhilegt_b8_s64_x2 (x0, x1), + p4 = svwhilegt_b8_x2 (x0, x1)) + +/* +** whilegt_p9_rr_s64: +** whilegt {p[0-9]+\.b, p[0-9]+\.b}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p9_rr_s64, int64_t, + p9 = svwhilegt_b8_s64_x2 (x0, x1), + p9 = svwhilegt_b8_x2 (x0, x1)) + +/* +** whilegt_p14_rr_s64: +** whilegt {p14\.b, p15\.b}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p14_rr_s64, int64_t, + p14 = svwhilegt_b8_s64_x2 (x0, x1), + p14 = svwhilegt_b8_x2 (x0, x1)) + +/* +** whilegt_p4_0r_s64: +** whilegt {p4\.b, p5\.b}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_0r_s64, int64_t, + p4 = svwhilegt_b8_x2 ((int64_t) 0, x1), + p4 = svwhilegt_b8_s64_x2 (0, x1)) + +/* +** whilegt_p4_5r_s64: +** mov (x[0-9]+), #?5 +** whilegt {p4\.b, p5\.b}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_5r_s64, int64_t, + p4 = svwhilegt_b8_x2 ((int64_t) 5, x1), + p4 = svwhilegt_b8_s64_x2 (5, x1)) + +/* +** whilegt_p4_r0_s64: +** whilegt {p4\.b, p5\.b}, x0, xzr +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_r0_s64, int64_t, + p4 = svwhilegt_b8_x2 (x0, (int64_t) 0), + p4 = svwhilegt_b8_s64_x2 (x0, 0)) + +/* +** whilegt_p14_r5_s64: +** mov (x[0-9]+), #?5 +** whilegt {p14\.b, p15\.b}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p14_r5_s64, int64_t, + p14 = svwhilegt_b8_x2 (x0, (int64_t) 5), + p14 = svwhilegt_b8_s64_x2 (x0, 5)) + +/* +** whilegt_p4_rr_u64: +** whilehi {p4\.b, p5\.b}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_rr_u64, uint64_t, + p4 = svwhilegt_b8_u64_x2 (x0, x1), + p4 = svwhilegt_b8_x2 (x0, x1)) + +/* +** whilegt_p4_0r_u64: +** whilehi {p4\.b, p5\.b}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_0r_u64, uint64_t, + p4 = svwhilegt_b8_x2 ((uint64_t) 0, x1), + p4 = svwhilegt_b8_u64_x2 (0, x1)) + +/* +** whilegt_p4_5r_u64: +** mov (x[0-9]+), #?5 +** whilehi {p4\.b, p5\.b}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_5r_u64, uint64_t, + p4 = svwhilegt_b8_x2 ((uint64_t) 5, x1), + p4 = svwhilegt_b8_u64_x2 (5, x1)) + +/* +** whilegt_p4_r5_u64: +** mov (x[0-9]+), #?5 +** whilehi {p4\.b, p5\.b}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilegt_p4_r5_u64, uint64_t, + p4 = svwhilegt_b8_x2 (x0, (uint64_t) 5), + p4 = svwhilegt_b8_u64_x2 (x0, 5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_c16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_c16.c new file mode 100644 index 0000000000000000000000000000000000000000..838aa19cfa69b1b6f0b0f01531af60763bd80eaa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_c16.c @@ -0,0 +1,117 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilegt_pn0_rr_2_s64: +** whilegt pn[0-9]+\.h, x0, x1, vlx2 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn0_rr_2_s64, int64_t, + pn0 = svwhilegt_c16_s64 (x0, x1, 2), + pn0 = svwhilegt_c16 (x0, x1, 2)) + +/* +** whilegt_pn7_rr_4_s64: +** whilegt pn[0-9]+\.h, x0, x1, vlx4 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn7_rr_4_s64, int64_t, + pn7 = svwhilegt_c16_s64 (x0, x1, 4), + pn7 = svwhilegt_c16 (x0, x1, 4)) + +/* +** whilegt_pn8_rr_2_s64: +** whilegt pn8\.h, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_rr_2_s64, int64_t, + pn8 = svwhilegt_c16_s64 (x0, x1, 2), + pn8 = svwhilegt_c16 (x0, x1, 2)) + +/* +** whilegt_pn15_rr_4_s64: +** whilegt pn15\.h, x0, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn15_rr_4_s64, int64_t, + pn15 = svwhilegt_c16_s64 (x0, x1, 4), + pn15 = svwhilegt_c16 (x0, x1, 4)) + +/* +** 
whilegt_pn8_0r_2_s64: +** whilegt pn8\.h, xzr, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_0r_2_s64, int64_t, + pn8 = svwhilegt_c16 ((int64_t) 0, x1, 2), + pn8 = svwhilegt_c16_s64 (0, x1, 2)) + +/* +** whilegt_pn8_5r_4_s64: +** mov (x[0-9]+), #?5 +** whilegt pn8\.h, \1, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_5r_4_s64, int64_t, + pn8 = svwhilegt_c16 ((int64_t) 5, x1, 4), + pn8 = svwhilegt_c16_s64 (5, x1, 4)) + +/* +** whilegt_pn8_r0_2_s64: +** whilegt pn8\.h, x0, xzr, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_r0_2_s64, int64_t, + pn8 = svwhilegt_c16 (x0, (int64_t) 0, 2), + pn8 = svwhilegt_c16_s64 (x0, 0, 2)) + +/* +** whilegt_pn15_r5_4_s64: +** mov (x[0-9]+), #?5 +** whilegt pn15\.h, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn15_r5_4_s64, int64_t, + pn15 = svwhilegt_c16 (x0, (int64_t) 5, 4), + pn15 = svwhilegt_c16_s64 (x0, 5, 4)) + +/* +** whilegt_pn8_rr_2_u64: +** whilehi pn8\.h, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_rr_2_u64, uint64_t, + pn8 = svwhilegt_c16_u64 (x0, x1, 2), + pn8 = svwhilegt_c16 (x0, x1, 2)) + +/* +** whilegt_pn8_0r_4_u64: +** whilehi pn8\.h, xzr, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_0r_4_u64, uint64_t, + pn8 = svwhilegt_c16 ((uint64_t) 0, x1, 4), + pn8 = svwhilegt_c16_u64 (0, x1, 4)) + +/* +** whilegt_pn8_5r_2_u64: +** mov (x[0-9]+), #?5 +** whilehi pn8\.h, \1, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_5r_2_u64, uint64_t, + pn8 = svwhilegt_c16 ((uint64_t) 5, x1, 2), + pn8 = svwhilegt_c16_u64 (5, x1, 2)) + +/* +** whilegt_pn8_r5_4_u64: +** mov (x[0-9]+), #?5 +** whilehi pn8\.h, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_r5_4_u64, uint64_t, + pn8 = svwhilegt_c16 (x0, (uint64_t) 5, 4), + pn8 = svwhilegt_c16_u64 (x0, 5, 4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_c32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_c32.c new file mode 100644 index 0000000000000000000000000000000000000000..008e5ea82f1c746f7b7ac521742636449d0f96ab --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_c32.c @@ -0,0 +1,117 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilegt_pn0_rr_2_s64: +** whilegt pn[0-9]+\.s, x0, x1, vlx2 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn0_rr_2_s64, int64_t, + pn0 = svwhilegt_c32_s64 (x0, x1, 2), + pn0 = svwhilegt_c32 (x0, x1, 2)) + +/* +** whilegt_pn7_rr_4_s64: +** whilegt pn[0-9]+\.s, x0, x1, vlx4 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn7_rr_4_s64, int64_t, + pn7 = svwhilegt_c32_s64 (x0, x1, 4), + pn7 = svwhilegt_c32 (x0, x1, 4)) + +/* +** whilegt_pn8_rr_2_s64: +** whilegt pn8\.s, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_rr_2_s64, int64_t, + pn8 = svwhilegt_c32_s64 (x0, x1, 2), + pn8 = svwhilegt_c32 (x0, x1, 2)) + +/* +** whilegt_pn15_rr_4_s64: +** whilegt pn15\.s, x0, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn15_rr_4_s64, int64_t, + pn15 = svwhilegt_c32_s64 (x0, x1, 4), + pn15 = svwhilegt_c32 (x0, x1, 4)) + +/* +** whilegt_pn8_0r_2_s64: +** whilegt pn8\.s, xzr, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_0r_2_s64, int64_t, + pn8 = svwhilegt_c32 ((int64_t) 0, x1, 2), + pn8 = svwhilegt_c32_s64 (0, x1, 2)) + +/* +** whilegt_pn8_5r_4_s64: +** mov (x[0-9]+), #?5 +** whilegt pn8\.s, \1, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_5r_4_s64, int64_t, + pn8 = svwhilegt_c32 ((int64_t) 5, x1, 4), + pn8 = svwhilegt_c32_s64 (5, x1, 4)) + +/* +** whilegt_pn8_r0_2_s64: +** whilegt pn8\.s, x0, 
xzr, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_r0_2_s64, int64_t, + pn8 = svwhilegt_c32 (x0, (int64_t) 0, 2), + pn8 = svwhilegt_c32_s64 (x0, 0, 2)) + +/* +** whilegt_pn15_r5_4_s64: +** mov (x[0-9]+), #?5 +** whilegt pn15\.s, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn15_r5_4_s64, int64_t, + pn15 = svwhilegt_c32 (x0, (int64_t) 5, 4), + pn15 = svwhilegt_c32_s64 (x0, 5, 4)) + +/* +** whilegt_pn8_rr_2_u64: +** whilehi pn8\.s, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_rr_2_u64, uint64_t, + pn8 = svwhilegt_c32_u64 (x0, x1, 2), + pn8 = svwhilegt_c32 (x0, x1, 2)) + +/* +** whilegt_pn8_0r_4_u64: +** whilehi pn8\.s, xzr, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_0r_4_u64, uint64_t, + pn8 = svwhilegt_c32 ((uint64_t) 0, x1, 4), + pn8 = svwhilegt_c32_u64 (0, x1, 4)) + +/* +** whilegt_pn8_5r_2_u64: +** mov (x[0-9]+), #?5 +** whilehi pn8\.s, \1, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_5r_2_u64, uint64_t, + pn8 = svwhilegt_c32 ((uint64_t) 5, x1, 2), + pn8 = svwhilegt_c32_u64 (5, x1, 2)) + +/* +** whilegt_pn8_r5_4_u64: +** mov (x[0-9]+), #?5 +** whilehi pn8\.s, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_r5_4_u64, uint64_t, + pn8 = svwhilegt_c32 (x0, (uint64_t) 5, 4), + pn8 = svwhilegt_c32_u64 (x0, 5, 4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_c64.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_c64.c new file mode 100644 index 0000000000000000000000000000000000000000..a89d6ed71f94e8143078d9b880ecee85363168e9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_c64.c @@ -0,0 +1,117 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilegt_pn0_rr_2_s64: +** whilegt pn[0-9]+\.d, x0, x1, vlx2 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn0_rr_2_s64, int64_t, + pn0 = svwhilegt_c64_s64 (x0, x1, 2), + pn0 = svwhilegt_c64 (x0, x1, 2)) + +/* +** whilegt_pn7_rr_4_s64: +** whilegt pn[0-9]+\.d, x0, x1, vlx4 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn7_rr_4_s64, int64_t, + pn7 = svwhilegt_c64_s64 (x0, x1, 4), + pn7 = svwhilegt_c64 (x0, x1, 4)) + +/* +** whilegt_pn8_rr_2_s64: +** whilegt pn8\.d, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_rr_2_s64, int64_t, + pn8 = svwhilegt_c64_s64 (x0, x1, 2), + pn8 = svwhilegt_c64 (x0, x1, 2)) + +/* +** whilegt_pn15_rr_4_s64: +** whilegt pn15\.d, x0, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn15_rr_4_s64, int64_t, + pn15 = svwhilegt_c64_s64 (x0, x1, 4), + pn15 = svwhilegt_c64 (x0, x1, 4)) + +/* +** whilegt_pn8_0r_2_s64: +** whilegt pn8\.d, xzr, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_0r_2_s64, int64_t, + pn8 = svwhilegt_c64 ((int64_t) 0, x1, 2), + pn8 = svwhilegt_c64_s64 (0, x1, 2)) + +/* +** whilegt_pn8_5r_4_s64: +** mov (x[0-9]+), #?5 +** whilegt pn8\.d, \1, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_5r_4_s64, int64_t, + pn8 = svwhilegt_c64 ((int64_t) 5, x1, 4), + pn8 = svwhilegt_c64_s64 (5, x1, 4)) + +/* +** whilegt_pn8_r0_2_s64: +** whilegt pn8\.d, x0, xzr, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_r0_2_s64, int64_t, + pn8 = svwhilegt_c64 (x0, (int64_t) 0, 2), + pn8 = svwhilegt_c64_s64 (x0, 0, 2)) + +/* +** whilegt_pn15_r5_4_s64: +** mov (x[0-9]+), #?5 +** whilegt pn15\.d, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn15_r5_4_s64, int64_t, + pn15 = svwhilegt_c64 (x0, (int64_t) 5, 4), + pn15 = svwhilegt_c64_s64 (x0, 5, 4)) + +/* +** whilegt_pn8_rr_2_u64: +** whilehi pn8\.d, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C 
(whilegt_pn8_rr_2_u64, uint64_t, + pn8 = svwhilegt_c64_u64 (x0, x1, 2), + pn8 = svwhilegt_c64 (x0, x1, 2)) + +/* +** whilegt_pn8_0r_4_u64: +** whilehi pn8\.d, xzr, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_0r_4_u64, uint64_t, + pn8 = svwhilegt_c64 ((uint64_t) 0, x1, 4), + pn8 = svwhilegt_c64_u64 (0, x1, 4)) + +/* +** whilegt_pn8_5r_2_u64: +** mov (x[0-9]+), #?5 +** whilehi pn8\.d, \1, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_5r_2_u64, uint64_t, + pn8 = svwhilegt_c64 ((uint64_t) 5, x1, 2), + pn8 = svwhilegt_c64_u64 (5, x1, 2)) + +/* +** whilegt_pn8_r5_4_u64: +** mov (x[0-9]+), #?5 +** whilehi pn8\.d, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_r5_4_u64, uint64_t, + pn8 = svwhilegt_c64 (x0, (uint64_t) 5, 4), + pn8 = svwhilegt_c64_u64 (x0, 5, 4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_c8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_c8.c new file mode 100644 index 0000000000000000000000000000000000000000..f6447573bc98ad1c1bf51d0df8af065fcd4ad15a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilegt_c8.c @@ -0,0 +1,117 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilegt_pn0_rr_2_s64: +** whilegt pn[0-9]+\.b, x0, x1, vlx2 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn0_rr_2_s64, int64_t, + pn0 = svwhilegt_c8_s64 (x0, x1, 2), + pn0 = svwhilegt_c8 (x0, x1, 2)) + +/* +** whilegt_pn7_rr_4_s64: +** whilegt pn[0-9]+\.b, x0, x1, vlx4 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn7_rr_4_s64, int64_t, + pn7 = svwhilegt_c8_s64 (x0, x1, 4), + pn7 = svwhilegt_c8 (x0, x1, 4)) + +/* +** whilegt_pn8_rr_2_s64: +** whilegt pn8\.b, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_rr_2_s64, int64_t, + pn8 = svwhilegt_c8_s64 (x0, x1, 2), + pn8 = svwhilegt_c8 (x0, x1, 2)) + +/* +** whilegt_pn15_rr_4_s64: +** whilegt pn15\.b, x0, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn15_rr_4_s64, int64_t, + pn15 = svwhilegt_c8_s64 (x0, x1, 4), + pn15 = svwhilegt_c8 (x0, x1, 4)) + +/* +** whilegt_pn8_0r_2_s64: +** whilegt pn8\.b, xzr, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_0r_2_s64, int64_t, + pn8 = svwhilegt_c8 ((int64_t) 0, x1, 2), + pn8 = svwhilegt_c8_s64 (0, x1, 2)) + +/* +** whilegt_pn8_5r_4_s64: +** mov (x[0-9]+), #?5 +** whilegt pn8\.b, \1, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_5r_4_s64, int64_t, + pn8 = svwhilegt_c8 ((int64_t) 5, x1, 4), + pn8 = svwhilegt_c8_s64 (5, x1, 4)) + +/* +** whilegt_pn8_r0_2_s64: +** whilegt pn8\.b, x0, xzr, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_r0_2_s64, int64_t, + pn8 = svwhilegt_c8 (x0, (int64_t) 0, 2), + pn8 = svwhilegt_c8_s64 (x0, 0, 2)) + +/* +** whilegt_pn15_r5_4_s64: +** mov (x[0-9]+), #?5 +** whilegt pn15\.b, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn15_r5_4_s64, int64_t, + pn15 = svwhilegt_c8 (x0, (int64_t) 5, 4), + pn15 = svwhilegt_c8_s64 (x0, 5, 4)) + +/* +** whilegt_pn8_rr_2_u64: +** whilehi pn8\.b, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_rr_2_u64, uint64_t, + pn8 = svwhilegt_c8_u64 (x0, x1, 2), + pn8 = svwhilegt_c8 (x0, x1, 2)) + +/* +** whilegt_pn8_0r_4_u64: +** whilehi pn8\.b, xzr, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_0r_4_u64, uint64_t, + pn8 = svwhilegt_c8 ((uint64_t) 0, x1, 4), + pn8 = svwhilegt_c8_u64 (0, x1, 4)) + +/* +** whilegt_pn8_5r_2_u64: +** mov (x[0-9]+), #?5 +** whilehi pn8\.b, \1, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_5r_2_u64, uint64_t, + pn8 = svwhilegt_c8 ((uint64_t) 5, x1, 
2), + pn8 = svwhilegt_c8_u64 (5, x1, 2)) + +/* +** whilegt_pn8_r5_4_u64: +** mov (x[0-9]+), #?5 +** whilehi pn8\.b, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilegt_pn8_r5_4_u64, uint64_t, + pn8 = svwhilegt_c8 (x0, (uint64_t) 5, 4), + pn8 = svwhilegt_c8_u64 (x0, 5, 4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_b16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_b16.c new file mode 100644 index 0000000000000000000000000000000000000000..c74a21ee4829f6d70574c55af5f9279ce35ae16b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_b16.c @@ -0,0 +1,119 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilele_p1_rr_s64: +** whilele {p[0-9]+\.h, p[0-9]+\.h}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p1_rr_s64, int64_t, + p1 = svwhilele_b16_s64_x2 (x0, x1), + p1 = svwhilele_b16_x2 (x0, x1)) + +/* +** whilele_p4_rr_s64: +** whilele {p4\.h, p5\.h}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_rr_s64, int64_t, + p4 = svwhilele_b16_s64_x2 (x0, x1), + p4 = svwhilele_b16_x2 (x0, x1)) + +/* +** whilele_p9_rr_s64: +** whilele {p[0-9]+\.h, p[0-9]+\.h}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p9_rr_s64, int64_t, + p9 = svwhilele_b16_s64_x2 (x0, x1), + p9 = svwhilele_b16_x2 (x0, x1)) + +/* +** whilele_p14_rr_s64: +** whilele {p14\.h, p15\.h}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p14_rr_s64, int64_t, + p14 = svwhilele_b16_s64_x2 (x0, x1), + p14 = svwhilele_b16_x2 (x0, x1)) + +/* +** whilele_p4_0r_s64: +** whilele {p4\.h, p5\.h}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_0r_s64, int64_t, + p4 = svwhilele_b16_x2 ((int64_t) 0, x1), + p4 = svwhilele_b16_s64_x2 (0, x1)) + +/* +** whilele_p4_5r_s64: +** mov (x[0-9]+), #?5 +** whilele {p4\.h, p5\.h}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_5r_s64, int64_t, + p4 = svwhilele_b16_x2 ((int64_t) 5, x1), + p4 = svwhilele_b16_s64_x2 (5, x1)) + +/* +** whilele_p4_r0_s64: +** whilele {p4\.h, p5\.h}, x0, xzr +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_r0_s64, int64_t, + p4 = svwhilele_b16_x2 (x0, (int64_t) 0), + p4 = svwhilele_b16_s64_x2 (x0, 0)) + +/* +** whilele_p14_r5_s64: +** mov (x[0-9]+), #?5 +** whilele {p14\.h, p15\.h}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p14_r5_s64, int64_t, + p14 = svwhilele_b16_x2 (x0, (int64_t) 5), + p14 = svwhilele_b16_s64_x2 (x0, 5)) + +/* +** whilele_p4_rr_u64: +** whilels {p4\.h, p5\.h}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_rr_u64, uint64_t, + p4 = svwhilele_b16_u64_x2 (x0, x1), + p4 = svwhilele_b16_x2 (x0, x1)) + +/* +** whilele_p4_0r_u64: +** whilels {p4\.h, p5\.h}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_0r_u64, uint64_t, + p4 = svwhilele_b16_x2 ((uint64_t) 0, x1), + p4 = svwhilele_b16_u64_x2 (0, x1)) + +/* +** whilele_p4_5r_u64: +** mov (x[0-9]+), #?5 +** whilels {p4\.h, p5\.h}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_5r_u64, uint64_t, + p4 = svwhilele_b16_x2 ((uint64_t) 5, x1), + p4 = svwhilele_b16_u64_x2 (5, x1)) + +/* +** whilele_p4_r5_u64: +** mov (x[0-9]+), #?5 +** whilels {p4\.h, p5\.h}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_r5_u64, uint64_t, + p4 = svwhilele_b16_x2 (x0, (uint64_t) 5), + p4 = svwhilele_b16_u64_x2 (x0, 5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_b32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_b32.c new file mode 100644 index 
0000000000000000000000000000000000000000..0f5ba6116c9d00374b4827edea6a8e5c661b4383 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_b32.c @@ -0,0 +1,119 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilele_p1_rr_s64: +** whilele {p[0-9]+\.s, p[0-9]+\.s}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p1_rr_s64, int64_t, + p1 = svwhilele_b32_s64_x2 (x0, x1), + p1 = svwhilele_b32_x2 (x0, x1)) + +/* +** whilele_p4_rr_s64: +** whilele {p4\.s, p5\.s}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_rr_s64, int64_t, + p4 = svwhilele_b32_s64_x2 (x0, x1), + p4 = svwhilele_b32_x2 (x0, x1)) + +/* +** whilele_p9_rr_s64: +** whilele {p[0-9]+\.s, p[0-9]+\.s}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p9_rr_s64, int64_t, + p9 = svwhilele_b32_s64_x2 (x0, x1), + p9 = svwhilele_b32_x2 (x0, x1)) + +/* +** whilele_p14_rr_s64: +** whilele {p14\.s, p15\.s}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p14_rr_s64, int64_t, + p14 = svwhilele_b32_s64_x2 (x0, x1), + p14 = svwhilele_b32_x2 (x0, x1)) + +/* +** whilele_p4_0r_s64: +** whilele {p4\.s, p5\.s}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_0r_s64, int64_t, + p4 = svwhilele_b32_x2 ((int64_t) 0, x1), + p4 = svwhilele_b32_s64_x2 (0, x1)) + +/* +** whilele_p4_5r_s64: +** mov (x[0-9]+), #?5 +** whilele {p4\.s, p5\.s}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_5r_s64, int64_t, + p4 = svwhilele_b32_x2 ((int64_t) 5, x1), + p4 = svwhilele_b32_s64_x2 (5, x1)) + +/* +** whilele_p4_r0_s64: +** whilele {p4\.s, p5\.s}, x0, xzr +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_r0_s64, int64_t, + p4 = svwhilele_b32_x2 (x0, (int64_t) 0), + p4 = svwhilele_b32_s64_x2 (x0, 0)) + +/* +** whilele_p14_r5_s64: +** mov (x[0-9]+), #?5 +** whilele {p14\.s, p15\.s}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p14_r5_s64, int64_t, + p14 = svwhilele_b32_x2 (x0, (int64_t) 5), + p14 = svwhilele_b32_s64_x2 (x0, 5)) + +/* +** whilele_p4_rr_u64: +** whilels {p4\.s, p5\.s}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_rr_u64, uint64_t, + p4 = svwhilele_b32_u64_x2 (x0, x1), + p4 = svwhilele_b32_x2 (x0, x1)) + +/* +** whilele_p4_0r_u64: +** whilels {p4\.s, p5\.s}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_0r_u64, uint64_t, + p4 = svwhilele_b32_x2 ((uint64_t) 0, x1), + p4 = svwhilele_b32_u64_x2 (0, x1)) + +/* +** whilele_p4_5r_u64: +** mov (x[0-9]+), #?5 +** whilels {p4\.s, p5\.s}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_5r_u64, uint64_t, + p4 = svwhilele_b32_x2 ((uint64_t) 5, x1), + p4 = svwhilele_b32_u64_x2 (5, x1)) + +/* +** whilele_p4_r5_u64: +** mov (x[0-9]+), #?5 +** whilels {p4\.s, p5\.s}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_r5_u64, uint64_t, + p4 = svwhilele_b32_x2 (x0, (uint64_t) 5), + p4 = svwhilele_b32_u64_x2 (x0, 5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_b64.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_b64.c new file mode 100644 index 0000000000000000000000000000000000000000..e26a8f60d47f70a0759d13b8aeb5f29310238c89 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_b64.c @@ -0,0 +1,119 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilele_p1_rr_s64: +** whilele {p[0-9]+\.d, p[0-9]+\.d}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p1_rr_s64, int64_t, + p1 = svwhilele_b64_s64_x2 (x0, x1), + p1 = svwhilele_b64_x2 
(x0, x1)) + +/* +** whilele_p4_rr_s64: +** whilele {p4\.d, p5\.d}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_rr_s64, int64_t, + p4 = svwhilele_b64_s64_x2 (x0, x1), + p4 = svwhilele_b64_x2 (x0, x1)) + +/* +** whilele_p9_rr_s64: +** whilele {p[0-9]+\.d, p[0-9]+\.d}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p9_rr_s64, int64_t, + p9 = svwhilele_b64_s64_x2 (x0, x1), + p9 = svwhilele_b64_x2 (x0, x1)) + +/* +** whilele_p14_rr_s64: +** whilele {p14\.d, p15\.d}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p14_rr_s64, int64_t, + p14 = svwhilele_b64_s64_x2 (x0, x1), + p14 = svwhilele_b64_x2 (x0, x1)) + +/* +** whilele_p4_0r_s64: +** whilele {p4\.d, p5\.d}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_0r_s64, int64_t, + p4 = svwhilele_b64_x2 ((int64_t) 0, x1), + p4 = svwhilele_b64_s64_x2 (0, x1)) + +/* +** whilele_p4_5r_s64: +** mov (x[0-9]+), #?5 +** whilele {p4\.d, p5\.d}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_5r_s64, int64_t, + p4 = svwhilele_b64_x2 ((int64_t) 5, x1), + p4 = svwhilele_b64_s64_x2 (5, x1)) + +/* +** whilele_p4_r0_s64: +** whilele {p4\.d, p5\.d}, x0, xzr +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_r0_s64, int64_t, + p4 = svwhilele_b64_x2 (x0, (int64_t) 0), + p4 = svwhilele_b64_s64_x2 (x0, 0)) + +/* +** whilele_p14_r5_s64: +** mov (x[0-9]+), #?5 +** whilele {p14\.d, p15\.d}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p14_r5_s64, int64_t, + p14 = svwhilele_b64_x2 (x0, (int64_t) 5), + p14 = svwhilele_b64_s64_x2 (x0, 5)) + +/* +** whilele_p4_rr_u64: +** whilels {p4\.d, p5\.d}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_rr_u64, uint64_t, + p4 = svwhilele_b64_u64_x2 (x0, x1), + p4 = svwhilele_b64_x2 (x0, x1)) + +/* +** whilele_p4_0r_u64: +** whilels {p4\.d, p5\.d}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_0r_u64, uint64_t, + p4 = svwhilele_b64_x2 ((uint64_t) 0, x1), + p4 = svwhilele_b64_u64_x2 (0, x1)) + +/* +** whilele_p4_5r_u64: +** mov (x[0-9]+), #?5 +** whilels {p4\.d, p5\.d}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_5r_u64, uint64_t, + p4 = svwhilele_b64_x2 ((uint64_t) 5, x1), + p4 = svwhilele_b64_u64_x2 (5, x1)) + +/* +** whilele_p4_r5_u64: +** mov (x[0-9]+), #?5 +** whilels {p4\.d, p5\.d}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_r5_u64, uint64_t, + p4 = svwhilele_b64_x2 (x0, (uint64_t) 5), + p4 = svwhilele_b64_u64_x2 (x0, 5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_b8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_b8.c new file mode 100644 index 0000000000000000000000000000000000000000..b6cc1e005955e20139d9295ab9a778bb85fc528a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_b8.c @@ -0,0 +1,119 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilele_p1_rr_s64: +** whilele {p[0-9]+\.b, p[0-9]+\.b}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p1_rr_s64, int64_t, + p1 = svwhilele_b8_s64_x2 (x0, x1), + p1 = svwhilele_b8_x2 (x0, x1)) + +/* +** whilele_p4_rr_s64: +** whilele {p4\.b, p5\.b}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_rr_s64, int64_t, + p4 = svwhilele_b8_s64_x2 (x0, x1), + p4 = svwhilele_b8_x2 (x0, x1)) + +/* +** whilele_p9_rr_s64: +** whilele {p[0-9]+\.b, p[0-9]+\.b}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p9_rr_s64, int64_t, + p9 = svwhilele_b8_s64_x2 (x0, x1), + p9 = svwhilele_b8_x2 (x0, x1)) + +/* +** whilele_p14_rr_s64: +** whilele {p14\.b, p15\.b}, x0, x1 +** ret 
+*/ +TEST_COMPARE_S_X2 (whilele_p14_rr_s64, int64_t, + p14 = svwhilele_b8_s64_x2 (x0, x1), + p14 = svwhilele_b8_x2 (x0, x1)) + +/* +** whilele_p4_0r_s64: +** whilele {p4\.b, p5\.b}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_0r_s64, int64_t, + p4 = svwhilele_b8_x2 ((int64_t) 0, x1), + p4 = svwhilele_b8_s64_x2 (0, x1)) + +/* +** whilele_p4_5r_s64: +** mov (x[0-9]+), #?5 +** whilele {p4\.b, p5\.b}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_5r_s64, int64_t, + p4 = svwhilele_b8_x2 ((int64_t) 5, x1), + p4 = svwhilele_b8_s64_x2 (5, x1)) + +/* +** whilele_p4_r0_s64: +** whilele {p4\.b, p5\.b}, x0, xzr +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_r0_s64, int64_t, + p4 = svwhilele_b8_x2 (x0, (int64_t) 0), + p4 = svwhilele_b8_s64_x2 (x0, 0)) + +/* +** whilele_p14_r5_s64: +** mov (x[0-9]+), #?5 +** whilele {p14\.b, p15\.b}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p14_r5_s64, int64_t, + p14 = svwhilele_b8_x2 (x0, (int64_t) 5), + p14 = svwhilele_b8_s64_x2 (x0, 5)) + +/* +** whilele_p4_rr_u64: +** whilels {p4\.b, p5\.b}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_rr_u64, uint64_t, + p4 = svwhilele_b8_u64_x2 (x0, x1), + p4 = svwhilele_b8_x2 (x0, x1)) + +/* +** whilele_p4_0r_u64: +** whilels {p4\.b, p5\.b}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_0r_u64, uint64_t, + p4 = svwhilele_b8_x2 ((uint64_t) 0, x1), + p4 = svwhilele_b8_u64_x2 (0, x1)) + +/* +** whilele_p4_5r_u64: +** mov (x[0-9]+), #?5 +** whilels {p4\.b, p5\.b}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_5r_u64, uint64_t, + p4 = svwhilele_b8_x2 ((uint64_t) 5, x1), + p4 = svwhilele_b8_u64_x2 (5, x1)) + +/* +** whilele_p4_r5_u64: +** mov (x[0-9]+), #?5 +** whilels {p4\.b, p5\.b}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilele_p4_r5_u64, uint64_t, + p4 = svwhilele_b8_x2 (x0, (uint64_t) 5), + p4 = svwhilele_b8_u64_x2 (x0, 5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_c16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_c16.c new file mode 100644 index 0000000000000000000000000000000000000000..768fd91f81128cbf490fa3fc282dd58c9d7530fc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_c16.c @@ -0,0 +1,117 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilele_pn0_rr_2_s64: +** whilele pn[0-9]+\.h, x0, x1, vlx2 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilele_pn0_rr_2_s64, int64_t, + pn0 = svwhilele_c16_s64 (x0, x1, 2), + pn0 = svwhilele_c16 (x0, x1, 2)) + +/* +** whilele_pn7_rr_4_s64: +** whilele pn[0-9]+\.h, x0, x1, vlx4 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilele_pn7_rr_4_s64, int64_t, + pn7 = svwhilele_c16_s64 (x0, x1, 4), + pn7 = svwhilele_c16 (x0, x1, 4)) + +/* +** whilele_pn8_rr_2_s64: +** whilele pn8\.h, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_rr_2_s64, int64_t, + pn8 = svwhilele_c16_s64 (x0, x1, 2), + pn8 = svwhilele_c16 (x0, x1, 2)) + +/* +** whilele_pn15_rr_4_s64: +** whilele pn15\.h, x0, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn15_rr_4_s64, int64_t, + pn15 = svwhilele_c16_s64 (x0, x1, 4), + pn15 = svwhilele_c16 (x0, x1, 4)) + +/* +** whilele_pn8_0r_2_s64: +** whilele pn8\.h, xzr, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_0r_2_s64, int64_t, + pn8 = svwhilele_c16 ((int64_t) 0, x1, 2), + pn8 = svwhilele_c16_s64 (0, x1, 2)) + +/* +** whilele_pn8_5r_4_s64: +** mov (x[0-9]+), #?5 +** whilele pn8\.h, \1, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_5r_4_s64, int64_t, + pn8 = svwhilele_c16 ((int64_t) 5, x1, 4), + pn8 = 
svwhilele_c16_s64 (5, x1, 4)) + +/* +** whilele_pn8_r0_2_s64: +** whilele pn8\.h, x0, xzr, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_r0_2_s64, int64_t, + pn8 = svwhilele_c16 (x0, (int64_t) 0, 2), + pn8 = svwhilele_c16_s64 (x0, 0, 2)) + +/* +** whilele_pn15_r5_4_s64: +** mov (x[0-9]+), #?5 +** whilele pn15\.h, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn15_r5_4_s64, int64_t, + pn15 = svwhilele_c16 (x0, (int64_t) 5, 4), + pn15 = svwhilele_c16_s64 (x0, 5, 4)) + +/* +** whilele_pn8_rr_2_u64: +** whilels pn8\.h, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_rr_2_u64, uint64_t, + pn8 = svwhilele_c16_u64 (x0, x1, 2), + pn8 = svwhilele_c16 (x0, x1, 2)) + +/* +** whilele_pn8_0r_4_u64: +** whilels pn8\.h, xzr, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_0r_4_u64, uint64_t, + pn8 = svwhilele_c16 ((uint64_t) 0, x1, 4), + pn8 = svwhilele_c16_u64 (0, x1, 4)) + +/* +** whilele_pn8_5r_2_u64: +** mov (x[0-9]+), #?5 +** whilels pn8\.h, \1, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_5r_2_u64, uint64_t, + pn8 = svwhilele_c16 ((uint64_t) 5, x1, 2), + pn8 = svwhilele_c16_u64 (5, x1, 2)) + +/* +** whilele_pn8_r5_4_u64: +** mov (x[0-9]+), #?5 +** whilels pn8\.h, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_r5_4_u64, uint64_t, + pn8 = svwhilele_c16 (x0, (uint64_t) 5, 4), + pn8 = svwhilele_c16_u64 (x0, 5, 4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_c32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_c32.c new file mode 100644 index 0000000000000000000000000000000000000000..3669d858d209b6b1fcaea2a0665b7cfeb83958e5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_c32.c @@ -0,0 +1,117 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilele_pn0_rr_2_s64: +** whilele pn[0-9]+\.s, x0, x1, vlx2 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilele_pn0_rr_2_s64, int64_t, + pn0 = svwhilele_c32_s64 (x0, x1, 2), + pn0 = svwhilele_c32 (x0, x1, 2)) + +/* +** whilele_pn7_rr_4_s64: +** whilele pn[0-9]+\.s, x0, x1, vlx4 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilele_pn7_rr_4_s64, int64_t, + pn7 = svwhilele_c32_s64 (x0, x1, 4), + pn7 = svwhilele_c32 (x0, x1, 4)) + +/* +** whilele_pn8_rr_2_s64: +** whilele pn8\.s, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_rr_2_s64, int64_t, + pn8 = svwhilele_c32_s64 (x0, x1, 2), + pn8 = svwhilele_c32 (x0, x1, 2)) + +/* +** whilele_pn15_rr_4_s64: +** whilele pn15\.s, x0, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn15_rr_4_s64, int64_t, + pn15 = svwhilele_c32_s64 (x0, x1, 4), + pn15 = svwhilele_c32 (x0, x1, 4)) + +/* +** whilele_pn8_0r_2_s64: +** whilele pn8\.s, xzr, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_0r_2_s64, int64_t, + pn8 = svwhilele_c32 ((int64_t) 0, x1, 2), + pn8 = svwhilele_c32_s64 (0, x1, 2)) + +/* +** whilele_pn8_5r_4_s64: +** mov (x[0-9]+), #?5 +** whilele pn8\.s, \1, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_5r_4_s64, int64_t, + pn8 = svwhilele_c32 ((int64_t) 5, x1, 4), + pn8 = svwhilele_c32_s64 (5, x1, 4)) + +/* +** whilele_pn8_r0_2_s64: +** whilele pn8\.s, x0, xzr, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_r0_2_s64, int64_t, + pn8 = svwhilele_c32 (x0, (int64_t) 0, 2), + pn8 = svwhilele_c32_s64 (x0, 0, 2)) + +/* +** whilele_pn15_r5_4_s64: +** mov (x[0-9]+), #?5 +** whilele pn15\.s, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn15_r5_4_s64, int64_t, + pn15 = svwhilele_c32 (x0, (int64_t) 5, 4), + pn15 = svwhilele_c32_s64 (x0, 5, 4)) + +/* +** 
whilele_pn8_rr_2_u64: +** whilels pn8\.s, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_rr_2_u64, uint64_t, + pn8 = svwhilele_c32_u64 (x0, x1, 2), + pn8 = svwhilele_c32 (x0, x1, 2)) + +/* +** whilele_pn8_0r_4_u64: +** whilels pn8\.s, xzr, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_0r_4_u64, uint64_t, + pn8 = svwhilele_c32 ((uint64_t) 0, x1, 4), + pn8 = svwhilele_c32_u64 (0, x1, 4)) + +/* +** whilele_pn8_5r_2_u64: +** mov (x[0-9]+), #?5 +** whilels pn8\.s, \1, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_5r_2_u64, uint64_t, + pn8 = svwhilele_c32 ((uint64_t) 5, x1, 2), + pn8 = svwhilele_c32_u64 (5, x1, 2)) + +/* +** whilele_pn8_r5_4_u64: +** mov (x[0-9]+), #?5 +** whilels pn8\.s, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_r5_4_u64, uint64_t, + pn8 = svwhilele_c32 (x0, (uint64_t) 5, 4), + pn8 = svwhilele_c32_u64 (x0, 5, 4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_c64.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_c64.c new file mode 100644 index 0000000000000000000000000000000000000000..f614a5571197bc122dfcdba5971b19951ed4818a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_c64.c @@ -0,0 +1,117 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilele_pn0_rr_2_s64: +** whilele pn[0-9]+\.d, x0, x1, vlx2 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilele_pn0_rr_2_s64, int64_t, + pn0 = svwhilele_c64_s64 (x0, x1, 2), + pn0 = svwhilele_c64 (x0, x1, 2)) + +/* +** whilele_pn7_rr_4_s64: +** whilele pn[0-9]+\.d, x0, x1, vlx4 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilele_pn7_rr_4_s64, int64_t, + pn7 = svwhilele_c64_s64 (x0, x1, 4), + pn7 = svwhilele_c64 (x0, x1, 4)) + +/* +** whilele_pn8_rr_2_s64: +** whilele pn8\.d, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_rr_2_s64, int64_t, + pn8 = svwhilele_c64_s64 (x0, x1, 2), + pn8 = svwhilele_c64 (x0, x1, 2)) + +/* +** whilele_pn15_rr_4_s64: +** whilele pn15\.d, x0, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn15_rr_4_s64, int64_t, + pn15 = svwhilele_c64_s64 (x0, x1, 4), + pn15 = svwhilele_c64 (x0, x1, 4)) + +/* +** whilele_pn8_0r_2_s64: +** whilele pn8\.d, xzr, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_0r_2_s64, int64_t, + pn8 = svwhilele_c64 ((int64_t) 0, x1, 2), + pn8 = svwhilele_c64_s64 (0, x1, 2)) + +/* +** whilele_pn8_5r_4_s64: +** mov (x[0-9]+), #?5 +** whilele pn8\.d, \1, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_5r_4_s64, int64_t, + pn8 = svwhilele_c64 ((int64_t) 5, x1, 4), + pn8 = svwhilele_c64_s64 (5, x1, 4)) + +/* +** whilele_pn8_r0_2_s64: +** whilele pn8\.d, x0, xzr, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_r0_2_s64, int64_t, + pn8 = svwhilele_c64 (x0, (int64_t) 0, 2), + pn8 = svwhilele_c64_s64 (x0, 0, 2)) + +/* +** whilele_pn15_r5_4_s64: +** mov (x[0-9]+), #?5 +** whilele pn15\.d, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn15_r5_4_s64, int64_t, + pn15 = svwhilele_c64 (x0, (int64_t) 5, 4), + pn15 = svwhilele_c64_s64 (x0, 5, 4)) + +/* +** whilele_pn8_rr_2_u64: +** whilels pn8\.d, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_rr_2_u64, uint64_t, + pn8 = svwhilele_c64_u64 (x0, x1, 2), + pn8 = svwhilele_c64 (x0, x1, 2)) + +/* +** whilele_pn8_0r_4_u64: +** whilels pn8\.d, xzr, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_0r_4_u64, uint64_t, + pn8 = svwhilele_c64 ((uint64_t) 0, x1, 4), + pn8 = svwhilele_c64_u64 (0, x1, 4)) + +/* +** whilele_pn8_5r_2_u64: +** mov (x[0-9]+), #?5 +** whilels pn8\.d, \1, x1, 
vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_5r_2_u64, uint64_t, + pn8 = svwhilele_c64 ((uint64_t) 5, x1, 2), + pn8 = svwhilele_c64_u64 (5, x1, 2)) + +/* +** whilele_pn8_r5_4_u64: +** mov (x[0-9]+), #?5 +** whilels pn8\.d, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_r5_4_u64, uint64_t, + pn8 = svwhilele_c64 (x0, (uint64_t) 5, 4), + pn8 = svwhilele_c64_u64 (x0, 5, 4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_c8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_c8.c new file mode 100644 index 0000000000000000000000000000000000000000..e334f5b97518b6a157450fb6de1a241d3f4d99b2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilele_c8.c @@ -0,0 +1,117 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilele_pn0_rr_2_s64: +** whilele pn[0-9]+\.b, x0, x1, vlx2 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilele_pn0_rr_2_s64, int64_t, + pn0 = svwhilele_c8_s64 (x0, x1, 2), + pn0 = svwhilele_c8 (x0, x1, 2)) + +/* +** whilele_pn7_rr_4_s64: +** whilele pn[0-9]+\.b, x0, x1, vlx4 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilele_pn7_rr_4_s64, int64_t, + pn7 = svwhilele_c8_s64 (x0, x1, 4), + pn7 = svwhilele_c8 (x0, x1, 4)) + +/* +** whilele_pn8_rr_2_s64: +** whilele pn8\.b, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_rr_2_s64, int64_t, + pn8 = svwhilele_c8_s64 (x0, x1, 2), + pn8 = svwhilele_c8 (x0, x1, 2)) + +/* +** whilele_pn15_rr_4_s64: +** whilele pn15\.b, x0, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn15_rr_4_s64, int64_t, + pn15 = svwhilele_c8_s64 (x0, x1, 4), + pn15 = svwhilele_c8 (x0, x1, 4)) + +/* +** whilele_pn8_0r_2_s64: +** whilele pn8\.b, xzr, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_0r_2_s64, int64_t, + pn8 = svwhilele_c8 ((int64_t) 0, x1, 2), + pn8 = svwhilele_c8_s64 (0, x1, 2)) + +/* +** whilele_pn8_5r_4_s64: +** mov (x[0-9]+), #?5 +** whilele pn8\.b, \1, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_5r_4_s64, int64_t, + pn8 = svwhilele_c8 ((int64_t) 5, x1, 4), + pn8 = svwhilele_c8_s64 (5, x1, 4)) + +/* +** whilele_pn8_r0_2_s64: +** whilele pn8\.b, x0, xzr, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_r0_2_s64, int64_t, + pn8 = svwhilele_c8 (x0, (int64_t) 0, 2), + pn8 = svwhilele_c8_s64 (x0, 0, 2)) + +/* +** whilele_pn15_r5_4_s64: +** mov (x[0-9]+), #?5 +** whilele pn15\.b, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn15_r5_4_s64, int64_t, + pn15 = svwhilele_c8 (x0, (int64_t) 5, 4), + pn15 = svwhilele_c8_s64 (x0, 5, 4)) + +/* +** whilele_pn8_rr_2_u64: +** whilels pn8\.b, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_rr_2_u64, uint64_t, + pn8 = svwhilele_c8_u64 (x0, x1, 2), + pn8 = svwhilele_c8 (x0, x1, 2)) + +/* +** whilele_pn8_0r_4_u64: +** whilels pn8\.b, xzr, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_0r_4_u64, uint64_t, + pn8 = svwhilele_c8 ((uint64_t) 0, x1, 4), + pn8 = svwhilele_c8_u64 (0, x1, 4)) + +/* +** whilele_pn8_5r_2_u64: +** mov (x[0-9]+), #?5 +** whilels pn8\.b, \1, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_5r_2_u64, uint64_t, + pn8 = svwhilele_c8 ((uint64_t) 5, x1, 2), + pn8 = svwhilele_c8_u64 (5, x1, 2)) + +/* +** whilele_pn8_r5_4_u64: +** mov (x[0-9]+), #?5 +** whilels pn8\.b, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilele_pn8_r5_4_u64, uint64_t, + pn8 = svwhilele_c8 (x0, (uint64_t) 5, 4), + pn8 = svwhilele_c8_u64 (x0, 5, 4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_b16.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_b16.c new file mode 100644 index 0000000000000000000000000000000000000000..4ed6b4d9b97089b709f12c6d1c23490b9b9b1903 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_b16.c @@ -0,0 +1,119 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilelt_p1_rr_s64: +** whilelt {p[0-9]+\.h, p[0-9]+\.h}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p1_rr_s64, int64_t, + p1 = svwhilelt_b16_s64_x2 (x0, x1), + p1 = svwhilelt_b16_x2 (x0, x1)) + +/* +** whilelt_p4_rr_s64: +** whilelt {p4\.h, p5\.h}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_rr_s64, int64_t, + p4 = svwhilelt_b16_s64_x2 (x0, x1), + p4 = svwhilelt_b16_x2 (x0, x1)) + +/* +** whilelt_p9_rr_s64: +** whilelt {p[0-9]+\.h, p[0-9]+\.h}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p9_rr_s64, int64_t, + p9 = svwhilelt_b16_s64_x2 (x0, x1), + p9 = svwhilelt_b16_x2 (x0, x1)) + +/* +** whilelt_p14_rr_s64: +** whilelt {p14\.h, p15\.h}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p14_rr_s64, int64_t, + p14 = svwhilelt_b16_s64_x2 (x0, x1), + p14 = svwhilelt_b16_x2 (x0, x1)) + +/* +** whilelt_p4_0r_s64: +** whilelt {p4\.h, p5\.h}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_0r_s64, int64_t, + p4 = svwhilelt_b16_x2 ((int64_t) 0, x1), + p4 = svwhilelt_b16_s64_x2 (0, x1)) + +/* +** whilelt_p4_5r_s64: +** mov (x[0-9]+), #?5 +** whilelt {p4\.h, p5\.h}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_5r_s64, int64_t, + p4 = svwhilelt_b16_x2 ((int64_t) 5, x1), + p4 = svwhilelt_b16_s64_x2 (5, x1)) + +/* +** whilelt_p4_r0_s64: +** whilelt {p4\.h, p5\.h}, x0, xzr +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_r0_s64, int64_t, + p4 = svwhilelt_b16_x2 (x0, (int64_t) 0), + p4 = svwhilelt_b16_s64_x2 (x0, 0)) + +/* +** whilelt_p14_r5_s64: +** mov (x[0-9]+), #?5 +** whilelt {p14\.h, p15\.h}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p14_r5_s64, int64_t, + p14 = svwhilelt_b16_x2 (x0, (int64_t) 5), + p14 = svwhilelt_b16_s64_x2 (x0, 5)) + +/* +** whilelt_p4_rr_u64: +** whilelo {p4\.h, p5\.h}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_rr_u64, uint64_t, + p4 = svwhilelt_b16_u64_x2 (x0, x1), + p4 = svwhilelt_b16_x2 (x0, x1)) + +/* +** whilelt_p4_0r_u64: +** whilelo {p4\.h, p5\.h}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_0r_u64, uint64_t, + p4 = svwhilelt_b16_x2 ((uint64_t) 0, x1), + p4 = svwhilelt_b16_u64_x2 (0, x1)) + +/* +** whilelt_p4_5r_u64: +** mov (x[0-9]+), #?5 +** whilelo {p4\.h, p5\.h}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_5r_u64, uint64_t, + p4 = svwhilelt_b16_x2 ((uint64_t) 5, x1), + p4 = svwhilelt_b16_u64_x2 (5, x1)) + +/* +** whilelt_p4_r5_u64: +** mov (x[0-9]+), #?5 +** whilelo {p4\.h, p5\.h}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_r5_u64, uint64_t, + p4 = svwhilelt_b16_x2 (x0, (uint64_t) 5), + p4 = svwhilelt_b16_u64_x2 (x0, 5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_b32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_b32.c new file mode 100644 index 0000000000000000000000000000000000000000..39bc144ff9bc06f709a6304798f52d30ac1f7d2e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_b32.c @@ -0,0 +1,119 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilelt_p1_rr_s64: +** whilelt {p[0-9]+\.s, p[0-9]+\.s}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 
(whilelt_p1_rr_s64, int64_t, + p1 = svwhilelt_b32_s64_x2 (x0, x1), + p1 = svwhilelt_b32_x2 (x0, x1)) + +/* +** whilelt_p4_rr_s64: +** whilelt {p4\.s, p5\.s}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_rr_s64, int64_t, + p4 = svwhilelt_b32_s64_x2 (x0, x1), + p4 = svwhilelt_b32_x2 (x0, x1)) + +/* +** whilelt_p9_rr_s64: +** whilelt {p[0-9]+\.s, p[0-9]+\.s}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p9_rr_s64, int64_t, + p9 = svwhilelt_b32_s64_x2 (x0, x1), + p9 = svwhilelt_b32_x2 (x0, x1)) + +/* +** whilelt_p14_rr_s64: +** whilelt {p14\.s, p15\.s}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p14_rr_s64, int64_t, + p14 = svwhilelt_b32_s64_x2 (x0, x1), + p14 = svwhilelt_b32_x2 (x0, x1)) + +/* +** whilelt_p4_0r_s64: +** whilelt {p4\.s, p5\.s}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_0r_s64, int64_t, + p4 = svwhilelt_b32_x2 ((int64_t) 0, x1), + p4 = svwhilelt_b32_s64_x2 (0, x1)) + +/* +** whilelt_p4_5r_s64: +** mov (x[0-9]+), #?5 +** whilelt {p4\.s, p5\.s}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_5r_s64, int64_t, + p4 = svwhilelt_b32_x2 ((int64_t) 5, x1), + p4 = svwhilelt_b32_s64_x2 (5, x1)) + +/* +** whilelt_p4_r0_s64: +** whilelt {p4\.s, p5\.s}, x0, xzr +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_r0_s64, int64_t, + p4 = svwhilelt_b32_x2 (x0, (int64_t) 0), + p4 = svwhilelt_b32_s64_x2 (x0, 0)) + +/* +** whilelt_p14_r5_s64: +** mov (x[0-9]+), #?5 +** whilelt {p14\.s, p15\.s}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p14_r5_s64, int64_t, + p14 = svwhilelt_b32_x2 (x0, (int64_t) 5), + p14 = svwhilelt_b32_s64_x2 (x0, 5)) + +/* +** whilelt_p4_rr_u64: +** whilelo {p4\.s, p5\.s}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_rr_u64, uint64_t, + p4 = svwhilelt_b32_u64_x2 (x0, x1), + p4 = svwhilelt_b32_x2 (x0, x1)) + +/* +** whilelt_p4_0r_u64: +** whilelo {p4\.s, p5\.s}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_0r_u64, uint64_t, + p4 = svwhilelt_b32_x2 ((uint64_t) 0, x1), + p4 = svwhilelt_b32_u64_x2 (0, x1)) + +/* +** whilelt_p4_5r_u64: +** mov (x[0-9]+), #?5 +** whilelo {p4\.s, p5\.s}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_5r_u64, uint64_t, + p4 = svwhilelt_b32_x2 ((uint64_t) 5, x1), + p4 = svwhilelt_b32_u64_x2 (5, x1)) + +/* +** whilelt_p4_r5_u64: +** mov (x[0-9]+), #?5 +** whilelo {p4\.s, p5\.s}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_r5_u64, uint64_t, + p4 = svwhilelt_b32_x2 (x0, (uint64_t) 5), + p4 = svwhilelt_b32_u64_x2 (x0, 5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_b64.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_b64.c new file mode 100644 index 0000000000000000000000000000000000000000..9acd245edbf5c58582a049929206ade752c825b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_b64.c @@ -0,0 +1,119 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilelt_p1_rr_s64: +** whilelt {p[0-9]+\.d, p[0-9]+\.d}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p1_rr_s64, int64_t, + p1 = svwhilelt_b64_s64_x2 (x0, x1), + p1 = svwhilelt_b64_x2 (x0, x1)) + +/* +** whilelt_p4_rr_s64: +** whilelt {p4\.d, p5\.d}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_rr_s64, int64_t, + p4 = svwhilelt_b64_s64_x2 (x0, x1), + p4 = svwhilelt_b64_x2 (x0, x1)) + +/* +** whilelt_p9_rr_s64: +** whilelt {p[0-9]+\.d, p[0-9]+\.d}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p9_rr_s64, int64_t, + p9 = svwhilelt_b64_s64_x2 (x0, x1), + p9 = 
svwhilelt_b64_x2 (x0, x1)) + +/* +** whilelt_p14_rr_s64: +** whilelt {p14\.d, p15\.d}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p14_rr_s64, int64_t, + p14 = svwhilelt_b64_s64_x2 (x0, x1), + p14 = svwhilelt_b64_x2 (x0, x1)) + +/* +** whilelt_p4_0r_s64: +** whilelt {p4\.d, p5\.d}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_0r_s64, int64_t, + p4 = svwhilelt_b64_x2 ((int64_t) 0, x1), + p4 = svwhilelt_b64_s64_x2 (0, x1)) + +/* +** whilelt_p4_5r_s64: +** mov (x[0-9]+), #?5 +** whilelt {p4\.d, p5\.d}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_5r_s64, int64_t, + p4 = svwhilelt_b64_x2 ((int64_t) 5, x1), + p4 = svwhilelt_b64_s64_x2 (5, x1)) + +/* +** whilelt_p4_r0_s64: +** whilelt {p4\.d, p5\.d}, x0, xzr +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_r0_s64, int64_t, + p4 = svwhilelt_b64_x2 (x0, (int64_t) 0), + p4 = svwhilelt_b64_s64_x2 (x0, 0)) + +/* +** whilelt_p14_r5_s64: +** mov (x[0-9]+), #?5 +** whilelt {p14\.d, p15\.d}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p14_r5_s64, int64_t, + p14 = svwhilelt_b64_x2 (x0, (int64_t) 5), + p14 = svwhilelt_b64_s64_x2 (x0, 5)) + +/* +** whilelt_p4_rr_u64: +** whilelo {p4\.d, p5\.d}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_rr_u64, uint64_t, + p4 = svwhilelt_b64_u64_x2 (x0, x1), + p4 = svwhilelt_b64_x2 (x0, x1)) + +/* +** whilelt_p4_0r_u64: +** whilelo {p4\.d, p5\.d}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_0r_u64, uint64_t, + p4 = svwhilelt_b64_x2 ((uint64_t) 0, x1), + p4 = svwhilelt_b64_u64_x2 (0, x1)) + +/* +** whilelt_p4_5r_u64: +** mov (x[0-9]+), #?5 +** whilelo {p4\.d, p5\.d}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_5r_u64, uint64_t, + p4 = svwhilelt_b64_x2 ((uint64_t) 5, x1), + p4 = svwhilelt_b64_u64_x2 (5, x1)) + +/* +** whilelt_p4_r5_u64: +** mov (x[0-9]+), #?5 +** whilelo {p4\.d, p5\.d}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_r5_u64, uint64_t, + p4 = svwhilelt_b64_x2 (x0, (uint64_t) 5), + p4 = svwhilelt_b64_u64_x2 (x0, 5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_b8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_b8.c new file mode 100644 index 0000000000000000000000000000000000000000..209665581145f5f8c24f2cb37a52f648ff1348ef --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_b8.c @@ -0,0 +1,119 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilelt_p1_rr_s64: +** whilelt {p[0-9]+\.b, p[0-9]+\.b}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p1_rr_s64, int64_t, + p1 = svwhilelt_b8_s64_x2 (x0, x1), + p1 = svwhilelt_b8_x2 (x0, x1)) + +/* +** whilelt_p4_rr_s64: +** whilelt {p4\.b, p5\.b}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_rr_s64, int64_t, + p4 = svwhilelt_b8_s64_x2 (x0, x1), + p4 = svwhilelt_b8_x2 (x0, x1)) + +/* +** whilelt_p9_rr_s64: +** whilelt {p[0-9]+\.b, p[0-9]+\.b}, x0, x1 +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p9_rr_s64, int64_t, + p9 = svwhilelt_b8_s64_x2 (x0, x1), + p9 = svwhilelt_b8_x2 (x0, x1)) + +/* +** whilelt_p14_rr_s64: +** whilelt {p14\.b, p15\.b}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p14_rr_s64, int64_t, + p14 = svwhilelt_b8_s64_x2 (x0, x1), + p14 = svwhilelt_b8_x2 (x0, x1)) + +/* +** whilelt_p4_0r_s64: +** whilelt {p4\.b, p5\.b}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_0r_s64, int64_t, + p4 = svwhilelt_b8_x2 ((int64_t) 0, x1), + p4 = svwhilelt_b8_s64_x2 (0, x1)) + +/* +** whilelt_p4_5r_s64: +** mov (x[0-9]+), #?5 +** whilelt {p4\.b, p5\.b}, \1, x1 
+** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_5r_s64, int64_t, + p4 = svwhilelt_b8_x2 ((int64_t) 5, x1), + p4 = svwhilelt_b8_s64_x2 (5, x1)) + +/* +** whilelt_p4_r0_s64: +** whilelt {p4\.b, p5\.b}, x0, xzr +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_r0_s64, int64_t, + p4 = svwhilelt_b8_x2 (x0, (int64_t) 0), + p4 = svwhilelt_b8_s64_x2 (x0, 0)) + +/* +** whilelt_p14_r5_s64: +** mov (x[0-9]+), #?5 +** whilelt {p14\.b, p15\.b}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p14_r5_s64, int64_t, + p14 = svwhilelt_b8_x2 (x0, (int64_t) 5), + p14 = svwhilelt_b8_s64_x2 (x0, 5)) + +/* +** whilelt_p4_rr_u64: +** whilelo {p4\.b, p5\.b}, x0, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_rr_u64, uint64_t, + p4 = svwhilelt_b8_u64_x2 (x0, x1), + p4 = svwhilelt_b8_x2 (x0, x1)) + +/* +** whilelt_p4_0r_u64: +** whilelo {p4\.b, p5\.b}, xzr, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_0r_u64, uint64_t, + p4 = svwhilelt_b8_x2 ((uint64_t) 0, x1), + p4 = svwhilelt_b8_u64_x2 (0, x1)) + +/* +** whilelt_p4_5r_u64: +** mov (x[0-9]+), #?5 +** whilelo {p4\.b, p5\.b}, \1, x1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_5r_u64, uint64_t, + p4 = svwhilelt_b8_x2 ((uint64_t) 5, x1), + p4 = svwhilelt_b8_u64_x2 (5, x1)) + +/* +** whilelt_p4_r5_u64: +** mov (x[0-9]+), #?5 +** whilelo {p4\.b, p5\.b}, x0, \1 +** ret +*/ +TEST_COMPARE_S_X2 (whilelt_p4_r5_u64, uint64_t, + p4 = svwhilelt_b8_x2 (x0, (uint64_t) 5), + p4 = svwhilelt_b8_u64_x2 (x0, 5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_c16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_c16.c new file mode 100644 index 0000000000000000000000000000000000000000..4e7ce6591f1ec3424d4391d90acbec35f3b98b36 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_c16.c @@ -0,0 +1,117 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilelt_pn0_rr_2_s64: +** whilelt pn[0-9]+\.h, x0, x1, vlx2 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn0_rr_2_s64, int64_t, + pn0 = svwhilelt_c16_s64 (x0, x1, 2), + pn0 = svwhilelt_c16 (x0, x1, 2)) + +/* +** whilelt_pn7_rr_4_s64: +** whilelt pn[0-9]+\.h, x0, x1, vlx4 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn7_rr_4_s64, int64_t, + pn7 = svwhilelt_c16_s64 (x0, x1, 4), + pn7 = svwhilelt_c16 (x0, x1, 4)) + +/* +** whilelt_pn8_rr_2_s64: +** whilelt pn8\.h, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_rr_2_s64, int64_t, + pn8 = svwhilelt_c16_s64 (x0, x1, 2), + pn8 = svwhilelt_c16 (x0, x1, 2)) + +/* +** whilelt_pn15_rr_4_s64: +** whilelt pn15\.h, x0, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn15_rr_4_s64, int64_t, + pn15 = svwhilelt_c16_s64 (x0, x1, 4), + pn15 = svwhilelt_c16 (x0, x1, 4)) + +/* +** whilelt_pn8_0r_2_s64: +** whilelt pn8\.h, xzr, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_0r_2_s64, int64_t, + pn8 = svwhilelt_c16 ((int64_t) 0, x1, 2), + pn8 = svwhilelt_c16_s64 (0, x1, 2)) + +/* +** whilelt_pn8_5r_4_s64: +** mov (x[0-9]+), #?5 +** whilelt pn8\.h, \1, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_5r_4_s64, int64_t, + pn8 = svwhilelt_c16 ((int64_t) 5, x1, 4), + pn8 = svwhilelt_c16_s64 (5, x1, 4)) + +/* +** whilelt_pn8_r0_2_s64: +** whilelt pn8\.h, x0, xzr, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_r0_2_s64, int64_t, + pn8 = svwhilelt_c16 (x0, (int64_t) 0, 2), + pn8 = svwhilelt_c16_s64 (x0, 0, 2)) + +/* +** whilelt_pn15_r5_4_s64: +** mov (x[0-9]+), #?5 +** whilelt pn15\.h, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn15_r5_4_s64, int64_t, + pn15 = svwhilelt_c16 (x0, 
(int64_t) 5, 4), + pn15 = svwhilelt_c16_s64 (x0, 5, 4)) + +/* +** whilelt_pn8_rr_2_u64: +** whilelo pn8\.h, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_rr_2_u64, uint64_t, + pn8 = svwhilelt_c16_u64 (x0, x1, 2), + pn8 = svwhilelt_c16 (x0, x1, 2)) + +/* +** whilelt_pn8_0r_4_u64: +** whilelo pn8\.h, xzr, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_0r_4_u64, uint64_t, + pn8 = svwhilelt_c16 ((uint64_t) 0, x1, 4), + pn8 = svwhilelt_c16_u64 (0, x1, 4)) + +/* +** whilelt_pn8_5r_2_u64: +** mov (x[0-9]+), #?5 +** whilelo pn8\.h, \1, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_5r_2_u64, uint64_t, + pn8 = svwhilelt_c16 ((uint64_t) 5, x1, 2), + pn8 = svwhilelt_c16_u64 (5, x1, 2)) + +/* +** whilelt_pn8_r5_4_u64: +** mov (x[0-9]+), #?5 +** whilelo pn8\.h, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_r5_4_u64, uint64_t, + pn8 = svwhilelt_c16 (x0, (uint64_t) 5, 4), + pn8 = svwhilelt_c16_u64 (x0, 5, 4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_c32.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_c32.c new file mode 100644 index 0000000000000000000000000000000000000000..f2a63f6da544413e79e34c1bf99d83cebe278642 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_c32.c @@ -0,0 +1,117 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilelt_pn0_rr_2_s64: +** whilelt pn[0-9]+\.s, x0, x1, vlx2 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn0_rr_2_s64, int64_t, + pn0 = svwhilelt_c32_s64 (x0, x1, 2), + pn0 = svwhilelt_c32 (x0, x1, 2)) + +/* +** whilelt_pn7_rr_4_s64: +** whilelt pn[0-9]+\.s, x0, x1, vlx4 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn7_rr_4_s64, int64_t, + pn7 = svwhilelt_c32_s64 (x0, x1, 4), + pn7 = svwhilelt_c32 (x0, x1, 4)) + +/* +** whilelt_pn8_rr_2_s64: +** whilelt pn8\.s, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_rr_2_s64, int64_t, + pn8 = svwhilelt_c32_s64 (x0, x1, 2), + pn8 = svwhilelt_c32 (x0, x1, 2)) + +/* +** whilelt_pn15_rr_4_s64: +** whilelt pn15\.s, x0, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn15_rr_4_s64, int64_t, + pn15 = svwhilelt_c32_s64 (x0, x1, 4), + pn15 = svwhilelt_c32 (x0, x1, 4)) + +/* +** whilelt_pn8_0r_2_s64: +** whilelt pn8\.s, xzr, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_0r_2_s64, int64_t, + pn8 = svwhilelt_c32 ((int64_t) 0, x1, 2), + pn8 = svwhilelt_c32_s64 (0, x1, 2)) + +/* +** whilelt_pn8_5r_4_s64: +** mov (x[0-9]+), #?5 +** whilelt pn8\.s, \1, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_5r_4_s64, int64_t, + pn8 = svwhilelt_c32 ((int64_t) 5, x1, 4), + pn8 = svwhilelt_c32_s64 (5, x1, 4)) + +/* +** whilelt_pn8_r0_2_s64: +** whilelt pn8\.s, x0, xzr, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_r0_2_s64, int64_t, + pn8 = svwhilelt_c32 (x0, (int64_t) 0, 2), + pn8 = svwhilelt_c32_s64 (x0, 0, 2)) + +/* +** whilelt_pn15_r5_4_s64: +** mov (x[0-9]+), #?5 +** whilelt pn15\.s, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn15_r5_4_s64, int64_t, + pn15 = svwhilelt_c32 (x0, (int64_t) 5, 4), + pn15 = svwhilelt_c32_s64 (x0, 5, 4)) + +/* +** whilelt_pn8_rr_2_u64: +** whilelo pn8\.s, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_rr_2_u64, uint64_t, + pn8 = svwhilelt_c32_u64 (x0, x1, 2), + pn8 = svwhilelt_c32 (x0, x1, 2)) + +/* +** whilelt_pn8_0r_4_u64: +** whilelo pn8\.s, xzr, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_0r_4_u64, uint64_t, + pn8 = svwhilelt_c32 ((uint64_t) 0, x1, 4), + pn8 = svwhilelt_c32_u64 (0, x1, 4)) + +/* +** 
whilelt_pn8_5r_2_u64: +** mov (x[0-9]+), #?5 +** whilelo pn8\.s, \1, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_5r_2_u64, uint64_t, + pn8 = svwhilelt_c32 ((uint64_t) 5, x1, 2), + pn8 = svwhilelt_c32_u64 (5, x1, 2)) + +/* +** whilelt_pn8_r5_4_u64: +** mov (x[0-9]+), #?5 +** whilelo pn8\.s, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_r5_4_u64, uint64_t, + pn8 = svwhilelt_c32 (x0, (uint64_t) 5, 4), + pn8 = svwhilelt_c32_u64 (x0, 5, 4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_c64.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_c64.c new file mode 100644 index 0000000000000000000000000000000000000000..3c48cd04081eed168d79dc3d2450abc7d9136124 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_c64.c @@ -0,0 +1,117 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilelt_pn0_rr_2_s64: +** whilelt pn[0-9]+\.d, x0, x1, vlx2 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn0_rr_2_s64, int64_t, + pn0 = svwhilelt_c64_s64 (x0, x1, 2), + pn0 = svwhilelt_c64 (x0, x1, 2)) + +/* +** whilelt_pn7_rr_4_s64: +** whilelt pn[0-9]+\.d, x0, x1, vlx4 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn7_rr_4_s64, int64_t, + pn7 = svwhilelt_c64_s64 (x0, x1, 4), + pn7 = svwhilelt_c64 (x0, x1, 4)) + +/* +** whilelt_pn8_rr_2_s64: +** whilelt pn8\.d, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_rr_2_s64, int64_t, + pn8 = svwhilelt_c64_s64 (x0, x1, 2), + pn8 = svwhilelt_c64 (x0, x1, 2)) + +/* +** whilelt_pn15_rr_4_s64: +** whilelt pn15\.d, x0, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn15_rr_4_s64, int64_t, + pn15 = svwhilelt_c64_s64 (x0, x1, 4), + pn15 = svwhilelt_c64 (x0, x1, 4)) + +/* +** whilelt_pn8_0r_2_s64: +** whilelt pn8\.d, xzr, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_0r_2_s64, int64_t, + pn8 = svwhilelt_c64 ((int64_t) 0, x1, 2), + pn8 = svwhilelt_c64_s64 (0, x1, 2)) + +/* +** whilelt_pn8_5r_4_s64: +** mov (x[0-9]+), #?5 +** whilelt pn8\.d, \1, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_5r_4_s64, int64_t, + pn8 = svwhilelt_c64 ((int64_t) 5, x1, 4), + pn8 = svwhilelt_c64_s64 (5, x1, 4)) + +/* +** whilelt_pn8_r0_2_s64: +** whilelt pn8\.d, x0, xzr, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_r0_2_s64, int64_t, + pn8 = svwhilelt_c64 (x0, (int64_t) 0, 2), + pn8 = svwhilelt_c64_s64 (x0, 0, 2)) + +/* +** whilelt_pn15_r5_4_s64: +** mov (x[0-9]+), #?5 +** whilelt pn15\.d, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn15_r5_4_s64, int64_t, + pn15 = svwhilelt_c64 (x0, (int64_t) 5, 4), + pn15 = svwhilelt_c64_s64 (x0, 5, 4)) + +/* +** whilelt_pn8_rr_2_u64: +** whilelo pn8\.d, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_rr_2_u64, uint64_t, + pn8 = svwhilelt_c64_u64 (x0, x1, 2), + pn8 = svwhilelt_c64 (x0, x1, 2)) + +/* +** whilelt_pn8_0r_4_u64: +** whilelo pn8\.d, xzr, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_0r_4_u64, uint64_t, + pn8 = svwhilelt_c64 ((uint64_t) 0, x1, 4), + pn8 = svwhilelt_c64_u64 (0, x1, 4)) + +/* +** whilelt_pn8_5r_2_u64: +** mov (x[0-9]+), #?5 +** whilelo pn8\.d, \1, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_5r_2_u64, uint64_t, + pn8 = svwhilelt_c64 ((uint64_t) 5, x1, 2), + pn8 = svwhilelt_c64_u64 (5, x1, 2)) + +/* +** whilelt_pn8_r5_4_u64: +** mov (x[0-9]+), #?5 +** whilelo pn8\.d, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_r5_4_u64, uint64_t, + pn8 = svwhilelt_c64 (x0, (uint64_t) 5, 4), + pn8 = svwhilelt_c64_u64 (x0, 5, 4)) diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_c8.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_c8.c new file mode 100644 index 0000000000000000000000000000000000000000..729c12971d97cb316ac3d4c6018e6e83a30ca041 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/whilelt_c8.c @@ -0,0 +1,117 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** whilelt_pn0_rr_2_s64: +** whilelt pn[0-9]+\.b, x0, x1, vlx2 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn0_rr_2_s64, int64_t, + pn0 = svwhilelt_c8_s64 (x0, x1, 2), + pn0 = svwhilelt_c8 (x0, x1, 2)) + +/* +** whilelt_pn7_rr_4_s64: +** whilelt pn[0-9]+\.b, x0, x1, vlx4 +** mov [^\n]+ +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn7_rr_4_s64, int64_t, + pn7 = svwhilelt_c8_s64 (x0, x1, 4), + pn7 = svwhilelt_c8 (x0, x1, 4)) + +/* +** whilelt_pn8_rr_2_s64: +** whilelt pn8\.b, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_rr_2_s64, int64_t, + pn8 = svwhilelt_c8_s64 (x0, x1, 2), + pn8 = svwhilelt_c8 (x0, x1, 2)) + +/* +** whilelt_pn15_rr_4_s64: +** whilelt pn15\.b, x0, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn15_rr_4_s64, int64_t, + pn15 = svwhilelt_c8_s64 (x0, x1, 4), + pn15 = svwhilelt_c8 (x0, x1, 4)) + +/* +** whilelt_pn8_0r_2_s64: +** whilelt pn8\.b, xzr, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_0r_2_s64, int64_t, + pn8 = svwhilelt_c8 ((int64_t) 0, x1, 2), + pn8 = svwhilelt_c8_s64 (0, x1, 2)) + +/* +** whilelt_pn8_5r_4_s64: +** mov (x[0-9]+), #?5 +** whilelt pn8\.b, \1, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_5r_4_s64, int64_t, + pn8 = svwhilelt_c8 ((int64_t) 5, x1, 4), + pn8 = svwhilelt_c8_s64 (5, x1, 4)) + +/* +** whilelt_pn8_r0_2_s64: +** whilelt pn8\.b, x0, xzr, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_r0_2_s64, int64_t, + pn8 = svwhilelt_c8 (x0, (int64_t) 0, 2), + pn8 = svwhilelt_c8_s64 (x0, 0, 2)) + +/* +** whilelt_pn15_r5_4_s64: +** mov (x[0-9]+), #?5 +** whilelt pn15\.b, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn15_r5_4_s64, int64_t, + pn15 = svwhilelt_c8 (x0, (int64_t) 5, 4), + pn15 = svwhilelt_c8_s64 (x0, 5, 4)) + +/* +** whilelt_pn8_rr_2_u64: +** whilelo pn8\.b, x0, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_rr_2_u64, uint64_t, + pn8 = svwhilelt_c8_u64 (x0, x1, 2), + pn8 = svwhilelt_c8 (x0, x1, 2)) + +/* +** whilelt_pn8_0r_4_u64: +** whilelo pn8\.b, xzr, x1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_0r_4_u64, uint64_t, + pn8 = svwhilelt_c8 ((uint64_t) 0, x1, 4), + pn8 = svwhilelt_c8_u64 (0, x1, 4)) + +/* +** whilelt_pn8_5r_2_u64: +** mov (x[0-9]+), #?5 +** whilelo pn8\.b, \1, x1, vlx2 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_5r_2_u64, uint64_t, + pn8 = svwhilelt_c8 ((uint64_t) 5, x1, 2), + pn8 = svwhilelt_c8_u64 (5, x1, 2)) + +/* +** whilelt_pn8_r5_4_u64: +** mov (x[0-9]+), #?5 +** whilelo pn8\.b, x0, \1, vlx4 +** ret +*/ +TEST_COMPARE_S_C (whilelt_pn8_r5_4_u64, uint64_t, + pn8 = svwhilelt_c8 (x0, (uint64_t) 5, 4), + pn8 = svwhilelt_c8_u64 (x0, 5, 4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za16_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za16_vg2.c new file mode 100644 index 0000000000000000000000000000000000000000..2d18a21106d4cf56ffa7472471abff6da8fab57a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za16_vg2.c @@ -0,0 +1,140 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** write_za16_s16_z0_0_0: +** mov (w1[2-5]), 
(?:wzr|#?0) +** mova za0h\.h\[\1, 0:1\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (write_za16_s16_z0_0_0, svint16x2_t, + svwrite_hor_za16_s16_vg2 (0, 0, z0), + svwrite_hor_za16_s16_vg2 (0, 0, z0)) + +/* +** write_za16_u16_z4_1_1: +** mov (w1[2-5]), #?1 +** mova za1h\.h\[\1, 0:1\], {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (write_za16_u16_z4_1_1, svuint16x2_t, + svwrite_hor_za16_u16_vg2 (1, 1, z4), + svwrite_hor_za16_u16_vg2 (1, 1, z4)) + +/* +** write_za16_f16_z28_0_w11: +** mov (w1[2-5]), w11 +** mova za0h\.h\[\1, 0:1\], {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (write_za16_f16_z28_0_w11, svfloat16x2_t, + svwrite_hor_za16_f16_vg2 (0, w11, z28), + svwrite_hor_za16_f16_vg2 (0, w11, z28)) + +/* +** write_za16_bf16_z0_1_w12: +** mova za1h\.h\[w12, 0:1\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (write_za16_bf16_z0_1_w12, svbfloat16x2_t, + svwrite_hor_za16_bf16_vg2 (1, w12, z0), + svwrite_hor_za16_bf16_vg2 (1, w12, z0)) + +/* +** write_za16_u16_z18_0_w15: +** mova za0h\.h\[w15, 0:1\], {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (write_za16_u16_z18_0_w15, svuint16x2_t, + svwrite_hor_za16_u16_vg2 (0, w15, z18), + svwrite_hor_za16_u16_vg2 (0, w15, z18)) + +/* +** write_za16_s16_z23_1_w12p6: +** mov [^\n]+ +** mov [^\n]+ +** mova za1h\.h\[w12, 6:7\], {[^\n]+} +** ret +*/ +TEST_ZA_XN (write_za16_s16_z23_1_w12p6, svint16x2_t, + svwrite_hor_za16_s16_vg2 (1, w12 + 6, z23), + svwrite_hor_za16_s16_vg2 (1, w12 + 6, z23)) + +/* +** write_za16_f16_z4_0_w12p1: +** add (w[0-9]+), w12, #?1 +** mova za0h\.h\[\1, 0:1\], {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (write_za16_f16_z4_0_w12p1, svfloat16x2_t, + svwrite_hor_za16_f16_vg2 (0, w12 + 1, z4), + svwrite_hor_za16_f16_vg2 (0, w12 + 1, z4)) + +/* +** write_za16_s16_z28_1_w12p2: +** mova za1h\.h\[w12, 2:3\], {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (write_za16_s16_z28_1_w12p2, svint16x2_t, + svwrite_hor_za16_s16_vg2 (1, w12 + 2, z28), + svwrite_hor_za16_s16_vg2 (1, w12 + 2, z28)) + +/* +** write_za16_u16_z0_0_w15p3: +** add (w[0-9]+), w15, #?3 +** mova za0h\.h\[\1, 0:1\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (write_za16_u16_z0_0_w15p3, svuint16x2_t, + svwrite_hor_za16_u16_vg2 (0, w15 + 3, z0), + svwrite_hor_za16_u16_vg2 (0, w15 + 3, z0)) + +/* +** write_za16_bf16_z4_1_w15p4: +** mova za1h\.h\[w15, 4:5\], {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (write_za16_bf16_z4_1_w15p4, svbfloat16x2_t, + svwrite_hor_za16_bf16_vg2 (1, w15 + 4, z4), + svwrite_hor_za16_bf16_vg2 (1, w15 + 4, z4)) + +/* +** write_za16_u16_z28_0_w12p7: +** add (w[0-9]+), w12, #?7 +** mova za0h\.h\[\1, 0:1\], {z28\.h - z29\.h} +** ret +*/ +TEST_ZA_XN (write_za16_u16_z28_0_w12p7, svuint16x2_t, + svwrite_hor_za16_u16_vg2 (0, w12 + 7, z28), + svwrite_hor_za16_u16_vg2 (0, w12 + 7, z28)) + +/* +** write_za16_s16_z0_1_w15p8: +** add (w[0-9]+), w15, #?8 +** mova za1h\.h\[\1, 0:1\], {z0\.h - z1\.h} +** ret +*/ +TEST_ZA_XN (write_za16_s16_z0_1_w15p8, svint16x2_t, + svwrite_hor_za16_s16_vg2 (1, w15 + 8, z0), + svwrite_hor_za16_s16_vg2 (1, w15 + 8, z0)) + +/* +** write_za16_u16_z4_0_w12m1: +** sub (w[0-9]+), w12, #?1 +** mova za0h\.h\[\1, 0:1\], {z4\.h - z5\.h} +** ret +*/ +TEST_ZA_XN (write_za16_u16_z4_0_w12m1, svuint16x2_t, + svwrite_hor_za16_u16_vg2 (0, w12 - 1, z4), + svwrite_hor_za16_u16_vg2 (0, w12 - 1, z4)) + +/* +** write_za16_u16_z18_1_w16: +** mov (w1[2-5]), w16 +** mova za1h\.h\[\1, 0:1\], {z18\.h - z19\.h} +** ret +*/ +TEST_ZA_XN (write_za16_u16_z18_1_w16, svuint16x2_t, + svwrite_hor_za16_u16_vg2 (1, w16, z18), + svwrite_hor_za16_u16_vg2 (1, w16, z18)) diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za16_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za16_vg4.c new file mode 100644 index 0000000000000000000000000000000000000000..34e1a22352b08acada1a8a7d7f28152f3a43ed95 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za16_vg4.c @@ -0,0 +1,138 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** write_za16_s16_z0_0_0: +** mov (w1[2-5]), (?:wzr|#?0) +** mova za0h\.h\[\1, 0:3\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (write_za16_s16_z0_0_0, svint16x4_t, + svwrite_hor_za16_s16_vg4 (0, 0, z0), + svwrite_hor_za16_s16_vg4 (0, 0, z0)) + +/* +** write_za16_u16_z4_1_1: +** mov (w1[2-5]), #?1 +** mova za1h\.h\[\1, 0:3\], {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (write_za16_u16_z4_1_1, svuint16x4_t, + svwrite_hor_za16_u16_vg4 (1, 1, z4), + svwrite_hor_za16_u16_vg4 (1, 1, z4)) + +/* +** write_za16_f16_z28_0_w11: +** mov (w1[2-5]), w11 +** mova za0h\.h\[\1, 0:3\], {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (write_za16_f16_z28_0_w11, svfloat16x4_t, + svwrite_hor_za16_f16_vg4 (0, w11, z28), + svwrite_hor_za16_f16_vg4 (0, w11, z28)) + +/* +** write_za16_s16_z0_1_w12: +** mova za1h\.h\[w12, 0:3\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (write_za16_s16_z0_1_w12, svint16x4_t, + svwrite_hor_za16_s16_vg4 (1, w12, z0), + svwrite_hor_za16_s16_vg4 (1, w12, z0)) + +/* +** write_za16_u16_z18_0_w15: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mova za0h\.h\[w15, 0:3\], {[^\n]+} +** ret +*/ +TEST_ZA_XN (write_za16_u16_z18_0_w15, svuint16x4_t, + svwrite_hor_za16_u16_vg4 (0, w15, z18), + svwrite_hor_za16_u16_vg4 (0, w15, z18)) + +/* +** write_za16_bf16_z23_1_w12p4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mova za1h\.h\[w12, 4:7\], {[^\n]+} +** ret +*/ +TEST_ZA_XN (write_za16_bf16_z23_1_w12p4, svbfloat16x4_t, + svwrite_hor_za16_bf16_vg4 (1, w12 + 4, z23), + svwrite_hor_za16_bf16_vg4 (1, w12 + 4, z23)) + +/* +** write_za16_u16_z4_0_w12p1: +** add (w[0-9]+), w12, #?1 +** mova za0h\.h\[\1, 0:3\], {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (write_za16_u16_z4_0_w12p1, svuint16x4_t, + svwrite_hor_za16_u16_vg4 (0, w12 + 1, z4), + svwrite_hor_za16_u16_vg4 (0, w12 + 1, z4)) + +/* +** write_za16_s16_z28_1_w12p2: +** add (w[0-9]+), w12, #?2 +** mova za1h\.h\[\1, 0:3\], {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (write_za16_s16_z28_1_w12p2, svint16x4_t, + svwrite_hor_za16_s16_vg4 (1, w12 + 2, z28), + svwrite_hor_za16_s16_vg4 (1, w12 + 2, z28)) + +/* +** write_za16_f16_z0_0_w15p3: +** add (w[0-9]+), w15, #?3 +** mova za0h\.h\[\1, 0:3\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (write_za16_f16_z0_0_w15p3, svfloat16x4_t, + svwrite_hor_za16_f16_vg4 (0, w15 + 3, z0), + svwrite_hor_za16_f16_vg4 (0, w15 + 3, z0)) + +/* +** write_za16_u16_z28_1_w12p6: +** add (w[0-9]+), w12, #?6 +** mova za1h\.h\[\1, 0:3\], {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (write_za16_u16_z28_1_w12p6, svuint16x4_t, + svwrite_hor_za16_u16_vg4 (1, w12 + 6, z28), + svwrite_hor_za16_u16_vg4 (1, w12 + 6, z28)) + +/* +** write_za16_s16_z0_0_w15p8: +** add (w[0-9]+), w15, #?8 +** mova za0h\.h\[\1, 0:3\], {z0\.h - z3\.h} +** ret +*/ +TEST_ZA_XN (write_za16_s16_z0_0_w15p8, svint16x4_t, + svwrite_hor_za16_s16_vg4 (0, w15 + 8, z0), + svwrite_hor_za16_s16_vg4 (0, w15 + 8, z0)) + +/* +** write_za16_bf16_z4_1_w12m1: +** sub (w[0-9]+), w12, #?1 +** mova za1h\.h\[\1, 0:3\], {z4\.h - z7\.h} +** ret +*/ +TEST_ZA_XN (write_za16_bf16_z4_1_w12m1, svbfloat16x4_t, + 
svwrite_hor_za16_bf16_vg4 (1, w12 - 1, z4), + svwrite_hor_za16_bf16_vg4 (1, w12 - 1, z4)) + +/* +** write_za16_u16_z28_0_w16: +** mov (w1[2-5]), w16 +** mova za0h\.h\[\1, 0:3\], {z28\.h - z31\.h} +** ret +*/ +TEST_ZA_XN (write_za16_u16_z28_0_w16, svuint16x4_t, + svwrite_hor_za16_u16_vg4 (0, w16, z28), + svwrite_hor_za16_u16_vg4 (0, w16, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za32_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za32_vg2.c new file mode 100644 index 0000000000000000000000000000000000000000..7e64839d25faeca3cee9a7d378634553b4c89e87 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za32_vg2.c @@ -0,0 +1,112 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** write_za32_s32_z0_0_0: +** mov (w1[2-5]), (?:wzr|#?0) +** mova za0h\.s\[\1, 0:1\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (write_za32_s32_z0_0_0, svint32x2_t, + svwrite_hor_za32_s32_vg2 (0, 0, z0), + svwrite_hor_za32_s32_vg2 (0, 0, z0)) + +/* +** write_za32_u32_z4_1_1: +** mov (w1[2-5]), #?1 +** mova za1h\.s\[\1, 0:1\], {z4\.s - z5\.s} +** ret +*/ +TEST_ZA_XN (write_za32_u32_z4_1_1, svuint32x2_t, + svwrite_hor_za32_u32_vg2 (1, 1, z4), + svwrite_hor_za32_u32_vg2 (1, 1, z4)) + +/* +** write_za32_f32_z28_2_w11: +** mov (w1[2-5]), w11 +** mova za2h\.s\[\1, 0:1\], {z28\.s - z29\.s} +** ret +*/ +TEST_ZA_XN (write_za32_f32_z28_2_w11, svfloat32x2_t, + svwrite_hor_za32_f32_vg2 (2, w11, z28), + svwrite_hor_za32_f32_vg2 (2, w11, z28)) + +/* +** write_za32_f32_z0_3_w12: +** mova za3h\.s\[w12, 0:1\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (write_za32_f32_z0_3_w12, svfloat32x2_t, + svwrite_hor_za32_f32_vg2 (3, w12, z0), + svwrite_hor_za32_f32_vg2 (3, w12, z0)) + +/* +** write_za32_u32_z18_0_w15: +** mova za0h\.s\[w15, 0:1\], {z18\.s - z19\.s} +** ret +*/ +TEST_ZA_XN (write_za32_u32_z18_0_w15, svuint32x2_t, + svwrite_hor_za32_u32_vg2 (0, w15, z18), + svwrite_hor_za32_u32_vg2 (0, w15, z18)) + +/* +** write_za32_s32_z23_1_w12p2: +** mov [^\n]+ +** mov [^\n]+ +** mova za1h\.s\[w12, 2:3\], {[^\n]+} +** ret +*/ +TEST_ZA_XN (write_za32_s32_z23_1_w12p2, svint32x2_t, + svwrite_hor_za32_s32_vg2 (1, w12 + 2, z23), + svwrite_hor_za32_s32_vg2 (1, w12 + 2, z23)) + +/* +** write_za32_f32_z4_2_w12p1: +** add (w[0-9]+), w12, #?1 +** mova za2h\.s\[\1, 0:1\], {z4\.s - z5\.s} +** ret +*/ +TEST_ZA_XN (write_za32_f32_z4_2_w12p1, svfloat32x2_t, + svwrite_hor_za32_f32_vg2 (2, w12 + 1, z4), + svwrite_hor_za32_f32_vg2 (2, w12 + 1, z4)) + +/* +** write_za32_u32_z0_3_w15p3: +** add (w[0-9]+), w15, #?3 +** mova za3h\.s\[\1, 0:1\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (write_za32_u32_z0_3_w15p3, svuint32x2_t, + svwrite_hor_za32_u32_vg2 (3, w15 + 3, z0), + svwrite_hor_za32_u32_vg2 (3, w15 + 3, z0)) + +/* +** write_za32_s32_z0_1_w15p4: +** add (w[0-9]+), w15, #?4 +** mova za1h\.s\[\1, 0:1\], {z0\.s - z1\.s} +** ret +*/ +TEST_ZA_XN (write_za32_s32_z0_1_w15p4, svint32x2_t, + svwrite_hor_za32_s32_vg2 (1, w15 + 4, z0), + svwrite_hor_za32_s32_vg2 (1, w15 + 4, z0)) + +/* +** write_za32_u32_z4_3_w12m1: +** sub (w[0-9]+), w12, #?1 +** mova za3h\.s\[\1, 0:1\], {z4\.s - z5\.s} +** ret +*/ +TEST_ZA_XN (write_za32_u32_z4_3_w12m1, svuint32x2_t, + svwrite_hor_za32_u32_vg2 (3, w12 - 1, z4), + svwrite_hor_za32_u32_vg2 (3, w12 - 1, z4)) + +/* +** write_za32_u32_z18_1_w16: +** mov (w1[2-5]), w16 +** mova za1h\.s\[\1, 0:1\], {z18\.s - z19\.s} +** ret +*/ +TEST_ZA_XN (write_za32_u32_z18_1_w16, svuint32x2_t, + svwrite_hor_za32_u32_vg2 (1, w16, 
z18), + svwrite_hor_za32_u32_vg2 (1, w16, z18)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za32_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za32_vg4.c new file mode 100644 index 0000000000000000000000000000000000000000..da00972c7e2f1bb01e2c237cdd43d1a1d0e9e744 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za32_vg4.c @@ -0,0 +1,129 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** write_za32_s32_z0_0_0: +** mov (w1[2-5]), (?:wzr|#?0) +** mova za0h\.s\[\1, 0:3\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (write_za32_s32_z0_0_0, svint32x4_t, + svwrite_hor_za32_s32_vg4 (0, 0, z0), + svwrite_hor_za32_s32_vg4 (0, 0, z0)) + +/* +** write_za32_u32_z4_1_1: +** mov (w1[2-5]), #?1 +** mova za1h\.s\[\1, 0:3\], {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (write_za32_u32_z4_1_1, svuint32x4_t, + svwrite_hor_za32_u32_vg4 (1, 1, z4), + svwrite_hor_za32_u32_vg4 (1, 1, z4)) + +/* +** write_za32_f32_z28_2_w11: +** mov (w1[2-5]), w11 +** mova za2h\.s\[\1, 0:3\], {z28\.s - z31\.s} +** ret +*/ +TEST_ZA_XN (write_za32_f32_z28_2_w11, svfloat32x4_t, + svwrite_hor_za32_f32_vg4 (2, w11, z28), + svwrite_hor_za32_f32_vg4 (2, w11, z28)) + +/* +** write_za32_s32_z0_3_w12: +** mova za3h\.s\[w12, 0:3\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (write_za32_s32_z0_3_w12, svint32x4_t, + svwrite_hor_za32_s32_vg4 (3, w12, z0), + svwrite_hor_za32_s32_vg4 (3, w12, z0)) + +/* +** write_za32_u32_z18_0_w15: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mova za0h\.s\[w15, 0:3\], {[^\n]+} +** ret +*/ +TEST_ZA_XN (write_za32_u32_z18_0_w15, svuint32x4_t, + svwrite_hor_za32_u32_vg4 (0, w15, z18), + svwrite_hor_za32_u32_vg4 (0, w15, z18)) + +/* +** write_za32_f32_z23_1_w12p4: +** add (w[0-9]+), w12, #?4 +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mova za1h\.s\[\1, 0:3\], {[^\n]+} +** ret +*/ +TEST_ZA_XN (write_za32_f32_z23_1_w12p4, svfloat32x4_t, + svwrite_hor_za32_f32_vg4 (1, w12 + 4, z23), + svwrite_hor_za32_f32_vg4 (1, w12 + 4, z23)) + +/* +** write_za32_u32_z4_2_w12p1: +** add (w[0-9]+), w12, #?1 +** mova za2h\.s\[\1, 0:3\], {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (write_za32_u32_z4_2_w12p1, svuint32x4_t, + svwrite_hor_za32_u32_vg4 (2, w12 + 1, z4), + svwrite_hor_za32_u32_vg4 (2, w12 + 1, z4)) + +/* +** write_za32_s32_z28_3_w12p2: +** add (w[0-9]+), w12, #?2 +** mova za3h\.s\[\1, 0:3\], {z28\.s - z31\.s} +** ret +*/ +TEST_ZA_XN (write_za32_s32_z28_3_w12p2, svint32x4_t, + svwrite_hor_za32_s32_vg4 (3, w12 + 2, z28), + svwrite_hor_za32_s32_vg4 (3, w12 + 2, z28)) + +/* +** write_za32_f32_z0_0_w15p3: +** add (w[0-9]+), w15, #?3 +** mova za0h\.s\[\1, 0:3\], {z0\.s - z3\.s} +** ret +*/ +TEST_ZA_XN (write_za32_f32_z0_0_w15p3, svfloat32x4_t, + svwrite_hor_za32_f32_vg4 (0, w15 + 3, z0), + svwrite_hor_za32_f32_vg4 (0, w15 + 3, z0)) + +/* +** write_za32_u32_z28_1_w12p4: +** add (w[0-9]+), w12, #?4 +** mova za1h\.s\[\1, 0:3\], {z28\.s - z31\.s} +** ret +*/ +TEST_ZA_XN (write_za32_u32_z28_1_w12p4, svuint32x4_t, + svwrite_hor_za32_u32_vg4 (1, w12 + 4, z28), + svwrite_hor_za32_u32_vg4 (1, w12 + 4, z28)) + +/* +** write_za32_f32_z4_2_w12m1: +** sub (w[0-9]+), w12, #?1 +** mova za2h\.s\[\1, 0:3\], {z4\.s - z7\.s} +** ret +*/ +TEST_ZA_XN (write_za32_f32_z4_2_w12m1, svfloat32x4_t, + svwrite_hor_za32_f32_vg4 (2, w12 - 1, z4), + svwrite_hor_za32_f32_vg4 (2, w12 - 1, z4)) + +/* +** write_za32_u32_z28_3_w16: +** mov (w1[2-5]), w16 +** mova za3h\.s\[\1, 0:3\], {z28\.s - z31\.s} +** ret +*/ 
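/* Editor's note (illustrative sketch only, not part of the committed test
   sources): outside the TEST_ZA_XN harness, the vg4 read and write
   intrinsics exercised here are typically paired to update four horizontal
   slices of a ZA tile in place.  A minimal example, assuming an
   SME2-enabled toolchain and <arm_sme.h>; the function name bump_rows is
   made up for the example:

     #include <arm_sme.h>

     void bump_rows (uint32_t row) __arm_streaming __arm_inout("za")
     {
       // Read four consecutive horizontal slices of tile za3 starting at
       // `row' (for in-range, aligned operands the vg4 forms can use a
       // single MOVA on {zN - zN+3}, as the patterns above check).
       svuint32x4_t v = svread_hor_za32_u32_vg4 (3, row);
       // Modify one vector of the tuple: add 1 to every element.
       v = svset4 (v, 0, svadd_n_u32_x (svptrue_b32 (), svget4 (v, 0), 1));
       // Write the four slices back; this is the intrinsic under test.
       svwrite_hor_za32_u32_vg4 (3, row, v);
     }
*/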
+TEST_ZA_XN (write_za32_u32_z28_3_w16, svuint32x4_t,
+ svwrite_hor_za32_u32_vg4 (3, w16, z28),
+ svwrite_hor_za32_u32_vg4 (3, w16, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za64_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za64_vg2.c
new file mode 100644
index 0000000000000000000000000000000000000000..57b5194abd94e2789d4bec698fd5fc37f2df411d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za64_vg2.c
@@ -0,0 +1,113 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za64_s64_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0h\.d\[\1, 0:1\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z0_0_0, svint64x2_t,
+ svwrite_hor_za64_s64_vg2 (0, 0, z0),
+ svwrite_hor_za64_s64_vg2 (0, 0, z0))
+
+/*
+** write_za64_u64_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova za1h\.d\[\1, 0:1\], {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z4_1_1, svuint64x2_t,
+ svwrite_hor_za64_u64_vg2 (1, 1, z4),
+ svwrite_hor_za64_u64_vg2 (1, 1, z4))
+
+/*
+** write_za64_f64_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova za2h\.d\[\1, 0:1\], {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z28_2_w11, svfloat64x2_t,
+ svwrite_hor_za64_f64_vg2 (2, w11, z28),
+ svwrite_hor_za64_f64_vg2 (2, w11, z28))
+
+/*
+** write_za64_f64_z0_3_w12:
+** mova za3h\.d\[w12, 0:1\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z0_3_w12, svfloat64x2_t,
+ svwrite_hor_za64_f64_vg2 (3, w12, z0),
+ svwrite_hor_za64_f64_vg2 (3, w12, z0))
+
+/*
+** write_za64_u64_z18_4_w15:
+** mova za4h\.d\[w15, 0:1\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z18_4_w15, svuint64x2_t,
+ svwrite_hor_za64_u64_vg2 (4, w15, z18),
+ svwrite_hor_za64_u64_vg2 (4, w15, z18))
+
+/*
+** write_za64_s64_z23_5_w12p2:
+** add (w[0-9]+), w12, #?2
+** mov [^\n]+
+** mov [^\n]+
+** mova za5h\.d\[\1, 0:1\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z23_5_w12p2, svint64x2_t,
+ svwrite_hor_za64_s64_vg2 (5, w12 + 2, z23),
+ svwrite_hor_za64_s64_vg2 (5, w12 + 2, z23))
+
+/*
+** write_za64_f64_z4_6_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za6h\.d\[\1, 0:1\], {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z4_6_w12p1, svfloat64x2_t,
+ svwrite_hor_za64_f64_vg2 (6, w12 + 1, z4),
+ svwrite_hor_za64_f64_vg2 (6, w12 + 1, z4))
+
+/*
+** write_za64_u64_z0_7_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za7h\.d\[\1, 0:1\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z0_7_w15p3, svuint64x2_t,
+ svwrite_hor_za64_u64_vg2 (7, w15 + 3, z0),
+ svwrite_hor_za64_u64_vg2 (7, w15 + 3, z0))
+
+/*
+** write_za64_s64_z0_1_w15p4:
+** add (w[0-9]+), w15, #?4
+** mova za1h\.d\[\1, 0:1\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z0_1_w15p4, svint64x2_t,
+ svwrite_hor_za64_s64_vg2 (1, w15 + 4, z0),
+ svwrite_hor_za64_s64_vg2 (1, w15 + 4, z0))
+
+/*
+** write_za64_u64_z4_3_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za3h\.d\[\1, 0:1\], {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z4_3_w12m1, svuint64x2_t,
+ svwrite_hor_za64_u64_vg2 (3, w12 - 1, z4),
+ svwrite_hor_za64_u64_vg2 (3, w12 - 1, z4))
+
+/*
+** write_za64_u64_z18_1_w16:
+** mov (w1[2-5]), w16
+** mova za1h\.d\[\1, 0:1\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z18_1_w16, svuint64x2_t,
+ svwrite_hor_za64_u64_vg2 (1, w16, z18),
+ svwrite_hor_za64_u64_vg2 (1, w16, z18))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za64_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za64_vg4.c
new file mode 100644
index 0000000000000000000000000000000000000000..fbe9588234c90862ec7a44d356e107b8bcc666f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za64_vg4.c
@@ -0,0 +1,129 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za64_s64_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0h\.d\[\1, 0:3\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z0_0_0, svint64x4_t,
+ svwrite_hor_za64_s64_vg4 (0, 0, z0),
+ svwrite_hor_za64_s64_vg4 (0, 0, z0))
+
+/*
+** write_za64_u64_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova za1h\.d\[\1, 0:3\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z4_1_1, svuint64x4_t,
+ svwrite_hor_za64_u64_vg4 (1, 1, z4),
+ svwrite_hor_za64_u64_vg4 (1, 1, z4))
+
+/*
+** write_za64_f64_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova za2h\.d\[\1, 0:3\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z28_2_w11, svfloat64x4_t,
+ svwrite_hor_za64_f64_vg4 (2, w11, z28),
+ svwrite_hor_za64_f64_vg4 (2, w11, z28))
+
+/*
+** write_za64_s64_z0_3_w12:
+** mova za3h\.d\[w12, 0:3\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z0_3_w12, svint64x4_t,
+ svwrite_hor_za64_s64_vg4 (3, w12, z0),
+ svwrite_hor_za64_s64_vg4 (3, w12, z0))
+
+/*
+** write_za64_u64_z18_4_w15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za4h\.d\[w15, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z18_4_w15, svuint64x4_t,
+ svwrite_hor_za64_u64_vg4 (4, w15, z18),
+ svwrite_hor_za64_u64_vg4 (4, w15, z18))
+
+/*
+** write_za64_f64_z23_5_w12p4:
+** add (w[0-9]+), w12, #?4
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za5h\.d\[\1, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z23_5_w12p4, svfloat64x4_t,
+ svwrite_hor_za64_f64_vg4 (5, w12 + 4, z23),
+ svwrite_hor_za64_f64_vg4 (5, w12 + 4, z23))
+
+/*
+** write_za64_u64_z4_6_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za6h\.d\[\1, 0:3\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z4_6_w12p1, svuint64x4_t,
+ svwrite_hor_za64_u64_vg4 (6, w12 + 1, z4),
+ svwrite_hor_za64_u64_vg4 (6, w12 + 1, z4))
+
+/*
+** write_za64_s64_z28_7_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova za7h\.d\[\1, 0:3\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z28_7_w12p2, svint64x4_t,
+ svwrite_hor_za64_s64_vg4 (7, w12 + 2, z28),
+ svwrite_hor_za64_s64_vg4 (7, w12 + 2, z28))
+
+/*
+** write_za64_f64_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0h\.d\[\1, 0:3\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z0_0_w15p3, svfloat64x4_t,
+ svwrite_hor_za64_f64_vg4 (0, w15 + 3, z0),
+ svwrite_hor_za64_f64_vg4 (0, w15 + 3, z0))
+
+/*
+** write_za64_u64_z28_1_w12p4:
+** add (w[0-9]+), w12, #?4
+** mova za1h\.d\[\1, 0:3\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z28_1_w12p4, svuint64x4_t,
+ svwrite_hor_za64_u64_vg4 (1, w12 + 4, z28),
+ svwrite_hor_za64_u64_vg4 (1, w12 + 4, z28))
+
+/*
+** write_za64_f64_z4_2_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za2h\.d\[\1, 0:3\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z4_2_w12m1, svfloat64x4_t,
+ svwrite_hor_za64_f64_vg4 (2, w12 - 1, z4),
+ svwrite_hor_za64_f64_vg4 (2, w12 - 1, z4))
+
+/*
+** write_za64_u64_z28_3_w16:
+** mov (w1[2-5]), w16
+** mova za3h\.d\[\1, 0:3\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z28_3_w16, svuint64x4_t,
+ svwrite_hor_za64_u64_vg4 (3, w16, z28),
+ svwrite_hor_za64_u64_vg4 (3, w16, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg2.c
new file mode 100644
index 0000000000000000000000000000000000000000..a2af846b60b84e903ff82384abb9306fc830add2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg2.c
@@ -0,0 +1,140 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za8_s8_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0h\.b\[\1, 0:1\], {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_0, svint8x2_t,
+ svwrite_hor_za8_s8_vg2 (0, 0, z0),
+ svwrite_hor_za8_s8_vg2 (0, 0, z0))
+
+/*
+** write_za8_u8_z4_0_1:
+** mov (w1[2-5]), #?1
+** mova za0h\.b\[\1, 0:1\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_1, svuint8x2_t,
+ svwrite_hor_za8_u8_vg2 (0, 1, z4),
+ svwrite_hor_za8_u8_vg2 (0, 1, z4))
+
+/*
+** write_za8_s8_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova za0h\.b\[\1, 0:1\], {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z28_0_w11, svint8x2_t,
+ svwrite_hor_za8_s8_vg2 (0, w11, z28),
+ svwrite_hor_za8_s8_vg2 (0, w11, z28))
+
+/*
+** write_za8_s8_z0_0_w12:
+** mova za0h\.b\[w12, 0:1\], {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_w12, svint8x2_t,
+ svwrite_hor_za8_s8_vg2 (0, w12, z0),
+ svwrite_hor_za8_s8_vg2 (0, w12, z0))
+
+/*
+** write_za8_u8_z18_0_w15:
+** mova za0h\.b\[w15, 0:1\], {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z18_0_w15, svuint8x2_t,
+ svwrite_hor_za8_u8_vg2 (0, w15, z18),
+ svwrite_hor_za8_u8_vg2 (0, w15, z18))
+
+/*
+** write_za8_s8_z23_0_w12p14:
+** mov [^\n]+
+** mov [^\n]+
+** mova za0h\.b\[w12, 14:15\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z23_0_w12p14, svint8x2_t,
+ svwrite_hor_za8_s8_vg2 (0, w12 + 14, z23),
+ svwrite_hor_za8_s8_vg2 (0, w12 + 14, z23))
+
+/*
+** write_za8_u8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za0h\.b\[\1, 0:1\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w12p1, svuint8x2_t,
+ svwrite_hor_za8_u8_vg2 (0, w12 + 1, z4),
+ svwrite_hor_za8_u8_vg2 (0, w12 + 1, z4))
+
+/*
+** write_za8_s8_z28_0_w12p2:
+** mova za0h\.b\[w12, 2:3\], {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z28_0_w12p2, svint8x2_t,
+ svwrite_hor_za8_s8_vg2 (0, w12 + 2, z28),
+ svwrite_hor_za8_s8_vg2 (0, w12 + 2, z28))
+
+/*
+** write_za8_u8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0h\.b\[\1, 0:1\], {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z0_0_w15p3, svuint8x2_t,
+ svwrite_hor_za8_u8_vg2 (0, w15 + 3, z0),
+ svwrite_hor_za8_u8_vg2 (0, w15 + 3, z0))
+
+/*
+** write_za8_u8_z4_0_w15p12:
+** mova za0h\.b\[w15, 12:13\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w15p12, svuint8x2_t,
+ svwrite_hor_za8_u8_vg2 (0, w15 + 12, z4),
+ svwrite_hor_za8_u8_vg2 (0, w15 + 12, z4))
+
+/*
+** write_za8_u8_z28_0_w12p15:
+** add (w[0-9]+), w12, #?15
+** mova za0h\.b\[\1, 0:1\], {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z28_0_w12p15, svuint8x2_t,
+ svwrite_hor_za8_u8_vg2 (0, w12 + 15, z28),
+ svwrite_hor_za8_u8_vg2 (0, w12 + 15, z28))
+
+/*
+** write_za8_s8_z0_0_w15p16:
+** add (w[0-9]+), w15, #?16
+** mova za0h\.b\[\1, 0:1\], {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_w15p16, svint8x2_t,
+ svwrite_hor_za8_s8_vg2 (0, w15 + 16, z0),
+ svwrite_hor_za8_s8_vg2 (0, w15 + 16, z0))
+
+/*
+** write_za8_u8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za0h\.b\[\1, 0:1\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x2_t,
+ svwrite_hor_za8_u8_vg2 (0, w12 - 1, z4),
+ svwrite_hor_za8_u8_vg2 (0, w12 - 1, z4))
+
+/*
+** write_za8_u8_z18_0_w16:
+** mov (w1[2-5]), w16
+** mova za0h\.b\[\1, 0:1\], {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z18_0_w16, svuint8x2_t,
+ svwrite_hor_za8_u8_vg2 (0, w16, z18),
+ svwrite_hor_za8_u8_vg2 (0, w16, z18))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg4.c
new file mode 100644
index 0000000000000000000000000000000000000000..e333ce699e39f9ed6c82fa843a41f38b0c9b32f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg4.c
@@ -0,0 +1,156 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za8_s8_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0h\.b\[\1, 0:3\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_0, svint8x4_t,
+ svwrite_hor_za8_s8_vg4 (0, 0, z0),
+ svwrite_hor_za8_s8_vg4 (0, 0, z0))
+
+/*
+** write_za8_u8_z4_0_1:
+** mov (w1[2-5]), #?1
+** mova za0h\.b\[\1, 0:3\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_1, svuint8x4_t,
+ svwrite_hor_za8_u8_vg4 (0, 1, z4),
+ svwrite_hor_za8_u8_vg4 (0, 1, z4))
+
+/*
+** write_za8_s8_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova za0h\.b\[\1, 0:3\], {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z28_0_w11, svint8x4_t,
+ svwrite_hor_za8_s8_vg4 (0, w11, z28),
+ svwrite_hor_za8_s8_vg4 (0, w11, z28))
+
+/*
+** write_za8_s8_z0_0_w12:
+** mova za0h\.b\[w12, 0:3\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_w12, svint8x4_t,
+ svwrite_hor_za8_s8_vg4 (0, w12, z0),
+ svwrite_hor_za8_s8_vg4 (0, w12, z0))
+
+/*
+** write_za8_u8_z18_0_w15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za0h\.b\[w15, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z18_0_w15, svuint8x4_t,
+ svwrite_hor_za8_u8_vg4 (0, w15, z18),
+ svwrite_hor_za8_u8_vg4 (0, w15, z18))
+
+/*
+** write_za8_s8_z23_0_w12p12:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za0h\.b\[w12, 12:15\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z23_0_w12p12, svint8x4_t,
+ svwrite_hor_za8_s8_vg4 (0, w12 + 12, z23),
+ svwrite_hor_za8_s8_vg4 (0, w12 + 12, z23))
+
+/*
+** write_za8_u8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za0h\.b\[\1, 0:3\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w12p1, svuint8x4_t,
+ svwrite_hor_za8_u8_vg4 (0, w12 + 1, z4),
+ svwrite_hor_za8_u8_vg4 (0, w12 + 1, z4))
+
+/*
+** write_za8_s8_z28_0_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova za0h\.b\[\1, 0:3\], {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z28_0_w12p2, svint8x4_t,
+ svwrite_hor_za8_s8_vg4 (0, w12 + 2, z28),
+ svwrite_hor_za8_s8_vg4 (0, w12 + 2, z28))
+
+/*
+** write_za8_u8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0h\.b\[\1, 0:3\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z0_0_w15p3, svuint8x4_t,
+ svwrite_hor_za8_u8_vg4 (0, w15 + 3, z0),
+ svwrite_hor_za8_u8_vg4 (0, w15 + 3, z0))
+
+/*
+** write_za8_u8_z0_0_w12p4:
+** mova za0h\.b\[w12, 4:7\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z0_0_w12p4, svuint8x4_t,
+ svwrite_hor_za8_u8_vg4 (0, w12 + 4, z0),
+ svwrite_hor_za8_u8_vg4 (0, w12 + 4, z0))
+
+/*
+** write_za8_u8_z4_0_w15p12:
+** mova za0h\.b\[w15, 12:15\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w15p12, svuint8x4_t,
+ svwrite_hor_za8_u8_vg4 (0, w15 + 12, z4),
+ svwrite_hor_za8_u8_vg4 (0, w15 + 12, z4))
+
+/*
+** write_za8_u8_z28_0_w12p14:
+** add (w[0-9]+), w12, #?14
+** mova za0h\.b\[\1, 0:3\], {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z28_0_w12p14, svuint8x4_t,
+ svwrite_hor_za8_u8_vg4 (0, w12 + 14, z28),
+ svwrite_hor_za8_u8_vg4 (0, w12 + 14, z28))
+
+/*
+** write_za8_s8_z0_0_w15p16:
+** add (w[0-9]+), w15, #?16
+** mova za0h\.b\[\1, 0:3\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_w15p16, svint8x4_t,
+ svwrite_hor_za8_s8_vg4 (0, w15 + 16, z0),
+ svwrite_hor_za8_s8_vg4 (0, w15 + 16, z0))
+
+/*
+** write_za8_u8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za0h\.b\[\1, 0:3\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x4_t,
+ svwrite_hor_za8_u8_vg4 (0, w12 - 1, z4),
+ svwrite_hor_za8_u8_vg4 (0, w12 - 1, z4))
+
+/*
+** write_za8_u8_z28_0_w16:
+** mov (w1[2-5]), w16
+** mova za0h\.b\[\1, 0:3\], {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z28_0_w16, svuint8x4_t,
+ svwrite_hor_za8_u8_vg4 (0, w16, z28),
+ svwrite_hor_za8_u8_vg4 (0, w16, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za16_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za16_vg2.c
new file mode 100644
index 0000000000000000000000000000000000000000..0b8dc186c9e270ab2f8edb9b36305609b01e3e2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za16_vg2.c
@@ -0,0 +1,140 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za16_s16_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0v\.h\[\1, 0:1\], {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z0_0_0, svint16x2_t,
+ svwrite_ver_za16_s16_vg2 (0, 0, z0),
+ svwrite_ver_za16_s16_vg2 (0, 0, z0))
+
+/*
+** write_za16_u16_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova za1v\.h\[\1, 0:1\], {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z4_1_1, svuint16x2_t,
+ svwrite_ver_za16_u16_vg2 (1, 1, z4),
+ svwrite_ver_za16_u16_vg2 (1, 1, z4))
+
+/*
+** write_za16_f16_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova za0v\.h\[\1, 0:1\], {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_f16_z28_0_w11, svfloat16x2_t,
+ svwrite_ver_za16_f16_vg2 (0, w11, z28),
+ svwrite_ver_za16_f16_vg2 (0, w11, z28))
+
+/*
+** write_za16_bf16_z0_1_w12:
+** mova za1v\.h\[w12, 0:1\], {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_bf16_z0_1_w12, svbfloat16x2_t,
+ svwrite_ver_za16_bf16_vg2 (1, w12, z0),
+ svwrite_ver_za16_bf16_vg2 (1, w12, z0))
+
+/*
+** write_za16_u16_z18_0_w15:
+** mova za0v\.h\[w15, 0:1\], {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z18_0_w15, svuint16x2_t,
+ svwrite_ver_za16_u16_vg2 (0, w15, z18),
+ svwrite_ver_za16_u16_vg2 (0, w15, z18))
+
+/*
+** write_za16_s16_z23_1_w12p6:
+** mov [^\n]+
+** mov [^\n]+
+** mova za1v\.h\[w12, 6:7\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z23_1_w12p6, svint16x2_t,
+ svwrite_ver_za16_s16_vg2 (1, w12 + 6, z23),
+ svwrite_ver_za16_s16_vg2 (1, w12 + 6, z23))
+
+/*
+** write_za16_f16_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za0v\.h\[\1, 0:1\], {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_f16_z4_0_w12p1, svfloat16x2_t,
+ svwrite_ver_za16_f16_vg2 (0, w12 + 1, z4),
+ svwrite_ver_za16_f16_vg2 (0, w12 + 1, z4))
+
+/*
+** write_za16_s16_z28_1_w12p2:
+** mova za1v\.h\[w12, 2:3\], {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z28_1_w12p2, svint16x2_t,
+ svwrite_ver_za16_s16_vg2 (1, w12 + 2, z28),
+ svwrite_ver_za16_s16_vg2 (1, w12 + 2, z28))
+
+/*
+** write_za16_u16_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0v\.h\[\1, 0:1\], {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z0_0_w15p3, svuint16x2_t,
+ svwrite_ver_za16_u16_vg2 (0, w15 + 3, z0),
+ svwrite_ver_za16_u16_vg2 (0, w15 + 3, z0))
+
+/*
+** write_za16_bf16_z4_1_w15p4:
+** mova za1v\.h\[w15, 4:5\], {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_bf16_z4_1_w15p4, svbfloat16x2_t,
+ svwrite_ver_za16_bf16_vg2 (1, w15 + 4, z4),
+ svwrite_ver_za16_bf16_vg2 (1, w15 + 4, z4))
+
+/*
+** write_za16_u16_z28_0_w12p7:
+** add (w[0-9]+), w12, #?7
+** mova za0v\.h\[\1, 0:1\], {z28\.h - z29\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z28_0_w12p7, svuint16x2_t,
+ svwrite_ver_za16_u16_vg2 (0, w12 + 7, z28),
+ svwrite_ver_za16_u16_vg2 (0, w12 + 7, z28))
+
+/*
+** write_za16_s16_z0_1_w15p8:
+** add (w[0-9]+), w15, #?8
+** mova za1v\.h\[\1, 0:1\], {z0\.h - z1\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z0_1_w15p8, svint16x2_t,
+ svwrite_ver_za16_s16_vg2 (1, w15 + 8, z0),
+ svwrite_ver_za16_s16_vg2 (1, w15 + 8, z0))
+
+/*
+** write_za16_u16_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za0v\.h\[\1, 0:1\], {z4\.h - z5\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z4_0_w12m1, svuint16x2_t,
+ svwrite_ver_za16_u16_vg2 (0, w12 - 1, z4),
+ svwrite_ver_za16_u16_vg2 (0, w12 - 1, z4))
+
+/*
+** write_za16_u16_z18_1_w16:
+** mov (w1[2-5]), w16
+** mova za1v\.h\[\1, 0:1\], {z18\.h - z19\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z18_1_w16, svuint16x2_t,
+ svwrite_ver_za16_u16_vg2 (1, w16, z18),
+ svwrite_ver_za16_u16_vg2 (1, w16, z18))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za16_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za16_vg4.c
new file mode 100644
index 0000000000000000000000000000000000000000..432681529cd50a6aa959d86de03966c18ebecc75
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za16_vg4.c
@@ -0,0 +1,138 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za16_s16_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0v\.h\[\1, 0:3\], {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z0_0_0, svint16x4_t,
+ svwrite_ver_za16_s16_vg4 (0, 0, z0),
+ svwrite_ver_za16_s16_vg4 (0, 0, z0))
+
+/*
+** write_za16_u16_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova za1v\.h\[\1, 0:3\], {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z4_1_1, svuint16x4_t,
+ svwrite_ver_za16_u16_vg4 (1, 1, z4),
+ svwrite_ver_za16_u16_vg4 (1, 1, z4))
+
+/*
+** write_za16_f16_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova za0v\.h\[\1, 0:3\], {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_f16_z28_0_w11, svfloat16x4_t,
+ svwrite_ver_za16_f16_vg4 (0, w11, z28),
+ svwrite_ver_za16_f16_vg4 (0, w11, z28))
+
+/*
+** write_za16_s16_z0_1_w12:
+** mova za1v\.h\[w12, 0:3\], {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z0_1_w12, svint16x4_t,
+ svwrite_ver_za16_s16_vg4 (1, w12, z0),
+ svwrite_ver_za16_s16_vg4 (1, w12, z0))
+
+/*
+** write_za16_u16_z18_0_w15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za0v\.h\[w15, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z18_0_w15, svuint16x4_t,
+ svwrite_ver_za16_u16_vg4 (0, w15, z18),
+ svwrite_ver_za16_u16_vg4 (0, w15, z18))
+
+/*
+** write_za16_bf16_z23_1_w12p4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za1v\.h\[w12, 4:7\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za16_bf16_z23_1_w12p4, svbfloat16x4_t,
+ svwrite_ver_za16_bf16_vg4 (1, w12 + 4, z23),
+ svwrite_ver_za16_bf16_vg4 (1, w12 + 4, z23))
+
+/*
+** write_za16_u16_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za0v\.h\[\1, 0:3\], {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z4_0_w12p1, svuint16x4_t,
+ svwrite_ver_za16_u16_vg4 (0, w12 + 1, z4),
+ svwrite_ver_za16_u16_vg4 (0, w12 + 1, z4))
+
+/*
+** write_za16_s16_z28_1_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova za1v\.h\[\1, 0:3\], {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z28_1_w12p2, svint16x4_t,
+ svwrite_ver_za16_s16_vg4 (1, w12 + 2, z28),
+ svwrite_ver_za16_s16_vg4 (1, w12 + 2, z28))
+
+/*
+** write_za16_f16_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0v\.h\[\1, 0:3\], {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_f16_z0_0_w15p3, svfloat16x4_t,
+ svwrite_ver_za16_f16_vg4 (0, w15 + 3, z0),
+ svwrite_ver_za16_f16_vg4 (0, w15 + 3, z0))
+
+/*
+** write_za16_u16_z28_1_w12p6:
+** add (w[0-9]+), w12, #?6
+** mova za1v\.h\[\1, 0:3\], {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z28_1_w12p6, svuint16x4_t,
+ svwrite_ver_za16_u16_vg4 (1, w12 + 6, z28),
+ svwrite_ver_za16_u16_vg4 (1, w12 + 6, z28))
+
+/*
+** write_za16_s16_z0_0_w15p8:
+** add (w[0-9]+), w15, #?8
+** mova za0v\.h\[\1, 0:3\], {z0\.h - z3\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_s16_z0_0_w15p8, svint16x4_t,
+ svwrite_ver_za16_s16_vg4 (0, w15 + 8, z0),
+ svwrite_ver_za16_s16_vg4 (0, w15 + 8, z0))
+
+/*
+** write_za16_bf16_z4_1_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za1v\.h\[\1, 0:3\], {z4\.h - z7\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_bf16_z4_1_w12m1, svbfloat16x4_t,
+ svwrite_ver_za16_bf16_vg4 (1, w12 - 1, z4),
+ svwrite_ver_za16_bf16_vg4 (1, w12 - 1, z4))
+
+/*
+** write_za16_u16_z28_0_w16:
+** mov (w1[2-5]), w16
+** mova za0v\.h\[\1, 0:3\], {z28\.h - z31\.h}
+** ret
+*/
+TEST_ZA_XN (write_za16_u16_z28_0_w16, svuint16x4_t,
+ svwrite_ver_za16_u16_vg4 (0, w16, z28),
+ svwrite_ver_za16_u16_vg4 (0, w16, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za32_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za32_vg2.c
new file mode 100644
index 0000000000000000000000000000000000000000..307a2d709ab46b04fd2f7083e3f6bb4aae001373
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za32_vg2.c
@@ -0,0 +1,112 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za32_s32_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0v\.s\[\1, 0:1\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_s32_z0_0_0, svint32x2_t,
+ svwrite_ver_za32_s32_vg2 (0, 0, z0),
+ svwrite_ver_za32_s32_vg2 (0, 0, z0))
+
+/*
+** write_za32_u32_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova za1v\.s\[\1, 0:1\], {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z4_1_1, svuint32x2_t,
+ svwrite_ver_za32_u32_vg2 (1, 1, z4),
+ svwrite_ver_za32_u32_vg2 (1, 1, z4))
+
+/*
+** write_za32_f32_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova za2v\.s\[\1, 0:1\], {z28\.s - z29\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z28_2_w11, svfloat32x2_t,
+ svwrite_ver_za32_f32_vg2 (2, w11, z28),
+ svwrite_ver_za32_f32_vg2 (2, w11, z28))
+
+/*
+** write_za32_f32_z0_3_w12:
+** mova za3v\.s\[w12, 0:1\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z0_3_w12, svfloat32x2_t,
+ svwrite_ver_za32_f32_vg2 (3, w12, z0),
+ svwrite_ver_za32_f32_vg2 (3, w12, z0))
+
+/*
+** write_za32_u32_z18_0_w15:
+** mova za0v\.s\[w15, 0:1\], {z18\.s - z19\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z18_0_w15, svuint32x2_t,
+ svwrite_ver_za32_u32_vg2 (0, w15, z18),
+ svwrite_ver_za32_u32_vg2 (0, w15, z18))
+
+/*
+** write_za32_s32_z23_1_w12p2:
+** mov [^\n]+
+** mov [^\n]+
+** mova za1v\.s\[w12, 2:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za32_s32_z23_1_w12p2, svint32x2_t,
+ svwrite_ver_za32_s32_vg2 (1, w12 + 2, z23),
+ svwrite_ver_za32_s32_vg2 (1, w12 + 2, z23))
+
+/*
+** write_za32_f32_z4_2_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za2v\.s\[\1, 0:1\], {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z4_2_w12p1, svfloat32x2_t,
+ svwrite_ver_za32_f32_vg2 (2, w12 + 1, z4),
+ svwrite_ver_za32_f32_vg2 (2, w12 + 1, z4))
+
+/*
+** write_za32_u32_z0_3_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za3v\.s\[\1, 0:1\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z0_3_w15p3, svuint32x2_t,
+ svwrite_ver_za32_u32_vg2 (3, w15 + 3, z0),
+ svwrite_ver_za32_u32_vg2 (3, w15 + 3, z0))
+
+/*
+** write_za32_s32_z0_1_w15p4:
+** add (w[0-9]+), w15, #?4
+** mova za1v\.s\[\1, 0:1\], {z0\.s - z1\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_s32_z0_1_w15p4, svint32x2_t,
+ svwrite_ver_za32_s32_vg2 (1, w15 + 4, z0),
+ svwrite_ver_za32_s32_vg2 (1, w15 + 4, z0))
+
+/*
+** write_za32_u32_z4_3_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za3v\.s\[\1, 0:1\], {z4\.s - z5\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z4_3_w12m1, svuint32x2_t,
+ svwrite_ver_za32_u32_vg2 (3, w12 - 1, z4),
+ svwrite_ver_za32_u32_vg2 (3, w12 - 1, z4))
+
+/*
+** write_za32_u32_z18_1_w16:
+** mov (w1[2-5]), w16
+** mova za1v\.s\[\1, 0:1\], {z18\.s - z19\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z18_1_w16, svuint32x2_t,
+ svwrite_ver_za32_u32_vg2 (1, w16, z18),
+ svwrite_ver_za32_u32_vg2 (1, w16, z18))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za32_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za32_vg4.c
new file mode 100644
index 0000000000000000000000000000000000000000..033409475b1beea4ccdf6d975e449b9537a09eaa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za32_vg4.c
@@ -0,0 +1,129 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za32_s32_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0v\.s\[\1, 0:3\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_s32_z0_0_0, svint32x4_t,
+ svwrite_ver_za32_s32_vg4 (0, 0, z0),
+ svwrite_ver_za32_s32_vg4 (0, 0, z0))
+
+/*
+** write_za32_u32_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova za1v\.s\[\1, 0:3\], {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z4_1_1, svuint32x4_t,
+ svwrite_ver_za32_u32_vg4 (1, 1, z4),
+ svwrite_ver_za32_u32_vg4 (1, 1, z4))
+
+/*
+** write_za32_f32_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova za2v\.s\[\1, 0:3\], {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z28_2_w11, svfloat32x4_t,
+ svwrite_ver_za32_f32_vg4 (2, w11, z28),
+ svwrite_ver_za32_f32_vg4 (2, w11, z28))
+
+/*
+** write_za32_s32_z0_3_w12:
+** mova za3v\.s\[w12, 0:3\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_s32_z0_3_w12, svint32x4_t,
+ svwrite_ver_za32_s32_vg4 (3, w12, z0),
+ svwrite_ver_za32_s32_vg4 (3, w12, z0))
+
+/*
+** write_za32_u32_z18_0_w15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za0v\.s\[w15, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z18_0_w15, svuint32x4_t,
+ svwrite_ver_za32_u32_vg4 (0, w15, z18),
+ svwrite_ver_za32_u32_vg4 (0, w15, z18))
+
+/*
+** write_za32_f32_z23_1_w12p4:
+** add (w[0-9]+), w12, #?4
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za1v\.s\[\1, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z23_1_w12p4, svfloat32x4_t,
+ svwrite_ver_za32_f32_vg4 (1, w12 + 4, z23),
+ svwrite_ver_za32_f32_vg4 (1, w12 + 4, z23))
+
+/*
+** write_za32_u32_z4_2_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za2v\.s\[\1, 0:3\], {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z4_2_w12p1, svuint32x4_t,
+ svwrite_ver_za32_u32_vg4 (2, w12 + 1, z4),
+ svwrite_ver_za32_u32_vg4 (2, w12 + 1, z4))
+
+/*
+** write_za32_s32_z28_3_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova za3v\.s\[\1, 0:3\], {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_s32_z28_3_w12p2, svint32x4_t,
+ svwrite_ver_za32_s32_vg4 (3, w12 + 2, z28),
+ svwrite_ver_za32_s32_vg4 (3, w12 + 2, z28))
+
+/*
+** write_za32_f32_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0v\.s\[\1, 0:3\], {z0\.s - z3\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z0_0_w15p3, svfloat32x4_t,
+ svwrite_ver_za32_f32_vg4 (0, w15 + 3, z0),
+ svwrite_ver_za32_f32_vg4 (0, w15 + 3, z0))
+
+/*
+** write_za32_u32_z28_1_w12p4:
+** add (w[0-9]+), w12, #?4
+** mova za1v\.s\[\1, 0:3\], {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z28_1_w12p4, svuint32x4_t,
+ svwrite_ver_za32_u32_vg4 (1, w12 + 4, z28),
+ svwrite_ver_za32_u32_vg4 (1, w12 + 4, z28))
+
+/*
+** write_za32_f32_z4_2_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za2v\.s\[\1, 0:3\], {z4\.s - z7\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_f32_z4_2_w12m1, svfloat32x4_t,
+ svwrite_ver_za32_f32_vg4 (2, w12 - 1, z4),
+ svwrite_ver_za32_f32_vg4 (2, w12 - 1, z4))
+
+/*
+** write_za32_u32_z28_3_w16:
+** mov (w1[2-5]), w16
+** mova za3v\.s\[\1, 0:3\], {z28\.s - z31\.s}
+** ret
+*/
+TEST_ZA_XN (write_za32_u32_z28_3_w16, svuint32x4_t,
+ svwrite_ver_za32_u32_vg4 (3, w16, z28),
+ svwrite_ver_za32_u32_vg4 (3, w16, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za64_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za64_vg2.c
new file mode 100644
index 0000000000000000000000000000000000000000..9b13ea51ab892d9d6db45008b968e692b75cd3b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za64_vg2.c
@@ -0,0 +1,113 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za64_s64_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0v\.d\[\1, 0:1\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z0_0_0, svint64x2_t,
+ svwrite_ver_za64_s64_vg2 (0, 0, z0),
+ svwrite_ver_za64_s64_vg2 (0, 0, z0))
+
+/*
+** write_za64_u64_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova za1v\.d\[\1, 0:1\], {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z4_1_1, svuint64x2_t,
+ svwrite_ver_za64_u64_vg2 (1, 1, z4),
+ svwrite_ver_za64_u64_vg2 (1, 1, z4))
+
+/*
+** write_za64_f64_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova za2v\.d\[\1, 0:1\], {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z28_2_w11, svfloat64x2_t,
+ svwrite_ver_za64_f64_vg2 (2, w11, z28),
+ svwrite_ver_za64_f64_vg2 (2, w11, z28))
+
+/*
+** write_za64_f64_z0_3_w12:
+** mova za3v\.d\[w12, 0:1\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z0_3_w12, svfloat64x2_t,
+ svwrite_ver_za64_f64_vg2 (3, w12, z0),
+ svwrite_ver_za64_f64_vg2 (3, w12, z0))
+
+/*
+** write_za64_u64_z18_4_w15:
+** mova za4v\.d\[w15, 0:1\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z18_4_w15, svuint64x2_t,
+ svwrite_ver_za64_u64_vg2 (4, w15, z18),
+ svwrite_ver_za64_u64_vg2 (4, w15, z18))
+
+/*
+** write_za64_s64_z23_5_w12p2:
+** add (w[0-9]+), w12, #?2
+** mov [^\n]+
+** mov [^\n]+
+** mova za5v\.d\[\1, 0:1\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z23_5_w12p2, svint64x2_t,
+ svwrite_ver_za64_s64_vg2 (5, w12 + 2, z23),
+ svwrite_ver_za64_s64_vg2 (5, w12 + 2, z23))
+
+/*
+** write_za64_f64_z4_6_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za6v\.d\[\1, 0:1\], {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z4_6_w12p1, svfloat64x2_t,
+ svwrite_ver_za64_f64_vg2 (6, w12 + 1, z4),
+ svwrite_ver_za64_f64_vg2 (6, w12 + 1, z4))
+
+/*
+** write_za64_u64_z0_7_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za7v\.d\[\1, 0:1\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z0_7_w15p3, svuint64x2_t,
+ svwrite_ver_za64_u64_vg2 (7, w15 + 3, z0),
+ svwrite_ver_za64_u64_vg2 (7, w15 + 3, z0))
+
+/*
+** write_za64_s64_z0_1_w15p4:
+** add (w[0-9]+), w15, #?4
+** mova za1v\.d\[\1, 0:1\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z0_1_w15p4, svint64x2_t,
+ svwrite_ver_za64_s64_vg2 (1, w15 + 4, z0),
+ svwrite_ver_za64_s64_vg2 (1, w15 + 4, z0))
+
+/*
+** write_za64_u64_z4_3_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za3v\.d\[\1, 0:1\], {z4\.d - z5\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z4_3_w12m1, svuint64x2_t,
+ svwrite_ver_za64_u64_vg2 (3, w12 - 1, z4),
+ svwrite_ver_za64_u64_vg2 (3, w12 - 1, z4))
+
+/*
+** write_za64_u64_z18_1_w16:
+** mov (w1[2-5]), w16
+** mova za1v\.d\[\1, 0:1\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z18_1_w16, svuint64x2_t,
+ svwrite_ver_za64_u64_vg2 (1, w16, z18),
+ svwrite_ver_za64_u64_vg2 (1, w16, z18))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za64_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za64_vg4.c
new file mode 100644
index 0000000000000000000000000000000000000000..a6d091a02149e135cb47750f472d047715d1a7b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za64_vg4.c
@@ -0,0 +1,129 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za64_s64_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0v\.d\[\1, 0:3\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z0_0_0, svint64x4_t,
+ svwrite_ver_za64_s64_vg4 (0, 0, z0),
+ svwrite_ver_za64_s64_vg4 (0, 0, z0))
+
+/*
+** write_za64_u64_z4_1_1:
+** mov (w1[2-5]), #?1
+** mova za1v\.d\[\1, 0:3\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z4_1_1, svuint64x4_t,
+ svwrite_ver_za64_u64_vg4 (1, 1, z4),
+ svwrite_ver_za64_u64_vg4 (1, 1, z4))
+
+/*
+** write_za64_f64_z28_2_w11:
+** mov (w1[2-5]), w11
+** mova za2v\.d\[\1, 0:3\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z28_2_w11, svfloat64x4_t,
+ svwrite_ver_za64_f64_vg4 (2, w11, z28),
+ svwrite_ver_za64_f64_vg4 (2, w11, z28))
+
+/*
+** write_za64_s64_z0_3_w12:
+** mova za3v\.d\[w12, 0:3\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z0_3_w12, svint64x4_t,
+ svwrite_ver_za64_s64_vg4 (3, w12, z0),
+ svwrite_ver_za64_s64_vg4 (3, w12, z0))
+
+/*
+** write_za64_u64_z18_4_w15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za4v\.d\[w15, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z18_4_w15, svuint64x4_t,
+ svwrite_ver_za64_u64_vg4 (4, w15, z18),
+ svwrite_ver_za64_u64_vg4 (4, w15, z18))
+
+/*
+** write_za64_f64_z23_5_w12p4:
+** add (w[0-9]+), w12, #?4
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za5v\.d\[\1, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z23_5_w12p4, svfloat64x4_t,
+ svwrite_ver_za64_f64_vg4 (5, w12 + 4, z23),
+ svwrite_ver_za64_f64_vg4 (5, w12 + 4, z23))
+
+/*
+** write_za64_u64_z4_6_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za6v\.d\[\1, 0:3\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z4_6_w12p1, svuint64x4_t,
+ svwrite_ver_za64_u64_vg4 (6, w12 + 1, z4),
+ svwrite_ver_za64_u64_vg4 (6, w12 + 1, z4))
+
+/*
+** write_za64_s64_z28_7_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova za7v\.d\[\1, 0:3\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_s64_z28_7_w12p2, svint64x4_t,
+ svwrite_ver_za64_s64_vg4 (7, w12 + 2, z28),
+ svwrite_ver_za64_s64_vg4 (7, w12 + 2, z28))
+
+/*
+** write_za64_f64_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0v\.d\[\1, 0:3\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z0_0_w15p3, svfloat64x4_t,
+ svwrite_ver_za64_f64_vg4 (0, w15 + 3, z0),
+ svwrite_ver_za64_f64_vg4 (0, w15 + 3, z0))
+
+/*
+** write_za64_u64_z28_1_w12p4:
+** add (w[0-9]+), w12, #?4
+** mova za1v\.d\[\1, 0:3\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z28_1_w12p4, svuint64x4_t,
+ svwrite_ver_za64_u64_vg4 (1, w12 + 4, z28),
+ svwrite_ver_za64_u64_vg4 (1, w12 + 4, z28))
+
+/*
+** write_za64_f64_z4_2_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za2v\.d\[\1, 0:3\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_f64_z4_2_w12m1, svfloat64x4_t,
+ svwrite_ver_za64_f64_vg4 (2, w12 - 1, z4),
+ svwrite_ver_za64_f64_vg4 (2, w12 - 1, z4))
+
+/*
+** write_za64_u64_z28_3_w16:
+** mov (w1[2-5]), w16
+** mova za3v\.d\[\1, 0:3\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_za64_u64_z28_3_w16, svuint64x4_t,
+ svwrite_ver_za64_u64_vg4 (3, w16, z28),
+ svwrite_ver_za64_u64_vg4 (3, w16, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg2.c
new file mode 100644
index 0000000000000000000000000000000000000000..ce3dbdd872926df9501e87793642fcc6e1398ef8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg2.c
@@ -0,0 +1,140 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za8_s8_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0v\.b\[\1, 0:1\], {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_0, svint8x2_t,
+ svwrite_ver_za8_s8_vg2 (0, 0, z0),
+ svwrite_ver_za8_s8_vg2 (0, 0, z0))
+
+/*
+** write_za8_u8_z4_0_1:
+** mov (w1[2-5]), #?1
+** mova za0v\.b\[\1, 0:1\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_1, svuint8x2_t,
+ svwrite_ver_za8_u8_vg2 (0, 1, z4),
+ svwrite_ver_za8_u8_vg2 (0, 1, z4))
+
+/*
+** write_za8_s8_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova za0v\.b\[\1, 0:1\], {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z28_0_w11, svint8x2_t,
+ svwrite_ver_za8_s8_vg2 (0, w11, z28),
+ svwrite_ver_za8_s8_vg2 (0, w11, z28))
+
+/*
+** write_za8_s8_z0_0_w12:
+** mova za0v\.b\[w12, 0:1\], {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_w12, svint8x2_t,
+ svwrite_ver_za8_s8_vg2 (0, w12, z0),
+ svwrite_ver_za8_s8_vg2 (0, w12, z0))
+
+/*
+** write_za8_u8_z18_0_w15:
+** mova za0v\.b\[w15, 0:1\], {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z18_0_w15, svuint8x2_t,
+ svwrite_ver_za8_u8_vg2 (0, w15, z18),
+ svwrite_ver_za8_u8_vg2 (0, w15, z18))
+
+/*
+** write_za8_s8_z23_0_w12p14:
+** mov [^\n]+
+** mov [^\n]+
+** mova za0v\.b\[w12, 14:15\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z23_0_w12p14, svint8x2_t,
+ svwrite_ver_za8_s8_vg2 (0, w12 + 14, z23),
+ svwrite_ver_za8_s8_vg2 (0, w12 + 14, z23))
+
+/*
+** write_za8_u8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za0v\.b\[\1, 0:1\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w12p1, svuint8x2_t,
+ svwrite_ver_za8_u8_vg2 (0, w12 + 1, z4),
+ svwrite_ver_za8_u8_vg2 (0, w12 + 1, z4))
+
+/*
+** write_za8_s8_z28_0_w12p2:
+** mova za0v\.b\[w12, 2:3\], {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z28_0_w12p2, svint8x2_t,
+ svwrite_ver_za8_s8_vg2 (0, w12 + 2, z28),
+ svwrite_ver_za8_s8_vg2 (0, w12 + 2, z28))
+
+/*
+** write_za8_u8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0v\.b\[\1, 0:1\], {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z0_0_w15p3, svuint8x2_t,
+ svwrite_ver_za8_u8_vg2 (0, w15 + 3, z0),
+ svwrite_ver_za8_u8_vg2 (0, w15 + 3, z0))
+
+/*
+** write_za8_u8_z4_0_w15p12:
+** mova za0v\.b\[w15, 12:13\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w15p12, svuint8x2_t,
+ svwrite_ver_za8_u8_vg2 (0, w15 + 12, z4),
+ svwrite_ver_za8_u8_vg2 (0, w15 + 12, z4))
+
+/*
+** write_za8_u8_z28_0_w12p15:
+** add (w[0-9]+), w12, #?15
+** mova za0v\.b\[\1, 0:1\], {z28\.b - z29\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z28_0_w12p15, svuint8x2_t,
+ svwrite_ver_za8_u8_vg2 (0, w12 + 15, z28),
+ svwrite_ver_za8_u8_vg2 (0, w12 + 15, z28))
+
+/*
+** write_za8_s8_z0_0_w15p16:
+** add (w[0-9]+), w15, #?16
+** mova za0v\.b\[\1, 0:1\], {z0\.b - z1\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_w15p16, svint8x2_t,
+ svwrite_ver_za8_s8_vg2 (0, w15 + 16, z0),
+ svwrite_ver_za8_s8_vg2 (0, w15 + 16, z0))
+
+/*
+** write_za8_u8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za0v\.b\[\1, 0:1\], {z4\.b - z5\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x2_t,
+ svwrite_ver_za8_u8_vg2 (0, w12 - 1, z4),
+ svwrite_ver_za8_u8_vg2 (0, w12 - 1, z4))
+
+/*
+** write_za8_u8_z18_0_w16:
+** mov (w1[2-5]), w16
+** mova za0v\.b\[\1, 0:1\], {z18\.b - z19\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z18_0_w16, svuint8x2_t,
+ svwrite_ver_za8_u8_vg2 (0, w16, z18),
+ svwrite_ver_za8_u8_vg2 (0, w16, z18))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg4.c
new file mode 100644
index 0000000000000000000000000000000000000000..8972fed59e39eddf1e359a43a3ce39c32f3f7218
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg4.c
@@ -0,0 +1,156 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_za8_s8_z0_0_0:
+** mov (w1[2-5]), (?:wzr|#?0)
+** mova za0v\.b\[\1, 0:3\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_0, svint8x4_t,
+ svwrite_ver_za8_s8_vg4 (0, 0, z0),
+ svwrite_ver_za8_s8_vg4 (0, 0, z0))
+
+/*
+** write_za8_u8_z4_0_1:
+** mov (w1[2-5]), #?1
+** mova za0v\.b\[\1, 0:3\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_1, svuint8x4_t,
+ svwrite_ver_za8_u8_vg4 (0, 1, z4),
+ svwrite_ver_za8_u8_vg4 (0, 1, z4))
+
+/*
+** write_za8_s8_z28_0_w11:
+** mov (w1[2-5]), w11
+** mova za0v\.b\[\1, 0:3\], {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z28_0_w11, svint8x4_t,
+ svwrite_ver_za8_s8_vg4 (0, w11, z28),
+ svwrite_ver_za8_s8_vg4 (0, w11, z28))
+
+/*
+** write_za8_s8_z0_0_w12:
+** mova za0v\.b\[w12, 0:3\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_w12, svint8x4_t,
+ svwrite_ver_za8_s8_vg4 (0, w12, z0),
+ svwrite_ver_za8_s8_vg4 (0, w12, z0))
+
+/*
+** write_za8_u8_z18_0_w15:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za0v\.b\[w15, 0:3\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z18_0_w15, svuint8x4_t,
+ svwrite_ver_za8_u8_vg4 (0, w15, z18),
+ svwrite_ver_za8_u8_vg4 (0, w15, z18))
+
+/*
+** write_za8_s8_z23_0_w12p12:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za0v\.b\[w12, 12:15\], {[^\n]+}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z23_0_w12p12, svint8x4_t,
+ svwrite_ver_za8_s8_vg4 (0, w12 + 12, z23),
+ svwrite_ver_za8_s8_vg4 (0, w12 + 12, z23))
+
+/*
+** write_za8_u8_z4_0_w12p1:
+** add (w[0-9]+), w12, #?1
+** mova za0v\.b\[\1, 0:3\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w12p1, svuint8x4_t,
+ svwrite_ver_za8_u8_vg4 (0, w12 + 1, z4),
+ svwrite_ver_za8_u8_vg4 (0, w12 + 1, z4))
+
+/*
+** write_za8_s8_z28_0_w12p2:
+** add (w[0-9]+), w12, #?2
+** mova za0v\.b\[\1, 0:3\], {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z28_0_w12p2, svint8x4_t,
+ svwrite_ver_za8_s8_vg4 (0, w12 + 2, z28),
+ svwrite_ver_za8_s8_vg4 (0, w12 + 2, z28))
+
+/*
+** write_za8_u8_z0_0_w15p3:
+** add (w[0-9]+), w15, #?3
+** mova za0v\.b\[\1, 0:3\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z0_0_w15p3, svuint8x4_t,
+ svwrite_ver_za8_u8_vg4 (0, w15 + 3, z0),
+ svwrite_ver_za8_u8_vg4 (0, w15 + 3, z0))
+
+/*
+** write_za8_u8_z0_0_w12p4:
+** mova za0v\.b\[w12, 4:7\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z0_0_w12p4, svuint8x4_t,
+ svwrite_ver_za8_u8_vg4 (0, w12 + 4, z0),
+ svwrite_ver_za8_u8_vg4 (0, w12 + 4, z0))
+
+/*
+** write_za8_u8_z4_0_w15p12:
+** mova za0v\.b\[w15, 12:15\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w15p12, svuint8x4_t,
+ svwrite_ver_za8_u8_vg4 (0, w15 + 12, z4),
+ svwrite_ver_za8_u8_vg4 (0, w15 + 12, z4))
+
+/*
+** write_za8_u8_z28_0_w12p14:
+** add (w[0-9]+), w12, #?14
+** mova za0v\.b\[\1, 0:3\], {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z28_0_w12p14, svuint8x4_t,
+ svwrite_ver_za8_u8_vg4 (0, w12 + 14, z28),
+ svwrite_ver_za8_u8_vg4 (0, w12 + 14, z28))
+
+/*
+** write_za8_s8_z0_0_w15p16:
+** add (w[0-9]+), w15, #?16
+** mova za0v\.b\[\1, 0:3\], {z0\.b - z3\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_s8_z0_0_w15p16, svint8x4_t,
+ svwrite_ver_za8_s8_vg4 (0, w15 + 16, z0),
+ svwrite_ver_za8_s8_vg4 (0, w15 + 16, z0))
+
+/*
+** write_za8_u8_z4_0_w12m1:
+** sub (w[0-9]+), w12, #?1
+** mova za0v\.b\[\1, 0:3\], {z4\.b - z7\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x4_t,
+ svwrite_ver_za8_u8_vg4 (0, w12 - 1, z4),
+ svwrite_ver_za8_u8_vg4 (0, w12 - 1, z4))
+
+/*
+** write_za8_u8_z28_0_w16:
+** mov (w1[2-5]), w16
+** mova za0v\.b\[\1, 0:3\], {z28\.b - z31\.b}
+** ret
+*/
+TEST_ZA_XN (write_za8_u8_z28_0_w16, svuint8x4_t,
+ svwrite_ver_za8_u8_vg4 (0, w16, z28),
+ svwrite_ver_za8_u8_vg4 (0, w16, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za16_vg1x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..204446003b7c6ec86ea693343843b102aa87a4ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za16_vg1x2.c
@@ -0,0 +1,122 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_0_z0, svfloat16x2_t,
+ svwrite_za16_f16_vg1x2 (0, z0),
+ svwrite_za16_vg1x2 (0, z0))
+
+/*
+** write_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w0_z0, svint16x2_t,
+ svwrite_za16_s16_vg1x2 (w0, z0),
+ svwrite_za16_vg1x2 (w0, z0))
+
+/*
+** write_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w7_z0, svuint16x2_t,
+ svwrite_za16_u16_vg1x2 (w7, z0),
+ svwrite_za16_vg1x2 (w7, z0))
+
+/*
+** write_w8_z0:
+** mova za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z0, svbfloat16x2_t,
+ svwrite_za16_bf16_vg1x2 (w8, z0),
+ svwrite_za16_vg1x2 (w8, z0))
+
+/*
+** write_w11_z0:
+** mova za\.d\[w11, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w11_z0, svint16x2_t,
+ svwrite_za16_s16_vg1x2 (w11, z0),
+ svwrite_za16_vg1x2 (w11, z0))
+
+
+/*
+** write_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w12_z0, svuint16x2_t,
+ svwrite_za16_u16_vg1x2 (w12, z0),
+ svwrite_za16_vg1x2 (w12, z0))
+
+/*
+** write_w8p7_z0:
+** mova za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p7_z0, svfloat16x2_t,
+ svwrite_za16_f16_vg1x2 (w8 + 7, z0),
+ svwrite_za16_vg1x2 (w8 + 7, z0))
+
+/*
+** write_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p8_z0, svint16x2_t,
+ svwrite_za16_s16_vg1x2 (w8 + 8, z0),
+ svwrite_za16_vg1x2 (w8 + 8, z0))
+
+/*
+** write_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8m1_z0, svuint16x2_t,
+ svwrite_za16_u16_vg1x2 (w8 - 1, z0),
+ svwrite_za16_vg1x2 (w8 - 1, z0))
+
+/*
+** write_w8_z18:
+** mova za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z18, svfloat16x2_t,
+ svwrite_za16_f16_vg1x2 (w8, z18),
+ svwrite_za16_vg1x2 (w8, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** write_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mova za\.d\[w8, 0, vgx2\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (write_w8_z23, svint16x2_t,
+ svwrite_za16_s16_vg1x2 (w8, z23),
+ svwrite_za16_vg1x2 (w8, z23))
+
+/*
+** write_w8_z28:
+** mova za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z28, svbfloat16x2_t,
+ svwrite_za16_bf16_vg1x2 (w8, z28),
+ svwrite_za16_vg1x2 (w8, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za16_vg1x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..e965801277fe9a3de770bff4bba1bc8345eda831
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za16_vg1x4.c
@@ -0,0 +1,137 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_0_z0, svint16x4_t,
+ svwrite_za16_s16_vg1x4 (0, z0),
+ svwrite_za16_vg1x4 (0, z0))
+
+/*
+** write_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w0_z0, svuint16x4_t,
+ svwrite_za16_u16_vg1x4 (w0, z0),
+ svwrite_za16_vg1x4 (w0, z0))
+
+/*
+** write_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w7_z0, svfloat16x4_t,
+ svwrite_za16_f16_vg1x4 (w7, z0),
+ svwrite_za16_vg1x4 (w7, z0))
+
+/*
+** write_w8_z0:
+** mova za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z0, svint16x4_t,
+ svwrite_za16_s16_vg1x4 (w8, z0),
+ svwrite_za16_vg1x4 (w8, z0))
+
+/*
+** write_w11_z0:
+** mova za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w11_z0, svuint16x4_t,
+ svwrite_za16_u16_vg1x4 (w11, z0),
+ svwrite_za16_vg1x4 (w11, z0))
+
+
+/*
+** write_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w12_z0, svbfloat16x4_t,
+ svwrite_za16_bf16_vg1x4 (w12, z0),
+ svwrite_za16_vg1x4 (w12, z0))
+
+/*
+** write_w8p7_z0:
+** mova za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p7_z0, svint16x4_t,
+ svwrite_za16_s16_vg1x4 (w8 + 7, z0),
+ svwrite_za16_vg1x4 (w8 + 7, z0))
+
+/*
+** write_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p8_z0, svuint16x4_t,
+ svwrite_za16_u16_vg1x4 (w8 + 8, z0),
+ svwrite_za16_vg1x4 (w8 + 8, z0))
+
+/*
+** write_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8m1_z0, svfloat16x4_t,
+ svwrite_za16_f16_vg1x4 (w8 - 1, z0),
+ svwrite_za16_vg1x4 (w8 - 1, z0))
+
+/*
+** write_w8_z4:
+** mova za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z4, svint16x4_t,
+ svwrite_za16_s16_vg1x4 (w8, z4),
+ svwrite_za16_vg1x4 (w8, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** write_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (write_w8_z18, svuint16x4_t,
+ svwrite_za16_u16_vg1x4 (w8, z18),
+ svwrite_za16_vg1x4 (w8, z18))
+
+/*
+** write_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (write_w8_z23, svbfloat16x4_t,
+ svwrite_za16_bf16_vg1x4 (w8, z23),
+ svwrite_za16_vg1x4 (w8, z23))
+
+/*
+** write_w8_z28:
+** mova za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z28, svint16x4_t,
+ svwrite_za16_s16_vg1x4 (w8, z28),
+ svwrite_za16_vg1x4 (w8, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za32_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za32_vg1x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..9f4471608f90bd675534c2a4cc778f71634b9045
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za32_vg1x2.c
@@ -0,0 +1,122 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_0_z0, svfloat32x2_t,
+ svwrite_za32_f32_vg1x2 (0, z0),
+ svwrite_za32_vg1x2 (0, z0))
+
+/*
+** write_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w0_z0, svint32x2_t,
+ svwrite_za32_s32_vg1x2 (w0, z0),
+ svwrite_za32_vg1x2 (w0, z0))
+
+/*
+** write_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w7_z0, svuint32x2_t,
+ svwrite_za32_u32_vg1x2 (w7, z0),
+ svwrite_za32_vg1x2 (w7, z0))
+
+/*
+** write_w8_z0:
+** mova za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z0, svfloat32x2_t,
+ svwrite_za32_f32_vg1x2 (w8, z0),
+ svwrite_za32_vg1x2 (w8, z0))
+
+/*
+** write_w11_z0:
+** mova za\.d\[w11, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w11_z0, svint32x2_t,
+ svwrite_za32_s32_vg1x2 (w11, z0),
+ svwrite_za32_vg1x2 (w11, z0))
+
+
+/*
+** write_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w12_z0, svuint32x2_t,
+ svwrite_za32_u32_vg1x2 (w12, z0),
+ svwrite_za32_vg1x2 (w12, z0))
+
+/*
+** write_w8p7_z0:
+** mova za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p7_z0, svfloat32x2_t,
+ svwrite_za32_f32_vg1x2 (w8 + 7, z0),
+ svwrite_za32_vg1x2 (w8 + 7, z0))
+
+/*
+** write_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p8_z0, svint32x2_t,
+ svwrite_za32_s32_vg1x2 (w8 + 8, z0),
+ svwrite_za32_vg1x2 (w8 + 8, z0))
+
+/*
+** write_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8m1_z0, svuint32x2_t,
+ svwrite_za32_u32_vg1x2 (w8 - 1, z0),
+ svwrite_za32_vg1x2 (w8 - 1, z0))
+
+/*
+** write_w8_z18:
+** mova za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z18, svfloat32x2_t,
+ svwrite_za32_f32_vg1x2 (w8, z18),
+ svwrite_za32_vg1x2 (w8, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** write_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mova za\.d\[w8, 0, vgx2\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (write_w8_z23, svint32x2_t,
+ svwrite_za32_s32_vg1x2 (w8, z23),
+ svwrite_za32_vg1x2 (w8, z23))
+
+/*
+** write_w8_z28:
+** mova za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z28, svuint32x2_t,
+ svwrite_za32_u32_vg1x2 (w8, z28),
+ svwrite_za32_vg1x2 (w8, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za32_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za32_vg1x4.c
new file mode 100644
index 0000000000000000000000000000000000000000..d94c8d806dc4611a8f2c511e472f7ac0b5b6088b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za32_vg1x4.c
@@ -0,0 +1,137 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_0_z0, svint32x4_t,
+ svwrite_za32_s32_vg1x4 (0, z0),
+ svwrite_za32_vg1x4 (0, z0))
+
+/*
+** write_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w0_z0, svuint32x4_t,
+ svwrite_za32_u32_vg1x4 (w0, z0),
+ svwrite_za32_vg1x4 (w0, z0))
+
+/*
+** write_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w7_z0, svfloat32x4_t,
+ svwrite_za32_f32_vg1x4 (w7, z0),
+ svwrite_za32_vg1x4 (w7, z0))
+
+/*
+** write_w8_z0:
+** mova za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z0, svint32x4_t,
+ svwrite_za32_s32_vg1x4 (w8, z0),
+ svwrite_za32_vg1x4 (w8, z0))
+
+/*
+** write_w11_z0:
+** mova za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w11_z0, svuint32x4_t,
+ svwrite_za32_u32_vg1x4 (w11, z0),
+ svwrite_za32_vg1x4 (w11, z0))
+
+
+/*
+** write_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w12_z0, svfloat32x4_t,
+ svwrite_za32_f32_vg1x4 (w12, z0),
+ svwrite_za32_vg1x4 (w12, z0))
+
+/*
+** write_w8p7_z0:
+** mova za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p7_z0, svint32x4_t,
+ svwrite_za32_s32_vg1x4 (w8 + 7, z0),
+ svwrite_za32_vg1x4 (w8 + 7, z0))
+
+/*
+** write_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p8_z0, svuint32x4_t,
+ svwrite_za32_u32_vg1x4 (w8 + 8, z0),
+ svwrite_za32_vg1x4 (w8 + 8, z0))
+
+/*
+** write_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8m1_z0, svfloat32x4_t,
+ svwrite_za32_f32_vg1x4 (w8 - 1, z0),
+ svwrite_za32_vg1x4 (w8 - 1, z0))
+
+/*
+** write_w8_z4:
+** mova za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z4, svint32x4_t,
+ svwrite_za32_s32_vg1x4 (w8, z4),
+ svwrite_za32_vg1x4 (w8, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers. */
+
+/*
+** write_w8_z18:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (write_w8_z18, svuint32x4_t,
+ svwrite_za32_u32_vg1x4 (w8, z18),
+ svwrite_za32_vg1x4 (w8, z18))
+
+/*
+** write_w8_z23:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mova za\.d\[w8, 0, vgx4\], [^\n]+
+** ret
+*/
+TEST_ZA_XN (write_w8_z23, svfloat32x4_t,
+ svwrite_za32_f32_vg1x4 (w8, z23),
+ svwrite_za32_vg1x4 (w8, z23))
+
+/*
+** write_w8_z28:
+** mova za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d}
+** ret
*/
+TEST_ZA_XN (write_w8_z28, svint32x4_t,
+ svwrite_za32_s32_vg1x4 (w8, z28),
+ svwrite_za32_vg1x4 (w8, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za64_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za64_vg1x2.c
new file mode 100644
index 0000000000000000000000000000000000000000..23ff25bee8a31d6a1c79af5b861906476f9cdfa0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za64_vg1x2.c
@@ -0,0 +1,122 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+/*
+** write_0_z0:
+** mov (w8|w9|w10|w11), #?0
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_0_z0, svfloat64x2_t,
+ svwrite_za64_f64_vg1x2 (0, z0),
+ svwrite_za64_vg1x2 (0, z0))
+
+/*
+** write_w0_z0:
+** mov (w8|w9|w10|w11), w0
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w0_z0, svint64x2_t,
+ svwrite_za64_s64_vg1x2 (w0, z0),
+ svwrite_za64_vg1x2 (w0, z0))
+
+/*
+** write_w7_z0:
+** mov (w8|w9|w10|w11), w7
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w7_z0, svuint64x2_t,
+ svwrite_za64_u64_vg1x2 (w7, z0),
+ svwrite_za64_vg1x2 (w7, z0))
+
+/*
+** write_w8_z0:
+** mova za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8_z0, svfloat64x2_t,
+ svwrite_za64_f64_vg1x2 (w8, z0),
+ svwrite_za64_vg1x2 (w8, z0))
+
+/*
+** write_w11_z0:
+** mova za\.d\[w11, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w11_z0, svint64x2_t,
+ svwrite_za64_s64_vg1x2 (w11, z0),
+ svwrite_za64_vg1x2 (w11, z0))
+
+
+/*
+** write_w12_z0:
+** mov (w8|w9|w10|w11), w12
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w12_z0, svuint64x2_t,
+ svwrite_za64_u64_vg1x2 (w12, z0),
+ svwrite_za64_vg1x2 (w12, z0))
+
+/*
+** write_w8p7_z0:
+** mova za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p7_z0, svfloat64x2_t,
+ svwrite_za64_f64_vg1x2 (w8 + 7, z0),
+ svwrite_za64_vg1x2 (w8 + 7, z0))
+
+/*
+** write_w8p8_z0:
+** add (w8|w9|w10|w11), w8, #?8
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8p8_z0, svint64x2_t,
+ svwrite_za64_s64_vg1x2 (w8 + 8, z0),
+ svwrite_za64_vg1x2 (w8 + 8, z0))
+
+/*
+** write_w8m1_z0:
+** sub (w8|w9|w10|w11), w8, #?1
+** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d}
+** ret
+*/
+TEST_ZA_XN (write_w8m1_z0, svuint64x2_t,
+ svwrite_za64_u64_vg1x2 (w8 - 1, z0),
+ svwrite_za64_vg1x2 (w8 - 1, z0))
+
+/*
+** write_w8_z18:
+** mova za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d}
mova za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d} +** ret +*/ +TEST_ZA_XN (write_w8_z18, svfloat64x2_t, + svwrite_za64_f64_vg1x2 (w8, z18), + svwrite_za64_vg1x2 (w8, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** write_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** mova za\.d\[w8, 0, vgx2\], [^\n]+ +** ret +*/ +TEST_ZA_XN (write_w8_z23, svint64x2_t, + svwrite_za64_s64_vg1x2 (w8, z23), + svwrite_za64_vg1x2 (w8, z23)) + +/* +** write_w8_z28: +** mova za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d} +** ret +*/ +TEST_ZA_XN (write_w8_z28, svuint64x2_t, + svwrite_za64_u64_vg1x2 (w8, z28), + svwrite_za64_vg1x2 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za64_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za64_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..7fcd6e546a370b94e7a813562ca63fda0460e3e4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za64_vg1x4.c @@ -0,0 +1,137 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** write_0_z0: +** mov (w8|w9|w10|w11), #?0 +** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_0_z0, svint64x4_t, + svwrite_za64_s64_vg1x4 (0, z0), + svwrite_za64_vg1x4 (0, z0)) + +/* +** write_w0_z0: +** mov (w8|w9|w10|w11), w0 +** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_w0_z0, svuint64x4_t, + svwrite_za64_u64_vg1x4 (w0, z0), + svwrite_za64_vg1x4 (w0, z0)) + +/* +** write_w7_z0: +** mov (w8|w9|w10|w11), w7 +** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_w7_z0, svfloat64x4_t, + svwrite_za64_f64_vg1x4 (w7, z0), + svwrite_za64_vg1x4 (w7, z0)) + +/* +** write_w8_z0: +** mova za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_w8_z0, svint64x4_t, + svwrite_za64_s64_vg1x4 (w8, z0), + svwrite_za64_vg1x4 (w8, z0)) + +/* +** write_w11_z0: +** mova za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_w11_z0, svuint64x4_t, + svwrite_za64_u64_vg1x4 (w11, z0), + svwrite_za64_vg1x4 (w11, z0)) + + +/* +** write_w12_z0: +** mov (w8|w9|w10|w11), w12 +** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_w12_z0, svfloat64x4_t, + svwrite_za64_f64_vg1x4 (w12, z0), + svwrite_za64_vg1x4 (w12, z0)) + +/* +** write_w8p7_z0: +** mova za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_w8p7_z0, svint64x4_t, + svwrite_za64_s64_vg1x4 (w8 + 7, z0), + svwrite_za64_vg1x4 (w8 + 7, z0)) + +/* +** write_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_w8p8_z0, svuint64x4_t, + svwrite_za64_u64_vg1x4 (w8 + 8, z0), + svwrite_za64_vg1x4 (w8 + 8, z0)) + +/* +** write_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_w8m1_z0, svfloat64x4_t, + svwrite_za64_f64_vg1x4 (w8 - 1, z0), + svwrite_za64_vg1x4 (w8 - 1, z0)) + +/* +** write_w8_z4: +** mova za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d} +** ret +*/ +TEST_ZA_XN (write_w8_z4, svint64x4_t, + svwrite_za64_s64_vg1x4 (w8, z4), + svwrite_za64_vg1x4 (w8, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. 
*/ + +/* +** write_w8_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mova za\.d\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (write_w8_z18, svuint64x4_t, + svwrite_za64_u64_vg1x4 (w8, z18), + svwrite_za64_vg1x4 (w8, z18)) + +/* +** write_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mova za\.d\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (write_w8_z23, svfloat64x4_t, + svwrite_za64_f64_vg1x4 (w8, z23), + svwrite_za64_vg1x4 (w8, z23)) + +/* +** write_w8_z28: +** mova za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d} +** ret +*/ +TEST_ZA_XN (write_w8_z28, svint64x4_t, + svwrite_za64_s64_vg1x4 (w8, z28), + svwrite_za64_vg1x4 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x2.c new file mode 100644 index 0000000000000000000000000000000000000000..4b83a37edd297ae8c30c0f474dee7bf0b58a6112 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x2.c @@ -0,0 +1,122 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** write_0_z0: +** mov (w8|w9|w10|w11), #?0 +** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (write_0_z0, svint8x2_t, + svwrite_za8_s8_vg1x2 (0, z0), + svwrite_za8_vg1x2 (0, z0)) + +/* +** write_w0_z0: +** mov (w8|w9|w10|w11), w0 +** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (write_w0_z0, svint8x2_t, + svwrite_za8_s8_vg1x2 (w0, z0), + svwrite_za8_vg1x2 (w0, z0)) + +/* +** write_w7_z0: +** mov (w8|w9|w10|w11), w7 +** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (write_w7_z0, svuint8x2_t, + svwrite_za8_u8_vg1x2 (w7, z0), + svwrite_za8_vg1x2 (w7, z0)) + +/* +** write_w8_z0: +** mova za\.d\[w8, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (write_w8_z0, svint8x2_t, + svwrite_za8_s8_vg1x2 (w8, z0), + svwrite_za8_vg1x2 (w8, z0)) + +/* +** write_w11_z0: +** mova za\.d\[w11, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (write_w11_z0, svint8x2_t, + svwrite_za8_s8_vg1x2 (w11, z0), + svwrite_za8_vg1x2 (w11, z0)) + + +/* +** write_w12_z0: +** mov (w8|w9|w10|w11), w12 +** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (write_w12_z0, svuint8x2_t, + svwrite_za8_u8_vg1x2 (w12, z0), + svwrite_za8_vg1x2 (w12, z0)) + +/* +** write_w8p7_z0: +** mova za\.d\[w8, 7, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (write_w8p7_z0, svint8x2_t, + svwrite_za8_s8_vg1x2 (w8 + 7, z0), + svwrite_za8_vg1x2 (w8 + 7, z0)) + +/* +** write_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (write_w8p8_z0, svint8x2_t, + svwrite_za8_s8_vg1x2 (w8 + 8, z0), + svwrite_za8_vg1x2 (w8 + 8, z0)) + +/* +** write_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** mova za\.d\[\1, 0, vgx2\], {z0\.d - z1\.d} +** ret +*/ +TEST_ZA_XN (write_w8m1_z0, svuint8x2_t, + svwrite_za8_u8_vg1x2 (w8 - 1, z0), + svwrite_za8_vg1x2 (w8 - 1, z0)) + +/* +** write_w8_z18: +** mova za\.d\[w8, 0, vgx2\], {z18\.d - z19\.d} +** ret +*/ +TEST_ZA_XN (write_w8_z18, svuint8x2_t, + svwrite_za8_u8_vg1x2 (w8, z18), + svwrite_za8_vg1x2 (w8, z18)) + +/* Leave the assembler to check for correctness for misaligned registers. 
*/ + +/* +** write_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** mova za\.d\[w8, 0, vgx2\], [^\n]+ +** ret +*/ +TEST_ZA_XN (write_w8_z23, svint8x2_t, + svwrite_za8_s8_vg1x2 (w8, z23), + svwrite_za8_vg1x2 (w8, z23)) + +/* +** write_w8_z28: +** mova za\.d\[w8, 0, vgx2\], {z28\.d - z29\.d} +** ret +*/ +TEST_ZA_XN (write_w8_z28, svuint8x2_t, + svwrite_za8_u8_vg1x2 (w8, z28), + svwrite_za8_vg1x2 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x4.c new file mode 100644 index 0000000000000000000000000000000000000000..a529bf9fccaf5d36a6c932ea7ca9e90faa54c5da --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x4.c @@ -0,0 +1,137 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** write_0_z0: +** mov (w8|w9|w10|w11), #?0 +** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_0_z0, svint8x4_t, + svwrite_za8_s8_vg1x4 (0, z0), + svwrite_za8_vg1x4 (0, z0)) + +/* +** write_w0_z0: +** mov (w8|w9|w10|w11), w0 +** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_w0_z0, svuint8x4_t, + svwrite_za8_u8_vg1x4 (w0, z0), + svwrite_za8_vg1x4 (w0, z0)) + +/* +** write_w7_z0: +** mov (w8|w9|w10|w11), w7 +** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_w7_z0, svint8x4_t, + svwrite_za8_s8_vg1x4 (w7, z0), + svwrite_za8_vg1x4 (w7, z0)) + +/* +** write_w8_z0: +** mova za\.d\[w8, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_w8_z0, svint8x4_t, + svwrite_za8_s8_vg1x4 (w8, z0), + svwrite_za8_vg1x4 (w8, z0)) + +/* +** write_w11_z0: +** mova za\.d\[w11, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_w11_z0, svuint8x4_t, + svwrite_za8_u8_vg1x4 (w11, z0), + svwrite_za8_vg1x4 (w11, z0)) + + +/* +** write_w12_z0: +** mov (w8|w9|w10|w11), w12 +** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_w12_z0, svint8x4_t, + svwrite_za8_s8_vg1x4 (w12, z0), + svwrite_za8_vg1x4 (w12, z0)) + +/* +** write_w8p7_z0: +** mova za\.d\[w8, 7, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_w8p7_z0, svint8x4_t, + svwrite_za8_s8_vg1x4 (w8 + 7, z0), + svwrite_za8_vg1x4 (w8 + 7, z0)) + +/* +** write_w8p8_z0: +** add (w8|w9|w10|w11), w8, #?8 +** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_w8p8_z0, svuint8x4_t, + svwrite_za8_u8_vg1x4 (w8 + 8, z0), + svwrite_za8_vg1x4 (w8 + 8, z0)) + +/* +** write_w8m1_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** mova za\.d\[\1, 0, vgx4\], {z0\.d - z3\.d} +** ret +*/ +TEST_ZA_XN (write_w8m1_z0, svint8x4_t, + svwrite_za8_s8_vg1x4 (w8 - 1, z0), + svwrite_za8_vg1x4 (w8 - 1, z0)) + +/* +** write_w8_z4: +** mova za\.d\[w8, 0, vgx4\], {z4\.d - z7\.d} +** ret +*/ +TEST_ZA_XN (write_w8_z4, svint8x4_t, + svwrite_za8_s8_vg1x4 (w8, z4), + svwrite_za8_vg1x4 (w8, z4)) + +/* Leave the assembler to check for correctness for misaligned registers. 
*/ + +/* +** write_w8_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mova za\.d\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (write_w8_z18, svuint8x4_t, + svwrite_za8_u8_vg1x4 (w8, z18), + svwrite_za8_vg1x4 (w8, z18)) + +/* +** write_w8_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mova za\.d\[w8, 0, vgx4\], [^\n]+ +** ret +*/ +TEST_ZA_XN (write_w8_z23, svuint8x4_t, + svwrite_za8_u8_vg1x4 (w8, z23), + svwrite_za8_vg1x4 (w8, z23)) + +/* +** write_w8_z28: +** mova za\.d\[w8, 0, vgx4\], {z28\.d - z31\.d} +** ret +*/ +TEST_ZA_XN (write_w8_z28, svint8x4_t, + svwrite_za8_s8_vg1x4 (w8, z28), + svwrite_za8_vg1x4 (w8, z28)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zero_zt.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zero_zt.c new file mode 100644 index 0000000000000000000000000000000000000000..eec298f8bd53b75759640455779d18c2bdaacbb1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zero_zt.c @@ -0,0 +1,12 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#define STREAMING_COMPATIBLE +#define SHARED_ZT0 +#include "test_sme2_acle.h" + +/* +** zero_zt0: +** zero { zt0 } +** ret +*/ +PROTO (zero_zt0, void, ()) { svzero_zt (0); } diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_bf16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..53fb2fafc7aab8e21bf9befb6b4bed7d01d48de1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_bf16_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.h - z1\.h}, z0\.h, z1\.h +** ret +*/ +TEST_XN (zip_z0_z0, svbfloat16x2_t, z0, + svzip_bf16_x2 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.h - z1\.h}, z4\.h, z5\.h +** ret +*/ +TEST_XN (zip_z0_z4, svbfloat16x2_t, z0, + svzip_bf16_x2 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** zip {z4\.h - z5\.h}, z18\.h, z19\.h +** ret +*/ +TEST_XN (zip_z4_z18, svbfloat16x2_t, z4, + svzip_bf16_x2 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** zip {z18\.h - z19\.h}, z23\.h, z24\.h +** ret +*/ +TEST_XN (zip_z18_z23, svbfloat16x2_t, z18, + svzip_bf16_x2 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, z28\.h, z29\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svbfloat16x2_t, z23, + svzip_bf16_x2 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.h - z29\.h}, z0\.h, z1\.h +** ret +*/ +TEST_XN (zip_z28_z0, svbfloat16x2_t, z28, + svzip_bf16_x2 (z0), + svzip (z0)) + +/* +** zip_z28_z0_z23: +** zip {z28\.h - z29\.h}, z0\.h, z23\.h +** ret +*/ +TEST_XN (zip_z28_z0_z23, svbfloat16x2_t, z28, + svzip_bf16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zip_z28_z5_z19: +** zip {z28\.h - z29\.h}, z5\.h, z19\.h +** ret +*/ +TEST_XN (zip_z28_z5_z19, svbfloat16x2_t, z28, + svzip_bf16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_bf16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_bf16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..7e532f35860f32e205f4b51b0e83fe35b291ca04 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_bf16_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include 
"test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (zip_z0_z0, svbfloat16x4_t, z0, + svzip_bf16_x4 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (zip_z0_z4, svbfloat16x4_t, z0, + svzip_bf16_x4 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.h - z7\.h}, [^\n]+ +** ret +*/ +TEST_XN (zip_z4_z18, svbfloat16x4_t, z4, + svzip_bf16_x4 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z18_z23, svbfloat16x4_t, z18, + svzip_bf16_x4 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svbfloat16x4_t, z23, + svzip_bf16_x4 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (zip_z28_z0, svbfloat16x4_t, z28, + svzip_bf16_x4 (z0), + svzip (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..c404cfa7d40aacf9fd098d7c9ef4bfba07bf1c9f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f16_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.h - z1\.h}, z0\.h, z1\.h +** ret +*/ +TEST_XN (zip_z0_z0, svfloat16x2_t, z0, + svzip_f16_x2 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.h - z1\.h}, z4\.h, z5\.h +** ret +*/ +TEST_XN (zip_z0_z4, svfloat16x2_t, z0, + svzip_f16_x2 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** zip {z4\.h - z5\.h}, z18\.h, z19\.h +** ret +*/ +TEST_XN (zip_z4_z18, svfloat16x2_t, z4, + svzip_f16_x2 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** zip {z18\.h - z19\.h}, z23\.h, z24\.h +** ret +*/ +TEST_XN (zip_z18_z23, svfloat16x2_t, z18, + svzip_f16_x2 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, z28\.h, z29\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svfloat16x2_t, z23, + svzip_f16_x2 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.h - z29\.h}, z0\.h, z1\.h +** ret +*/ +TEST_XN (zip_z28_z0, svfloat16x2_t, z28, + svzip_f16_x2 (z0), + svzip (z0)) + +/* +** zip_z28_z0_z23: +** zip {z28\.h - z29\.h}, z0\.h, z23\.h +** ret +*/ +TEST_XN (zip_z28_z0_z23, svfloat16x2_t, z28, + svzip_f16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zip_z28_z5_z19: +** zip {z28\.h - z29\.h}, z5\.h, z19\.h +** ret +*/ +TEST_XN (zip_z28_z5_z19, svfloat16x2_t, z28, + svzip_f16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..3159d1ec7baf382dda1586c6df62fb54167e505e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f16_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (zip_z0_z0, 
svfloat16x4_t, z0, + svzip_f16_x4 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (zip_z0_z4, svfloat16x4_t, z0, + svzip_f16_x4 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.h - z7\.h}, [^\n]+ +** ret +*/ +TEST_XN (zip_z4_z18, svfloat16x4_t, z4, + svzip_f16_x4 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z18_z23, svfloat16x4_t, z18, + svzip_f16_x4 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svfloat16x4_t, z23, + svzip_f16_x4 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (zip_z28_z0, svfloat16x4_t, z28, + svzip_f16_x4 (z0), + svzip (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..24f19001b89ff968607833b12e31d3518f804bfb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f32_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.s - z1\.s}, z0\.s, z1\.s +** ret +*/ +TEST_XN (zip_z0_z0, svfloat32x2_t, z0, + svzip_f32_x2 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.s - z1\.s}, z4\.s, z5\.s +** ret +*/ +TEST_XN (zip_z0_z4, svfloat32x2_t, z0, + svzip_f32_x2 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** zip {z4\.s - z5\.s}, z18\.s, z19\.s +** ret +*/ +TEST_XN (zip_z4_z18, svfloat32x2_t, z4, + svzip_f32_x2 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** zip {z18\.s - z19\.s}, z23\.s, z24\.s +** ret +*/ +TEST_XN (zip_z18_z23, svfloat32x2_t, z18, + svzip_f32_x2 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, z28\.s, z29\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svfloat32x2_t, z23, + svzip_f32_x2 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.s - z29\.s}, z0\.s, z1\.s +** ret +*/ +TEST_XN (zip_z28_z0, svfloat32x2_t, z28, + svzip_f32_x2 (z0), + svzip (z0)) + +/* +** zip_z28_z0_z23: +** zip {z28\.s - z29\.s}, z0\.s, z23\.s +** ret +*/ +TEST_XN (zip_z28_z0_z23, svfloat32x2_t, z28, + svzip_f32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zip_z28_z5_z19: +** zip {z28\.s - z29\.s}, z5\.s, z19\.s +** ret +*/ +TEST_XN (zip_z28_z5_z19, svfloat32x2_t, z28, + svzip_f32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..2f4fd53e9116910414bde628f7ba1126ce7a5f2b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f32_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.s - z3\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (zip_z0_z0, svfloat32x4_t, z0, + svzip_f32_x4 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ 
+TEST_XN (zip_z0_z4, svfloat32x4_t, z0, + svzip_f32_x4 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.s - z7\.s}, [^\n]+ +** ret +*/ +TEST_XN (zip_z4_z18, svfloat32x4_t, z4, + svzip_f32_x4 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z18_z23, svfloat32x4_t, z18, + svzip_f32_x4 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svfloat32x4_t, z23, + svzip_f32_x4 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.s - z31\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (zip_z28_z0, svfloat32x4_t, z28, + svzip_f32_x4 (z0), + svzip (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..6c27bdb868dae0aa1d1dcb47d90721a9f6f51478 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f64_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.d - z1\.d}, z0\.d, z1\.d +** ret +*/ +TEST_XN (zip_z0_z0, svfloat64x2_t, z0, + svzip_f64_x2 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.d - z1\.d}, z4\.d, z5\.d +** ret +*/ +TEST_XN (zip_z0_z4, svfloat64x2_t, z0, + svzip_f64_x2 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** zip {z4\.d - z5\.d}, z18\.d, z19\.d +** ret +*/ +TEST_XN (zip_z4_z18, svfloat64x2_t, z4, + svzip_f64_x2 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** zip {z18\.d - z19\.d}, z23\.d, z24\.d +** ret +*/ +TEST_XN (zip_z18_z23, svfloat64x2_t, z18, + svzip_f64_x2 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, z28\.d, z29\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svfloat64x2_t, z23, + svzip_f64_x2 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.d - z29\.d}, z0\.d, z1\.d +** ret +*/ +TEST_XN (zip_z28_z0, svfloat64x2_t, z28, + svzip_f64_x2 (z0), + svzip (z0)) + +/* +** zip_z28_z0_z23: +** zip {z28\.d - z29\.d}, z0\.d, z23\.d +** ret +*/ +TEST_XN (zip_z28_z0_z23, svfloat64x2_t, z28, + svzip_f64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zip_z28_z5_z19: +** zip {z28\.d - z29\.d}, z5\.d, z19\.d +** ret +*/ +TEST_XN (zip_z28_z5_z19, svfloat64x2_t, z28, + svzip_f64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..ae102763c26ef074cd39672f4e200d0628846422 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f64_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.d - z3\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (zip_z0_z0, svfloat64x4_t, z0, + svzip_f64_x4 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (zip_z0_z4, svfloat64x4_t, z0, + svzip_f64_x4 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** 
mov [^\n]+ +** mov [^\n]+ +** zip {z4\.d - z7\.d}, [^\n]+ +** ret +*/ +TEST_XN (zip_z4_z18, svfloat64x4_t, z4, + svzip_f64_x4 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z18_z23, svfloat64x4_t, z18, + svzip_f64_x4 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svfloat64x4_t, z23, + svzip_f64_x4 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.d - z31\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (zip_z28_z0, svfloat64x4_t, z28, + svzip_f64_x4 (z0), + svzip (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..ebe26f1ad46a6a54ff4bd3631dd62ae5387b5204 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s16_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.h - z1\.h}, z0\.h, z1\.h +** ret +*/ +TEST_XN (zip_z0_z0, svint16x2_t, z0, + svzip_s16_x2 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.h - z1\.h}, z4\.h, z5\.h +** ret +*/ +TEST_XN (zip_z0_z4, svint16x2_t, z0, + svzip_s16_x2 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** zip {z4\.h - z5\.h}, z18\.h, z19\.h +** ret +*/ +TEST_XN (zip_z4_z18, svint16x2_t, z4, + svzip_s16_x2 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** zip {z18\.h - z19\.h}, z23\.h, z24\.h +** ret +*/ +TEST_XN (zip_z18_z23, svint16x2_t, z18, + svzip_s16_x2 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, z28\.h, z29\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svint16x2_t, z23, + svzip_s16_x2 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.h - z29\.h}, z0\.h, z1\.h +** ret +*/ +TEST_XN (zip_z28_z0, svint16x2_t, z28, + svzip_s16_x2 (z0), + svzip (z0)) + +/* +** zip_z28_z0_z23: +** zip {z28\.h - z29\.h}, z0\.h, z23\.h +** ret +*/ +TEST_XN (zip_z28_z0_z23, svint16x2_t, z28, + svzip_s16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zip_z28_z5_z19: +** zip {z28\.h - z29\.h}, z5\.h, z19\.h +** ret +*/ +TEST_XN (zip_z28_z5_z19, svint16x2_t, z28, + svzip_s16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..bfb42a02aafbe384cc5f912e20360d4c7d015789 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s16_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (zip_z0_z0, svint16x4_t, z0, + svzip_s16_x4 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (zip_z0_z4, svint16x4_t, z0, + svzip_s16_x4 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.h - z7\.h}, [^\n]+ +** ret +*/ +TEST_XN (zip_z4_z18, svint16x4_t, z4, + svzip_s16_x4 (z18), + svzip (z18)) + +/* 
+** zip_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z18_z23, svint16x4_t, z18, + svzip_s16_x4 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svint16x4_t, z23, + svzip_s16_x4 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (zip_z28_z0, svint16x4_t, z28, + svzip_s16_x4 (z0), + svzip (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..8969a89cd1afef30bf55ba1efbb0a0f487ae52cd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s32_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.s - z1\.s}, z0\.s, z1\.s +** ret +*/ +TEST_XN (zip_z0_z0, svint32x2_t, z0, + svzip_s32_x2 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.s - z1\.s}, z4\.s, z5\.s +** ret +*/ +TEST_XN (zip_z0_z4, svint32x2_t, z0, + svzip_s32_x2 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** zip {z4\.s - z5\.s}, z18\.s, z19\.s +** ret +*/ +TEST_XN (zip_z4_z18, svint32x2_t, z4, + svzip_s32_x2 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** zip {z18\.s - z19\.s}, z23\.s, z24\.s +** ret +*/ +TEST_XN (zip_z18_z23, svint32x2_t, z18, + svzip_s32_x2 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, z28\.s, z29\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svint32x2_t, z23, + svzip_s32_x2 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.s - z29\.s}, z0\.s, z1\.s +** ret +*/ +TEST_XN (zip_z28_z0, svint32x2_t, z28, + svzip_s32_x2 (z0), + svzip (z0)) + +/* +** zip_z28_z0_z23: +** zip {z28\.s - z29\.s}, z0\.s, z23\.s +** ret +*/ +TEST_XN (zip_z28_z0_z23, svint32x2_t, z28, + svzip_s32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zip_z28_z5_z19: +** zip {z28\.s - z29\.s}, z5\.s, z19\.s +** ret +*/ +TEST_XN (zip_z28_z5_z19, svint32x2_t, z28, + svzip_s32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..5c2c39327261315db71da354645d1d9ce81706b1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s32_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.s - z3\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (zip_z0_z0, svint32x4_t, z0, + svzip_s32_x4 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (zip_z0_z4, svint32x4_t, z0, + svzip_s32_x4 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.s - z7\.s}, [^\n]+ +** ret +*/ +TEST_XN (zip_z4_z18, svint32x4_t, z4, + svzip_s32_x4 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov 
[^\n]+ +** ret +*/ +TEST_XN (zip_z18_z23, svint32x4_t, z18, + svzip_s32_x4 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svint32x4_t, z23, + svzip_s32_x4 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.s - z31\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (zip_z28_z0, svint32x4_t, z28, + svzip_s32_x4 (z0), + svzip (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..68ca3fafdf5349081b20d498ca3c04596582691b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s64_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.d - z1\.d}, z0\.d, z1\.d +** ret +*/ +TEST_XN (zip_z0_z0, svint64x2_t, z0, + svzip_s64_x2 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.d - z1\.d}, z4\.d, z5\.d +** ret +*/ +TEST_XN (zip_z0_z4, svint64x2_t, z0, + svzip_s64_x2 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** zip {z4\.d - z5\.d}, z18\.d, z19\.d +** ret +*/ +TEST_XN (zip_z4_z18, svint64x2_t, z4, + svzip_s64_x2 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** zip {z18\.d - z19\.d}, z23\.d, z24\.d +** ret +*/ +TEST_XN (zip_z18_z23, svint64x2_t, z18, + svzip_s64_x2 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, z28\.d, z29\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svint64x2_t, z23, + svzip_s64_x2 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.d - z29\.d}, z0\.d, z1\.d +** ret +*/ +TEST_XN (zip_z28_z0, svint64x2_t, z28, + svzip_s64_x2 (z0), + svzip (z0)) + +/* +** zip_z28_z0_z23: +** zip {z28\.d - z29\.d}, z0\.d, z23\.d +** ret +*/ +TEST_XN (zip_z28_z0_z23, svint64x2_t, z28, + svzip_s64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zip_z28_z5_z19: +** zip {z28\.d - z29\.d}, z5\.d, z19\.d +** ret +*/ +TEST_XN (zip_z28_z5_z19, svint64x2_t, z28, + svzip_s64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..9a63b6dda736c55cf10a3f8872c564d398b89bd7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s64_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.d - z3\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (zip_z0_z0, svint64x4_t, z0, + svzip_s64_x4 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (zip_z0_z4, svint64x4_t, z0, + svzip_s64_x4 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.d - z7\.d}, [^\n]+ +** ret +*/ +TEST_XN (zip_z4_z18, svint64x4_t, z4, + svzip_s64_x4 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z18_z23, svint64x4_t, z18, + svzip_s64_x4 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, {z28\.d - z31\.d} +** 
mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svint64x4_t, z23, + svzip_s64_x4 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.d - z31\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (zip_z28_z0, svint64x4_t, z28, + svzip_s64_x4 (z0), + svzip (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..2c98222c90073b6feffb3bf5092bcef2d577f602 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s8_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.b - z1\.b}, z0\.b, z1\.b +** ret +*/ +TEST_XN (zip_z0_z0, svint8x2_t, z0, + svzip_s8_x2 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.b - z1\.b}, z4\.b, z5\.b +** ret +*/ +TEST_XN (zip_z0_z4, svint8x2_t, z0, + svzip_s8_x2 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** zip {z4\.b - z5\.b}, z18\.b, z19\.b +** ret +*/ +TEST_XN (zip_z4_z18, svint8x2_t, z4, + svzip_s8_x2 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** zip {z18\.b - z19\.b}, z23\.b, z24\.b +** ret +*/ +TEST_XN (zip_z18_z23, svint8x2_t, z18, + svzip_s8_x2 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, z28\.b, z29\.b +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svint8x2_t, z23, + svzip_s8_x2 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.b - z29\.b}, z0\.b, z1\.b +** ret +*/ +TEST_XN (zip_z28_z0, svint8x2_t, z28, + svzip_s8_x2 (z0), + svzip (z0)) + +/* +** zip_z28_z0_z23: +** zip {z28\.b - z29\.b}, z0\.b, z23\.b +** ret +*/ +TEST_XN (zip_z28_z0_z23, svint8x2_t, z28, + svzip_s8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zip_z28_z5_z19: +** zip {z28\.b - z29\.b}, z5\.b, z19\.b +** ret +*/ +TEST_XN (zip_z28_z5_z19, svint8x2_t, z28, + svzip_s8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..6f2a2d23f214af5b0a100c998de643d25f4e0728 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s8_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_XN (zip_z0_z0, svint8x4_t, z0, + svzip_s8_x4 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (zip_z0_z4, svint8x4_t, z0, + svzip_s8_x4 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.b - z7\.b}, [^\n]+ +** ret +*/ +TEST_XN (zip_z4_z18, svint8x4_t, z4, + svzip_s8_x4 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z18_z23, svint8x4_t, z18, + svzip_s8_x4 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, {z28\.b - z31\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svint8x4_t, z23, + svzip_s8_x4 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.b - 
z31\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_XN (zip_z28_z0, svint8x4_t, z28, + svzip_s8_x4 (z0), + svzip (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..8ed76db922c514fcd688725f0fb8c7d3e2ddbbba --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u16_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.h - z1\.h}, z0\.h, z1\.h +** ret +*/ +TEST_XN (zip_z0_z0, svuint16x2_t, z0, + svzip_u16_x2 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.h - z1\.h}, z4\.h, z5\.h +** ret +*/ +TEST_XN (zip_z0_z4, svuint16x2_t, z0, + svzip_u16_x2 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** zip {z4\.h - z5\.h}, z18\.h, z19\.h +** ret +*/ +TEST_XN (zip_z4_z18, svuint16x2_t, z4, + svzip_u16_x2 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** zip {z18\.h - z19\.h}, z23\.h, z24\.h +** ret +*/ +TEST_XN (zip_z18_z23, svuint16x2_t, z18, + svzip_u16_x2 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, z28\.h, z29\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svuint16x2_t, z23, + svzip_u16_x2 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.h - z29\.h}, z0\.h, z1\.h +** ret +*/ +TEST_XN (zip_z28_z0, svuint16x2_t, z28, + svzip_u16_x2 (z0), + svzip (z0)) + +/* +** zip_z28_z0_z23: +** zip {z28\.h - z29\.h}, z0\.h, z23\.h +** ret +*/ +TEST_XN (zip_z28_z0_z23, svuint16x2_t, z28, + svzip_u16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zip_z28_z5_z19: +** zip {z28\.h - z29\.h}, z5\.h, z19\.h +** ret +*/ +TEST_XN (zip_z28_z5_z19, svuint16x2_t, z28, + svzip_u16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..2f693937b6ee0e717cebe3a056e7b8088dba688c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u16_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.h - z3\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (zip_z0_z0, svuint16x4_t, z0, + svzip_u16_x4 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (zip_z0_z4, svuint16x4_t, z0, + svzip_u16_x4 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.h - z7\.h}, [^\n]+ +** ret +*/ +TEST_XN (zip_z4_z18, svuint16x4_t, z4, + svzip_u16_x4 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z18_z23, svuint16x4_t, z18, + svzip_u16_x4 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svuint16x4_t, z23, + svzip_u16_x4 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (zip_z28_z0, svuint16x4_t, z28, + svzip_u16_x4 (z0), + svzip (z0)) diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..3970d3e20a1efcb16518ac310efd74ee56df5d6c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u32_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.s - z1\.s}, z0\.s, z1\.s +** ret +*/ +TEST_XN (zip_z0_z0, svuint32x2_t, z0, + svzip_u32_x2 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.s - z1\.s}, z4\.s, z5\.s +** ret +*/ +TEST_XN (zip_z0_z4, svuint32x2_t, z0, + svzip_u32_x2 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** zip {z4\.s - z5\.s}, z18\.s, z19\.s +** ret +*/ +TEST_XN (zip_z4_z18, svuint32x2_t, z4, + svzip_u32_x2 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** zip {z18\.s - z19\.s}, z23\.s, z24\.s +** ret +*/ +TEST_XN (zip_z18_z23, svuint32x2_t, z18, + svzip_u32_x2 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, z28\.s, z29\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svuint32x2_t, z23, + svzip_u32_x2 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.s - z29\.s}, z0\.s, z1\.s +** ret +*/ +TEST_XN (zip_z28_z0, svuint32x2_t, z28, + svzip_u32_x2 (z0), + svzip (z0)) + +/* +** zip_z28_z0_z23: +** zip {z28\.s - z29\.s}, z0\.s, z23\.s +** ret +*/ +TEST_XN (zip_z28_z0_z23, svuint32x2_t, z28, + svzip_u32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zip_z28_z5_z19: +** zip {z28\.s - z29\.s}, z5\.s, z19\.s +** ret +*/ +TEST_XN (zip_z28_z5_z19, svuint32x2_t, z28, + svzip_u32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..bba01adf7abdadfc999eae2c992d0fe20e99d8ba --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u32_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.s - z3\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (zip_z0_z0, svuint32x4_t, z0, + svzip_u32_x4 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (zip_z0_z4, svuint32x4_t, z0, + svzip_u32_x4 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.s - z7\.s}, [^\n]+ +** ret +*/ +TEST_XN (zip_z4_z18, svuint32x4_t, z4, + svzip_u32_x4 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z18_z23, svuint32x4_t, z18, + svzip_u32_x4 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svuint32x4_t, z23, + svzip_u32_x4 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.s - z31\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (zip_z28_z0, svuint32x4_t, z28, + svzip_u32_x4 (z0), + svzip (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u64_x2.c new file mode 100644 
index 0000000000000000000000000000000000000000..7aa1d601508d051fb872e29ff24affc0c14af08b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u64_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.d - z1\.d}, z0\.d, z1\.d +** ret +*/ +TEST_XN (zip_z0_z0, svuint64x2_t, z0, + svzip_u64_x2 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.d - z1\.d}, z4\.d, z5\.d +** ret +*/ +TEST_XN (zip_z0_z4, svuint64x2_t, z0, + svzip_u64_x2 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** zip {z4\.d - z5\.d}, z18\.d, z19\.d +** ret +*/ +TEST_XN (zip_z4_z18, svuint64x2_t, z4, + svzip_u64_x2 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** zip {z18\.d - z19\.d}, z23\.d, z24\.d +** ret +*/ +TEST_XN (zip_z18_z23, svuint64x2_t, z18, + svzip_u64_x2 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, z28\.d, z29\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svuint64x2_t, z23, + svzip_u64_x2 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.d - z29\.d}, z0\.d, z1\.d +** ret +*/ +TEST_XN (zip_z28_z0, svuint64x2_t, z28, + svzip_u64_x2 (z0), + svzip (z0)) + +/* +** zip_z28_z0_z23: +** zip {z28\.d - z29\.d}, z0\.d, z23\.d +** ret +*/ +TEST_XN (zip_z28_z0_z23, svuint64x2_t, z28, + svzip_u64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zip_z28_z5_z19: +** zip {z28\.d - z29\.d}, z5\.d, z19\.d +** ret +*/ +TEST_XN (zip_z28_z5_z19, svuint64x2_t, z28, + svzip_u64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..ed0c1dbfcd66a76a66bc13ffac371f098255510e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u64_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.d - z3\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (zip_z0_z0, svuint64x4_t, z0, + svzip_u64_x4 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (zip_z0_z4, svuint64x4_t, z0, + svzip_u64_x4 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.d - z7\.d}, [^\n]+ +** ret +*/ +TEST_XN (zip_z4_z18, svuint64x4_t, z4, + svzip_u64_x4 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z18_z23, svuint64x4_t, z18, + svzip_u64_x4 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svuint64x4_t, z23, + svzip_u64_x4 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.d - z31\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (zip_z28_z0, svuint64x4_t, z28, + svzip_u64_x4 (z0), + svzip (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..716edb4912deb1b6d4eedfb2a372a0b4dc35d8f0 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u8_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.b - z1\.b}, z0\.b, z1\.b +** ret +*/ +TEST_XN (zip_z0_z0, svuint8x2_t, z0, + svzip_u8_x2 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.b - z1\.b}, z4\.b, z5\.b +** ret +*/ +TEST_XN (zip_z0_z4, svuint8x2_t, z0, + svzip_u8_x2 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** zip {z4\.b - z5\.b}, z18\.b, z19\.b +** ret +*/ +TEST_XN (zip_z4_z18, svuint8x2_t, z4, + svzip_u8_x2 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** zip {z18\.b - z19\.b}, z23\.b, z24\.b +** ret +*/ +TEST_XN (zip_z18_z23, svuint8x2_t, z18, + svzip_u8_x2 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, z28\.b, z29\.b +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svuint8x2_t, z23, + svzip_u8_x2 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.b - z29\.b}, z0\.b, z1\.b +** ret +*/ +TEST_XN (zip_z28_z0, svuint8x2_t, z28, + svzip_u8_x2 (z0), + svzip (z0)) + +/* +** zip_z28_z0_z23: +** zip {z28\.b - z29\.b}, z0\.b, z23\.b +** ret +*/ +TEST_XN (zip_z28_z0_z23, svuint8x2_t, z28, + svzip_u8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zip_z28_z5_z19: +** zip {z28\.b - z29\.b}, z5\.b, z19\.b +** ret +*/ +TEST_XN (zip_z28_z5_z19, svuint8x2_t, z28, + svzip_u8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..c13ad57cd0430743af3bef2adef72341c48bcc59 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u8_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zip_z0_z0: +** zip {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_XN (zip_z0_z0, svuint8x4_t, z0, + svzip_u8_x4 (z0), + svzip (z0)) + +/* +** zip_z0_z4: +** zip {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_XN (zip_z0_z4, svuint8x4_t, z0, + svzip_u8_x4 (z4), + svzip (z4)) + +/* +** zip_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.b - z7\.b}, [^\n]+ +** ret +*/ +TEST_XN (zip_z4_z18, svuint8x4_t, z4, + svzip_u8_x4 (z18), + svzip (z18)) + +/* +** zip_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z18_z23, svuint8x4_t, z18, + svzip_u8_x4 (z23), + svzip (z23)) + +/* +** zip_z23_z28: +** zip [^\n]+, {z28\.b - z31\.b} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zip_z23_z28, svuint8x4_t, z23, + svzip_u8_x4 (z28), + svzip (z28)) + +/* +** zip_z28_z0: +** zip {z28\.b - z31\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_XN (zip_z28_z0, svuint8x4_t, z28, + svzip_u8_x4 (z0), + svzip (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_bf16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..d9432cbdd3ca66624ae0a858c32d4b8c803eed95 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_bf16_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include 
"test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z0_z0, svbfloat16x2_t, z0, + svzipq_bf16_x2 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (zipq_z0_z4, svbfloat16x2_t, z0, + svzipq_bf16_x2 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** zip {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z4_z18, svbfloat16x2_t, z4, + svzipq_bf16_x2 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** zip {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (zipq_z18_z23, svbfloat16x2_t, z18, + svzipq_bf16_x2 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svbfloat16x2_t, z23, + svzipq_bf16_x2 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z28_z0, svbfloat16x2_t, z28, + svzipq_bf16_x2 (z0), + svzipq (z0)) + +/* +** zipq_z28_z0_z23: +** zip {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (zipq_z28_z0_z23, svbfloat16x2_t, z28, + svzipq_bf16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zipq_z28_z5_z19: +** zip {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z28_z5_z19, svbfloat16x2_t, z28, + svzipq_bf16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_bf16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_bf16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..db27bed7b7c925478c842ac5418235c3929310b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_bf16_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z0_z0, svbfloat16x4_t, z0, + svzipq_bf16_x4 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (zipq_z0_z4, svbfloat16x4_t, z0, + svzipq_bf16_x4 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (zipq_z4_z18, svbfloat16x4_t, z4, + svzipq_bf16_x4 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z18_z23, svbfloat16x4_t, z18, + svzipq_bf16_x4 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svbfloat16x4_t, z23, + svzipq_bf16_x4 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z28_z0, svbfloat16x4_t, z28, + svzipq_bf16_x4 (z0), + svzipq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..928ec546b1850ca67bc5b5103910705a93a180cc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f16_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** 
zipq_z0_z0: +** zip {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z0_z0, svfloat16x2_t, z0, + svzipq_f16_x2 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (zipq_z0_z4, svfloat16x2_t, z0, + svzipq_f16_x2 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** zip {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z4_z18, svfloat16x2_t, z4, + svzipq_f16_x2 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** zip {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (zipq_z18_z23, svfloat16x2_t, z18, + svzipq_f16_x2 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svfloat16x2_t, z23, + svzipq_f16_x2 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z28_z0, svfloat16x2_t, z28, + svzipq_f16_x2 (z0), + svzipq (z0)) + +/* +** zipq_z28_z0_z23: +** zip {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (zipq_z28_z0_z23, svfloat16x2_t, z28, + svzipq_f16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zipq_z28_z5_z19: +** zip {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z28_z5_z19, svfloat16x2_t, z28, + svzipq_f16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..cfd5ce79e5e674df9385226a4671805cdf0dd2d3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f16_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z0_z0, svfloat16x4_t, z0, + svzipq_f16_x4 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (zipq_z0_z4, svfloat16x4_t, z0, + svzipq_f16_x4 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (zipq_z4_z18, svfloat16x4_t, z4, + svzipq_f16_x4 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z18_z23, svfloat16x4_t, z18, + svzipq_f16_x4 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svfloat16x4_t, z23, + svzipq_f16_x4 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z28_z0, svfloat16x4_t, z28, + svzipq_f16_x4 (z0), + svzipq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..7cf9b43bcc60ff269216bde42d27eccbbd5741f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f32_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ 
+TEST_XN (zipq_z0_z0, svfloat32x2_t, z0, + svzipq_f32_x2 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (zipq_z0_z4, svfloat32x2_t, z0, + svzipq_f32_x2 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** zip {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z4_z18, svfloat32x2_t, z4, + svzipq_f32_x2 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** zip {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (zipq_z18_z23, svfloat32x2_t, z18, + svzipq_f32_x2 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svfloat32x2_t, z23, + svzipq_f32_x2 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z28_z0, svfloat32x2_t, z28, + svzipq_f32_x2 (z0), + svzipq (z0)) + +/* +** zipq_z28_z0_z23: +** zip {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (zipq_z28_z0_z23, svfloat32x2_t, z28, + svzipq_f32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zipq_z28_z5_z19: +** zip {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z28_z5_z19, svfloat32x2_t, z28, + svzipq_f32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..fd6f6e444b63df9d74de3ff74cb04cd55035be38 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f32_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z0_z0, svfloat32x4_t, z0, + svzipq_f32_x4 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (zipq_z0_z4, svfloat32x4_t, z0, + svzipq_f32_x4 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (zipq_z4_z18, svfloat32x4_t, z4, + svzipq_f32_x4 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z18_z23, svfloat32x4_t, z18, + svzipq_f32_x4 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svfloat32x4_t, z23, + svzipq_f32_x4 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z28_z0, svfloat32x4_t, z28, + svzipq_f32_x4 (z0), + svzipq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..3360f0a3214dd4afee7391f6c240fa450d8ae622 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f64_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z0_z0, svfloat64x2_t, z0, + svzipq_f64_x2 (z0), + 
svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (zipq_z0_z4, svfloat64x2_t, z0, + svzipq_f64_x2 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** zip {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z4_z18, svfloat64x2_t, z4, + svzipq_f64_x2 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** zip {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (zipq_z18_z23, svfloat64x2_t, z18, + svzipq_f64_x2 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svfloat64x2_t, z23, + svzipq_f64_x2 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z28_z0, svfloat64x2_t, z28, + svzipq_f64_x2 (z0), + svzipq (z0)) + +/* +** zipq_z28_z0_z23: +** zip {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (zipq_z28_z0_z23, svfloat64x2_t, z28, + svzipq_f64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zipq_z28_z5_z19: +** zip {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z28_z5_z19, svfloat64x2_t, z28, + svzipq_f64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..a51b1ff163c8c509f80aa85ecabf704e4c05d08a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f64_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z0_z0, svfloat64x4_t, z0, + svzipq_f64_x4 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (zipq_z0_z4, svfloat64x4_t, z0, + svzipq_f64_x4 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (zipq_z4_z18, svfloat64x4_t, z4, + svzipq_f64_x4 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z18_z23, svfloat64x4_t, z18, + svzipq_f64_x4 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svfloat64x4_t, z23, + svzipq_f64_x4 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z28_z0, svfloat64x4_t, z28, + svzipq_f64_x4 (z0), + svzipq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..130c094a0d4ae10cda67aee06e22180878fc77e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s16_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z0_z0, svint16x2_t, z0, + svzipq_s16_x2 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z1\.q}, z4\.q, 
z5\.q +** ret +*/ +TEST_XN (zipq_z0_z4, svint16x2_t, z0, + svzipq_s16_x2 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** zip {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z4_z18, svint16x2_t, z4, + svzipq_s16_x2 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** zip {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (zipq_z18_z23, svint16x2_t, z18, + svzipq_s16_x2 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svint16x2_t, z23, + svzipq_s16_x2 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z28_z0, svint16x2_t, z28, + svzipq_s16_x2 (z0), + svzipq (z0)) + +/* +** zipq_z28_z0_z23: +** zip {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (zipq_z28_z0_z23, svint16x2_t, z28, + svzipq_s16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zipq_z28_z5_z19: +** zip {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z28_z5_z19, svint16x2_t, z28, + svzipq_s16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..f2784f3d3ac76db3c965afcf1bb780f531fbb0cf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s16_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z0_z0, svint16x4_t, z0, + svzipq_s16_x4 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (zipq_z0_z4, svint16x4_t, z0, + svzipq_s16_x4 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (zipq_z4_z18, svint16x4_t, z4, + svzipq_s16_x4 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z18_z23, svint16x4_t, z18, + svzipq_s16_x4 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svint16x4_t, z23, + svzipq_s16_x4 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z28_z0, svint16x4_t, z28, + svzipq_s16_x4 (z0), + svzipq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..cb353cba5d69f23864e736ff0db7a4607406daf6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s32_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z0_z0, svint32x2_t, z0, + svzipq_s32_x2 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (zipq_z0_z4, svint32x2_t, z0, + svzipq_s32_x2 (z4), + svzipq (z4)) + 
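A note on the semantics these assertions encode: for an x2 zipq, the
concatenation of the two result vectors holds the 128-bit quadwords of
the two input vectors interleaved, which is what the
"zip {zA.q - zB.q}, zN.q, zM.q" lines check at the register level.
A rough scalar model, purely illustrative (VL_BYTES, NQ and the
function name are assumptions, not part of the patch):

#include <stdint.h>
#include <string.h>

enum { VL_BYTES = 32,	       /* assumed vector length in bytes */
       NQ = VL_BYTES / 16 };   /* 128-bit quadwords per vector */

/* Model of svzipq_*_x2 on a tuple (a, b): quadword i of the combined
   result comes alternately from a and from b.  */
static void
zipq_x2_model (uint8_t r0[VL_BYTES], uint8_t r1[VL_BYTES],
	       const uint8_t a[VL_BYTES], const uint8_t b[VL_BYTES])
{
  uint8_t tmp[2 * VL_BYTES];
  for (int i = 0; i < NQ; ++i)
    {
      memcpy (tmp + 32 * i, a + 16 * i, 16);	  /* even quadwords */
      memcpy (tmp + 32 * i + 16, b + 16 * i, 16); /* odd quadwords */
    }
  memcpy (r0, tmp, VL_BYTES);		 /* low half of the interleave */
  memcpy (r1, tmp + VL_BYTES, VL_BYTES); /* high half of the interleave */
}

The zipq_z23_z28-style cases expect a temporary plus two movs,
presumably because a consecutive multi-vector operand has to start at a
register number that is a multiple of the tuple size, which z23 is not.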
+/* +** zipq_z4_z18: +** zip {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z4_z18, svint32x2_t, z4, + svzipq_s32_x2 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** zip {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (zipq_z18_z23, svint32x2_t, z18, + svzipq_s32_x2 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svint32x2_t, z23, + svzipq_s32_x2 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z28_z0, svint32x2_t, z28, + svzipq_s32_x2 (z0), + svzipq (z0)) + +/* +** zipq_z28_z0_z23: +** zip {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (zipq_z28_z0_z23, svint32x2_t, z28, + svzipq_s32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zipq_z28_z5_z19: +** zip {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z28_z5_z19, svint32x2_t, z28, + svzipq_s32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..109af863c5e7e28d632bf9067700b8b16d350a32 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s32_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z0_z0, svint32x4_t, z0, + svzipq_s32_x4 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (zipq_z0_z4, svint32x4_t, z0, + svzipq_s32_x4 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (zipq_z4_z18, svint32x4_t, z4, + svzipq_s32_x4 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z18_z23, svint32x4_t, z18, + svzipq_s32_x4 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svint32x4_t, z23, + svzipq_s32_x4 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z28_z0, svint32x4_t, z28, + svzipq_s32_x4 (z0), + svzipq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..8791a664dd109cab69fc2d7265d73fd3a2b9182d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s64_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z0_z0, svint64x2_t, z0, + svzipq_s64_x2 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (zipq_z0_z4, svint64x2_t, z0, + svzipq_s64_x2 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** zip {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z4_z18, 
svint64x2_t, z4, + svzipq_s64_x2 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** zip {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (zipq_z18_z23, svint64x2_t, z18, + svzipq_s64_x2 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svint64x2_t, z23, + svzipq_s64_x2 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z28_z0, svint64x2_t, z28, + svzipq_s64_x2 (z0), + svzipq (z0)) + +/* +** zipq_z28_z0_z23: +** zip {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (zipq_z28_z0_z23, svint64x2_t, z28, + svzipq_s64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zipq_z28_z5_z19: +** zip {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z28_z5_z19, svint64x2_t, z28, + svzipq_s64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..71bbd18dce3c52c5e993d78b96dcaad63767678b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s64_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z0_z0, svint64x4_t, z0, + svzipq_s64_x4 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (zipq_z0_z4, svint64x4_t, z0, + svzipq_s64_x4 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (zipq_z4_z18, svint64x4_t, z4, + svzipq_s64_x4 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z18_z23, svint64x4_t, z18, + svzipq_s64_x4 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svint64x4_t, z23, + svzipq_s64_x4 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z28_z0, svint64x4_t, z28, + svzipq_s64_x4 (z0), + svzipq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..83a17565c77a486352a8d93bfe9f337a4f3e073f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s8_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z0_z0, svint8x2_t, z0, + svzipq_s8_x2 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (zipq_z0_z4, svint8x2_t, z0, + svzipq_s8_x2 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** zip {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z4_z18, svint8x2_t, z4, + svzipq_s8_x2 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** zip {z18\.q - z19\.q}, 
z23\.q, z24\.q +** ret +*/ +TEST_XN (zipq_z18_z23, svint8x2_t, z18, + svzipq_s8_x2 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svint8x2_t, z23, + svzipq_s8_x2 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z28_z0, svint8x2_t, z28, + svzipq_s8_x2 (z0), + svzipq (z0)) + +/* +** zipq_z28_z0_z23: +** zip {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (zipq_z28_z0_z23, svint8x2_t, z28, + svzipq_s8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zipq_z28_z5_z19: +** zip {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z28_z5_z19, svint8x2_t, z28, + svzipq_s8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..05a58134779ac2c165175e10f9fefa5bed0d9e73 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s8_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z0_z0, svint8x4_t, z0, + svzipq_s8_x4 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (zipq_z0_z4, svint8x4_t, z0, + svzipq_s8_x4 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (zipq_z4_z18, svint8x4_t, z4, + svzipq_s8_x4 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z18_z23, svint8x4_t, z18, + svzipq_s8_x4 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svint8x4_t, z23, + svzipq_s8_x4 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z28_z0, svint8x4_t, z28, + svzipq_s8_x4 (z0), + svzipq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u16_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..e45ba03890994e39d4cbc06756e1ebf8cc46a5fb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u16_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z0_z0, svuint16x2_t, z0, + svzipq_u16_x2 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (zipq_z0_z4, svuint16x2_t, z0, + svzipq_u16_x2 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** zip {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z4_z18, svuint16x2_t, z4, + svzipq_u16_x2 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** zip {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (zipq_z18_z23, svuint16x2_t, z18, + svzipq_u16_x2 (z23), + svzipq (z23)) + +/* +** 
zipq_z23_z28: +** zip [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svuint16x2_t, z23, + svzipq_u16_x2 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z28_z0, svuint16x2_t, z28, + svzipq_u16_x2 (z0), + svzipq (z0)) + +/* +** zipq_z28_z0_z23: +** zip {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (zipq_z28_z0_z23, svuint16x2_t, z28, + svzipq_u16_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zipq_z28_z5_z19: +** zip {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z28_z5_z19, svuint16x2_t, z28, + svzipq_u16_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u16_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..f562a6db257fa9b374c0c22350e7295596b3a7fc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u16_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z0_z0, svuint16x4_t, z0, + svzipq_u16_x4 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (zipq_z0_z4, svuint16x4_t, z0, + svzipq_u16_x4 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (zipq_z4_z18, svuint16x4_t, z4, + svzipq_u16_x4 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z18_z23, svuint16x4_t, z18, + svzipq_u16_x4 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svuint16x4_t, z23, + svzipq_u16_x4 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z28_z0, svuint16x4_t, z28, + svzipq_u16_x4 (z0), + svzipq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u32_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..893b956449f5444495aee0d0bb8877ffd5a8c05f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u32_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z0_z0, svuint32x2_t, z0, + svzipq_u32_x2 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (zipq_z0_z4, svuint32x2_t, z0, + svzipq_u32_x2 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** zip {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z4_z18, svuint32x2_t, z4, + svzipq_u32_x2 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** zip {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (zipq_z18_z23, svuint32x2_t, z18, + svzipq_u32_x2 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret 
+*/ +TEST_XN (zipq_z23_z28, svuint32x2_t, z23, + svzipq_u32_x2 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z28_z0, svuint32x2_t, z28, + svzipq_u32_x2 (z0), + svzipq (z0)) + +/* +** zipq_z28_z0_z23: +** zip {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (zipq_z28_z0_z23, svuint32x2_t, z28, + svzipq_u32_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zipq_z28_z5_z19: +** zip {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z28_z5_z19, svuint32x2_t, z28, + svzipq_u32_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u32_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..c7c52e19d750ad2f9242ffe1d9833a138c5eb9b1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u32_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z0_z0, svuint32x4_t, z0, + svzipq_u32_x4 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (zipq_z0_z4, svuint32x4_t, z0, + svzipq_u32_x4 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (zipq_z4_z18, svuint32x4_t, z4, + svzipq_u32_x4 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z18_z23, svuint32x4_t, z18, + svzipq_u32_x4 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svuint32x4_t, z23, + svzipq_u32_x4 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z28_z0, svuint32x4_t, z28, + svzipq_u32_x4 (z0), + svzipq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u64_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..4cbf69a67ce3cc31d219b6150c7d0097803546b2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u64_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z0_z0, svuint64x2_t, z0, + svzipq_u64_x2 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (zipq_z0_z4, svuint64x2_t, z0, + svzipq_u64_x2 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** zip {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z4_z18, svuint64x2_t, z4, + svzipq_u64_x2 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** zip {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (zipq_z18_z23, svuint64x2_t, z18, + svzipq_u64_x2 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svuint64x2_t, z23, + svzipq_u64_x2 (z28), + svzipq 
(z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z28_z0, svuint64x2_t, z28, + svzipq_u64_x2 (z0), + svzipq (z0)) + +/* +** zipq_z28_z0_z23: +** zip {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (zipq_z28_z0_z23, svuint64x2_t, z28, + svzipq_u64_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zipq_z28_z5_z19: +** zip {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z28_z5_z19, svuint64x2_t, z28, + svzipq_u64_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u64_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..780e3a4231c8c4ec236e0fcf3aff9c63f54648de --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u64_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z0_z0, svuint64x4_t, z0, + svzipq_u64_x4 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (zipq_z0_z4, svuint64x4_t, z0, + svzipq_u64_x4 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (zipq_z4_z18, svuint64x4_t, z4, + svzipq_u64_x4 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z18_z23, svuint64x4_t, z18, + svzipq_u64_x4 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svuint64x4_t, z23, + svzipq_u64_x4 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z28_z0, svuint64x4_t, z28, + svzipq_u64_x4 (z0), + svzipq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u8_x2.c new file mode 100644 index 0000000000000000000000000000000000000000..1e1ee8bb458c4f79a9450881a2caa9a1cf4dd911 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u8_x2.c @@ -0,0 +1,77 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z1\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN (zipq_z0_z0, svuint8x2_t, z0, + svzipq_u8_x2 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z1\.q}, z4\.q, z5\.q +** ret +*/ +TEST_XN (zipq_z0_z4, svuint8x2_t, z0, + svzipq_u8_x2 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** zip {z4\.q - z5\.q}, z18\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z4_z18, svuint8x2_t, z4, + svzipq_u8_x2 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** zip {z18\.q - z19\.q}, z23\.q, z24\.q +** ret +*/ +TEST_XN (zipq_z18_z23, svuint8x2_t, z18, + svzipq_u8_x2 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, z28\.q, z29\.q +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svuint8x2_t, z23, + svzipq_u8_x2 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z29\.q}, z0\.q, z1\.q +** ret +*/ +TEST_XN 
(zipq_z28_z0, svuint8x2_t, z28, + svzipq_u8_x2 (z0), + svzipq (z0)) + +/* +** zipq_z28_z0_z23: +** zip {z28\.q - z29\.q}, z0\.q, z23\.q +** ret +*/ +TEST_XN (zipq_z28_z0_z23, svuint8x2_t, z28, + svzipq_u8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))), + svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0)))) + +/* +** zipq_z28_z5_z19: +** zip {z28\.q - z29\.q}, z5\.q, z19\.q +** ret +*/ +TEST_XN (zipq_z28_z5_z19, svuint8x2_t, z28, + svzipq_u8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))), + svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1)))) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u8_x4.c new file mode 100644 index 0000000000000000000000000000000000000000..20344d04f194478f0e1859c0664185e04facf47d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u8_x4.c @@ -0,0 +1,73 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +/* +** zipq_z0_z0: +** zip {z0\.q - z3\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z0_z0, svuint8x4_t, z0, + svzipq_u8_x4 (z0), + svzipq (z0)) + +/* +** zipq_z0_z4: +** zip {z0\.q - z3\.q}, {z4\.q - z7\.q} +** ret +*/ +TEST_XN (zipq_z0_z4, svuint8x4_t, z0, + svzipq_u8_x4 (z4), + svzipq (z4)) + +/* +** zipq_z4_z18: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z4\.q - z7\.q}, [^\n]+ +** ret +*/ +TEST_XN (zipq_z4_z18, svuint8x4_t, z4, + svzipq_u8_x4 (z18), + svzipq (z18)) + +/* +** zipq_z18_z23: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** zip {z[^\n]+}, {z[^\n]+} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z18_z23, svuint8x4_t, z18, + svzipq_u8_x4 (z23), + svzipq (z23)) + +/* +** zipq_z23_z28: +** zip [^\n]+, {z28\.q - z31\.q} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (zipq_z23_z28, svuint8x4_t, z23, + svzipq_u8_x4 (z28), + svzipq (z28)) + +/* +** zipq_z28_z0: +** zip {z28\.q - z31\.q}, {z0\.q - z3\.q} +** ret +*/ +TEST_XN (zipq_z28_z0, svuint8x4_t, z28, + svzipq_u8_x4 (z0), + svzipq (z0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create2_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create2_1.c index e9158ed8adf02962f9a10756d9323d00c04da9f4..3b9245e199f4deaf07cb7dacb3e63abbbe73b3db 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create2_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create2_1.c @@ -121,3 +121,21 @@ TEST_CREATE (create2_u64, svuint64x2_t, svuint64_t, TEST_CREATE (create2_f64, svfloat64x2_t, svfloat64_t, z0 = svcreate2_f64 (z5, z4), z0 = svcreate2 (z5, z4)) + +/* +** create2_b_0: +** ret +*/ +TEST_CREATE_B (create2_b_0, svboolx2_t, + p0_res = svcreate2_b (p0, p1), + p0_res = svcreate2 (p0, p1)) + +/* +** create2_b_1: +** mov p0\.b, p2\.b +** mov p1\.b, p3\.b +** ret +*/ +TEST_CREATE_B (create2_b_1, svboolx2_t, + p0_res = svcreate2_b (p2, p3), + p0_res = svcreate2 (p2, p3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_b.c new file mode 100644 index 0000000000000000000000000000000000000000..f54feeae6ba6ff01c8c63c51263e732d596be3d2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_b.c @@ -0,0 +1,55 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** get2_b_p0_0: +** mov p0\.b, p4\.b +** ret +*/ +TEST_GET_B (get2_b_p0_0, svboolx2_t, + p0 = svget2_b (p4, 0), + p0 = svget2 (p4, 0)) 
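The svboolx2_t accessor tests here and below also pin down the
register-allocation behaviour: extracting element 0 of a pair that
already lives in p4/p5 into p4 itself costs nothing (a bare ret), while
the other combinations cost exactly one predicate mov.  A minimal usage
sketch, illustrative only (the target pragma and the function name are
assumptions, not taken from the patch):

#include <arm_sve.h>

#pragma GCC target "+sme2"

/* Swap the two halves of a predicate pair using the accessors
   exercised by these tests.  */
svboolx2_t
swap_pair (svboolx2_t pair)
{
  svbool_t lo = svget2 (pair, 0);
  svbool_t hi = svget2 (pair, 1);
  return svcreate2 (hi, lo);
}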
+ +/* +** get2_b_p0_1: +** mov p0\.b, p5\.b +** ret +*/ +TEST_GET_B (get2_b_p0_1, svboolx2_t, + p0 = svget2_b (p4, 1), + p0 = svget2 (p4, 1)) + +/* +** get2_b_p4_0: +** ret +*/ +TEST_GET_B (get2_b_p4_0, svboolx2_t, + p4_res = svget2_b (p4, 0), + p4_res = svget2 (p4, 0)) + +/* +** get2_b_p4_1: +** mov p4\.b, p5\.b +** ret +*/ +TEST_GET_B (get2_b_p4_1, svboolx2_t, + p4_res = svget2_b (p4, 1), + p4_res = svget2 (p4, 1)) + +/* +** get2_b_p5_0: +** mov p5\.b, p4\.b +** ret +*/ +TEST_GET_B (get2_b_p5_0, svboolx2_t, + p5_res = svget2_b (p4, 0), + p5_res = svget2 (p4, 0)) + +/* +** get2_b_p5_1: +** ret +*/ +TEST_GET_B (get2_b_p5_1, svboolx2_t, + p5_res = svget2_b (p4, 1), + p5_res = svget2 (p4, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_b.c new file mode 100644 index 0000000000000000000000000000000000000000..30afb6abc24ccff534f9be037555eec72ea596ee --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_b.c @@ -0,0 +1,41 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** set2_b_p8_0: +** mov p9\.b, p5\.b +** mov p8\.b, p0\.b +** ret +*/ +TEST_SET_B (set2_b_p8_0, svboolx2_t, + p8 = svset2_b (p4, 0, p0), + p8 = svset2 (p4, 0, p0)) + +/* +** set2_b_p8_1: +** mov p8\.b, p4\.b +** mov p9\.b, p0\.b +** ret +*/ +TEST_SET_B (set2_b_p8_1, svboolx2_t, + p8 = svset2_b (p4, 1, p0), + p8 = svset2 (p4, 1, p0)) + +/* +** set2_b_p4_0: +** mov p4\.b, p12\.b +** ret +*/ +TEST_SET_B (set2_b_p4_0, svboolx2_t, + p4 = svset2_b (p4, 0, p12), + p4 = svset2 (p4, 0, p12)) + +/* +** set2_b_p4_1: +** mov p5\.b, p13\.b +** ret +*/ +TEST_SET_B (set2_b_p4_1, svboolx2_t, + p4 = svset2_b (p4, 1, p13), + p4 = svset2 (p4, 1, p13)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h index 5ce0be5947bed7d96ce0d212c937f33efce290e4..756fe4db3856208a56e735278026d7c2b236717a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h @@ -25,7 +25,13 @@ #define ZA_ATTR #endif -#define ATTR SM_ATTR ZA_ATTR +#ifdef SHARED_ZT0 +#define ZT0_ATTR __arm_inout("zt0") +#else +#define ZT0_ATTR +#endif + +#define ATTR SM_ATTR ZA_ATTR ZT0_ATTR #ifdef __cplusplus #define PROTO(NAME, RET, ARGS) \ @@ -232,6 +238,24 @@ return z0; \ } +#define TEST_LOAD_COUNT(NAME, TTYPE, STYPE, CODE1, CODE2) \ + PROTO (NAME, void, (const STYPE *x0, intptr_t x1)) \ + { \ + register svcount_t pn0 __asm ("pn0"); \ + register svcount_t pn7 __asm ("pn7"); \ + register svcount_t pn8 __asm ("pn8"); \ + register svcount_t pn15 __asm ("pn15"); \ + register TTYPE z0 __asm ("z0"); \ + register TTYPE z17 __asm ("z17"); \ + register TTYPE z22 __asm ("z22"); \ + register TTYPE z28 __asm ("z28"); \ + __asm volatile ("" : "=Upa" (pn0), "=Upa" (pn7), \ + "=Upa" (pn8), "=Upa" (pn15)); \ + INVOKE (CODE1, CODE2); \ + __asm volatile ("" :: "w" (z0), "w" (z17), \ + "w" (z22), "w" (z28)); \ + } + #define TEST_LOAD_GATHER_SZ(NAME, RES_TYPE, STYPE, ZTYPE, CODE1, CODE2) \ PROTO (NAME, RES_TYPE, (ZTYPE z0, ZTYPE z1, svbool_t p0, \ const STYPE *x0)) \ @@ -278,6 +302,24 @@ INVOKE (CODE1, CODE2); \ } +#define TEST_STORE_COUNT(NAME, TTYPE, STYPE, CODE1, CODE2) \ + PROTO (NAME, void, (STYPE *x0, intptr_t x1)) \ + { \ + register svcount_t pn0 __asm ("pn0"); \ + register svcount_t pn7 __asm ("pn7"); \ + register svcount_t pn8 __asm ("pn8"); \ + register svcount_t pn15 __asm 
("pn15"); \ + register TTYPE z0 __asm ("z0"); \ + register TTYPE z17 __asm ("z17"); \ + register TTYPE z22 __asm ("z22"); \ + register TTYPE z28 __asm ("z28"); \ + __asm volatile ("" : "=Upa" (pn0), "=Upa" (pn7), \ + "=Upa" (pn8), "=Upa" (pn15), \ + "=w" (z0), "=w" (z17), "=w" (z22), \ + "=w" (z28)); \ + INVOKE (CODE1, CODE2); \ + } + #define TEST_STORE_SCATTER_SZ(NAME, DATA_TYPE, STYPE, ZTYPE, CODE1, CODE2) \ PROTO (NAME, void, (DATA_TYPE z0, ZTYPE z1, svbool_t p0, \ STYPE *x0)) \ @@ -308,6 +350,79 @@ return x0; \ } +#define TEST_PN(NAME, CODE1, CODE2) \ + PROTO (NAME, void, (void)) \ + { \ + register svcount_t pn0 __asm("pn0"); \ + register svcount_t pn7 __asm("pn7"); \ + register svcount_t pn8 __asm("pn8"); \ + register svcount_t pn15 __asm("pn15"); \ + INVOKE (CODE1, CODE2); \ + __asm volatile ("" :: "Upa" (pn0), "Upa" (pn7), \ + "Upa" (pn8), "Upa" (pn15)); \ + } + +#define TEST_COUNT_PN(NAME, CODE1, CODE2) \ + PROTO (NAME, void, (void)) \ + { \ + register svcount_t pn0 __asm ("pn0"); \ + register svcount_t pn7 __asm ("pn7"); \ + register svcount_t pn8 __asm ("pn8"); \ + register svcount_t pn15 __asm ("pn15"); \ + register uint64_t x0 __asm ("x0"); \ + register uint64_t x15 __asm ("x15"); \ + register uint64_t x17 __asm ("x17"); \ + __asm volatile ("" : "=Upa" (pn0), "=Upa" (pn7), \ + "=Upa" (pn8), "=Upa" (pn15)); \ + INVOKE (CODE1, CODE2); \ + __asm volatile ("" :: "r" (x0), "r" (x15), \ + "r" (x17)); \ + } + +#define TEST_EXTRACT_PN(NAME, TYPE, CODE1, CODE2) \ + PROTO (NAME, void, (void)) \ + { \ + register svcount_t pn0 __asm ("pn0"); \ + register TYPE p2 __asm ("p2"); \ + register TYPE p5 __asm ("p5"); \ + register svcount_t pn7 __asm ("pn7"); \ + register svcount_t pn8 __asm ("pn8"); \ + register TYPE p9 __asm ("p9"); \ + register svcount_t pn11 __asm ("pn11"); \ + register TYPE p12 __asm ("p12"); \ + register svcount_t pn15 __asm ("pn15"); \ + __asm volatile ("" : "=Upa" (pn0), "=Upa" (pn7), \ + "=Upa" (pn8), "=Upa" (pn11), \ + "=Upa" (pn15)); \ + INVOKE (CODE1, CODE2); \ + __asm volatile ("" :: "Upa" (p2), "Upa" (p5), \ + "Upa" (p9), "Upa" (p12)); \ + } + +#define TEST_SELECT_P(NAME, TYPE, CODE1, CODE2) \ + PROTO (NAME, void, (void)) \ + { \ + register TYPE p0 __asm ("p0"); \ + register TYPE p2 __asm ("p2"); \ + register svbool_t p7 __asm ("p7"); \ + register svbool_t p8 __asm ("p8"); \ + register TYPE p13 __asm ("p13"); \ + register svbool_t p15 __asm ("p15"); \ + register int32_t w11 __asm ("w11"); \ + register int32_t w12 __asm ("w12"); \ + register int32_t w15 __asm ("w15"); \ + register int32_t w16 __asm ("w16"); \ + __asm volatile ("" : "=Upa" (p0), "=Upa" (p2), \ + "=Upa" (p7), "=Upa" (p8), \ + "=Upa" (p13), "=Upa" (p15), \ + "=r" (w11), "=r" (w12), \ + "=r" (w15), "=r" (w16)); \ + INVOKE (CODE1, CODE2); \ + __asm volatile ("" :: "Upa" (p0), "Upa" (p2), \ + "Upa" (p7), "Upa" (p8), \ + "Upa" (p13), "Upa" (p15)); \ + } + #define TEST_COMPARE_S(NAME, TYPE, CODE1, CODE2) \ PROTO (NAME, svbool_t, (TYPE x0, TYPE x1)) \ { \ @@ -316,6 +431,30 @@ return p0; \ } +#define TEST_COMPARE_S_X2(NAME, TYPE, CODE1, CODE2) \ + PROTO (NAME, void, (TYPE x0, TYPE x1)) \ + { \ + register svboolx2_t p1 __asm("p1"); \ + register svboolx2_t p4 __asm("p4"); \ + register svboolx2_t p9 __asm("p9"); \ + register svboolx2_t p14 __asm("p14"); \ + INVOKE (CODE1, CODE2); \ + __asm volatile ("" :: "Upa" (p1), "Upa" (p4), \ + "Upa" (p9), "Upa" (p14)); \ + } + +#define TEST_COMPARE_S_C(NAME, TYPE, CODE1, CODE2) \ + PROTO (NAME, void, (TYPE x0, TYPE x1)) \ + { \ + register svcount_t pn0 __asm("pn0"); \ + 
register svcount_t pn7 __asm("pn7"); \ + register svcount_t pn8 __asm("pn8"); \ + register svcount_t pn15 __asm("pn15"); \ + INVOKE (CODE1, CODE2); \ + __asm volatile ("" :: "Upa" (pn0), "Upa" (pn7), \ + "Upa" (pn8), "Upa" (pn15)); \ + } + #define TEST_COMPARE_Z(NAME, TYPE, CODE1, CODE2) \ PROTO (NAME, svbool_t, (TYPE z0, TYPE z1, \ svbool_t p0, svbool_t p1)) \ @@ -414,6 +553,15 @@ return z0; \ } +#define TEST_CREATE_B(NAME, TTYPE, CODE1, CODE2) \ + PROTO (NAME, TTYPE, (svbool_t p0, svbool_t p1, \ + svbool_t p2, svbool_t p3)) \ + { \ + TTYPE p0_res; \ + INVOKE (CODE1, CODE2); \ + return p0_res; \ + } + #define TEST_GET(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ PROTO (NAME, void, (ZTYPE unused0, ZTYPE unused1, \ ZTYPE unused2, ZTYPE unused3, TTYPE z4)) \ @@ -428,6 +576,22 @@ "w" (z6_res), "w" (z7_res)); \ } +#define TEST_GET_B(NAME, TTYPE, CODE1, CODE2) \ + PROTO (NAME, void, (void)) \ + { \ + register svbool_t p0 __asm ("p0"); \ + register TTYPE p4 __asm ("p4"); \ + register svbool_t p4_res __asm ("p4"); \ + register svbool_t p5_res __asm ("p5"); \ + register svbool_t p6_res __asm ("p6"); \ + register svbool_t p7_res __asm ("p7"); \ + __asm volatile ("" : "=Upa" (p0), "=Upa" (p4)); \ + INVOKE (CODE1, CODE2); \ + __asm volatile ("" :: "Upa" (p0), "Upa" (p4_res), \ + "Upa" (p5_res), "Upa" (p6_res), \ + "Upa" (p7_res)); \ + } + #define TEST_SET(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ PROTO (NAME, void, (ZTYPE z0, ZTYPE z1, ZTYPE z2, ZTYPE z3, \ TTYPE z4)) \ @@ -437,6 +601,20 @@ __asm volatile ("" :: "w" (z4), "w" (z24)); \ } +#define TEST_SET_B(NAME, TTYPE, CODE1, CODE2) \ + PROTO (NAME, void, (void)) \ + { \ + register svbool_t p0 __asm ("p0"); \ + register TTYPE p4 __asm ("p4"); \ + register TTYPE p8 __asm ("p8"); \ + register svbool_t p12 __asm ("p12"); \ + register svbool_t p13 __asm ("p13"); \ + __asm volatile ("" : "=Upa" (p0), "=Upa" (p4), \ + "=Upa" (p12), "=Upa" (p13)); \ + INVOKE (CODE1, CODE2); \ + __asm volatile ("" :: "Upa" (p4), "Upa" (p8)); \ + } + #define TEST_TBL2(NAME, TTYPE, ZTYPE, UTYPE, CODE1, CODE2) \ PROTO (NAME, ZTYPE, (TTYPE z0, TTYPE z2, UTYPE z4)) \ { \ @@ -453,6 +631,25 @@ return z0_res; \ } +#define TEST_XN(NAME, TTYPE, RES, CODE1, CODE2) \ + PROTO (NAME, void, ()) \ + { \ + register TTYPE z0 __asm ("z0"); \ + register TTYPE z4 __asm ("z4"); \ + register TTYPE z18 __asm ("z18"); \ + register TTYPE z23 __asm ("z23"); \ + register TTYPE z28 __asm ("z28"); \ + register svcount_t pn0 __asm ("pn0"); \ + register svcount_t pn7 __asm ("pn7"); \ + register svcount_t pn8 __asm ("pn8"); \ + register svcount_t pn15 __asm ("pn15"); \ + __asm volatile ("" : "=w" (z0), "=w" (z4), "=w" (z18), \ + "=w" (z23), "=w" (z28), "=Upa" (pn0), \ + "=Upa" (pn7), "=Upa" (pn8), "=Upa" (pn15)); \ + INVOKE (RES = CODE1, RES = CODE2); \ + __asm volatile ("" :: "w" (RES)); \ + } + #define TEST_DUAL_XN(NAME, TTYPE1, TTYPE2, RES, CODE1, CODE2) \ PROTO (NAME, void, ()) \ { \ @@ -467,4 +664,74 @@ __asm volatile ("" :: "w" (RES)); \ } +#define TEST_XN_SINGLE(NAME, TTYPE, ZTYPE, RES, CODE1, CODE2) \ + PROTO (NAME, void, ()) \ + { \ + register ZTYPE z0 __asm ("z0"); \ + register TTYPE z1 __asm ("z1"); \ + register ZTYPE z5 __asm ("z5"); \ + register ZTYPE z7 __asm ("z7"); \ + register ZTYPE z16 __asm ("z16"); \ + register TTYPE z18 __asm ("z18"); \ + register ZTYPE z23 __asm ("z23"); \ + register TTYPE z24 __asm ("z24"); \ + register TTYPE z28 __asm ("z28"); \ + __asm volatile ("" : "=w" (z0), "=w" (z1), "=w" (z5), \ + "=w" (z7), "=w" (z16), "=w" (z18), \ + "=w" (z23), "=w" (z24), "=w" (z28)); \ + INVOKE (RES = 
CODE1, RES = CODE2); \ + __asm volatile ("" :: "w" (RES)); \ + } + +#define TEST_XN_SINGLE_Z15(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ + PROTO (NAME, TTYPE, (TTYPE z0)) \ + { \ + register ZTYPE z15 __asm ("z15"); \ + __asm volatile ("" : "=w" (z15)); \ + INVOKE (CODE1, CODE2); \ + return z0; \ + } + +#define TEST_XN_SINGLE_AWKWARD(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ + PROTO (NAME, TTYPE, (ZTYPE z0, TTYPE z1, ZTYPE zn)) \ + { \ + TTYPE z0_res; \ + INVOKE (CODE1, CODE2); \ + return z0_res; \ + } + +#define TEST_X2_NARROW(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ + PROTO (NAME, void, ()) \ + { \ + register TTYPE z0 __asm ("z0"); \ + register ZTYPE z5 __asm ("z5"); \ + register TTYPE z6 __asm ("z6"); \ + register TTYPE z16 __asm ("z16"); \ + register ZTYPE z22 __asm ("z22"); \ + register TTYPE z29 __asm ("z29"); \ + register ZTYPE z0_res __asm ("z0"); \ + __asm volatile ("" : "=w" (z0), "=w" (z5), "=w" (z6), \ + "=w" (z16), "=w" (z22), "=w" (z29)); \ + INVOKE (CODE1, CODE2); \ + __asm volatile ("" :: "w" (z0_res), "w" (z5), "w" (z22)); \ + } + +#define TEST_X4_NARROW(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ + PROTO (NAME, void, ()) \ + { \ + register TTYPE z0 __asm ("z0"); \ + register TTYPE z4 __asm ("z4"); \ + register TTYPE z16 __asm ("z16"); \ + register TTYPE z21 __asm ("z21"); \ + register ZTYPE z25 __asm ("z25"); \ + register TTYPE z26 __asm ("z26"); \ + register ZTYPE z0_res __asm ("z0"); \ + register ZTYPE z22_res __asm ("z22"); \ + __asm volatile ("" : "=w" (z0), "=w" (z4), "=w" (z16), \ + "=w" (z21), "=w" (z26)); \ + INVOKE (CODE1, CODE2); \ + __asm volatile ("" :: "w" (z0_res), "w" (z22_res), \ + "w" (z25)); \ + } + #endif diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_single_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_single_n_1.c new file mode 100644 index 0000000000000000000000000000000000000000..f0b2dbb41be779c00c9b3a0d9501e8b0dde8fb1e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_single_n_1.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ + +#pragma GCC target "+sve2" + +#include <arm_sve.h> + +void +f1 (svbool_t pg, svfloat16_t f16, svint16_t s16, svuint16_t u16, + svfloat32_t f32, svint32_t s32, svuint32_t u32, svint32x2_t s32x2, + svuint32x2_t u32x2) +{ + svrshl_x (pg, s16); /* { dg-error {too few arguments to function 'svrshl_x'} } */ + svrshl_x (pg, s16, s16, s16); /* { dg-error {too many arguments to function 'svrshl_x'} } */ + svrshl_x (s32, s16, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svrshl_x', which expects 'svbool_t'} } */ + svrshl_x (1, s16, s32); /* { dg-error {passing 'int' to argument 1 of 'svrshl_x', which expects 'svbool_t'} } */ + svrshl_x (pg, pg, s16); /* { dg-error {'svrshl_x' has no form that takes 'svbool_t' arguments} } */ + svrshl_x (pg, 1, s16); /* { dg-error {passing 'int' to argument 2 of 'svrshl_x', which expects an SVE type rather than a scalar} } */ + svrshl_x (pg, s16, s16); + svrshl_x (pg, s16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svrshl_x', which expects a vector of signed integers} } */ + svrshl_x (pg, s16, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svrshl_x', which expects a vector of signed integers} } */ + svrshl_x (pg, s16, s32); /* { dg-error {arguments 2 and 3 of 'svrshl_x' must have the same element size, but the values passed here have type 'svint16_t' and 'svint32_t' respectively} } */ + svrshl_x (pg, s16, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svrshl_x', which expects a 
vector of signed integers} } */ + svrshl_x (pg, s16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svrshl_x', which expects a vector of signed integers} } */ + svrshl_x (pg, s16, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svrshl_x', which expects a vector of signed integers} } */ + svrshl_x (pg, s16, 0); + svrshl_x (pg, f16, s16); /* { dg-error {'svrshl_x' has no form that takes 'svfloat16_t' arguments} } */ + svrshl_x (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svrshl_x', which expects a vector of signed integers} } */ + svrshl_x (pg, f16, s32); /* { dg-error {'svrshl_x' has no form that takes 'svfloat16_t' arguments} } */ + svrshl_x (pg, f16, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svrshl_x', which expects a vector of signed integers} } */ + svrshl_x (pg, u16, s16); + + svrshl_x (pg, s32x2, s32x2); /* { dg-error {'svrshl_x' has no form that takes 'svint32x2_t' arguments} } */ + svrshl_x (pg, s32x2, u32x2); /* { dg-error {passing 'svuint32x2_t' to argument 3 of 'svrshl_x', which expects vectors of signed integers} } */ + svrshl_x (pg, s32x2, s32); /* { dg-error {'svrshl_x' has no form that takes 'svint32x2_t' arguments} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_single_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_single_n_2.c new file mode 100644 index 0000000000000000000000000000000000000000..976d5af7f2373d826bfdb7fcef0a6d267f172db4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_single_n_2.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ + +#pragma GCC target "+sme2" + +#include <arm_sve.h> + +void +f1 (svbool_t pg, svfloat16x2_t f16x2, svint16x2_t s16x2, svuint16x2_t u16x2, + svfloat32x2_t f32x2, svint32x2_t s32x2, svuint32x2_t u32x2, + svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32, + svfloat32_t f32) + __arm_streaming +{ + svrshl (s16x2); /* { dg-error {too few arguments to function 'svrshl'} } */ + svrshl (s16x2, s16x2, s16x2); /* { dg-error {too many arguments to function 'svrshl'} } */ + svrshl (pg, s16x2); /* { dg-error {'svrshl' has no form that takes 'svbool_t' arguments} } */ + svrshl (1, s16x2); /* { dg-error {passing 'int' to argument 1 of 'svrshl', which expects an SVE type rather than a scalar} } */ + svrshl (s16, s16); /* { dg-error {'svrshl' has no form that takes 'svint16_t' arguments} } */ + svrshl (s16x2, s16x2); + svrshl (s16x2, u16x2); /* { dg-error {passing 'svuint16x2_t' to argument 2 of 'svrshl', which expects vectors of signed integers} } */ + svrshl (s16x2, f16x2); /* { dg-error {passing 'svfloat16x2_t' to argument 2 of 'svrshl', which expects vectors of signed integers} } */ + svrshl (s16x2, s32x2); /* { dg-error {arguments 1 and 2 of 'svrshl' must have the same element size, but the values passed here have type 'svint16x2_t' and 'svint32x2_t' respectively} } */ + svrshl (s32x2, s16); /* { dg-error {arguments 1 and 2 of 'svrshl' must have the same element size, but the values passed here have type 'svint32x2_t' and 'svint16_t' respectively} } */ + svrshl (s32x2, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svrshl', which expects a vector of signed integers} } */ + svrshl (s32x2, s32); + svrshl (s32x2, u32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svrshl', which expects a vector of signed integers} } */ + svrshl (s32x2, f32); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svrshl', which expects a vector of signed integers} } */ + 
svrshl (s16x2, u32x2); /* { dg-error {passing 'svuint32x2_t' to argument 2 of 'svrshl', which expects vectors of signed integers} } */ + svrshl (s16x2, f32x2); /* { dg-error {passing 'svfloat32x2_t' to argument 2 of 'svrshl', which expects vectors of signed integers} } */ + svrshl (s16x2, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svrshl', which expects a vector of signed integers} } */ + svrshl (s16x2, 0); /* { dg-error {passing 'int' to argument 2 of 'svrshl', which expects an SVE type rather than a scalar type} } */ + svrshl (f16x2, s16x2); /* { dg-error {'svrshl' has no form that takes 'svfloat16x2_t' arguments} } */ + svrshl (f16x2, u16x2); /* { dg-error {passing 'svuint16x2_t' to argument 2 of 'svrshl', which expects vectors of signed integers} } */ + svrshl (f16x2, s32x2); /* { dg-error {'svrshl' has no form that takes 'svfloat16x2_t' arguments} } */ + svrshl (u16x2, s16x2); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_single_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_single_n_1.c new file mode 100644 index 0000000000000000000000000000000000000000..9676de711f4a6812e9bd0c8cd6272ba0f2879ce2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_single_n_1.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ + +#pragma GCC target "+sve2" + +#include <arm_sve.h> + +void +f1 (svbool_t pg, svfloat16_t f16, svint16_t s16, svuint16_t u16, + svfloat32_t f32, svint32_t s32, svuint32_t u32, svint32x2_t s32x2, + svuint32x2_t u32x2) +{ + svqdmulh (s16); /* { dg-error {too few arguments to function 'svqdmulh'} } */ + svqdmulh (s16, s16, s16); /* { dg-error {too many arguments to function 'svqdmulh'} } */ + svqdmulh (pg, pg); /* { dg-error {'svqdmulh' has no form that takes 'svbool_t' arguments} } */ + svqdmulh (1, s16); /* { dg-error {passing 'int' to argument 1 of 'svqdmulh', which expects an SVE type rather than a scalar} } */ + svqdmulh (s16, s16); + svqdmulh (s16, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svqdmulh', but argument 1 had type 'svint16_t'} } */ + svqdmulh (s16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svqdmulh', but argument 1 had type 'svint16_t'} } */ + svqdmulh (s16, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svqdmulh', but argument 1 had type 'svint16_t'} } */ + svqdmulh (s32, s32x2); /* { dg-error {passing tuple 'svint32x2_t' to argument 2 of 'svqdmulh' after passing single vector 'svint32_t' to argument 1} } */ + svqdmulh (s16, 0); + svqdmulh (f16, f16); /* { dg-error {'svqdmulh' has no form that takes 'svfloat16_t' arguments} } */ + svqdmulh (u16, u16); /* { dg-error {'svqdmulh' has no form that takes 'svuint16_t' arguments} } */ + + svqdmulh (s32x2, s32x2); /* { dg-error {ACLE function 'svqdmulh_s32_x2' can only be called when SME streaming mode is enabled} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_single_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_single_n_2.c new file mode 100644 index 0000000000000000000000000000000000000000..5cc8a4c5c50fcd663db2c43c16ae5aec9b1b14a1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_single_n_2.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ + +#pragma GCC target "+sme2" + +#include <arm_sve.h> + +void +f1 (svbool_t pg, svfloat16x2_t f16x2, svint16x2_t s16x2, svuint16x2_t u16x2, + svfloat32x2_t f32x2, svint32x2_t s32x2, svuint32x2_t u32x2, + svint32x3_t s32x3, svint32x4_t s32x4, + svint16_t s16, 
svuint16_t u16, svint32_t s32, svuint32_t u32, + svfloat32_t f32) + __arm_streaming +{ + svqdmulh (s16x2); /* { dg-error {too few arguments to function 'svqdmulh'} } */ + svqdmulh (s16x2, s16x2, s16x2); /* { dg-error {too many arguments to function 'svqdmulh'} } */ + svqdmulh (pg, s16x2); /* { dg-error {'svqdmulh' has no form that takes 'svbool_t' arguments} } */ + svqdmulh (1, s16x2); /* { dg-error {passing 'int' to argument 1 of 'svqdmulh', which expects an SVE type rather than a scalar} } */ + svqdmulh (s16, s16); + svqdmulh (s16x2, s16x2); + svqdmulh (s16x2, u16x2); /* { dg-error {passing 'svuint16x2_t' to argument 2 of 'svqdmulh', but argument 1 had type 'svint16x2_t'} } */ + svqdmulh (s16x2, f16x2); /* { dg-error {passing 'svfloat16x2_t' to argument 2 of 'svqdmulh', but argument 1 had type 'svint16x2_t'} } */ + svqdmulh (s32x2, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svqdmulh', but argument 1 was a tuple of 'svint32_t'} } */ + svqdmulh (s32x2, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svqdmulh', but argument 1 was a tuple of 'svint32_t'} } */ + svqdmulh (s32x2, s32); + svqdmulh (s32x2, s32x3); /* { dg-error {passing mismatched tuple types 'svint32x2_t' and 'svint32x3_t' to arguments 1 and 2 of 'svqdmulh'} } */ + svqdmulh (s32x2, s32x4); /* { dg-error {passing mismatched tuple types 'svint32x2_t' and 'svint32x4_t' to arguments 1 and 2 of 'svqdmulh'} } */ + svqdmulh (s32x3, s32x2); /* { dg-error {'svqdmulh' has no form that takes 'svint32x3_t' arguments} } */ + svqdmulh (s32x3, s32x3); /* { dg-error {'svqdmulh' has no form that takes 'svint32x3_t' arguments} } */ + svqdmulh (s32x4, s32x2); /* { dg-error {passing mismatched tuple types 'svint32x4_t' and 'svint32x2_t' to arguments 1 and 2 of 'svqdmulh'} } */ + svqdmulh (s32x4, s32x3); /* { dg-error {passing mismatched tuple types 'svint32x4_t' and 'svint32x3_t' to arguments 1 and 2 of 'svqdmulh'} } */ + svqdmulh (s32x4, s32x4); + svqdmulh (u32x2, u32x2); /* { dg-error {'svqdmulh' has no form that takes 'svuint32x2_t' arguments} } */ + svqdmulh (u32x2, u32); /* { dg-error {'svqdmulh' has no form that takes 'svuint32x2_t' arguments} } */ + + svqdmulh (s16x2, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svqdmulh', but argument 1 was a tuple of 'svint16_t'} } */ + svqdmulh (s16x2, 0); /* { dg-error {passing 'int' to argument 2 of 'svqdmulh', which expects an SVE type rather than a scalar type} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_single_1.c new file mode 100644 index 0000000000000000000000000000000000000000..aa7633bb322f331ce57f293bfda2c7e124e63873 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_single_1.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ + +#pragma GCC target "+sme2" + +#include <arm_sve.h> + +void +f1 (svbool_t pg, svfloat16x2_t f16x2, svint16x2_t s16x2, svuint16x2_t u16x2, + svfloat32x2_t f32x2, svint32x2_t s32x2, svuint32x2_t u32x2, + svint32x3_t s32x3, svint32x4_t s32x4, + svint16_t s16, svuint16_t u16, svfloat16_t f16, svint32_t s32, + svuint32_t u32, svfloat32_t f32) + __arm_streaming +{ + svadd (s16x2); /* { dg-error {too few arguments to function 'svadd'} } */ + svadd (s16x2, s16x2, s16x2); /* { dg-error {too many arguments to function 'svadd'} } */ + svadd (pg, s16x2); /* { dg-error {passing 'svint16x2_t' to argument 2 of 'svadd', which expects a single SVE vector rather than a tuple} } */ + svadd (1, s16x2); /* { dg-error {passing 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_single_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..aa7633bb322f331ce57f293bfda2c7e124e63873
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_single_1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+
+#pragma GCC target "+sme2"
+
+#include <arm_sve.h>
+
+void
+f1 (svbool_t pg, svfloat16x2_t f16x2, svint16x2_t s16x2, svuint16x2_t u16x2,
+    svfloat32x2_t f32x2, svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint32x3_t s32x3, svint32x4_t s32x4,
+    svint16_t s16, svuint16_t u16, svfloat16_t f16, svint32_t s32,
+    svuint32_t u32, svfloat32_t f32)
+  __arm_streaming
+{
+  svadd (s16x2); /* { dg-error {too few arguments to function 'svadd'} } */
+  svadd (s16x2, s16x2, s16x2); /* { dg-error {too many arguments to function 'svadd'} } */
+  svadd (pg, s16x2); /* { dg-error {passing 'svint16x2_t' to argument 2 of 'svadd', which expects a single SVE vector rather than a tuple} } */
+  svadd (1, s16x2); /* { dg-error {passing 'int' to argument 1 of 'svadd', which expects an SVE type rather than a scalar} } */
+  svadd (s16, s16); /* { dg-error {'svadd' has no form that takes 'svint16_t' arguments} } */
+  svadd (s16x2, s16x2); /* { dg-error {passing 'svint16x2_t' to argument 2 of 'svadd', which expects a single SVE vector rather than a tuple} } */
+  svadd (s16x2, u16x2); /* { dg-error {passing 'svuint16x2_t' to argument 2 of 'svadd', which expects a single SVE vector rather than a tuple} } */
+  svadd (s16x2, s16);
+  svadd (s16x2, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svadd', but argument 1 was a tuple of 'svint16_t'} } */
+  svadd (s16x2, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svadd', but argument 1 was a tuple of 'svint16_t'} } */
+  svadd (s32x2, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svadd', but argument 1 was a tuple of 'svint32_t'} } */
+  svadd (s32x2, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svadd', but argument 1 was a tuple of 'svint32_t'} } */
+  svadd (s32x2, s32);
+  svadd (s32x3, s32); /* { dg-error {'svadd' has no form that takes 'svint32x3_t' arguments} } */
+  svadd (s32x4, s32x2); /* { dg-error {passing 'svint32x2_t' to argument 2 of 'svadd', which expects a single SVE vector rather than a tuple} } */
+  svadd (f32x2, f32); /* { dg-error {'svadd' has no form that takes 'svfloat32x2_t' arguments} } */
+
+  svadd (s16x2, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadd', but argument 1 was a tuple of 'svint16_t'} } */
+  svadd (s16x2, 0); /* { dg-error {passing 'int' to argument 2 of 'svadd', which expects an SVE type rather than a scalar type} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c
index 7e91a41cc42fea2ab725e0894e839cb08f75378e..44c3e48e9164f7681e61e7773c8e4896d5eb2c4e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c
@@ -20,7 +20,6 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svint32_t s32,
   svmopa_za32_m (0, pg, pg, u8, s8); /* { dg-error {passing 'svint8_t'.* to argument 5 of 'svmopa_za32_m', but argument 4 had type 'svuint8_t'} } */
   svmopa_za32_m (0, pg, pg, s8, f16); /* { dg-error {passing 'svfloat16_t'.* to argument 5 of 'svmopa_za32_m', but argument 4 had type 'svint8_t'} } */
   svmopa_za32_m (0, pg, pg, pg, pg); /* { dg-error {'svmopa_za32_m' has no form that takes 'svbool_t' arguments} } */
-  svmopa_za32_m (0, pg, pg, s16, s16); /* { dg-error {'svmopa_za32_m' has no form that takes 'svint16_t' arguments} } */
   svmopa_za32_m (0, pg, pg, s32, s32); /* { dg-error {'svmopa_za32_m' has no form that takes 'svint32_t' arguments} } */
   svmopa_za32_m (0, pg, pg, f64, f64); /* { dg-error {'svmopa_za32_m' has no form that takes 'svfloat64_t' arguments} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_int_opt_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_int_opt_single_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..01cd88f180b84c1b5644ae81eb892c920dc58bc0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_int_opt_single_1.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint16_t s16, svint8_t s8, svuint8_t u8,
+    svint16x2_t s16x2, svuint16x2_t u16x2, svint8x2_t s8x2, svuint8x2_t u8x2,
+    svint8x3_t s8x3, svuint8x3_t u8x3,
+    svint8x4_t s8x4, svuint8x4_t u8x4,
+    svint64x2_t s64x2, svuint64x2_t u64x2,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svusdot_za32_vg1x2 (1, u8x2); /* { dg-error {too few arguments to function 'svusdot_za32_vg1x2'} } */
+  svusdot_za32_vg1x2 (1, u8x2, s8x2, s8x2); /* { dg-error {too many arguments to function 'svusdot_za32_vg1x2'} } */
+
+  svusdot_za32_vg1x2 (s8x2, u8x2, s8x2); /* { dg-error {passing 'svint8x2_t' to argument 1 of 'svusdot_za32_vg1x2', which expects 'uint32_t'} } */
+  svusdot_za32_vg1x2 (f, u8x2, s8x2);
+  svusdot_za32_vg1x2 (d, u8x2, s8x2);
+  svusdot_za32_vg1x2 (pg, u8x2, s8x2); /* { dg-error {passing 'svbool_t' to argument 1 of 'svusdot_za32_vg1x2', which expects 'uint32_t'} } */
+
+  svusdot_za32_vg1x2 (1, 1, s8x2); /* { dg-error {passing 'int' to argument 2 of 'svusdot_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svusdot_za32_vg1x2 (1, pg, s8x2); /* { dg-error {passing 'svbool_t' to argument 2 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svusdot_za32_vg1x2 (1, s8, s8x2); /* { dg-error {passing single vector 'svint8_t' to argument 2 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svusdot_za32_vg1x2 (1, u8x3, s8x3); /* { dg-error {passing 'svuint8x3_t' to argument 2 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svusdot_za32_vg1x2 (1, u8x4, s8x4); /* { dg-error {passing 'svuint8x4_t' to argument 2 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+
+  svusdot_za32_vg1x2 (1, u8x2, 1); /* { dg-error {passing 'int' to argument 3 of 'svusdot_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svusdot_za32_vg1x2 (1, u8x2, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svusdot_za32_vg1x2', which expects a vector of signed integers} } */
+  svusdot_za32_vg1x2 (1, u8x2, s16); /* { dg-error {arguments 2 and 3 of 'svusdot_za32_vg1x2' must have the same element size, but the values passed here have type 'svuint8x2_t' and 'svint16_t' respectively} } */
+  svusdot_za32_vg1x2 (1, u8x2, s16x2); /* { dg-error {arguments 2 and 3 of 'svusdot_za32_vg1x2' must have the same element size, but the values passed here have type 'svuint8x2_t' and 'svint16x2_t' respectively} } */
+  svusdot_za32_vg1x2 (1, u8x2, s8);
+  svusdot_za32_vg1x2 (1, u8x2, s8x2);
+  svusdot_za32_vg1x2 (1, u8x2, s8x3); /* { dg-error {passing 'svint8x3_t' to argument 3 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svusdot_za32_vg1x2 (1, u8x2, s8x4); /* { dg-error {passing 'svint8x4_t' to argument 3 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svusdot_za32_vg1x2 (1, u8x2, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusdot_za32_vg1x2', which expects a vector of signed integers} } */
+  svusdot_za32_vg1x2 (1, u8x2, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 3 of 'svusdot_za32_vg1x2', which expects vectors of signed integers} } */
+  svusdot_za32_vg1x2 (1, u8x2, s8x3); /* { dg-error {passing 'svint8x3_t' to argument 3 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svusdot_za32_vg1x2 (1, u8x2, s8x4); /* { dg-error {passing 'svint8x4_t' to argument 3 of 'svusdot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svusdot_za32_vg1x2 (1, s8x2, s8); /* { dg-error {'svusdot_za32_vg1x2' has no form that takes 'svint8x2_t' arguments} } */
+  svusdot_za32_vg1x2 (1, s8x2, s8x2); /* { dg-error {'svusdot_za32_vg1x2' has no form that takes 'svint8x2_t' arguments} } */
+
+  svusdot_za32_vg1x2 (1, u16x2, s16); /* { dg-error {'svusdot_za32_vg1x2' has no form that takes 'svuint16x2_t' arguments} } */
+  svusdot_za32_vg1x2 (1, u16x2, s16x2); /* { dg-error {'svusdot_za32_vg1x2' has no form that takes 'svuint16x2_t' arguments} } */
+  svusdot_za32_vg1x2 (1, s64x2, s64x2); /* { dg-error {'svusdot_za32_vg1x2' has no form that takes 'svint64x2_t' arguments} } */
+  svusdot_za32_vg1x2 (1, u64x2, s64x2); /* { dg-error {'svusdot_za32_vg1x2' has no form that takes 'svuint64x2_t' arguments} } */
+}
+
+void
+f2 (svint8x2_t s8x2, svuint8x2_t u8x2) __arm_streaming
+{
+  svusdot_za32_vg1x2 (0, u8x2, s8x2); /* { dg-error {ACLE function 'svusdot_za32_u8_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint8x2_t s8x2, svuint8x2_t u8x2) __arm_inout("za")
+{
+  svusdot_za32_vg1x2 (0, u8x2, s8x2); /* { dg-error {ACLE function 'svusdot_za32_u8_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
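Reviewer note (a hedged sketch, not part of the patch; names invented): the shapes the test above accepts for svusdot_za32_vg1x2 are slice + unsigned 8-bit tuple + signed 8-bit tuple, or slice + unsigned 8-bit tuple + single signed 8-bit vector. For example:

  #pragma GCC target "+sme2"
  #include <arm_sme.h>

  /* US dot-product: unsigned-by-signed 8-bit elements, accumulated into
     32-bit ZA elements, two-vector-group form.  */
  void
  usdot_acc (uint32_t slice, svuint8x2_t u, svint8x2_t s)
    __arm_streaming __arm_inout("za")
  {
    svusdot_za32_vg1x2 (slice, u, s);
  }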
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..937d992b0541bd842ced39e84e5b8f61f248ffce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_1.c
@@ -0,0 +1,73 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
+    svint16x2_t s16x2, svuint16x2_t u16x2,
+    svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint16x3_t s16x3, svuint16x3_t u16x3,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svmla_lane_za32_vg2x1 (0, s16, s16); /* { dg-error {too few arguments to function 'svmla_lane_za32_vg2x1'} } */
+  svmla_lane_za32_vg2x1 (0, s16, s16, 0, 0); /* { dg-error {too many arguments to function 'svmla_lane_za32_vg2x1'} } */
+
+  svmla_lane_za32_vg2x1 (s16, s16, s16, 0); /* { dg-error {passing 'svint16_t' to argument 1 of 'svmla_lane_za32_vg2x1', which expects 'uint32_t'} } */
+  svmla_lane_za32_vg2x1 (f, s16, s16, 0);
+  svmla_lane_za32_vg2x1 (d, s16, s16, 0);
+  svmla_lane_za32_vg2x1 (pg, s16, s16, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svmla_lane_za32_vg2x1', which expects 'uint32_t'} } */
+
+  svmla_lane_za32_vg2x1 (0, 1, s16, 0); /* { dg-error {passing 'int' to argument 2 of 'svmla_lane_za32_vg2x1', which expects an SVE type rather than a scalar type} } */
+  svmla_lane_za32_vg2x1 (0, pg, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_lane_za32_vg2x1', but argument 2 had type 'svbool_t'} } */
+  svmla_lane_za32_vg2x1 (0, s16x2, s16, 0); /* { dg-error {passing 'svint16x2_t' to argument 2 of 'svmla_lane_za32_vg2x1', which expects a single SVE vector rather than a tuple} } */
+  svmla_lane_za32_vg2x1 (0, s16x3, s16, 0); /* { dg-error {passing 'svint16x3_t' to argument 2 of 'svmla_lane_za32_vg2x1', which expects a single SVE vector rather than a tuple} } */
+
+  svmla_lane_za32_vg2x1 (0, s16, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svmla_lane_za32_vg2x1', which expects an SVE type rather than a scalar type} } */
+  svmla_lane_za32_vg2x1 (0, s16, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_lane_za32_vg2x1', but argument 2 had type 'svint16_t'} } */
+  svmla_lane_za32_vg2x1 (0, s16, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_lane_za32_vg2x1', but argument 2 had type 'svint16_t'} } */
+  svmla_lane_za32_vg2x1 (0, s16, s32, 0); /* { dg-error {passing 'svint32_t' to argument 3 of 'svmla_lane_za32_vg2x1', but argument 2 had type 'svint16_t'} } */
+  svmla_lane_za32_vg2x1 (0, s16, s16x2, 0); /* { dg-error {passing 'svint16x2_t' to argument 3 of 'svmla_lane_za32_vg2x1', which expects a single SVE vector rather than a tuple} } */
+  svmla_lane_za32_vg2x1 (0, u16, u16, 0);
+  svmla_lane_za32_vg2x1 (0, u16, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_lane_za32_vg2x1', but argument 2 had type 'svuint16_t'} } */
+  svmla_lane_za32_vg2x1 (0, s32, s32, 0); /* { dg-error {'svmla_lane_za32_vg2x1' has no form that takes 'svint32_t' arguments} } */
+  svmla_lane_za32_vg2x1 (0, u32, u32, 0); /* { dg-error {'svmla_lane_za32_vg2x1' has no form that takes 'svuint32_t' arguments} } */
+
+  svmla_lane_za32_vg2x1 (0, s16, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za32_vg2x1', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za32_vg2x1 (0, s16, s16, 7);
+  svmla_lane_za32_vg2x1 (0, s16, s16, 8); /* { dg-error {passing 8 to argument 4 of 'svmla_lane_za32_vg2x1', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za32_vg2x1 (0, s16, s16, f); /* { dg-error {argument 4 of 'svmla_lane_za32_vg2x1' must be an integer constant expression} } */
+}
+
+void
+f2 (svint16x2_t s16x2, svint16_t s16) __arm_streaming
+{
+  svmla_lane_za32_vg2x1 (0, s16, s16, 0); /* { dg-error {ACLE function 'svmla_lane_za32_s16_vg2x1' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint16x2_t s16x2, svint16_t s16) __arm_inout("za")
+{
+  svmla_lane_za32_vg2x1 (0, s16, s16, 0); /* { dg-error {ACLE function 'svmla_lane_za32_s16_vg2x1' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("+sme-i16i64")
+
+void
+f4 (svint16_t s16, svuint16_t u16,
+    svint32_t s32, svuint32_t u32,
+    svint64_t s64, svuint64_t u64)
+  __arm_streaming __arm_inout("za")
+{
+  svmla_lane_za64_vg4x1 (0, s16, s16, 0);
+  svmla_lane_za64_vg4x1 (0, u16, u16, 0);
+  svmla_lane_za64_vg4x1 (0, s16, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za64_vg4x1', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za64_vg4x1 (0, s16, s16, 7);
+  svmla_lane_za64_vg4x1 (0, u16, u16, 8); /* { dg-error {passing 8 to argument 4 of 'svmla_lane_za64_vg4x1', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za64_vg4x1 (0, s32, s32, 0); /* { dg-error {'svmla_lane_za64_vg4x1' has no form that takes 'svint32_t' arguments} } */
+  svmla_lane_za64_vg4x1 (0, u32, u32, 0); /* { dg-error {'svmla_lane_za64_vg4x1' has no form that takes 'svuint32_t' arguments} } */
+  svmla_lane_za64_vg4x1 (0, s64, s64, 0); /* { dg-error {'svmla_lane_za64_vg4x1' has no form that takes 'svint64_t' arguments} } */
+  svmla_lane_za64_vg4x1 (0, u64, u64, 0); /* { dg-error {'svmla_lane_za64_vg4x1' has no form that takes 'svuint64_t' arguments} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..126a764c998e1c2b8e6ed7e3230f82066b6ec10b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_2.c
@@ -0,0 +1,78 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
+    svint16x2_t s16x2, svuint16x2_t u16x2,
+    svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint16x3_t s16x3, svuint16x3_t u16x3,
+    svint16x4_t s16x4, svuint16x4_t u16x4,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svmla_lane_za32_vg2x2 (0, s16x2, s16); /* { dg-error {too few arguments to function 'svmla_lane_za32_vg2x2'} } */
+  svmla_lane_za32_vg2x2 (0, s16x2, s16, 0, 0); /* { dg-error {too many arguments to function 'svmla_lane_za32_vg2x2'} } */
+
+  svmla_lane_za32_vg2x2 (s16x2, s16x2, s16, 0); /* { dg-error {passing 'svint16x2_t' to argument 1 of 'svmla_lane_za32_vg2x2', which expects 'uint32_t'} } */
+  svmla_lane_za32_vg2x2 (f, s16x2, s16, 0);
+  svmla_lane_za32_vg2x2 (d, s16x2, s16, 0);
+  svmla_lane_za32_vg2x2 (pg, s16x2, s16, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svmla_lane_za32_vg2x2', which expects 'uint32_t'} } */
+
+  svmla_lane_za32_vg2x2 (0, 1, s16, 0); /* { dg-error {passing 'int' to argument 2 of 'svmla_lane_za32_vg2x2', which expects an SVE type rather than a scalar type} } */
+  svmla_lane_za32_vg2x2 (0, pg, s16, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svmla_lane_za32_vg2x2', which expects a tuple of 2 vectors} } */
+  svmla_lane_za32_vg2x2 (0, s16, s16, 0); /* { dg-error {passing single vector 'svint16_t' to argument 2 of 'svmla_lane_za32_vg2x2', which expects a tuple of 2 vectors} } */
+  svmla_lane_za32_vg2x2 (0, s16x3, s16, 0); /* { dg-error {passing 'svint16x3_t' to argument 2 of 'svmla_lane_za32_vg2x2', which expects a tuple of 2 vectors} } */
+  svmla_lane_za32_vg2x2 (0, s16x4, s16, 0); /* { dg-error {passing 'svint16x4_t' to argument 2 of 'svmla_lane_za32_vg2x2', which expects a tuple of 2 vectors} } */
+
+  svmla_lane_za32_vg2x2 (0, s16x2, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svmla_lane_za32_vg2x2', which expects an SVE type rather than a scalar type} } */
+  svmla_lane_za32_vg2x2 (0, s16x2, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_lane_za32_vg2x2', but argument 2 was a tuple of 'svint16_t'} } */
+  svmla_lane_za32_vg2x2 (0, s16x2, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_lane_za32_vg2x2', but argument 2 was a tuple of 'svint16_t'} } */
+  svmla_lane_za32_vg2x2 (0, s16x2, s32, 0); /* { dg-error {passing 'svint32_t' to argument 3 of 'svmla_lane_za32_vg2x2', but argument 2 was a tuple of 'svint16_t'} } */
+  svmla_lane_za32_vg2x2 (0, s16x2, s16x2, 0); /* { dg-error {passing 'svint16x2_t' to argument 3 of 'svmla_lane_za32_vg2x2', which expects a single SVE vector rather than a tuple} } */
+  svmla_lane_za32_vg2x2 (0, u16x2, u16, 0);
+  svmla_lane_za32_vg2x2 (0, u16x2, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_lane_za32_vg2x2', but argument 2 was a tuple of 'svuint16_t'} } */
+  svmla_lane_za32_vg2x2 (0, s32x2, s32, 0); /* { dg-error {'svmla_lane_za32_vg2x2' has no form that takes 'svint32x2_t' arguments} } */
+  svmla_lane_za32_vg2x2 (0, u32x2, u32, 0); /* { dg-error {'svmla_lane_za32_vg2x2' has no form that takes 'svuint32x2_t' arguments} } */
+
+  svmla_lane_za32_vg2x2 (0, s16x2, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za32_vg2x2', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za32_vg2x2 (0, s16x2, s16, 7);
+  svmla_lane_za32_vg2x2 (0, s16x2, s16, 8); /* { dg-error {passing 8 to argument 4 of 'svmla_lane_za32_vg2x2', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za32_vg2x2 (0, s16x2, s16, f); /* { dg-error {argument 4 of 'svmla_lane_za32_vg2x2' must be an integer constant expression} } */
+}
+
+void
+f2 (svint16x2_t s16x2, svint16_t s16) __arm_streaming
+{
+  svmla_lane_za32_vg2x2 (0, s16x2, s16, 0); /* { dg-error {ACLE function 'svmla_lane_za32_s16_vg2x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint16x2_t s16x2, svint16_t s16) __arm_inout("za")
+{
+  svmla_lane_za32_vg2x2 (0, s16x2, s16, 0); /* { dg-error {ACLE function 'svmla_lane_za32_s16_vg2x2' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("+sme-i16i64")
+
+void
+f4 (svint16_t s16, svuint16_t u16,
+    svint16x2_t s16x2, svuint16x2_t u16x2,
+    svint32_t s32, svuint32_t u32,
+    svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint64_t s64, svuint64_t u64,
+    svint64x2_t s64x2, svuint64x2_t u64x2)
+  __arm_streaming __arm_inout("za")
+{
+  svmla_lane_za64_vg4x2 (0, s16x2, s16, 0);
+  svmla_lane_za64_vg4x2 (0, u16x2, u16, 0);
+  svmla_lane_za64_vg4x2 (0, s16x2, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za64_vg4x2', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za64_vg4x2 (0, s16x2, s16, 7);
+  svmla_lane_za64_vg4x2 (0, u16x2, u16, 8); /* { dg-error {passing 8 to argument 4 of 'svmla_lane_za64_vg4x2', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za64_vg4x2 (0, s32x2, s32, 0); /* { dg-error {'svmla_lane_za64_vg4x2' has no form that takes 'svint32x2_t' arguments} } */
+  svmla_lane_za64_vg4x2 (0, u32x2, u32, 0); /* { dg-error {'svmla_lane_za64_vg4x2' has no form that takes 'svuint32x2_t' arguments} } */
+  svmla_lane_za64_vg4x2 (0, s64x2, s64, 0); /* { dg-error {'svmla_lane_za64_vg4x2' has no form that takes 'svint64x2_t' arguments} } */
+  svmla_lane_za64_vg4x2 (0, u64x2, u64, 0); /* { dg-error {'svmla_lane_za64_vg4x2' has no form that takes 'svuint64x2_t' arguments} } */
+}
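Reviewer note (a sketch, not part of the patch; names invented): for contrast with the rejected calls above, a well-formed svmla_lane_za32_vg2x2 call takes a 32-bit slice index, a tuple of two 16-bit vectors, a single vector of the same element type, and a constant lane index in [0, 7]:

  #pragma GCC target "+sme2"
  #include <arm_sme.h>

  void
  mla_lane (uint32_t slice, svint16x2_t zn, svint16_t zm)
    __arm_streaming __arm_inout("za")
  {
    svmla_lane_za32_vg2x2 (slice, zn, zm, 7);  /* highest valid lane */
  }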
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..17bed0c72dc5e6a877f41e78b1154d0084dd4d89
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_3.c
@@ -0,0 +1,78 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
+    svint16x2_t s16x2, svuint16x2_t u16x2,
+    svint16x3_t s16x3, svuint16x3_t u16x3,
+    svint16x4_t s16x4, svuint16x4_t u16x4,
+    svint32x4_t s32x4, svuint32x4_t u32x4,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svmla_lane_za32_vg2x4 (0, s16x4, s16); /* { dg-error {too few arguments to function 'svmla_lane_za32_vg2x4'} } */
+  svmla_lane_za32_vg2x4 (0, s16x4, s16, 0, 0); /* { dg-error {too many arguments to function 'svmla_lane_za32_vg2x4'} } */
+
+  svmla_lane_za32_vg2x4 (s16x4, s16x4, s16, 0); /* { dg-error {passing 'svint16x4_t' to argument 1 of 'svmla_lane_za32_vg2x4', which expects 'uint32_t'} } */
+  svmla_lane_za32_vg2x4 (f, s16x4, s16, 0);
+  svmla_lane_za32_vg2x4 (d, s16x4, s16, 0);
+  svmla_lane_za32_vg2x4 (pg, s16x4, s16, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svmla_lane_za32_vg2x4', which expects 'uint32_t'} } */
+
+  svmla_lane_za32_vg2x4 (0, 1, s16, 0); /* { dg-error {passing 'int' to argument 2 of 'svmla_lane_za32_vg2x4', which expects an SVE type rather than a scalar type} } */
+  svmla_lane_za32_vg2x4 (0, pg, s16, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svmla_lane_za32_vg2x4', which expects a tuple of 4 vectors} } */
+  svmla_lane_za32_vg2x4 (0, s16, s16, 0); /* { dg-error {passing single vector 'svint16_t' to argument 2 of 'svmla_lane_za32_vg2x4', which expects a tuple of 4 vectors} } */
+  svmla_lane_za32_vg2x4 (0, s16x2, s16, 0); /* { dg-error {passing 'svint16x2_t' to argument 2 of 'svmla_lane_za32_vg2x4', which expects a tuple of 4 vectors} } */
+  svmla_lane_za32_vg2x4 (0, s16x3, s16, 0); /* { dg-error {passing 'svint16x3_t' to argument 2 of 'svmla_lane_za32_vg2x4', which expects a tuple of 4 vectors} } */
+
+  svmla_lane_za32_vg2x4 (0, s16x4, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svmla_lane_za32_vg2x4', which expects an SVE type rather than a scalar type} } */
+  svmla_lane_za32_vg2x4 (0, s16x4, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_lane_za32_vg2x4', but argument 2 was a tuple of 'svint16_t'} } */
+  svmla_lane_za32_vg2x4 (0, s16x4, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_lane_za32_vg2x4', but argument 2 was a tuple of 'svint16_t'} } */
+  svmla_lane_za32_vg2x4 (0, s16x4, s32, 0); /* { dg-error {passing 'svint32_t' to argument 3 of 'svmla_lane_za32_vg2x4', but argument 2 was a tuple of 'svint16_t'} } */
+  svmla_lane_za32_vg2x4 (0, s16x4, s16x4, 0); /* { dg-error {passing 'svint16x4_t' to argument 3 of 'svmla_lane_za32_vg2x4', which expects a single SVE vector rather than a tuple} } */
+  svmla_lane_za32_vg2x4 (0, u16x4, u16, 0);
+  svmla_lane_za32_vg2x4 (0, u16x4, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_lane_za32_vg2x4', but argument 2 was a tuple of 'svuint16_t'} } */
+  svmla_lane_za32_vg2x4 (0, s32x4, s32, 0); /* { dg-error {'svmla_lane_za32_vg2x4' has no form that takes 'svint32x4_t' arguments} } */
+  svmla_lane_za32_vg2x4 (0, u32x4, u32, 0); /* { dg-error {'svmla_lane_za32_vg2x4' has no form that takes 'svuint32x4_t' arguments} } */
+
+  svmla_lane_za32_vg2x4 (0, s16x4, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za32_vg2x4', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za32_vg2x4 (0, s16x4, s16, 7);
+  svmla_lane_za32_vg2x4 (0, s16x4, s16, 8); /* { dg-error {passing 8 to argument 4 of 'svmla_lane_za32_vg2x4', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za32_vg2x4 (0, s16x4, s16, f); /* { dg-error {argument 4 of 'svmla_lane_za32_vg2x4' must be an integer constant expression} } */
+}
+
+void
+f2 (svint16x4_t s16x4, svint16_t s16) __arm_streaming
+{
+  svmla_lane_za32_vg2x4 (0, s16x4, s16, 0); /* { dg-error {ACLE function 'svmla_lane_za32_s16_vg2x4' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint16x4_t s16x4, svint16_t s16) __arm_inout("za")
+{
+  svmla_lane_za32_vg2x4 (0, s16x4, s16, 0); /* { dg-error {ACLE function 'svmla_lane_za32_s16_vg2x4' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("+sme-i16i64")
+
+void
+f4 (svint16_t s16, svuint16_t u16,
+    svint16x4_t s16x4, svuint16x4_t u16x4,
+    svint32_t s32, svuint32_t u32,
+    svint32x4_t s32x4, svuint32x4_t u32x4,
+    svint64_t s64, svuint64_t u64,
+    svint64x4_t s64x4, svuint64x4_t u64x4)
+  __arm_streaming __arm_inout("za")
+{
+  svmla_lane_za64_vg4x4 (0, s16x4, s16, 0);
+  svmla_lane_za64_vg4x4 (0, u16x4, u16, 0);
+  svmla_lane_za64_vg4x4 (0, s16x4, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za64_vg4x4', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za64_vg4x4 (0, s16x4, s16, 7);
+  svmla_lane_za64_vg4x4 (0, u16x4, u16, 8); /* { dg-error {passing 8 to argument 4 of 'svmla_lane_za64_vg4x4', which expects a value in the range \[0, 7\]} } */
+  svmla_lane_za64_vg4x4 (0, s32x4, s32, 0); /* { dg-error {'svmla_lane_za64_vg4x4' has no form that takes 'svint32x4_t' arguments} } */
+  svmla_lane_za64_vg4x4 (0, u32x4, u32, 0); /* { dg-error {'svmla_lane_za64_vg4x4' has no form that takes 'svuint32x4_t' arguments} } */
+  svmla_lane_za64_vg4x4 (0, s64x4, s64, 0); /* { dg-error {'svmla_lane_za64_vg4x4' has no form that takes 'svint64x4_t' arguments} } */
+  svmla_lane_za64_vg4x4 (0, u64x4, u64, 0); /* { dg-error {'svmla_lane_za64_vg4x4' has no form that takes 'svuint64x4_t' arguments} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..d2a67c678dff591086e4aef9b72a86d0b2f76ac6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_4.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+    svint32_t s32, svuint32_t u32, svfloat32_t f32,
+    svint32x2_t s32x2, svuint32x2_t u32x2, svfloat32x2_t f32x2, int i)
+  __arm_streaming __arm_inout("za")
+{
+  svmla_lane_za32_vg4x1 (0, s8, s8, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za32_vg4x1', which expects a value in the range \[0, 15\]} } */
+  svmla_lane_za32_vg4x1 (0, u8, u8, 0);
+  svmla_lane_za32_vg4x1 (0, s8, s8, 15);
+  svmla_lane_za32_vg4x1 (0, u8, u8, 16); /* { dg-error {passing 16 to argument 4 of 'svmla_lane_za32_vg4x1', which expects a value in the range \[0, 15\]} } */
+  svmla_lane_za32_vg4x1 (0, s16, s16, 0); /* { dg-error {'svmla_lane_za32_vg4x1' has no form that takes 'svint16_t' arguments} } */
+  svmla_lane_za32_vg4x1 (0, u16, u16, 0); /* { dg-error {'svmla_lane_za32_vg4x1' has no form that takes 'svuint16_t' arguments} } */
+
+  svmla_lane_za32_vg1x2 (0, s32x2, s32, 0); /* { dg-error {'svmla_lane_za32_vg1x2' has no form that takes 'svint32x2_t' arguments} } */
+  svmla_lane_za32_vg1x2 (0, u32x2, u32, 0); /* { dg-error {'svmla_lane_za32_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */
+  svmla_lane_za32_vg1x2 (0, f32x2, f32, 0);
+  svmla_lane_za32_vg1x2 (0, f32x2, f32, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svmla_lane_za32_vg1x2 (0, f32x2, f32, 4); /* { dg-error {passing 4 to argument 4 of 'svmla_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svmla_lane_za32_vg1x2 (0, f32x2, f32, i); /* { dg-error {argument 4 of 'svmla_lane_za32_vg1x2' must be an integer constant expression} } */
+}
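Reviewer note (a sketch, not part of the patch; names invented): binary_za_slice_lane_4.c fixes the lane bounds the tests enforce per form — [0, 15] for the 8-bit vg4x1 forms and [0, 3] for the 32-bit vg1x2 forms — so a conforming caller looks like:

  #pragma GCC target "+sme2"
  #include <arm_sme.h>

  void
  lane_bounds (uint32_t slice, svuint8_t u8, svfloat32x2_t fx2, svfloat32_t f)
    __arm_streaming __arm_inout("za")
  {
    svmla_lane_za32_vg4x1 (slice, u8, u8, 15);  /* 8-bit form: lanes 0-15 */
    svmla_lane_za32_vg1x2 (slice, fx2, f, 3);   /* 32-bit form: lanes 0-3 */
  }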
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..8307a2813dda49211b06b4cd3bb153231f4cd1c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_1.c
@@ -0,0 +1,76 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint16_t s16, svint32_t s32, svuint32_t u32,
+    svint16x2_t s16x2, svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint32x3_t s32x3, svuint32x3_t u32x3,
+    svint32x4_t s32x4, svuint32x4_t u32x4,
+    svint64x2_t s64x2, svuint64x2_t u64x2,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svadd_write_za32_vg1x2 (1, s32x2); /* { dg-error {too few arguments to function 'svadd_write_za32_vg1x2'} } */
+  svadd_write_za32_vg1x2 (1, s32x2, s32x2, s32x2); /* { dg-error {too many arguments to function 'svadd_write_za32_vg1x2'} } */
+
+  svadd_write_za32_vg1x2 (s32x2, s32x2, s32x2); /* { dg-error {passing 'svint32x2_t' to argument 1 of 'svadd_write_za32_vg1x2', which expects 'uint32_t'} } */
+  svadd_write_za32_vg1x2 (f, s32x2, s32x2);
+  svadd_write_za32_vg1x2 (d, s32x2, s32x2);
+  svadd_write_za32_vg1x2 (pg, s32x2, s32x2); /* { dg-error {passing 'svbool_t' to argument 1 of 'svadd_write_za32_vg1x2', which expects 'uint32_t'} } */
+
+  svadd_write_za32_vg1x2 (1, 1, s32x2); /* { dg-error {passing 'int' to argument 2 of 'svadd_write_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svadd_write_za32_vg1x2 (1, pg, s32x2); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svadd_write_za32_vg1x2 (1, s32, s32x2); /* { dg-error {passing single vector 'svint32_t' to argument 2 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svadd_write_za32_vg1x2 (1, s32x3, s32x3); /* { dg-error {passing 'svint32x3_t' to argument 2 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svadd_write_za32_vg1x2 (1, s32x4, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 2 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+
+  svadd_write_za32_vg1x2 (1, s32x2, 1); /* { dg-error {passing 'int' to argument 3 of 'svadd_write_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svadd_write_za32_vg1x2 (1, s32x2, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 was a tuple of 'svint32_t'} } */
+  svadd_write_za32_vg1x2 (1, s32x2, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 was a tuple of 'svint32_t'} } */
+  svadd_write_za32_vg1x2 (1, s32x2, s16x2); /* { dg-error {passing 'svint16x2_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 had type 'svint32x2_t'} } */
+  svadd_write_za32_vg1x2 (1, s32x2, s32);
+  svadd_write_za32_vg1x2 (1, s32x2, s32x2);
+  svadd_write_za32_vg1x2 (1, s32x2, s32x3); /* { dg-error {passing 'svint32x3_t' to argument 3 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svadd_write_za32_vg1x2 (1, s32x2, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 3 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svadd_write_za32_vg1x2 (1, s32x2, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 was a tuple of 'svint32_t'} } */
+  svadd_write_za32_vg1x2 (1, s32x2, u32x2); /* { dg-error {passing 'svuint32x2_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 had type 'svint32x2_t'} } */
+  svadd_write_za32_vg1x2 (1, s32x2, u32x3); /* { dg-error {passing 'svuint32x3_t' to argument 3 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svadd_write_za32_vg1x2 (1, s32x2, u32x4); /* { dg-error {passing 'svuint32x4_t' to argument 3 of 'svadd_write_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svadd_write_za32_vg1x2 (1, u32x2, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 was a tuple of 'svuint32_t'} } */
+  svadd_write_za32_vg1x2 (1, u32x2, s32x2); /* { dg-error {passing 'svint32x2_t' to argument 3 of 'svadd_write_za32_vg1x2', but argument 2 had type 'svuint32x2_t'} } */
+  svadd_write_za32_vg1x2 (1, u32x2, u32);
+  svadd_write_za32_vg1x2 (1, u32x2, u32x2);
+
+  svadd_write_za32_vg1x2 (1, s16x2, s16); /* { dg-error {'svadd_write_za32_vg1x2' has no form that takes 'svint16x2_t' arguments} } */
+  svadd_write_za32_vg1x2 (1, s16x2, s16x2); /* { dg-error {'svadd_write_za32_vg1x2' has no form that takes 'svint16x2_t' arguments} } */
+  svadd_write_za32_vg1x2 (1, s64x2, s64x2); /* { dg-error {'svadd_write_za32_vg1x2' has no form that takes 'svint64x2_t' arguments} } */
+  svadd_write_za32_vg1x2 (1, u64x2, u64x2); /* { dg-error {'svadd_write_za32_vg1x2' has no form that takes 'svuint64x2_t' arguments} } */
+}
+
+void
+f2 (svint32x2_t s32x2) __arm_streaming
+{
+  svadd_write_za32_vg1x2 (0, s32x2, s32x2); /* { dg-error {ACLE function 'svadd_write_za32_s32_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint32x2_t s32x2) __arm_inout("za")
+{
+  svadd_write_za32_vg1x2 (0, s32x2, s32x2); /* { dg-error {ACLE function 'svadd_write_za32_s32_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("+sme-i16i64")
+
+void
+f4 (svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint64x2_t s64x2, svuint64x2_t u64x2)
+  __arm_streaming __arm_inout("za")
+{
+  svadd_write_za64_vg1x2 (1, s32x2, s32x2); /* { dg-error {'svadd_write_za64_vg1x2' has no form that takes 'svint32x2_t' arguments} } */
+  svadd_write_za64_vg1x2 (1, u32x2, u32x2); /* { dg-error {'svadd_write_za64_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */
+  svadd_write_za64_vg1x2 (1, s64x2, s64x2);
+  svadd_write_za64_vg1x2 (1, u64x2, u64x2);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..181f509eee13eb7e70ddba019f5b2c36cd10a31f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_2.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint16_t s16, svint32_t s32, svuint32_t u32,
+    svint16x2_t s16x2, svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint32x3_t s32x3, svuint32x3_t u32x3,
+    svint32x4_t s32x4, svuint32x4_t u32x4,
+    svint64x2_t s64x2, svuint64x2_t u64x2,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svadd_write_za32_vg1x4 (1, s32x4); /* { dg-error {too few arguments to function 'svadd_write_za32_vg1x4'} } */
+  svadd_write_za32_vg1x4 (1, s32x4, s32x4, s32x4); /* { dg-error {too many arguments to function 'svadd_write_za32_vg1x4'} } */
+
+  svadd_write_za32_vg1x4 (s32x4, s32x4, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 1 of 'svadd_write_za32_vg1x4', which expects 'uint32_t'} } */
+  svadd_write_za32_vg1x4 (f, s32x4, s32x4);
+  svadd_write_za32_vg1x4 (d, s32x4, s32x4);
+  svadd_write_za32_vg1x4 (pg, s32x4, s32x4); /* { dg-error {passing 'svbool_t' to argument 1 of 'svadd_write_za32_vg1x4', which expects 'uint32_t'} } */
+
+  svadd_write_za32_vg1x4 (1, 1, s32x4); /* { dg-error {passing 'int' to argument 2 of 'svadd_write_za32_vg1x4', which expects an SVE type rather than a scalar} } */
+  svadd_write_za32_vg1x4 (1, pg, s32x4); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadd_write_za32_vg1x4', which expects a tuple of 4 vectors} } */
+  svadd_write_za32_vg1x4 (1, s32, s32x4); /* { dg-error {passing single vector 'svint32_t' to argument 2 of 'svadd_write_za32_vg1x4', which expects a tuple of 4 vectors} } */
+  svadd_write_za32_vg1x4 (1, s32x2, s32x2); /* { dg-error {passing 'svint32x2_t' to argument 2 of 'svadd_write_za32_vg1x4', which expects a tuple of 4 vectors} } */
+  svadd_write_za32_vg1x4 (1, s32x3, s32x3); /* { dg-error {passing 'svint32x3_t' to argument 2 of 'svadd_write_za32_vg1x4', which expects a tuple of 4 vectors} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..8c8414ec55cf43ff10a0291f6f901f65d332646f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2+nosme-i16i64")
+
+void
+f1 (svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint64x2_t s64x2, svuint64x2_t u64x2)
+  __arm_streaming __arm_inout("za")
+{
+  svadd_write_za64_vg1x2 (1, s32x2, s32x2); /* { dg-error {'svadd_write_za64_vg1x2' has no form that takes 'svint32x2_t' arguments} } */
+  svadd_write_za64_vg1x2 (1, u32x2, u32x2); /* { dg-error {'svadd_write_za64_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */
+  svadd_write_za64_vg1x2 (1, s64x2, s64x2); /* { dg-error {ACLE function 'svadd_write_za64_s64_vg1x2' requires ISA extension 'sme-i16i64'} } */
+  svadd_write_za64_vg1x2 (1, u64x2, u64x2);
+}
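Reviewer note (a sketch, not part of the patch; names invented): the svadd_write tests above accept both the tuple+tuple and tuple+single forms when the 32-bit element types match, while the za64 variants are additionally gated on +sme-i16i64. For example (svget2 is the existing ACLE tuple accessor):

  #pragma GCC target "+sme2"
  #include <arm_sme.h>

  void
  add_write (uint32_t slice, svint32x2_t zn, svint32x2_t zm)
    __arm_streaming __arm_inout("za")
  {
    svadd_write_za32_vg1x2 (slice, zn, zm);             /* tuple + tuple */
    svadd_write_za32_vg1x2 (slice, zn, svget2 (zm, 0)); /* tuple + single */
  }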
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_uint_opt_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_uint_opt_single_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..b00c04320bf06c72f62ec3f249952d235991180e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_uint_opt_single_1.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svuint16_t u16, svint8_t s8, svuint8_t u8,
+    svint16x2_t s16x2, svuint16x2_t u16x2, svint8x2_t s8x2, svuint8x2_t u8x2,
+    svint8x3_t s8x3, svuint8x3_t u8x3,
+    svint8x4_t s8x4, svuint8x4_t u8x4,
+    svint64x2_t s64x2, svuint64x2_t u64x2,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svsudot_za32_vg1x2 (1, s8x2); /* { dg-error {too few arguments to function 'svsudot_za32_vg1x2'} } */
+  svsudot_za32_vg1x2 (1, s8x2, u8x2, u8x2); /* { dg-error {too many arguments to function 'svsudot_za32_vg1x2'} } */
+
+  svsudot_za32_vg1x2 (s8x2, s8x2, u8x2); /* { dg-error {passing 'svint8x2_t' to argument 1 of 'svsudot_za32_vg1x2', which expects 'uint32_t'} } */
+  svsudot_za32_vg1x2 (f, s8x2, u8x2);
+  svsudot_za32_vg1x2 (d, s8x2, u8x2);
+  svsudot_za32_vg1x2 (pg, s8x2, u8x2); /* { dg-error {passing 'svbool_t' to argument 1 of 'svsudot_za32_vg1x2', which expects 'uint32_t'} } */
+
+  svsudot_za32_vg1x2 (1, 1, u8x2); /* { dg-error {passing 'int' to argument 2 of 'svsudot_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svsudot_za32_vg1x2 (1, pg, u8x2); /* { dg-error {passing 'svbool_t' to argument 2 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svsudot_za32_vg1x2 (1, s8, u8x2); /* { dg-error {passing single vector 'svint8_t' to argument 2 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svsudot_za32_vg1x2 (1, s8x3, u8x3); /* { dg-error {passing 'svint8x3_t' to argument 2 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svsudot_za32_vg1x2 (1, s8x4, u8x4); /* { dg-error {passing 'svint8x4_t' to argument 2 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+
+  svsudot_za32_vg1x2 (1, s8x2, 1); /* { dg-error {passing 'int' to argument 3 of 'svsudot_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svsudot_za32_vg1x2 (1, s8x2, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svsudot_za32_vg1x2', which expects a vector of unsigned integers} } */
+  svsudot_za32_vg1x2 (1, s8x2, u16); /* { dg-error {arguments 2 and 3 of 'svsudot_za32_vg1x2' must have the same element size, but the values passed here have type 'svint8x2_t' and 'svuint16_t' respectively} } */
+  svsudot_za32_vg1x2 (1, s8x2, u16x2); /* { dg-error {arguments 2 and 3 of 'svsudot_za32_vg1x2' must have the same element size, but the values passed here have type 'svint8x2_t' and 'svuint16x2_t' respectively} } */
+  svsudot_za32_vg1x2 (1, s8x2, u8);
+  svsudot_za32_vg1x2 (1, s8x2, u8x2);
+  svsudot_za32_vg1x2 (1, s8x2, u8x3); /* { dg-error {passing 'svuint8x3_t' to argument 3 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svsudot_za32_vg1x2 (1, s8x2, u8x4); /* { dg-error {passing 'svuint8x4_t' to argument 3 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svsudot_za32_vg1x2 (1, s8x2, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svsudot_za32_vg1x2', which expects a vector of unsigned integers} } */
+  svsudot_za32_vg1x2 (1, s8x2, s8x2); /* { dg-error {passing 'svint8x2_t' to argument 3 of 'svsudot_za32_vg1x2', which expects vectors of unsigned integers} } */
+  svsudot_za32_vg1x2 (1, s8x2, u8x3); /* { dg-error {passing 'svuint8x3_t' to argument 3 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svsudot_za32_vg1x2 (1, s8x2, u8x4); /* { dg-error {passing 'svuint8x4_t' to argument 3 of 'svsudot_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svsudot_za32_vg1x2 (1, u8x2, u8); /* { dg-error {'svsudot_za32_vg1x2' has no form that takes 'svuint8x2_t' arguments} } */
+  svsudot_za32_vg1x2 (1, u8x2, u8x2); /* { dg-error {'svsudot_za32_vg1x2' has no form that takes 'svuint8x2_t' arguments} } */
+
+  svsudot_za32_vg1x2 (1, s16x2, u16); /* { dg-error {'svsudot_za32_vg1x2' has no form that takes 'svint16x2_t' arguments} } */
+  svsudot_za32_vg1x2 (1, s16x2, u16x2); /* { dg-error {'svsudot_za32_vg1x2' has no form that takes 'svint16x2_t' arguments} } */
+  svsudot_za32_vg1x2 (1, s64x2, u64x2); /* { dg-error {'svsudot_za32_vg1x2' has no form that takes 'svint64x2_t' arguments} } */
+  svsudot_za32_vg1x2 (1, u64x2, u64x2); /* { dg-error {'svsudot_za32_vg1x2' has no form that takes 'svuint64x2_t' arguments} } */
+}
+
+void
+f2 (svint8x2_t s8x2, svuint8x2_t u8x2) __arm_streaming
+{
+  svsudot_za32_vg1x2 (0, s8x2, u8x2); /* { dg-error {ACLE function 'svsudot_za32_s8_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint8x2_t s8x2, svuint8x2_t u8x2) __arm_inout("za")
+{
+  svsudot_za32_vg1x2 (0, s8x2, u8x2); /* { dg-error {ACLE function 'svsudot_za32_s8_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binaryxn_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binaryxn_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..98b2433ce72a1af2efaa33a3e427b1e5c2db8645
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binaryxn_1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+void
+f1 (svbool_t pg, svcount_t pn, svuint8_t u8, svint16_t s16,
+    svuint8x2_t u8x2, svuint8x3_t u8x3, svuint8x4_t u8x4)
+{
+  svsel (pg, u8); /* { dg-error {too few arguments to function 'svsel'} } */
+  svsel (pg, u8, u8, u8); /* { dg-error {too many arguments to function 'svsel'} } */
+  svsel (0, u8, u8); /* { dg-error {passing 'int' to argument 1 of 'svsel', which expects an 'svbool_t' or 'svcount_t'} } */
+  svsel (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svsel', which expects an 'svbool_t' or 'svcount_t'} } */
+  svsel (pn, u8, u8); /* { dg-error {operations on single vectors must be predicated by 'svbool_t' rather than 'svcount_t'} } */
+  svsel (pg, pg, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svsel', but argument 2 had type 'svbool_t'} } */
+  svsel (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svsel', but argument 2 had type 'svuint8_t'} } */
+  svsel (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svsel', but argument 2 had type 'svuint8_t'} } */
+  svsel (pg, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svsel', which expects an SVE type rather than a scalar} } */
+  svsel (pg, pg, pg);
+  svsel (pg, u8, u8);
+  svsel (pg, u8, u8x2); /* { dg-error {passing tuple 'svuint8x2_t' to argument 3 of 'svsel' after passing single vector 'svuint8_t' to argument 2} } */
+  svsel (pg, u8, u8x3); /* { dg-error {passing tuple 'svuint8x3_t' to argument 3 of 'svsel' after passing single vector 'svuint8_t' to argument 2} } */
+  svsel (pg, u8, u8x4); /* { dg-error {passing tuple 'svuint8x4_t' to argument 3 of 'svsel' after passing single vector 'svuint8_t' to argument 2} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binaryxn_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binaryxn_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..600b7fc7959d1c8b42ea6c6d843349422677ad24
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binaryxn_2.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sme2"
+
+void
+f1 (svbool_t pg, svcount_t pn, svuint8_t u8, svint16_t s16,
+    svint8x2_t s8x2, svint8x3_t s8x3, svint8x4_t s8x4,
+    svuint8x2_t u8x2, svuint8x3_t u8x3, svuint8x4_t u8x4,
+    svuint16x2_t u16x2) __arm_streaming
+{
+  svsel (pn, u8x2); /* { dg-error {too few arguments to function 'svsel'} } */
+  svsel (pn, u8x2, u8x2, u8x2); /* { dg-error {too many arguments to function 'svsel'} } */
+  svsel (0, u8x2, u8x2); /* { dg-error {passing 'int' to argument 1 of 'svsel', which expects an 'svbool_t' or 'svcount_t'} } */
+  svsel (u8x2, u8x2, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 1 of 'svsel', which expects an 'svbool_t' or 'svcount_t'} } */
+  svsel (pg, u8x2, u8x2); /* { dg-error {operations on multiple vectors must be predicated by 'svcount_t' rather than 'svbool_t'} } */
+  svsel (pn, u8x2, s8x2); /* { dg-error {passing 'svint8x2_t' to argument 3 of 'svsel', but argument 2 had type 'svuint8x2_t'} } */
+  svsel (pn, u8x2, u16x2); /* { dg-error {passing 'svuint16x2_t' to argument 3 of 'svsel', but argument 2 had type 'svuint8x2_t'} } */
+  svsel (pn, u8x2, 0); /* { dg-error {passing 'int' to argument 3 of 'svsel', which expects an SVE type rather than a scalar} } */
+  svsel (pn, u8x2, u8); /* { dg-error {passing single vector 'svuint8_t' to argument 3 of 'svsel' after passing tuple 'svuint8x2_t' to argument 2} } */
+  svsel (pn, u8x2, u8x2);
+  svsel (pn, u8x2, u8x3); /* { dg-error {passing mismatched tuple types 'svuint8x2_t' and 'svuint8x3_t' to arguments 2 and 3 of 'svsel'} } */
+  svsel (pn, u8x2, s8x3); /* { dg-error {passing mismatched tuple types 'svuint8x2_t' and 'svint8x3_t' to arguments 2 and 3 of 'svsel'} } */
+  svsel (pn, u8x2, u8x4); /* { dg-error {passing mismatched tuple types 'svuint8x2_t' and 'svuint8x4_t' to arguments 2 and 3 of 'svsel'} } */
+  svsel (pn, s8x4, s8x2); /* { dg-error {passing mismatched tuple types 'svint8x4_t' and 'svint8x2_t' to arguments 2 and 3 of 'svsel'} } */
+}
+
+void
+f2 (svcount_t pn, svuint8x2_t u8x2)
+{
+  svsel (pn, u8x2, u8x2); /* { dg-error {ACLE function 'svsel_u8_x2' can only be called when SME streaming mode is enabled} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clamp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clamp_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..342bebc07d6cec6dfa3aac52e0a145b93c969c85
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clamp_1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sme2"
+
+void
+f1 (svcount_t pn, svfloat16_t f16, svint16_t s16, svfloat32_t f32,
+    svfloat16x2_t f16x2, svfloat16x3_t f16x3, svfloat16x4_t f16x4)
+  __arm_streaming
+{
+  svclamp (f16, f16); /* { dg-error {too few arguments to function 'svclamp'} } */
+  svclamp (f16, f16, f16, f16); /* { dg-error {too many arguments to function 'svclamp'} } */
+  svclamp (0, f16, f16); /* { dg-error {passing 'int' to argument 1 of 'svclamp', which expects an SVE type rather than a scalar type} } */
+  svclamp (f16, f16, f16);
+  svclamp (s16, s16, s16); /* { dg-error {'svclamp' has no form that takes 'svint16_t' arguments} } */
+  svclamp (pn, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svclamp', but argument 1 had type 'svcount_t'} } */
+  svclamp (f16, s16, f16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svclamp', but argument 1 had type 'svfloat16_t'} } */
+  svclamp (f16, f32, f32); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svclamp', but argument 1 had type 'svfloat16_t'} } */
+  svclamp (f16, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svclamp', but argument 1 had type 'svfloat16_t'} } */
+  svclamp (f16, f16, 0); /* { dg-error {passing 'int' to argument 3 of 'svclamp', which expects an SVE type rather than a scalar} } */
+  svclamp (f16, f16x2, f16); /* { dg-error {passing 'svfloat16x2_t' to argument 2 of 'svclamp', which expects a single SVE vector rather than a tuple} } */
+  svclamp (f16, f16x4, f16); /* { dg-error {passing 'svfloat16x4_t' to argument 2 of 'svclamp', which expects a single SVE vector rather than a tuple} } */
+  svclamp (f16, f16, f16x2); /* { dg-error {passing 'svfloat16x2_t' to argument 3 of 'svclamp', which expects a single SVE vector rather than a tuple} } */
+  svclamp (f16, f16, f16x3); /* { dg-error {passing 'svfloat16x3_t' to argument 3 of 'svclamp', which expects a single SVE vector rather than a tuple} } */
+
+  svclamp (f16x2, f16x2, f16x2); /* { dg-error {passing 'svfloat16x2_t' to argument 2 of 'svclamp', which expects a single SVE vector rather than a tuple} } */
+  svclamp (f16x2, s16, f16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svclamp', but argument 1 was a tuple of 'svfloat16_t'} } */
+  svclamp (f16x2, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svclamp', but argument 1 was a tuple of 'svfloat16_t'} } */
+}
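Reviewer note (a sketch, not part of the patch; names invented): binaryxn_1.c and binaryxn_2.c together encode the predication rule that single-vector operations take svbool_t while multi-vector (tuple) operations take svcount_t, so the accepted tuple form is:

  #pragma GCC target "+sme2"
  #include <arm_sve.h>

  svuint8x2_t
  sel_pair (svcount_t pn, svuint8x2_t a, svuint8x2_t b) __arm_streaming
  {
    return svsel (pn, a, b);  /* tuples must be predicated by svcount_t */
  }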
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_scalar_count_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_scalar_count_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..47077f7a4e5b48d6efb2f5b18994c2c0dd79f25f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_scalar_count_1.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+#include <stdbool.h>
+
+#pragma GCC target "+sme2"
+
+enum signed_enum { SA = -1, SB };
+enum unsigned_enum { UA, UB };
+
+void
+test (int32_t s32, int64_t s64, uint16_t u16, uint32_t u32, uint64_t u64,
+      bool b, int *ptr, float f32, svbool_t pg, svint32_t vec)
+  __arm_streaming
+{
+  svwhilele_c8 (s64, 2); /* { dg-error {too few arguments to function 'svwhilele_c8'} } */
+  svwhilele_c8 (s64, s64, 2, 2); /* { dg-error {too many arguments to function 'svwhilele_c8'} } */
+
+  svwhilele_c8 (b, b, 2); /* { dg-error {passing '_Bool' and '_Bool' to arguments 1 and 2 of 'svwhilele_c8', which expects a pair of 64-bit integers} } */
+  svwhilele_c8 (u16, u16, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+  svwhilele_c8 (ptr, ptr, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+  svwhilele_c8 (f32, f32, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+  svwhilele_c8 (pg, pg, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+  svwhilele_c8 (vec, vec, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+  svwhilele_c8 (0, 0, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+  svwhilele_c8 (s32, s32, 2); /* { dg-error {expects a pair of 64-bit integers} } */
+
+  svwhilele_c8 (0, s64, 2);
+  svwhilele_c8 (0U, s64, 2);
+  svwhilele_c8 (0, u64, 2); /* { dg-error {mismatched integer types} } */
+  svwhilele_c8 (0U, u64, 2);
+
+  svwhilele_c8 (s32, s64, 2);
+  svwhilele_c8 (u32, s64, 2);
+  svwhilele_c8 (s32, u64, 2); /* { dg-error {mismatched integer types} } */
+  svwhilele_c8 (u32, u64, 2);
+
+  svwhilele_c8 (s64, s64, 2);
+  svwhilele_c8 (u64, s64, 2); /* { dg-error {mismatched integer types} } */
+  svwhilele_c8 (s64, u64, 2); /* { dg-error {mismatched integer types} } */
+  svwhilele_c8 (u64, u64, 2);
+
+  svwhilele_c8 (s64, 0, 2);
+  svwhilele_c8 (s64, 0U, 2);
+  svwhilele_c8 (u64, 0, 2); /* { dg-error {mismatched integer types} } */
+  svwhilele_c8 (u64, 0U, 2);
+
+  svwhilele_c8 (s64, s32, 2);
+  svwhilele_c8 (s64, u32, 2);
+  svwhilele_c8 (u64, s32, 2); /* { dg-error {mismatched integer types} } */
+  svwhilele_c8 (u64, u32, 2);
+
+  svwhilele_c8 (u64, u64, u64); /* { dg-error {argument 3 of 'svwhilele_c8' must be an integer constant expression} } */
+  svwhilele_c8 (u64, u64, 1); /* { dg-error {passing 1 to argument 3 of 'svwhilele_c8', which expects either 2 or 4} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c
index 7a617aa1563b0516b2cd6fdbcb06a5a73b0da663..22b031ad4db07cf206ad6c5a14d889d00a0b86af 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c
@@ -15,7 +15,7 @@ f1 (svuint8x2_t *ptr, svbool_t pg, svuint8_t u8, svfloat64_t f64,
   *ptr = svcreate2 (u8, x); /* { dg-error {passing 'int' to argument 2 of 'svcreate2', which expects an SVE type rather than a scalar} } */
   *ptr = svcreate2 (x, u8); /* { dg-error {passing 'int' to argument 1 of 'svcreate2', which expects an SVE type rather than a scalar} } */
   *ptr = svcreate2 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svcreate2', but argument 1 had type 'svbool_t'} } */
-  *ptr = svcreate2 (pg, pg); /* { dg-error {'svcreate2' has no form that takes 'svbool_t' arguments} } */
+  *ptr = svcreate2 (pg, pg); /* { dg-error {incompatible types when assigning to type 'svuint8x2_t' from type 'svboolx2_t'} } */
   *ptr = svcreate2 (u8, u8);
   *ptr = svcreate2 (f64, f64); /* { dg-error {incompatible types when assigning to type 'svuint8x2_t' from type 'svfloat64x2_t'} } */
 }
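Reviewer note (a sketch, not part of the patch; names invented): compare_scalar_count_1.c above pins down the svwhilele_c8 overload rules — the two scalar arguments must promote to a consistently signed or unsigned pair of 64-bit integers, and the final argument must be the constant 2 or 4. A minimal accepted call:

  #pragma GCC target "+sme2"
  #include <arm_sve.h>

  svcount_t
  while_le (int64_t i, int64_t n) __arm_streaming
  {
    return svwhilele_c8 (i, n, 2);  /* vector-group size must be 2 or 4 */
  }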
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_int_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_int_lane_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..ca2a039b3903cccb7f98913004750a0086c22c81
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_int_lane_1.c
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+    svint32_t s32, svuint32_t u32,
+    svint8x2_t s8x2, svuint8x2_t u8x2,
+    svint8x3_t s8x3, svuint8x3_t u8x3,
+    svint8x4_t s8x4, svuint8x4_t u8x4,
+    svint16x2_t s16x2, svuint16x2_t u16x2,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8); /* { dg-error {too few arguments to function 'svusdot_lane_za32_vg1x2'} } */
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, 0, 0); /* { dg-error {too many arguments to function 'svusdot_lane_za32_vg1x2'} } */
+
+  svusdot_lane_za32_vg1x2 (u8x2, u8x2, s8, 0); /* { dg-error {passing 'svuint8x2_t' to argument 1 of 'svusdot_lane_za32_vg1x2', which expects 'uint32_t'} } */
+  svusdot_lane_za32_vg1x2 (f, u8x2, s8, 0);
+  svusdot_lane_za32_vg1x2 (d, u8x2, s8, 0);
+  svusdot_lane_za32_vg1x2 (pg, u8x2, s8, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svusdot_lane_za32_vg1x2', which expects 'uint32_t'} } */
+
+  svusdot_lane_za32_vg1x2 (0, 1, s8, 0); /* { dg-error {passing 'int' to argument 2 of 'svusdot_lane_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svusdot_lane_za32_vg1x2 (0, pg, s8, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svusdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svusdot_lane_za32_vg1x2 (0, u8, s8, 0); /* { dg-error {passing single vector 'svuint8_t' to argument 2 of 'svusdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svusdot_lane_za32_vg1x2 (0, u8x3, s8, 0); /* { dg-error {passing 'svuint8x3_t' to argument 2 of 'svusdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svusdot_lane_za32_vg1x2 (0, u8x4, s8, 0); /* { dg-error {passing 'svuint8x4_t' to argument 2 of 'svusdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+
+  svusdot_lane_za32_vg1x2 (0, u8x2, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svusdot_lane_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svusdot_lane_za32_vg1x2 (0, u8x2, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 of 'svusdot_lane_za32_vg1x2', which expects a vector of signed integers} } */
+  svusdot_lane_za32_vg1x2 (0, u8x2, u8, 0); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusdot_lane_za32_vg1x2', which expects a vector of signed integers} } */
+  svusdot_lane_za32_vg1x2 (0, u8x2, s32, 0); /* { dg-error {arguments 2 and 3 of 'svusdot_lane_za32_vg1x2' must have the same element size, but the values passed here have type 'svuint8x2_t' and 'svint32_t' respectively} } */
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8x2, 0); /* { dg-error {passing 'svint8x2_t' to argument 3 of 'svusdot_lane_za32_vg1x2', which expects a single SVE vector rather than a tuple} } */
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, 0);
+  svusdot_lane_za32_vg1x2 (0, s8x2, s8, 0); /* { dg-error {'svusdot_lane_za32_vg1x2' has no form that takes 'svint8x2_t' arguments} } */
+  svusdot_lane_za32_vg1x2 (0, u16x2, s16, 0); /* { dg-error {'svusdot_lane_za32_vg1x2' has no form that takes 'svuint16x2_t' arguments} } */
+
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, -1); /* { dg-error {passing -1 to argument 4 of 'svusdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, 3);
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, 4); /* { dg-error {passing 4 to argument 4 of 'svusdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, -1); /* { dg-error {passing -1 to argument 4 of 'svusdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, 3);
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, 4); /* { dg-error {passing 4 to argument 4 of 'svusdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, f); /* { dg-error {argument 4 of 'svusdot_lane_za32_vg1x2' must be an integer constant expression} } */
+}
+
+void
+f2 (svuint8x2_t u8x2, svint8_t s8) __arm_streaming
+{
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, 0); /* { dg-error {ACLE function 'svusdot_lane_za32_u8_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svuint8x2_t u8x2, svint8_t s8) __arm_inout("za")
+{
+  svusdot_lane_za32_vg1x2 (0, u8x2, s8, 0); /* { dg-error {ACLE function 'svusdot_lane_za32_u8_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_lane_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..e37d24ab6087f68da92f31ddeb5160f676b2d432
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_lane_1.c
@@ -0,0 +1,83 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+    svint32_t s32, svuint32_t u32,
+    svint8x2_t s8x2, svuint8x2_t u8x2,
+    svint16x2_t s16x2, svuint16x2_t u16x2,
+    svint16x3_t s16x3, svuint16x3_t u16x3,
+    svint16x4_t s16x4, svuint16x4_t u16x4,
+    svint32x2_t s32x2, svuint32x2_t u32x2,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svdot_lane_za32_vg1x2 (0, s16x2, s16); /* { dg-error {too few arguments to function 'svdot_lane_za32_vg1x2'} } */
+  svdot_lane_za32_vg1x2 (0, s16x2, s16, 0, 0); /* { dg-error {too many arguments to function 'svdot_lane_za32_vg1x2'} } */
+
+  svdot_lane_za32_vg1x2 (s16x2, s16x2, s16, 0); /* { dg-error {passing 'svint16x2_t' to argument 1 of 'svdot_lane_za32_vg1x2', which expects 'uint32_t'} } */
+  svdot_lane_za32_vg1x2 (f, s16x2, s16, 0);
+  svdot_lane_za32_vg1x2 (d, s16x2, s16, 0);
+  svdot_lane_za32_vg1x2 (pg, s16x2, s16, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svdot_lane_za32_vg1x2', which expects 'uint32_t'} } */
+
+  svdot_lane_za32_vg1x2 (0, 1, s16, 0); /* { dg-error {passing 'int' to argument 2 of 'svdot_lane_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svdot_lane_za32_vg1x2 (0, pg, s16, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svdot_lane_za32_vg1x2 (0, s16, s16, 0); /* { dg-error {passing single vector 'svint16_t' to argument 2 of 'svdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svdot_lane_za32_vg1x2 (0, s16x3, s16, 0); /* { dg-error {passing 'svint16x3_t' to argument 2 of 'svdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+  svdot_lane_za32_vg1x2 (0, s16x4, s16, 0); /* { dg-error {passing 'svint16x4_t' to argument 2 of 'svdot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */
+
+  svdot_lane_za32_vg1x2 (0, s16x2, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svdot_lane_za32_vg1x2', which expects an SVE type rather than a scalar type} } */
+  svdot_lane_za32_vg1x2 (0, s16x2, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 of 'svdot_lane_za32_vg1x2', but argument 2 was a tuple of 'svint16_t'} } */
+  svdot_lane_za32_vg1x2 (0, s16x2, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svdot_lane_za32_vg1x2', but argument 2 was a tuple of 'svint16_t'} } */
+  svdot_lane_za32_vg1x2 (0, s16x2, s32, 0); /* { dg-error {passing 'svint32_t' to argument 3 of 'svdot_lane_za32_vg1x2', but argument 2 was a tuple of 'svint16_t'} } */
+  svdot_lane_za32_vg1x2 (0, s16x2, s16x2, 0); /* { dg-error {passing 'svint16x2_t' to argument 3 of 'svdot_lane_za32_vg1x2', which expects a single SVE vector rather than a tuple} } */
+  svdot_lane_za32_vg1x2 (0, u16x2, u16, 0);
+  svdot_lane_za32_vg1x2 (0, u16x2, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svdot_lane_za32_vg1x2', but argument 2 was a tuple of 'svuint16_t'} } */
+  svdot_lane_za32_vg1x2 (0, s32x2, s32, 0); /* { dg-error {'svdot_lane_za32_vg1x2' has no form that takes 'svint32x2_t' arguments} } */
+  svdot_lane_za32_vg1x2 (0, u32x2, u32, 0); /* { dg-error {'svdot_lane_za32_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */
+
+  svdot_lane_za32_vg1x2 (0, s8x2, s8, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svdot_lane_za32_vg1x2 (0, s8x2, s8, 3);
+  svdot_lane_za32_vg1x2 (0, s8x2, s8, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svdot_lane_za32_vg1x2 (0, s16x2, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svdot_lane_za32_vg1x2 (0, s16x2, s16, 3);
+  svdot_lane_za32_vg1x2 (0, s16x2, s16, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */
+  svdot_lane_za32_vg1x2 (0, s16x2, s16, f); /* { dg-error {argument 4 of 'svdot_lane_za32_vg1x2' must be an integer constant expression} } */
+}
+
+void
+f2 (svint16x2_t s16x2, svint16_t s16) __arm_streaming
+{
+  svdot_lane_za32_vg1x2 (0, s16x2, s16, 0); /* { dg-error {ACLE function 'svdot_lane_za32_s16_vg1x2' can only be called from a function that has 'za' state} } */
+}
+
+void
+f3 (svint16x2_t s16x2, svint16_t s16) __arm_inout("za")
+{
+  svdot_lane_za32_vg1x2 (0, s16x2, s16, 0); /* { dg-error {ACLE function 'svdot_lane_za32_s16_vg1x2' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("+sme-i16i64")
+
+void
+f4 (svint16_t s16, svuint16_t u16,
+    svint16x2_t s16x2, svuint16x2_t u16x2,
+    svint32_t s32, svuint32_t u32,
+    svint32x2_t s32x2, svuint32x2_t u32x2,
+    svint64_t s64, svuint64_t u64,
+    svint64x2_t s64x2, svuint64x2_t u64x2)
+  __arm_streaming __arm_inout("za")
+{
+  svdot_lane_za64_vg1x2 (0, s16x2, s16, 0);
+  svdot_lane_za64_vg1x2 (0, u16x2, u16, 0);
+  svdot_lane_za64_vg1x2 (0, s16x2, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_za64_vg1x2', which expects a value in the range \[0, 1\]} } */
+  svdot_lane_za64_vg1x2 (0, s16x2, s16, 1);
+  svdot_lane_za64_vg1x2 (0, u16x2, u16, 2); /* { dg-error {passing 2 to argument 4 of 'svdot_lane_za64_vg1x2', which expects a value in the range \[0, 1\]} } */
+  svdot_lane_za64_vg1x2 (0, s32x2, s32, 0); /* { dg-error {'svdot_lane_za64_vg1x2' has no form that takes 'svint32x2_t' arguments} } */
+  svdot_lane_za64_vg1x2 (0, u32x2, u32, 0); /* { dg-error {'svdot_lane_za64_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */
+  svdot_lane_za64_vg1x2 (0, s64x2, s64, 0); /* { dg-error {'svdot_lane_za64_vg1x2' has no form that takes 'svint64x2_t' arguments} } */
+  svdot_lane_za64_vg1x2 (0, u64x2, u64, 0); /* { dg-error {'svdot_lane_za64_vg1x2' has no form that takes 'svuint64x2_t' arguments} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_lane_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_lane_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..7af3c6f9eef9cfade446583b0ef13c483ffc7d76
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_lane_2.c
@@ -0,0 +1,83 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("+sme2")
+
+void
+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+    svint32_t s32, svuint32_t u32,
+    svint8x4_t s8x4, svuint8x4_t u8x4,
+    svint16x2_t s16x2, svuint16x2_t u16x2,
+    svint16x3_t s16x3, svuint16x3_t u16x3,
+    svint16x4_t s16x4, svuint16x4_t u16x4,
+    svint32x4_t s32x4, svuint32x4_t u32x4,
+    float f, double d)
+  __arm_streaming __arm_inout("za")
+{
+  svdot_lane_za32_vg1x4 (0, s16x4, s16); /* { dg-error {too few arguments to function 'svdot_lane_za32_vg1x4'} } */
+  svdot_lane_za32_vg1x4 (0, s16x4, s16, 0, 0); /* { dg-error {too many arguments to function 'svdot_lane_za32_vg1x4'} } */
+
+  svdot_lane_za32_vg1x4 (s16x4, s16x4, s16, 0); /* { dg-error {passing 'svint16x4_t' to argument 1 of 'svdot_lane_za32_vg1x4', which expects 'uint32_t'} } */
+  svdot_lane_za32_vg1x4 (f, s16x4, s16, 0);
+  svdot_lane_za32_vg1x4 (d, s16x4, s16, 0);
+  svdot_lane_za32_vg1x4 (pg, s16x4, s16, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svdot_lane_za32_vg1x4', which expects 'uint32_t'} } */
+
+  svdot_lane_za32_vg1x4 (0, 1, s16, 0); /* { dg-error {passing 'int' to argument 2 of 'svdot_lane_za32_vg1x4', which expects an SVE type rather than a scalar type} } */
+  svdot_lane_za32_vg1x4 (0, pg, s16, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svdot_lane_za32_vg1x4', which expects a tuple of 4 vectors} } */
+  svdot_lane_za32_vg1x4 (0, s16, s16, 0); /* { dg-error {passing single vector 'svint16_t' to argument 2 of 'svdot_lane_za32_vg1x4', which expects a tuple of 4 vectors} } */
+  svdot_lane_za32_vg1x4 (0, s16x2, s16, 0); /* { dg-error {passing 'svint16x2_t' to argument 2 of 'svdot_lane_za32_vg1x4', which expects a tuple of 4 vectors} } */
+  svdot_lane_za32_vg1x4 (0, s16x3, s16, 0); /* { dg-error {passing 'svint16x3_t' to argument 2 of 'svdot_lane_za32_vg1x4', which expects a tuple of 4 vectors} } */
+
+  svdot_lane_za32_vg1x4 (0, s16x4, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svdot_lane_za32_vg1x4', which expects an SVE type rather than a scalar type} } */
+  svdot_lane_za32_vg1x4 (0, s16x4, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 
of 'svdot_lane_za32_vg1x4', but argument 2 was a tuple of 'svint16_t'} } */ + svdot_lane_za32_vg1x4 (0, s16x4, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svdot_lane_za32_vg1x4', but argument 2 was a tuple of 'svint16_t'} } */ + svdot_lane_za32_vg1x4 (0, s16x4, s32, 0); /* { dg-error {passing 'svint32_t' to argument 3 of 'svdot_lane_za32_vg1x4', but argument 2 was a tuple of 'svint16_t'} } */ + svdot_lane_za32_vg1x4 (0, s16x4, s16x4, 0); /* { dg-error {passing 'svint16x4_t' to argument 3 of 'svdot_lane_za32_vg1x4', which expects a single SVE vector rather than a tuple} } */ + svdot_lane_za32_vg1x4 (0, u16x4, u16, 0); + svdot_lane_za32_vg1x4 (0, u16x4, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svdot_lane_za32_vg1x4', but argument 2 was a tuple of 'svuint16_t'} } */ + svdot_lane_za32_vg1x4 (0, s32x4, s32, 0); /* { dg-error {'svdot_lane_za32_vg1x4' has no form that takes 'svint32x4_t' arguments} } */ + svdot_lane_za32_vg1x4 (0, u32x4, u32, 0); /* { dg-error {'svdot_lane_za32_vg1x4' has no form that takes 'svuint32x4_t' arguments} } */ + + svdot_lane_za32_vg1x4 (0, s8x4, s8, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_za32_vg1x4', which expects a value in the range \[0, 3\]} } */ + svdot_lane_za32_vg1x4 (0, s8x4, s8, 3); + svdot_lane_za32_vg1x4 (0, s8x4, s8, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane_za32_vg1x4', which expects a value in the range \[0, 3\]} } */ + svdot_lane_za32_vg1x4 (0, s16x4, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_za32_vg1x4', which expects a value in the range \[0, 3\]} } */ + svdot_lane_za32_vg1x4 (0, s16x4, s16, 3); + svdot_lane_za32_vg1x4 (0, s16x4, s16, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane_za32_vg1x4', which expects a value in the range \[0, 3\]} } */ + svdot_lane_za32_vg1x4 (0, s16x4, s16, f); /* { dg-error {argument 4 of 'svdot_lane_za32_vg1x4' must be an integer constant expression} } */ +} + +void +f2 (svint16x4_t s16x4, svint16_t s16) __arm_streaming +{ + svdot_lane_za32_vg1x4 (0, s16x4, s16, 0); /* { dg-error {ACLE function 'svdot_lane_za32_s16_vg1x4' can only be called from a function that has 'za' state} } */ +} + +void +f3 (svint16x4_t s16x4, svint16_t s16) __arm_inout("za") +{ + svdot_lane_za32_vg1x4 (0, s16x4, s16, 0); /* { dg-error {ACLE function 'svdot_lane_za32_s16_vg1x4' can only be called when SME streaming mode is enabled} } */ +} + +#pragma GCC target ("+sme-i16i64") + +void +f4 (svint16_t s16, svuint16_t u16, + svint16x4_t s16x4, svuint16x4_t u16x4, + svint32_t s32, svuint32_t u32, + svint32x4_t s32x4, svuint32x4_t u32x4, + svint64_t s64, svuint64_t u64, + svint64x4_t s64x4, svuint64x4_t u64x4) + __arm_streaming __arm_inout("za") +{ + svdot_lane_za64_vg1x4 (0, s16x4, s16, 0); + svdot_lane_za64_vg1x4 (0, u16x4, u16, 0); + svdot_lane_za64_vg1x4 (0, s16x4, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_za64_vg1x4', which expects a value in the range \[0, 1\]} } */ + svdot_lane_za64_vg1x4 (0, s16x4, s16, 1); + svdot_lane_za64_vg1x4 (0, u16x4, u16, 2); /* { dg-error {passing 2 to argument 4 of 'svdot_lane_za64_vg1x4', which expects a value in the range \[0, 1\]} } */ + svdot_lane_za64_vg1x4 (0, s32x4, s32, 0); /* { dg-error {'svdot_lane_za64_vg1x4' has no form that takes 'svint32x4_t' arguments} } */ + svdot_lane_za64_vg1x4 (0, u32x4, u32, 0); /* { dg-error {'svdot_lane_za64_vg1x4' has no form that takes 'svuint32x4_t' arguments} } */ + svdot_lane_za64_vg1x4 (0, s64x4, s64, 0); /* { dg-error {'svdot_lane_za64_vg1x4' has 
no form that takes 'svint64x4_t' arguments} } */ + svdot_lane_za64_vg1x4 (0, u64x4, u64, 0); /* { dg-error {'svdot_lane_za64_vg1x4' has no form that takes 'svuint64x4_t' arguments} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_uint_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_uint_lane_1.c new file mode 100644 index 0000000000000000000000000000000000000000..2efa2eb15ee0f713815b1d4a83dbdcbfb8a3cf87 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_za_slice_uint_lane_1.c @@ -0,0 +1,59 @@ +/* { dg-do compile } */ + +#include <arm_sme.h> + +#pragma GCC target ("+sme2") + +void +f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, + svint32_t s32, svuint32_t u32, + svint8x2_t s8x2, svuint8x2_t u8x2, + svint8x3_t s8x3, svuint8x3_t u8x3, + svint8x4_t s8x4, svuint8x4_t u8x4, + svint16x2_t s16x2, svuint16x2_t u16x2, + float f, double d) + __arm_streaming __arm_inout("za") +{ + svsudot_lane_za32_vg1x2 (0, s8x2, u8); /* { dg-error {too few arguments to function 'svsudot_lane_za32_vg1x2'} } */ + svsudot_lane_za32_vg1x2 (0, s8x2, u8, 0, 0); /* { dg-error {too many arguments to function 'svsudot_lane_za32_vg1x2'} } */ + + svsudot_lane_za32_vg1x2 (u8x2, s8x2, u8, 0); /* { dg-error {passing 'svuint8x2_t' to argument 1 of 'svsudot_lane_za32_vg1x2', which expects 'uint32_t'} } */ + svsudot_lane_za32_vg1x2 (f, s8x2, u8, 0); + svsudot_lane_za32_vg1x2 (d, s8x2, u8, 0); + svsudot_lane_za32_vg1x2 (pg, s8x2, u8, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svsudot_lane_za32_vg1x2', which expects 'uint32_t'} } */ + + svsudot_lane_za32_vg1x2 (0, 1, u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svsudot_lane_za32_vg1x2', which expects an SVE type rather than a scalar type} } */ + svsudot_lane_za32_vg1x2 (0, pg, u8, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svsudot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */ + svsudot_lane_za32_vg1x2 (0, s8, u8, 0); /* { dg-error {passing single vector 'svint8_t' to argument 2 of 'svsudot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */ + svsudot_lane_za32_vg1x2 (0, s8x3, u8, 0); /* { dg-error {passing 'svint8x3_t' to argument 2 of 'svsudot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */ + svsudot_lane_za32_vg1x2 (0, s8x4, u8, 0); /* { dg-error {passing 'svint8x4_t' to argument 2 of 'svsudot_lane_za32_vg1x2', which expects a tuple of 2 vectors} } */ + + svsudot_lane_za32_vg1x2 (0, s8x2, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svsudot_lane_za32_vg1x2', which expects an SVE type rather than a scalar type} } */ + svsudot_lane_za32_vg1x2 (0, s8x2, pg, 0); /* { dg-error {passing 'svbool_t' to argument 3 of 'svsudot_lane_za32_vg1x2', which expects a vector of unsigned integers} } */ + svsudot_lane_za32_vg1x2 (0, s8x2, s8, 0); /* { dg-error {passing 'svint8_t' to argument 3 of 'svsudot_lane_za32_vg1x2', which expects a vector of unsigned integers} } */ + svsudot_lane_za32_vg1x2 (0, s8x2, u32, 0); /* { dg-error {arguments 2 and 3 of 'svsudot_lane_za32_vg1x2' must have the same element size, but the values passed here have type 'svint8x2_t' and 'svuint32_t' respectively} } */ + svsudot_lane_za32_vg1x2 (0, s8x2, u8x2, 0); /* { dg-error {passing 'svuint8x2_t' to argument 3 of 'svsudot_lane_za32_vg1x2', which expects a single SVE vector rather than a tuple} } */ + svsudot_lane_za32_vg1x2 (0, s8x2, u8, 0); + svsudot_lane_za32_vg1x2 (0, u8x2, u8, 0); /* { dg-error {'svsudot_lane_za32_vg1x2' has no 
form that takes 'svuint8x2_t' arguments} } */ + svsudot_lane_za32_vg1x2 (0, s16x2, u16, 0); /* { dg-error {'svsudot_lane_za32_vg1x2' has no form that takes 'svint16x2_t' arguments} } */ + + svsudot_lane_za32_vg1x2 (0, s8x2, u8, -1); /* { dg-error {passing -1 to argument 4 of 'svsudot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */ + svsudot_lane_za32_vg1x2 (0, s8x2, u8, 3); + svsudot_lane_za32_vg1x2 (0, s8x2, u8, 4); /* { dg-error {passing 4 to argument 4 of 'svsudot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */ + svsudot_lane_za32_vg1x2 (0, s8x2, u8, -1); /* { dg-error {passing -1 to argument 4 of 'svsudot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */ + svsudot_lane_za32_vg1x2 (0, s8x2, u8, 3); + svsudot_lane_za32_vg1x2 (0, s8x2, u8, 4); /* { dg-error {passing 4 to argument 4 of 'svsudot_lane_za32_vg1x2', which expects a value in the range \[0, 3\]} } */ + svsudot_lane_za32_vg1x2 (0, s8x2, u8, f); /* { dg-error {argument 4 of 'svsudot_lane_za32_vg1x2' must be an integer constant expression} } */ +} + +void +f2 (svint8x2_t s8x2, svuint8_t u8) __arm_streaming +{ + svsudot_lane_za32_vg1x2 (0, s8x2, u8, 0); /* { dg-error {ACLE function 'svsudot_lane_za32_s8_vg1x2' can only be called from a function that has 'za' state} } */ +} + +void +f3 (svint8x2_t s8x2, svuint8_t u8) __arm_inout("za") +{ + svsudot_lane_za32_vg1x2 (0, s8x2, u8, 0); /* { dg-error {ACLE function 'svsudot_lane_za32_s8_vg1x2' can only be called when SME streaming mode is enabled} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowxn_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowxn_1.c new file mode 100644 index 0000000000000000000000000000000000000000..ab5602f0aa69e1a3a02546f48739e56ffb6f968f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowxn_1.c @@ -0,0 +1,89 @@ +/* { dg-do compile } */ + +#include <arm_sve.h> + +#pragma GCC target ("+sme2") + +void +f1 (svboolx2_t pgx2, + svint8x2_t s8x2, svuint8x2_t u8x2, + svint8x4_t s8x4, svuint8x4_t u8x4, + svint16x2_t s16x2, svuint16x2_t u16x2, + svint16x4_t s16x4, svuint16x4_t u16x4, + svint32x2_t s32x2, svuint32x2_t u32x2, + svint32x4_t s32x4, svuint32x4_t u32x4, + svint64x2_t s64x2, svuint64x2_t u64x2, + svint64x4_t s64x4, svuint64x4_t u64x4, + svfloat32x2_t f32x2, int x) __arm_streaming +{ + const int one = 1; + svqrshr_u8 (u32x4); /* { dg-error {too few arguments to function 'svqrshr_u8'} } */ + svqrshr_u8 (u32x4, 1, 1); /* { dg-error {too many arguments to function 'svqrshr_u8'} } */ + + svqrshr_u8 (u32x4, x); /* { dg-error {argument 2 of 'svqrshr_u8' must be an integer constant expression} } */ + svqrshr_u8 (u32x4, one); /* { dg-error {argument 2 of 'svqrshr_u8' must be an integer constant expression} } */ + svqrshr_u8 (u32x4, 0.4); /* { dg-error {passing 0 to argument 2 of 'svqrshr_u8', which expects a value in the range \[1, 32\]} } */ + svqrshr_u8 (u32x4, 1.0); + + svqrshr_u8 (pgx2, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svboolx2_t' arguments} } */ + svqrshr_u8 (u8x2, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint8x2_t' arguments} } */ + svqrshr_u8 (u8x4, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint8x4_t' arguments} } */ + svqrshr_u8 (u16x2, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint16x2_t' arguments} } */ + svqrshr_u8 (u16x4, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint16x4_t' arguments} } */ + svqrshr_u8 
(u32x2, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint32x2_t' arguments} } */ + svqrshr_u8 (u32x4, 1); + svqrshr_u8 (u64x2, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint64x2_t' arguments} } */ + svqrshr_u8 (u64x4, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svuint64x4_t' arguments} } */ + svqrshr_u8 (s32x4, 1); /* { dg-error {'svqrshr_u8' has no form that takes 'svint32x4_t' arguments} } */ + + svqrshr_s8 (s8x2, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint8x2_t' arguments} } */ + svqrshr_s8 (s8x4, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint8x4_t' arguments} } */ + svqrshr_s8 (s16x2, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint16x2_t' arguments} } */ + svqrshr_s8 (s16x4, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint16x4_t' arguments} } */ + svqrshr_s8 (s32x2, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint32x2_t' arguments} } */ + svqrshr_s8 (s32x4, 1); + svqrshr_s8 (s64x2, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint64x2_t' arguments} } */ + svqrshr_s8 (s64x4, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svint64x4_t' arguments} } */ + svqrshr_s8 (u32x4, 1); /* { dg-error {'svqrshr_s8' has no form that takes 'svuint32x4_t' arguments} } */ + + svqrshr_u16 (pgx2, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svboolx2_t' arguments} } */ + svqrshr_u16 (u8x2, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svuint8x2_t' arguments} } */ + svqrshr_u16 (u8x4, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svuint8x4_t' arguments} } */ + svqrshr_u16 (u16x2, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svuint16x2_t' arguments} } */ + svqrshr_u16 (u16x4, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svuint16x4_t' arguments} } */ + svqrshr_u16 (u32x2, 1); + svqrshr_u16 (u32x4, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svuint32x4_t' arguments} } */ + svqrshr_u16 (u64x2, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svuint64x2_t' arguments} } */ + svqrshr_u16 (u64x4, 1); + svqrshr_u16 (s32x2, 1); /* { dg-error {'svqrshr_u16' has no form that takes 'svint32x2_t' arguments} } */ + + svqrshr_s16 (s8x2, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svint8x2_t' arguments} } */ + svqrshr_s16 (s8x4, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svint8x4_t' arguments} } */ + svqrshr_s16 (s16x2, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svint16x2_t' arguments} } */ + svqrshr_s16 (s16x4, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svint16x4_t' arguments} } */ + svqrshr_s16 (s32x2, 1); + svqrshr_s16 (s32x4, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svint32x4_t' arguments} } */ + svqrshr_s16 (s64x2, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svint64x2_t' arguments} } */ + svqrshr_s16 (s64x4, 1); + svqrshr_s16 (u32x2, 1); /* { dg-error {'svqrshr_s16' has no form that takes 'svuint32x2_t' arguments} } */ + + svqrshr_u8 (u32x4, -1); /* { dg-error {passing -1 to argument 2 of 'svqrshr_u8', which expects a value in the range \[1, 32\]} } */ + svqrshr_u8 (u32x4, 0); /* { dg-error {passing 0 to argument 2 of 'svqrshr_u8', which expects a value in the range \[1, 32\]} } */ + svqrshr_u8 (u32x4, 1); + svqrshr_u8 (u32x4, 32); + svqrshr_u8 (u32x4, 33); /* { dg-error {passing 33 to argument 2 of 'svqrshr_u8', which expects a value in the range \[1, 32\]} } */ + + svqrshr_u16 (u32x2, -1); /* { dg-error {passing -1 to 
argument 2 of 'svqrshr_u16', which expects a value in the range \[1, 16\]} } */ + svqrshr_u16 (u32x2, 0); /* { dg-error {passing 0 to argument 2 of 'svqrshr_u16', which expects a value in the range \[1, 16\]} } */ + svqrshr_u16 (u32x2, 1); + svqrshr_u16 (u32x2, 16); + svqrshr_u16 (u32x2, 17); /* { dg-error {passing 17 to argument 2 of 'svqrshr_u16', which expects a value in the range \[1, 16\]} } */ + + svqrshr_u16 (u64x4, -1); /* { dg-error {passing -1 to argument 2 of 'svqrshr_u16', which expects a value in the range \[1, 64\]} } */ + svqrshr_u16 (u64x4, 0); /* { dg-error {passing 0 to argument 2 of 'svqrshr_u16', which expects a value in the range \[1, 64\]} } */ + svqrshr_u16 (u64x4, 1); + svqrshr_u16 (u64x4, 64); + svqrshr_u16 (u64x4, 65); /* { dg-error {passing 65 to argument 2 of 'svqrshr_u16', which expects a value in the range \[1, 64\]} } */ + + svqrshr_u8 (1, 1); /* { dg-error {passing 'int' to argument 1 of 'svqrshr_u8', which expects an SVE type rather than a scalar} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c index 6011ab05414ec70e74cce55e27afb6b21014a793..cfe686929f7fc2ee724c270b5cbaf4e0a3af4269 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c @@ -12,7 +12,7 @@ f1 (svbool_t pg, signed char *s8_ptr, void *void_ptr, struct s *s_ptr, { svst1 (pg, s8_ptr); /* { dg-error {too few arguments to function 'svst1'} } */ svst1 (pg, s8_ptr, s8, 0); /* { dg-error {too many arguments to function 'svst1'} } */ - svst1 (0, s8_ptr, s8); /* { dg-error {passing 'int' to argument 1 of 'svst1', which expects 'svbool_t'} } */ + svst1 (0, s8_ptr, s8); /* { dg-error {passing 'int' to argument 1 of 'svst1', which expects an 'svbool_t' or 'svcount_t'} } */ svst1 (pg, void_ptr, 0); /* { dg-error {passing 'int' to argument 3 of 'svst1', which expects an SVE type rather than a scalar} } */ svst1 (pg, void_ptr, pg); /* { dg-error {'svst1' has no form that takes 'svbool_t' arguments} } */ svst1 (pg, 0, s8); diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c index 552540bf7ffc90c4d4d40e1daae1490b6f68e551..eb12cbb8af84467cfc44167b7967386a8b5c5e07 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c @@ -11,7 +11,7 @@ f1 (svbool_t pg, signed char *s8_ptr, void *void_ptr, struct s *s_ptr, { svst1_vnum (pg, s8_ptr, 0); /* { dg-error {too few arguments to function 'svst1_vnum'} } */ svst1_vnum (pg, s8_ptr, 0, s8, 0); /* { dg-error {too many arguments to function 'svst1_vnum'} } */ - svst1_vnum (0, s8_ptr, 0, s8); /* { dg-error {passing 'int' to argument 1 of 'svst1_vnum', which expects 'svbool_t'} } */ + svst1_vnum (0, s8_ptr, 0, s8); /* { dg-error {passing 'int' to argument 1 of 'svst1_vnum', which expects an 'svbool_t' or 'svcount_t'} } */ svst1_vnum (pg, s8_ptr, pg, s8); /* { dg-error {passing 'svbool_t' to argument 3 of 'svst1_vnum', which expects 'int64_t'} } */ svst1_vnum (pg, s8_ptr, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svst1_vnum', which expects 'int64_t'} } */ svst1_vnum (pg, s8_ptr, void_ptr, s8); /* { dg-error "passing argument 3 of 'svst1_vnum_s8' makes integer from pointer without a cast" } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/storexn_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/storexn_1.c new file mode 100644 index 0000000000000000000000000000000000000000..7ad4ca8a580ac9623dabc4ba32c0d978070f93c0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/storexn_1.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-std=c99" } */ + +#include <arm_sve.h> + +#pragma GCC target "+sme2" + +struct s { signed char x; }; + +svuint8_t +f1 (svbool_t pg, svcount_t pn, svboolx2_t pgx2, + signed char *s8_ptr, void *void_ptr, struct s *s_ptr, + float *f32_ptr, _Complex float *cf32_ptr, + svint8_t s8, svint8x2_t s8x2, svint8x3_t s8x3, + svfloat32x4_t f32x4, struct s s) __arm_streaming +{ + svst1 (pn, s8_ptr); /* { dg-error {too few arguments to function 'svst1'} } */ + svst1 (pn, s8_ptr, s8x2, 0); /* { dg-error {too many arguments to function 'svst1'} } */ + svst1 (0, s8_ptr, s8x2); /* { dg-error {passing 'int' to argument 1 of 'svst1', which expects an 'svbool_t' or 'svcount_t'} } */ + svst1 (pn, void_ptr, 0x2); /* { dg-error {passing 'int' to argument 3 of 'svst1', which expects an SVE type rather than a scalar} } */ + svst1 (pn, void_ptr, pgx2); /* { dg-error {'svst1' has no form that takes 'svboolx2_t' arguments} } */ + svst1 (pn, 0, s8); /* { dg-error {operations on single vectors must be predicated by 'svbool_t' rather than 'svcount_t'} } */ + svst1 (pn, 0, s8x2); + svst1 (pg, 0, s8x2); /* { dg-error {operations on multiple vectors must be predicated by 'svcount_t' rather than 'svbool_t'} } */ + svst1 (pn, 0, s8x3); /* { dg-error {'svst1' has no form that takes 'svint8x3_t' arguments} } */ + svst1 (pn, (int32_t *) 0, s8x2); /* { dg-error "passing argument 2 of 'svst1_s8_x2' from incompatible pointer type" } */ + svst1 (pn, void_ptr, s8x2); + svst1 (pn, s_ptr, s8x2); /* { dg-error "passing argument 2 of 'svst1_s8_x2' from incompatible pointer type" } */ + svst1 (pn, f32_ptr, s8x2); /* { dg-error "passing argument 2 of 'svst1_s8_x2' from incompatible pointer type" } */ + svst1 (pn, f32_ptr, f32x4); + svst1 (pn, cf32_ptr, f32x4); /* { dg-error "passing argument 2 of 'svst1_f32_x4' from incompatible pointer type" } */ + svst1 (pn, s, s8x2); /* { dg-error {passing 'struct s' to argument 2 of 'svst1', which expects a scalar pointer} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c index c69b2d5750353ebdc9378a169993c1b7d1c7fd0e..ed38b78d3cda616a3f7c23be462e245ad4733a8e 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c @@ -10,32 +10,32 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, svdot_lane (u32, u8, u8); /* { dg-error {too few arguments to function 'svdot_lane'} } */ svdot_lane (u32, u8, u8, 0, 0); /* { dg-error {too many arguments to function 'svdot_lane'} } */ svdot_lane (0, u8, u8, 0); /* { dg-error {passing 'int' to argument 1 of 'svdot_lane', which expects an SVE type rather than a scalar} } */ - svdot_lane (pg, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svbool_t' arguments} } */ - svdot_lane (u8, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svuint8_t' arguments} } */ - svdot_lane (f32, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svfloat32_t' arguments} } */ + svdot_lane (pg, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svbool_t' and 'svuint8_t' arguments} } 
*/ + svdot_lane (u8, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svuint8_t' and 'svuint8_t' arguments} } */ + svdot_lane (f32, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svfloat32_t' and 'svuint8_t' arguments} } */ svdot_lane (u32, u8, u8, 0); svdot_lane (u32, 0, u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svdot_lane', which expects an SVE type rather than a scalar} } */ svdot_lane (u32, u8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svdot_lane', which expects an SVE type rather than a scalar} } */ svdot_lane (s32, s8, s8, 0); - svdot_lane (s32, u8, s8, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */ - svdot_lane (s32, s8, u8, 0); /* { dg-error {arguments 1 and 3 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */ - svdot_lane (s32, s32, s32, 0); /* { dg-error {passing 'svint32_t' instead of the expected 'svint8_t' to argument 2 of 'svdot_lane', after passing 'svint32_t' to argument 1} } */ + svdot_lane (s32, u8, s8, 0); /* { dg-error {passing 'svint8_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svuint8_t'} } */ + svdot_lane (s32, s8, u8, 0); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svint8_t'} } */ + svdot_lane (s32, s32, s32, 0); /* { dg-error {'svdot_lane' has no form that takes 'svint32_t' and 'svint32_t' arguments} } */ svdot_lane (u32, u8, u8, 0); - svdot_lane (u32, s8, u8, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */ - svdot_lane (u32, u8, s8, 0); /* { dg-error {arguments 1 and 3 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */ - svdot_lane (u32, u32, u32, 0); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svdot_lane', after passing 'svuint32_t' to argument 1} } */ + svdot_lane (u32, s8, u8, 0); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svint8_t'} } */ + svdot_lane (u32, u8, s8, 0); /* { dg-error {passing 'svint8_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svuint8_t'} } */ + svdot_lane (u32, u32, u32, 0); /* { dg-error {'svdot_lane' has no form that takes 'svuint32_t' and 'svuint32_t' arguments} } */ svdot_lane (s64, s16, s16, 0); - svdot_lane (s64, u16, s16, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint64_t' and 'svuint16_t' respectively} } */ - svdot_lane (s64, s16, u16, 0); /* { dg-error {arguments 1 and 3 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint64_t' and 'svuint16_t' respectively} } */ - svdot_lane (s64, s64, s64, 0); /* { dg-error {passing 'svint64_t' instead of the expected 'svint16_t' to argument 2 of 'svdot_lane', after passing 'svint64_t' to argument 1} } */ + svdot_lane (s64, u16, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svuint16_t'} } */ + svdot_lane (s64, s16, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svint16_t'} } */ + svdot_lane (s64, s64, s64, 0); /* { dg-error {'svdot_lane' has no form that takes 
'svint64_t' and 'svint64_t' arguments} } */ svdot_lane (u64, u16, u16, 0); - svdot_lane (u64, s16, u16, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svuint64_t' and 'svint16_t' respectively} } */ - svdot_lane (u64, u16, s16, 0); /* { dg-error {arguments 1 and 3 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svuint64_t' and 'svint16_t' respectively} } */ - svdot_lane (u64, u64, u64, 0); /* { dg-error {passing 'svuint64_t' instead of the expected 'svuint16_t' to argument 2 of 'svdot_lane', after passing 'svuint64_t' to argument 1} } */ + svdot_lane (u64, s16, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svint16_t'} } */ + svdot_lane (u64, u16, s16, 0); /* { dg-error {passing 'svint16_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svuint16_t'} } */ + svdot_lane (u64, u64, u64, 0); /* { dg-error {'svdot_lane' has no form that takes 'svuint64_t' and 'svuint64_t' arguments} } */ svdot_lane (s32, s8, s8, i); /* { dg-error {argument 4 of 'svdot_lane' must be an integer constant expression} } */ svdot_lane (s32, s8, s8, 0); diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c index 85d4b2dd8d54a4189fd15d64c153d8851b684c41..fc92dcedcb22b39e63d3d00f4e1c8c0b357f281d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c @@ -9,13 +9,13 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, svdot (u32, u8); /* { dg-error {too few arguments to function 'svdot'} } */ svdot (u32, u8, u8, u8); /* { dg-error {too many arguments to function 'svdot'} } */ svdot (0, u8, u8); /* { dg-error {passing 'int' to argument 1 of 'svdot', which expects an SVE type rather than a scalar} } */ - svdot (pg, u8, u8); /* { dg-error {'svdot' has no form that takes 'svbool_t' arguments} } */ - svdot (u8, u8, u8); /* { dg-error {'svdot' has no form that takes 'svuint8_t' arguments} } */ - svdot (f32, u8, u8); /* { dg-error {'svdot' has no form that takes 'svfloat32_t' arguments} } */ + svdot (pg, u8, u8); /* { dg-error {'svdot' has no form that takes 'svbool_t' and 'svuint8_t' arguments} }*/ + svdot (u8, u8, u8); /* { dg-error {'svdot' has no form that takes 'svuint8_t' and 'svuint8_t' arguments} } */ + svdot (f32, u8, u8); /* { dg-error {'svdot' has no form that takes 'svfloat32_t' and 'svuint8_t' arguments} } */ svdot (u32, u8, u8); svdot (u32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svdot', which expects an SVE type rather than a scalar} } */ - svdot (u32, s8, u8); /* { dg-error {arguments 1 and 2 of 'svdot' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */ + svdot (u32, s8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svdot', but argument 2 had type 'svint8_t'} } */ svdot (u32, u8, 0); - svdot (u32, u8, s8); /* { dg-error {arguments 1 and 3 of 'svdot' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */ - svdot (u32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svdot', after passing 'svuint32_t' to argument 1} } */ + svdot (u32, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svdot', but argument 2 had 
type 'svuint8_t'} } */ + svdot (u32, u32, u32); /* { dg-error {'svdot' has no form that takes 'svuint32_t' and 'svuint32_t' arguments} } */ } diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_or_011_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_or_011_lane_1.c new file mode 100644 index 0000000000000000000000000000000000000000..b8968c878e1d315299c39a6dee7ee261cd4e9c43 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_or_011_lane_1.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ + +#include <arm_sve.h> + +#pragma GCC target "+sme2" + +void +f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, + svint32_t s32, svuint32_t u32, svint64_t s64, svuint64_t u64, + svfloat16_t f16, svfloat32_t f32, int i) __arm_streaming +{ + svdot_lane (u32, u16, u8, 0); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svuint16_t'} } */ + svdot_lane (u32, u8, u16, 0); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svdot_lane', but argument 2 had type 'svuint8_t'} } */ + svdot_lane (u32, s16, s16, 0); /* { dg-error {'svdot_lane' has no form that takes 'svuint32_t' and 'svint16_t' arguments} } */ + + svdot_lane (u32, u16, u16, i); /* { dg-error {argument 4 of 'svdot_lane' must be an integer constant expression} } */ + svdot_lane (u32, u16, u16, 0); + svdot_lane (u32, u16, u16, 3); + svdot_lane (u32, u16, u16, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */ + svdot_lane (u32, u16, u16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */ + + svdot_lane (s32, s16, s16, i); /* { dg-error {argument 4 of 'svdot_lane' must be an integer constant expression} } */ + svdot_lane (s32, s16, s16, 0); + svdot_lane (s32, s16, s16, 3); + svdot_lane (s32, s16, s16, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */ + svdot_lane (s32, s16, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */ + + svdot_lane (f32, f16, f16, i); /* { dg-error {argument 4 of 'svdot_lane' must be an integer constant expression} } */ + svdot_lane (f32, f16, f16, 0); + svdot_lane (f32, f16, f16, 3); + svdot_lane (f32, f16, f16, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */ + svdot_lane (f32, f16, f16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convertxn_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convertxn_1.c new file mode 100644 index 0000000000000000000000000000000000000000..85f8b45032d1a0d0d0a1bd1d8f42d98f65f94be3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convertxn_1.c @@ -0,0 +1,28 @@ +#include <arm_sve.h> + +#pragma GCC target "+sme2" + +void +test (svbool_t pg, float f, svint8_t s8, svfloat32_t f32, + svint32x2_t s32x2, svint32x3_t s32x3, svint32x4_t s32x4, + svfloat32x2_t f32x2, svfloat32x3_t f32x3, svfloat32x4_t f32x4) + __arm_streaming +{ + svcvt_bf16 (); /* { dg-error {too few arguments to function 'svcvt_bf16'} } */ + svcvt_bf16 (f32x2, f32x2); /* { dg-error {too many arguments to function 'svcvt_bf16'} } */ + svcvt_bf16 (0); /* { dg-error {passing 'int' to argument 1 of 'svcvt_bf16', which 
expects an SVE type rather than a scalar} } */ + svcvt_bf16 (f); /* { dg-error {passing 'float' to argument 1 of 'svcvt_bf16', which expects an SVE type rather than a scalar} } */ + svcvt_bf16 (pg); /* { dg-error {'svcvt_bf16' has no form that takes 'svbool_t' arguments} } */ + svcvt_bf16 (s8); /* { dg-error {'svcvt_bf16' has no form that takes 'svint8_t' arguments} } */ + svcvt_bf16 (f32); /* { dg-error {'svcvt_bf16' has no form that takes 'svfloat32_t' arguments} } */ + svcvt_bf16 (f32x2); + svcvt_bf16 (f32x3); /* { dg-error {'svcvt_bf16' has no form that takes 'svfloat32x3_t' arguments} } */ + svcvt_bf16 (f32x4); /* { dg-error {'svcvt_bf16' has no form that takes 'svfloat32x4_t' arguments} } */ + svcvt_bf16 (s32x2); /* { dg-error {'svcvt_bf16' has no form that takes 'svint32x2_t' arguments} } */ + svcvt_s32 (f32x2); + svcvt_s32 (f32x3); /* { dg-error {'svcvt_s32' has no form that takes 'svfloat32x3_t' arguments} } */ + svcvt_s32 (f32x4); + svcvt_f32 (s32x2); + svcvt_f32 (s32x3); /* { dg-error {'svcvt_f32' has no form that takes 'svint32x3_t' arguments} } */ + svcvt_f32 (s32x4); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_1.c new file mode 100644 index 0000000000000000000000000000000000000000..e02fe5405b1f0b0bc95b97c0ecfaf7f6020afcff --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_1.c @@ -0,0 +1,54 @@ +/* { dg-do compile } */ + +#include <arm_sme.h> + +#pragma GCC target ("+sme2") + +void +f1 (svbool_t pg, svint32_t s32, svint16x2_t s16x2, svint32x2_t s32x2, + svint32x3_t s32x3, svint32x4_t s32x4, svint64x2_t s64x2, float f, double d) + __arm_streaming __arm_inout("za") +{ + svadd_za32_vg1x2 (1); /* { dg-error {too few arguments to function 'svadd_za32_vg1x2'} } */ + svadd_za32_vg1x2 (1, s32x2, s32x2); /* { dg-error {too many arguments to function 'svadd_za32_vg1x2'} } */ + + svadd_za32_vg1x2 (s32x2, s32x2); /* { dg-error {passing 'svint32x2_t' to argument 1 of 'svadd_za32_vg1x2', which expects 'uint32_t'} } */ + svadd_za32_vg1x2 (f, s32x2); + svadd_za32_vg1x2 (d, s32x2); + svadd_za32_vg1x2 (pg, s32x2); /* { dg-error {passing 'svbool_t' to argument 1 of 'svadd_za32_vg1x2', which expects 'uint32_t'} } */ + + svadd_za32_vg1x2 (1, 1); /* { dg-error {passing 'int' to argument 2 of 'svadd_za32_vg1x2', which expects an SVE type rather than a scalar type} } */ + svadd_za32_vg1x2 (1, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadd_za32_vg1x2', which expects a tuple of 2 vectors} } */ + svadd_za32_vg1x2 (1, s32); /* { dg-error {passing single vector 'svint32_t' to argument 2 of 'svadd_za32_vg1x2', which expects a tuple of 2 vectors} } */ + svadd_za32_vg1x2 (1, s32x2); + svadd_za32_vg1x2 (1, s32x3); /* { dg-error {passing 'svint32x3_t' to argument 2 of 'svadd_za32_vg1x2', which expects a tuple of 2 vectors} } */ + svadd_za32_vg1x2 (1, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 2 of 'svadd_za32_vg1x2', which expects a tuple of 2 vectors} } */ + + svadd_za32_vg1x2 (1, s16x2); /* { dg-error {'svadd_za32_vg1x2' has no form that takes 'svint16x2_t' arguments} } */ + svadd_za32_vg1x2 (1, s64x2); /* { dg-error {'svadd_za32_vg1x2' has no form that takes 'svint64x2_t' arguments} } */ +} + +void +f2 (svint32x2_t s32x2) __arm_streaming +{ + svadd_za32_vg1x2 (0, s32x2); /* { dg-error {ACLE function 'svadd_za32_s32_vg1x2' can only be called from a function that has 'za' state} } */ +} + +void +f3 (svint32x2_t s32x2) __arm_inout("za") +{ + 
svadd_za32_vg1x2 (0, s32x2); /* { dg-error {ACLE function 'svadd_za32_s32_vg1x2' can only be called when SME streaming mode is enabled} } */ +} + +#pragma GCC target ("+sme-i16i64") + +void +f4 (svint32x2_t s32x2, svuint32x2_t u32x2, + svint64x2_t s64x2, svuint64x2_t u64x2) + __arm_streaming __arm_inout("za") +{ + svadd_za64_vg1x2 (1, s32x2); /* { dg-error {'svadd_za64_vg1x2' has no form that takes 'svint32x2_t' arguments} } */ + svadd_za64_vg1x2 (1, u32x2); /* { dg-error {'svadd_za64_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */ + svadd_za64_vg1x2 (1, s64x2); + svadd_za64_vg1x2 (1, u64x2); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_2.c new file mode 100644 index 0000000000000000000000000000000000000000..b28b03e3815afcd96d4430be3acf0444bbc67d3e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_2.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ + +#include <arm_sme.h> + +#pragma GCC target ("arch=armv9-a+sme2") + +void +f1 (svbool_t pg, svint32_t s32, svint16x4_t s16x4, svint32x2_t s32x2, + svint32x3_t s32x3, svint32x4_t s32x4, svint64x4_t s64x4, float f, double d) + __arm_streaming __arm_inout("za") +{ + svadd_za32_vg1x4 (1); /* { dg-error {too few arguments to function 'svadd_za32_vg1x4'} } */ + svadd_za32_vg1x4 (1, s32x4, s32x4); /* { dg-error {too many arguments to function 'svadd_za32_vg1x4'} } */ + + svadd_za32_vg1x4 (s32x2, s32x4); /* { dg-error {passing 'svint32x2_t' to argument 1 of 'svadd_za32_vg1x4', which expects 'uint32_t'} } */ + svadd_za32_vg1x4 (f, s32x4); + svadd_za32_vg1x4 (d, s32x4); + svadd_za32_vg1x4 (pg, s32x4); /* { dg-error {passing 'svbool_t' to argument 1 of 'svadd_za32_vg1x4', which expects 'uint32_t'} } */ + + svadd_za32_vg1x4 (1, s32); /* { dg-error {passing single vector 'svint32_t' to argument 2 of 'svadd_za32_vg1x4', which expects a tuple of 4 vectors} } */ + svadd_za32_vg1x4 (1, s32x2); /* { dg-error {passing 'svint32x2_t' to argument 2 of 'svadd_za32_vg1x4', which expects a tuple of 4 vectors} } */ + svadd_za32_vg1x4 (1, s32x3); /* { dg-error {passing 'svint32x3_t' to argument 2 of 'svadd_za32_vg1x4', which expects a tuple of 4 vectors} } */ + svadd_za32_vg1x4 (1, s32x4); + + svadd_za32_vg1x4 (1, s16x4); /* { dg-error {'svadd_za32_vg1x4' has no form that takes 'svint16x4_t' arguments} } */ + svadd_za32_vg1x4 (1, s64x4); /* { dg-error {'svadd_za32_vg1x4' has no form that takes 'svint64x4_t' arguments} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_3.c new file mode 100644 index 0000000000000000000000000000000000000000..22d91b1858b9c1d20241326920128c71e163d5ed --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_slice_3.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ + +#include <arm_sme.h> + +#pragma GCC target ("arch=armv9-a+sme2+nosme-i16i64") + +void +f1 (svint32x2_t s32x2, svuint32x2_t u32x2, + svint64x2_t s64x2, svuint64x2_t u64x2) + __arm_streaming __arm_inout("za") +{ + svadd_za64_vg1x2 (1, s32x2); /* { dg-error {'svadd_za64_vg1x2' has no form that takes 'svint32x2_t' arguments} } */ + svadd_za64_vg1x2 (1, u32x2); /* { dg-error {'svadd_za64_vg1x2' has no form that takes 'svuint32x2_t' arguments} } */ + svadd_za64_vg1x2 (1, s64x2); /* { dg-error {ACLE function 'svadd_za64_s64_vg1x2' requires ISA extension 'sme-i16i64'} } */ + svadd_za64_vg1x2 (1, u64x2); +} 
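For reference, the ZA-slice intrinsics exercised by the tests above share one calling convention: argument 1 is a 'uint32_t' slice index, the vector tuple's length must match the '_vg1xN' suffix, and the caller must be both '__arm_streaming' and have 'za' state. Below is a minimal sketch of accepted usage, assembled only from calls that the tests above compile without a diagnostic; it is not part of the patch, and the function name 'use_za_slices' is illustrative.

#include <arm_sme.h>

#pragma GCC target "+sme2"

/* Illustrative sketch: each call below also appears, without a dg-error
   annotation, in the tests above.  */
void
use_za_slices (svint32x2_t s32x2, svint16x2_t s16x2,
	       svuint8x2_t u8x2, svint8_t s8, svint16_t s16)
  __arm_streaming __arm_inout("za")
{
  svadd_za32_vg1x2 (1, s32x2);		    /* add a pair of vectors to ZA.  */
  svdot_lane_za32_vg1x2 (0, s16x2, s16, 0); /* signed dot product, lane 0.  */
  svusdot_lane_za32_vg1x2 (0, u8x2, s8, 0); /* unsigned-by-signed variant.  */
}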
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unaryxn_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unaryxn_1.c new file mode 100644 index 0000000000000000000000000000000000000000..f478945562c8dcc154f88c1fac658092f07cba3e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unaryxn_1.c @@ -0,0 +1,15 @@ +#include <arm_sve.h> + +#pragma GCC target "+sme2" + +void +test (svfloat32_t f32, svfloat32x2_t f32x2, svfloat32x3_t f32x3, + svfloat32x4_t f32x4) __arm_streaming +{ + svuzp (); /* { dg-error {too few arguments to function 'svuzp'} } */ + svuzp (f32x2, f32x2); /* { dg-error {too many arguments to function 'svuzp'} } */ + svuzp (f32); /* { dg-error {'svuzp' has no form that takes 'svfloat32_t' arguments} } */ + svuzp (f32x2); + svuzp (f32x3); /* { dg-error {'svuzp' has no form that takes 'svfloat32x3_t' arguments} } */ + svuzp (f32x4); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_1.c new file mode 100644 index 0000000000000000000000000000000000000000..3a45b58b023490e6e33c4f5285c0422530f8f827 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_1.c @@ -0,0 +1,50 @@ +/* { dg-do compile } */ + +#include <arm_sme.h> + +#pragma GCC target "+sme2" + +void +f1 (svint8_t s8, svint8x2_t s8x2, svint8x3_t s8x3, svint8x4_t s8x4, + svuint8_t u8, svuint16x2_t u16x2, svfloat32x2_t f32x2, svint64x2_t s64x2, + uint32_t tile) + __arm_streaming __arm_inout("za") +{ + svwrite_ver_za8_vg2 (0, 0); /* { dg-error {too few arguments to function 'svwrite_ver_za8_vg2'} } */ + svwrite_ver_za8_vg2 (0, 0, s8x2, 0); /* { dg-error {too many arguments to function 'svwrite_ver_za8_vg2'} } */ + svwrite_ver_za8_vg2 (tile, 0, s8x2); /* { dg-error {argument 1 of 'svwrite_ver_za8_vg2' must be an integer constant expression} } */ + svwrite_ver_za8_vg2 (-1, 0, s8x2); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za8_vg2', which expects the value 0} } */ + svwrite_ver_za8_vg2 (1, 0, s8x2); /* { dg-error {passing 1 to argument 1 of 'svwrite_ver_za8_vg2', which expects the value 0} } */ + svwrite_ver_za8_vg2 (0, u8, s8x2); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svwrite_ver_za8_vg2', which expects 'uint32_t'} } */ + svwrite_ver_za8_vg2 (0, 0, tile); /* { dg-error {passing 'uint32_t'.* to argument 3 of 'svwrite_ver_za8_vg2', which expects an SVE type} } */ + svwrite_ver_za8_vg2 (0, 0, s8); /* { dg-error {passing single vector 'svint8_t' to argument 3 of 'svwrite_ver_za8_vg2', which expects a tuple of 2 vectors} } */ + svwrite_ver_za8_vg2 (0, 0, s8x2); + svwrite_ver_za8_vg2 (0, 0, s8x3); /* { dg-error {passing 'svint8x3_t' to argument 3 of 'svwrite_ver_za8_vg2', which expects a tuple of 2 vectors} } */ + svwrite_ver_za8_vg2 (0, 0, s8x4); /* { dg-error {passing 'svint8x4_t' to argument 3 of 'svwrite_ver_za8_vg2', which expects a tuple of 2 vectors} } */ + + svwrite_ver_za16_vg2 (-1, 0, u16x2); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za16_vg2', which expects a value in the range \[0, 1\]} } */ + svwrite_ver_za16_vg2 (2, 0, u16x2); /* { dg-error {passing 2 to argument 1 of 'svwrite_ver_za16_vg2', which expects a value in the range \[0, 1\]} } */ + + svwrite_ver_za32_vg2 (-1, 0, f32x2); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za32_vg2', which expects a value in the range \[0, 3\]} } */ + svwrite_ver_za32_vg2 (4, 0, f32x2); /* { dg-error {passing 4 to argument 1 of 'svwrite_ver_za32_vg2', which expects a value in
the range \[0, 3\]} } */ + + svwrite_ver_za64_vg2 (-1, 0, s64x2); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za64_vg2', which expects a value in the range \[0, 7\]} } */ + svwrite_ver_za64_vg2 (8, 0, s64x2); /* { dg-error {passing 8 to argument 1 of 'svwrite_ver_za64_vg2', which expects a value in the range \[0, 7\]} } */ + + svwrite_ver_za8_vg4 (0, 0, s8); /* { dg-error {passing single vector 'svint8_t' to argument 3 of 'svwrite_ver_za8_vg4', which expects a tuple of 4 vectors} } */ + svwrite_ver_za8_vg4 (0, 0, s8x2); /* { dg-error {passing 'svint8x2_t' to argument 3 of 'svwrite_ver_za8_vg4', which expects a tuple of 4 vectors} } */ + svwrite_ver_za8_vg4 (0, 0, s8x3); /* { dg-error {passing 'svint8x3_t' to argument 3 of 'svwrite_ver_za8_vg4', which expects a tuple of 4 vectors} } */ + svwrite_ver_za8_vg4 (0, 0, s8x4); +} + +void +f2 (svint8x2_t s8x2) __arm_streaming +{ + svwrite_ver_za8_vg2 (0, 0, s8x2); /* { dg-error {ACLE function 'svwrite_ver_za8_s8_vg2' can only be called from a function that has 'za' state} } */ +} + +void +f3 (svint8x2_t s8x2) __arm_inout("za") +{ + svwrite_ver_za8_vg2 (0, 0, s8x2); /* { dg-error {ACLE function 'svwrite_ver_za8_s8_vg2' can only be called when SME streaming mode is enabled} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_slice_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_slice_1.c new file mode 100644 index 0000000000000000000000000000000000000000..dedd4b16ea2450e88a5177c4a00350a70a913245 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_slice_1.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ + +#include <arm_sme.h> + +#pragma GCC target "+sme2" + +void +f1 (svint8_t s8, svint8x2_t s8x2, svint8x3_t s8x3, svint8x4_t s8x4, + svuint8_t u8, svuint16x2_t u16x2, svfloat32x2_t f32x2, svint64x2_t s64x2, + uint32_t tile) + __arm_streaming __arm_inout("za") +{ + svwrite_za8_vg1x2 (0); /* { dg-error {too few arguments to function 'svwrite_za8_vg1x2'} } */ + svwrite_za8_vg1x2 (0, s8x2, 0); /* { dg-error {too many arguments to function 'svwrite_za8_vg1x2'} } */ + svwrite_za8_vg1x2 (u8, s8x2); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svwrite_za8_vg1x2', which expects 'uint32_t'} } */ + svwrite_za8_vg1x2 (0, tile); /* { dg-error {passing 'uint32_t'.* to argument 2 of 'svwrite_za8_vg1x2', which expects an SVE type} } */ + svwrite_za8_vg1x2 (0, s8); /* { dg-error {passing single vector 'svint8_t' to argument 2 of 'svwrite_za8_vg1x2', which expects a tuple of 2 vectors} } */ + svwrite_za8_vg1x2 (0, s8x2); + svwrite_za8_vg1x2 (0, s8x3); /* { dg-error {passing 'svint8x3_t' to argument 2 of 'svwrite_za8_vg1x2', which expects a tuple of 2 vectors} } */ + svwrite_za8_vg1x2 (0, s8x4); /* { dg-error {passing 'svint8x4_t' to argument 2 of 'svwrite_za8_vg1x2', which expects a tuple of 2 vectors} } */ + + svwrite_za8_vg1x4 (0, s8); /* { dg-error {passing single vector 'svint8_t' to argument 2 of 'svwrite_za8_vg1x4', which expects a tuple of 4 vectors} } */ + svwrite_za8_vg1x4 (0, s8x2); /* { dg-error {passing 'svint8x2_t' to argument 2 of 'svwrite_za8_vg1x4', which expects a tuple of 4 vectors} } */ + svwrite_za8_vg1x4 (0, s8x3); /* { dg-error {passing 'svint8x3_t' to argument 2 of 'svwrite_za8_vg1x4', which expects a tuple of 4 vectors} } */ + svwrite_za8_vg1x4 (0, s8x4); +} + +void +f2 (svint8x2_t s8x2) __arm_streaming +{ + svwrite_za8_vg1x2 (0, s8x2); /* { dg-error {ACLE function 'svwrite_za8_s8_vg1x2' can only be called from a function that has 'za' state} } */ +} + 
+void +f3 (svint8x2_t s8x2) __arm_inout("za") +{ + svwrite_za8_vg1x2 (0, s8x2); /* { dg-error {ACLE function 'svwrite_za8_s8_vg1x2' can only be called when SME streaming mode is enabled} } */ +}
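Similarly, the write tests above accept two shapes: 'svwrite_za*_vg1xN' takes a slice index plus a tuple whose length matches the suffix, while 'svwrite_ver_za*_vgN' additionally takes a leading tile index, which must be 0 for the single 8-bit tile. Below is a minimal sketch built only from the calls that f1 accepts without a diagnostic in write_za_1.c and write_za_slice_1.c; it is not part of the patch, and the function name 'use_za_writes' is illustrative.

#include <arm_sme.h>

#pragma GCC target "+sme2"

/* Illustrative sketch: each call below also appears, without a dg-error
   annotation, in the tests above.  */
void
use_za_writes (svint8x2_t s8x2, svint8x4_t s8x4)
  __arm_streaming __arm_inout("za")
{
  svwrite_za8_vg1x2 (0, s8x2);	    /* write two vectors to ZA slice 0.  */
  svwrite_za8_vg1x4 (0, s8x4);	    /* same, with a tuple of four.  */
  svwrite_ver_za8_vg2 (0, 0, s8x2); /* vertical write to tile 0, slice 0.  */
  svwrite_ver_za8_vg4 (0, 0, s8x4); /* za8 has a single tile, so tile == 0.  */
}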