From 7cde140863edea536c676096cbc3d84a6d1424e4 Mon Sep 17 00:00:00 2001 From: Jennifer Schmitz <jschmitz@nvidia.com> Date: Tue, 16 Jul 2024 01:59:50 -0700 Subject: [PATCH] SVE intrinsics: Add strength reduction for division by constant. This patch folds SVE division where all divisor elements are the same power of 2 to svasrd (signed) or svlsr (unsigned). Tests were added to check 1) whether the transform is applied (existing test harness was amended), and 2) correctness using runtime tests for all input types of svdiv; for signed and unsigned integers, several corner cases were covered. The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression. OK for mainline? Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com> gcc/ * config/aarch64/aarch64-sve-builtins-base.cc (svdiv_impl::fold): Implement strength reduction. gcc/testsuite/ * gcc.target/aarch64/sve/div_const_run.c: New test. * gcc.target/aarch64/sve/acle/asm/div_s32.c: Likewise. * gcc.target/aarch64/sve/acle/asm/div_s64.c: Likewise. * gcc.target/aarch64/sve/acle/asm/div_u32.c: Likewise. * gcc.target/aarch64/sve/acle/asm/div_u64.c: Likewise. --- .../aarch64/aarch64-sve-builtins-base.cc | 49 +++- .../gcc.target/aarch64/sve/acle/asm/div_s32.c | 273 +++++++++++++++++- .../gcc.target/aarch64/sve/acle/asm/div_s64.c | 273 +++++++++++++++++- .../gcc.target/aarch64/sve/acle/asm/div_u32.c | 201 ++++++++++++- .../gcc.target/aarch64/sve/acle/asm/div_u64.c | 201 ++++++++++++- .../gcc.target/aarch64/sve/div_const_run.c | 91 ++++++ 6 files changed, 1031 insertions(+), 57 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/div_const_run.c diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index a2268353ae31..d55bee0b72fa 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -746,6 +746,53 @@ public: } }; +class svdiv_impl : public rtx_code_function +{ +public: + CONSTEXPR svdiv_impl () + : rtx_code_function (DIV, UDIV, UNSPEC_COND_FDIV) {} + + gimple * + fold (gimple_folder &f) const override + { + tree divisor = gimple_call_arg (f.call, 2); + tree divisor_cst = uniform_integer_cst_p (divisor); + + if (!divisor_cst || !integer_pow2p (divisor_cst)) + return NULL; + + tree new_divisor; + gcall *call; + + if (f.type_suffix (0).unsigned_p && tree_to_uhwi (divisor_cst) != 1) + { + function_instance instance ("svlsr", functions::svlsr, + shapes::binary_uint_opt_n, MODE_n, + f.type_suffix_ids, GROUP_none, f.pred); + call = f.redirect_call (instance); + tree d = INTEGRAL_TYPE_P (TREE_TYPE (divisor)) ? 
divisor : divisor_cst; + new_divisor = wide_int_to_tree (TREE_TYPE (d), tree_log2 (d)); + } + else + { + if (tree_int_cst_sign_bit (divisor_cst) + || tree_to_shwi (divisor_cst) == 1) + return NULL; + + function_instance instance ("svasrd", functions::svasrd, + shapes::shift_right_imm, MODE_n, + f.type_suffix_ids, GROUP_none, f.pred); + call = f.redirect_call (instance); + new_divisor = wide_int_to_tree (scalar_types[VECTOR_TYPE_svuint64_t], + tree_log2 (divisor_cst)); + } + + gimple_call_set_arg (call, 2, new_divisor); + return call; + } +}; + + class svdot_impl : public function_base { public: @@ -3043,7 +3090,7 @@ FUNCTION (svcreate3, svcreate_impl, (3)) FUNCTION (svcreate4, svcreate_impl, (4)) FUNCTION (svcvt, svcvt_impl,) FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),) -FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV)) +FUNCTION (svdiv, svdiv_impl,) FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV)) FUNCTION (svdot, svdot_impl,) FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT, diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c index c49ca1aa5243..d5a23bf07262 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c @@ -2,6 +2,8 @@ #include "test_sve_acle.h" +#define MAXPOW 1<<30 + /* ** div_s32_m_tied1: ** sdiv z0\.s, p0/m, z0\.s, z1\.s @@ -53,10 +55,27 @@ TEST_UNIFORM_ZX (div_w0_s32_m_untied, svint32_t, int32_t, z0 = svdiv_n_s32_m (p0, z1, x0), z0 = svdiv_m (p0, z1, x0)) +/* +** div_1_s32_m_tied1: +** sel z0\.s, p0, z0\.s, z0\.s +** ret +*/ +TEST_UNIFORM_Z (div_1_s32_m_tied1, svint32_t, + z0 = svdiv_n_s32_m (p0, z0, 1), + z0 = svdiv_m (p0, z0, 1)) + +/* +** div_1_s32_m_untied: +** sel z0\.s, p0, z1\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (div_1_s32_m_untied, svint32_t, + z0 = svdiv_n_s32_m (p0, z1, 1), + z0 = svdiv_m (p0, z1, 1)) + /* ** div_2_s32_m_tied1: -** mov (z[0-9]+\.s), #2 -** sdiv z0\.s, p0/m, z0\.s, \1 +** asrd z0\.s, p0/m, z0\.s, #1 ** ret */ TEST_UNIFORM_Z (div_2_s32_m_tied1, svint32_t, @@ -65,15 +84,75 @@ TEST_UNIFORM_Z (div_2_s32_m_tied1, svint32_t, /* ** div_2_s32_m_untied: -** mov (z[0-9]+\.s), #2 ** movprfx z0, z1 -** sdiv z0\.s, p0/m, z0\.s, \1 +** asrd z0\.s, p0/m, z0\.s, #1 ** ret */ TEST_UNIFORM_Z (div_2_s32_m_untied, svint32_t, z0 = svdiv_n_s32_m (p0, z1, 2), z0 = svdiv_m (p0, z1, 2)) +/* +** div_3_s32_m_tied1: +** mov (z[0-9]+\.s), #3 +** sdiv z0\.s, p0/m, z0\.s, \1 +** ret +*/ +TEST_UNIFORM_Z (div_3_s32_m_tied1, svint32_t, + z0 = svdiv_n_s32_m (p0, z0, 3), + z0 = svdiv_m (p0, z0, 3)) + +/* +** div_3_s32_m_untied: +** mov (z[0-9]+\.s), #3 +** movprfx z0, z1 +** sdiv z0\.s, p0/m, z0\.s, \1 +** ret +*/ +TEST_UNIFORM_Z (div_3_s32_m_untied, svint32_t, + z0 = svdiv_n_s32_m (p0, z1, 3), + z0 = svdiv_m (p0, z1, 3)) + +/* +** div_maxpow_s32_m_tied1: +** asrd z0\.s, p0/m, z0\.s, #30 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_s32_m_tied1, svint32_t, + z0 = svdiv_n_s32_m (p0, z0, MAXPOW), + z0 = svdiv_m (p0, z0, MAXPOW)) + +/* +** div_maxpow_s32_m_untied: +** movprfx z0, z1 +** asrd z0\.s, p0/m, z0\.s, #30 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_s32_m_untied, svint32_t, + z0 = svdiv_n_s32_m (p0, z1, MAXPOW), + z0 = svdiv_m (p0, z1, MAXPOW)) + +/* +** div_intmin_s32_m_tied1: +** mov (z[0-9]+\.s), #-2147483648 +** sdiv z0\.s, p0/m, z0\.s, \1 +** ret +*/ +TEST_UNIFORM_Z (div_intmin_s32_m_tied1, svint32_t, + z0 = svdiv_n_s32_m (p0, z0, INT32_MIN), + z0 = svdiv_m 
(p0, z0, INT32_MIN)) + +/* +** div_intmin_s32_m_untied: +** mov (z[0-9]+\.s), #-2147483648 +** movprfx z0, z1 +** sdiv z0\.s, p0/m, z0\.s, \1 +** ret +*/ +TEST_UNIFORM_Z (div_intmin_s32_m_untied, svint32_t, + z0 = svdiv_n_s32_m (p0, z1, INT32_MIN), + z0 = svdiv_m (p0, z1, INT32_MIN)) + /* ** div_s32_z_tied1: ** movprfx z0\.s, p0/z, z0\.s @@ -137,19 +216,61 @@ TEST_UNIFORM_ZX (div_w0_s32_z_untied, svint32_t, int32_t, z0 = svdiv_z (p0, z1, x0)) /* -** div_2_s32_z_tied1: -** mov (z[0-9]+\.s), #2 +** div_1_s32_z_tied1: +** mov (z[0-9]+\.s), #1 ** movprfx z0\.s, p0/z, z0\.s ** sdiv z0\.s, p0/m, z0\.s, \1 ** ret */ +TEST_UNIFORM_Z (div_1_s32_z_tied1, svint32_t, + z0 = svdiv_n_s32_z (p0, z0, 1), + z0 = svdiv_z (p0, z0, 1)) + +/* +** div_1_s32_z_untied: +** mov z0\.s, #1 +** movprfx z0\.s, p0/z, z0\.s +** sdivr z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (div_1_s32_z_untied, svint32_t, + z0 = svdiv_n_s32_z (p0, z1, 1), + z0 = svdiv_z (p0, z1, 1)) + +/* +** div_2_s32_z_tied1: +** movprfx z0\.s, p0/z, z0\.s +** asrd z0\.s, p0/m, z0\.s, #1 +** ret +*/ TEST_UNIFORM_Z (div_2_s32_z_tied1, svint32_t, z0 = svdiv_n_s32_z (p0, z0, 2), z0 = svdiv_z (p0, z0, 2)) /* ** div_2_s32_z_untied: -** mov (z[0-9]+\.s), #2 +** movprfx z0\.s, p0/z, z1\.s +** asrd z0\.s, p0/m, z0\.s, #1 +** ret +*/ +TEST_UNIFORM_Z (div_2_s32_z_untied, svint32_t, + z0 = svdiv_n_s32_z (p0, z1, 2), + z0 = svdiv_z (p0, z1, 2)) + +/* +** div_3_s32_z_tied1: +** mov (z[0-9]+\.s), #3 +** movprfx z0\.s, p0/z, z0\.s +** sdiv z0\.s, p0/m, z0\.s, \1 +** ret +*/ +TEST_UNIFORM_Z (div_3_s32_z_tied1, svint32_t, + z0 = svdiv_n_s32_z (p0, z0, 3), + z0 = svdiv_z (p0, z0, 3)) + +/* +** div_3_s32_z_untied: +** mov (z[0-9]+\.s), #3 ** ( ** movprfx z0\.s, p0/z, z1\.s ** sdiv z0\.s, p0/m, z0\.s, \1 @@ -159,9 +280,56 @@ TEST_UNIFORM_Z (div_2_s32_z_tied1, svint32_t, ** ) ** ret */ -TEST_UNIFORM_Z (div_2_s32_z_untied, svint32_t, - z0 = svdiv_n_s32_z (p0, z1, 2), - z0 = svdiv_z (p0, z1, 2)) +TEST_UNIFORM_Z (div_3_s32_z_untied, svint32_t, + z0 = svdiv_n_s32_z (p0, z1, 3), + z0 = svdiv_z (p0, z1, 3)) + +/* +** div_maxpow_s32_z_tied1: +** movprfx z0\.s, p0/z, z0\.s +** asrd z0\.s, p0/m, z0\.s, #30 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_s32_z_tied1, svint32_t, + z0 = svdiv_n_s32_z (p0, z0, MAXPOW), + z0 = svdiv_z (p0, z0, MAXPOW)) + +/* +** div_maxpow_s32_z_untied: +** movprfx z0\.s, p0/z, z1\.s +** asrd z0\.s, p0/m, z0\.s, #30 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_s32_z_untied, svint32_t, + z0 = svdiv_n_s32_z (p0, z1, MAXPOW), + z0 = svdiv_z (p0, z1, MAXPOW)) + +/* +** div_intmin_s32_z_tied1: +** mov (z[0-9]+\.s), #-2147483648 +** movprfx z0\.s, p0/z, z0\.s +** sdiv z0\.s, p0/m, z0\.s, \1 +** ret +*/ +TEST_UNIFORM_Z (div_intmin_s32_z_tied1, svint32_t, + z0 = svdiv_n_s32_z (p0, z0, INT32_MIN), + z0 = svdiv_z (p0, z0, INT32_MIN)) + +/* +** div_intmin_s32_z_untied: +** mov (z[0-9]+\.s), #-2147483648 +** ( +** movprfx z0\.s, p0/z, z1\.s +** sdiv z0\.s, p0/m, z0\.s, \1 +** | +** movprfx z0\.s, p0/z, \1 +** sdivr z0\.s, p0/m, z0\.s, z1\.s +** ) +** ret +*/ +TEST_UNIFORM_Z (div_intmin_s32_z_untied, svint32_t, + z0 = svdiv_n_s32_z (p0, z1, INT32_MIN), + z0 = svdiv_z (p0, z1, INT32_MIN)) /* ** div_s32_x_tied1: @@ -216,10 +384,26 @@ TEST_UNIFORM_ZX (div_w0_s32_x_untied, svint32_t, int32_t, z0 = svdiv_n_s32_x (p0, z1, x0), z0 = svdiv_x (p0, z1, x0)) +/* +** div_1_s32_x_tied1: +** ret +*/ +TEST_UNIFORM_Z (div_1_s32_x_tied1, svint32_t, + z0 = svdiv_n_s32_x (p0, z0, 1), + z0 = svdiv_x (p0, z0, 1)) + +/* +** div_1_s32_x_untied: +** mov z0\.d, z1\.d +** ret +*/ 
+TEST_UNIFORM_Z (div_1_s32_x_untied, svint32_t, + z0 = svdiv_n_s32_x (p0, z1, 1), + z0 = svdiv_x (p0, z1, 1)) + /* ** div_2_s32_x_tied1: -** mov (z[0-9]+\.s), #2 -** sdiv z0\.s, p0/m, z0\.s, \1 +** asrd z0\.s, p0/m, z0\.s, #1 ** ret */ TEST_UNIFORM_Z (div_2_s32_x_tied1, svint32_t, @@ -228,10 +412,71 @@ TEST_UNIFORM_Z (div_2_s32_x_tied1, svint32_t, /* ** div_2_s32_x_untied: -** mov z0\.s, #2 -** sdivr z0\.s, p0/m, z0\.s, z1\.s +** movprfx z0, z1 +** asrd z0\.s, p0/m, z0\.s, #1 ** ret */ TEST_UNIFORM_Z (div_2_s32_x_untied, svint32_t, z0 = svdiv_n_s32_x (p0, z1, 2), z0 = svdiv_x (p0, z1, 2)) + +/* +** div_3_s32_x_tied1: +** mov (z[0-9]+\.s), #3 +** sdiv z0\.s, p0/m, z0\.s, \1 +** ret +*/ +TEST_UNIFORM_Z (div_3_s32_x_tied1, svint32_t, + z0 = svdiv_n_s32_x (p0, z0, 3), + z0 = svdiv_x (p0, z0, 3)) + +/* +** div_3_s32_x_untied: +** mov z0\.s, #3 +** sdivr z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (div_3_s32_x_untied, svint32_t, + z0 = svdiv_n_s32_x (p0, z1, 3), + z0 = svdiv_x (p0, z1, 3)) + +/* +** div_maxpow_s32_x_tied1: +** asrd z0\.s, p0/m, z0\.s, #30 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_s32_x_tied1, svint32_t, + z0 = svdiv_n_s32_x (p0, z0, MAXPOW), + z0 = svdiv_x (p0, z0, MAXPOW)) + +/* +** div_maxpow_s32_x_untied: +** movprfx z0, z1 +** asrd z0\.s, p0/m, z0\.s, #30 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_s32_x_untied, svint32_t, + z0 = svdiv_n_s32_x (p0, z1, MAXPOW), + z0 = svdiv_x (p0, z1, MAXPOW)) + +/* +** div_intmin_s32_x_tied1: +** mov (z[0-9]+\.s), #-2147483648 +** sdiv z0\.s, p0/m, z0\.s, \1 +** ret +*/ +TEST_UNIFORM_Z (div_intmin_s32_x_tied1, svint32_t, + z0 = svdiv_n_s32_x (p0, z0, INT32_MIN), + z0 = svdiv_x (p0, z0, INT32_MIN)) + +/* +** div_intmin_s32_x_untied: +** mov z0\.s, #-2147483648 +** sdivr z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (div_intmin_s32_x_untied, svint32_t, + z0 = svdiv_n_s32_x (p0, z1, INT32_MIN), + z0 = svdiv_x (p0, z1, INT32_MIN)) + + diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c index 464dca28d747..cfed6f9c1b31 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c @@ -2,6 +2,8 @@ #include "test_sve_acle.h" +#define MAXPOW 1ULL<<62 + /* ** div_s64_m_tied1: ** sdiv z0\.d, p0/m, z0\.d, z1\.d @@ -53,10 +55,27 @@ TEST_UNIFORM_ZX (div_x0_s64_m_untied, svint64_t, int64_t, z0 = svdiv_n_s64_m (p0, z1, x0), z0 = svdiv_m (p0, z1, x0)) +/* +** div_1_s64_m_tied1: +** sel z0\.d, p0, z0\.d, z0\.d +** ret +*/ +TEST_UNIFORM_Z (div_1_s64_m_tied1, svint64_t, + z0 = svdiv_n_s64_m (p0, z0, 1), + z0 = svdiv_m (p0, z0, 1)) + +/* +** div_1_s64_m_untied: +** sel z0\.d, p0, z1\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (div_1_s64_m_untied, svint64_t, + z0 = svdiv_n_s64_m (p0, z1, 1), + z0 = svdiv_m (p0, z1, 1)) + /* ** div_2_s64_m_tied1: -** mov (z[0-9]+\.d), #2 -** sdiv z0\.d, p0/m, z0\.d, \1 +** asrd z0\.d, p0/m, z0\.d, #1 ** ret */ TEST_UNIFORM_Z (div_2_s64_m_tied1, svint64_t, @@ -65,15 +84,75 @@ TEST_UNIFORM_Z (div_2_s64_m_tied1, svint64_t, /* ** div_2_s64_m_untied: -** mov (z[0-9]+\.d), #2 ** movprfx z0, z1 -** sdiv z0\.d, p0/m, z0\.d, \1 +** asrd z0\.d, p0/m, z0\.d, #1 ** ret */ TEST_UNIFORM_Z (div_2_s64_m_untied, svint64_t, z0 = svdiv_n_s64_m (p0, z1, 2), z0 = svdiv_m (p0, z1, 2)) +/* +** div_3_s64_m_tied1: +** mov (z[0-9]+\.d), #3 +** sdiv z0\.d, p0/m, z0\.d, \1 +** ret +*/ +TEST_UNIFORM_Z (div_3_s64_m_tied1, svint64_t, + z0 = svdiv_n_s64_m (p0, z0, 3), + z0 = svdiv_m (p0, z0, 3)) + +/* +** 
div_3_s64_m_untied: +** mov (z[0-9]+\.d), #3 +** movprfx z0, z1 +** sdiv z0\.d, p0/m, z0\.d, \1 +** ret +*/ +TEST_UNIFORM_Z (div_3_s64_m_untied, svint64_t, + z0 = svdiv_n_s64_m (p0, z1, 3), + z0 = svdiv_m (p0, z1, 3)) + +/* +** div_maxpow_s64_m_tied1: +** asrd z0\.d, p0/m, z0\.d, #62 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_s64_m_tied1, svint64_t, + z0 = svdiv_n_s64_m (p0, z0, MAXPOW), + z0 = svdiv_m (p0, z0, MAXPOW)) + +/* +** div_maxpow_s64_m_untied: +** movprfx z0, z1 +** asrd z0\.d, p0/m, z0\.d, #62 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_s64_m_untied, svint64_t, + z0 = svdiv_n_s64_m (p0, z1, MAXPOW), + z0 = svdiv_m (p0, z1, MAXPOW)) + +/* +** div_intmin_s64_m_tied1: +** mov (z[0-9]+\.d), #-9223372036854775808 +** sdiv z0\.d, p0/m, z0\.d, \1 +** ret +*/ +TEST_UNIFORM_Z (div_intmin_s64_m_tied1, svint64_t, + z0 = svdiv_n_s64_m (p0, z0, INT64_MIN), + z0 = svdiv_m (p0, z0, INT64_MIN)) + +/* +** div_intmin_s64_m_untied: +** mov (z[0-9]+\.d), #-9223372036854775808 +** movprfx z0, z1 +** sdiv z0\.d, p0/m, z0\.d, \1 +** ret +*/ +TEST_UNIFORM_Z (div_intmin_s64_m_untied, svint64_t, + z0 = svdiv_n_s64_m (p0, z1, INT64_MIN), + z0 = svdiv_m (p0, z1, INT64_MIN)) + /* ** div_s64_z_tied1: ** movprfx z0\.d, p0/z, z0\.d @@ -137,19 +216,61 @@ TEST_UNIFORM_ZX (div_x0_s64_z_untied, svint64_t, int64_t, z0 = svdiv_z (p0, z1, x0)) /* -** div_2_s64_z_tied1: -** mov (z[0-9]+\.d), #2 +** div_1_s64_z_tied1: +** mov (z[0-9]+\.d), #1 ** movprfx z0\.d, p0/z, z0\.d ** sdiv z0\.d, p0/m, z0\.d, \1 ** ret */ +TEST_UNIFORM_Z (div_1_s64_z_tied1, svint64_t, + z0 = svdiv_n_s64_z (p0, z0, 1), + z0 = svdiv_z (p0, z0, 1)) + +/* +** div_1_s64_z_untied: +** mov z0\.d, #1 +** movprfx z0\.d, p0/z, z0\.d +** sdivr z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (div_1_s64_z_untied, svint64_t, + z0 = svdiv_n_s64_z (p0, z1, 1), + z0 = svdiv_z (p0, z1, 1)) + +/* +** div_2_s64_z_tied1: +** movprfx z0\.d, p0/z, z0\.d +** asrd z0\.d, p0/m, z0\.d, #1 +** ret +*/ TEST_UNIFORM_Z (div_2_s64_z_tied1, svint64_t, z0 = svdiv_n_s64_z (p0, z0, 2), z0 = svdiv_z (p0, z0, 2)) /* ** div_2_s64_z_untied: -** mov (z[0-9]+\.d), #2 +** movprfx z0\.d, p0/z, z1\.d +** asrd z0\.d, p0/m, z0\.d, #1 +** ret +*/ +TEST_UNIFORM_Z (div_2_s64_z_untied, svint64_t, + z0 = svdiv_n_s64_z (p0, z1, 2), + z0 = svdiv_z (p0, z1, 2)) + +/* +** div_3_s64_z_tied1: +** mov (z[0-9]+\.d), #3 +** movprfx z0\.d, p0/z, z0\.d +** sdiv z0\.d, p0/m, z0\.d, \1 +** ret +*/ +TEST_UNIFORM_Z (div_3_s64_z_tied1, svint64_t, + z0 = svdiv_n_s64_z (p0, z0, 3), + z0 = svdiv_z (p0, z0, 3)) + +/* +** div_3_s64_z_untied: +** mov (z[0-9]+\.d), #3 ** ( ** movprfx z0\.d, p0/z, z1\.d ** sdiv z0\.d, p0/m, z0\.d, \1 @@ -159,9 +280,56 @@ TEST_UNIFORM_Z (div_2_s64_z_tied1, svint64_t, ** ) ** ret */ -TEST_UNIFORM_Z (div_2_s64_z_untied, svint64_t, - z0 = svdiv_n_s64_z (p0, z1, 2), - z0 = svdiv_z (p0, z1, 2)) +TEST_UNIFORM_Z (div_3_s64_z_untied, svint64_t, + z0 = svdiv_n_s64_z (p0, z1, 3), + z0 = svdiv_z (p0, z1, 3)) + +/* +** div_maxpow_s64_z_tied1: +** movprfx z0\.d, p0/z, z0\.d +** asrd z0\.d, p0/m, z0\.d, #62 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_s64_z_tied1, svint64_t, + z0 = svdiv_n_s64_z (p0, z0, MAXPOW), + z0 = svdiv_z (p0, z0, MAXPOW)) + +/* +** div_maxpow_s64_z_untied: +** movprfx z0\.d, p0/z, z1\.d +** asrd z0\.d, p0/m, z0\.d, #62 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_s64_z_untied, svint64_t, + z0 = svdiv_n_s64_z (p0, z1, MAXPOW), + z0 = svdiv_z (p0, z1, MAXPOW)) + +/* +** div_intmin_s64_z_tied1: +** mov (z[0-9]+\.d), #-9223372036854775808 +** movprfx z0\.d, p0/z, z0\.d +** sdiv z0\.d, p0/m, 
z0\.d, \1 +** ret +*/ +TEST_UNIFORM_Z (div_intmin_s64_z_tied1, svint64_t, + z0 = svdiv_n_s64_z (p0, z0, INT64_MIN), + z0 = svdiv_z (p0, z0, INT64_MIN)) + +/* +** div_intmin_s64_z_untied: +** mov (z[0-9]+\.d), #-9223372036854775808 +** ( +** movprfx z0\.d, p0/z, z1\.d +** sdiv z0\.d, p0/m, z0\.d, \1 +** | +** movprfx z0\.d, p0/z, \1 +** sdivr z0\.d, p0/m, z0\.d, z1\.d +** ) +** ret +*/ +TEST_UNIFORM_Z (div_intmin_s64_z_untied, svint64_t, + z0 = svdiv_n_s64_z (p0, z1, INT64_MIN), + z0 = svdiv_z (p0, z1, INT64_MIN)) /* ** div_s64_x_tied1: @@ -216,10 +384,26 @@ TEST_UNIFORM_ZX (div_x0_s64_x_untied, svint64_t, int64_t, z0 = svdiv_n_s64_x (p0, z1, x0), z0 = svdiv_x (p0, z1, x0)) +/* +** div_1_s64_x_tied1: +** ret +*/ +TEST_UNIFORM_Z (div_1_s64_x_tied1, svint64_t, + z0 = svdiv_n_s64_x (p0, z0, 1), + z0 = svdiv_x (p0, z0, 1)) + +/* +** div_1_s64_x_untied: +** mov z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (div_1_s64_x_untied, svint64_t, + z0 = svdiv_n_s64_x (p0, z1, 1), + z0 = svdiv_x (p0, z1, 1)) + /* ** div_2_s64_x_tied1: -** mov (z[0-9]+\.d), #2 -** sdiv z0\.d, p0/m, z0\.d, \1 +** asrd z0\.d, p0/m, z0\.d, #1 ** ret */ TEST_UNIFORM_Z (div_2_s64_x_tied1, svint64_t, @@ -228,10 +412,71 @@ TEST_UNIFORM_Z (div_2_s64_x_tied1, svint64_t, /* ** div_2_s64_x_untied: -** mov z0\.d, #2 -** sdivr z0\.d, p0/m, z0\.d, z1\.d +** movprfx z0, z1 +** asrd z0\.d, p0/m, z0\.d, #1 ** ret */ TEST_UNIFORM_Z (div_2_s64_x_untied, svint64_t, z0 = svdiv_n_s64_x (p0, z1, 2), z0 = svdiv_x (p0, z1, 2)) + +/* +** div_3_s64_x_tied1: +** mov (z[0-9]+\.d), #3 +** sdiv z0\.d, p0/m, z0\.d, \1 +** ret +*/ +TEST_UNIFORM_Z (div_3_s64_x_tied1, svint64_t, + z0 = svdiv_n_s64_x (p0, z0, 3), + z0 = svdiv_x (p0, z0, 3)) + +/* +** div_3_s64_x_untied: +** mov z0\.d, #3 +** sdivr z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (div_3_s64_x_untied, svint64_t, + z0 = svdiv_n_s64_x (p0, z1, 3), + z0 = svdiv_x (p0, z1, 3)) + +/* +** div_maxpow_s64_x_tied1: +** asrd z0\.d, p0/m, z0\.d, #62 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_s64_x_tied1, svint64_t, + z0 = svdiv_n_s64_x (p0, z0, MAXPOW), + z0 = svdiv_x (p0, z0, MAXPOW)) + +/* +** div_maxpow_s64_x_untied: +** movprfx z0, z1 +** asrd z0\.d, p0/m, z0\.d, #62 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_s64_x_untied, svint64_t, + z0 = svdiv_n_s64_x (p0, z1, MAXPOW), + z0 = svdiv_x (p0, z1, MAXPOW)) + +/* +** div_intmin_s64_x_tied1: +** mov (z[0-9]+\.d), #-9223372036854775808 +** sdiv z0\.d, p0/m, z0\.d, \1 +** ret +*/ +TEST_UNIFORM_Z (div_intmin_s64_x_tied1, svint64_t, + z0 = svdiv_n_s64_x (p0, z0, INT64_MIN), + z0 = svdiv_x (p0, z0, INT64_MIN)) + +/* +** div_intmin_s64_x_untied: +** mov z0\.d, #-9223372036854775808 +** sdivr z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (div_intmin_s64_x_untied, svint64_t, + z0 = svdiv_n_s64_x (p0, z1, INT64_MIN), + z0 = svdiv_x (p0, z1, INT64_MIN)) + + diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c index 232ccacf524f..9707664caf4c 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c @@ -2,6 +2,8 @@ #include "test_sve_acle.h" +#define MAXPOW 1<<31 + /* ** div_u32_m_tied1: ** udiv z0\.s, p0/m, z0\.s, z1\.s @@ -53,10 +55,27 @@ TEST_UNIFORM_ZX (div_w0_u32_m_untied, svuint32_t, uint32_t, z0 = svdiv_n_u32_m (p0, z1, x0), z0 = svdiv_m (p0, z1, x0)) +/* +** div_1_u32_m_tied1: +** sel z0\.s, p0, z0\.s, z0\.s +** ret +*/ +TEST_UNIFORM_Z (div_1_u32_m_tied1, svuint32_t, + z0 = svdiv_n_u32_m (p0, z0, 1), + z0 = 
svdiv_m (p0, z0, 1)) + +/* +** div_1_u32_m_untied: +** sel z0\.s, p0, z1\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (div_1_u32_m_untied, svuint32_t, + z0 = svdiv_n_u32_m (p0, z1, 1), + z0 = svdiv_m (p0, z1, 1)) + /* ** div_2_u32_m_tied1: -** mov (z[0-9]+\.s), #2 -** udiv z0\.s, p0/m, z0\.s, \1 +** lsr z0\.s, p0/m, z0\.s, #1 ** ret */ TEST_UNIFORM_Z (div_2_u32_m_tied1, svuint32_t, @@ -65,15 +84,54 @@ TEST_UNIFORM_Z (div_2_u32_m_tied1, svuint32_t, /* ** div_2_u32_m_untied: -** mov (z[0-9]+\.s), #2 ** movprfx z0, z1 -** udiv z0\.s, p0/m, z0\.s, \1 +** lsr z0\.s, p0/m, z0\.s, #1 ** ret */ TEST_UNIFORM_Z (div_2_u32_m_untied, svuint32_t, z0 = svdiv_n_u32_m (p0, z1, 2), z0 = svdiv_m (p0, z1, 2)) +/* +** div_3_u32_m_tied1: +** mov (z[0-9]+\.s), #3 +** udiv z0\.s, p0/m, z0\.s, \1 +** ret +*/ +TEST_UNIFORM_Z (div_3_u32_m_tied1, svuint32_t, + z0 = svdiv_n_u32_m (p0, z0, 3), + z0 = svdiv_m (p0, z0, 3)) + +/* +** div_3_u32_m_untied: +** mov (z[0-9]+\.s), #3 +** movprfx z0, z1 +** udiv z0\.s, p0/m, z0\.s, \1 +** ret +*/ +TEST_UNIFORM_Z (div_3_u32_m_untied, svuint32_t, + z0 = svdiv_n_u32_m (p0, z1, 3), + z0 = svdiv_m (p0, z1, 3)) + +/* +** div_maxpow_u32_m_tied1: +** lsr z0\.s, p0/m, z0\.s, #31 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_u32_m_tied1, svuint32_t, + z0 = svdiv_n_u32_m (p0, z0, MAXPOW), + z0 = svdiv_m (p0, z0, MAXPOW)) + +/* +** div_maxpow_u32_m_untied: +** movprfx z0, z1 +** lsr z0\.s, p0/m, z0\.s, #31 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_u32_m_untied, svuint32_t, + z0 = svdiv_n_u32_m (p0, z1, MAXPOW), + z0 = svdiv_m (p0, z1, MAXPOW)) + /* ** div_u32_z_tied1: ** movprfx z0\.s, p0/z, z0\.s @@ -137,19 +195,61 @@ TEST_UNIFORM_ZX (div_w0_u32_z_untied, svuint32_t, uint32_t, z0 = svdiv_z (p0, z1, x0)) /* -** div_2_u32_z_tied1: -** mov (z[0-9]+\.s), #2 +** div_1_u32_z_tied1: +** mov (z[0-9]+\.s), #1 ** movprfx z0\.s, p0/z, z0\.s ** udiv z0\.s, p0/m, z0\.s, \1 ** ret */ +TEST_UNIFORM_Z (div_1_u32_z_tied1, svuint32_t, + z0 = svdiv_n_u32_z (p0, z0, 1), + z0 = svdiv_z (p0, z0, 1)) + +/* +** div_1_u32_z_untied: +** mov z0\.s, #1 +** movprfx z0\.s, p0/z, z0\.s +** udivr z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (div_1_u32_z_untied, svuint32_t, + z0 = svdiv_n_u32_z (p0, z1, 1), + z0 = svdiv_z (p0, z1, 1)) + +/* +** div_2_u32_z_tied1: +** movprfx z0\.s, p0/z, z0\.s +** lsr z0\.s, p0/m, z0\.s, #1 +** ret +*/ TEST_UNIFORM_Z (div_2_u32_z_tied1, svuint32_t, z0 = svdiv_n_u32_z (p0, z0, 2), z0 = svdiv_z (p0, z0, 2)) /* ** div_2_u32_z_untied: -** mov (z[0-9]+\.s), #2 +** movprfx z0\.s, p0/z, z1\.s +** lsr z0\.s, p0/m, z0\.s, #1 +** ret +*/ +TEST_UNIFORM_Z (div_2_u32_z_untied, svuint32_t, + z0 = svdiv_n_u32_z (p0, z1, 2), + z0 = svdiv_z (p0, z1, 2)) + +/* +** div_3_u32_z_tied1: +** mov (z[0-9]+\.s), #3 +** movprfx z0\.s, p0/z, z0\.s +** udiv z0\.s, p0/m, z0\.s, \1 +** ret +*/ +TEST_UNIFORM_Z (div_3_u32_z_tied1, svuint32_t, + z0 = svdiv_n_u32_z (p0, z0, 3), + z0 = svdiv_z (p0, z0, 3)) + +/* +** div_3_u32_z_untied: +** mov (z[0-9]+\.s), #3 ** ( ** movprfx z0\.s, p0/z, z1\.s ** udiv z0\.s, p0/m, z0\.s, \1 @@ -159,9 +259,29 @@ TEST_UNIFORM_Z (div_2_u32_z_tied1, svuint32_t, ** ) ** ret */ -TEST_UNIFORM_Z (div_2_u32_z_untied, svuint32_t, - z0 = svdiv_n_u32_z (p0, z1, 2), - z0 = svdiv_z (p0, z1, 2)) +TEST_UNIFORM_Z (div_3_u32_z_untied, svuint32_t, + z0 = svdiv_n_u32_z (p0, z1, 3), + z0 = svdiv_z (p0, z1, 3)) + +/* +** div_maxpow_u32_z_tied1: +** movprfx z0\.s, p0/z, z0\.s +** lsr z0\.s, p0/m, z0\.s, #31 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_u32_z_tied1, svuint32_t, + z0 = svdiv_n_u32_z (p0, z0, MAXPOW), + z0 = 
svdiv_z (p0, z0, MAXPOW)) + +/* +** div_maxpow_u32_z_untied: +** movprfx z0\.s, p0/z, z1\.s +** lsr z0\.s, p0/m, z0\.s, #31 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_u32_z_untied, svuint32_t, + z0 = svdiv_n_u32_z (p0, z1, MAXPOW), + z0 = svdiv_z (p0, z1, MAXPOW)) /* ** div_u32_x_tied1: @@ -216,10 +336,26 @@ TEST_UNIFORM_ZX (div_w0_u32_x_untied, svuint32_t, uint32_t, z0 = svdiv_n_u32_x (p0, z1, x0), z0 = svdiv_x (p0, z1, x0)) +/* +** div_1_u32_x_tied1: +** ret +*/ +TEST_UNIFORM_Z (div_1_u32_x_tied1, svuint32_t, + z0 = svdiv_n_u32_x (p0, z0, 1), + z0 = svdiv_x (p0, z0, 1)) + +/* +** div_1_u32_x_untied: +** mov z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (div_1_u32_x_untied, svuint32_t, + z0 = svdiv_n_u32_x (p0, z1, 1), + z0 = svdiv_x (p0, z1, 1)) + /* ** div_2_u32_x_tied1: -** mov (z[0-9]+\.s), #2 -** udiv z0\.s, p0/m, z0\.s, \1 +** lsr z0\.s, z0\.s, #1 ** ret */ TEST_UNIFORM_Z (div_2_u32_x_tied1, svuint32_t, @@ -228,10 +364,47 @@ TEST_UNIFORM_Z (div_2_u32_x_tied1, svuint32_t, /* ** div_2_u32_x_untied: -** mov z0\.s, #2 -** udivr z0\.s, p0/m, z0\.s, z1\.s +** lsr z0\.s, z1\.s, #1 ** ret */ TEST_UNIFORM_Z (div_2_u32_x_untied, svuint32_t, z0 = svdiv_n_u32_x (p0, z1, 2), z0 = svdiv_x (p0, z1, 2)) + +/* +** div_3_u32_x_tied1: +** mov (z[0-9]+\.s), #3 +** udiv z0\.s, p0/m, z0\.s, \1 +** ret +*/ +TEST_UNIFORM_Z (div_3_u32_x_tied1, svuint32_t, + z0 = svdiv_n_u32_x (p0, z0, 3), + z0 = svdiv_x (p0, z0, 3)) + +/* +** div_3_u32_x_untied: +** mov z0\.s, #3 +** udivr z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (div_3_u32_x_untied, svuint32_t, + z0 = svdiv_n_u32_x (p0, z1, 3), + z0 = svdiv_x (p0, z1, 3)) + +/* +** div_maxpow_u32_x_tied1: +** lsr z0\.s, z0\.s, #31 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_u32_x_tied1, svuint32_t, + z0 = svdiv_n_u32_x (p0, z0, MAXPOW), + z0 = svdiv_x (p0, z0, MAXPOW)) + +/* +** div_maxpow_u32_x_untied: +** lsr z0\.s, z1\.s, #31 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_u32_x_untied, svuint32_t, + z0 = svdiv_n_u32_x (p0, z1, MAXPOW), + z0 = svdiv_x (p0, z1, MAXPOW)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c index ac7c026eea37..5247ebdac7ae 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c @@ -2,6 +2,8 @@ #include "test_sve_acle.h" +#define MAXPOW 1ULL<<63 + /* ** div_u64_m_tied1: ** udiv z0\.d, p0/m, z0\.d, z1\.d @@ -53,10 +55,27 @@ TEST_UNIFORM_ZX (div_x0_u64_m_untied, svuint64_t, uint64_t, z0 = svdiv_n_u64_m (p0, z1, x0), z0 = svdiv_m (p0, z1, x0)) +/* +** div_1_u64_m_tied1: +** sel z0\.d, p0, z0\.d, z0\.d +** ret +*/ +TEST_UNIFORM_Z (div_1_u64_m_tied1, svuint64_t, + z0 = svdiv_n_u64_m (p0, z0, 1), + z0 = svdiv_m (p0, z0, 1)) + +/* +** div_1_u64_m_untied: +** sel z0\.d, p0, z1\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (div_1_u64_m_untied, svuint64_t, + z0 = svdiv_n_u64_m (p0, z1, 1), + z0 = svdiv_m (p0, z1, 1)) + /* ** div_2_u64_m_tied1: -** mov (z[0-9]+\.d), #2 -** udiv z0\.d, p0/m, z0\.d, \1 +** lsr z0\.d, p0/m, z0\.d, #1 ** ret */ TEST_UNIFORM_Z (div_2_u64_m_tied1, svuint64_t, @@ -65,15 +84,54 @@ TEST_UNIFORM_Z (div_2_u64_m_tied1, svuint64_t, /* ** div_2_u64_m_untied: -** mov (z[0-9]+\.d), #2 ** movprfx z0, z1 -** udiv z0\.d, p0/m, z0\.d, \1 +** lsr z0\.d, p0/m, z0\.d, #1 ** ret */ TEST_UNIFORM_Z (div_2_u64_m_untied, svuint64_t, z0 = svdiv_n_u64_m (p0, z1, 2), z0 = svdiv_m (p0, z1, 2)) +/* +** div_3_u64_m_tied1: +** mov (z[0-9]+\.d), #3 +** udiv z0\.d, p0/m, z0\.d, \1 +** ret +*/ +TEST_UNIFORM_Z 
(div_3_u64_m_tied1, svuint64_t, + z0 = svdiv_n_u64_m (p0, z0, 3), + z0 = svdiv_m (p0, z0, 3)) + +/* +** div_3_u64_m_untied: +** mov (z[0-9]+\.d), #3 +** movprfx z0, z1 +** udiv z0\.d, p0/m, z0\.d, \1 +** ret +*/ +TEST_UNIFORM_Z (div_3_u64_m_untied, svuint64_t, + z0 = svdiv_n_u64_m (p0, z1, 3), + z0 = svdiv_m (p0, z1, 3)) + +/* +** div_maxpow_u64_m_tied1: +** lsr z0\.d, p0/m, z0\.d, #63 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_u64_m_tied1, svuint64_t, + z0 = svdiv_n_u64_m (p0, z0, MAXPOW), + z0 = svdiv_m (p0, z0, MAXPOW)) + +/* +** div_maxpow_u64_m_untied: +** movprfx z0, z1 +** lsr z0\.d, p0/m, z0\.d, #63 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_u64_m_untied, svuint64_t, + z0 = svdiv_n_u64_m (p0, z1, MAXPOW), + z0 = svdiv_m (p0, z1, MAXPOW)) + /* ** div_u64_z_tied1: ** movprfx z0\.d, p0/z, z0\.d @@ -137,19 +195,61 @@ TEST_UNIFORM_ZX (div_x0_u64_z_untied, svuint64_t, uint64_t, z0 = svdiv_z (p0, z1, x0)) /* -** div_2_u64_z_tied1: -** mov (z[0-9]+\.d), #2 +** div_1_u64_z_tied1: +** mov (z[0-9]+\.d), #1 ** movprfx z0\.d, p0/z, z0\.d ** udiv z0\.d, p0/m, z0\.d, \1 ** ret */ +TEST_UNIFORM_Z (div_1_u64_z_tied1, svuint64_t, + z0 = svdiv_n_u64_z (p0, z0, 1), + z0 = svdiv_z (p0, z0, 1)) + +/* +** div_1_u64_z_untied: +** mov z0\.d, #1 +** movprfx z0\.d, p0/z, z0\.d +** udivr z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (div_1_u64_z_untied, svuint64_t, + z0 = svdiv_n_u64_z (p0, z1, 1), + z0 = svdiv_z (p0, z1, 1)) + +/* +** div_2_u64_z_tied1: +** movprfx z0\.d, p0/z, z0\.d +** lsr z0\.d, p0/m, z0\.d, #1 +** ret +*/ TEST_UNIFORM_Z (div_2_u64_z_tied1, svuint64_t, z0 = svdiv_n_u64_z (p0, z0, 2), z0 = svdiv_z (p0, z0, 2)) /* ** div_2_u64_z_untied: -** mov (z[0-9]+\.d), #2 +** movprfx z0\.d, p0/z, z1\.d +** lsr z0\.d, p0/m, z0\.d, #1 +** ret +*/ +TEST_UNIFORM_Z (div_2_u64_z_untied, svuint64_t, + z0 = svdiv_n_u64_z (p0, z1, 2), + z0 = svdiv_z (p0, z1, 2)) + +/* +** div_3_u64_z_tied1: +** mov (z[0-9]+\.d), #3 +** movprfx z0\.d, p0/z, z0\.d +** udiv z0\.d, p0/m, z0\.d, \1 +** ret +*/ +TEST_UNIFORM_Z (div_3_u64_z_tied1, svuint64_t, + z0 = svdiv_n_u64_z (p0, z0, 3), + z0 = svdiv_z (p0, z0, 3)) + +/* +** div_3_u64_z_untied: +** mov (z[0-9]+\.d), #3 ** ( ** movprfx z0\.d, p0/z, z1\.d ** udiv z0\.d, p0/m, z0\.d, \1 @@ -159,9 +259,29 @@ TEST_UNIFORM_Z (div_2_u64_z_tied1, svuint64_t, ** ) ** ret */ -TEST_UNIFORM_Z (div_2_u64_z_untied, svuint64_t, - z0 = svdiv_n_u64_z (p0, z1, 2), - z0 = svdiv_z (p0, z1, 2)) +TEST_UNIFORM_Z (div_3_u64_z_untied, svuint64_t, + z0 = svdiv_n_u64_z (p0, z1, 3), + z0 = svdiv_z (p0, z1, 3)) + +/* +** div_maxpow_u64_z_tied1: +** movprfx z0\.d, p0/z, z0\.d +** lsr z0\.d, p0/m, z0\.d, #63 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_u64_z_tied1, svuint64_t, + z0 = svdiv_n_u64_z (p0, z0, MAXPOW), + z0 = svdiv_z (p0, z0, MAXPOW)) + +/* +** div_maxpow_u64_z_untied: +** movprfx z0\.d, p0/z, z1\.d +** lsr z0\.d, p0/m, z0\.d, #63 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_u64_z_untied, svuint64_t, + z0 = svdiv_n_u64_z (p0, z1, MAXPOW), + z0 = svdiv_z (p0, z1, MAXPOW)) /* ** div_u64_x_tied1: @@ -216,10 +336,26 @@ TEST_UNIFORM_ZX (div_x0_u64_x_untied, svuint64_t, uint64_t, z0 = svdiv_n_u64_x (p0, z1, x0), z0 = svdiv_x (p0, z1, x0)) +/* +** div_1_u64_x_tied1: +** ret +*/ +TEST_UNIFORM_Z (div_1_u64_x_tied1, svuint64_t, + z0 = svdiv_n_u64_x (p0, z0, 1), + z0 = svdiv_x (p0, z0, 1)) + +/* +** div_1_u64_x_untied: +** mov z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (div_1_u64_x_untied, svuint64_t, + z0 = svdiv_n_u64_x (p0, z1, 1), + z0 = svdiv_x (p0, z1, 1)) + /* ** div_2_u64_x_tied1: -** mov (z[0-9]+\.d), #2 -** 
udiv z0\.d, p0/m, z0\.d, \1 +** lsr z0\.d, z0\.d, #1 ** ret */ TEST_UNIFORM_Z (div_2_u64_x_tied1, svuint64_t, @@ -228,10 +364,47 @@ TEST_UNIFORM_Z (div_2_u64_x_tied1, svuint64_t, /* ** div_2_u64_x_untied: -** mov z0\.d, #2 -** udivr z0\.d, p0/m, z0\.d, z1\.d +** lsr z0\.d, z1\.d, #1 ** ret */ TEST_UNIFORM_Z (div_2_u64_x_untied, svuint64_t, z0 = svdiv_n_u64_x (p0, z1, 2), z0 = svdiv_x (p0, z1, 2)) + +/* +** div_3_u64_x_tied1: +** mov (z[0-9]+\.d), #3 +** udiv z0\.d, p0/m, z0\.d, \1 +** ret +*/ +TEST_UNIFORM_Z (div_3_u64_x_tied1, svuint64_t, + z0 = svdiv_n_u64_x (p0, z0, 3), + z0 = svdiv_x (p0, z0, 3)) + +/* +** div_3_u64_x_untied: +** mov z0\.d, #3 +** udivr z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (div_3_u64_x_untied, svuint64_t, + z0 = svdiv_n_u64_x (p0, z1, 3), + z0 = svdiv_x (p0, z1, 3)) + +/* +** div_maxpow_u64_x_tied1: +** lsr z0\.d, z0\.d, #63 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_u64_x_tied1, svuint64_t, + z0 = svdiv_n_u64_x (p0, z0, MAXPOW), + z0 = svdiv_x (p0, z0, MAXPOW)) + +/* +** div_maxpow_u64_x_untied: +** lsr z0\.d, z1\.d, #63 +** ret +*/ +TEST_UNIFORM_Z (div_maxpow_u64_x_untied, svuint64_t, + z0 = svdiv_n_u64_x (p0, z1, MAXPOW), + z0 = svdiv_x (p0, z1, MAXPOW)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/div_const_run.c b/gcc/testsuite/gcc.target/aarch64/sve/div_const_run.c new file mode 100644 index 000000000000..c96bb2763dce --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/div_const_run.c @@ -0,0 +1,91 @@ +/* { dg-do run { target aarch64_sve128_hw } } */ +/* { dg-options "-O2 -msve-vector-bits=128" } */ + +#include <arm_sve.h> +#include <stdint.h> + +typedef svbool_t pred __attribute__((arm_sve_vector_bits(128))); +typedef svfloat16_t svfloat16_ __attribute__((arm_sve_vector_bits(128))); +typedef svfloat32_t svfloat32_ __attribute__((arm_sve_vector_bits(128))); +typedef svfloat64_t svfloat64_ __attribute__((arm_sve_vector_bits(128))); +typedef svint32_t svint32_ __attribute__((arm_sve_vector_bits(128))); +typedef svint64_t svint64_ __attribute__((arm_sve_vector_bits(128))); +typedef svuint32_t svuint32_ __attribute__((arm_sve_vector_bits(128))); +typedef svuint64_t svuint64_ __attribute__((arm_sve_vector_bits(128))); + +#define F(T, TS, P, OP1, OP2) \ +{ \ + T##_t op1 = (T##_t) OP1; \ + T##_t op2 = (T##_t) OP2; \ + sv##T##_ res = svdiv_##P (pg, svdup_##TS (op1), svdup_##TS (op2)); \ + sv##T##_ exp = svdup_##TS (op1 / op2); \ + if (svptest_any (pg, svcmpne (pg, exp, res))) \ + __builtin_abort (); \ + \ + sv##T##_ res_n = svdiv_##P (pg, svdup_##TS (op1), op2); \ + if (svptest_any (pg, svcmpne (pg, exp, res_n))) \ + __builtin_abort (); \ +} + +#define TEST_TYPES_1(T, TS) \ + F (T, TS, m, 79, 16) \ + F (T, TS, z, 79, 16) \ + F (T, TS, x, 79, 16) + +#define TEST_TYPES \ + TEST_TYPES_1 (float16, f16) \ + TEST_TYPES_1 (float32, f32) \ + TEST_TYPES_1 (float64, f64) \ + TEST_TYPES_1 (int32, s32) \ + TEST_TYPES_1 (int64, s64) \ + TEST_TYPES_1 (uint32, u32) \ + TEST_TYPES_1 (uint64, u64) + +#define TEST_VALUES_S_1(B, OP1, OP2) \ + F (int##B, s##B, x, OP1, OP2) + +#define TEST_VALUES_S \ + TEST_VALUES_S_1 (32, INT32_MIN, INT32_MIN) \ + TEST_VALUES_S_1 (64, INT64_MIN, INT64_MIN) \ + TEST_VALUES_S_1 (32, -7, 4) \ + TEST_VALUES_S_1 (64, -7, 4) \ + TEST_VALUES_S_1 (32, INT32_MAX, (1 << 30)) \ + TEST_VALUES_S_1 (64, INT64_MAX, (1ULL << 62)) \ + TEST_VALUES_S_1 (32, INT32_MIN, (1 << 30)) \ + TEST_VALUES_S_1 (64, INT64_MIN, (1ULL << 62)) \ + TEST_VALUES_S_1 (32, INT32_MAX, 1) \ + TEST_VALUES_S_1 (64, INT64_MAX, 1) \ + TEST_VALUES_S_1 (32, INT32_MIN, 16) \ + 
TEST_VALUES_S_1 (64, INT64_MIN, 16) \ + TEST_VALUES_S_1 (32, INT32_MAX, -5) \ + TEST_VALUES_S_1 (64, INT64_MAX, -5) \ + TEST_VALUES_S_1 (32, INT32_MIN, -4) \ + TEST_VALUES_S_1 (64, INT64_MIN, -4) + +#define TEST_VALUES_U_1(B, OP1, OP2) \ + F (uint##B, u##B, x, OP1, OP2) + +#define TEST_VALUES_U \ + TEST_VALUES_U_1 (32, UINT32_MAX, UINT32_MAX) \ + TEST_VALUES_U_1 (64, UINT64_MAX, UINT64_MAX) \ + TEST_VALUES_U_1 (32, UINT32_MAX, (1 << 31)) \ + TEST_VALUES_U_1 (64, UINT64_MAX, (1ULL << 63)) \ + TEST_VALUES_U_1 (32, 7, 4) \ + TEST_VALUES_U_1 (64, 7, 4) \ + TEST_VALUES_U_1 (32, 7, 3) \ + TEST_VALUES_U_1 (64, 7, 3) \ + TEST_VALUES_U_1 (32, 11, 1) \ + TEST_VALUES_U_1 (64, 11, 1) + +#define TEST_VALUES \ + TEST_VALUES_S \ + TEST_VALUES_U + +int +main (void) +{ + const pred pg = svptrue_b8 (); + TEST_TYPES + TEST_VALUES + return 0; +} -- GitLab
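
For illustration, a rough standalone example of the kind of code this fold targets (illustrative only, not part of the patch or testsuite; the function names are made up). Compiled with -O2 for an SVE target, the power-of-2 divisions below should no longer emit sdiv/udiv but the shift forms checked in the div_s32.c/div_u64.c tests above:

#include <arm_sve.h>

svint32_t
div_s32_by_16 (svbool_t pg, svint32_t x)
{
  /* Divisor is a positive power of 2: expected to fold to svasrd,
     i.e. "asrd z0.s, p0/m, z0.s, #4".  */
  return svdiv_n_s32_x (pg, x, 16);
}

svuint64_t
div_u64_by_8 (svbool_t pg, svuint64_t x)
{
  /* Unsigned divisor that is a power of 2: expected to fold to svlsr,
     i.e. "lsr z0.d, z0.d, #3".  */
  return svdiv_n_u64_x (pg, x, 8);
}

svint32_t
div_s32_by_3 (svbool_t pg, svint32_t x)
{
  /* Not a power of 2: no fold, still "sdiv z0.s, p0/m, z0.s, ...".  */
  return svdiv_n_s32_x (pg, x, 3);
}

The signed case uses svasrd rather than a plain arithmetic shift because ASRD rounds toward zero, matching the semantics of signed integer division.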