diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index 6a20a613f8321adcc8aeff00266bf2c014c2a975..ddd6e466ee3a0880639dc71094437f2cf7fa8dc6 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -417,6 +417,34 @@ public:
 
 class svsra_impl : public function_base
 {
+public:
+  gimple *
+  fold (gimple_folder &f) const override
+  {
+    /* Fold to svlsr/svasr if op1 is all zeros.  */
+    tree op1 = gimple_call_arg (f.call, 0);
+    if (!integer_zerop (op1))
+      return NULL;
+    function_instance instance ("svlsr", functions::svlsr,
+				shapes::binary_uint_opt_n, MODE_n,
+				f.type_suffix_ids, GROUP_none, PRED_x);
+    if (!f.type_suffix (0).unsigned_p)
+      {
+	instance.base_name = "svasr";
+	instance.base = functions::svasr;
+      }
+    gcall *call = f.redirect_call (instance);
+    /* Add a ptrue as predicate, because unlike svsra, svlsr/svasr are
+       predicated intrinsics.  */
+    gimple_call_set_arg (call, 0, build_all_ones_cst (f.gp_type ()));
+    /* For svsra, the shift amount (imm3) is uint64_t for all function types,
+       but for svlsr/svasr, imm3 has the same width as the function type.  */
+    tree imm3 = gimple_call_arg (f.call, 2);
+    tree imm3_prec = wide_int_to_tree (f.scalar_type (0),
+				       wi::to_widest (imm3));
+    gimple_call_set_arg (call, 2, imm3_prec);
+    return call;
+  }
 public:
   rtx
   expand (function_expander &e) const override
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s32.c
index ac992dc7b1c6b9144dde85c08a69187d0003e16f..86cf4bd8137f39911391b86df9ec67eddcf69758 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s32.c
@@ -91,3 +91,12 @@ TEST_UNIFORM_Z (sra_32_s32_tied2, svint32_t,
 TEST_UNIFORM_Z (sra_32_s32_untied, svint32_t,
 		z0 = svsra_n_s32 (z1, z2, 32),
 		z0 = svsra (z1, z2, 32))
+
+/*
+** sra_2_s32_zeroop1:
+**	asr	z0\.s, z1\.s, #2
+**	ret
+*/
+TEST_UNIFORM_Z (sra_2_s32_zeroop1, svint32_t,
+		z0 = svsra_n_s32 (svdup_s32 (0), z1, 2),
+		z0 = svsra (svdup_s32 (0), z1, 2))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s64.c
index 9ea5657ab88dea927b99918e6f705bb1cf912ae2..7b39798ba1d5b753a23bd83813da583a4301a7d2 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s64.c
@@ -91,3 +91,12 @@ TEST_UNIFORM_Z (sra_64_s64_tied2, svint64_t,
 TEST_UNIFORM_Z (sra_64_s64_untied, svint64_t,
 		z0 = svsra_n_s64 (z1, z2, 64),
 		z0 = svsra (z1, z2, 64))
+
+/*
+** sra_2_s64_zeroop1:
+**	asr	z0\.d, z1\.d, #2
+**	ret
+*/
+TEST_UNIFORM_Z (sra_2_s64_zeroop1, svint64_t,
+		z0 = svsra_n_s64 (svdup_s64 (0), z1, 2),
+		z0 = svsra (svdup_s64 (0), z1, 2))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_u32.c
index 090245153f73ae3ecf511b8af37d3a8cebea0217..001e09ca78d783687ea08835896e42ca8f411568 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_u32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_u32.c
@@ -91,3 +91,12 @@ TEST_UNIFORM_Z (sra_32_u32_tied2, svuint32_t,
 TEST_UNIFORM_Z (sra_32_u32_untied, svuint32_t,
 		z0 = svsra_n_u32 (z1, z2, 32),
 		z0 = svsra (z1, z2, 32))
+
+/*
+** sra_2_u32_zeroop1:
+**	lsr	z0\.s, z1\.s, #2
+**	ret
+*/
+TEST_UNIFORM_Z (sra_2_u32_zeroop1, svuint32_t,
+		z0 = svsra_n_u32 (svdup_u32 (0), z1, 2),
+		z0 = svsra (svdup_u32 (0), z1, 2))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_u64.c
index ff21c368b72a838af753fb63f6bd8397226a66e5..780cf7a7ff683538344e1f419dfe15405791bfdc 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_u64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_u64.c
@@ -91,3 +91,12 @@ TEST_UNIFORM_Z (sra_64_u64_tied2, svuint64_t,
 TEST_UNIFORM_Z (sra_64_u64_untied, svuint64_t,
 		z0 = svsra_n_u64 (z1, z2, 64),
 		z0 = svsra (z1, z2, 64))
+
+/*
+** sra_2_u64_zeroop1:
+**	lsr	z0\.d, z1\.d, #2
+**	ret
+*/
+TEST_UNIFORM_Z (sra_2_u64_zeroop1, svuint64_t,
+		z0 = svsra_n_u64 (svdup_u64 (0), z1, 2),
+		z0 = svsra (svdup_u64 (0), z1, 2))
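---

Source-level illustration of the fold (a sketch, not part of the patch; the
function names fold_input/folded_form are made up for the example): when the
first operand of svsra is all zeros, the accumulation contributes nothing, so
the call is rewritten into the equivalent predicated shift with a ptrue
governing predicate, letting the compiler emit a plain ASR/LSR instead of a
tied SRA.

/* Compile with -march=armv8-a+sve2; both functions should now produce a
   single "asr" instruction, as checked by the new zeroop1 tests above.  */
#include <arm_sve.h>

svint32_t
fold_input (svint32_t x)
{
  /* svsra (op1, op2, imm) computes op1 + (op2 >> imm); op1 is all zeros.  */
  return svsra_n_s32 (svdup_s32 (0), x, 2);
}

svint32_t
folded_form (svint32_t x)
{
  /* What the fold rewrites the call above into: svasr (signed element type)
     with an all-true predicate.  For unsigned types it would be svlsr.  */
  return svasr_n_s32_x (svptrue_b32 (), x, 2);
}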