From c94adf02d31028a25bb7b20ec77aade9d502430b Mon Sep 17 00:00:00 2001
From: Soumya AR <soumyaa@nvidia.com>
Date: Tue, 8 Oct 2024 14:37:24 +0100
Subject: [PATCH] aarch64: Expand CTZ to RBIT + CLZ for SVE [PR109498]

Currently, we vectorize CTZ for SVE by using the following operation:
.CTZ (X) = (PREC - 1) - .CLZ (X & -X)

Instead, this patch expands CTZ to RBIT + CLZ for SVE, as suggested in PR109498.

The patch was bootstrapped and regtested on aarch64-linux-gnu, with no regressions.
OK for mainline?

Signed-off-by: Soumya AR <soumyaa@nvidia.com>

gcc/ChangeLog:
	PR target/109498
	* config/aarch64/aarch64-sve.md (ctz<mode>2): Add pattern to expand
	CTZ to RBIT + CLZ for SVE.

gcc/testsuite/ChangeLog:
	PR target/109498
	* gcc.target/aarch64/sve/ctz.c: New test.
---
 gcc/config/aarch64/aarch64-sve.md          | 17 ++++++++
 gcc/testsuite/gcc.target/aarch64/sve/ctz.c | 49 ++++++++++++++++++++++
 2 files changed, 66 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/ctz.c

diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 90db51e51b9d..06bd3e4bb2c0 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3088,6 +3088,23 @@
 ;; - NOT
 ;; -------------------------------------------------------------------------
 
+(define_expand "ctz<mode>2"
+  [(set (match_operand:SVE_I 0 "register_operand")
+	(unspec:SVE_I
+	  [(match_dup 2)
+	   (ctz:SVE_I
+	     (match_operand:SVE_I 1 "register_operand"))]
+	  UNSPEC_PRED_X))]
+  "TARGET_SVE"
+  {
+    rtx pred = aarch64_ptrue_reg (<VPRED>mode);
+    rtx temp = gen_reg_rtx (<MODE>mode);
+    emit_insn (gen_aarch64_pred_rbit<mode> (temp, pred, operands[1]));
+    emit_insn (gen_aarch64_pred_clz<mode> (operands[0], pred, temp));
+    DONE;
+  }
+)
+
 ;; Unpredicated integer unary arithmetic.
 (define_expand "<optab><mode>2"
   [(set (match_operand:SVE_I 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/ctz.c b/gcc/testsuite/gcc.target/aarch64/sve/ctz.c
new file mode 100644
index 000000000000..433a9174f487
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/ctz.c
@@ -0,0 +1,49 @@
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-options "-O3 --param aarch64-autovec-preference=sve-only" } */
+
+#include <stdint.h>
+
+#define FUNC(FUNCTION, NAME, DTYPE) \
+void \
+NAME (DTYPE *__restrict x, DTYPE *__restrict y, int n) { \
+  for (int i = 0; i < n; i++) \
+    x[i] = FUNCTION (y[i]); \
+} \
+
+
+/*
+** ctz_uint8:
+**	...
+**	rbit	z[0-9]+\.b, p[0-7]/m, z[0-9]+\.b
+**	clz	z[0-9]+\.b, p[0-7]/m, z[0-9]+\.b
+**	...
+*/
+FUNC (__builtin_ctzg, ctz_uint8, uint8_t)
+
+/*
+** ctz_uint16:
+**	...
+**	rbit	z[0-9]+\.h, p[0-7]/m, z[0-9]+\.h
+**	clz	z[0-9]+\.h, p[0-7]/m, z[0-9]+\.h
+**	...
+*/
+FUNC (__builtin_ctzg, ctz_uint16, uint16_t)
+
+/*
+** ctz_uint32:
+**	...
+**	rbit	z[0-9]+\.s, p[0-7]/m, z[0-9]+\.s
+**	clz	z[0-9]+\.s, p[0-7]/m, z[0-9]+\.s
+**	...
+*/
+FUNC (__builtin_ctz, ctz_uint32, uint32_t)
+
+/*
+** ctz_uint64:
+**	...
+**	rbit	z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d
+**	clz	z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d
+**	...
+*/
+FUNC (__builtin_ctzll, ctz_uint64, uint64_t)
+
-- 
GitLab