From b381e84c4ecd72029effe1f8919d3aaea0fbd86b Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Thu, 24 Oct 2024 14:22:31 +0100
Subject: [PATCH] Simplify (X /[ex] C1) * (C1 * C2) -> X * C2

gcc/
	* match.pd: Simplify (X /[ex] C1) * (C1 * C2) -> X * C2.

gcc/testsuite/
	* gcc.dg/tree-ssa/mulexactdiv-1.c: New test.
	* gcc.dg/tree-ssa/mulexactdiv-2.c: Likewise.
	* gcc.dg/tree-ssa/mulexactdiv-3.c: Likewise.
	* gcc.dg/tree-ssa/mulexactdiv-4.c: Likewise.
	* gcc.target/aarch64/sve/cnt_fold_1.c: Likewise.
	* gcc.target/aarch64/sve/cnt_fold_2.c: Likewise.
---
 gcc/match.pd                                  |   8 ++
 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-1.c |  23 ++++
 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-2.c |  19 +++
 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-3.c |  21 ++++
 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-4.c |  14 +++
 .../gcc.target/aarch64/sve/cnt_fold_1.c       | 110 ++++++++++++++++++
 .../gcc.target/aarch64/sve/cnt_fold_2.c       |  55 +++++++++
 7 files changed, 250 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-3.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_2.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 2e7f06ecbe45..e3dabdc15d5f 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
    zerop
    initializer_each_zero_or_onep
    CONSTANT_CLASS_P
+   poly_int_tree_p
    tree_expr_nonnegative_p
    tree_expr_nonzero_p
    integer_valued_real_p
@@ -5471,6 +5472,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (mult (convert1? (exact_div @0 @@1)) (convert2? @1))
   (convert @0))
 
+/* (X /[ex] C1) * (C1 * C2) -> X * C2, where C1 * C2 may be a poly_int.  */
+(simplify
+ (mult (convert? (exact_div @0 INTEGER_CST@1)) poly_int_tree_p@2)
+ (with { poly_widest_int factor; } /* Set by multiple_p to @2 / @1 (C2).  */
+  (if (multiple_p (wi::to_poly_widest (@2), wi::to_widest (@1), &factor))
+   (mult (convert @0) { wide_int_to_tree (type, factor); }))))
+
 /* Simplify (A / B) * B + (A % B) -> A.  */
 (for div (trunc_div ceil_div floor_div round_div)
      mod (trunc_mod ceil_mod floor_mod round_mod)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-1.c b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-1.c
new file mode 100644
index 000000000000..fa853eb7dffc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-1.c
@@ -0,0 +1,23 @@
+/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+
+#define TEST_CMP(FN, DIV, MUL)	/* FN returns (x / DIV) * MUL.  */	\
+  int						\
+  FN (int x)   					\
+  {						\
+    if (x & 7)	/* Past here x is a multiple of 8.  */	\
+      __builtin_unreachable ();			\
+    x /= DIV;					\
+    return x * MUL;				\
+  }
+
+TEST_CMP (f1, 2, 6)	/* 6 / 2: the scans below expect x * 3.  */
+TEST_CMP (f2, 2, 10)	/* 10 / 2: the scans below expect x * 5.  */
+TEST_CMP (f3, 4, 80)	/* 80 / 4: the scans below expect x * 20.  */
+TEST_CMP (f4, 8, 200)	/* 200 / 8: the scans below expect x * 25.  */
+
+/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr, } "optimized" } } */
+/* { dg-final { scan-tree-dump-not {<rshift_expr, } "optimized" } } */
+/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 3,} "optimized" } } */
+/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 5,} "optimized" } } */
+/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 20,} "optimized" } } */
+/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 25,} "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-2.c b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-2.c
new file mode 100644
index 000000000000..9df49690ab64
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-2.c
@@ -0,0 +1,19 @@
+/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+
+#define TEST_CMP(FN, DIV, MUL)	/* FN returns (x / DIV) * MUL.  */	\
+  int						\
+  FN (int x)   					\
+  {						\
+    if (x & 7)	/* Past here x is a multiple of 8.  */	\
+      __builtin_unreachable ();			\
+    x /= DIV;					\
+    return x * MUL;				\
+  }
+
+TEST_CMP (f1, 2, 1)	/* 1 is not a multiple of 2.  */
+TEST_CMP (f2, 2, 5)	/* 5 is not a multiple of 2.  */
+TEST_CMP (f3, 4, 10)	/* 10 is not a multiple of 4.  */
+TEST_CMP (f4, 8, 100)	/* 100 is not a multiple of 8.  */
+TEST_CMP (f5, 16, 32)	/* x is only guaranteed a multiple of 8, not 16.  */
+
+/* { dg-final { scan-tree-dump-times {<[a-z]*_div_expr, } 5 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-3.c b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-3.c
new file mode 100644
index 000000000000..38778a0d7a55
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-3.c
@@ -0,0 +1,21 @@
+/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+
+#define TEST_CMP(FN, TYPE1, DIV, TYPE2, MUL)	\
+  TYPE2						\
+  FN (TYPE1 x) 					\
+  {						\
+    if (x & 7)	/* Past here x is a multiple of 8.  */	\
+      __builtin_unreachable ();			\
+    x /= (TYPE1) (DIV);	/* Division is done in TYPE1...  */	\
+    return (TYPE2) x * (TYPE2) (MUL); /* ...the multiply in TYPE2.  */ \
+  }
+
+TEST_CMP (f1, int, 2, long, (~0UL >> 1) & -2)	/* MUL is LONG_MAX - 1.  */
+TEST_CMP (f2, int, 4, unsigned long, -8)	/* MUL wraps to ULONG_MAX - 7.  */
+TEST_CMP (f3, int, 8, unsigned int, -24)	/* MUL wraps to UINT_MAX - 23.  */
+TEST_CMP (f4, long, 2, int, (~0U >> 1) & -2)	/* MUL is INT_MAX - 1.  */
+TEST_CMP (f5, long, 4, unsigned int, 100)	/* 100 is 4 * 25.  */
+TEST_CMP (f6, long, 8, unsigned long, 200)	/* 200 is 8 * 25.  */
+
+/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr, } "optimized" } } */
+/* { dg-final { scan-tree-dump-not {<rshift_expr, } "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-4.c b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-4.c
new file mode 100644
index 000000000000..b641c0bff6d6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-4.c
@@ -0,0 +1,14 @@
+/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+
+int
+f1 (int x)
+{
+  if (x & 15)			/* Past here x is a multiple of 16.  */
+    __builtin_unreachable ();
+  x /= 2;			/* No remainder, given the guard above.  */
+  x = (unsigned short) x * 4;	/* Quotient is cast to unsigned short first.  */
+  return x;			/* The scan below expects the exact_div kept.  */
+}
+
+
+/* { dg-final { scan-tree-dump {<exact_div_expr, } "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_1.c
new file mode 100644
index 000000000000..afa50f93a386
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_1.c
@@ -0,0 +1,110 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+/*
+** f1:
+**	cntd	x([0-9]+)
+**	mul	w0, (w0, w\1|w\1, w0)
+**	ret
+*/
+int
+f1 (int x)
+{
+  if (x & 1)			/* Past here x is a multiple of 2.  */
+    __builtin_unreachable ();
+  x /= 2;			/* No remainder, given the guard above.  */
+  return x * svcntw();		/* Expected asm above: mul by cntd.  */
+}
+
+/*
+** f2:
+**	cntd	x([0-9]+)
+**	mul	w0, (w0, w\1|w\1, w0)
+**	ret
+*/
+int
+f2 (int x)
+{
+  if (x & 3)			/* Past here x is a multiple of 4.  */
+    __builtin_unreachable ();
+  x /= 4;			/* No remainder, given the guard above.  */
+  return x * svcnth();		/* Expected asm above: mul by cntd.  */
+}
+
+/*
+** f3:
+**	cntd	x([0-9]+)
+**	mul	w0, (w0, w\1|w\1, w0)
+**	ret
+*/
+int
+f3 (int x)
+{
+  if (x & 7)			/* Past here x is a multiple of 8.  */
+    __builtin_unreachable ();
+  x /= 8;			/* No remainder, given the guard above.  */
+  return x * svcntb();		/* Expected asm above: mul by cntd.  */
+}
+
+/*
+** f4:
+**	cntw	x([0-9]+)
+**	mul	w0, (w0, w\1|w\1, w0)
+**	ret
+*/
+int
+f4 (int x)
+{
+  if (x & 1)			/* Past here x is a multiple of 2.  */
+    __builtin_unreachable ();
+  x /= 2;			/* No remainder, given the guard above.  */
+  return x * svcnth();		/* Expected asm above: mul by cntw.  */
+}
+
+/*
+** f5:
+**	cntw	x([0-9]+)
+**	mul	w0, (w0, w\1|w\1, w0)
+**	ret
+*/
+int
+f5 (int x)
+{
+  if (x & 3)			/* Past here x is a multiple of 4.  */
+    __builtin_unreachable ();
+  x /= 4;			/* No remainder, given the guard above.  */
+  return x * svcntb();		/* Expected asm above: mul by cntw.  */
+}
+
+/*
+** f6:
+**	cnth	x([0-9]+)
+**	mul	w0, (w0, w\1|w\1, w0)
+**	ret
+*/
+int
+f6 (int x)
+{
+  if (x & 1)			/* Past here x is a multiple of 2.  */
+    __builtin_unreachable ();
+  x /= 2;			/* No remainder, given the guard above.  */
+  return x * svcntb();		/* Expected asm above: mul by cnth.  */
+}
+
+/*
+** f7:
+**	cntb	x([0-9]+)
+**	mul	w0, (w0, w\1|w\1, w0)
+**	ret
+*/
+int
+f7 (int x)
+{
+  if (x & 15)			/* Past here x is a multiple of 16.  */
+    __builtin_unreachable ();
+  x /= 16;			/* No remainder, given the guard above.  */
+  return x * svcntb() * 16;	/* Expected asm above: mul by cntb alone.  */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_2.c
new file mode 100644
index 000000000000..7412b7b964eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_2.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <arm_sve.h>
+
+int
+f1 (int x)
+{
+  x /= 2;			/* No guard on x: may leave a remainder.  */
+  return x * svcntw();
+}
+
+int
+f2 (int x)
+{
+  x /= 4;			/* No guard on x: may leave a remainder.  */
+  return x * svcnth();
+}
+
+int
+f3 (int x)
+{
+  x /= 8;			/* No guard on x: may leave a remainder.  */
+  return x * svcntb();
+}
+
+int
+f4 (int x)
+{
+  x /= 2;			/* No guard on x: may leave a remainder.  */
+  return x * svcnth();
+}
+
+int
+f5 (int x)
+{
+  x /= 4;			/* No guard on x: may leave a remainder.  */
+  return x * svcntb();
+}
+
+int
+f6 (int x)
+{
+  x /= 2;			/* No guard on x: may leave a remainder.  */
+  return x * svcntb();
+}
+
+int
+f7 (int x)
+{
+  x /= 16;			/* No guard on x: may leave a remainder.  */
+  return x * svcntb() * 16;
+}
+
+/* { dg-final { scan-assembler-times {\tasr\t} 7 } } */
-- 
GitLab