From 065be0ffbcd676b635d492f4679e635b6ece4fe4 Mon Sep 17 00:00:00 2001
From: Matevos Mehrabyan <matevosmehrabyan@gmail.com>
Date: Fri, 28 Apr 2023 14:01:30 -0600
Subject: [PATCH] RISC-V: Add divmod expansion support

Hi all,
If we have division and remainder calculations with the same operands:

  a = b / c;
  d = b % c;

We can replace the calculation of remainder with multiplication +
subtraction, using the result from the previous division:

  a = b / c;
  d = a * c;
  d = b - d;

Which will be faster.
Currently, it isn't done for RISC-V.

I've added an expander for DIVMOD which replaces 'rem' with 'mul + sub'.

Best regards,
Matevos.

gcc/ChangeLog:

	* config/riscv/iterators.md (only_div, paired_mod): New iterators.
	(u): Add div/udiv cases.
	* config/riscv/riscv-protos.h (riscv_use_divmod_expander): Prototype.
	* config/riscv/riscv.cc (struct riscv_tune_param): Add field for
	divmod expansion.
	(rocket_tune_info, sifive_7_tune_info): Initialize new field.
	(thead_c906_tune_info): Likewise.
	(optimize_size_tune_info): Likewise.
	(riscv_use_divmod_expander): New function.
	* config/riscv/riscv.md (<u>divmod<mode>4): New expander.

gcc/testsuite/ChangeLog:
	* gcc.target/riscv/divmod-1.c: New testcase.
	* gcc.target/riscv/divmod-2.c: New testcase.
---
 gcc/config/riscv/iterators.md             |  6 ++++++
 gcc/config/riscv/riscv-protos.h           |  1 +
 gcc/config/riscv/riscv.cc                 | 15 +++++++++++++++
 gcc/config/riscv/riscv.md                 | 16 ++++++++++++++++
 gcc/testsuite/gcc.target/riscv/divmod-1.c | 11 +++++++++++
 gcc/testsuite/gcc.target/riscv/divmod-2.c | 16 ++++++++++++++++
 6 files changed, 65 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/divmod-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/divmod-2.c

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 9b7670384523..1d56324df03a 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -152,6 +152,11 @@
 ;; from the same template.
 (define_code_iterator any_mod [mod umod])
 
+;; These code iterators allow unsigned and signed divmod to be generated
+;; from the same template.
+(define_code_iterator only_div [div udiv])
+(define_code_attr paired_mod [(div "mod") (udiv "umod")])
+
 ;; These code iterators allow the signed and unsigned scc operations to use
 ;; the same template.
 (define_code_iterator any_gt [gt gtu])
@@ -181,6 +186,7 @@
 		     (lt "") (ltu "u")
 		     (le "") (leu "u")
 		     (fix "") (unsigned_fix "u")
+		     (div "") (udiv "u")
 		     (float "") (unsigned_float "u")])
 
 ;; <su> is like <u>, but the signed form expands to "s" rather than "".
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index f87661bde2cf..5a927bdf1b06 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -239,4 +239,5 @@ extern const char*
 th_mempair_output_move (rtx[4], bool, machine_mode, RTX_CODE);
 #endif
 
+extern bool riscv_use_divmod_expander (void);
 #endif /* ! GCC_RISCV_PROTOS_H */
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 1529855a2b4c..09a30dc260fc 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -236,6 +236,7 @@ struct riscv_tune_param
   unsigned short memory_cost;
   unsigned short fmv_cost;
   bool slow_unaligned_access;
+  bool use_divmod_expansion;
 };
 
 /* Information about one micro-arch we know about.  */
@@ -323,6 +324,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   5,						/* memory_cost */
   8,						/* fmv_cost */
   true,						/* slow_unaligned_access */
+  false,					/* use_divmod_expansion */
 };
 
 /* Costs to use when optimizing for Sifive 7 Series.  */
@@ -337,6 +339,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   3,						/* memory_cost */
   8,						/* fmv_cost */
   true,						/* slow_unaligned_access */
+  false,					/* use_divmod_expansion */
 };
 
 /* Costs to use when optimizing for T-HEAD c906.  */
@@ -351,6 +354,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
   5,            /* memory_cost */
   8,		/* fmv_cost */
   false,            /* slow_unaligned_access */
+  false		/* use_divmod_expansion */
 };
 
 /* Costs to use when optimizing for size.  */
@@ -365,6 +369,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
   2,						/* memory_cost */
   8,						/* fmv_cost */
   false,					/* slow_unaligned_access */
+  false,					/* use_divmod_expansion */
 };
 
 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
@@ -7210,6 +7215,16 @@ riscv_lshift_subword (machine_mode mode, rtx value, rtx shift,
 						  gen_lowpart (QImode, shift)));
 }
 
+/* Return TRUE if we should use the divmod expander, FALSE otherwise.  This
+   allows the behavior to be tuned for specific implementations as well as
+   when optimizing for size.  */
+
+bool
+riscv_use_divmod_expander (void)
+{
+  return tune_param->use_divmod_expansion;
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 376a8831820e..c508ee3ad89f 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -1063,6 +1063,22 @@
   [(set_attr "type" "idiv")
    (set_attr "mode" "DI")])
 
+(define_expand "<u>divmod<mode>4"
+  [(parallel
+     [(set (match_operand:GPR 0 "register_operand")
+           (only_div:GPR (match_operand:GPR 1 "register_operand")
+                         (match_operand:GPR 2 "register_operand")))
+      (set (match_operand:GPR 3 "register_operand")
+           (<paired_mod>:GPR (match_dup 1) (match_dup 2)))])]
+  "TARGET_DIV && riscv_use_divmod_expander ()"
+  {
+      rtx tmp = gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_<u>div<GPR:mode>3 (operands[0], operands[1], operands[2]));
+      emit_insn (gen_mul<GPR:mode>3 (tmp, operands[0], operands[2]));
+      emit_insn (gen_sub<GPR:mode>3 (operands[3], operands[1], tmp));
+      DONE;
+  })
+
 (define_insn "*<optab>si3_extended"
   [(set (match_operand:DI                 0 "register_operand" "=r")
 	(sign_extend:DI
diff --git a/gcc/testsuite/gcc.target/riscv/divmod-1.c b/gcc/testsuite/gcc.target/riscv/divmod-1.c
new file mode 100644
index 000000000000..9706611b500c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/divmod-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+
+void
+foo(int a, int b, int *c, int *d)
+{
+   *c = a / b;
+   *d = a % b;
+}
+
+/* { dg-final { scan-assembler-times "\tdiv" 1 } } */
+/* { dg-final { scan-assembler-times "\trem" 1} } */
diff --git a/gcc/testsuite/gcc.target/riscv/divmod-2.c b/gcc/testsuite/gcc.target/riscv/divmod-2.c
new file mode 100644
index 000000000000..dfd319e52c01
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/divmod-2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* Skip this everywhere for now.  Once we have a target with
+   divmod enabled, only skip for -O0, -O1, -Og, -Oz, -Os.  */
+/* { dg-skip-if "" { *-*-* } { } } */
+
+void
+foo(int a, int b, int *c, int *d)
+{
+   *c = a / b;
+   *d = a % b;
+}
+
+/* { dg-final { scan-assembler-not "\trem" } } */
+/* { dg-final { scan-assembler-times "\tdiv" 1 } } */
+/* { dg-final { scan-assembler-times "\tmul" 1 } } */
+/* { dg-final { scan-assembler-times "\tsub" 1 } } */
-- 
GitLab