From ff596ca15ca105fc571e95cdf35c074cb4f011d4 Mon Sep 17 00:00:00 2001
From: Oleg Endo <olegendo@gcc.gnu.org>
Date: Tue, 4 Sep 2012 08:03:01 +0000
Subject: [PATCH] re PR target/51244 ([SH] Inefficient conditional branch and
 code around T bit)

	PR target/51244
	* config/sh/sh.c (prepare_cbranch_operands): Pull out comparison
	canonicalization code into...
	* (sh_canonicalize_comparison): This new function.
	* config/sh/sh-protos.h: Declare it.
	* config/sh/sh.h: Use it in new macro CANONICALIZE_COMPARISON.
	* config/sh/sh.md (cbranchsi4): Remove TARGET_CBRANCHDI4 check and
	always invoke expand_cbranchsi4.

From-SVN: r190909
---
 gcc/ChangeLog             |  11 +++
 gcc/config/sh/sh-protos.h |   3 +
 gcc/config/sh/sh.c        | 160 ++++++++++++++++++++++++++------------
 gcc/config/sh/sh.h        |   4 +
 gcc/config/sh/sh.md       |   5 +-
 5 files changed, 131 insertions(+), 52 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a433bad52a99..059967a8c3b7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2012-09-04  Oleg Endo  <olegendo@gcc.gnu.org>
+
+	PR target/51244
+	* config/sh/sh.c (prepare_cbranch_operands): Pull out comparison
+	canonicalization code into...
+	* (sh_canonicalize_comparison): This new function.
+	* config/sh/sh-protos.h: Declare it.
+	* config/sh/sh.h: Use it in new macro CANONICALIZE_COMPARISON.
+	* config/sh/sh.md (cbranchsi4): Remove TARGET_CBRANCHDI4 check and
+	always invoke expand_cbranchsi4.
+
 2012-09-03  Andi Kleen <ak@linux.intel.com>
 
 	* tree-ssa-sccvn.c (vn_reference_fold_indirect): Initialize
diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h
index 8cc5cc6b4888..2bb318b1bc32 100644
--- a/gcc/config/sh/sh-protos.h
+++ b/gcc/config/sh/sh-protos.h
@@ -106,6 +106,9 @@ extern bool sh_expand_t_scc (rtx *);
 extern rtx sh_gen_truncate (enum machine_mode, rtx, int);
 extern bool sh_vector_mode_supported_p (enum machine_mode);
 extern bool sh_cfun_trap_exit_p (void);
+extern void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
+					enum machine_mode mode = VOIDmode);
+
 #endif /* RTX_CODE */
 
 extern const char *output_jump_label_table (void);
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index 3851ec6d50f3..5055d1ff929e 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -21,6 +21,12 @@ You should have received a copy of the GNU General Public License
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 
+/* FIXME: This is a temporary hack, so that we can include <algorithm>
+   below.  <algorithm> will try to include <cstdlib> which will reference
+   malloc & co, which are poisoned by "system.h".  The proper solution is
+   to include <cstdlib> in "system.h" instead of <stdlib.h>.  */
+#include <cstdlib>
+
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
@@ -56,6 +62,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tm-constrs.h"
 #include "opts.h"
 
+#include <algorithm>
 
 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
 
@@ -1791,65 +1798,124 @@ prepare_move_operands (rtx operands[], enum machine_mode mode)
     }
 }
 
-enum rtx_code
-prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
-			  enum rtx_code comparison)
+/* Implement the CANONICALIZE_COMPARISON macro for the combine pass.
+   This function is also re-used to canonicalize comparisons in cbranch
+   pattern expanders.  */
+void
+sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
+			    enum machine_mode mode)
 {
-  rtx op1;
-  rtx scratch = NULL_RTX;
+  /* When invoked from within the combine pass the mode is not specified,
+     so try to get it from one of the operands.  */
+  if (mode == VOIDmode)
+    mode = GET_MODE (op0);
+  if (mode == VOIDmode)
+    mode = GET_MODE (op1);
 
-  if (comparison == LAST_AND_UNUSED_RTX_CODE)
-    comparison = GET_CODE (operands[0]);
-  else
-    scratch = operands[4];
-  if (CONST_INT_P (operands[1])
-      && !CONST_INT_P (operands[2]))
-    {
-      rtx tmp = operands[1];
+  // We need to have a mode to do something useful here.
+  if (mode == VOIDmode)
+    return;
+
+  // Currently, we don't deal with floats here.
+  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+    return;
 
-      operands[1] = operands[2];
-      operands[2] = tmp;
-      comparison = swap_condition (comparison);
+  // Make sure that the constant operand is the second operand.
+  if (CONST_INT_P (op0) && !CONST_INT_P (op1))
+    {
+      std::swap (op0, op1);
+      cmp = swap_condition (cmp);
     }
-  if (CONST_INT_P (operands[2]))
+
+  if (CONST_INT_P (op1))
     {
-      HOST_WIDE_INT val = INTVAL (operands[2]);
-      if ((val == -1 || val == -0x81)
-	  && (comparison == GT || comparison == LE))
+      /* Try to adjust the constant operand in such a way that available
+         comparison insns can be utilized better and the constant can be
+         loaded with a 'mov #imm,Rm' insn.  This avoids a load from the
+         constant pool.  */
+      const HOST_WIDE_INT val = INTVAL (op1);
+
+      /* x > -1		  --> x >= 0
+	 x > 0xFFFFFF7F	  --> x >= 0xFFFFFF80
+	 x <= -1	  --> x < 0
+	 x <= 0xFFFFFF7F  --> x < 0xFFFFFF80  */
+      if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
+	{
+	  cmp = cmp == GT ? GE : LT;
+	  op1 = gen_int_mode (val + 1, mode);
+        }
+
+      /* x >= 1     --> x > 0
+	 x >= 0x80  --> x > 0x7F
+	 x < 1      --> x <= 0
+	 x < 0x80   --> x <= 0x7F  */
+      else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
 	{
-	  comparison = (comparison == GT) ? GE : LT;
-	  operands[2] = gen_int_mode (val + 1, mode);
+	  cmp = cmp == GE ? GT : LE;
+	  op1 = gen_int_mode (val - 1, mode);
 	}
-      else if ((val == 1 || val == 0x80)
-	       && (comparison == GE || comparison == LT))
+
+      /* unsigned x >= 1  --> x != 0
+	 unsigned x < 1   --> x == 0  */
+      else if (val == 1 && (cmp == GEU || cmp == LTU))
 	{
-	  comparison = (comparison == GE) ? GT : LE;
-	  operands[2] = gen_int_mode (val - 1, mode);
+	  cmp = cmp == GEU ? NE : EQ;
+	  op1 = CONST0_RTX (mode);
 	}
-      else if (val == 1 && (comparison == GEU || comparison == LTU))
+
+      /* unsigned x >= 0x80  --> unsigned x > 0x7F
+	 unsigned x < 0x80   --> unsigned x < 0x7F  */
+      else if (val == 0x80 && (cmp == GEU || cmp == LTU))
 	{
-	  comparison = (comparison == GEU) ? NE : EQ;
-	  operands[2] = CONST0_RTX (mode);
+	  cmp = cmp == GEU ? GTU : LEU;
+	  op1 = gen_int_mode (val - 1, mode);
 	}
-      else if (val == 0x80 && (comparison == GEU || comparison == LTU))
+
+      /* unsigned x > 0   --> x != 0
+	 unsigned x <= 0  --> x == 0  */
+      else if (val == 0 && (cmp == GTU || cmp == LEU))
+	cmp = cmp == GTU ? NE : EQ;
+
+      /* unsigned x > 0x7FFFFFFF   --> signed x < 0
+	 unsigned x <= 0x7FFFFFFF  --> signed x >= 0  */
+      else if (mode == SImode && (cmp == GTU || cmp == LEU)
+	       && val == 0x7FFFFFFF)
 	{
-	  comparison = (comparison == GEU) ? GTU : LEU;
-	  operands[2] = gen_int_mode (val - 1, mode);
+	  cmp = cmp == GTU ? LT : GE;
+	  op1 = const0_rtx;
 	}
-      else if (val == 0 && (comparison == GTU || comparison == LEU))
-	comparison = (comparison == GTU) ? NE : EQ;
-      else if (mode == SImode
-	       && ((val == 0x7fffffff
-		    && (comparison == GTU || comparison == LEU))
-		   || ((unsigned HOST_WIDE_INT) val
-			== (unsigned HOST_WIDE_INT) 0x7fffffff + 1
-		       && (comparison == GEU || comparison == LTU))))
+
+      /* unsigned x >= 0x80000000  --> signed x < 0
+	 unsigned x < 0x80000000   --> signed x >= 0  */
+      else if (mode == SImode && (cmp == GEU || cmp == LTU)
+	       && (unsigned HOST_WIDE_INT)val
+		   == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
 	{
-	  comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
-	  operands[2] = CONST0_RTX (mode);
+	  cmp = cmp == GEU ? LT : GE;
+	  op1 = const0_rtx;
 	}
     }
-  op1 = operands[1];
+}
+
+enum rtx_code
+prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
+			  enum rtx_code comparison)
+{
+  /* The scratch reg is only available when this is invoked from within
+     the cbranchdi4_i splitter, through expand_cbranchdi4.  */
+  rtx scratch = NULL_RTX;
+
+  if (comparison == LAST_AND_UNUSED_RTX_CODE)
+    comparison = GET_CODE (operands[0]);
+  else
+    scratch = operands[4];
+
+  sh_canonicalize_comparison (comparison, operands[1], operands[2], mode);
+
+  /* Notice that this function is also invoked after reload by
+     the cbranchdi4_i pattern, through expand_cbranchdi4.  */
+  rtx op1 = operands[1];
+
   if (can_create_pseudo_p ())
     operands[1] = force_reg (mode, op1);
   /* When we are handling DImode comparisons, we want to keep constants so
@@ -1883,8 +1949,6 @@ void
 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
 {
   rtx (*branch_expander) (rtx, rtx) = gen_branch_true;
-  rtx jump;
-
   comparison = prepare_cbranch_operands (operands, SImode, comparison);
   switch (comparison)
     {
@@ -1896,10 +1960,9 @@ expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
   emit_insn (gen_rtx_SET (VOIDmode, get_t_reg_rtx (),
                           gen_rtx_fmt_ee (comparison, SImode,
                                           operands[1], operands[2])));
-  jump = emit_jump_insn (branch_expander (operands[3], get_t_reg_rtx ()));
+  rtx jump = emit_jump_insn (branch_expander (operands[3], get_t_reg_rtx ()));
   if (probability >= 0)
     add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
-
 }
 
 /* ??? How should we distribute probabilities when more than one branch
@@ -1956,8 +2019,7 @@ expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
       lsw_taken = EQ;
       if (prob >= 0)
 	{
-	  /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
-	   */
+	  // If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
 	  msw_skip_prob = rev_prob;
 	  if (REG_BR_PROB_BASE <= 65535)
 	    lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h
index af7fe0bf545e..b36287276aa5 100644
--- a/gcc/config/sh/sh.h
+++ b/gcc/config/sh/sh.h
@@ -1946,6 +1946,10 @@ struct sh_args {
    leave this zero for correct SH3 code.  */
 #define SHIFT_COUNT_TRUNCATED (! TARGET_SH3 && ! TARGET_SH2A)
 
+/* CANONICALIZE_COMPARISON macro for the combine pass.  */
+#define CANONICALIZE_COMPARISON(CODE, OP0, OP1) \
+  sh_canonicalize_comparison ((CODE), (OP0), (OP1))
+
 /* All integers have the same format so truncation is easy.  */
 /* But SHmedia must sign-extend DImode when truncating to SImode.  */
 #define TRULY_NOOP_TRUNCATION(OUTPREC,INPREC) \
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index d69d2947ebbd..8b44fbda4965 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -881,10 +881,9 @@
   if (TARGET_SHMEDIA)
     emit_jump_insn (gen_cbranchint4_media (operands[0], operands[1],
 					   operands[2], operands[3]));
-  else if (TARGET_CBRANCHDI4)
-    expand_cbranchsi4 (operands, LAST_AND_UNUSED_RTX_CODE, -1);
   else
-    sh_emit_compare_and_branch (operands, SImode);
+    expand_cbranchsi4 (operands, LAST_AND_UNUSED_RTX_CODE, -1);
+
   DONE;
 })
 
-- 
GitLab