From 8660aaae7a1320536235fcb7b867a1fa1c76c9aa Mon Sep 17 00:00:00 2001
From: Eric Christopher <echristo@apple.com>
Date: Fri, 14 Sep 2007 00:06:00 +0000
Subject: [PATCH] dse.c (find_shift_sequence): New function.

2007-09-13  Eric Christopher  <echristo@apple.com>
	    Kenneth Zadeck <zadeck@naturalbridge.com>

	* dse.c (find_shift_sequence): New function.
	(replace_read): Add case to remove read if it requires shift.
	* config/i386/i386.c (ix86_expand_prologue): Fixed typo in comment.



Co-Authored-By: Kenneth Zadeck <zadeck@naturalbridge.com>

From-SVN: r128481
---
 gcc/ChangeLog          |   7 +
 gcc/config/i386/i386.c |   2 +-
 gcc/dse.c              | 289 +++++++++++++++++++++++++++++++++--------
 3 files changed, 240 insertions(+), 58 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e23de7abab35..666d43e2e9a0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2007-09-13  Eric Christopher  <echristo@apple.com>
+	    Kenneth Zadeck <zadeck@naturalbridge.com>
+
+	* dse.c (find_shift_sequence): New function.
+	(replace_read): Add case to remove read if it requires shift.
+	* config/i386/i386.c (ix86_expand_prologue): Fixed typo in comment.
+	
 2007-09-13  Tom Tromey  <tromey@redhat.com>
 
 	* c-common.c (fname_as_string): Update.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 0ec93f368045..0d46436c2395 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -6384,7 +6384,7 @@ ix86_expand_prologue (void)
         insn = emit_insn (gen_set_got (pic_offset_table_rtx));
     }
 
-  /* Prevent function calls from be scheduled before the call to mcount.
+  /* Prevent function calls from being scheduled before the call to mcount.
      In the pic_reg_used case, make sure that the got load isn't deleted.  */
   if (current_function_profile)
     {
diff --git a/gcc/dse.c b/gcc/dse.c
index 2226ae14dd24..e04e7e820ad9 100644
--- a/gcc/dse.c
+++ b/gcc/dse.c
@@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "expr.h"
 #include "recog.h"
 #include "dse.h"
+#include "optabs.h"
 #include "dbgcnt.h"
 
 /* This file contains three techniques for performing Dead Store
@@ -1381,6 +1382,112 @@ dump_insn_info (const char * start, insn_info_t insn_info)
 }
 
 
+/* If the modes are different and the value's source and target do not
+   line up, we need to extract the value from lower part of the rhs of
+   the store, shift it, and then put it into a form that can be shoved
+   into the read_insn.  This function generates a right SHIFT of a
+   value that is at least ACCESS_SIZE bytes wide of READ_MODE.  The
+   shift sequence is returned or NULL if we failed to find a
+   shift.  */
+
+static rtx
+find_shift_sequence (rtx read_reg,
+		     int access_size,
+		     store_info_t store_info,
+		     read_info_t read_info,
+		     int shift)
+{
+  enum machine_mode store_mode = GET_MODE (store_info->mem);
+  enum machine_mode read_mode = GET_MODE (read_info->mem);
+
+  /* Some machines like the x86 have shift insns for each size of
+     operand.  Other machines like the ppc or the ia-64 may only have
+     shift insns that shift values within 32 or 64 bit registers.
+     This loop tries to find the smallest shift insn that will right
+     justify the value we want to read but is available in one insn on
+     the machine.  */
+
+  while (access_size < UNITS_PER_WORD)
+    {
+      rtx target;
+      enum machine_mode new_mode
+	= smallest_mode_for_size (access_size * BITS_PER_UNIT,
+				  GET_MODE_CLASS (read_mode));
+      rtx new_reg = gen_reg_rtx (new_mode);
+
+      start_sequence ();
+
+      /* In theory we could also check for an ashr.  Ian Taylor knows
+	 of one dsp where the cost of these two was not the same.  But
+	 this really is a rare case anyway.  */
+      target = expand_binop (new_mode, lshr_optab, new_reg,
+			     GEN_INT (shift), new_reg, 1, OPTAB_DIRECT);
+
+      if (target == new_reg)
+	{
+	  rtx shift_seq = get_insns ();
+	  end_sequence ();
+
+	  /* If cost is too great, set target to NULL and
+	     let the iteration happen. */
+	  if (shift_seq != NULL)
+	    {
+	      int cost = 0;
+	      rtx insn;
+
+	      for (insn = shift_seq; insn != NULL_RTX; insn = NEXT_INSN (insn))
+		cost += insn_rtx_cost (insn);
+
+	      /* The computation up to here is essentially independent
+		 of the arguments and could be precomputed.  It may
+		 not be worth doing so.  We could precompute if
+		 worthwhile or at least cache the results.  The result
+		 technically depends on SHIFT, ACCESS_SIZE, and
+		 GET_MODE_CLASS (READ_MODE).  But in practice the
+		 answer will depend only on ACCESS_SIZE.  */
+
+	      if (cost <= COSTS_N_INSNS (1))
+		{
+		  /* We found an acceptable shift.  Generate a move to
+		     take the value from the store and put it into the
+		     shift pseudo, then shift it, then generate another
+		     move to put in into the target of the read.  */
+		  start_sequence ();
+		  emit_move_insn (new_reg, gen_lowpart (new_mode, store_info->rhs));
+		  emit_insn (shift_seq);
+		  emit_move_insn (read_reg,  gen_lowpart (read_mode, new_reg));
+		  
+		  if (dump_file)
+		    {
+		      fprintf (dump_file, " -- adding extract insn r%d:%s = r%d:%s\n",
+			       REGNO (new_reg), GET_MODE_NAME (new_mode),
+			       REGNO (store_info->rhs), GET_MODE_NAME (store_mode));
+		      
+		      fprintf (dump_file, " -- with shift of r%d by %d\n",
+			       REGNO(new_reg), shift);
+		      fprintf (dump_file, " -- and second extract insn r%d:%s = r%d:%s\n",
+			       REGNO (read_reg), GET_MODE_NAME (read_mode),
+			       REGNO (new_reg), GET_MODE_NAME (new_mode));
+		    }
+		  
+		  /* Get the three insn sequence and return it.  */
+		  shift_seq = get_insns ();
+		  end_sequence ();
+		  return shift_seq;
+		}
+	    }
+	}
+      else
+	/* End the sequence.  */
+	end_sequence ();
+
+      access_size = access_size * 2;
+    }
+
+  return NULL;
+}
+
+
 /* Take a sequence of:
      A <- r1
      ...
@@ -1392,7 +1499,23 @@ dump_insn_info (const char * start, insn_info_t insn_info)
    ...
    ... <- r2
 
-   The STORE_INFO and STORE_INFO are for the store and the READ_INFO
+   or
+
+   r3 <- extract (r1)
+   r3 <- r3 >> shift
+   r2 <- extract (r3)
+   ... <- r2
+
+   or
+
+   r2 <- extract (r1)
+   ... <- r2
+
+   Depending on the alignment and the mode of the store and
+   subsequent load.
+
+
+   The STORE_INFO and STORE_INSN are for the store and READ_INFO
    and READ_INSN are for the read.  Return true if the replacement
    went ok.  */
 
@@ -1400,82 +1523,135 @@ static bool
 replace_read (store_info_t store_info, insn_info_t store_insn, 
 	      read_info_t read_info, insn_info_t read_insn, rtx *loc)
 {
+  enum machine_mode store_mode = GET_MODE (store_info->mem);
+  enum machine_mode read_mode = GET_MODE (read_info->mem);
+  int shift;
+  int access_size; /* In bytes.  */
+  rtx read_reg = gen_reg_rtx (read_mode);
+  rtx shift_seq = NULL;
+
   if (!dbg_cnt (dse))
     return false;
 
+  if (GET_MODE_CLASS (read_mode) != GET_MODE_CLASS (store_mode))
+    return false;
+
+  /* To get here the read is within the boundaries of the write so
+     shift will never be negative.  Start out with the shift being in
+     bytes.  */
+  if (BYTES_BIG_ENDIAN)
+    shift = store_info->end - read_info->end;
+  else
+    shift = read_info->begin - store_info->begin;
+
+  access_size = shift + GET_MODE_SIZE (read_mode);
+
+  /* From now on it is bits.  */
+  shift *= BITS_PER_UNIT;
+
+  /* We need to keep this in perspective.  We are replacing a read
+     with a sequence of insns, but the read will almost certainly be
+     in cache, so it is not going to be an expensive one.  Thus, we
+     are not willing to do a multi insn shift or worse a subroutine
+     call to get rid of the read.  */
+  if (shift)
+    {
+      if (access_size > UNITS_PER_WORD || FLOAT_MODE_P (store_mode))
+	return false;
+
+      shift_seq = find_shift_sequence (read_reg, access_size, store_info,
+				       read_info, shift);
+      if (!shift_seq)
+	return false;
+    }
+
   if (dump_file)
-    fprintf (dump_file, "generating move to replace load at %d from store at %d\n", 
+    fprintf (dump_file, "replacing load at %d from store at %d\n",
 	     INSN_UID (read_insn->insn), INSN_UID (store_insn->insn)); 
-  if (GET_MODE (store_info->mem) == GET_MODE (read_info->mem))
+
+  if (validate_change (read_insn->insn, loc, read_reg, 0))
     {
-      rtx new_reg = gen_reg_rtx (GET_MODE (store_info->mem));
-      if (validate_change (read_insn->insn, loc, new_reg, 0))
+      rtx insns;
+      deferred_change_t deferred_change = pool_alloc (deferred_change_pool);
+      
+      if (read_mode == store_mode)
 	{
-	  rtx insns;
-	  deferred_change_t deferred_change = pool_alloc (deferred_change_pool);
-
 	  start_sequence ();
-	  emit_move_insn (new_reg, store_info->rhs);
+	  
+	  /* The modes are the same and everything lines up.  Just
+	     generate a simple move.  */
+	  emit_move_insn (read_reg, store_info->rhs);
+	  if (dump_file)
+	    fprintf (dump_file, " -- adding move insn r%d = r%d\n",
+		     REGNO (read_reg), REGNO (store_info->rhs));
 	  insns = get_insns ();
 	  end_sequence ();
-	  emit_insn_before (insns, store_insn->insn);
-
-	  if (dump_file)
-	    fprintf (dump_file, " -- adding move insn %d: r%d = r%d\n", 
-		     INSN_UID (insns), REGNO (new_reg), REGNO (store_info->rhs)); 
-
-	  /* And now for the cludge part: cselib croaks if you just
-	     return at this point.  There are two reasons for this:
-
-	     1) Cselib has an idea of how many pseudos there are and
-	     that does not include the new one we just added.  
-
-	     2) Cselib does not know about the move insn we added
-	     above the store_info, and there is no way to tell it
-	     about it, because it has "moved on".
-
-	     So we are just going to have to lie.  The move insn is
-	     not really an issue, cselib did not see it.  But the use
-	     of the new pseudo read_insn is a real problem.  The way
-	     that we solve this problem is that we are just going to
-	     put the mem back keep a table of mems to get rid of.  At
-	     the end of the basic block we can put it back.  */
-
-	  *loc = read_info->mem;
-	  deferred_change->next = deferred_change_list;
-	  deferred_change_list = deferred_change;
-	  deferred_change->loc = loc;
-	  deferred_change->reg = new_reg;
-
-	  /* Get rid of the read_info, from the point of view of the
-	     rest of dse, play like this read never happened.  */
-	  read_insn->read_rec = read_info->next;
-	  pool_free (read_info_pool, read_info);
-	  return true;
 	}
-      else 
+      else if (shift)
+	insns = shift_seq;
+      else
 	{
+	  /* The modes are different but the lsb are in the same
+	     place, we need to extract the value in the right from the
+	     rhs of the store.  */
+	  start_sequence ();
+	  emit_move_insn (read_reg, gen_lowpart (read_mode, store_info->rhs));
+	  
 	  if (dump_file)
-	    fprintf (dump_file, " -- validation failure\n"); 
-	  return false;
+	    fprintf (dump_file, " -- adding extract insn r%d:%s = r%d:%s\n",
+		     REGNO (read_reg), GET_MODE_NAME (read_mode),
+		     REGNO (store_info->rhs), GET_MODE_NAME (store_mode));
+	  insns = get_insns ();
+	  end_sequence ();
 	}
+
+      /* Insert this right before the store insn where it will be safe
+	 from later insns that might change it before the read.  */
+      emit_insn_before (insns, store_insn->insn);
+      
+      /* And now for the kludge part: cselib croaks if you just
+	 return at this point.  There are two reasons for this:
+	 
+	 1) Cselib has an idea of how many pseudos there are and
+	 that does not include the new ones we just added.
+	 
+	 2) Cselib does not know about the move insn we added
+	 above the store_info, and there is no way to tell it
+	 about it, because it has "moved on".
+	 
+	 Problem (1) is fixable with a certain amount of engineering.
+	 Problem (2) is requires starting the bb from scratch.  This
+	 could be expensive.
+	 
+	 So we are just going to have to lie.  The move/extraction
+	 insns are not really an issue, cselib did not see them.  But
+	 the use of the new pseudo read_insn is a real problem because
+	 cselib has not scanned this insn.  The way that we solve this
+	 problem is that we are just going to put the mem back for now
+	 and when we are finished with the block, we undo this.  We
+	 keep a table of mems to get rid of.  At the end of the basic
+	 block we can put them back.  */
+      
+      *loc = read_info->mem;
+      deferred_change->next = deferred_change_list;
+      deferred_change_list = deferred_change;
+      deferred_change->loc = loc;
+      deferred_change->reg = read_reg;
+      
+      /* Get rid of the read_info, from the point of view of the
+	 rest of dse, play like this read never happened.  */
+      read_insn->read_rec = read_info->next;
+      pool_free (read_info_pool, read_info);
+      return true;
     }
-  else
+  else 
     {
-      /* Someone with excellent rtl skills needs to fill this in.  You
-	 are guaranteed that the read is of the same size or smaller
-	 than the store, and that the read does not hang off one of
-	 the ends of the store.  But the offsets of each must be
-	 checked because the read does not have to line up on either
-	 end of the store so the begin fields need to be examined in
-	 both the store_info and read_info.  */
       if (dump_file)
-	fprintf (dump_file, " -- complex load, currently unsupported.\n"); 
+	fprintf (dump_file, " -- validation failure\n"); 
       return false;
     }
 }
 
-
 /* A for_each_rtx callback in which DATA is the bb_info.  Check to see
    if LOC is a mem and if it is look at the address and kill any
    appropriate stores that may be active.  */
@@ -3105,4 +3281,3 @@ struct tree_opt_pass pass_rtl_dse2 =
   TODO_ggc_collect,                     /* todo_flags_finish */
   'w'                                   /* letter */
 };
-
-- 
GitLab