From b03149e1369b687b5ff9fc7d1fa831137f898018 Mon Sep 17 00:00:00 2001
From: Jie Zhang <jie.zhang@analog.com>
Date: Wed, 31 May 2006 16:46:15 +0000
Subject: [PATCH] bfin-protos.h (bfin_hardware_loop): Declare.

	* config/bfin/bfin-protos.h (bfin_hardware_loop): Declare.
	* config/bfin/bfin.c (basic-block.h): Include.
	(struct machine_function): New.
	(bfin_init_machine_status): New.
	(override_options): Initialize init_machine_status.
	(bfin_hardware_loop): New.
	(MAX_LOOP_DEPTH, MAX_LOOP_LENGTH): Define.
	(DEF_VEC_P (loop_info)): New.
	(DEF_VEC_ALLOC_P (loop_info,heap)): New.
	(struct loop_info): New.
	(loop_info): New typedef.
	(struct loop_work): New.
	(loop_work): New typedef.
	(DEF_VEC_O (loop_work)): New.
	(DEF_VEC_ALLOC_O (loop_work,heap)): New.
	(bfin_dump_loops): New.
	(bfin_bb_in_loop): New.
	(bfin_scan_loop): New.
	(bfin_optimize_loop): New.
	(bfin_reorg_loops): New.
	(bfin_reorg): Use bfin_reorg_loops.
	* config/bfin/bfin.h (FIRST_PSEUDO_REGISTER): Adjust for adding
	loop registers.
	(I_REGNO_P): Simplify.
	(DP_REGNO_P, DPREG_P): New macros.
	(REGISTER_NAMES, FIXED_REGISTERS, CALL_USED_REGISTERS,
	REG_ALLOC_ORDER): Add LT0, LT1, LC0, LC1, LB0, LB1.
	(enum reg_class, REG_CLASS_NAMES, REG_CLASS_CONTENTS):
	Add LT_REGS, LC_REGS, LB_REGS.
	(REG_CLASS_FROM_LETTER): Add 't' for LT_REGS, 'k' for LC_REGS,
	'l' for LB_REGS.
	(REGNO_REG_CLASS): Deal with loop registers.
	* config/bfin/bfin.md: Add comment for 't', 'k', 'l' constraint
	letters.
	(REG_LT0, REG_LT1, REG_LC0, REG_LC1, REG_LB0, REG_LB1):
	New constants for loop registers.
	(UNSPEC_LSETUP_END): New.
	(seq_insns): New define_attr. Set it for appropriate insns.
	(movsi_insn): Add alternatives for move from/to
	loop count registers.
	(doloop_end): New define_expand.
	(loop_end): New define_insn.
	(define_split for bad doloop_end): New.
	(lsetup_with_autoinit): New define_insn.
	(lsetup_without_autoinit): New define_insn.
	(rep_movsi, rep_movhi): Clobber LT1, LC1, LB1.
	* config/bfin/predicates.md (lc_register_operand): New.
	(lt_register_operand): New.
	(lb_register_operand): New.
	(nondp_register_operand): New.
	(nondp_reg_or_memory_operand): New.
	* doc/md.texi: Document Blackfin new 't', 'k', 'l' constraint letters.

From-SVN: r114274
---
 gcc/ChangeLog                 |  55 +++
 gcc/config/bfin/bfin-protos.h |   1 +
 gcc/config/bfin/bfin.c        | 791 +++++++++++++++++++++++++++++++++-
 gcc/config/bfin/bfin.h        |  53 ++-
 gcc/config/bfin/bfin.md       | 188 ++++++--
 gcc/config/bfin/predicates.md |  32 ++
 6 files changed, 1074 insertions(+), 46 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index fb4974c7a6e0..6268ad04e09f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,58 @@
+2006-05-31  Jie Zhang  <jie.zhang@analog.com>
+
+	* config/bfin/bfin-protos.h (bfin_hardware_loop): Declare.
+	* config/bfin/bfin.c (basic-block.h): Include.
+	(struct machine_function): New.
+	(bfin_init_machine_status): New.
+	(override_options): Initialize init_machine_status.
+	(bfin_hardware_loop): New.
+	(MAX_LOOP_DEPTH, MAX_LOOP_LENGTH): Define.
+	(DEF_VEC_P (loop_info)): New.
+	(DEF_VEC_ALLOC_P (loop_info,heap)): New.
+	(struct loop_info): New.
+	(loop_info): New typedef.
+	(struct loop_work): New.
+	(loop_work): New typedef.
+	(DEF_VEC_O (loop_work)): New.
+	(DEF_VEC_ALLOC_O (loop_work,heap)): New.
+	(bfin_dump_loops): New.
+	(bfin_bb_in_loop): New.
+	(bfin_scan_loop): New.
+	(bfin_optimize_loop): New.
+	(bfin_reorg_loops): New.
+	(bfin_reorg): Use bfin_reorg_loops.
+	* config/bfin/bfin.h (FIRST_PSEUDO_REGISTER): Adjust for adding
+	loop registers.
+	(I_REGNO_P): Simplify.
+	(DP_REGNO_P, DPREG_P): New macros.
+	(REGISTER_NAMES, FIXED_REGISTERS, CALL_USED_REGISTERS,
+	REG_ALLOC_ORDER): Add LT0, LT1, LC0, LC1, LB0, LB1.
+	(enum reg_class, REG_CLASS_NAMES, REG_CLASS_CONTENTS):
+	Add LT_REGS, LC_REGS, LB_REGS.
+	(REG_CLASS_FROM_LETTER): Add 't' for LT_REGS, 'k' for LC_REGS,
+	'l' for LB_REGS.
+	(REGNO_REG_CLASS): Deal with loop registers.
+	* config/bfin/bfin.md: Add comment for 't', 'k', 'l' constraint
+	letters.
+	(REG_LT0, REG_LT1, REG_LC0, REG_LC1, REG_LB0, REG_LB1):
+	New constants for loop registers.
+	(UNSPEC_LSETUP_END): New.
+	(seq_insns): New define_attr. Set it for appropriate insns.
+	(movsi_insn): Add alternatives for move from/to
+	loop count registers.
+	(doloop_end): New define_expand.
+	(loop_end): New define_insn.
+	(define_split for bad doloop_end): New.
+	(lsetup_with_autoinit): New define_insn.
+	(lsetup_without_autoinit): New define_insn.
+	(rep_movsi, rep_movhi): Clobber LT1, LC1, LB1.
+	* config/bfin/predicates.md (lc_register_operand): New.
+	(lt_register_operand): New.
+	(lb_register_operand): New.
+	(nondp_register_operand): New.
+	(nondp_reg_or_memory_operand): New.
+	* doc/md.texi: Document Blackfin new 't', 'k', 'l' constraint letters.
+
 2006-05-31  Jie Zhang  <jie.zhang@analog.com>
 
 	* config/bfin/bfin.c (bfin_delegitimize_address): New.
diff --git a/gcc/config/bfin/bfin-protos.h b/gcc/config/bfin/bfin-protos.h
index 4d586acb96ec..bbd1eeb7aa9c 100644
--- a/gcc/config/bfin/bfin-protos.h
+++ b/gcc/config/bfin/bfin-protos.h
@@ -83,6 +83,7 @@ extern void output_push_multiple (rtx, rtx *);
 extern void output_pop_multiple (rtx, rtx *);
 extern int bfin_hard_regno_rename_ok (unsigned int, unsigned int);
 extern rtx bfin_return_addr_rtx (int);
+extern void bfin_hardware_loop (void);
 #undef  Mmode 
 
 #endif
diff --git a/gcc/config/bfin/bfin.c b/gcc/config/bfin/bfin.c
index 288e32703a3f..94c16784f32d 100644
--- a/gcc/config/bfin/bfin.c
+++ b/gcc/config/bfin/bfin.c
@@ -51,6 +51,14 @@
 #include "bfin-protos.h"
 #include "tm-preds.h"
 #include "gt-bfin.h"
+#include "basic-block.h"
+
+/* Machine-specific, per-function data, reached through cfun->machine.
+   has_hardware_loops is incremented by bfin_hardware_loop.  */
+struct machine_function GTY(())
+{
+  int has_hardware_loops;
+};
 
 /* Test and compare insns in bfin.md store the information needed to
    generate branch and scc insns here.  */
@@ -1957,6 +1965,16 @@ bfin_handle_option (size_t code, const char *arg, int value)
     }
 }
 
+/* Allocate the zero-filled per-function machine data.  Installed as
+   init_machine_status by override_options.  */
+static struct machine_function *
+bfin_init_machine_status (void)
+{
+  struct machine_function *status
+    = ggc_alloc_cleared (sizeof (struct machine_function));
+  return status;
+}
+
 /* Implement the macro OVERRIDE_OPTIONS.  */
 
 void
@@ -1987,6 +2005,8 @@ override_options (void)
     flag_pic = 0;
 
   flag_schedule_insns = 0;
+
+  init_machine_status = bfin_init_machine_status;
 }
 
 /* Return the destination address of BRANCH.
@@ -2704,6 +2724,771 @@ bfin_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
 
   return cost;
 }
+
+
+/* Record one more hardware loop insn for the current function by
+   bumping the counter kept in its machine_function.  */
+
+void
+bfin_hardware_loop (void)
+{
+  cfun->machine->has_hardware_loops += 1;
+}
+
+/* Maximum loop nesting depth.  */
+#define MAX_LOOP_DEPTH 2
+
+/* Maximum size of a loop.  */
+#define MAX_LOOP_LENGTH 4096
+
+/* We need to keep a vector of loops */
+typedef struct loop_info *loop_info;
+DEF_VEC_P (loop_info);
+DEF_VEC_ALLOC_P (loop_info,heap);
+
+/* Information about a loop we have found (or are in the process of
+   finding).  */
+struct loop_info GTY (())
+{
+  /* Loop number, for dumps.  */
+  int loop_no;
+
+  /* Predecessor block of the loop.  This is the one that falls into
+     the loop and contains the initialization instruction.  */
+  basic_block predecessor;
+
+  /* First block in the loop.  This is the one branched to by the loop_end
+     insn.  */
+  basic_block head;
+
+  /* Last block in the loop (the one with the loop_end insn).  */
+  basic_block tail;
+
+  /* The successor block of the loop.  This is the one the loop_end insn
+     falls into.  */
+  basic_block successor;
+
+  /* Last real insn of the loop body; end_label is emitted before it.  */
+  rtx last_insn;
+
+  /* The loop_end insn.  */
+  rtx loop_end;
+
+  /* The iteration register.  */
+  rtx iter_reg;
+
+  /* Insn copying iter_reg into the LC register, or NULL_RTX if unused.  */
+  rtx init;
+
+  /* The new LSETUP insn that initializes the hardware loop.  */
+  rtx loop_init;
+
+  /* The label placed at the beginning of the loop.  */
+  rtx start_label;
+
+  /* The new label placed at the end of the loop.  */
+  rtx end_label;
+
+  /* The length of the loop.  */
+  int length;
+
+  /* Nesting depth (an innermost loop is 1); -1 marks a bad loop.  */
+  int depth;
+
+  /* True if we have visited this loop.  */
+  int visited;
+
+  /* True if this loop body clobbers any of LC0, LT0, or LB0.  */
+  int clobber_loop0;
+
+  /* True if this loop body clobbers any of LC1, LT1, or LB1.  */
+  int clobber_loop1;
+
+  /* Next loop in the list of all loops found.  */
+  struct loop_info *next;
+
+  /* Immediate outer loop of this loop.  */
+  struct loop_info *outer;
+
+  /* Vector of blocks only within the loop, (excluding those within
+     inner loops).  */
+  VEC (basic_block,heap) *blocks;
+
+  /* Vector of inner loops within this loop.  */
+  VEC (loop_info,heap) *loops;
+};
+
+/* One pending (block, loop) pair used during loop detection.  */
+typedef struct loop_work GTY(())
+{
+  /* Basic block to be scanned.  */
+  basic_block block;
+
+  /* Loop on whose behalf BLOCK is scanned.  */
+  loop_info loop;
+} loop_work;
+
+/* Work list.  */
+DEF_VEC_O (loop_work);
+DEF_VEC_ALLOC_O (loop_work,heap);
+
+/* Print each loop on the LOOPS list to dump_file: its number, head
+   block, depth, own blocks and immediate inner loops.  */
+static void
+bfin_dump_loops (loop_info loops)
+{
+  loop_info l;
+
+  for (l = loops; l != NULL; l = l->next)
+    {
+      loop_info sub;
+      basic_block blk;
+      unsigned i;
+
+      fprintf (dump_file, ";; loop %d: ", l->loop_no);
+      fprintf (dump_file, "{head:%d, depth:%d}", l->head->index, l->depth);
+      fprintf (dump_file, " blocks: [ ");
+      for (i = 0; VEC_iterate (basic_block, l->blocks, i, blk); i++)
+	fprintf (dump_file, "%d ", blk->index);
+      fprintf (dump_file, "] ");
+      fprintf (dump_file, " inner loops: [ ");
+      for (i = 0; VEC_iterate (loop_info, l->loops, i, sub); i++)
+	fprintf (dump_file, "%d ", sub->loop_no);
+      fprintf (dump_file, "]\n");
+    }
+  fprintf (dump_file, "\n");
+}
+
+/* Return true if basic block BB belongs to LOOP, either directly or
+   through any loop nested inside it.  */
+
+static bool
+bfin_bb_in_loop (loop_info loop, basic_block bb)
+{
+  unsigned i;
+  loop_info sub;
+  basic_block member;
+
+  /* Blocks owned directly by LOOP.  */
+  for (i = 0; VEC_iterate (basic_block, loop->blocks, i, member); i++)
+    if (member == bb)
+      return true;
+  /* Blocks owned by nested loops, recursively.  */
+  for (i = 0; VEC_iterate (loop_info, loop->loops, i, sub); i++)
+    if (bfin_bb_in_loop (sub, bb))
+      return true;
+  return false;
+}
+
+/* Return true if REG is mentioned by any insn in LOOP or in a loop
+   nested inside it.  The insn LOOP_END, if it occurs in one of LOOP's
+   own blocks, is not counted.  */
+
+static bool
+bfin_scan_loop (loop_info loop, rtx reg, rtx loop_end)
+{
+  unsigned i;
+  loop_info sub;
+  basic_block blk;
+
+  /* Blocks owned directly by LOOP.  */
+  for (i = 0; VEC_iterate (basic_block, loop->blocks, i, blk); i++)
+    {
+      rtx stop = NEXT_INSN (BB_END (blk));
+      rtx cur;
+
+      for (cur = BB_HEAD (blk); cur != stop; cur = NEXT_INSN (cur))
+	{
+	  if (INSN_P (cur)
+	      && cur != loop_end
+	      && reg_mentioned_p (reg, PATTERN (cur)))
+	    return true;
+	}
+    }
+
+  /* Nested loops get no exemption: NULL_RTX matches no insn.  */
+  for (i = 0; VEC_iterate (loop_info, loop->loops, i, sub); i++)
+    if (bfin_scan_loop (sub, reg, NULL_RTX))
+      return true;
+
+  return false;
+}
+
+/* Try to turn LOOP, after the loops nested in it, into a hardware loop.
+   A loop failing any check is marked bad; if its iteration register is
+   a D or P register, loop_end becomes decrement, compare and branch.  */
+
+static void
+bfin_optimize_loop (loop_info loop)
+{
+  basic_block bb;
+  loop_info inner, outer;
+  rtx insn, init_insn, last_insn, nop_insn;
+  rtx loop_init, start_label, end_label;
+  rtx reg_lc0, reg_lc1, reg_lt0, reg_lt1, reg_lb0, reg_lb1;
+  rtx iter_reg, lc_reg, lt_reg, lb_reg;
+  rtx seq;
+  int length, inner_num, bb_num;
+  unsigned ix;
+  int inner_depth = 0;
+
+  if (loop->visited)
+    return;
+
+  loop->visited = 1;
+
+  /* IX advances only when INNER stays in loop->loops; a removal shifts
+     the following entries down into slot IX.  */
+  for (ix = 0; VEC_iterate (loop_info, loop->loops, ix, inner); )
+    {
+      if (inner->loop_no == loop->loop_no)
+	loop->depth = -1;
+      else
+	bfin_optimize_loop (inner);
+      if (inner->depth < 0 || inner->depth > MAX_LOOP_DEPTH)
+	{
+	  inner->outer = NULL;
+	  VEC_ordered_remove (loop_info, loop->loops, ix);
+	}
+      else
+	ix++;
+
+      if (inner_depth < inner->depth)
+	inner_depth = inner->depth;
+      loop->clobber_loop0 |= inner->clobber_loop0;
+      loop->clobber_loop1 |= inner->clobber_loop1;
+    }
+
+  if (loop->depth < 0)
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no);
+      goto bad_loop;
+    }
+
+  loop->depth = inner_depth + 1;
+  if (loop->depth > MAX_LOOP_DEPTH)
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; loop %d too deep\n", loop->loop_no);
+      goto bad_loop;
+    }
+
+  /* Make sure we only have one entry point.  */
+  if (EDGE_COUNT (loop->head->preds) == 2)
+    {
+      loop->predecessor = EDGE_PRED (loop->head, 0)->src;
+      if (loop->predecessor == loop->tail)
+	/* We wanted the other predecessor.  */
+	loop->predecessor = EDGE_PRED (loop->head, 1)->src;
+
+      /* We can only place a loop insn on a fall through edge of a
+	 single exit block.  */
+      if (EDGE_COUNT (loop->predecessor->succs) != 1
+	  || !(EDGE_SUCC (loop->predecessor, 0)->flags & EDGE_FALLTHRU)
+	  /* If loop->predecessor is in loop, loop->head is not really
+	     the head of the loop.  */
+	  || bfin_bb_in_loop (loop, loop->predecessor))
+	loop->predecessor = NULL;
+    }
+
+  if (loop->predecessor == NULL)
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; loop %d has bad predecessor\n", loop->loop_no);
+      goto bad_loop;
+    }
+
+  /* Get the loop iteration register.  */
+  iter_reg = loop->iter_reg;
+
+  if (!DPREG_P (iter_reg))
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; loop %d iteration count NOT in PREG or DREG\n",
+		 loop->loop_no);
+      goto bad_loop;
+    }
+
+  /* Check if start_label appears before loop_end and calculate the
+     offset between them.  We calculate the length of instructions
+     conservatively.  */
+  length = 0;
+  for (insn = loop->start_label;
+       insn && insn != loop->loop_end;
+       insn = NEXT_INSN (insn))
+    {
+      if (JUMP_P (insn) && any_condjump_p (insn) && !optimize_size)
+	{
+	  if (TARGET_CSYNC_ANOMALY)
+	    length += 8;
+	  else if (TARGET_SPECLD_ANOMALY)
+	    length += 6;
+	}
+      else if (LABEL_P (insn))
+	{
+	  if (TARGET_CSYNC_ANOMALY)
+	    length += 4;
+	}
+
+      if (INSN_P (insn))
+	length += get_attr_length (insn);
+    }
+
+  if (!insn)
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; loop %d start_label not before loop_end\n",
+		 loop->loop_no);
+      goto bad_loop;
+    }
+
+  loop->length = length;
+  if (loop->length > MAX_LOOP_LENGTH)
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
+      goto bad_loop;
+    }
+
+  /* Scan all the blocks to make sure they don't use iter_reg.  */
+  if (bfin_scan_loop (loop, iter_reg, loop->loop_end))
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; loop %d uses iterator\n", loop->loop_no);
+      goto bad_loop;
+    }
+
+  /* Scan all the insns to see if the loop body clobber
+     any hardware loop registers. */
+
+  reg_lc0 = gen_rtx_REG (SImode, REG_LC0);
+  reg_lc1 = gen_rtx_REG (SImode, REG_LC1);
+  reg_lt0 = gen_rtx_REG (SImode, REG_LT0);
+  reg_lt1 = gen_rtx_REG (SImode, REG_LT1);
+  reg_lb0 = gen_rtx_REG (SImode, REG_LB0);
+  reg_lb1 = gen_rtx_REG (SImode, REG_LB1);
+
+  for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, bb); ix++)
+    {
+      rtx insn;
+
+      for (insn = BB_HEAD (bb);
+	   insn != NEXT_INSN (BB_END (bb));
+	   insn = NEXT_INSN (insn))
+	{
+	  if (!INSN_P (insn))
+	    continue;
+
+	  if (reg_set_p (reg_lc0, insn)
+	      || reg_set_p (reg_lt0, insn)
+	      || reg_set_p (reg_lb0, insn))
+	    loop->clobber_loop0 = 1;
+	  if (reg_set_p (reg_lc1, insn)
+	      || reg_set_p (reg_lt1, insn)
+	      || reg_set_p (reg_lb1, insn))
+	    loop->clobber_loop1 = 1;
+	}
+    }
+
+  if ((loop->clobber_loop0 && loop->clobber_loop1)
+      || (loop->depth == MAX_LOOP_DEPTH && loop->clobber_loop0))
+    {
+      loop->depth = MAX_LOOP_DEPTH + 1;
+      if (dump_file)
+	fprintf (dump_file, ";; loop %d no loop reg available\n",
+		 loop->loop_no);
+      goto bad_loop;
+    }
+
+  /* There should be an instruction before the loop_end instruction
+     in the same basic block. And the instruction must not be
+     - JUMP
+     - CONDITIONAL BRANCH
+     - CALL
+     - CSYNC
+     - SSYNC
+     - Returns (RTS, RTN, etc.)  */
+
+  bb = loop->tail;
+  last_insn = PREV_INSN (loop->loop_end);
+
+  while (1)
+    {
+      for (; last_insn != PREV_INSN (BB_HEAD (bb));
+	   last_insn = PREV_INSN (last_insn))
+	if (INSN_P (last_insn))
+	  break;
+
+      if (last_insn != PREV_INSN (BB_HEAD (bb)))
+	break;
+
+      if (single_pred_p (bb)
+	  && single_pred (bb) != ENTRY_BLOCK_PTR)
+	{
+	  bb = single_pred (bb);
+	  last_insn = BB_END (bb);
+	  continue;
+	}
+      else
+	{
+	  last_insn = NULL_RTX;
+	  break;
+	}
+    }
+
+  if (!last_insn)
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; loop %d has no last instruction\n",
+		 loop->loop_no);
+      goto bad_loop;
+    }
+
+  if (JUMP_P (last_insn))
+    {
+      loop_info inner = bb->aux;
+      if (inner
+	  && inner->outer == loop
+	  && inner->loop_end == last_insn
+	  && inner->depth == 1)
+	/* This jump_insn is the exact loop_end of an inner loop
+	   and to be optimized away. So use the inner's last_insn.  */
+	last_insn = inner->last_insn;
+      else
+	{
+	  if (dump_file)
+	    fprintf (dump_file, ";; loop %d has bad last instruction\n",
+		     loop->loop_no);
+	  goto bad_loop;
+	}
+    }
+  else if (CALL_P (last_insn)
+	   || get_attr_type (last_insn) == TYPE_SYNC
+	   || recog_memoized (last_insn) == CODE_FOR_return_internal)
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; loop %d has bad last instruction\n",
+		 loop->loop_no);
+      goto bad_loop;
+    }
+
+  if (GET_CODE (PATTERN (last_insn)) == ASM_INPUT
+      || asm_noperands (PATTERN (last_insn)) >= 0
+      || get_attr_seq_insns (last_insn) == SEQ_INSNS_MULTI)
+    {
+      nop_insn = emit_insn_after (gen_nop (), last_insn);
+      last_insn = nop_insn;
+    }
+
+  loop->last_insn = last_insn;
+
+  /* The loop is good for replacement.  */
+  start_label = loop->start_label;
+  end_label = gen_label_rtx ();
+  iter_reg = loop->iter_reg;
+
+  if (loop->depth == 1 && !loop->clobber_loop1)
+    {
+      lc_reg = reg_lc1;
+      lt_reg = reg_lt1;
+      lb_reg = reg_lb1;
+      loop->clobber_loop1 = 1;
+    }
+  else
+    {
+      lc_reg = reg_lc0;
+      lt_reg = reg_lt0;
+      lb_reg = reg_lb0;
+      loop->clobber_loop0 = 1;
+    }
+
+  /* If iter_reg is a DREG, we need generate an instruction to load
+     the loop count into LC register. */
+  if (D_REGNO_P (REGNO (iter_reg)))
+    {
+      init_insn = gen_movsi (lc_reg, iter_reg);
+      loop_init = gen_lsetup_without_autoinit (lt_reg, start_label,
+					       lb_reg, end_label,
+					       lc_reg);
+    }
+  else if (P_REGNO_P (REGNO (iter_reg)))
+    {
+      init_insn = NULL_RTX;
+      loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
+					    lb_reg, end_label,
+					    lc_reg, iter_reg);
+    }
+  else
+    gcc_unreachable ();
+
+  loop->init = init_insn;
+  loop->end_label = end_label;
+  loop->loop_init = loop_init;
+
+  if (dump_file)
+    {
+      fprintf (dump_file, ";; replacing loop %d initializer with\n",
+	       loop->loop_no);
+      print_rtl_single (dump_file, loop->loop_init);
+      fprintf (dump_file, ";; replacing loop %d terminator with\n",
+	       loop->loop_no);
+      print_rtl_single (dump_file, loop->loop_end);
+    }
+
+  start_sequence ();
+
+  if (loop->init != NULL_RTX)
+    emit_insn (loop->init);
+  emit_insn (loop->loop_init);
+  emit_label (loop->start_label);
+
+  seq = get_insns ();
+  end_sequence ();
+
+  emit_insn_after (seq, BB_END (loop->predecessor));
+  delete_insn (loop->loop_end);
+
+  /* Insert the loop end label before the last instruction of the loop.  */
+  emit_label_before (loop->end_label, loop->last_insn);
+
+  return;
+
+bad_loop:
+
+  if (dump_file)
+    fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no);
+
+  /* Mark this loop bad.  */
+  if (loop->depth <= MAX_LOOP_DEPTH)
+    loop->depth = -1;
+
+  outer = loop->outer;
+
+  /* Move all inner loops to loop's outer loop.  */
+  inner_num = VEC_length (loop_info, loop->loops);
+  if (inner_num)
+    {
+      loop_info l;
+
+      if (outer)
+	VEC_reserve (loop_info, heap, outer->loops, inner_num);
+
+      for (ix = 0; VEC_iterate (loop_info, loop->loops, ix, l); ix++)
+	{
+	  l->outer = outer;
+	  if (outer)
+	    VEC_quick_push (loop_info, outer->loops, l);
+	}
+
+      VEC_free (loop_info, heap, loop->loops);
+    }
+
+  /* Move all blocks to loop's outer loop.  */
+  bb_num = VEC_length (basic_block, loop->blocks);
+  if (bb_num)
+    {
+      basic_block b;
+
+      if (outer)
+	VEC_reserve (basic_block, heap, outer->blocks, bb_num);
+
+      for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, b); ix++)
+	{
+	  b->aux = outer;
+	  if (outer)
+	    VEC_quick_push (basic_block, outer->blocks, b);
+	}
+
+      VEC_free (basic_block, heap, loop->blocks);
+    }
+
+  if (DPREG_P (loop->iter_reg))
+    {
+      /* If loop->iter_reg is a DREG or PREG, we can split it here
+	 without scratch register.  */
+      rtx insn;
+
+      emit_insn_before (gen_addsi3 (loop->iter_reg,
+				    loop->iter_reg,
+				    constm1_rtx),
+			loop->loop_end);
+
+      emit_insn_before (gen_cmpsi (loop->iter_reg, const0_rtx),
+			loop->loop_end);
+
+      insn = emit_jump_insn_before (gen_bne (loop->start_label),
+				    loop->loop_end);
+
+      JUMP_LABEL (insn) = loop->start_label;
+      LABEL_NUSES (loop->start_label)++;
+      delete_insn (loop->loop_end);
+    }
+}
+
+/* Find every loop ending in a loop_end insn, work out which blocks and
+   inner loops each one contains, and run bfin_optimize_loop on all of
+   them.  Progress is logged to DUMP_FILE when it is non-null.  */
+static void
+bfin_reorg_loops (FILE *dump_file)
+{
+  basic_block bb;
+  loop_info loop, loops = NULL;
+  int nloops = 0;
+  unsigned dwork = 0;
+  VEC (loop_work,heap) *works = VEC_alloc (loop_work,heap,20);
+  loop_work *work;
+  edge e;
+  edge_iterator ei;
+
+  /* Find all the possible loop tails.  This means searching for every
+     loop_end instruction.  For each one found, create a loop_info
+     structure and add the head block to the work list. */
+  FOR_EACH_BB (bb)
+    {
+      rtx tail = BB_END (bb);
+
+      while (GET_CODE (tail) == NOTE)
+	tail = PREV_INSN (tail);
+      bb->aux = NULL;
+      /* TAIL need not be an insn (e.g. a label); only insns are recognized.  */
+      if (INSN_P (tail) && recog_memoized (tail) == CODE_FOR_loop_end)
+	{
+	  /* A possible loop end */
+
+	  loop = XNEW (struct loop_info);
+	  loop->next = loops;
+	  loops = loop;
+	  loop->tail = bb;
+	  loop->head = BRANCH_EDGE (bb)->dest;
+	  loop->successor = FALLTHRU_EDGE (bb)->dest;
+	  loop->predecessor = NULL;
+	  loop->loop_end = tail;
+	  loop->last_insn = NULL_RTX;
+	  loop->iter_reg = SET_DEST (XVECEXP (PATTERN (tail), 0, 1));
+	  loop->depth = loop->length = 0;
+	  loop->visited = 0;
+	  loop->clobber_loop0 = loop->clobber_loop1 = 0;
+	  loop->blocks = VEC_alloc (basic_block, heap, 20);
+	  VEC_quick_push (basic_block, loop->blocks, bb);
+	  loop->outer = NULL;
+	  loop->loops = NULL;
+	  loop->loop_no = nloops++;
+	  loop->init = loop->loop_init = NULL_RTX;
+	  loop->start_label = XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (tail), 0, 0)), 1), 0);
+	  loop->end_label = NULL_RTX;
+
+	  work = VEC_safe_push (loop_work, heap, works, NULL);
+	  work->block = loop->head;
+	  work->loop = loop;
+	  bb->aux = loop;
+
+	  if (dump_file)
+	    {
+	      fprintf (dump_file, ";; potential loop %d ending at\n",
+		       loop->loop_no);
+	      print_rtl_single (dump_file, tail);
+	    }
+	}
+    }
+
+  /*  Now find all the closed loops.
+      until work list empty,
+       if block's auxptr is set
+         if != loop slot
+           if block's loop's start != block
+	     mark loop as bad
+	   else
+             append block's loop's fallthrough block to worklist
+	     increment this loop's depth
+       else if block is exit block
+         mark loop as bad
+       else
+	  set auxptr
+	  for each target of block
+	    add to worklist */
+  while (VEC_iterate (loop_work, works, dwork++, work))
+    {
+      loop = work->loop;
+      bb = work->block;
+      if (bb == EXIT_BLOCK_PTR)
+	/* We've reached the exit block.  The loop must be bad. */
+	loop->depth = -1;
+      else if (!bb->aux)
+	{
+	  /* We've not seen this block before.  Add it to the loop's
+	     list and then add each successor to the work list.  */
+	  bb->aux = loop;
+	  VEC_safe_push (basic_block, heap, loop->blocks, bb);
+	  FOR_EACH_EDGE (e, ei, bb->succs)
+	    {
+	      if (!VEC_space (loop_work, works, 1))
+		{
+		  if (dwork)
+		    {
+		      VEC_block_remove (loop_work, works, 0, dwork);
+		      dwork = 0;
+		    }
+		  else
+		    VEC_reserve (loop_work, heap, works, 1);
+		}
+	      work = VEC_quick_push (loop_work, works, NULL);
+	      work->block = e->dest;
+	      work->loop = loop;
+	    }
+	}
+      else if (bb->aux != loop)
+	{
+	  /* We've seen this block in a different loop.  If it's not
+	     the other loop's head, then this loop must be bad.
+	     Otherwise, the other loop might be a nested loop, so
+	     continue from that loop's successor.  */
+	  loop_info other = bb->aux;
+
+	  if (other->head != bb)
+	    loop->depth = -1;
+	  else
+	    {
+	      other->outer = loop;
+	      VEC_safe_push (loop_info, heap, loop->loops, other);
+	      work = VEC_safe_push (loop_work, heap, works, NULL);
+	      work->loop = loop;
+	      work->block = other->successor;
+	    }
+	}
+    }
+  VEC_free (loop_work, heap, works);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, ";; All loops found:\n\n");
+      bfin_dump_loops (loops);
+    }
+
+  /* Now apply the optimizations.  */
+  for (loop = loops; loop; loop = loop->next)
+    bfin_optimize_loop (loop);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, ";; After hardware loops optimization:\n\n");
+      bfin_dump_loops (loops);
+    }
+
+  /* Free up the loop structures */
+  while (loops)
+    {
+      loop = loops;
+      loops = loop->next;
+      VEC_free (loop_info, heap, loop->loops);
+      VEC_free (basic_block, heap, loop->blocks);
+      XDELETE (loop);
+    }
+
+  if (dump_file)
+    print_rtl (dump_file, get_insns ());
+}
+
 
 /* We use the machine specific reorg pass for emitting CSYNC instructions
    after conditional branches as needed.
@@ -2731,7 +3516,11 @@ bfin_reorg (void)
   rtx insn, last_condjump = NULL_RTX;
   int cycles_since_jump = INT_MAX;
 
-  if (! TARGET_SPECLD_ANOMALY || ! TARGET_CSYNC_ANOMALY)
+  /* Doloop optimization */
+  if (cfun->machine->has_hardware_loops)
+    bfin_reorg_loops (dump_file);
+
+  if (! TARGET_SPECLD_ANOMALY && ! TARGET_CSYNC_ANOMALY)
     return;
 
   /* First pass: find predicted-false branches; if something after them
diff --git a/gcc/config/bfin/bfin.h b/gcc/config/bfin/bfin.h
index 0fb9299d4944..732a9b8a74f1 100644
--- a/gcc/config/bfin/bfin.h
+++ b/gcc/config/bfin/bfin.h
@@ -268,15 +268,17 @@ extern const char *bfin_library_id_string;
    5  return address registers RETS/I/X/N/E
    1  arithmetic status register (ASTAT).  */
 
-#define FIRST_PSEUDO_REGISTER 44
+#define FIRST_PSEUDO_REGISTER 50
 
-#define PREG_P(X) (REG_P (X) && P_REGNO_P (REGNO (X)))
-#define IREG_P(X) (REG_P (X) && I_REGNO_P (REGNO (X)))
-#define ADDRESS_REGNO_P(X) ((X) >= REG_P0 && (X) <= REG_M3)
 #define D_REGNO_P(X) ((X) <= REG_R7)
 #define P_REGNO_P(X) ((X) >= REG_P0 && (X) <= REG_P7)
-#define I_REGNO_P(X) \
-  ((X) == REG_I0 || (X) == REG_I1 || (X) == REG_I2 || (X) == REG_I3)
+#define I_REGNO_P(X) ((X) >= REG_I0 && (X) <= REG_I3)
+#define DP_REGNO_P(X) (D_REGNO_P (X) || P_REGNO_P (X))
+#define ADDRESS_REGNO_P(X) ((X) >= REG_P0 && (X) <= REG_M3)
+#define DREG_P(X) (REG_P (X) && D_REGNO_P (REGNO (X)))
+#define PREG_P(X) (REG_P (X) && P_REGNO_P (REGNO (X)))
+#define IREG_P(X) (REG_P (X) && I_REGNO_P (REGNO (X)))
+#define DPREG_P(X) (REG_P (X) && DP_REGNO_P (REGNO (X)))
 
 #define REGISTER_NAMES { \
   "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", \
@@ -286,7 +288,8 @@ extern const char *bfin_library_id_string;
   "A0", "A1", \
   "CC", \
   "RETS", "RETI", "RETX", "RETN", "RETE", "ASTAT", "SEQSTAT", "USP", \
-  "ARGP" \
+  "ARGP", \
+  "LT0", "LT1", "LC0", "LC1", "LB0", "LB1" \
 }
 
 #define SHORT_REGISTER_NAMES { \
@@ -316,8 +319,10 @@ extern const char *bfin_library_id_string;
 { 0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 1, 0,    \
 /*i0 i1 i2 i3 b0 b1 b2 b3   l0 l1 l2 l3 m0 m1 m2 m3 */ \
   0, 0, 0, 0, 0, 0, 0, 0,   1, 1, 1, 1, 0, 0, 0, 0,    \
-/*a0 a1 cc rets/i/x/n/e     astat seqstat usp argp */ \
-  0, 0, 0, 1, 1, 1, 1, 1,   1, 1, 1, 1	 \
+/*a0 a1 cc rets/i/x/n/e     astat seqstat usp argp lt0/1 lc0/1 */ \
+  0, 0, 0, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,    \
+/*lb0/1 */ \
+  1, 1  \
 }
 
 /* 1 for registers not available across function calls.
@@ -332,8 +337,10 @@ extern const char *bfin_library_id_string;
 { 1, 1, 1, 1, 0, 0, 0, 0,   1, 1, 1, 0, 0, 0, 1, 0, \
 /*i0 i1 i2 i3 b0 b1 b2 b3   l0 l1 l2 l3 m0 m1 m2 m3 */ \
   1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,   \
-/*a0 a1 cc rets/i/x/n/e     astat seqstat usp argp */ \
-  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1	 \
+/*a0 a1 cc rets/i/x/n/e     astat seqstat usp argp lt0/1 lc0/1 */ \
+  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, \
+/*lb0/1 */ \
+  1, 1  \
 }
 
 /* Order in which to allocate registers.  Each register must be
@@ -350,7 +357,8 @@ extern const char *bfin_library_id_string;
   REG_L0, REG_L1, REG_L2, REG_L3, REG_M0, REG_M1, REG_M2, REG_M3, \
   REG_RETS, REG_RETI, REG_RETX, REG_RETN, REG_RETE,		  \
   REG_ASTAT, REG_SEQSTAT, REG_USP, 				  \
-  REG_CC, REG_ARGP						  \
+  REG_CC, REG_ARGP,						  \
+  REG_LT0, REG_LT1, REG_LC0, REG_LC1, REG_LB0, REG_LB1		  \
 }
 
 /* Macro to conditionally modify fixed_regs/call_used_regs.  */
@@ -410,6 +418,9 @@ enum reg_class
   IPREGS,
   DPREGS,
   MOST_REGS,
+  LT_REGS,
+  LC_REGS,
+  LB_REGS,
   PROLOGUE_REGS,
   NON_A_CC_REGS,
   ALL_REGS, LIM_REG_CLASSES
@@ -443,6 +454,9 @@ enum reg_class
    "IPREGS",		\
    "DPREGS",		\
    "MOST_REGS",		\
+   "LT_REGS",		\
+   "LC_REGS",		\
+   "LB_REGS",		\
    "PROLOGUE_REGS",	\
    "NON_A_CC_REGS",	\
    "ALL_REGS" }
@@ -484,9 +498,12 @@ enum reg_class
     { 0x000fff00,    0x800 },		/* IPREGS */	\
     { 0x0000ffff,    0x800 },		/* DPREGS */   \
     { 0xffffffff,    0x800 },		/* MOST_REGS */\
-    { 0x00000000,    0x7f8 },		/* PROLOGUE_REGS */\
-    { 0xffffffff,    0xff8 },		/* NON_A_CC_REGS */\
-    { 0xffffffff,    0xfff }}		/* ALL_REGS */
+    { 0x00000000,    0x3000 },		/* LT_REGS */\
+    { 0x00000000,    0xc000 },		/* LC_REGS */\
+    { 0x00000000,    0x30000 },		/* LB_REGS */\
+    { 0x00000000,    0x3f7f8 },		/* PROLOGUE_REGS */\
+    { 0xffffffff,    0x3fff8 },		/* NON_A_CC_REGS */\
+    { 0xffffffff,    0x3ffff }}		/* ALL_REGS */
 
 #define IREG_POSSIBLE_P(OUTER)				     \
   ((OUTER) == POST_INC || (OUTER) == PRE_INC		     \
@@ -535,6 +552,9 @@ enum reg_class
    (LETTER) == 'f' ? MREGS : 		\
    (LETTER) == 'c' ? CIRCREGS :         \
    (LETTER) == 'C' ? CCREGS : 		\
+   (LETTER) == 't' ? LT_REGS : 		\
+   (LETTER) == 'k' ? LC_REGS : 		\
+   (LETTER) == 'l' ? LB_REGS : 		\
    (LETTER) == 'x' ? MOST_REGS :	\
    (LETTER) == 'y' ? PROLOGUE_REGS :	\
    (LETTER) == 'w' ? NON_A_CC_REGS :	\
@@ -554,6 +574,9 @@ enum reg_class
  : (REGNO) >= REG_B0 && (REGNO) <= REG_B3 ? BREGS	\
  : (REGNO) >= REG_M0 && (REGNO) <= REG_M3 ? MREGS	\
  : (REGNO) == REG_A0 || (REGNO) == REG_A1 ? AREGS	\
+ : (REGNO) == REG_LT0 || (REGNO) == REG_LT1 ? LT_REGS	\
+ : (REGNO) == REG_LC0 || (REGNO) == REG_LC1 ? LC_REGS	\
+ : (REGNO) == REG_LB0 || (REGNO) == REG_LB1 ? LB_REGS	\
  : (REGNO) == REG_CC ? CCREGS				\
  : (REGNO) >= REG_RETS ? PROLOGUE_REGS			\
  : NO_REGS)
diff --git a/gcc/config/bfin/bfin.md b/gcc/config/bfin/bfin.md
index b2b7cffa5871..2c6e0c7a11be 100644
--- a/gcc/config/bfin/bfin.md
+++ b/gcc/config/bfin/bfin.md
@@ -49,6 +49,9 @@
 ;     B
 ;     c (i0..i3,m0..m3) CIRCREGS
 ;     C (CC)            CCREGS
+;     t  (lt0,lt1)      LT_REGS
+;     k  (lc0,lc1)      LC_REGS
+;     l  (lb0,lb1)      LB_REGS
 ;
 
 ;; Define constants for hard registers.
@@ -109,7 +112,14 @@
    (REG_SEQSTAT 41)
    (REG_USP 42)
 
-   (REG_ARGP 43)])
+   (REG_ARGP 43)
+
+   (REG_LT0 44)
+   (REG_LT1 45)
+   (REG_LC0 46)
+   (REG_LC1 47)
+   (REG_LB0 48)
+   (REG_LB1 49)])
 
 ;; Constants used in UNSPECs and UNSPEC_VOLATILEs.
 
@@ -124,7 +134,8 @@
    (UNSPEC_MUL_WITH_FLAG 6)
    (UNSPEC_MAC_WITH_FLAG 7)
    (UNSPEC_MOVE_FDPIC 8)
-   (UNSPEC_FUNCDESC_GOT17M4 9)])
+   (UNSPEC_FUNCDESC_GOT17M4 9)
+   (UNSPEC_LSETUP_END 10)])
 
 (define_constants
   [(UNSPEC_VOLATILE_EH_RETURN 0)
@@ -243,6 +254,12 @@
 
 	(const_int 2)))
 
+
+;; Classify insns by what their output template emits: "single" (the
+;; default) for one instruction, "multi" for a sequence of several.
+(define_attr "seq_insns" "single,multi"
+  (const_string "single"))
+
 ;; Conditional moves
 
 (define_expand "movsicc"
@@ -268,7 +285,8 @@
     if cc %0 =%2; /* movsicc-1b */
     if !cc %0 =%1; if cc %0=%2; /* movsicc-1 */"
   [(set_attr "length" "2,2,4")
-   (set_attr "type" "move")])
+   (set_attr "type" "move")
+   (set_attr "seq_insns" "*,*,multi")])
 
 (define_insn "*movsicc_insn2"
   [(set (match_operand:SI 0 "register_operand" "=da,da,da")
@@ -283,7 +301,8 @@
    if cc %0 =%1; /* movsicc-2a */
    if cc %0 =%1; if !cc %0=%2; /* movsicc-1 */"
   [(set_attr "length" "2,2,4")
-   (set_attr "type" "move")])
+   (set_attr "type" "move")
+   (set_attr "seq_insns" "*,*,multi")])
 
 ;; Insns to load HIGH and LO_SUM
 
@@ -376,7 +395,8 @@
    %0 = CC;
    R0 = R0 | R0; CC = AC0;"
   [(set_attr "type" "move,mvi,mcld,mcst,compare,compare,alu0")
-   (set_attr "length" "2,2,*,*,2,2,4")])
+   (set_attr "length" "2,2,*,*,2,2,4")
+   (set_attr "seq_insns" "*,*,*,*,*,*,multi")])
 
 (define_insn "movpdi"
   [(set (match_operand:PDI 0 "nonimmediate_operand" "=e,<,e")
@@ -386,7 +406,8 @@
    %0 = %1;
    %0 = %x1; %0 = %w1;
    %w0 = %1; %x0 = %1;"
-  [(set_attr "type" "move,mcst,mcld")])
+  [(set_attr "type" "move,mcst,mcld")
+   (set_attr "seq_insns" "*,multi,multi")])
 
 (define_insn "load_accumulator"
   [(set (match_operand:PDI 0 "register_operand" "=e")
@@ -429,12 +450,14 @@
 ;; The first alternative is used to make reload choose a limited register
 ;; class when faced with a movsi_insn that had its input operand replaced
 ;; with a PLUS.  We generally require fewer secondary reloads this way.
-(define_insn "*movsi_insn"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=da,x*y,da,x,x,x,da,mr")
-        (match_operand:SI 1 "general_operand" "da,x*y,xKs7,xKsh,xKuh,ix,mr,da"))]
 
+(define_insn "*movsi_insn"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=da,x*y,*k,da,da,x,x,x,da,mr")
+	(match_operand:SI 1 "general_operand" "da,x*y,da,*k,xKs7,xKsh,xKuh,ix,mr,da"))]
   "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
-  "@
+ "@
+   %0 = %1;
+   %0 = %1;
    %0 = %1;
    %0 = %1;
    %0 = %1 (X);
@@ -443,8 +466,8 @@
    #
    %0 = %1;
    %0 = %1;"
-  [(set_attr "type" "move,move,mvi,mvi,mvi,*,mcld,mcst")
-   (set_attr "length" "2,2,2,4,4,*,*,*")])
+  [(set_attr "type" "move,move,move,move,mvi,mvi,mvi,*,mcld,mcst")
+   (set_attr "length" "2,2,2,2,2,4,4,*,*,*")])
 
 (define_insn_and_split "*movv2hi_insn"
   [(set (match_operand:V2HI 0 "nonimmediate_operand" "=da,da,d,dm")
@@ -776,7 +799,8 @@
 			(match_operand:DI 2 "register_operand" "d")))]
   ""
   "%0 = %1 <op> %2;\\n\\t%H0 = %H1 <op> %H2;"
-  [(set_attr "length" "4")])
+  [(set_attr "length" "4")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn "*<optab>di_zesidi_di"
   [(set (match_operand:DI 0 "register_operand" "=d")
@@ -785,7 +809,8 @@
 			(match_operand:DI 1 "register_operand" "d")))]
   ""
   "%0 = %1 <op>  %2;\\n\\t%H0 = <high_result>;"
-  [(set_attr "length" "4")])
+  [(set_attr "length" "4")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn "*<optab>di_sesdi_di"
   [(set (match_operand:DI 0 "register_operand" "=d")
@@ -795,7 +820,8 @@
    (clobber (match_scratch:SI 3 "=&d"))]
   ""
   "%0 = %1 <op> %2;\\n\\t%3 = %2;\\n\\t%3 >>>= 31;\\n\\t%H0 = %H1 <op> %3;"
-  [(set_attr "length" "8")])
+  [(set_attr "length" "8")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn "negdi2"
   [(set (match_operand:DI 0 "register_operand" "=d")
@@ -804,14 +830,16 @@
    (clobber (reg:CC REG_CC))]
   ""
   "%2 = 0; %2 = %2 - %1; cc = ac0; cc = !cc; %2 = cc;\\n\\t%0 = -%1; %H0 = -%H1; %H0 = %H0 - %2;"
-  [(set_attr "length" "16")])
+  [(set_attr "length" "16")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn "one_cmpldi2"
   [(set (match_operand:DI 0 "register_operand" "=d")
         (not:DI (match_operand:DI 1 "register_operand" "d")))]
   ""
   "%0 = ~%1;\\n\\t%H0 = ~%H1;"
-  [(set_attr "length" "4")])
+  [(set_attr "length" "4")
+   (set_attr "seq_insns" "multi")])
 
 ;; DImode zero and sign extend patterns
 
@@ -833,14 +861,16 @@
         (zero_extend:DI (match_operand:QI 1 "register_operand" "d")))]
   ""
   "%0 = %T1 (Z);\\n\\t%H0 = 0;"
-  [(set_attr "length" "4")])
+  [(set_attr "length" "4")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn "zero_extendhidi2"
   [(set (match_operand:DI 0 "register_operand" "=d")
         (zero_extend:DI (match_operand:HI 1 "register_operand" "d")))]
   ""
   "%0 = %h1 (Z);\\n\\t%H0 = 0;"
-  [(set_attr "length" "4")])
+  [(set_attr "length" "4")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn_and_split "extendsidi2"
   [(set (match_operand:DI 0 "register_operand" "=d")
@@ -896,7 +926,8 @@
    %0 += %2; cc = ac0; %3 = cc; %H0 = %H0 + %3;
    %0 = %0 + %2; cc = ac0; %3 = cc; %H0 = %H0 + %H2; %H0 = %H0 + %3;"
   [(set_attr "type" "alu0")
-   (set_attr "length" "10,8,10")])
+   (set_attr "length" "10,8,10")
+   (set_attr "seq_insns" "multi,multi,multi")])
 
 (define_insn "subdi3"
   [(set (match_operand:DI 0 "register_operand" "=&d")
@@ -905,7 +936,8 @@
    (clobber (reg:CC 34))]
   ""
   "%0 = %1-%2;\\n\\tcc = ac0;\\n\\t%H0 = %H1-%H2;\\n\\tif cc jump 1f;\\n\\t%H0 += -1;\\n\\t1:"
-  [(set_attr "length" "10")])
+  [(set_attr "length" "10")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn "*subdi_di_zesidi"
   [(set (match_operand:DI 0 "register_operand" "=d")
@@ -916,7 +948,8 @@
    (clobber (reg:CC 34))]
   ""
   "%0 = %1 - %2;\\n\\tcc = ac0;\\n\\tcc = ! cc;\\n\\t%3 = cc;\\n\\t%H0 = %H1 - %3;"
-  [(set_attr "length" "10")])
+  [(set_attr "length" "10")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn "*subdi_zesidi_di"
   [(set (match_operand:DI 0 "register_operand" "=d")
@@ -927,7 +960,8 @@
    (clobber (reg:CC 34))]
   ""
   "%0 = %2 - %1;\\n\\tcc = ac0;\\n\\tcc = ! cc;\\n\\t%3 = cc;\\n\\t%3 = -%3;\\n\\t%H0 = %3 - %H1"
-  [(set_attr "length" "12")])
+  [(set_attr "length" "12")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn "*subdi_di_sesidi"
   [(set (match_operand:DI 0 "register_operand" "=d")
@@ -938,7 +972,8 @@
    (clobber (reg:CC 34))]
   ""
   "%0 = %1 - %2;\\n\\tcc = ac0;\\n\\t%3 = %2;\\n\\t%3 >>>= 31;\\n\\t%H0 = %H1 - %3;\\n\\tif cc jump 1f;\\n\\t%H0 += -1;\\n\\t1:"
-  [(set_attr "length" "14")])
+  [(set_attr "length" "14")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn "*subdi_sesidi_di"
   [(set (match_operand:DI 0 "register_operand" "=d")
@@ -949,7 +984,8 @@
    (clobber (reg:CC 34))]
   ""
   "%0 = %2 - %1;\\n\\tcc = ac0;\\n\\t%3 = %2;\\n\\t%3 >>>= 31;\\n\\t%H0 = %3 - %H1;\\n\\tif cc jump 1f;\\n\\t%H0 += -1;\\n\\t1:"
-  [(set_attr "length" "14")])
+  [(set_attr "length" "14")
+   (set_attr "seq_insns" "multi")])
 
 ;; Combined shift/add instructions
 
@@ -1496,6 +1532,89 @@
   "jump (%0);"
   [(set_attr "type" "misc")])
 
+;;  Hardware loop
+
+; Expand the branch that closes a counted loop: jump to the label in
+; operand 4 if operand 0 (the loop count pseudo register) is not 1, and
+; decrement operand 0; operand 5 is a scratch.  Operand 1 is the number of
+; loop iterations (0 if unknown), operand 2 the maximum number of iterations
+; and operand 3 the number of levels of enclosed loops; they are unused here.
+(define_expand "doloop_end"
+  [(parallel [(set (pc) (if_then_else
+			  (ne (match_operand:SI 0 "" "")
+			      (const_int 1))
+			  (label_ref (match_operand 4 "" ""))
+			  (pc)))
+	      (set (match_dup 0)
+		   (plus:SI (match_dup 0)
+			    (const_int -1)))
+	      (unspec [(const_int 0)] UNSPEC_LSETUP_END)
+	      (clobber (match_scratch:SI 5 ""))])]
+  ""
+  {bfin_hardware_loop ();})
+
+(define_insn "loop_end"	; branch to operand 1 if operand 0 is not 1; decrement operand 0
+  [(set (pc)
+	(if_then_else (ne (match_operand:SI 0 "nonimmediate_operand" "+a*d,*b*h*f,m")
+			  (const_int 1))
+		      (label_ref (match_operand 1 "" ""))
+		      (pc)))
+   (set (match_dup 0)
+	(plus (match_dup 0)
+	      (const_int -1)))
+   (unspec [(const_int 0)] UNSPEC_LSETUP_END)
+   (clobber (match_scratch:SI 2 "=X,&r,&r"))]	; alternative 0 needs no scratch (X)
+  ""
+  "@
+   /* loop end %0 %l1 */
+   #
+   #"
+  [(set_attr "length" "6,10,14")])	; "#": alternatives 1 and 2 must be split (see the define_split that follows)
+
+(define_split	; after reload, open-code a loop_end whose counter satisfies nondp_reg_or_memory_operand
+  [(set (pc)
+	(if_then_else (ne (match_operand:SI 0 "nondp_reg_or_memory_operand" "")
+			  (const_int 1))
+		      (label_ref (match_operand 1 "" ""))
+		      (pc)))
+   (set (match_dup 0)
+	(plus (match_dup 0)
+	      (const_int -1)))
+   (unspec [(const_int 0)] UNSPEC_LSETUP_END)
+   (clobber (match_scratch:SI 2 "=&r"))]
+  "reload_completed"
+  [(set (match_dup 2) (match_dup 0))	; scratch = counter
+   (set (match_dup 2) (plus:SI (match_dup 2) (const_int -1)))	; scratch = scratch - 1
+   (set (match_dup 0) (match_dup 2))	; counter = scratch
+   (set (reg:BI REG_CC) (eq:BI (match_dup 2) (const_int 0)))	; CC = (scratch == 0)
+   (set (pc)	; branch to operand 1 when CC is 0, i.e. the decremented count is nonzero
+	(if_then_else (eq (reg:BI REG_CC)
+			  (const_int 0))
+		      (label_ref (match_dup 1))
+		      (pc)))]
+  "")
+
+(define_insn "lsetup_with_autoinit"	; LSETUP that also loads the LC register from operand 5
+  [(set (match_operand:SI 0 "lt_register_operand" "=t")	; LT register = label operand 1
+	(label_ref (match_operand 1 "" "")))
+   (set (match_operand:SI 2 "lb_register_operand" "=l")	; LB register = label operand 3
+	(label_ref (match_operand 3 "" "")))
+   (set (match_operand:SI 4 "lc_register_operand" "=k")	; LC register = operand 5
+	(match_operand:SI 5 "register_operand" "a"))]
+  ""
+  "LSETUP (%1, %3) %4 = %5;"
+  [(set_attr "length" "4")])
+
+(define_insn "lsetup_without_autoinit"	; LSETUP that sets LT and LB only
+  [(set (match_operand:SI 0 "lt_register_operand" "=t")	; LT register = label operand 1
+	(label_ref (match_operand 1 "" "")))
+   (set (match_operand:SI 2 "lb_register_operand" "=l")	; LB register = label operand 3
+	(label_ref (match_operand 3 "" "")))
+   (use (match_operand:SI 4 "lc_register_operand" "k"))]	; LC register is used, not set
+  ""
+  "LSETUP (%1, %3) %4;"
+  [(set_attr "length" "4")])
+
 ;;  Call instructions..
 
 ;; The explicit MEM inside the UNSPEC prevents the compiler from moving
@@ -1760,11 +1879,15 @@
    (set (mem:BLK (match_dup 3))
 	(mem:BLK (match_dup 4)))
    (use (match_dup 2))
-   (clobber (match_scratch:HI 5 "=&d"))]
+   (clobber (match_scratch:HI 5 "=&d"))
+   (clobber (reg:SI REG_LT1))
+   (clobber (reg:SI REG_LC1))
+   (clobber (reg:SI REG_LB1))]
   ""
   "%5 = [%4++]; lsetup (1f, 1f) LC1 = %2; 1: MNOP || [%3++] = %5 || %5 = [%4++]; [%3++] = %5;"
   [(set_attr "type" "misc")
-   (set_attr "length" "16")])
+   (set_attr "length" "16")
+   (set_attr "seq_insns" "multi")])
 
 (define_insn "rep_movhi"
   [(set (match_operand:SI 0 "register_operand" "=&a")
@@ -1779,11 +1902,15 @@
    (set (mem:BLK (match_dup 3))
 	(mem:BLK (match_dup 4)))
    (use (match_dup 2))
-   (clobber (match_scratch:HI 5 "=&d"))]
+   (clobber (match_scratch:HI 5 "=&d"))
+   (clobber (reg:SI REG_LT1))
+   (clobber (reg:SI REG_LC1))
+   (clobber (reg:SI REG_LB1))]
   ""
   "%h5 = W[%4++]; lsetup (1f, 1f) LC1 = %2; 1: MNOP || W [%3++] = %5 || %h5 = W [%4++]; W [%3++] = %5;"
   [(set_attr "type" "misc")
-   (set_attr "length" "16")])
+   (set_attr "length" "16")
+   (set_attr "seq_insns" "multi")])
 
 (define_expand "movmemsi"
   [(match_operand:BLK 0 "general_operand" "")
@@ -2315,7 +2442,8 @@
   ""
   "if !cc jump 4 (bp); excpt 3;"
   [(set_attr "type" "misc")
-   (set_attr "length" "4")])
+   (set_attr "length" "4")
+   (set_attr "seq_insns" "multi")])
 
 ;;; Vector instructions
 
diff --git a/gcc/config/bfin/predicates.md b/gcc/config/bfin/predicates.md
index eccee2be4d23..1415ea0ab06b 100644
--- a/gcc/config/bfin/predicates.md
+++ b/gcc/config/bfin/predicates.md
@@ -76,12 +76,44 @@
   return 1;
 })
 
+;; Return nonzero if OP is one of the loop count registers LC0 or LC1.
+(define_predicate "lc_register_operand"
+  (and (match_code "reg")
+       (match_test "REGNO (op) == REG_LC0 || REGNO (op) == REG_LC1")))
+
+;; Return nonzero if OP is one of the loop registers LT0 or LT1.
+(define_predicate "lt_register_operand"
+  (and (match_code "reg")
+       (match_test "REGNO (op) == REG_LT0 || REGNO (op) == REG_LT1")))
+
+;; Return nonzero if OP is one of the loop registers LB0 or LB1.
+(define_predicate "lb_register_operand"
+  (and (match_code "reg")
+       (match_test "REGNO (op) == REG_LB0 || REGNO (op) == REG_LB1")))
+
 ;; Return nonzero if OP is a register or a 7 bit signed constant.
 (define_predicate "reg_or_7bit_operand"
   (ior (match_operand 0 "register_operand")
        (and (match_code "const_int")
 	    (match_test "CONST_7BIT_IMM_P (INTVAL (op))"))))
 
+;; Return nonzero if OP is a pseudo or a hard reg other than DREG and PREG.
+(define_predicate "nondp_register_operand"
+  (match_operand 0 "register_operand")
+{
+  unsigned int regno;
+  if (GET_CODE (op) == SUBREG)
+    op = SUBREG_REG (op);	/* Test the register inside a SUBREG.  */
+
+  regno = REGNO (op);
+  return (regno >= FIRST_PSEUDO_REGISTER || !DP_REGNO_P (regno));
+})
+
+;; Return nonzero if OP satisfies nondp_register_operand or memory_operand.
+(define_predicate "nondp_reg_or_memory_operand"
+  (ior (match_operand 0 "nondp_register_operand")
+       (match_operand 0 "memory_operand")))
+
 ;; Used for secondary reloads, this function returns 1 if OP is of the
 ;; form (plus (fp) (const_int)).
 (define_predicate "fp_plus_const_operand"
-- 
GitLab