diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 15dce87b6c3de46bad4dcf62faed73e2d398b48c..6e0a6e2eca2e429cee23c2b4923f3551d44b348d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,20 @@
+2005-07-28  Jan Hubicka  <jh@suse.cz>
+
+	* cgraph.c (cgraph_clone_edge): New UPDATE_ORIGINAL argument.
+	(cgraph_clone_node): Likewise.
+	* cgraph.h (cgraph_clone_edge): Update prototype.
+	(cgraph_clone_node): Likewise.
+	* ipa-inline.c (cgraph_clone_inlined_nodes): Update call of
+	cgraph_clone_node.
+	(lookup_recursive_calls): Consider profile.
+	(cgraph_decide_recursive_inlining): Fix updating; use new
+	probability argument; use profile.
+	* params.def (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY): New.
+	* tree-inline.c (copy_bb): Update call of clone_edge.
+	* tree-optimize.c (tree_rest_of_compilation): Update call of clone_node.
+
+	* doc/invoke.texi (min-inline-recursive-probability): Document.
+
 2005-07-28  Gerald Pfeifer  <gerald@pfeifer.com>
 
 	* doc/install.texi (Configuration): Update Valgrind homepage.
diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index ffc8d25eb5f1600759c6f505ff96c1af37b20045..7a67f6d0da88a7e8e287abaa160f93ae9e8e5c95 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -884,7 +884,8 @@ cgraph_function_possibly_inlined_p (tree decl)
 /* Create clone of E in the node N represented by CALL_EXPR the callgraph.  */
 struct cgraph_edge *
 cgraph_clone_edge (struct cgraph_edge *e, struct cgraph_node *n,
-		   tree call_stmt, int count_scale, int loop_nest)
+		   tree call_stmt, int count_scale, int loop_nest,
+		   bool update_original)
 {
   struct cgraph_edge *new;
 
@@ -893,14 +894,20 @@ cgraph_clone_edge (struct cgraph_edge *e, struct cgraph_node *n,
 		            e->loop_nest + loop_nest);
 
   new->inline_failed = e->inline_failed;
-  e->count -= new->count;
+  if (update_original)
+    e->count -= new->count;
   return new;
 }
 
 /* Create node representing clone of N executed COUNT times.  Decrease
-   the execution counts from original node too.  */
+   the execution counts from the original node too.
+
+   When UPDATE_ORIGINAL is true, the counts are subtracted from the original
+   function's profile to reflect the fact that part of the execution is now
+   handled by the clone.  */
 struct cgraph_node *
-cgraph_clone_node (struct cgraph_node *n, gcov_type count, int loop_nest)
+cgraph_clone_node (struct cgraph_node *n, gcov_type count, int loop_nest,
+		   bool update_original)
 {
   struct cgraph_node *new = cgraph_create_node ();
   struct cgraph_edge *e;
@@ -923,10 +930,12 @@ cgraph_clone_node (struct cgraph_node *n, gcov_type count, int loop_nest)
     count_scale = new->count * REG_BR_PROB_BASE / n->count;
   else
     count_scale = 0;
-  n->count -= count;
+  if (update_original)
+    n->count -= count;
 
   for (e = n->callees;e; e=e->next_callee)
-    cgraph_clone_edge (e, new, e->call_stmt, count_scale, loop_nest);
+    cgraph_clone_edge (e, new, e->call_stmt, count_scale, loop_nest,
+		       update_original);
 
   new->next_clone = n->next_clone;
   new->prev_clone = n;
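
As an aside, the arithmetic above can be illustrated by a standalone
sketch (gcov_type and REG_BR_PROB_BASE stand in for GCC's definitions,
and all counts below are invented):

#include <stdio.h>

typedef long long gcov_type;
#define REG_BR_PROB_BASE 10000

int
main (void)
{
  gcov_type node_count = 1000;	/* original node executed 1000 times */
  gcov_type clone_count = 250;	/* the clone takes over 250 of them */
  int update_original = 1;

  /* Edge counts in the clone are scaled by clone_count / node_count,
     expressed in REG_BR_PROB_BASE fixed point as in cgraph_clone_node.  */
  int count_scale = node_count
		    ? (int) (clone_count * REG_BR_PROB_BASE / node_count)
		    : 0;

  gcov_type edge_count = 600;	/* a callee edge inside the original */
  gcov_type new_edge_count = edge_count * count_scale / REG_BR_PROB_BASE;

  /* With update_original set, the profile mass moved into the clone is
     subtracted from the original so the totals stay consistent; master
     clones pass false and leave the original untouched.  */
  if (update_original)
    {
      node_count -= clone_count;
      edge_count -= new_edge_count;
    }

  printf ("scale %d, clone edge %lld, remaining edge %lld\n",
	  count_scale, new_edge_count, edge_count);
  return 0;
}
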
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 4e2a4c17d9d39bb9823430e078e0c94db621eb03..81eb10b5f3c1852396480b60569ea76e84fd81b2 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -240,8 +240,11 @@ struct cgraph_local_info *cgraph_local_info (tree);
 struct cgraph_global_info *cgraph_global_info (tree);
 struct cgraph_rtl_info *cgraph_rtl_info (tree);
 const char * cgraph_node_name (struct cgraph_node *);
-struct cgraph_edge * cgraph_clone_edge (struct cgraph_edge *, struct cgraph_node *, tree, int, int);
-struct cgraph_node * cgraph_clone_node (struct cgraph_node *, gcov_type, int);
+struct cgraph_edge * cgraph_clone_edge (struct cgraph_edge *,
+				        struct cgraph_node *,
+				        tree, int, int, bool);
+struct cgraph_node * cgraph_clone_node (struct cgraph_node *, gcov_type,
+					int, bool);
 
 struct cgraph_varpool_node *cgraph_varpool_node (tree);
 struct cgraph_varpool_node *cgraph_varpool_node_for_asm (tree asmname);
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index d24e75b76512aa8efb7afd8a9ac6ae7f01555f68..91a32641d371910612e8037d8da7ae6be83e6b22 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -5790,6 +5790,18 @@ happens only when @option{-finline-functions} (included in @option{-O3}) is
 enabled and @option{--param max-inline-recursive-depth-auto} is used.  The
 default value is 450.
 
+@item min-inline-recursive-probability
+Recursive inlining is profitable only for functions having deep recursion
+on average; it can hurt functions with shallow recursion by increasing
+prologue size or the complexity of the function body for other
+optimizers.
+
+When profile feedback is available (see @option{-fprofile-generate}), the
+actual recursion depth can be guessed from the probability that the
+function recurses via a given call expression.  This parameter limits
+inlining only to call expressions whose recursion probability exceeds the
+given threshold (in percent).  The default value is 10.
+
 @item inline-call-cost
 Specify cost of call instruction relative to simple arithmetics operations
 (having cost of 1).  Increasing this cost disqualifies inlining of non-leaf
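
To make the new parameter concrete, here is a hypothetical standalone
version of the test it controls (the real check lives in
cgraph_decide_recursive_inlining below; the function name and all numbers
here are invented):

#include <stdio.h>

typedef long long gcov_type;

/* Return nonzero if a recursive call executed EDGE_COUNT times inside a
   function entered FUNC_COUNT times passes the percent threshold.  */
static int
recursion_probable_enough (gcov_type edge_count, gcov_type func_count,
			   int min_probability)
{
  if (func_count <= 0)
    return 0;
  return edge_count * 100 / func_count >= min_probability;
}

int
main (void)
{
  /* Entered 1000 times, recursive call taken 40 times: 4%, below the
     default threshold of 10, so the call is not inlined recursively.  */
  printf ("%d\n", recursion_probable_enough (40, 1000, 10));	/* 0 */
  printf ("%d\n", recursion_probable_enough (400, 1000, 10));	/* 1 */
  return 0;
}
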
diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c
index ce5d7fe67dc8024ad5cd58e6fe3ed550a423b6d1..15a2af0dc277bcb06576a428ea385c02e9f5cd34 100644
--- a/gcc/ipa-inline.c
+++ b/gcc/ipa-inline.c
@@ -131,7 +131,7 @@ cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate)
     }
    else if (duplicate)
     {
-      n = cgraph_clone_node (e->callee, e->count, e->loop_nest);
+      n = cgraph_clone_node (e->callee, e->count, e->loop_nest, true);
       cgraph_redirect_edge_callee (e, n);
     }
 
@@ -456,10 +456,13 @@ lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
   for (e = where->callees; e; e = e->next_callee)
     if (e->callee == node)
       {
-	/* FIXME: Once counts and frequencies are available we should drive the
-	   order by these.  For now force the order to be simple queue since
-	   we get order dependent on recursion depth for free by this.  */
-        fibheap_insert (heap, priority++, e);
+	/* When profile feedback is available, prioritize by the expected
+	   number of calls.  Without profile feedback we maintain a simple
+	   queue so that candidates come out ordered by recursion depth.  */
+        fibheap_insert (heap,
+			!max_count ? priority++
+		        : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
+		        e);
       }
   for (e = where->callees; e; e = e->next_callee)
     if (!e->inline_failed)
@@ -533,6 +536,7 @@ cgraph_decide_recursive_inlining (struct cgraph_node *node)
 {
   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
+  int probability = PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY);
   fibheap_t heap;
   struct cgraph_edge *e;
   struct cgraph_node *master_clone;
@@ -563,7 +567,7 @@ cgraph_decide_recursive_inlining (struct cgraph_node *node)
 	     cgraph_node_name (node));
 
   /* We need original clone to copy around.  */
-  master_clone = cgraph_clone_node (node, 0, 1);
+  master_clone = cgraph_clone_node (node, node->count, 1, false);
   master_clone->needed = true;
   for (e = master_clone->callees; e; e = e->next_callee)
     if (!e->inline_failed)
@@ -571,27 +575,60 @@ cgraph_decide_recursive_inlining (struct cgraph_node *node)
 
   /* Do the inlining and update list of recursive call during process.  */
   while (!fibheap_empty (heap)
-	 && cgraph_estimate_size_after_inlining (1, node, master_clone) <= limit)
+	 && (cgraph_estimate_size_after_inlining (1, node, master_clone)
+	     <= limit))
     {
       struct cgraph_edge *curr = fibheap_extract_min (heap);
-      struct cgraph_node *node;
+      struct cgraph_node *cnode;
 
-      depth = 0;
-      for (node = curr->caller;
-	   node; node = node->global.inlined_to)
+      depth = 1;
+      for (cnode = curr->caller;
+	   cnode->global.inlined_to; cnode = cnode->callers->caller)
 	if (node->decl == curr->callee->decl)
 	  depth++;
       if (depth > max_depth)
-	continue;
+	{
+          if (dump_file)
+	    fprintf (dump_file, 
+		     "   maximal depth reached\n");
+	  continue;
+	}
+
+      if (max_count)
+	{
+          if (!cgraph_maybe_hot_edge_p (curr))
+	    {
+	      if (dump_file)
+		fprintf (dump_file, "   Not inlining cold call\n");
+	      continue;
+	    }
+          if (curr->count * 100 / node->count < probability)
+	    {
+	      if (dump_file)
+		fprintf (dump_file, 
+			 "   Probability of edge is too small\n");
+	      continue;
+	    }
+	}
 
       if (dump_file)
-	fprintf (dump_file, 
-		 "   Inlining call of depth %i\n", depth);
+	{
+	  fprintf (dump_file, 
+		   "   Inlining call of depth %i", depth);
+	  if (node->count)
+	    {
+	      fprintf (dump_file, " called approx. %.2f times per call",
+		       (double)curr->count / node->count);
+	    }
+	  fprintf (dump_file, "\n");
+	}
       cgraph_redirect_edge_callee (curr, master_clone);
       cgraph_mark_inline_edge (curr);
       lookup_recursive_calls (node, curr->callee, heap);
       n++;
     }
+  if (!fibheap_empty (heap) && dump_file)
+    fprintf (dump_file, "    Recursive inlining growth limit met.\n");
 
   fibheap_delete (heap);
   if (dump_file)
@@ -607,6 +644,10 @@ cgraph_decide_recursive_inlining (struct cgraph_node *node)
     if (node->global.inlined_to == master_clone)
       cgraph_remove_node (node);
   cgraph_remove_node (master_clone);
+  /* FIXME: Recursive inlining actually reduces number of calls of the
+     function.  At this place we should probably walk the function and
+     inline clones and compensate the counts accordingly.  This probably
+     doesn't matter much in practice.  */
   return true;
 }
 
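One subtlety in the lookup_recursive_calls change above: fibheap keys are
longs, while profile counts are 64-bit gcov_type values, so the count is
first divided by a ceiling scale factor that maps even the hottest edge
into 24 bits, then negated so the min-heap hands out the hottest
candidates first.  A standalone sketch of the key computation (assuming
max_count > 0, which the !max_count branch guarantees at the point of
use):

#include <stdio.h>

typedef long long gcov_type;

static long
heap_key (gcov_type count, gcov_type max_count)
{
  /* Ceiling division picks the smallest scale such that
     max_count / scale is at most 1 << 24, so every key fits a long.  */
  gcov_type scale = (max_count + (1 << 24) - 1) / (1 << 24);
  return -(long) (count / scale);
}

int
main (void)
{
  gcov_type max_count = 1LL << 40;
  printf ("%ld %ld\n",
	  heap_key (max_count, max_count),	/* hottest: most negative */
	  heap_key (1LL << 20, max_count));	/* colder: nearer zero */
  return 0;
}
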
diff --git a/gcc/params.def b/gcc/params.def
index 36af300184952f727c152dcebad1f02530893922..7b9f97c69df8c78d3858279de302bfdcf8da909e 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -116,6 +116,11 @@ DEFPARAM (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO,
 	  "The maximum depth of recursive inlining for non-inline functions",
 	  8, 0, 0)
 
+DEFPARAM (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY,
+	  "min-inline-recursive-probability",
+	  "Inline recursively only when the probability of call being executed exceeds the parameter",
+	  10, 0, 0)
+
 /* Limit the number of expansions created by the variable expansion
    optimization to avoid register pressure.  */
 DEFPARAM (PARAM_MAX_VARIABLE_EXPANSIONS,
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index 21d8ac003ed196caf0ee1ed9a2f30e8fa7536ca2..fa6970362e2ac9ba982a5e3cee0861661d48ec5e 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -752,7 +752,7 @@ copy_bb (inline_data *id, basic_block bb, int frequency_scale, int count_scale)
 		  edge = cgraph_edge (id->current_node, orig_stmt);
 		  if (edge)
 		    cgraph_clone_edge (edge, id->node, stmt,
-				       REG_BR_PROB_BASE, 1);
+				       REG_BR_PROB_BASE, 1, true);
 		}
 	    }
 	  /* If you think we can abort here, you are wrong.
diff --git a/gcc/tree-optimize.c b/gcc/tree-optimize.c
index 52df0f5af6ddfbda6d238e304883f8dd35a050cf..d160e497cdd5b662542bd16f387c45bcbabd9f18 100644
--- a/gcc/tree-optimize.c
+++ b/gcc/tree-optimize.c
@@ -372,7 +372,7 @@ tree_rest_of_compilation (tree fndecl)
 	{
 	  struct cgraph_edge *e;
 
-	  saved_node = cgraph_clone_node (node, node->count, 1);
+	  saved_node = cgraph_clone_node (node, node->count, 1, false);
 	  for (e = saved_node->callees; e; e = e->next_callee)
 	    if (!e->inline_failed)
 	      cgraph_clone_inlined_nodes (e, true);
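
As a closing note, both master-clone call sites (here and in
cgraph_decide_recursive_inlining) now pass false for UPDATE_ORIGINAL:
such clones carry the full execution count only so that a body remains
available to copy from, and subtracting that count, as the old
unconditional behaviour would, would drain the original function's
profile to zero.  A trivial standalone illustration with invented
numbers:

#include <stdio.h>

typedef long long gcov_type;

int
main (void)
{
  gcov_type node_count = 1000;
  gcov_type clone_count = node_count;	/* master clone gets full count */
  int update_original = 0;		/* as passed at both call sites */

  if (update_original)
    node_count -= clone_count;		/* would leave zero executions */

  printf ("original keeps %lld executions\n", node_count);
  return 0;
}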