From ddb0b8247df6b9192a849508fab2aecc49d8312c Mon Sep 17 00:00:00 2001
From: Jan Hubicka <jh@suse.cz>
Date: Sun, 29 Apr 2018 18:22:35 +0200
Subject: [PATCH] lto-partition.c: Include sreal.h

	* lto-partition.c: Include sreal.h
	(add_symbol_to_partition_1): Use size instead of self_size
	for size estimate.
	(account_reference_p): New.
	(lto_balanced_map): Use 64bit arithmetics for size calculatoins; cleanup;
	fix accounting errors in boundary size; add debug output; combine cost
	as cost/size instead of cost/internal; reduce the partitioning error to
	+- 1/8 of the parttion size.

From-SVN: r259749
---
 gcc/lto/ChangeLog       |  11 ++++
 gcc/lto/lto-partition.c | 129 +++++++++++++++++++++++++---------------
 2 files changed, 91 insertions(+), 49 deletions(-)

diff --git a/gcc/lto/ChangeLog b/gcc/lto/ChangeLog
index 821a462fb871..6d00e4ebdfbc 100644
--- a/gcc/lto/ChangeLog
+++ b/gcc/lto/ChangeLog
@@ -1,3 +1,14 @@
+2018-04-19  Jan Hubicka  <jh@suse.cz>
+
+	* lto-partition.c: Include sreal.h
+	(add_symbol_to_partition_1): Use size instead of self_size
+	for size estimate.
+	(account_reference_p): New.
+	(lto_balanced_map): Use 64bit arithmetics for size calculatoins; cleanup;
+	fix accounting errors in boundary size; add debug output; combine cost
+	as cost/size instead of cost/internal; reduce the partitioning error to
+	+- 1/8 of the parttion size.
+
 2018-04-19  Martin Liska  <mliska@suse.cz>
 
 	* lto-symtab.c (lto_symtab_resolve_symbols): Do not bail out
diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
index 60daafdf3750..d83eba297ec0 100644
--- a/gcc/lto/lto-partition.c
+++ b/gcc/lto/lto-partition.c
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "ipa-prop.h"
 #include "ipa-fnsummary.h"
 #include "lto-partition.h"
+#include "sreal.h"
 
 vec<ltrans_partition> ltrans_partitions;
 
@@ -152,8 +153,8 @@ add_symbol_to_partition_1 (ltrans_partition part, symtab_node *node)
   if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node))
     {
       struct cgraph_edge *e;
-      if (!node->alias)
-        part->insns += ipa_fn_summaries->get (cnode)->self_size;
+      if (!node->alias && c == SYMBOL_PARTITION)
+        part->insns += ipa_fn_summaries->get (cnode)->size;
 
       /* Add all inline clones and callees that are duplicated.  */
       for (e = cnode->callees; e; e = e->next_callee)
@@ -276,8 +277,9 @@ undo_partition (ltrans_partition partition, unsigned int n_nodes)
 	delete partition->initializers_visited;
       partition->initializers_visited = NULL;
 
-      if (!node->alias && (cnode = dyn_cast <cgraph_node *> (node)))
-        partition->insns -= ipa_fn_summaries->get (cnode)->self_size;
+      if (!node->alias && (cnode = dyn_cast <cgraph_node *> (node))
+          && node->get_partitioning_class () == SYMBOL_PARTITION)
+        partition->insns -= ipa_fn_summaries->get (cnode)->size;
       lto_symtab_encoder_delete_node (partition->encoder, node);
       node->aux = (void *)((size_t)node->aux - 1);
     }
@@ -408,6 +410,24 @@ add_sorted_nodes (vec<symtab_node *> &next_nodes, ltrans_partition partition)
       add_symbol_to_partition (partition, node);
 }
 
+/* Return true if we should account reference from N1 to N2 in cost
+   of partition boundary.  */
+
+bool
+account_reference_p (symtab_node *n1, symtab_node *n2)
+{
+  if (cgraph_node *cnode = dyn_cast <cgraph_node *> (n1))
+    n1 = cnode;
+  /* Do not account recursion - the code below will handle it incorrectly
+     otherwise.  Also do not account references to external symbols.
+     They will never become local.  */
+  if (n1 == n2 
+      || DECL_EXTERNAL (n2->decl)
+      || !n2->definition)
+    return false;
+  return true;
+}
+
 
 /* Group cgraph nodes into equally-sized partitions.
 
@@ -457,14 +477,14 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
   auto_vec<varpool_node *> varpool_order;
   int i;
   struct cgraph_node *node;
-  int original_total_size, total_size = 0, best_total_size = 0;
-  int partition_size;
+  int64_t original_total_size, total_size = 0;
+  int64_t partition_size;
   ltrans_partition partition;
   int last_visited_node = 0;
   varpool_node *vnode;
-  int cost = 0, internal = 0;
-  int best_n_nodes = 0, best_i = 0, best_cost =
-    INT_MAX, best_internal = 0;
+  int64_t cost = 0, internal = 0;
+  int best_n_nodes = 0, best_i = 0;
+  int64_t best_cost = -1, best_internal = 0, best_size = 0;
   int npartitions;
   int current_order = -1;
   int noreorder_pos = 0;
@@ -513,7 +533,8 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
 
   /* Compute partition size and create the first partition.  */
   if (PARAM_VALUE (MIN_PARTITION_SIZE) > max_partition_size)
-    fatal_error (input_location, "min partition size cannot be greater than max partition size");
+    fatal_error (input_location, "min partition size cannot be greater "
+		 "than max partition size");
 
   partition_size = total_size / n_lto_partitions;
   if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE))
@@ -521,7 +542,7 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
   npartitions = 1;
   partition = new_partition ("");
   if (symtab->dump_file)
-    fprintf (symtab->dump_file, "Total unit size: %i, partition size: %i\n",
+    fprintf (symtab->dump_file, "Total unit size: %" PRId64 ", partition size: %" PRId64 "\n",
 	     total_size, partition_size);
 
   auto_vec<symtab_node *> next_nodes;
@@ -540,17 +561,11 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
 	next_nodes.safe_push (varpool_order[varpool_pos++]);
       while (noreorder_pos < (int)noreorder.length ()
 	     && noreorder[noreorder_pos]->order < current_order)
-	{
-	  if (!noreorder[noreorder_pos]->alias)
-	    total_size -= ipa_fn_summaries->get (noreorder[noreorder_pos])->size;
-	  next_nodes.safe_push (noreorder[noreorder_pos++]);
-	}
+	next_nodes.safe_push (noreorder[noreorder_pos++]);
       add_sorted_nodes (next_nodes, partition);
 
       if (!symbol_partitioned_p (order[i]))
         add_symbol_to_partition (partition, order[i]);
-      if (!order[i]->alias)
-        total_size -= ipa_fn_summaries->get (order[i])->size;
 	  
 
       /* Once we added a new node to the partition, we also want to add
@@ -567,7 +582,6 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
 	 it and thus we need to subtract it from COST.  */
       while (last_visited_node < lto_symtab_encoder_size (partition->encoder))
 	{
-	  symtab_node *refs_node;
 	  int j;
 	  struct ipa_ref *ref = NULL;
 	  symtab_node *snode = lto_symtab_encoder_deref (partition->encoder,
@@ -577,7 +591,6 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
 	    {
 	      struct cgraph_edge *edge;
 
-	      refs_node = node;
 
 	      last_visited_node++;
 
@@ -585,7 +598,9 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
 
 	      /* Compute boundary cost of callgraph edges.  */
 	      for (edge = node->callees; edge; edge = edge->next_callee)
-		if (edge->callee->definition)
+		/* Inline edges will always end up local.  */
+		if (edge->inline_failed
+		    && account_reference_p (node, edge->callee))
 		  {
 		    int edge_cost = edge->frequency ();
 		    int index;
@@ -602,6 +617,8 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
 		      cost += edge_cost;
 		  }
 	      for (edge = node->callers; edge; edge = edge->next_caller)
+		if (edge->inline_failed
+		    && account_reference_p (edge->caller, node))
 		{
 		  int edge_cost = edge->frequency ();
 		  int index;
@@ -614,27 +631,24 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
 						     edge->caller);
 		  if (index != LCC_NOT_FOUND
 		      && index < last_visited_node - 1)
-		    cost -= edge_cost;
+		    cost -= edge_cost, internal += edge_cost;
 		  else
 		    cost += edge_cost;
 		}
 	    }
 	  else
-	    {
-	      refs_node = snode;
-	      last_visited_node++;
-	    }
+	    last_visited_node++;
 
 	  /* Compute boundary cost of IPA REF edges and at the same time look into
 	     variables referenced from current partition and try to add them.  */
-	  for (j = 0; refs_node->iterate_reference (j, ref); j++)
-	    if (is_a <varpool_node *> (ref->referred))
+	  for (j = 0; snode->iterate_reference (j, ref); j++)
+	    if (!account_reference_p (snode, ref->referred))
+	      ;
+	    else if (is_a <varpool_node *> (ref->referred))
 	      {
 		int index;
 
 		vnode = dyn_cast <varpool_node *> (ref->referred);
-		if (!vnode->definition)
-		  continue;
 		if (!symbol_partitioned_p (vnode)
 		    && !vnode->no_reorder
 		    && vnode->get_partitioning_class () == SYMBOL_PARTITION)
@@ -652,8 +666,6 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
 		int index;
 
 		node = dyn_cast <cgraph_node *> (ref->referred);
-		if (!node->definition)
-		  continue;
 		index = lto_symtab_encoder_lookup (partition->encoder,
 						   node);
 		if (index != LCC_NOT_FOUND
@@ -662,8 +674,10 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
 		else
 		  cost++;
 	      }
-	  for (j = 0; refs_node->iterate_referring (j, ref); j++)
-	    if (is_a <varpool_node *> (ref->referring))
+	  for (j = 0; snode->iterate_referring (j, ref); j++)
+	    if (!account_reference_p (ref->referring, snode))
+	      ;
+	    else if (is_a <varpool_node *> (ref->referring))
 	      {
 		int index;
 
@@ -682,7 +696,7 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
 						   vnode);
 		if (index != LCC_NOT_FOUND
 		    && index < last_visited_node - 1)
-		  cost--;
+		  cost--, internal++;
 		else
 		  cost++;
 	      }
@@ -696,36 +710,41 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
 						   node);
 		if (index != LCC_NOT_FOUND
 		    && index < last_visited_node - 1)
-		  cost--;
+		  cost--, internal++;
 		else
 		  cost++;
 	      }
 	}
 
-      /* If the partition is large enough, start looking for smallest boundary cost.  */
-      if (partition->insns < partition_size * 3 / 4
-	  || best_cost == INT_MAX
-	  || ((!cost 
-	       || (best_internal * (HOST_WIDE_INT) cost
-		   > (internal * (HOST_WIDE_INT)best_cost)))
-  	      && partition->insns < partition_size * 5 / 4))
+      gcc_assert (cost >= 0 && internal >= 0);
+
+      /* If the partition is large enough, start looking for smallest boundary cost.
+         If partition still seems too small (less than 7/8 of target weight) accept
+	 any cost.  If partition has right size, optimize for highest internal/cost.
+	 Later we stop building partition if its size is 9/8 of the target wight.  */
+      if (partition->insns < partition_size * 7 / 8
+	  || best_cost == -1
+	  || (!cost 
+	      || ((sreal)best_internal * (sreal) cost
+		  < ((sreal) internal * (sreal)best_cost))))
 	{
 	  best_cost = cost;
 	  best_internal = internal;
+	  best_size = partition->insns;
 	  best_i = i;
 	  best_n_nodes = lto_symtab_encoder_size (partition->encoder);
-	  best_total_size = total_size;
 	  best_varpool_pos = varpool_pos;
 	}
       if (symtab->dump_file)
-	fprintf (symtab->dump_file, "Step %i: added %s/%i, size %i, cost %i/%i "
-		 "best %i/%i, step %i\n", i,
+	fprintf (symtab->dump_file, "Step %i: added %s/%i, size %i, "
+		 "cost %" PRId64 "/%" PRId64 " "
+		 "best %" PRId64 "/%" PRId64", step %i\n", i,
 		 order[i]->name (), order[i]->order,
 		 partition->insns, cost, internal,
 		 best_cost, best_internal, best_i);
       /* Partition is too large, unwind into step when best cost was reached and
 	 start new partition.  */
-      if (partition->insns > 2 * partition_size
+      if (partition->insns > 9 * partition_size / 8
 	  || partition->insns > max_partition_size)
 	{
 	  if (best_i != i)
@@ -736,21 +755,26 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
 	      undo_partition (partition, best_n_nodes);
 	      varpool_pos = best_varpool_pos;
 	    }
+	  gcc_assert (best_size == partition->insns);
 	  i = best_i;
+	  if (symtab->dump_file)
+	    fprintf (symtab->dump_file,
+		     "Partition insns: %i (want %" PRId64 ")\n",
+		     partition->insns, partition_size);
  	  /* When we are finished, avoid creating empty partition.  */
 	  while (i < n_nodes - 1 && symbol_partitioned_p (order[i + 1]))
 	    i++;
 	  if (i == n_nodes - 1)
 	    break;
+	  total_size -= partition->insns;
 	  partition = new_partition ("");
 	  last_visited_node = 0;
-	  total_size = best_total_size;
 	  cost = 0;
 
 	  if (symtab->dump_file)
 	    fprintf (symtab->dump_file, "New partition\n");
 	  best_n_nodes = 0;
-	  best_cost = INT_MAX;
+	  best_cost = -1;
 
 	  /* Since the size of partitions is just approximate, update the size after
 	     we finished current one.  */
@@ -760,6 +784,10 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
 	    /* Watch for overflow.  */
 	    partition_size = INT_MAX / 16;
 
+	  if (symtab->dump_file)
+	    fprintf (symtab->dump_file,
+		     "Total size: %" PRId64 " partition_size: %" PRId64 "\n",
+		     total_size, partition_size);
 	  if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE))
 	    partition_size = PARAM_VALUE (MIN_PARTITION_SIZE);
 	  npartitions ++;
@@ -779,6 +807,9 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
     next_nodes.safe_push (varpool_order[varpool_pos++]);
   while (noreorder_pos < (int)noreorder.length ())
     next_nodes.safe_push (noreorder[noreorder_pos++]);
+  /* For one partition the cost of boundary should be 0 unless we added final
+     symbols here (these are not accounted) or we have accounting bug.  */
+  gcc_assert (next_nodes.length () || npartitions != 1 || !best_cost);
   add_sorted_nodes (next_nodes, partition);
 
   free (order);
-- 
GitLab