From 0896cc4276b6c2460203e6e1c5749d533ef77ce9 Mon Sep 17 00:00:00 2001 From: Jan Hubicka <jh@suse.cz> Date: Fri, 22 May 2020 15:44:10 +0200 Subject: [PATCH] Improve LTO streaming dumps This patch cleans up dumping of streaming so it is clear how the dump is organized and how much space individual components need. Compiling: int a=1; main() { return a; } The output is now: Creating output block for function_body Streaming tree <result_decl 0x7ffff7457a50 D.1931> Start of LTO_trees of size 1 Encoding indexable <integer_type 0x7ffff7463000 sizetype> as 0 10 bytes ^^^ I do not think we should need 10 bytes to stream a single indexable reference to 0 :) Start of LTO_trees of size 1 Encoding indexable <integer_type 0x7ffff74630a8 bitsizetype> as 1 10 bytes Streaming header of <result_decl 0x7ffff7457a50 D.1931> to function_body Streaming body of <result_decl 0x7ffff7457a50 D.1931> to function_body Encoding indexable <integer_type 0x7ffff74635e8 int> as 2 Encoding indexable <function_decl 0x7ffff757b500 main> as 0 Streaming ref to <integer_cst 0x7ffff744af18 32> Streaming ref to <integer_cst 0x7ffff744af30 4> 52 bytes ^^^ Instead of having multiple LTO_trees sections followed by the final tree it would make a lot of sense to have only one LTO_trees where the first tree is the one lto_input_tree should return. This is easy to arrange in the DFS walk - one does not need to pop after every SCC component but pop once at the end of the walk. However this breaks handling of integer_csts because they may now become part of an LTO_trees block and be streamed as header + body. This bypasses the separate code for shared integer_cst streaming. I think I want to stream everything into header and materialize the tree since it is not part of an SCC anyway. 
Streaming tree <block 0x7ffff757e420> Streaming header of <block 0x7ffff757e420> to function_body Streaming body of <block 0x7ffff757e420> to function_body 8 bytes Streaming gimple stmt _2 = a; Streaming ref to <block 0x7ffff757e420> 4 bytes Streaming tree <mem_ref 0x7ffff7576f78> Start of LTO_trees of size 1 Encoding indexable <pointer_type 0x7ffff746b9d8> as 3 10 bytes Start of LTO_trees of size 1 Streaming header of <addr_expr 0x7ffff75893c0> to function_body Streaming body of <addr_expr 0x7ffff75893c0> to function_body Encoding indexable <var_decl 0x7ffff7fcfb40 a> as 0 15 bytes Streaming header of <mem_ref 0x7ffff7576f78> to function_body Streaming body of <mem_ref 0x7ffff7576f78> to function_body Streaming ref to <addr_expr 0x7ffff75893c0> Streaming ref to <integer_cst 0x7ffff75a3240 0> 42 bytes Streaming gimple stmt return _2; Outputting global stream 0: <function_decl 0x7ffff757b500 main> Streaming tree <function_decl 0x7ffff757b500 main> Start of LTO_tree_scc of size 1 Streaming header of <optimization_node 0x7ffff744b000> to decls Streaming body of <optimization_node 0x7ffff744b000> to decls 576 bytes Start of LTO_tree_scc of size 1 Streaming header of <target_option_node 0x7ffff744a018> to decls Streaming body of <target_option_node 0x7ffff744a018> to decls 68 bytes Streaming single tree Streaming header of <identifier_node 0x7ffff7577aa0 main> to decls Streaming body of <identifier_node 0x7ffff7577aa0 main> to decls 3 bytes Streaming single tree Streaming header of <identifier_node 0x7ffff758a8c0 t.c> to decls Streaming body of <identifier_node 0x7ffff758a8c0 t.c> to decls 3 bytes Streaming single tree Streaming header of <translation_unit_decl 0x7ffff7457ac8 t.c> to decls Streaming body of <translation_unit_decl 0x7ffff7457ac8 t.c> to decls Streaming ref to <identifier_node 0x7ffff758a8c0 t.c> 22 bytes Start of LTO_tree_scc of size 1 Streaming header of <function_type 0x7ffff74717e0> to decls Streaming body of <function_type 0x7ffff74717e0> to decls 
Streaming ref to <integer_type 0x7ffff74635e8 int> Streaming ref to <integer_cst 0x7ffff744adc8 8> Streaming ref to <integer_cst 0x7ffff744ade0 1> Streaming ref to <function_type 0x7ffff74717e0> 38 bytes Start of LTO_tree_scc of size 1 Streaming header of <function_type 0x7ffff75832a0> to decls Streaming body of <function_type 0x7ffff75832a0> to decls Streaming ref to <integer_type 0x7ffff74635e8 int> Streaming ref to <integer_cst 0x7ffff744adc8 8> Streaming ref to <integer_cst 0x7ffff744ade0 1> Streaming ref to <function_type 0x7ffff74717e0> 38 bytes Start of LTO_tree_scc of size 1 Streaming header of <function_decl 0x7ffff757b500 main> to decls Streaming body of <function_decl 0x7ffff757b500 main> to decls Streaming ref to <function_type 0x7ffff75832a0> Streaming ref to <identifier_node 0x7ffff7577aa0 main> Streaming ref to <translation_unit_decl 0x7ffff7457ac8 t.c> Streaming ref to <identifier_node 0x7ffff7577aa0 main> Streaming ref to <target_option_node 0x7ffff744a018> Streaming ref to <optimization_node 0x7ffff744b000> 58 bytes 806 bytes 0: <var_decl 0x7ffff7fcfb40 a> Streaming tree <var_decl 0x7ffff7fcfb40 a> Streaming single tree Streaming header of <identifier_node 0x7ffff758a870 a> to decls Streaming body of <identifier_node 0x7ffff758a870 a> to decls 3 bytes Streaming single tree Streaming ref to <integer_type 0x7ffff7463000 sizetype> 7 bytes Streaming single tree Streaming ref to <integer_type 0x7ffff74630a8 bitsizetype> 7 bytes Start of LTO_tree_scc of size 1 Streaming header of <var_decl 0x7ffff7fcfb40 a> to decls Streaming body of <var_decl 0x7ffff7fcfb40 a> to decls Streaming ref to <integer_type 0x7ffff74635e8 int> Streaming ref to <identifier_node 0x7ffff758a870 a> Streaming ref to <translation_unit_decl 0x7ffff7457ac8 t.c> Streaming ref to <integer_cst 0x7ffff744af18 32> Streaming ref to <integer_cst 0x7ffff744af30 4> Streaming ref to <identifier_node 0x7ffff758a870 a> Streaming ref to <integer_cst 0x7ffff7468090 1> 49 bytes 66 bytes 
gcc/ChangeLog: 2020-05-22 Jan Hubicka <hubicka@ucw.cz> * lto-section-out.c (lto_output_decl_index): Adjust dump indentation. * lto-streamer-out.c (create_output_block): Fix whitespace (lto_write_tree_1): Add (debug) dump. (DFS::DFS): Add dump. (DFS::DFS_write_tree_body): Do not dump here. (lto_output_tree): Improve dumping; do not stream ref when not needed. (produce_asm_for_decls): Fix whitespace. * tree-streamer-out.c (streamer_write_tree_header): Add dump. --- gcc/ChangeLog | 11 +++++ gcc/lto-section-out.c | 2 +- gcc/lto-streamer-out.c | 95 +++++++++++++++++++++++++++++------------ gcc/tree-streamer-out.c | 9 ++++ 4 files changed, 88 insertions(+), 29 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4049ac308e2e..48a9a5f97017 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2020-05-22 Jan Hubicka <hubicka@ucw.cz> + + * lto-section-out.c (lto_output_decl_index): Adjust dump indentation. + * lto-streamer-out.c (create_output_block): Fix whitespace + (lto_write_tree_1): Add (debug) dump. + (DFS::DFS): Add dump. + (DFS::DFS_write_tree_body): Do not dump here. + (lto_output_tree): Improve dumping; do not stream ref when not needed. + (produce_asm_for_decls): Fix whitespace. + * tree-streamer-out.c (streamer_write_tree_header): Add dump. 
+ 2020-05-22 Hongtao.liu <hongtao.liu@intel.com> PR target/92658 diff --git a/gcc/lto-section-out.c b/gcc/lto-section-out.c index 8eda3b5fde10..0182cd6059ef 100644 --- a/gcc/lto-section-out.c +++ b/gcc/lto-section-out.c @@ -170,7 +170,7 @@ lto_output_decl_index (struct lto_output_stream *obs, index = encoder->trees.length (); if (streamer_dump_file) { - print_node_brief (streamer_dump_file, " Encoding indexable ", + print_node_brief (streamer_dump_file, " Encoding indexable ", name, 4); fprintf (streamer_dump_file, " as %i \n", index); } diff --git a/gcc/lto-streamer-out.c b/gcc/lto-streamer-out.c index 09a2e827f8ef..f5daadc657b2 100644 --- a/gcc/lto-streamer-out.c +++ b/gcc/lto-streamer-out.c @@ -72,7 +72,7 @@ create_output_block (enum lto_section_type section_type) struct output_block *ob = XCNEW (struct output_block); if (streamer_dump_file) fprintf (streamer_dump_file, "Creating output block for %s\n", - lto_section_name [section_type]); + lto_section_name[section_type]); ob->section_type = section_type; ob->decl_state = lto_get_out_decl_state (); @@ -417,6 +417,14 @@ get_symbol_initial_value (lto_symtab_encoder_t encoder, tree expr) static void lto_write_tree_1 (struct output_block *ob, tree expr, bool ref_p) { + if (streamer_dump_file) + { + print_node_brief (streamer_dump_file, " Streaming body of ", + expr, 4); + fprintf (streamer_dump_file, " to %s\n", + lto_section_name[ob->section_type]); + } + /* Pack all the non-pointer fields in EXPR into a bitpack and write the resulting bitpack. */ streamer_write_tree_bitfields (ob, expr); @@ -737,6 +745,8 @@ DFS::DFS (struct output_block *ob, tree expr, bool ref_p, bool this_ref_p, worklist_vec.pop (); + unsigned int prev_size = ob->main_stream->total_size; + /* Only global decl sections are considered by tree merging. */ if (ob->section_type != LTO_section_decls) { @@ -744,6 +754,11 @@ DFS::DFS (struct output_block *ob, tree expr, bool ref_p, bool this_ref_p, by itself then we do not need to stream SCC at all. 
*/ if (worklist_vec.is_empty () && first == 0 && size == 1) return; + if (streamer_dump_file) + { + fprintf (streamer_dump_file, + " Start of LTO_trees of size %i\n", size); + } streamer_write_record_start (ob, LTO_trees); streamer_write_uhwi (ob, size); } @@ -760,6 +775,11 @@ DFS::DFS (struct output_block *ob, tree expr, bool ref_p, bool this_ref_p, { gcc_checking_assert (ob->section_type == LTO_section_decls); + if (streamer_dump_file) + { + fprintf (streamer_dump_file, + " Start of LTO_tree_scc of size %i\n", size); + } streamer_write_record_start (ob, LTO_tree_scc); /* In wast majority of cases scc_entry_len is 1 and size is small integer. Use extra bit of size to stream info about @@ -773,8 +793,18 @@ DFS::DFS (struct output_block *ob, tree expr, bool ref_p, bool this_ref_p, references work correctly. */ else if (size != 1) { - streamer_write_record_start (ob, LTO_trees); - streamer_write_uhwi (ob, size); + if (streamer_dump_file) + { + fprintf (streamer_dump_file, + " Start of LTO_trees of size %i\n", size); + } + streamer_write_record_start (ob, LTO_trees); + streamer_write_uhwi (ob, size); + } + else if (streamer_dump_file) + { + fprintf (streamer_dump_file, + " Streaming single tree\n", size); } /* Write size-1 SCCs without wrapping them inside SCC bundles. @@ -809,6 +839,9 @@ DFS::DFS (struct output_block *ob, tree expr, bool ref_p, bool this_ref_p, for (unsigned i = 0; i < size; ++i) lto_write_tree_1 (ob, sccstack[first+i].t, ref_p); } + if (streamer_dump_file) + fprintf (streamer_dump_file, " %u bytes\n", + ob->main_stream->total_size - prev_size); /* Finally truncate the vector. 
*/ sccstack.truncate (first); @@ -845,14 +878,6 @@ DFS::DFS_write_tree_body (struct output_block *ob, enum tree_code code; - if (streamer_dump_file) - { - print_node_brief (streamer_dump_file, " Streaming ", - expr, 4); - fprintf (streamer_dump_file, " to %s\n", - lto_section_name [ob->section_type]); - } - code = TREE_CODE (expr); if (CODE_CONTAINS_STRUCT (code, TS_TYPED)) @@ -1246,7 +1271,7 @@ hash_tree (struct streamer_tree_cache_d *cache, hash_map<tree, hashval_t> *map, { hstate.add_hwi (TYPE_MODE (t)); /* TYPE_NO_FORCE_BLK is private to stor-layout and need - no streaming. */ + no streaming. */ hstate.add_flag (TYPE_PACKED (t)); hstate.add_flag (TYPE_RESTRICT (t)); hstate.add_flag (TYPE_USER_ALIGN (t)); @@ -1689,6 +1714,10 @@ lto_output_tree (struct output_block *ob, tree expr, { unsigned ix; bool existed_p; + unsigned int size = ob->main_stream->total_size; + /* This is the first time we see EXPR, write all reachable + trees to OB. */ + static bool in_dfs_walk; if (expr == NULL_TREE) { @@ -1705,6 +1734,16 @@ lto_output_tree (struct output_block *ob, tree expr, existed_p = streamer_tree_cache_lookup (ob->writer_cache, expr, &ix); if (existed_p) { + if (streamer_dump_file) + { + if (in_dfs_walk) + print_node_brief (streamer_dump_file, " Streaming ref to ", + expr, 4); + else + print_node_brief (streamer_dump_file, " Streaming ref to ", + expr, 4); + fprintf (streamer_dump_file, "\n"); + } /* If a node has already been streamed out, make sure that we don't write it more than once. Otherwise, the reader will instantiate two different nodes for the same object. */ @@ -1716,20 +1755,16 @@ lto_output_tree (struct output_block *ob, tree expr, } else { - /* This is the first time we see EXPR, write all reachable - trees to OB. */ - static bool in_dfs_walk; - /* Protect against recursion which means disconnect between - what tree edges we walk in the DFS walk and what edges + what tree edges we walk in the DFS walk and what edges we stream out. 
*/ gcc_assert (!in_dfs_walk); if (streamer_dump_file) { - print_node_brief (streamer_dump_file, " Streaming SCC of ", + print_node_brief (streamer_dump_file, " Streaming tree ", expr, 4); - fprintf (streamer_dump_file, "\n"); + fprintf (streamer_dump_file, "\n"); } /* Start the DFS walk. */ @@ -1737,7 +1772,6 @@ lto_output_tree (struct output_block *ob, tree expr, /* let's see ... */ in_dfs_walk = true; DFS (ob, expr, ref_p, this_ref_p, false); - in_dfs_walk = false; /* Finally append a reference to the tree we were writing. */ existed_p = streamer_tree_cache_lookup (ob->writer_cache, expr, &ix); @@ -1748,19 +1782,24 @@ lto_output_tree (struct output_block *ob, tree expr, lto_output_tree_1 (ob, expr, 0, ref_p, this_ref_p); else { + if (streamer_dump_file) + { + print_node_brief (streamer_dump_file, + " Streaming final ref to ", + expr, 4); + fprintf (streamer_dump_file, "\n"); + } streamer_write_record_start (ob, LTO_tree_pickle_reference); streamer_write_uhwi (ob, ix); streamer_write_enum (ob->main_stream, LTO_tags, LTO_NUM_TAGS, lto_tree_code_to_tag (TREE_CODE (expr))); } - if (streamer_dump_file) - { - print_node_brief (streamer_dump_file, " Finished SCC of ", - expr, 4); - fprintf (streamer_dump_file, "\n\n"); - } + in_dfs_walk = false; lto_stats.num_pickle_refs_output++; } + if (streamer_dump_file && !in_dfs_walk) + fprintf (streamer_dump_file, " %u bytes\n", + ob->main_stream->total_size - size); } @@ -2700,7 +2739,7 @@ write_global_stream (struct output_block *ob, static void write_global_references (struct output_block *ob, - struct lto_tree_ref_encoder *encoder) + struct lto_tree_ref_encoder *encoder) { tree t; uint32_t index; @@ -3136,7 +3175,7 @@ produce_asm_for_decls (void) fn_out_state = lto_function_decl_states[idx]; if (streamer_dump_file) - fprintf (streamer_dump_file, "Outputting stream for %s\n", + fprintf (streamer_dump_file, "Outputting stream for %s\n", IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (fn_out_state->fn_decl))); 
lto_output_decl_state_streams (ob, fn_out_state); diff --git a/gcc/tree-streamer-out.c b/gcc/tree-streamer-out.c index 127a3d8c248a..724eaf5e54db 100644 --- a/gcc/tree-streamer-out.c +++ b/gcc/tree-streamer-out.c @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see #include "alias.h" #include "stor-layout.h" #include "gomp-constants.h" +#include "print-tree.h" /* Output the STRING constant to the string @@ -967,6 +968,14 @@ streamer_write_tree_header (struct output_block *ob, tree expr) enum LTO_tags tag; enum tree_code code; + if (streamer_dump_file) + { + print_node_brief (streamer_dump_file, " Streaming header of ", + expr, 4); + fprintf (streamer_dump_file, " to %s\n", + lto_section_name[ob->section_type]); + } + /* We should not see any tree nodes not handled by the streamer. */ code = TREE_CODE (expr); -- GitLab