From 0ef795c3278e35cdd274f22bb0ab9783a983af57 Mon Sep 17 00:00:00 2001 From: Raiki Tamura <tamaron1203@gmail.com> Date: Wed, 16 Nov 2022 17:15:24 +0900 Subject: [PATCH] gccrs: Improve lexer dump gcc/rust/ChangeLog: * lex/rust-lex.cc (Lexer::Lexer): Add `dump_lex` boolean flag. (Lexer::skip_token): Dump tokens if flag is enabled. (Lexer::dump_and_skip): New function. * lex/rust-lex.h: Include optional.h and declare functions. * parse/rust-parse-impl.h (Parser::debug_dump_lex_output): Remove old unused function. * parse/rust-parse.h: Likewise. * rust-session-manager.cc (Session::compile_crate): Pass lexer dump option to lexer. (Session::dump_lex): New function. * util/rust-optional.h: Add missing constructor. Signed-off-by: Raiki Tamura <tamaron1203@gmail.com> --- gcc/rust/lex/rust-lex.cc | 47 ++++++++++++++++++++++++++++++-- gcc/rust/lex/rust-lex.h | 12 ++++++-- gcc/rust/parse/rust-parse-impl.h | 41 ---------------------------- gcc/rust/parse/rust-parse.h | 2 -- gcc/rust/rust-session-manager.cc | 41 ++++++++++++---------------- gcc/rust/util/rust-optional.h | 1 + 6 files changed, 72 insertions(+), 72 deletions(-) diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index 3467a3160ed5..53c7aecd25b7 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -118,13 +118,15 @@ is_non_decimal_int_literal_separator (char character) Lexer::Lexer (const std::string &input) : input (RAIIFile::create_error ()), current_line (1), current_column (1), - line_map (nullptr), raw_input_source (new BufferInputSource (input, 0)), + line_map (nullptr), dump_lex_out (Optional<std::ofstream &>::none ()), + raw_input_source (new BufferInputSource (input, 0)), input_queue{*raw_input_source}, token_queue (TokenSource (this)) {} -Lexer::Lexer (const char *filename, RAIIFile file_input, Linemap *linemap) +Lexer::Lexer (const char *filename, RAIIFile file_input, Linemap *linemap, + Optional<std::ofstream &> dump_lex_opt) : input (std::move (file_input)), current_line (1), current_column (1), - line_map (linemap), + line_map (linemap), dump_lex_out (dump_lex_opt), raw_input_source (new FileInputSource (input.get_raw ())), input_queue{*raw_input_source}, token_queue (TokenSource (this)) { @@ -186,6 +188,45 @@ Lexer::skip_input () skip_input (0); } +void +Lexer::skip_token (int n) +{ + // dump tokens if dump-lex option is enabled + if (dump_lex_out.is_some ()) + dump_and_skip (n); + else + token_queue.skip (n); +} + +void +Lexer::dump_and_skip (int n) +{ + std::ofstream &out = dump_lex_out.get (); + bool found_eof = false; + const_TokenPtr tok; + for (int i = 0; i < n + 1; i++) + { + if (!found_eof) + { + tok = peek_token (); + found_eof |= tok->get_id () == Rust::END_OF_FILE; + + Location loc = tok->get_locus (); + + out << "<id="; + out << tok->token_id_to_str (); + out << (tok->has_str () ? (std::string (", text=") + tok->get_str () + + std::string (", typehint=") + + std::string (tok->get_type_hint_str ())) + : "") + << " "; + out << get_line_map ()->to_string (loc) << " "; + } + + token_queue.skip (0); + } +} + void Lexer::replace_current_token (TokenPtr replacement) { diff --git a/gcc/rust/lex/rust-lex.h b/gcc/rust/lex/rust-lex.h index 6e8c5999f516..a170e91f2ccc 100644 --- a/gcc/rust/lex/rust-lex.h +++ b/gcc/rust/lex/rust-lex.h @@ -22,6 +22,7 @@ #include "rust-linemap.h" #include "rust-buffered-queue.h" #include "rust-token.h" +#include "rust-optional.h" namespace Rust { // Simple wrapper for FILE* that simplifies destruction. @@ -139,7 +140,9 @@ private: public: // Construct lexer with input file and filename provided - Lexer (const char *filename, RAIIFile input, Linemap *linemap); + Lexer (const char *filename, RAIIFile input, Linemap *linemap, + Optional<std::ofstream &> dump_lex_opt + = Optional<std::ofstream &>::none ()); // Lex the contents of a string instead of a file Lexer (const std::string &input); @@ -161,10 +164,13 @@ public: const_TokenPtr peek_token () { return peek_token (0); } // Advances current token to n + 1 tokens ahead of current position. - void skip_token (int n) { token_queue.skip (n); } + void skip_token (int n); // Skips the current token. void skip_token () { skip_token (0); } + // Dumps and advances by n + 1 tokens. + void dump_and_skip (int n); + // Replaces the current token with a specified token. void replace_current_token (TokenPtr replacement); // FIXME: don't use anymore @@ -197,6 +203,8 @@ private: * allocating new linemap */ static const int max_column_hint = 80; + Optional<std::ofstream &> dump_lex_out; + // Input source wrapper thing. class InputSource { diff --git a/gcc/rust/parse/rust-parse-impl.h b/gcc/rust/parse/rust-parse-impl.h index ee0282bdab3d..cbd40efcc9bc 100644 --- a/gcc/rust/parse/rust-parse-impl.h +++ b/gcc/rust/parse/rust-parse-impl.h @@ -14897,47 +14897,6 @@ Parser<ManagedTokenSource>::done_end () return (t->get_id () == RIGHT_CURLY || t->get_id () == END_OF_FILE); } -// Dumps lexer output to stderr. -template <typename ManagedTokenSource> -void -Parser<ManagedTokenSource>::debug_dump_lex_output (std::ostream &out) -{ - /* TODO: a better implementation of "lexer dump" (as in dump what was - * actually tokenised) would actually be to "write" a token to a file every - * time skip_token() here was called. This would reflect the parser - * modifications to the token stream, such as fixing the template angle - * brackets. */ - - const_TokenPtr tok = lexer.peek_token (); - - while (true) - { - if (tok->get_id () == Rust::END_OF_FILE) - break; - - bool has_text = tok->get_id () == Rust::IDENTIFIER - || tok->get_id () == Rust::INT_LITERAL - || tok->get_id () == Rust::FLOAT_LITERAL - || tok->get_id () == Rust::STRING_LITERAL - || tok->get_id () == Rust::CHAR_LITERAL - || tok->get_id () == Rust::BYTE_STRING_LITERAL - || tok->get_id () == Rust::BYTE_CHAR_LITERAL; - - Location loc = tok->get_locus (); - - out << "<id="; - out << tok->token_id_to_str (); - out << has_text ? (std::string (", text=") + tok->get_str () - + std::string (", typehint=") - + std::string (tok->get_type_hint_str ())) - : ""; - out << lexer.get_line_map ()->to_string (loc); - - lexer.skip_token (); - tok = lexer.peek_token (); - } -} - // Parses crate and dumps AST to stderr, recursively. template <typename ManagedTokenSource> void diff --git a/gcc/rust/parse/rust-parse.h b/gcc/rust/parse/rust-parse.h index e4c5a2c5c9f6..8449181b12f3 100644 --- a/gcc/rust/parse/rust-parse.h +++ b/gcc/rust/parse/rust-parse.h @@ -671,8 +671,6 @@ public: // Main entry point for parser. std::unique_ptr<AST::Crate> parse_crate (); - // Dumps all lexer output. - void debug_dump_lex_output (std::ostream &out); void debug_dump_ast_output (AST::Crate &crate, std::ostream &out); // Returns whether any parsing errors have occurred. diff --git a/gcc/rust/rust-session-manager.cc b/gcc/rust/rust-session-manager.cc index 6f51bd2e5a17..732aabe1f261 100644 --- a/gcc/rust/rust-session-manager.cc +++ b/gcc/rust/rust-session-manager.cc @@ -455,7 +455,22 @@ Session::compile_crate (const char *filename) // parse file here /* create lexer and parser - these are file-specific and so aren't instance * variables */ - Lexer lex (filename, std::move (file_wrap), linemap); + Optional<std::ofstream &> dump_lex_opt = Optional<std::ofstream &>::none (); + std::ofstream dump_lex_stream; + if (options.dump_option_enabled (CompileOptions::LEXER_DUMP)) + { + dump_lex_stream.open (kLexDumpFile); + if (dump_lex_stream.fail ()) + { + rust_error_at (Linemap::unknown_location (), + "cannot open %s:%m; ignored", kLexDumpFile); + } + auto stream = Optional<std::ofstream &>::some (dump_lex_stream); + dump_lex_opt = std::move (stream); + } + + Lexer lex (filename, std::move (file_wrap), linemap, dump_lex_opt); + Parser<Lexer> parser (lex); // generate crate from parser @@ -464,11 +479,7 @@ Session::compile_crate (const char *filename) // handle crate name handle_crate_name (*ast_crate.get ()); - // dump options - if (options.dump_option_enabled (CompileOptions::LEXER_DUMP)) - { - dump_lex (parser); - } + // dump options except lexer dump if (options.dump_option_enabled (CompileOptions::PARSER_AST_DUMP)) { dump_ast (parser, *ast_crate.get ()); @@ -835,24 +846,6 @@ Session::expansion (AST::Crate &crate) rust_debug ("finished expansion"); } -void -Session::dump_lex (Parser<Lexer> &parser) const -{ - std::ofstream out; - out.open (kLexDumpFile); - if (out.fail ()) - { - rust_error_at (Linemap::unknown_location (), "cannot open %s:%m; ignored", - kLexDumpFile); - return; - } - - // TODO: rewrite lexer dump or something so that it allows for the crate - // to already be parsed - parser.debug_dump_lex_output (out); - out.close (); -} - void Session::dump_ast (Parser<Lexer> &parser, AST::Crate &crate) const { diff --git a/gcc/rust/util/rust-optional.h b/gcc/rust/util/rust-optional.h index eba3a7886ac5..d7349820b387 100644 --- a/gcc/rust/util/rust-optional.h +++ b/gcc/rust/util/rust-optional.h @@ -194,6 +194,7 @@ private: public: Optional (const Optional &other) = default; Optional (Optional &&other) = default; + Optional &operator= (Optional &&other) = default; static Optional<T &> some (T &value) { -- GitLab