From e1839846e663d09c201a3442e84dc9ffb431875b Mon Sep 17 00:00:00 2001 From: "James K. Lowden" <jklowden@symas.com> Date: Sat, 11 May 2024 18:31:22 -0400 Subject: [PATCH] restore scanner state after run_cdf --- gcc/cobol/scan.l | 59 ++++++++++++--------------------------- gcc/cobol/scan_ante.h | 44 +++++++++++++++++++++-------- gcc/cobol/scan_post.h | 64 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 111 insertions(+), 56 deletions(-) diff --git a/gcc/cobol/scan.l b/gcc/cobol/scan.l index e30abfd7d35b..8a1f4fef073d 100644 --- a/gcc/cobol/scan.l +++ b/gcc/cobol/scan.l @@ -1084,8 +1084,8 @@ USE({SPC}FOR)? { return USE; } yyless(0); } - FD/[[:blank:]]+ { need_level = false; return FD; } - SD/[[:blank:]]+ { need_level = false; return SD; } + FD/[[:blank:]]+ { parsing.need_level(false); return FD; } + SD/[[:blank:]]+ { parsing.need_level(false); return SD; } {NAME} { yylval.string = strdup(yytext); const char *value = is_numeric_constant(yytext); @@ -1857,18 +1857,22 @@ COPY { } } ^[ ]*>>{OSPC}IF { yy_push_state(cdf_state); return CDF_IF; } - ^[ ]*>>{OSPC}ELSE { yy_push_state(cdf_state); return CDF_ELSE; } - ^[ ]*>>{OSPC}END-IF { yy_push_state(cdf_state); return CDF_END_IF; } - - ^[ ]*[$]{OSPC}IF { if( ! dialect_mf() ) dialect_error( yytext, "mf"); - yy_push_state(cdf_state); - return CDF_IF; } - ^[ ]*[$]{OSPC}ELSE { if( ! dialect_mf() ) dialect_error( yytext, "mf"); - yy_push_state(cdf_state); + ^[ ]*>>{OSPC}ELSE { return CDF_ELSE; } + ^[ ]*>>{OSPC}END-IF { return CDF_END_IF; } + + ^[ ]*[$]{OSPC}IF { if( ! dialect_mf() ) { + dialect_error( yytext, "mf"); + } + yy_push_state(cdf_state); return CDF_IF; } + ^[ ]*[$]{OSPC}ELSE { if( ! dialect_mf() ) { + dialect_error( yytext, "mf"); + } return CDF_ELSE; } - ^[ ]*[$]{OSPC}END { if( ! dialect_mf() ) dialect_error( yytext, "mf"); - yy_push_state(cdf_state); + ^[ ]*[$]{OSPC}END { if( ! dialect_mf() ) { + dialect_error( yytext, "mf"); + } return CDF_END_IF; } + ^[ ]*[$]{OSPC}SET({SPC}CONSTANT)? { if( ! dialect_mf() ) dialect_error( yytext, "mf"); yy_push_state(cdf_state); return CDF_DEFINE; } @@ -1969,36 +1973,7 @@ COPY { } <*>. { - const char *state = "???"; - switch(YY_START) { - case INITIAL: state = "INITIAL"; break; - case author_state: state = "author_state"; break; - case basis: state = "basis"; break; - case bool_state: state = "bool_state"; break; - case cdf_state: state = "cdf_state"; break; - case classify: state = "classify"; break; - case copy_state: state = "copy_state"; break; - case date_state: state = "date_state"; break; - case dot_state: state = "dot_state"; break; - case field_level: state = "field_level"; break; - case field_state: state = "field_state"; break; - case function: state = "function"; break; - case hex_state: state = "hex_state"; break; - case ident_state: state = "ident_state"; break; - case integer_count: state = "integer_count"; break; - case name_state: state = "name_state"; break; - case numeric_state: state = "numeric_state"; break; - case para_state: state = "para_state"; break; - case picture: state = "picture"; break; - case picture_count: state = "picture_count"; break; - case procedure_div: state = "procedure_div"; break; - case program_id_state: state = "program_id_state"; break; - case quoted1: state = "quoted1"; break; - case quoted2: state = "quoted2"; break; - case quoteq: state = "quoteq"; break; - case sort_state: state = "sort_state"; break; - case when_not: state = "when_not"; break; - } + auto state = start_condition_is(); yyerrorv("scanner error: " "%sstart condition %s (0x%02x): scanner default rule", YY_AT_BOL()? "(bol) " : "", state, *yytext ); diff --git a/gcc/cobol/scan_ante.h b/gcc/cobol/scan_ante.h index 3c9c583608dc..f14071dceac8 100644 --- a/gcc/cobol/scan_ante.h +++ b/gcc/cobol/scan_ante.h @@ -133,14 +133,11 @@ original_number( char input[] = NULL ) { return out; } -static bool need_level = true; - -// Used only by parser, so scanner_normal() obviously true. -void field_done() { orig_picture[0] = '\0'; need_level = true; } - /* * Local functions */ +static const char * start_condition_str( int sc ); +static const char * start_condition_is(); static int numstr_of( const char string[], radix_t radix = decimal_e ) { @@ -261,31 +258,54 @@ struct cdf_status_t { * CDF is skipping some code). Because CDF status is nested, status is true * only if the whole stack is true. That is, if B is stacked on A, and A is * false, then all of B is skipped, regardless of >>IF and >>ELSE for B. -*/ + */ +static bool run_cdf( int token ); static class parsing_status_t : public std::stack<cdf_status_t> { - public: + struct parsing_state_t { + bool expect_field_level; + parsing_state_t() : expect_field_level(true) {} + } state, shadow; + int (*parser)(void) = yyparse; + public: + bool on() const { // true only if all true bool parsing = std::all_of( c.begin(), c.end(), []( const auto& status ) { return status.parsing; } ); return parsing; } - bool feed_the_parser() const { + bool feed_a_parser() const { return on() || parser == ydfparse; } + void need_level( bool tf ) { state.expect_field_level = tf; } + bool need_level() const { return state.expect_field_level; } + + void state_save() { + shadow = state; + } + void state_restore() { + state = shadow; + } + + bool in_cdf() const { return parser == ydfparse; } + bool normal() const { return on() && parser == yyparse; } + void splat() const { int i=0; for( const auto& status : c ) { warnx( "%4d\t%s", ++i, status.str() ); } } - + friend bool run_cdf( int token ); } parsing; +// Used only by parser, so scanner_normal() obviously true. +void field_done() { orig_picture[0] = '\0'; parsing.need_level(true); } + static int scanner_token() { if( parsing.empty() ) { yyerror("error: >>ELSE or >>END-IF without >>IF"); @@ -295,7 +315,7 @@ static int scanner_token() { } bool scanner_parsing() { return parsing.on(); } -bool scanner_normal() { return parsing.on() && parsing.parser == yyparse; } +bool scanner_normal() { return parsing.normal(); } void scanner_parsing( int token, bool tf ) { parsing.push( cdf_status_t(token, tf) ); @@ -331,11 +351,11 @@ void scanner_parsing_pop() { static bool level_needed() { - return scanner_normal() && need_level; + return scanner_normal() && parsing.need_level(); } static void level_found() { - if( scanner_normal() ) need_level = false; + if( scanner_normal() ) parsing.need_level(false); } /* diff --git a/gcc/cobol/scan_post.h b/gcc/cobol/scan_post.h index fed531939072..74e794adb746 100644 --- a/gcc/cobol/scan_post.h +++ b/gcc/cobol/scan_post.h @@ -28,6 +28,44 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +static const char * +start_condition_str( int sc ) { + const char *state = "???"; + switch(sc) { + case INITIAL: state = "INITIAL"; break; + case author_state: state = "author_state"; break; + case basis: state = "basis"; break; + case bool_state: state = "bool_state"; break; + case cdf_state: state = "cdf_state"; break; + case classify: state = "classify"; break; + case copy_state: state = "copy_state"; break; + case date_state: state = "date_state"; break; + case dot_state: state = "dot_state"; break; + case field_level: state = "field_level"; break; + case field_state: state = "field_state"; break; + case function: state = "function"; break; + case hex_state: state = "hex_state"; break; + case ident_state: state = "ident_state"; break; + case integer_count: state = "integer_count"; break; + case name_state: state = "name_state"; break; + case numeric_state: state = "numeric_state"; break; + case para_state: state = "para_state"; break; + case picture: state = "picture"; break; + case picture_count: state = "picture_count"; break; + case procedure_div: state = "procedure_div"; break; + case program_id_state: state = "program_id_state"; break; + case quoted1: state = "quoted1"; break; + case quoted2: state = "quoted2"; break; + case quoteq: state = "quoteq"; break; + case sort_state: state = "sort_state"; break; + case when_not: state = "when_not"; break; + } + return state; +} + +static const char * +start_condition_is() { return start_condition_str( YY_START ); } + /* * Match datetime constants. * @@ -161,6 +199,8 @@ run_cdf( int token ) { std::swap(parser, parsing.parser); + if( YY_START == cdf_state ) yy_pop_state(); + return 0 == erc; } @@ -198,6 +238,26 @@ prelex() { token = ydfchar > 0? final_token : next_token(); // re-enter cdf parser if next token is a CDF token } + + /* + * The final, rejected CDF token might be a LEVEL number. + */ + if( YY_START == field_state && level_needed() ) { + if( token == final_token ) { + switch( token ) { + case NUMSTR: + if( yy_flex_debug ) warnx("final token is NUMSTR"); + yylval.number = level_of(yylval.numstr.string); + token = LEVEL; + break; + case YDF_NUMBER: + if( yy_flex_debug ) warnx("final token is YDF_NUMBER"); + yylval.number = ydflval.number; + token = LEVEL; + break; + } + } + } if( yydebug ) warnx( ">>CDF parser done, returning " "%s (because final_token %s, lookhead %d) on line %d", @@ -258,7 +318,7 @@ yylex(void) { do { token = prelex(); if( yy_flex_debug ) { - if( parsing.parser == ydfparse ) { + if( parsing.in_cdf() ) { warnx( "%s:%d: routing %s to CDF parser", __func__, __LINE__, keyword_str(token) ); } else if( !parsing.on() ) { @@ -267,7 +327,7 @@ yylex(void) { } } - } while( token && ! parsing.feed_the_parser() ); + } while( token && ! parsing.feed_a_parser() ); if( next_sentence && token == '.' ) { produce_next_sentence_target = true; -- GitLab