From e1839846e663d09c201a3442e84dc9ffb431875b Mon Sep 17 00:00:00 2001
From: "James K. Lowden" <jklowden@symas.com>
Date: Sat, 11 May 2024 18:31:22 -0400
Subject: [PATCH] restore scanner state after run_cdf

---
 gcc/cobol/scan.l      | 59 ++++++++++++---------------------------
 gcc/cobol/scan_ante.h | 44 +++++++++++++++++++++--------
 gcc/cobol/scan_post.h | 64 +++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 111 insertions(+), 56 deletions(-)

diff --git a/gcc/cobol/scan.l b/gcc/cobol/scan.l
index e30abfd7d35b..8a1f4fef073d 100644
--- a/gcc/cobol/scan.l
+++ b/gcc/cobol/scan.l
@@ -1084,8 +1084,8 @@ USE({SPC}FOR)?		{ return USE; }
                 	  yyless(0);
                 	}
 
-  FD/[[:blank:]]+       { need_level = false; return FD; }
-  SD/[[:blank:]]+       { need_level = false; return SD; }
+  FD/[[:blank:]]+       { parsing.need_level(false); return FD; }
+  SD/[[:blank:]]+       { parsing.need_level(false); return SD; }
 
   {NAME}                { yylval.string = strdup(yytext);
                           const char *value = is_numeric_constant(yytext);
@@ -1857,18 +1857,22 @@ COPY		{
 		  }
 		}
   ^[ ]*>>{OSPC}IF		{ yy_push_state(cdf_state); return CDF_IF; }
-  ^[ ]*>>{OSPC}ELSE	 	{ yy_push_state(cdf_state); return CDF_ELSE; }
-  ^[ ]*>>{OSPC}END-IF	 	{ yy_push_state(cdf_state); return CDF_END_IF; }
-
-  ^[ ]*[$]{OSPC}IF		{ if( ! dialect_mf() ) dialect_error( yytext, "mf");
-				  yy_push_state(cdf_state);
-				  return CDF_IF; }
-  ^[ ]*[$]{OSPC}ELSE	 	{ if( ! dialect_mf() ) dialect_error( yytext, "mf");
-				  yy_push_state(cdf_state); 
+  ^[ ]*>>{OSPC}ELSE	 	{ return CDF_ELSE; }
+  ^[ ]*>>{OSPC}END-IF	 	{ return CDF_END_IF; }
+
+  ^[ ]*[$]{OSPC}IF		{ if( ! dialect_mf() )  {
+				    dialect_error( yytext, "mf");
+				  }
+				  yy_push_state(cdf_state); return CDF_IF; }
+  ^[ ]*[$]{OSPC}ELSE	 	{ if( ! dialect_mf() )  {
+				    dialect_error( yytext, "mf");
+				  }
 				  return CDF_ELSE; }
-  ^[ ]*[$]{OSPC}END	 	{ if( ! dialect_mf() ) dialect_error( yytext, "mf");
-				  yy_push_state(cdf_state); 
+  ^[ ]*[$]{OSPC}END	 	{ if( ! dialect_mf() ) {
+				    dialect_error( yytext, "mf");
+				  }
 				  return CDF_END_IF; }
+
   ^[ ]*[$]{OSPC}SET({SPC}CONSTANT)? {
 				  if( ! dialect_mf() ) dialect_error( yytext, "mf");
 				  yy_push_state(cdf_state); return CDF_DEFINE; }
@@ -1969,36 +1973,7 @@ COPY		{
 }
 
 <*>.		{
-		  const char *state = "???";
-		  switch(YY_START) {
-		  case INITIAL: state = "INITIAL"; break;
-		  case author_state: state = "author_state"; break;
-		  case basis: state = "basis"; break;
-		  case bool_state: state = "bool_state"; break;
-		  case cdf_state: state = "cdf_state"; break;
-		  case classify: state = "classify"; break;
-		  case copy_state: state = "copy_state"; break;
-		  case date_state: state = "date_state"; break;
-		  case dot_state: state = "dot_state"; break;
-		  case field_level: state = "field_level"; break;
-		  case field_state: state = "field_state"; break;
-		  case function: state = "function"; break;
-		  case hex_state: state = "hex_state"; break;
-		  case ident_state: state = "ident_state"; break;
-		  case integer_count: state = "integer_count"; break;
-		  case name_state: state = "name_state"; break;
-		  case numeric_state: state = "numeric_state"; break;
-		  case para_state: state = "para_state"; break;
-		  case picture: state = "picture"; break;
-		  case picture_count: state = "picture_count"; break;
-		  case procedure_div: state = "procedure_div"; break;
-		  case program_id_state: state = "program_id_state"; break;
-		  case quoted1: state = "quoted1"; break;
-		  case quoted2: state = "quoted2"; break;
-		  case quoteq: state = "quoteq"; break;
-		  case sort_state: state = "sort_state"; break;
-		  case when_not: state = "when_not"; break;
-		  }
+		  auto state = start_condition_is();
 		  yyerrorv("scanner error: "
 		        "%sstart condition %s (0x%02x): scanner default rule",
 			   YY_AT_BOL()? "(bol) " : "", state, *yytext );
diff --git a/gcc/cobol/scan_ante.h b/gcc/cobol/scan_ante.h
index 3c9c583608dc..f14071dceac8 100644
--- a/gcc/cobol/scan_ante.h
+++ b/gcc/cobol/scan_ante.h
@@ -133,14 +133,11 @@ original_number( char input[] = NULL ) {
   return out;
 }
 
-static bool need_level = true;
-
-// Used only by parser, so scanner_normal() obviously true. 
-void field_done() { orig_picture[0] = '\0'; need_level = true; }
-
 /*
  * Local functions
  */
+static const char * start_condition_str( int sc );
+static const char * start_condition_is();
 
 static int
 numstr_of( const char string[], radix_t radix = decimal_e ) {
@@ -261,31 +258,54 @@ struct cdf_status_t {
  * CDF is skipping some code).  Because CDF status is nested, status is true
  * only if the whole stack is true. That is, if B is stacked on A, and A is
  * false, then all of B is skipped, regardless of >>IF and >>ELSE for B.
-*/
+ */
+static bool run_cdf( int token );
 
 static class parsing_status_t : public std::stack<cdf_status_t> {
- public:
+  struct parsing_state_t {
+    bool expect_field_level;
+    parsing_state_t() : expect_field_level(true) {}
+  } state, shadow;
+  
   int (*parser)(void) = yyparse;
 
+ public:
+
   bool on() const { // true only if all true
     bool parsing = std::all_of( c.begin(), c.end(),
 			       []( const auto& status ) { return status.parsing; } );
     return parsing;
   }
 
-  bool feed_the_parser() const {
+  bool feed_a_parser() const {
     return on() || parser == ydfparse;
   }
 
+  void need_level( bool tf ) {     state.expect_field_level = tf; }
+  bool need_level() const { return state.expect_field_level; }
+
+  void state_save() {
+    shadow = state;
+  }
+  void state_restore() {
+    state = shadow;
+  }
+
+  bool in_cdf() const { return parser == ydfparse; }
+  bool normal() const { return on() && parser == yyparse; }
+
   void splat() const {
     int i=0;
     for( const auto& status : c ) {
       warnx( "%4d\t%s", ++i, status.str() );
     }
   }
-
+  friend bool run_cdf( int token );
 } parsing;
 
+// Used only by parser, so scanner_normal() obviously true. 
+void field_done() { orig_picture[0] = '\0'; parsing.need_level(true); }
+
 static int scanner_token() {
   if( parsing.empty() ) {
     yyerror("error: >>ELSE or >>END-IF without >>IF");
@@ -295,7 +315,7 @@ static int scanner_token() {
 }
 
 bool scanner_parsing() { return parsing.on(); }
-bool scanner_normal()  { return parsing.on() && parsing.parser == yyparse; }
+bool scanner_normal()  { return parsing.normal(); }
 
 void scanner_parsing( int token, bool tf ) {
   parsing.push( cdf_status_t(token, tf) );
@@ -331,11 +351,11 @@ void scanner_parsing_pop() {
 
 
 static bool level_needed() {
-  return scanner_normal() && need_level;
+  return scanner_normal() && parsing.need_level();
 }
 
 static void level_found() {
-  if( scanner_normal() ) need_level = false;
+  if( scanner_normal() ) parsing.need_level(false);
 }
 
 /*
diff --git a/gcc/cobol/scan_post.h b/gcc/cobol/scan_post.h
index fed531939072..74e794adb746 100644
--- a/gcc/cobol/scan_post.h
+++ b/gcc/cobol/scan_post.h
@@ -28,6 +28,44 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+static const char *
+start_condition_str( int sc ) {
+  const char *state = "???";
+  switch(sc) {
+  case INITIAL: state = "INITIAL"; break;
+  case author_state: state = "author_state"; break;
+  case basis: state = "basis"; break;
+  case bool_state: state = "bool_state"; break;
+  case cdf_state: state = "cdf_state"; break;
+  case classify: state = "classify"; break;
+  case copy_state: state = "copy_state"; break;
+  case date_state: state = "date_state"; break;
+  case dot_state: state = "dot_state"; break;
+  case field_level: state = "field_level"; break;
+  case field_state: state = "field_state"; break;
+  case function: state = "function"; break;
+  case hex_state: state = "hex_state"; break;
+  case ident_state: state = "ident_state"; break;
+  case integer_count: state = "integer_count"; break;
+  case name_state: state = "name_state"; break;
+  case numeric_state: state = "numeric_state"; break;
+  case para_state: state = "para_state"; break;
+  case picture: state = "picture"; break;
+  case picture_count: state = "picture_count"; break;
+  case procedure_div: state = "procedure_div"; break;
+  case program_id_state: state = "program_id_state"; break;
+  case quoted1: state = "quoted1"; break;
+  case quoted2: state = "quoted2"; break;
+  case quoteq: state = "quoteq"; break;
+  case sort_state: state = "sort_state"; break;
+  case when_not: state = "when_not"; break;
+  }
+  return state;
+}
+
+static const char *
+start_condition_is() { return start_condition_str( YY_START ); }
+
 /*
  * Match datetime constants.
  *
@@ -161,6 +199,8 @@ run_cdf( int token ) {
 
   std::swap(parser, parsing.parser);
 
+  if( YY_START == cdf_state ) yy_pop_state(); 
+
   return  0 == erc;
 }
 
@@ -198,6 +238,26 @@ prelex() {
     token = ydfchar > 0? final_token : next_token();
     // re-enter cdf parser if next token is a CDF token
   }
+
+  /*
+   * The final, rejected CDF token might be a LEVEL number. 
+   */
+  if( YY_START == field_state && level_needed() ) {
+    if( token == final_token ) {
+      switch( token ) {
+      case NUMSTR:
+	if( yy_flex_debug ) warnx("final token is NUMSTR");
+	yylval.number = level_of(yylval.numstr.string);
+	token = LEVEL;
+	break;
+      case YDF_NUMBER:
+	if( yy_flex_debug ) warnx("final token is YDF_NUMBER");
+	yylval.number = ydflval.number;
+	token = LEVEL;
+	break;
+      }
+    }
+  }
   
   if( yydebug ) warnx( ">>CDF parser done, returning "
 		       "%s (because final_token %s, lookhead %d) on line %d",
@@ -258,7 +318,7 @@ yylex(void) {
   do {
     token = prelex();
     if( yy_flex_debug ) {
-      if( parsing.parser == ydfparse ) {
+      if( parsing.in_cdf() ) {
 	warnx( "%s:%d: routing %s to CDF parser", __func__, __LINE__,
 	       keyword_str(token) );
       } else if( !parsing.on() ) {
@@ -267,7 +327,7 @@ yylex(void) {
       }
     }
 
-  } while( token && ! parsing.feed_the_parser() );
+  } while( token && ! parsing.feed_a_parser() );
 
   if( next_sentence && token == '.' ) {
     produce_next_sentence_target = true;
-- 
GitLab