From 8eada21f5a1d85ea63634386da1e340337d6f0ac Mon Sep 17 00:00:00 2001
From: "James K. Lowden" <jklowden@symas.com>
Date: Thu, 9 May 2024 16:59:09 -0400
Subject: [PATCH] many fixes for CDF IF

---
 gcc/cobol/Make-lang.in |  6 +--
 gcc/cobol/cdf.y        | 31 ++++++++++----
 gcc/cobol/scan.l       | 96 +++++++++++++++++++++++++-----------------
 gcc/cobol/scan_ante.h  | 12 ++++--
 gcc/cobol/scan_post.h  | 17 +++++---
 5 files changed, 102 insertions(+), 60 deletions(-)

diff --git a/gcc/cobol/Make-lang.in b/gcc/cobol/Make-lang.in
index ffb4259926d1..d4825f23c2e4 100644
--- a/gcc/cobol/Make-lang.in
+++ b/gcc/cobol/Make-lang.in
@@ -138,7 +138,7 @@ gcobol$(exeext): \
 
 # First, files needed for parsing:
 
-cobol/parse.c: cobol/parse.y cobol/genapi.h
+cobol/parse.c: cobol/parse.y cobol/genapi.h cobol/parse_ante.h
 	$(YACC) -o $@ $(YFLAGS)				\
 		--defines=cobol/parse.h			\
 		--report-file=cobol/parser.out $<
@@ -147,8 +147,8 @@ cobol/cdf.c: cobol/cdf.y cobol/genapi.h
 	$(YACC) -o $@ $(YFLAGS)						\
 		--defines=cobol/cdf.h --report-file=cobol/cdf.out $<
 
-cobol/scan.c: cobol/scan.l
-	$(LEX) -o$@ $(LFLAGS) $^
+cobol/scan.c: cobol/scan.l cobol/scan_ante.h cobol/scan_post.h 
+	$(LEX) -o$@ $(LFLAGS) $<
 
 cobol/scan.o: cobol/parse.c # cobol/parse.h # parse.h gets built along with parse.c
 
diff --git a/gcc/cobol/cdf.y b/gcc/cobol/cdf.y
index d38f3fb6703a..e1e0a31f3d31 100644
--- a/gcc/cobol/cdf.y
+++ b/gcc/cobol/cdf.y
@@ -730,20 +730,33 @@ with:           %empty
 bool // used by cobol1.cc
 defined_cmd( const char arg[] )
 {
+  cdfval_t value(1);
+
   char *name = strdup(arg);
   char *p = strchr(name, '=');
-
-  if(p) *p = '\0';
-
-  dictionary[name] = p? cdfval_t(p+1) : cdfval_t("1");
+  if(p) {
+    *p++ = '\0';
+    int pos, number;
+    if( 1 == sscanf(p, "%d%n", &number, &pos) ) {
+      if( size_t(pos) == strlen(p) ) {
+	value = cdfval_t(number);
+      }
+    }
+  }
+  
+  dictionary[name] = value;
 
   auto cdf_name = dictionary.find(name);
   assert(cdf_name != dictionary.end());
-  assert(cdf_name->second.string != NULL);
-
-  if( yydebug )
-    warnx("%s: added -D %s = %s", __func__, name, cdf_name->second.string);
-
+  assert(cdf_name->second.is_numeric() || cdf_name->second.string != NULL);
+  
+  if( yydebug ) {
+    if( cdf_name->second.is_numeric() ) {
+      warnx("%s: added -D %s = %ld", __func__, name, cdf_name->second.as_number());
+    } else {
+      warnx("%s: added -D %s = \"%s\"", __func__, name, cdf_name->second.string);
+    }
+  }
   return true;
 }
 
diff --git a/gcc/cobol/scan.l b/gcc/cobol/scan.l
index acd7c9f95924..aadaa889ab33 100644
--- a/gcc/cobol/scan.l
+++ b/gcc/cobol/scan.l
@@ -217,18 +217,26 @@ LINE_DIRECTIVE [#]line{SPC}[[:alnum:]]+{SPC}[""''].+\n
 }
 
 <cdf_state>{
+  [+-]?{INTEGERZ}       { int value;
+			  if( is_integer_token(&value) ) {
+			    ydflval.number = value;
+			    return YDF_NUMBER;
+			  }
+			  yyerrorv("logic error: %s not an integer = %d",
+				   yytext, value);
+			  return NO_CONDITION;
+			}
+
   {NAME}	 	{ ydflval.string = strdup(yytext);
-			  yy_pop_state();
 			  return NAME;
 			}
   %EBCDIC-MODE		{ ydflval.number = feature_internal_ebcdic_e;
-			  yy_pop_state();
 			  return FEATURE; }
   %64-BIT-POINTER	{ ydflval.number = feature_embiggen_e;
-			  yy_pop_state();
 			  return FEATURE; }
   [[:blank:]]+
   {BLANK_EOL}
+  . 			{ yyless(0); yy_pop_state(); } // not a CDF token
 }
 
     /* Initial start condition only.  */
@@ -860,32 +868,36 @@ USE({SPC}FOR)?		{ return USE; }
 
 <field_level>{
   66/{SPC}(\f#)?{NAME} { yy_pop_state();
+                          if( !lexing.on() ) orig_picture[0] = '\0';
                           if( need_level ) {
-                            need_level = false;
+                            level_found();
                             yylval.number = level_of(yytext); return LEVEL66;
                           } else {
                             return numstr_of(yytext);
                           }
                         }
   78/{SPC}(\f#)?{NAME} { yy_pop_state();
+                          if( !lexing.on() ) orig_picture[0] = '\0';
                           if( need_level ) {
-                            need_level = false;
+                            level_found();
                             yylval.number = level_of(yytext); return LEVEL78;
                           } else {
                             return numstr_of(yytext);
                           }
                         }
   88/{SPC}(\f#)?{NAME} { yy_pop_state();
+                          if( !lexing.on() ) orig_picture[0] = '\0';
                           if( need_level ) {
-                            need_level = false;
+                            level_found();
                             yylval.number = level_of(yytext); return LEVEL88;
                           } else {
                             return numstr_of(yytext);
                           }
                         }
   [[:digit:]]{1,2}/[[:space:]] { yy_pop_state();
+                          if( !lexing.on() ) orig_picture[0] = '\0';
                           if( need_level ) {
-                            need_level = false;
+                            level_found();
                             yylval.number = level_of(yytext); return LEVEL;
                           } else {
                             return numstr_of(yytext);
@@ -897,7 +909,8 @@ USE({SPC}FOR)?		{ return USE; }
 
 <field_state>{
   ^[[:blank:]]*[[:digit:]]{1,2}{OSPC}/[.] {
-                              need_level = false;
+                              if( !lexing.on() ) orig_picture[0] = '\0';
+                              level_found();
                               yylval.number = level_of(yytext);
                               return LEVEL;
                             }
@@ -1408,28 +1421,7 @@ USE({SPC}FOR)?		{ return USE; }
                  }
 }
 
-<procedure_div>{
-  EXIT/{SECTION} 	{ return EXIT; }
-  {NAME}/{SECTION} 	{ yylval.string = strdup(yytext);
-                          return SECTION_NAME; }
-
-  RETURNING             { return RETURNING; }
-
-  (EJECT{OSPC})[.]/{SPC}(\f#)?{NAME}{OSPC}{DOTSEP} {
- 		  if( ! dialect_ibm() ) {
-		    yyerror("error: EJECT is not ISO syntax, requires -dialect ibm");
-		  }
- 		  yy_push_state(para_state); }
-
-  [.]/{SPC}(\f#)?{NAME}{OSPC}{DOTSEP} {
-		  yy_push_state(para_state); return '.'; }
-
-  EJECT/{SPC}(\f#)?{NAME}{OSPC}{DOTSEP} { 
- 		  if( ! dialect_ibm() ) {
-		    yyerror("error: EJECT is not ISO syntax, requires -dialect ibm");
-		  }
- 		  yy_push_state(para_state); }
-
+<cdf_state,procedure_div>{
   (IS{SPC})?"<"         { return '<'; }
   (IS{SPC})?"<="        { return LE;  }
   (IS{SPC})?"="         { return '='; }
@@ -1450,11 +1442,36 @@ USE({SPC}FOR)?		{ return USE; }
   {ISNT}{SPC}"<="        { return '>'; }
 
   {ISNT}{SPC}GREATER{SPC}(THAN)?{SPC}{OR_EQUAL}/[[:space:]] { return '<'; }
-  {ISNT}{SPC}GREATER{SPC}(THAN)?		{ return LE; }
+  {ISNT}{SPC}GREATER{SPC}(THAN)?	{ return LE; }
   {ISNT}{SPC}EQUALS?{SPC}(TO)?		{ return NE; }
   {ISNT}{SPC}LESS{SPC}(THAN)?		{ return GE; }
   {ISNT}{SPC}LESS{SPC}(THAN)?{SPC}{OR_EQUAL}/[[:space:]] { return '>'; }
 
+  "**"        { return POW; }
+}
+
+<procedure_div>{
+  EXIT/{SECTION} 	{ return EXIT; }
+  {NAME}/{SECTION} 	{ yylval.string = strdup(yytext);
+                          return SECTION_NAME; }
+
+  RETURNING             { return RETURNING; }
+
+  (EJECT{OSPC})[.]/{SPC}(\f#)?{NAME}{OSPC}{DOTSEP} {
+ 		  if( ! dialect_ibm() ) {
+		    yyerror("error: EJECT is not ISO syntax, requires -dialect ibm");
+		  }
+ 		  yy_push_state(para_state); }
+
+  [.]/{SPC}(\f#)?{NAME}{OSPC}{DOTSEP} {
+		  yy_push_state(para_state); return '.'; }
+
+  EJECT/{SPC}(\f#)?{NAME}{OSPC}{DOTSEP} { 
+ 		  if( ! dialect_ibm() ) {
+		    yyerror("error: EJECT is not ISO syntax, requires -dialect ibm");
+		  }
+ 		  yy_push_state(para_state); }
+
   (IS{SPC})?POSITIVE/[[:space:]]  { yylval.number =  IS; return POSITIVE; }
   (IS{SPC})?NEGATIVE/[[:space:]]  { yylval.number =  IS; return NEGATIVE; }
   (IS{SPC})?ZERO/[[:space:]]      { yylval.number =  IS; return ZERO; }
@@ -1463,8 +1480,6 @@ USE({SPC}FOR)?		{ return USE; }
   {ISNT}{SPC}NEGATIVE/[[:space:]] { yylval.number = NOT; return NEGATIVE; }
   {ISNT}{SPC}ZERO/[[:space:]]     { yylval.number = NOT; return ZERO; }
 
-  "**"        { return POW; }
-
   [(:)]                   { return *yytext; }
   [(]/[^(:)""'']*[:][^)]*[)]  { return LPAREN; /* parentheses around a colon */ }
 
@@ -1841,16 +1856,22 @@ COPY		{
 		    if( include_debug() ) yyless(7);
 		  }
 		}
-  ^[ ]*>>{OSPC}IF		{ return CDF_IF; }
-  ^[ ]*>>{OSPC}ELSE	 	{ return CDF_ELSE; }
-  ^[ ]*>>{OSPC}END-IF	 	{ return CDF_END_IF; }
+  ^[ ]*>>{OSPC}IF		{ yy_push_state(cdf_state); return CDF_IF; }
+  ^[ ]*>>{OSPC}ELSE	 	{ yy_push_state(cdf_state); return CDF_ELSE; }
+  ^[ ]*>>{OSPC}END-IF	 	{ yy_push_state(cdf_state); return CDF_END_IF; }
 
- ^[ ]*[$]{OSPC}IF		{ if( ! dialect_mf() ) dialect_error( yytext, "mf");
+  ^[ ]*[$]{OSPC}IF		{ if( ! dialect_mf() ) dialect_error( yytext, "mf");
+				  yy_push_state(cdf_state);
 				  return CDF_IF; }
   ^[ ]*[$]{OSPC}ELSE	 	{ if( ! dialect_mf() ) dialect_error( yytext, "mf");
+				  yy_push_state(cdf_state); 
 				  return CDF_ELSE; }
   ^[ ]*[$]{OSPC}END	 	{ if( ! dialect_mf() ) dialect_error( yytext, "mf");
+				  yy_push_state(cdf_state); 
 				  return CDF_END_IF; }
+  ^[ ]*[$]{OSPC}SET({SPC}CONSTANT)? {
+				  if( ! dialect_mf() ) dialect_error( yytext, "mf");
+				  yy_push_state(cdf_state); return CDF_DEFINE; }
 
   ^[ ]*>>{OSPC}EVALUATE		{ return CDF_EVALUATE; }
   ^[ ]*>>{OSPC}WHEN		{ return CDF_WHEN; }
@@ -1869,7 +1890,6 @@ COPY		{
 				   "unknown CDF token: %s", yytext);
 			}
 
-
   AS		{ return AS; }
   CONSTANT	{ return CONSTANT; }
   (IS{SPC})?DEFINED	{ ydflval.boolean = true;  return DEFINED; }
diff --git a/gcc/cobol/scan_ante.h b/gcc/cobol/scan_ante.h
index a24222114533..ba618ee95795 100644
--- a/gcc/cobol/scan_ante.h
+++ b/gcc/cobol/scan_ante.h
@@ -136,6 +136,7 @@ original_number( char input[] = NULL ) {
 }
 
 static bool need_level = true;
+static void level_found() { need_level = false; }
 
 void field_done() { orig_picture[0] = '\0'; need_level = true; }
 
@@ -143,7 +144,8 @@ void field_done() { orig_picture[0] = '\0'; need_level = true; }
  * Local functions
  */
 
-static inline int numstr_of( const char string[], radix_t radix = decimal_e ) {
+static int
+numstr_of( const char string[], radix_t radix = decimal_e ) {
   yylval.numstr.radix = radix;
   ydflval.string = yylval.numstr.string = strdup(string);
   char *comma = strchr(yylval.numstr.string, ',');
@@ -391,16 +393,18 @@ picset( int token ) {
   char *p = orig_picture + strlen(orig_picture);
 
   if( eop < p + yyleng ) {
-    yyerrorv("PICTURE exceeds maximum size of %zu bytes", sizeof(orig_picture) - 1);
+    yyerrorv("PICTURE '%s%s' exceeds maximum size of %zu bytes",
+	     p, yytext, sizeof(orig_picture) - 1);
   }
   snprintf( p, eop - p, "%s", yytext );
   return token;
 }
 
 static inline bool
-is_integer_token(void) {
+is_integer_token( int *pvalue = NULL ) {
   int v, n = 0;
-  return 1 == sscanf(yytext, "%d%n", &v, &n) && n == yyleng;
+  if( pvalue == NULL ) pvalue = &v;
+  return 1 == sscanf(yytext, "%d%n", pvalue, &n) && n == yyleng;
 }
 
 static bool need_nume = false;
diff --git a/gcc/cobol/scan_post.h b/gcc/cobol/scan_post.h
index 5ef842b2577b..6b94045e956a 100644
--- a/gcc/cobol/scan_post.h
+++ b/gcc/cobol/scan_post.h
@@ -202,16 +202,21 @@ prelex() {
 
   assert(is_cdf_token(token));
 
-  inject_token(token); // because it will be needed by CDF parser
-
   if( yydebug ) warnx( ">>CDF parser starting for %s, line %d",
 		       keyword_str(token), yylineno );
+  
+  while( is_cdf_token(token) ) {
+    inject_token(token); // because it will be needed by CDF parser
 
-  if( ! run_cdf(token) ) {
-    yyerror( ">>CDF parser failed" );
-    return NO_CONDITION;
+    if( ! run_cdf(token) ) {
+      yyerror( ">>CDF parser failed" );
+      return NO_CONDITION;
+    }
+    token = ydfchar > 0? final_token : next_token();
+    // re-enter cdf parser if next token is a CDF token
+    if( ! lexing.on() ) break;
   }
-  token = ydfchar > 0? final_token : next_token();
+  
   if( yydebug ) warnx( ">>CDF parser done, returning "
 		       "%s (because final_token %s, lookhead %d) on line %d",
 		       keyword_str(token), keyword_str(final_token),
-- 
GitLab