diff --git a/gcc/cobol/ChangeLog b/gcc/cobol/ChangeLog index 316d0d2849aa69571ea90ced20fe6f0698760f81..a13af4917793fad4e5f0658a283cffa7a9dcc2da 100644 --- a/gcc/cobol/ChangeLog +++ b/gcc/cobol/ChangeLog @@ -55,5 +55,7 @@ * Normalize #includes in util.cc * Normalize #includes in symfind.cc * Normalize #includes in cdf-copy.cc and copybook.h + * Normalize #includes in lexio.cc + diff --git a/gcc/cobol/cdf_text.h b/gcc/cobol/cdf_text.h deleted file mode 100644 index 99a3eac1ded32f12d8c32d00de5888fd42a3d4ec..0000000000000000000000000000000000000000 --- a/gcc/cobol/cdf_text.h +++ /dev/null @@ -1,609 +0,0 @@ -/* - * Copyright (c) 2021-2024 Symas Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of the Symas Corporation nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -static const char * -find_filter( const char filter[] ) { - - if( 0 == access(filter, X_OK) ) { - return filter; - } - - const char *path = getenv("PATH"); - if( ! path ) return NULL; - char *p = strdup(path), *eopath = p + strlen(p); - - while( *p != '\0' ) { - auto pend = std::find( p, eopath, ':' ); - if( *pend == ':' ) *pend++ = '\0'; - - static char name[PATH_MAX]; - - snprintf( name, sizeof(name), "%s/%s", p, filter ); - - if( 0 == access(name, X_OK) ) { - return name; - } - p = pend; - } - return NULL; -} - -bool verbose_file_reader = false; - -typedef std::pair <char *, std::list<std::string> > preprocessor_filter_t; -static std::list<preprocessor_filter_t> preprocessor_filters; -static std::list<const char *> included_files; - -#include "lexio.h" -#include "cbldiag.h" - -#include <unistd.h> - -#include <sys/stat.h> -#include <sys/types.h> -#include <sys/wait.h> - -bool -include_file_add(const char filename[]) { - struct stat sb; - if( -1 == stat(filename, &sb) ) return false; - included_files.push_back(filename); - return true; -} - -bool -preprocess_filter_add( const char input[] ) { - char filter[ strlen(input) + 1 ]; - strcpy(filter, input); - char *optstr = strchr(filter, ','); - std::list <std::string> options; - - if( optstr ) { - for( char *opt = optstr + 1; (opt = strtok(opt, ",")); opt = NULL ) { - options.push_back(opt); - } - *optstr = '\0'; - } - - auto filename = find_filter(filter); - if( !filename ) { 
- cbl_warnx("error: preprocessor '%s/%s' not found", - getcwd(NULL, 0), filter); - return false; - } - preprocessor_filters.push_back( std::make_pair(strdup(filename), options) ); - return true; -} - -void -cdftext::echo_input( int input, const char filename[] ) { - int fd; - if( -1 == (fd = dup(input)) ) { - cbl_warn( "could not open preprocessed file %s to echo to standard output", - filename ); - return; - } - - auto mfile = map_file(fd); - - if( -1 == write(STDOUT_FILENO, mfile.data, mfile.size()) ) { - cbl_warn( "could not write preprocessed file %s to standard output", - filename ); - } - if( -1 == munmap(mfile.data, mfile.size()) ) { - cbl_warn( "could not release mapped file" ); - } - if( -1 == close(fd) ) { - cbl_warn( "could not close mapped file" ); - } -} - -static inline ino_t -inode_of( int fd ) { - struct stat sb; - if( -1 == fstat(fd, &sb) ) { - cbl_err(EXIT_FAILURE, "could not stat fd %d", fd); - } - return sb.st_ino; -} - -FILE * -cdftext::lex_open( const char filename[] ) { - int input = open_input( filename ); - if( input == -1 ) return NULL; - - int output = open_output(); - - for( auto name : included_files ) { - int input; - if( -1 == (input = open(name, O_RDONLY)) ) { - yyerrorvl(1, "", "cannot open -include file %s", name); - continue; - } - cobol_filename(name, inode_of(input)); - filespan_t mfile( free_form_reference_format( input ) ); - - process_file( mfile, output ); - } - - cobol_filename(filename, inode_of(input)); - filespan_t mfile( free_form_reference_format( input ) ); - - process_file( mfile, output ); - - if( lexer_echo() ) { - echo_input(output, filename); - } - - for( auto filter_pair : preprocessor_filters ) { - input = output; - output = open_output(); - - char *filter = filter_pair.first; - std::list<std::string>& options = filter_pair.second; - - char * argv[2 + options.size()] = { filter }; - - auto last_argv = std::transform( options.begin(), options.end(), argv + 1, - []( std::string& opt ) { - return 
strdup(opt.c_str()); - } ); - *last_argv = NULL; - - pid_t pid = fork(); - - switch(pid){ - case -1: cbl_err(EXIT_FAILURE, "%s", __func__); - break; - case 0: // child - if( -1 == dup2(input, STDIN_FILENO) ) { - cbl_err(EXIT_FAILURE, "%s: could not dup input", __func__); - } - if( -1 == dup2(output, STDOUT_FILENO) ) { - cbl_err(EXIT_FAILURE, "%s: could not dup output", __func__); - } - if( -1 == lseek(STDIN_FILENO, SEEK_SET, 0) ) { - cbl_err(EXIT_FAILURE, "%s: could not seek to start of file", __func__); - } - int erc; - if( -1 == (erc = execv(filter, argv)) ) { - cbl_warn("could not execute %s", filter); - } - _exit(erc); - } - int status; - auto kid = wait(&status); - assert(pid == kid); - if( kid == -1 ) cbl_err(EXIT_FAILURE, "failed waiting for pid %d", pid); - - if( WIFSIGNALED(status) ) { - cbl_errx(EXIT_FAILURE, "%s pid %d terminated by %s", - filter, kid, strsignal(WTERMSIG(status)) ); - } - if( WIFEXITED(status) ) { - if( (status = WEXITSTATUS(status)) != 0 ) { - cbl_errx(EXIT_FAILURE, "%s exited with status %d", - filter, status); - } - } - cbl_warnx( "applied %s", filter ); - } - - return fdopen( output, "r"); -} - -int -cdftext::open_input( const char filename[] ) { - extern int yydebug; - int fd = open(filename, O_RDONLY); - if( fd == -1 ) { - if( yydebug ) cbl_warn( "error: could not open '%s'", filename ); - } - - verbose_file_reader = NULL != getenv("GCOBOL_TEMPDIR"); - - if( verbose_file_reader ) { - cbl_warnx("verbose: opening %s for input", filename); - } - return fd; -} - -int -cdftext::open_output() { - static char stem[PATH_MAX]; - char *name = getenv("GCOBOL_TEMPDIR"); - int fd; - - if( name && 0 != strcmp(name, "/") ) { - sprintf(stem, "%sXXXXXX", name); - if( -1 == (fd = mkstemp(stem)) ) { - cbl_err(EXIT_FAILURE, - "error: could not open temporary file '%s' (%s)", - name, realpath(name, stem)); - } - return fd; - } - - FILE *fh = tmpfile(); - if( !fh ) { - cbl_err(EXIT_FAILURE, "error: could not create temporary file"); - } - - return 
fileno(fh); -} - -filespan_t -cdftext::map_file( int fd ) { - assert(fd > 0); - - filespan_t mfile; - mfile.use_nada(); - - struct stat sb; - do { - if( 0 != fstat(fd, &sb) ) { - cbl_err( EXIT_FAILURE, "%s: could not stat fd %d", __func__, fd ); - } - if( S_ISFIFO(sb.st_mode) ) { - // Copy FIFO to regular file that can be mapped. - int input = open_output(); - std::swap(fd, input); // fd will continue to be the input - static char block[4096 * 4]; - ssize_t n; - while( (n = read(input, block, sizeof(block))) != 0 ) { - ssize_t nout = write(fd, block, n); - if( nout != n ) { - cbl_err(EXIT_FAILURE, "%s: could not prepare map file from FIFO %d", - __func__, input); - } - if( false ) cbl_warnx("%s: copied %ld bytes from FIFO", - __func__, nout); - } - } - } while( S_ISFIFO(sb.st_mode) ); - - if( sb.st_size > 0 ) { - static const int flags = MAP_PRIVATE; - - void *p = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, flags, fd, 0); - if( p == MAP_FAILED ) { - cbl_err( EXIT_FAILURE, "%s: could not map fd %d", __func__, fd ); - } - - mfile.lineno = 0; - mfile.data = mfile.cur = mfile.eol = mfile.eodata = static_cast<char*>(p); - mfile.eodata += sb.st_size; - } - - return mfile; -} - -bool lexio_dialect_mf(); - -filespan_t -cdftext::free_form_reference_format( int input ) { - filespan_t source_buffer = map_file(input); - filespan_t mfile(source_buffer); - - /* - * current_line_t describes the segment of mapped file that is the - * "current line" being processed. Its only use is for line - * continuation, whether string literals or not. - */ - struct current_line_t { - size_t lineno; - bytespan_t line; - // construct with length zero - current_line_t( char data[] ) : lineno(0), line(data, data) {} - } current( mfile.data ); - - /* - * If the format is not explicitly set on the command line, test the - * first 6 bytes of the first file to determine the format - * heuristically. If the first 6 characters are only digits or - * blanks, then the file is in fixed format. 
- */ - - if( indicator.inference_pending() ) { - const char *p = mfile.data; - while( p < mfile.eodata ) { - const char * pend = - std::find(p, const_cast<const char *>(mfile.eodata), '\n'); - if( 6 < pend - p ) break; - p = ++pend; - } - if( valid_sequence_area(p, mfile.eodata) ) indicator.column = 7; - - if( yy_flex_debug ) { - cbl_warnx("%s:%d: %s format detected", __func__, __LINE__, - indicator.column == 7? "FIXED" : "FREE"); - } - } - - while( mfile.next_line() ) { - check_source_format_directive(mfile); - remove_inline_comment(mfile.cur, mfile.eol); - - if( mfile.is_blank_line() ) continue; - - char *indcol = indicated(mfile.cur, mfile.eol); // true only for fixed - // // format - - if( is_fixed_format() && !indcol ) { // short line - erase_source(mfile.cur, mfile.eol); - } - - if( indcol ) { - // Set to blank columns 1-6 and anything past the right margin. - erase_source(mfile.cur, indcol); - if( is_reference_format() ) { - if( mfile.cur + right_margin() < mfile.eol ) { - auto p = std::find(mfile.cur + right_margin(), mfile.eol, '\n'); - erase_source(mfile.cur + right_margin(), p); - } - } - - switch( toupper(*indcol) ) { - case '-': - assert(0 < current.line.size()); - /* - * The "current line" -- the line being continued -- may be many - * lines earlier (with many intervening newlines) or may intrude - * on its succeeding line. Erase the continuation line. - */ - { - char *pend = mfile.eol; - if( right_margin() ) { - pend = std::min(mfile.cur + right_margin(), mfile.eol); - } - // The appended segment has no newline because the erased line retains - // one. - pend = std::find(indcol + 1, pend, '\n'); - char *p = current.line.append(indcol + 1, pend ); - if( (p = std::max(p, mfile.cur)) < mfile.eol ) { - erase_source(p, mfile.eol); - } - } - continue; - case SPACE: - break; - case 'D': - /* - * Pass the D to the lexer, because WITH DEBUGGING MODE is - * parsed in the parser. This assumes too strict a rule: that - * all the source is in one format. 
In fact, DEBUGGING MODE - * could be set on, and >>SOURCE-FORMAT can switch back and - * forth. To solve that, we'd have to parse WITH DEBUGGING MODE - * in free_form_reference_format(), which is a lot of work for - * an obsolete feature. - */ - break; - case '*': - case '/': - if( indcol < mfile.eol - 1 ) { - erase_source(indcol, mfile.eol); - } - continue; - case '$': - if( lexio_dialect_mf() ) { - break; - } - __attribute__ ((fallthrough)); - default: // flag other characters in indicator area - if( ! isspace(indcol[0]) ) { - yyerrorvl( mfile.lineno, cobol_filename(), - "error: stray indicator '%c' (0x%0x): \"%.*s\"", - indcol[0], indcol[0], - int(mfile.line_length() - 1), mfile.cur ); - *indcol = SPACE; - } - break; - } - } - current.line.update(mfile.cur, mfile.eol, right_margin()); - current.lineno = mfile.lineno; - } // next line - - return source_buffer; -} - -const char * cobol_filename_restore(); - -#include <iostream> -#include <iterator> -#include <ext/stdio_filebuf.h> - -/* - * process_file is a recursive routine that opens and processes - * included files. It uses the input file stack in two ways: to check - * copybook uniqueness, and (via the lexer) to keep track filenames - * and line numbers. - * - * When reading copybook files, the copybook object enforces the rule - * that no copybook may include itself, even indirectly. It does that - * by relying on the unique_stack to deny a push. Because the reader - * makes no attempt to count lines, line numbers in the input stack - * are all 1 at this point. - * - * When returning from the top-level recursion, the input stack has - * the original file's name on top, with depth 1. At that point, the - * lexer begins tokenizing the input. - * - * The input stream sent to the lexer is delimited by #FILE tokens - * denoting the source filename. As far as the lexer is concerned, - * there's only ever one file: the name passed to lex_open() when we - * kicked things off. 
But messages and the debugger need to know - * which file and line each statment appeared in. - * - * The lexer uses the input stack to keep track of names and - * numbers. The top of the input file stack is the current file - * context, initially set to line 1. When the lexer sees a push, it - * updates the top-of-stack with the current line number, yylineno, - * and then pushes the copybook filename with line 1. When it sees a - * pop, the current file is popped, of course; its line number no - * longer matters. Then the top-of-stack is used to update the current - * cobol filename and yylineno. - */ -void -cdftext::process_file( filespan_t mfile, int output, bool second_pass ) { - static size_t nfiles = 0; - std::list<replace_t> replacements; - - __gnu_cxx::stdio_filebuf<char> outbuf(fdopen(output, "w"), std::ios::out); - std::ostream out(&outbuf); - std::ostream_iterator<char> ofs(out); - - // indicate current file - static const char file_push[] = "\f#FILE PUSH ", file_pop[] = "\f#FILE POP\f"; - - if( !second_pass && nfiles++ ) { - static const char delimiter[] = "\f"; - const char *filename = cobol_filename(); - std::copy(file_push, file_push + strlen(file_push), ofs); - std::copy(filename, filename + strlen(filename), ofs); - std::copy(delimiter, delimiter + strlen(delimiter), ofs); - out.flush(); - } - - // parse CDF directives - while( mfile.next_line() ) { - auto copied = parse_copy_directive(mfile); - if( copied.parsed && copied.fd != -1 ) { - assert(copied.erased_lines.p); - std::copy_if(copied.erased_lines.p, copied.erased_lines.pend, ofs, - []( char ch ) { return ch == '\n'; } ); - struct { int in, out; filespan_t mfile; } copy; - if( yy_flex_debug ) - cbl_warnx("%s:%d: line %zu, opening %s on fd %d", __func__, __LINE__, - mfile.lineno, - copybook.current()->source, copybook.current()->fd); - copy.in = copybook.current()->fd; - copy.mfile = free_form_reference_format( copy.in ); - - if( copied.partial_line.size() ) { - 
std::copy(copied.partial_line.p, copied.partial_line.pend, ofs); - } - out.flush(); - - if( copied.nreplace == 0 ) { - // process with extant REPLACE directive - process_file(copy.mfile, output); - } else { - copy.out = open_output(); - // process to intermediate, applying COPY ... REPLACING - process_file(copy.mfile, copy.out); - copy.mfile = map_file(copy.out); - replace_directives.pop(); - // process intermediate with extant REPLACE directive - process_file(copy.mfile, output, true); - // COPY statement is erased from input if processed successfully - } - cobol_filename_restore(); - } - - auto erased = parse_replace_directive(mfile, mfile.lineno); - if( erased.p ) { - std::copy_if( erased.p, erased.pend, ofs, - []( char ch ) { return ch == '\n'; } ); - } - if( replace_directives.empty() ) { - std::copy(mfile.cur, mfile.eol, ofs); - continue; // No active REPLACE directive. - } - - std::list<span_t> segments = segment_line(mfile); // no replace yields - // // 1 segment - - for( const auto& segment : segments ) { - std::copy(segment.p, segment.pend, ofs); - } - - if( segments.size() == 2 ) { - struct { - size_t before, after; - int delta() const { return before - after; } } nlines; - nlines.before = std::count(segments.front().p, - segments.front().pend, '\n'); - nlines.after = std::count(segments.back().p, segments.back().pend, '\n'); - if( nlines.delta() < 0 ) { - cbl_warnx("warning: line %zu: REPLACED %zu lines with %zu lines, " - "line count off by %d", mfile.lineno, - nlines.before, nlines.after, nlines.delta()); - } - int nnl = nlines.delta(); - while( nnl-- > 0 ) { - static const char nl[] = "\n"; - std::copy(nl, nl + 1, ofs); - } - } - out.flush(); - } - // end of file - if( !second_pass ) { - std::copy(file_pop, file_pop + strlen(file_pop), ofs); - out.flush(); - } -} - -std::list<span_t> -cdftext::segment_line( filespan_t& mfile ) { - std::list<span_t> output; - - assert( ! 
replace_directives.empty() ); - std::list<replace_t> pending; - recognize_replacements( mfile, pending ); - - if( pending.empty() ) { - output.push_back( span_t(mfile.cur, mfile.eol) ); - return output; - } - - for( const replace_t& segment : pending ) { - assert(mfile.cur <= segment.before.p); - assert(segment.before.pend <= mfile.eodata); - - output.push_back( span_t(mfile.cur, segment.before.p) ); - output.push_back( span_t(segment.after.p, segment.after.pend ) ); - - mfile.cur = const_cast<char*>(segment.before.pend); - } - - if( mfile.eol < mfile.cur ) { - if( (mfile.eol = std::find(mfile.cur, mfile.eodata, '\n')) < mfile.eodata ) { - mfile.eol++; - } - } - - // last segment takes to EOL - output.push_back( span_t(mfile.cur, mfile.eol) ); - - return output; -} - diff --git a/gcc/cobol/cobol-system.h b/gcc/cobol/cobol-system.h index edbdd43489c70677787c2ef34eb1a322181b7b64..956d0187a5c3464c36fec307e8f218156566e0a3 100644 --- a/gcc/cobol/cobol-system.h +++ b/gcc/cobol/cobol-system.h @@ -47,10 +47,11 @@ #define INCLUDE_LIST #define INCLUDE_ALGORITHM #define INCLUDE_NUMERIC -#define INCLUDE_STACK -//#include <cctype> +#include <iterator> +#include <stack> #include <deque> + #include <unordered_map> #include <unordered_set> diff --git a/gcc/cobol/failures/playpen/copy1.CPY b/gcc/cobol/failures/playpen/copy1.CPY deleted file mode 100644 index a858612aaca54f277799f831e08bc6a4e776173f..0000000000000000000000000000000000000000 --- a/gcc/cobol/failures/playpen/copy1.CPY +++ /dev/null @@ -1,3 +0,0 @@ - - COPY copy2. - 01 TEST-VAR PIC X(2) VALUE "V1". diff --git a/gcc/cobol/failures/playpen/copy2.CPY b/gcc/cobol/failures/playpen/copy2.CPY deleted file mode 100644 index 0c1fa648d52016c165eeb033815292bb3454ef8e..0000000000000000000000000000000000000000 --- a/gcc/cobol/failures/playpen/copy2.CPY +++ /dev/null @@ -1,3 +0,0 @@ - - 01 TEST-VAR2 PIC X(2) VALUE "V2". - COPY copy3. 
diff --git a/gcc/cobol/failures/playpen/copy3.CPY b/gcc/cobol/failures/playpen/copy3.CPY deleted file mode 100644 index b94d37d247d34e38eefdb5367a95e7fae8b01038..0000000000000000000000000000000000000000 --- a/gcc/cobol/failures/playpen/copy3.CPY +++ /dev/null @@ -1,2 +0,0 @@ - COPY "copy1.CPY". - 01 TEST-VAR3 PIC X(2) VALUE "V3". diff --git a/gcc/cobol/lexio.cc b/gcc/cobol/lexio.cc index f7725fb4800a857ef03ee3bbb09628583f525649..6ea1cea64ef7912cf6071f6f2a84c16ffd6a09a8 100644 --- a/gcc/cobol/lexio.cc +++ b/gcc/cobol/lexio.cc @@ -29,19 +29,10 @@ */ #include "lexio.h" - -#include <iterator> -#include <list> -#include <map> -#include <numeric> -#include <string> - -#include <fcntl.h> -#include <stdint.h> -#include <unistd.h> - -#include <sys/stat.h> -#include <sys/types.h> +#include <ext/stdio_filebuf.h> +#include "cobol-system.h" +#include "copybook.h" +#include "cbldiag.h" extern int yy_flex_debug; @@ -102,15 +93,10 @@ bool set_debug( bool tf ) { return debug_mode = tf && is_fixed_format(); } static bool nonblank( const char ch ) { return !isblank(ch); } -#include "util.h" -#include "copybook-old.h" - -#include <string> - static inline char * start_of_line( char *bol, char *eol ) { bol = std::find_if(bol, eol, nonblank); - assert(bol < eol); // must exist + gcc_assert(bol < eol); // must exist return bol; } @@ -129,7 +115,7 @@ indicated( char *bol, char *eol, char ch = '\0' ) { if( indicator.column == 0 && *bol != '*' ) { return NULL; // no indicator column in free format, except for comments } - assert(bol != NULL); + gcc_assert(bol != NULL); auto ind = bol + left_margin(); if( eol <= ind ) return NULL; // left margin would be after end of line // If TAB is in the line-number region, nothing is in the indicator column. 
@@ -189,7 +175,7 @@ std::stack< std::list<replace_t> > replace_directives; static bool is_word_or_quote( char ch ) { - return ch == '"' || ch == '\'' || isalnum(ch); + return ch == '"' || ch == '\'' || ISALNUM(ch); } /* * If the replacement is not leading/trailing, the edges of the @@ -200,7 +186,7 @@ static void maybe_add_space(const span_t& pattern, replace_t& recognized) { static const char blank[] = " "; const char *befter[2] = { "", "" }; - assert(0 < recognized.before.size()); + gcc_assert(0 < recognized.before.size()); // start of pattern and end of preceding text if( pattern.p[0] == '.' && is_word_or_quote(recognized.before.p[-1]) ) { @@ -275,7 +261,7 @@ recognize_replacements( filespan_t mfile, std::list<replace_t>& pending_replacem span_t found(mfile.eodata, mfile.eodata); if( regex_search( mfile.ccur(), (const char *)mfile.eodata, cm, re) ) { - assert(cm[1].matched); + gcc_assert(cm[1].matched); found = span_t( cm[1].first, cm[1].second ); if( yy_flex_debug ) { size_t n = count_newlines(mfile.data, found.p); @@ -293,8 +279,8 @@ recognize_replacements( filespan_t mfile, std::list<replace_t>& pending_replacem } } - assert(!futures.empty()); - assert(futures.size() == replace_directives.top().size()); + gcc_assert(!futures.empty()); + gcc_assert(futures.size() == replace_directives.top().size()); replace_t recognized; @@ -325,7 +311,7 @@ recognize_replacements( filespan_t mfile, std::list<replace_t>& pending_replacem regex re(next.directive.before.p, extended_icase); if( regex_search(bol, (const char *)mfile.eodata, cm, re) ) { - assert(cm[1].matched); + gcc_assert(cm[1].matched); next.found = span_t( cm[1].first, cm[1].second ); size_t n = std::count((const char *)mfile.data, next.found.p, '\n'); if( false ) @@ -352,7 +338,7 @@ check_source_format_directive( filespan_t& mfile ) { // show contents of marked subexpressions within each match cmatch cm; if( regex_search(p, (const char *)mfile.eol, cm, re) ) { - assert(cm.size() > 1); + gcc_assert(cm.size() > 
1); switch( cm[3].length() ) { case 4: cobol_set_indicator_column(0); @@ -361,7 +347,7 @@ check_source_format_directive( filespan_t& mfile ) { cobol_set_indicator_column(-7); break; default: - assert(cm[3].length() == 4 || cm[3].length() == 5); + gcc_assert(cm[3].length() == 4 || cm[3].length() == 5); break; } mfile.cur = const_cast<char*>(cm[0].second); @@ -385,10 +371,10 @@ struct buffer_t : public bytespan_t { } size_t nline() const { - assert(data <= pos); + gcc_assert(data <= pos); return std::count(data, pos, '\n'); } - size_t free_space() const { assert(pos <= eodata); return eodata - pos; } + size_t free_space() const { gcc_assert(pos <= eodata); return eodata - pos; } bool pad_lines( size_t goal ) { while( nline() < goal ) { @@ -400,7 +386,7 @@ struct buffer_t : public bytespan_t { void show() const { char *output; - assert(data <= pos); + gcc_assert(data <= pos); if( -1 == asprintf( &output, "%.*s", int(pos - data), data ) ) { return; } @@ -418,7 +404,7 @@ valid_sequence_area( const char *p, const char *eodata ) { if ( eodata < pend ) return false; for( ; p < pend; p++ ) { - if( ! (isdigit(*p) || *p == SPACE) ) { + if( ! (ISDIGIT(*p) || *p == SPACE) ) { return false; } } @@ -443,8 +429,8 @@ is_word_char( char ch ) { static bool is_numeric_char( char ch ) { - return isdigit(ch) - || toupper(ch) == 'E' + return ISDIGIT(ch) + || TOUPPER(ch) == 'E' || ch == '.' 
|| ch == ',' ; @@ -452,7 +438,7 @@ is_numeric_char( char ch ) { static bool is_numeric_term( span_t term ) { - assert(term.p); + gcc_assert(term.p); if( term.p[0] == '+' || term.p[0] == '-' ) term.p++; auto p = std::find_if( term.p, term.pend, []( char ch ) { @@ -472,7 +458,7 @@ struct replacing_term_t { static replacing_term_t parse_replacing_term( const char *stmt, const char *estmt ) { - assert(stmt); assert(estmt); assert(stmt < estmt); + gcc_assert(stmt); gcc_assert(estmt); gcc_assert(stmt < estmt); replacing_term_t output(stmt); static const char pattern[] = @@ -492,7 +478,7 @@ parse_replacing_term( const char *stmt, const char *estmt ) { if( ! regex_search( stmt, estmt, cm, re) ) return output; - bool replacing_term = cm[2].matched && toupper(cm[2].first[0]) == 'B'; + bool replacing_term = cm[2].matched && TOUPPER(cm[2].first[0]) == 'B'; if( cm[2].matched && ! replacing_term ) { output.leading_trailing = cm[2]; @@ -509,10 +495,10 @@ parse_replacing_term( const char *stmt, const char *estmt ) { output.stmt = span_t(cm[0].first, output.term.pend); if( cm[10].matched ) output.stmt.pend = cm[10].second; - if( cm[15].matched && isspace(cm[15].second[0]) ) { // matched end of statement + if( cm[15].matched && ISSPACE(cm[15].second[0]) ) { // matched end of statement output.done = output.matched = true; output.stmt = cm[0]; - assert(output.stmt.pend[-1] == '.'); + gcc_assert(output.stmt.pend[-1] == '.'); if( yy_flex_debug ) cbl_warnx("%s:%d: done at '%.*s'", __func__, __LINE__, output.term.size(), output.term.p); @@ -521,14 +507,14 @@ parse_replacing_term( const char *stmt, const char *estmt ) { if( is_numeric_term(output.term) ) { output.matched = output.stmt.p < output.term.p; - assert(output.matched); + gcc_assert(output.matched); // look for fractional part - if( is_numeric_char(*output.term.pend) && isdigit(output.term.pend[1]) ) { - assert(!isdigit(*output.term.pend)); + if( is_numeric_char(*output.term.pend) && ISDIGIT(output.term.pend[1]) ) { + 
gcc_assert(!ISDIGIT(*output.term.pend)); auto p = std::find_if(++output.term.pend, estmt, - []( char ch ) { return !isdigit(ch); } ); + []( char ch ) { return !ISDIGIT(ch); } ); output.stmt.pend = output.term.pend = p; - output.done = '.' == output.stmt.pend[0] && isspace(output.stmt.pend[1]); + output.done = '.' == output.stmt.pend[0] && ISSPACE(output.stmt.pend[1]); if( output.done ) output.stmt.pend++; } if( yy_flex_debug ) @@ -551,18 +537,18 @@ parse_replacing_term( const char *stmt, const char *estmt ) { if( !cm[8].matched ) { output.matched = output.stmt.p < output.term.p; - assert(output.matched); + gcc_assert(output.matched); if( yy_flex_debug ) cbl_warnx("%s:%d: term is '%.*s'", __func__, __LINE__, output.term.size(), output.term.p); return output; } - bool extraneous_replacing = 'R' == toupper(cm[8].first[0]); // maybe + bool extraneous_replacing = 'R' == TOUPPER(cm[8].first[0]); // maybe if( extraneous_replacing ) { // prove it static const char replacing[] = "REPLACING"; for( size_t i=0; i < strlen(replacing); i++ ) { - if( replacing[i] != toupper(cm[8].first[i]) ) { + if( replacing[i] != TOUPPER(cm[8].first[i]) ) { extraneous_replacing = false; break; } @@ -574,8 +560,8 @@ parse_replacing_term( const char *stmt, const char *estmt ) { } } - assert(cm[8].matched); - assert(0 < output.term.size()); + gcc_assert(cm[8].matched); + gcc_assert(0 < output.term.size()); if( yy_flex_debug ) cbl_warnx("%s:%d: more words starting at '%.80s'", __func__, __LINE__, @@ -604,7 +590,7 @@ parse_replacing_term( const char *stmt, const char *estmt ) { const csub_match& done(cm[8]); if( done.matched ) { output.done = output.matched = output.stmt.p < output.term.p; - assert(output.done); + gcc_assert(output.done); goto matched; } } @@ -681,7 +667,7 @@ parse_replacing_pair( const char *stmt, const char *estmt ) { } } else { for( auto p = stmt; (p = std::find(p, estmt, '.')) < estmt; p++ ) { - if( isspace(p[1]) ) { + if( ISSPACE(p[1]) ) { pair.stmt = span_t(stmt, ++p); break; 
} @@ -740,8 +726,8 @@ parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) { cm[i].matched? cm[i].first : ""); } } - assert(cm[3].matched); - assert(cm[5].matched); + gcc_assert(cm[3].matched); + gcc_assert(cm[5].matched); parsed.leading_trailing = cm[2]; parsed.replace.before = cm[3]; parsed.replace.after = cm[5]; @@ -749,7 +735,7 @@ parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) { parsed.stmt = cm[0]; // If not done, exclude trailing portion from statement match. if( !parsed.done() && cm[8].matched ) { - assert(!cm[9].matched); + gcc_assert(!cm[9].matched); parsed.stmt.pend = cm[8].first; } } @@ -758,14 +744,14 @@ parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) { span_t& after(parsed.replace.after); const char *befter[2] = { nonword_ch, nonword_ch }; - assert(before.p < before.pend); + gcc_assert(before.p < before.pend); if( !is_word_char(before.p[0]) ) befter[0] = any_ch; if( !is_word_char(before.pend[-1]) ) befter[1] = any_ch; const char *src = esc(before.size(), before.p); if( parsed.leading_trailing.size() > 0 ) { - switch( toupper(parsed.leading_trailing.p[0]) ) { + switch( TOUPPER(parsed.leading_trailing.p[0]) ) { case 'L': // leading befter[1] = word_ch; break; @@ -773,7 +759,7 @@ parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) { befter[0] = word_ch; break; default: - assert(false); + gcc_assert(false); } cbl_warnx("%s:%d: dealing with %.*s", __func__, __LINE__, int(parsed.leading_trailing.size()), parsed.leading_trailing.p); @@ -789,7 +775,7 @@ parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) { output.before = span_t(strlen(src), src); output.after = after.dup(); - assert(!before.has_nul()); + gcc_assert(!before.has_nul()); pairs.push_back( replace_t( output.before, output.after ) ); // COPY REPLACING matches end-of-statment here @@ -875,7 +861,7 @@ parse_copy_directive( filespan_t& mfile ) { if( mfile.cur <= 
copy_stmt.p && copy_stmt.p < mfile.eol ) { outcome.parsed = regex_search(copy_stmt.p, copy_stmt.pend, cm, re); - assert(outcome.parsed); + gcc_assert(outcome.parsed); outcome.partial_line = span_t(mfile.cur, copy_stmt.p); if( yy_flex_debug ) { @@ -895,9 +881,9 @@ parse_copy_directive( filespan_t& mfile ) { bool replacing = !cm[20].matched; if( library_name.matched ) { - copybook.library( strndup(library_name.first, library_name.length()) ); + copybook.library( xstrndup(library_name.first, library_name.length()) ); } - outcome.fd = copybook.open( strndup(copybook_name.first, copybook_name.length()) ); + outcome.fd = copybook.open( xstrndup(copybook_name.first, copybook_name.length()) ); if( outcome.fd == -1 ) { // let parser report missing copybook if( yy_flex_debug ) cbl_warnx("%s: copybook '%s' not found", __func__, copybook.current()->source); @@ -946,12 +932,12 @@ parse_replace_last_off( filespan_t& mfile ) { // REPLACE [LAST] OFF? bool found = regex_search(mfile.ccur(), (const char *)mfile.eodata, cm, re); - assert(found); // caller ensures + gcc_assert(found); // caller ensures - assert(cm.size() == 2); + gcc_assert(cm.size() == 2); // LAST OFF removes most recent REPLACE if( cm[1].matched ) { - assert(toupper(cm[1].first[0]) == 'L'); + gcc_assert(TOUPPER(cm[1].first[0]) == 'L'); if( ! replace_directives.empty() ) { replace_directives.pop(); } @@ -992,7 +978,7 @@ parse_replace_text( filespan_t& mfile, size_t current_lineno ) { if( false && yy_flex_debug ) { auto pend = mfile.eol; - assert(mfile.line_length() > 2); + gcc_assert(mfile.line_length() > 2); if( pend[-1] == '\n' ) pend -= 2; auto len = int(pend - mfile.cur); cbl_warnx("%s:%d: line %zu: parsing '%.*s", __func__, __LINE__, @@ -1018,7 +1004,7 @@ parse_replace_text( filespan_t& mfile, size_t current_lineno ) { } } - assert(cm.size() > 7); + gcc_assert(cm.size() > 7); // Update active REPLACE stack if( ! cm[1].matched ) { // ALSO pushes, else clear stack and push one. 
@@ -1026,7 +1012,7 @@ parse_replace_text( filespan_t& mfile, size_t current_lineno ) { replace_directives.pop(); } } else { - assert(toupper(cm[1].first[0]) == 'A'); + gcc_assert(TOUPPER(cm[1].first[0]) == 'A'); } span_t replace_stmt(cm[0].first, cm[0].second); @@ -1070,12 +1056,12 @@ parse_replace_directive( filespan_t& mfile, size_t current_lineno ) { if( regex_search(mfile.ccur(), (const char *)mfile.eodata, cm, re) ) { - assert(cm[1].matched); + gcc_assert(cm[1].matched); next_directive = cm[0].first; - switch( toupper(cm[1].first[0]) ) { + switch( TOUPPER(cm[1].first[0]) ) { case 'L': - off_coming_up = 'A' == toupper(cm[1].first[1]); // LAST OFF, else LEADING + off_coming_up = 'A' == TOUPPER(cm[1].first[1]); // LAST OFF, else LEADING break; case 'O': // OFF off_coming_up = true; @@ -1084,7 +1070,7 @@ parse_replace_directive( filespan_t& mfile, size_t current_lineno ) { off_coming_up = false; break; default: - assert(false); + gcc_assert(false); } } } @@ -1112,7 +1098,7 @@ parse_replace_directive( filespan_t& mfile, size_t current_lineno ) { */ char * bytespan_t::append( const char *input, const char *eoinput ) { - assert(data < eodata); + gcc_assert(data < eodata); #define LEXIO 0 #if LEXIO @@ -1122,19 +1108,19 @@ bytespan_t::append( const char *input, const char *eoinput ) { int(size()), data, nq, in_string()? "in string" : "not in string"); #endif if( !in_string() ) { // Remove trailing space unless it's part of a literal. 
- while(data < eodata && isspace(eodata[-1])) eodata--; - assert(isspace(eodata[0])); - assert(data == eodata || !isspace(eodata[-1])); + while(data < eodata && ISSPACE(eodata[-1])) eodata--; + gcc_assert(ISSPACE(eodata[0])); + gcc_assert(data == eodata || !ISSPACE(eodata[-1])); } // skip leading blanks - while( input < eoinput && isspace(*input) ) input++; + while( input < eoinput && ISSPACE(*input) ) input++; if( isquote(*input) ) input++; size_t len = eoinput - input; char * pend = eodata + len; int nnl = std::count(eodata, pend, '\n'); // newlines to be overwritten - assert(0 == std::count(input, eoinput, '\n')); // newlines in input + gcc_assert(0 == std::count(input, eoinput, '\n')); // newlines in input memmove(eodata, input, len); nnl += std::count(pend, pend + nnl, '\n'); // other newlines to be overwritten @@ -1156,7 +1142,7 @@ mapped_file( FILE *input ) { static std::map<int, filespan_t> inputs; int fd = fileno(input); - assert(fd > 0); + gcc_assert(fd > 0); filespan_t& mfile = inputs[fd]; if( mfile.data ) { @@ -1235,7 +1221,7 @@ lexer_input( char buf[], int max_size, FILE *input ) { return 0; } - assert( mfile.data <= mfile.cur && mfile.cur < mfile.eodata ); + gcc_assert( mfile.data <= mfile.cur && mfile.cur < mfile.eodata ); char *next = std::min(mfile.eodata, mfile.cur + max_size); buffer_t output(buf, buf + max_size); // initializes pos @@ -1258,11 +1244,11 @@ lexer_input( char buf[], int max_size, FILE *input ) { at_bol = *p == '\n'; } - assert( output.pos <= output.eodata ); + gcc_assert( output.pos <= output.eodata ); output.eodata = output.pos; mfile.cur = next; - assert(mfile.cur <= mfile.eodata); + gcc_assert(mfile.cur <= mfile.eodata); // Buffer full or input exhausted. print_lexer_input(output.data, output.eodata); @@ -1270,6 +1256,576 @@ lexer_input( char buf[], int max_size, FILE *input ) { return output.size(); } -#include "util.h" -#include "cdf_text.h" +// The following code was originally in a cdf_text.h file. 
But the only place +// it was accessed was right here. So I copied it here, and eliminated the +// .h file. RJD. + +// #include "cdf_text.h" + +static const char * +find_filter( const char filter[] ) { + + if( 0 == access(filter, X_OK) ) { + return filter; + } + + const char *path = getenv("PATH"); + if( ! path ) return NULL; + char *p = xstrdup(path), *eopath = p + strlen(p); + + while( *p != '\0' ) { + auto pend = std::find( p, eopath, ':' ); + if( *pend == ':' ) *pend++ = '\0'; + + static char name[PATH_MAX]; + + snprintf( name, sizeof(name), "%s/%s", p, filter ); + + if( 0 == access(name, X_OK) ) { + return name; + } + p = pend; + } + return NULL; +} + +bool verbose_file_reader = false; + +typedef std::pair <char *, std::list<std::string> > preprocessor_filter_t; +static std::list<preprocessor_filter_t> preprocessor_filters; +static std::list<const char *> included_files; + +bool +include_file_add(const char filename[]) { + struct stat sb; + if( -1 == stat(filename, &sb) ) return false; + included_files.push_back(filename); + return true; +} + +bool +preprocess_filter_add( const char input[] ) { + char filter[ strlen(input) + 1 ]; + strcpy(filter, input); + char *optstr = strchr(filter, ','); + std::list <std::string> options; + + if( optstr ) { + for( char *opt = optstr + 1; (opt = strtok(opt, ",")); opt = NULL ) { + options.push_back(opt); + } + *optstr = '\0'; + } + + auto filename = find_filter(filter); + if( !filename ) { + cbl_warnx("error: preprocessor '%s/%s' not found", + getcwd(NULL, 0), filter); + return false; + } + preprocessor_filters.push_back( std::make_pair(xstrdup(filename), options) ); + return true; +} + +void +cdftext::echo_input( int input, const char filename[] ) { + int fd; + if( -1 == (fd = dup(input)) ) { + cbl_warn( "could not open preprocessed file %s to echo to standard output", + filename ); + return; + } + + auto mfile = map_file(fd); + + if( -1 == write(STDOUT_FILENO, mfile.data, mfile.size()) ) { + cbl_warn( "could not write 
preprocessed file %s to standard output", + filename ); + } + if( -1 == munmap(mfile.data, mfile.size()) ) { + cbl_warn( "could not release mapped file" ); + } + if( -1 == close(fd) ) { + cbl_warn( "could not close mapped file" ); + } +} + +static inline ino_t +inode_of( int fd ) { + struct stat sb; + if( -1 == fstat(fd, &sb) ) { + cbl_err(EXIT_FAILURE, "could not stat fd %d", fd); + } + return sb.st_ino; +} + +FILE * +cdftext::lex_open( const char filename[] ) { + int input = open_input( filename ); + if( input == -1 ) return NULL; + + int output = open_output(); + + for( auto name : included_files ) { + int input; + if( -1 == (input = open(name, O_RDONLY)) ) { + yyerrorvl(1, "", "cannot open -include file %s", name); + continue; + } + cobol_filename(name, inode_of(input)); + filespan_t mfile( free_form_reference_format( input ) ); + + process_file( mfile, output ); + } + + cobol_filename(filename, inode_of(input)); + filespan_t mfile( free_form_reference_format( input ) ); + + process_file( mfile, output ); + + if( lexer_echo() ) { + echo_input(output, filename); + } + + for( auto filter_pair : preprocessor_filters ) { + input = output; + output = open_output(); + + char *filter = filter_pair.first; + std::list<std::string>& options = filter_pair.second; + + char * argv[2 + options.size()] = { filter }; + + auto last_argv = std::transform( options.begin(), options.end(), argv + 1, + []( std::string& opt ) { + return xstrdup(opt.c_str()); + } ); + *last_argv = NULL; + + pid_t pid = fork(); + + switch(pid){ + case -1: cbl_err(EXIT_FAILURE, "%s", __func__); + break; + case 0: // child + if( -1 == dup2(input, STDIN_FILENO) ) { + cbl_err(EXIT_FAILURE, "%s: could not dup input", __func__); + } + if( -1 == dup2(output, STDOUT_FILENO) ) { + cbl_err(EXIT_FAILURE, "%s: could not dup output", __func__); + } + if( -1 == lseek(STDIN_FILENO, SEEK_SET, 0) ) { + cbl_err(EXIT_FAILURE, "%s: could not seek to start of file", __func__); + } + int erc; + if( -1 == (erc = 
execv(filter, argv)) ) { + cbl_warn("could not execute %s", filter); + } + _exit(erc); + } + int status; + auto kid = wait(&status); + gcc_assert(pid == kid); + if( kid == -1 ) cbl_err(EXIT_FAILURE, "failed waiting for pid %d", pid); + + if( WIFSIGNALED(status) ) { + cbl_errx(EXIT_FAILURE, "%s pid %d terminated by %s", + filter, kid, strsignal(WTERMSIG(status)) ); + } + if( WIFEXITED(status) ) { + if( (status = WEXITSTATUS(status)) != 0 ) { + cbl_errx(EXIT_FAILURE, "%s exited with status %d", + filter, status); + } + } + cbl_warnx( "applied %s", filter ); + } + + return fdopen( output, "r"); +} + +int +cdftext::open_input( const char filename[] ) { + extern int yydebug; + int fd = open(filename, O_RDONLY); + if( fd == -1 ) { + if( yydebug ) cbl_warn( "error: could not open '%s'", filename ); + } + + verbose_file_reader = NULL != getenv("GCOBOL_TEMPDIR"); + + if( verbose_file_reader ) { + cbl_warnx("verbose: opening %s for input", filename); + } + return fd; +} + +int +cdftext::open_output() { + static char stem[PATH_MAX]; + char *name = getenv("GCOBOL_TEMPDIR"); + int fd; + + if( name && 0 != strcmp(name, "/") ) { + sprintf(stem, "%sXXXXXX", name); + if( -1 == (fd = mkstemp(stem)) ) { + cbl_err(EXIT_FAILURE, + "error: could not open temporary file '%s' (%s)", + name, realpath(name, stem)); + } + return fd; + } + + FILE *fh = tmpfile(); + if( !fh ) { + cbl_err(EXIT_FAILURE, "error: could not create temporary file"); + } + + return fileno(fh); +} + +filespan_t +cdftext::map_file( int fd ) { + gcc_assert(fd > 0); + + filespan_t mfile; + mfile.use_nada(); + + struct stat sb; + do { + if( 0 != fstat(fd, &sb) ) { + cbl_err( EXIT_FAILURE, "%s: could not stat fd %d", __func__, fd ); + } + if( S_ISFIFO(sb.st_mode) ) { + // Copy FIFO to regular file that can be mapped. 
+ int input = open_output(); + std::swap(fd, input); // fd will continue to be the input + static char block[4096 * 4]; + ssize_t n; + while( (n = read(input, block, sizeof(block))) != 0 ) { + ssize_t nout = write(fd, block, n); + if( nout != n ) { + cbl_err(EXIT_FAILURE, "%s: could not prepare map file from FIFO %d", + __func__, input); + } + if( false ) cbl_warnx("%s: copied %ld bytes from FIFO", + __func__, nout); + } + } + } while( S_ISFIFO(sb.st_mode) ); + + if( sb.st_size > 0 ) { + static const int flags = MAP_PRIVATE; + + void *p = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, flags, fd, 0); + if( p == MAP_FAILED ) { + cbl_err( EXIT_FAILURE, "%s: could not map fd %d", __func__, fd ); + } + + mfile.lineno = 0; + mfile.data = mfile.cur = mfile.eol = mfile.eodata = static_cast<char*>(p); + mfile.eodata += sb.st_size; + } + + return mfile; +} + +bool lexio_dialect_mf(); + +filespan_t +cdftext::free_form_reference_format( int input ) { + filespan_t source_buffer = map_file(input); + filespan_t mfile(source_buffer); + + /* + * current_line_t describes the segment of mapped file that is the + * "current line" being processed. Its only use is for line + * continuation, whether string literals or not. + */ + struct current_line_t { + size_t lineno; + bytespan_t line; + // construct with length zero + current_line_t( char data[] ) : lineno(0), line(data, data) {} + } current( mfile.data ); + + /* + * If the format is not explicitly set on the command line, test the + * first 6 bytes of the first file to determine the format + * heuristically. If the first 6 characters are only digits or + * blanks, then the file is in fixed format. 
+ */ + + if( indicator.inference_pending() ) { + const char *p = mfile.data; + while( p < mfile.eodata ) { + const char * pend = + std::find(p, const_cast<const char *>(mfile.eodata), '\n'); + if( 6 < pend - p ) break; + p = ++pend; + } + if( valid_sequence_area(p, mfile.eodata) ) indicator.column = 7; + + if( yy_flex_debug ) { + cbl_warnx("%s:%d: %s format detected", __func__, __LINE__, + indicator.column == 7? "FIXED" : "FREE"); + } + } + + while( mfile.next_line() ) { + check_source_format_directive(mfile); + remove_inline_comment(mfile.cur, mfile.eol); + + if( mfile.is_blank_line() ) continue; + + char *indcol = indicated(mfile.cur, mfile.eol); // true only for fixed + // // format + + if( is_fixed_format() && !indcol ) { // short line + erase_source(mfile.cur, mfile.eol); + } + + if( indcol ) { + // Set to blank columns 1-6 and anything past the right margin. + erase_source(mfile.cur, indcol); + if( is_reference_format() ) { + if( mfile.cur + right_margin() < mfile.eol ) { + auto p = std::find(mfile.cur + right_margin(), mfile.eol, '\n'); + erase_source(mfile.cur + right_margin(), p); + } + } + + switch( TOUPPER(*indcol) ) { + case '-': + gcc_assert(0 < current.line.size()); + /* + * The "current line" -- the line being continued -- may be many + * lines earlier (with many intervening newlines) or may intrude + * on its succeeding line. Erase the continuation line. + */ + { + char *pend = mfile.eol; + if( right_margin() ) { + pend = std::min(mfile.cur + right_margin(), mfile.eol); + } + // The appended segment has no newline because the erased line retains + // one. + pend = std::find(indcol + 1, pend, '\n'); + char *p = current.line.append(indcol + 1, pend ); + if( (p = std::max(p, mfile.cur)) < mfile.eol ) { + erase_source(p, mfile.eol); + } + } + continue; + case SPACE: + break; + case 'D': + /* + * Pass the D to the lexer, because WITH DEBUGGING MODE is + * parsed in the parser. This assumes too strict a rule: that + * all the source is in one format. 
In fact, DEBUGGING MODE + * could be set on, and >>SOURCE-FORMAT can switch back and + * forth. To solve that, we'd have to parse WITH DEBUGGING MODE + * in free_form_reference_format(), which is a lot of work for + * an obsolete feature. + */ + break; + case '*': + case '/': + if( indcol < mfile.eol - 1 ) { + erase_source(indcol, mfile.eol); + } + continue; + case '$': + if( lexio_dialect_mf() ) { + break; + } + __attribute__ ((fallthrough)); + default: // flag other characters in indicator area + if( ! ISSPACE(indcol[0]) ) { + yyerrorvl( mfile.lineno, cobol_filename(), + "error: stray indicator '%c' (0x%0x): \"%.*s\"", + indcol[0], indcol[0], + int(mfile.line_length() - 1), mfile.cur ); + *indcol = SPACE; + } + break; + } + } + current.line.update(mfile.cur, mfile.eol, right_margin()); + current.lineno = mfile.lineno; + } // next line + + return source_buffer; +} + +const char * cobol_filename_restore(); + +/* + * process_file is a recursive routine that opens and processes + * included files. It uses the input file stack in two ways: to check + * copybook uniqueness, and (via the lexer) to keep track of filenames + * and line numbers. + * + * When reading copybook files, the copybook object enforces the rule + * that no copybook may include itself, even indirectly. It does that + * by relying on the unique_stack to deny a push. Because the reader + * makes no attempt to count lines, line numbers in the input stack + * are all 1 at this point. + * + * When returning from the top-level recursion, the input stack has + * the original file's name on top, with depth 1. At that point, the + * lexer begins tokenizing the input. + * + * The input stream sent to the lexer is delimited by #FILE tokens + * denoting the source filename. As far as the lexer is concerned, + * there's only ever one file: the name passed to lex_open() when we + * kicked things off. But messages and the debugger need to know + * which file and line each statement appeared in. 
+ * + * The lexer uses the input stack to keep track of names and + * numbers. The top of the input file stack is the current file + * context, initially set to line 1. When the lexer sees a push, it + * updates the top-of-stack with the current line number, yylineno, + * and then pushes the copybook filename with line 1. When it sees a + * pop, the current file is popped, of course; its line number no + * longer matters. Then the top-of-stack is used to update the current + * cobol filename and yylineno. + */ +void +cdftext::process_file( filespan_t mfile, int output, bool second_pass ) { + static size_t nfiles = 0; + std::list<replace_t> replacements; + + __gnu_cxx::stdio_filebuf<char> outbuf(fdopen(output, "w"), std::ios::out); + std::ostream out(&outbuf); + std::ostream_iterator<char> ofs(out); + + // indicate current file + static const char file_push[] = "\f#FILE PUSH ", file_pop[] = "\f#FILE POP\f"; + + if( !second_pass && nfiles++ ) { + static const char delimiter[] = "\f"; + const char *filename = cobol_filename(); + std::copy(file_push, file_push + strlen(file_push), ofs); + std::copy(filename, filename + strlen(filename), ofs); + std::copy(delimiter, delimiter + strlen(delimiter), ofs); + out.flush(); + } + + // parse CDF directives + while( mfile.next_line() ) { + auto copied = parse_copy_directive(mfile); + if( copied.parsed && copied.fd != -1 ) { + gcc_assert(copied.erased_lines.p); + std::copy_if(copied.erased_lines.p, copied.erased_lines.pend, ofs, + []( char ch ) { return ch == '\n'; } ); + struct { int in, out; filespan_t mfile; } copy; + if( yy_flex_debug ) + cbl_warnx("%s:%d: line %zu, opening %s on fd %d", __func__, __LINE__, + mfile.lineno, + copybook.current()->source, copybook.current()->fd); + copy.in = copybook.current()->fd; + copy.mfile = free_form_reference_format( copy.in ); + + if( copied.partial_line.size() ) { + std::copy(copied.partial_line.p, copied.partial_line.pend, ofs); + } + out.flush(); + + if( copied.nreplace == 0 ) { + // 
process with extant REPLACE directive + process_file(copy.mfile, output); + } else { + copy.out = open_output(); + // process to intermediate, applying COPY ... REPLACING + process_file(copy.mfile, copy.out); + copy.mfile = map_file(copy.out); + replace_directives.pop(); + // process intermediate with extant REPLACE directive + process_file(copy.mfile, output, true); + // COPY statement is erased from input if processed successfully + } + cobol_filename_restore(); + } + + auto erased = parse_replace_directive(mfile, mfile.lineno); + if( erased.p ) { + std::copy_if( erased.p, erased.pend, ofs, + []( char ch ) { return ch == '\n'; } ); + } + if( replace_directives.empty() ) { + std::copy(mfile.cur, mfile.eol, ofs); + continue; // No active REPLACE directive. + } + + std::list<span_t> segments = segment_line(mfile); // no replace yields + // // 1 segment + + for( const auto& segment : segments ) { + std::copy(segment.p, segment.pend, ofs); + } + + if( segments.size() == 2 ) { + struct { + size_t before, after; + int delta() const { return before - after; } } nlines; + nlines.before = std::count(segments.front().p, + segments.front().pend, '\n'); + nlines.after = std::count(segments.back().p, segments.back().pend, '\n'); + if( nlines.delta() < 0 ) { + cbl_warnx("warning: line %zu: REPLACED %zu lines with %zu lines, " + "line count off by %d", mfile.lineno, + nlines.before, nlines.after, nlines.delta()); + } + int nnl = nlines.delta(); + while( nnl-- > 0 ) { + static const char nl[] = "\n"; + std::copy(nl, nl + 1, ofs); + } + } + out.flush(); + } + // end of file + if( !second_pass ) { + std::copy(file_pop, file_pop + strlen(file_pop), ofs); + out.flush(); + } +} + +std::list<span_t> +cdftext::segment_line( filespan_t& mfile ) { + std::list<span_t> output; + + gcc_assert( ! 
replace_directives.empty() ); + std::list<replace_t> pending; + recognize_replacements( mfile, pending ); + + if( pending.empty() ) { + output.push_back( span_t(mfile.cur, mfile.eol) ); + return output; + } + + for( const replace_t& segment : pending ) { + gcc_assert(mfile.cur <= segment.before.p); + gcc_assert(segment.before.pend <= mfile.eodata); + + output.push_back( span_t(mfile.cur, segment.before.p) ); + output.push_back( span_t(segment.after.p, segment.after.pend ) ); + + mfile.cur = const_cast<char*>(segment.before.pend); + } + + if( mfile.eol < mfile.cur ) { + if( (mfile.eol = std::find(mfile.cur, mfile.eodata, '\n')) < mfile.eodata ) { + mfile.eol++; + } + } + + // last segment takes to EOL + output.push_back( span_t(mfile.cur, mfile.eol) ); + + return output; +} +//////// End of the cdf_text.h file