Skip to content
Snippets Groups Projects
cobol1.cc 21.6 KiB
Newer Older
 * Copyright (c) 2021-2024 Symas Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following disclaimer
 *   in the documentation and/or other materials provided with the
 *   distribution.
 * * Neither the name of the Symas Corporation nor the names of its
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/* Cobol compiler
   Copyright (C) 2016 Free Software Foundation, Inc.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

rdubner's avatar
rdubner committed

rdubner's avatar
rdubner committed
#include "cobol-system.h"
#include "coretypes.h"
#include "tree.h"
#include "diagnostic.h"
#include "opts.h"
#include "debug.h"
#include "langhooks.h"
#include "langhooks-def.h"
#include "stringpool.h"
rdubner's avatar
rdubner committed
#define HOWEVER_GCC_DEFINES_TREE 1
#include "genapi.h"
#include "ec.h"
#include "exceptl.h"
#include "exceptg.h"
rdubner's avatar
rdubner committed
#include "util.h"
/* Required language-dependent contents of a type.  */

struct GTY (()) lang_type
    {
    char dummy;
    };

/* Language-dependent contents of a decl.  */

struct GTY (()) lang_decl
    {
    char dummy;
    };

/*
 * Language-dependent contents of an identifier.
 * This must include a tree_identifier.
 */
struct GTY (()) lang_identifier
    {
    struct tree_identifier common;
    };

/* The resulting tree type.  */

union GTY ((desc ("TREE_CODE (&%h.generic) == IDENTIFIER_NODE"),
                chain_next ("CODE_CONTAINS_STRUCT (TREE_CODE (&%h.generic), "
                            "TS_COMMON) ? ((union lang_tree_node *) TREE_CHAIN "
                            "(&%h.generic)) : NULL"))) lang_tree_node
    {
    union tree_node GTY ((tag ("0"), desc ("tree_node_structure (&%h)"))) generic;
    struct lang_identifier GTY ((tag ("1"))) identifier;
    };

/* We don't use language_function.  */

struct GTY (()) language_function
    {
    int dummy;
    };

/*
 *  Language hooks.
 */

static bool
cobol_langhook_init (void)
    {
    build_common_tree_nodes (true);

    void_list_node = build_tree_list (NULL_TREE, void_type_node);

    build_common_builtin_nodes ();

    return true;
    }


void cobol_set_debugging( bool flex, bool yacc, bool parser );
void cobol_set_indicator_column( int column );
void copybook_directory_add( const char gcob_copybook[] );
James K. Lowden's avatar
James K. Lowden committed
void copybook_extension_add( const char ext[] );
bool defined_cmd( const char arg[] );
void lexer_echo( bool tf );

static void
cobol_langhook_init_options_struct (struct gcc_options *opts) {
  opts->x_yy_flex_debug = 0;
  opts->x_yy_debug = 0;
  opts->x_cobol_trace_debug = 0;

  cobol_set_debugging( false, false, false );

  copybook_directory_add( getenv("GCOB_COPYBOOK") );
}

static unsigned int
cobol_option_lang_mask (void) {
  return CL_Cobol;
}

void parser_next_is_main(bool is_main);
bool use_static_call( bool yn );
void add_cobol_exception( ec_type_t type, bool );
bool include_file_add(const char input[]);
bool preprocess_filter_add( const char filter[] );

bool max_errors_exceeded( int nerr ) {
  return flag_max_errors != 0 && flag_max_errors <= nerr;
}

static void
enable_exceptions( bool enable ) {
  for( char * name = xstrdup(cobol_exceptions);
       NULL != (name = strtok(name, ",")); name = NULL ) {
    ec_type_t type = ec_type_of(name);
    if( type == ec_none_e ) {
      yyerror("unrecognized exception '%s' was ignored", name);
    }
    ec_disposition_t disposition = ec_type_disposition(type);
    if( disposition != ec_implemented(disposition) ) {
      yyerror("exception '%s' is not implemented", name);
    add_cobol_exception(type, enable );
static bool
cobol_langhook_handle_option (size_t scode,
                              const char *arg ATTRIBUTE_UNUSED,
                              HOST_WIDE_INT value,
                              int kind ATTRIBUTE_UNUSED,
                              location_t loc ATTRIBUTE_UNUSED,
                              const struct
                              cl_option_handlers *handlers ATTRIBUTE_UNUSED)
    {
    // process_command (decoded_options_count, decoded_options);
    enum opt_code code = (enum opt_code) scode;

    switch(code)
        {
        case OPT_D:
            defined_cmd(arg);
            return true;
        case OPT_E:
            lexer_echo(true);
            return true;

        case OPT_I:
            copybook_directory_add(arg);
            return true;
James K. Lowden's avatar
James K. Lowden committed
        case OPT_copyext:
            copybook_extension_add(cobol_copyext);
            return true;

        case OPT_fstatic_call:
            use_static_call( arg? true : false );
            return true;
James K. Lowden's avatar
James K. Lowden committed

        case OPT_fdefaultbyte:
            wsclear(cobol_default_byte);
James K. Lowden's avatar
James K. Lowden committed
            return true;
            
        case OPT_fflex_debug:
            yy_flex_debug = 1;
            cobol_set_debugging( true, yy_debug == 1, cobol_trace_debug == 1 );
            return true;
        case OPT_fyacc_debug:
            yy_debug = 1;
            cobol_set_debugging(yy_flex_debug == 1,
                                true,
                                cobol_trace_debug == 1 );
            return true;
        case OPT_ftrace_debug:
            cobol_set_debugging( yy_flex_debug == 1, yy_debug == 1, true );
            return true;

        case OPT_fcobol_exceptions: {
            if( cobol_exceptions[0] == '=' ) cobol_exceptions++;
            enable_exceptions(value == 1);
            return true;
        }

        case OPT_fmax_errors:
            flag_max_errors = atoi(arg);
            return true;

        case OPT_ffixed_form:
            cobol_set_indicator_column(-7);
            return true;
        case OPT_ffree_form:
            cobol_set_indicator_column(0);
            return true;

        case OPT_findicator_column:
            cobol_set_indicator_column( indicator_column );
            return true;

        case OPT_dialect:
            cobol_dialect_set(cbl_dialect_t(cobol_dialect));
            return true;
James K. Lowden's avatar
James K. Lowden committed
        case OPT_fsyntax_only:
          mode_syntax_only(identification_div_e);
          break;
        case OPT_preprocess:
          if( ! preprocess_filter_add(arg) ) {
            cbl_errx( "could not execute preprocessor %s", arg);
        case OPT_include:
          if( ! include_file_add(cobol_include) ) {
            cbl_errx( "could not include %s", cobol_include);

        case OPT_main:
            // This isn't right.  All OPT_main should be replaced
            error("We should never see a non-equal dash-main in cobol1.c");
            exit(1);
            return true;

        case OPT_main_:
            register_main_switch(cobol_main_string);
            return true;

rdubner's avatar
rdubner committed
        case OPT_nomain:
            return true;

            cobol_gcobol_feature_set(feature_internal_ebcdic_e);
            return true;

        default:
            break;
        }

    Cobol_handle_option_auto (&global_options, &global_options_set,
                              scode, arg, value,
                              cobol_option_lang_mask (), kind,
                              loc, handlers, global_dc);

    return true;
    }

void
cobol_parse_files (int nfile, const char **files);

static void
cobol_langhook_parse_file (void)
    {
    cobol_parse_files (num_in_fnames, in_fnames);
    }

static tree
cobol_langhook_type_for_mode (enum machine_mode mode, int unsignedp)
    {
    if (mode == TYPE_MODE (float_type_node))
        return float_type_node;

    if (mode == TYPE_MODE (double_type_node))
        return double_type_node;

    if (mode == TYPE_MODE (float32_type_node))
        return float32_type_node;

    if (mode == TYPE_MODE (float64_type_node))
        return float64_type_node;

    if (mode == TYPE_MODE (float128_type_node))
        return float128_type_node;

    if (mode == TYPE_MODE (intQI_type_node))
        return unsignedp ? unsigned_intQI_type_node : intQI_type_node;
    if (mode == TYPE_MODE (intHI_type_node))
        return unsignedp ? unsigned_intHI_type_node : intHI_type_node;
    if (mode == TYPE_MODE (intSI_type_node))
        return unsignedp ? unsigned_intSI_type_node : intSI_type_node;
    if (mode == TYPE_MODE (intDI_type_node))
        return unsignedp ? unsigned_intDI_type_node : intDI_type_node;
    if (mode == TYPE_MODE (intTI_type_node))
        return unsignedp ? unsigned_intTI_type_node : intTI_type_node;

    if (mode == TYPE_MODE (integer_type_node))
        return unsignedp ? unsigned_type_node : integer_type_node;

    if (mode == TYPE_MODE (long_integer_type_node))
        return unsignedp ? long_unsigned_type_node : long_integer_type_node;

    if (mode == TYPE_MODE (long_long_integer_type_node))
        return unsignedp ? long_long_unsigned_type_node
               : long_long_integer_type_node;

    if (COMPLEX_MODE_P (mode))
        {
        if (mode == TYPE_MODE (complex_float_type_node))
            return complex_float_type_node;
        if (mode == TYPE_MODE (complex_double_type_node))
            return complex_double_type_node;
        if (mode == TYPE_MODE (complex_long_double_type_node))
            return complex_long_double_type_node;
        if (mode == TYPE_MODE (complex_integer_type_node) && !unsignedp)
            return complex_integer_type_node;
        }

    /* gcc_unreachable */
    return NULL;
    }

static tree
cobol_langhook_type_for_size (unsigned int bits ATTRIBUTE_UNUSED,
                              int unsignedp ATTRIBUTE_UNUSED)
    {
    gcc_unreachable ();
    return NULL;
    }

/* Record a builtin function.  We just ignore builtin functions.  */

static tree
cobol_langhook_builtin_function (tree decl)
    {
    return decl;
    }

static bool
cobol_langhook_global_bindings_p (void)
    {
    return false;
    }

static tree
cobol_langhook_pushdecl (tree decl ATTRIBUTE_UNUSED)
    {
    gcc_unreachable ();
    }

static tree
cobol_langhook_getdecls (void)
    {
    return NULL;
    }

char *
cobol_name_mangler(const char *cobol_name_)
    {
    // This is a definitive solution to the problem of hyphens.  COBOL
    // names can't start with underscore; GNU assembler names can.
    // Thus, the 12-char name RE-TURN-CODE gets converted to
    // _12_RE_TURN_CODE2_7  The 2 and 7 are the offsets to the underscore
    // characters that get turned back into hyphens to reconstruct the original

    // Names without hyphens just get converted to lowercase; no prefix or postfix.

    // We need to build that string, which means we need room to build it in.

    // The length N is strlen(cobol_name_).  The worst-case valid cobol
    // symbol can be a character followed by (N-1) hyphens. That would turn
    // into a string about this number of characters
    // _123_    5 characters.  Room for names with three digits of characters
    // N                       The actual label
    // Each hyphen gets "xxx_ tacked on.  This is an overestimate, but it's not
    // worth trying to get it exactly right.

    char *cobol_name = xstrdup(cobol_name_);

    size_t N = strlen(cobol_name);
    size_t NLENGTH = 5 + N + (N-1)*4;

    // Convert the cobol_name_ to lowercase
    char *d = cobol_name;
    const char *s = cobol_name_;
    bool has_dash = false;

    if( cobol_name_[0] >= '0' && cobol_name_[0] <= '9' )
        {
        // The name starts with 0-9, so we are going to lead it
        // with an underscore
        has_dash = true;
        }

    while(*s)
        {
        int ch = *s++;
        if( ch == '-' )
            {
            // There is an embedded hyphen, so we are going to lead it
            // with an underscore
            has_dash = true;
            }
        *d++ = TOLOWER(ch);
        }
    *d++ = '\0';

    int nlength = strlen(cobol_name);
    char *psz = (char *)xmalloc(NLENGTH);
    char achsuffix[12];
    int offset;
    if( has_dash )
        {
        // There is at least one hyphen:
        // Put the length of cobol_string at the front:
        offset = sprintf(psz,"_%zd_",strlen(cobol_name));

        // tack on the actual cobol_name
        strcat(psz,cobol_name);

        // walk the name, looking for hyphens:
        int more_than_one = 0;
        for(int i=0; i<nlength; i++)
            {
            if( psz[offset + i] == '-' )
                {
                // Convert the hyphen to an underscore
                psz[offset + i] = '_';

                // Append the index to the underscore:
                sprintf(achsuffix,"%s%d",(more_than_one++ ? "_" : ""),i);
                strcat(psz, achsuffix);
                }
            }
        }
    else
        {
        strcpy(psz, cobol_name);
        }
    free(cobol_name);

    if( getenv("SHOW_MANGLED") )
      {
      fprintf(stderr, "mangled %32s to %-32s in %s\n", cobol_name_, psz, __func__);
      }

    for(size_t i=0; i<strlen(psz); i++)
      {
      if( psz[i] & 0x80 )
        {
        fprintf(stderr, "non-ASCII character in %s\n",__func__);
        fprintf(stderr, "   the symbol is \"%s\"\n", cobol_name_);
        gcc_assert(false);
        break;
        }
      }

    return psz;
    }

char *
cobol_name_mangler_callback(const char *cobol_name_)
    {
    // This is a definitive solution to the problem of hyphens.  COBOL
    // names can't start with underscore; GNU assembler names can.
    // Thus, the 12-char name RE-TURN-CODE gets converted to
    // _12_RE_TURN_CODE2_7  The 2 and 7 are the offsets to the underscore
    // characters that get turned back into hyphens to reconstruct the original

    // Names without hyphens just get converted to lowercase; no prefix or postfix.

    // We need to build that string, which means we need room to build it in.

    // The length N is strlen(cobol_name_).  The worst-case valid cobol
    // symbol can be a character followed by (N-1) hyphens. That would turn
    // into a string about this number of characters
    // _123_    5 characters.  Room for names with three digits of characters
    // N                       The actual label
    // Each hyphen gets "xxx_ tacked on.  This is an overestimate, but it's not
    // worth trying to get it exactly right.

    char *cobol_name = xstrdup(cobol_name_);

    size_t N = strlen(cobol_name);
    size_t NLENGTH = 5 + N + (N-1)*4;

    // Convert the cobol_name_ to lowercase
    char *d = cobol_name;
    const char *s = cobol_name_;
    bool has_dash = false;

    if( cobol_name_[0] >= '0' && cobol_name_[0] <= '9' )
        {
        // The name starts with 0-9, so we are going to lead it
        // with an underscore
        has_dash = true;
        }

    while(*s)
        {
        int ch = *s++;
        if( ch == '-' )
            {
            // There is an embedded hyphen, so we are going to lead it
            // with an underscore
            has_dash = true;
            }
        *d++ = TOLOWER(ch);
        }
    *d++ = '\0';

    int nlength = strlen(cobol_name);
    char *psz = (char *)xmalloc(NLENGTH);
    char achsuffix[12];
    int offset;
    if( has_dash )
        {
        // There is at least one hyphen:
        // Put the length of cobol_string at the front:
        offset = sprintf(psz,"_%zd_",strlen(cobol_name));

        // tack on the actual cobol_name
        strcat(psz,cobol_name);

        // walk the name, looking for hyphens:
        int more_than_one = 0;
        for(int i=0; i<nlength; i++)
            {
            if( psz[offset + i] == '-' )
                {
                // Convert the hyphen to an underscore
                psz[offset + i] = '_';

                // Append the index to the underscore:
                sprintf(achsuffix,"%s%d",(more_than_one++ ? "_" : ""),i);
                strcat(psz, achsuffix);
                }
            }
        }
    else
        {
        strcpy(psz, cobol_name);
        }
    free(cobol_name);

    if( getenv("SHOW_MANGLED") )
      {
      fprintf(stderr, "mangled %32s to %-32s in %s\n", cobol_name_, psz, __func__);
      }

    for(size_t i=0; i<strlen(psz); i++)
      {
      if( psz[i] & 0x80 )
        {
        fprintf(stderr, "non-ASCII character in %s\n",__func__);
        fprintf(stderr, "   the symbol is \"%s\"\n", cobol_name_);
        gcc_assert(false);
        break;
        }
      }

    return psz;
    }

cbl_call_convention_t parser_call_target_convention( tree func );

static
void
cobol_set_decl_assembler_name (tree decl)
    {
    tree id;

      /* set_decl_assembler_name may be called on TYPE_DECL to record ODR
         name for C++ types.  By default types have no ODR names.  */
    if (TREE_CODE (decl) == TYPE_DECL)
        {
        return;
        }

    /* The language-independent code should never use the
     DECL_ASSEMBLER_NAME for lots of DECLs.  Only FUNCTION_DECLs and
     VAR_DECLs for variables with static storage duration need a real
     DECL_ASSEMBLER_NAME.  */
    gcc_assert (TREE_CODE (decl) == FUNCTION_DECL
        || (VAR_P (decl) && (TREE_STATIC (decl)
                            || DECL_EXTERNAL (decl)
                            || TREE_PUBLIC (decl))));

    const char *name = IDENTIFIER_POINTER (DECL_NAME (decl));
    char *mangled_name = cobol_name_mangler_callback(name);

    // A verbatim CALL does not get mangled.
    if( cbl_call_verbatim_e == parser_call_target_convention(decl) )
      {
      strcpy(mangled_name, name);
      }

    if(getenv("SHOW_MANGLE"))
        {
        fprintf(stderr, "%s(): %30s becomes %30s\n", __func__, name, mangled_name);
        }

    extern int yydebug;
    bool is_cobol_name(const char name[]);

    if( false && yydebug ) {
        if (TREE_PUBLIC (decl) || DECL_FILE_SCOPE_P (decl)) {
            if( is_cobol_name(name) ) {
                // Names of things called externally.
                yyerror("Public/FileScope %s", name);
            }
        }
#if 0
    else {
        char vis[5] = "";
        vis[0] = TREE_STATIC(decl)? 'S' : ' ';
        vis[1] = DECL_EXTERNAL(decl)? 'E' : ' ';
        vis[2] = DECL_VISIBILITY(decl)? 'V' : ' ';
        vis[2] = DECL_NONLOCAL(decl)? 'L' : ' ';

        tree parent = get_containing_scope (decl);
        const_tree context = get_ultimate_context (decl);
        struct { const char *parent, *context; } names = {};

        vis[3] = parent? 'P' : ' ';
        vis[4] = context? 'F' : ' ';
        if (parent)
          names.parent = IDENTIFIER_POINTER (DECL_NAME (parent));
        if (context)
          names.context = IDENTIFIER_POINTER (DECL_NAME (context));

        // WORKING-STORAGE static variables
        yyerror("variable %s     %-16s of %-14s of %s: %%s",
          vis, name, names.parent, names.context);
    }
#endif

    if(getenv("SEE_NAMES") && strncmp( name, "__", 2) )
        {
        fprintf(stderr, "set       %30.30s", name);
        fprintf(stderr, "  to  %30.30s\n", mangled_name);
        }
    }

    id = get_identifier(mangled_name);
    free(mangled_name);

    SET_DECL_ASSEMBLER_NAME (decl, id);
    }


#undef LANG_HOOKS_BUILTIN_FUNCTION
#undef LANG_HOOKS_GETDECLS
#undef LANG_HOOKS_GLOBAL_BINDINGS_P
#undef LANG_HOOKS_HANDLE_OPTION
#undef LANG_HOOKS_INIT
#undef LANG_HOOKS_INIT_OPTIONS_STRUCT
#undef LANG_HOOKS_NAME
#undef LANG_HOOKS_OPTION_LANG_MASK
#undef LANG_HOOKS_PARSE_FILE
#undef LANG_HOOKS_PUSHDECL
#undef LANG_HOOKS_TYPE_FOR_MODE
#undef LANG_HOOKS_TYPE_FOR_SIZE
#undef LANG_HOOKS_SET_DECL_ASSEMBLER_NAME

#define LANG_HOOKS_NAME "Cobol"

#define LANG_HOOKS_INIT                     cobol_langhook_init
#define LANG_HOOKS_OPTION_LANG_MASK         cobol_option_lang_mask

#define LANG_HOOKS_INIT_OPTIONS_STRUCT      cobol_langhook_init_options_struct
#define LANG_HOOKS_HANDLE_OPTION            cobol_langhook_handle_option

#define LANG_HOOKS_BUILTIN_FUNCTION         cobol_langhook_builtin_function
#define LANG_HOOKS_GETDECLS                 cobol_langhook_getdecls
#define LANG_HOOKS_GLOBAL_BINDINGS_P        cobol_langhook_global_bindings_p
#define LANG_HOOKS_PARSE_FILE               cobol_langhook_parse_file
#define LANG_HOOKS_PUSHDECL                 cobol_langhook_pushdecl

#define LANG_HOOKS_TYPE_FOR_MODE            cobol_langhook_type_for_mode
#define LANG_HOOKS_TYPE_FOR_SIZE            cobol_langhook_type_for_size

#define LANG_HOOKS_SET_DECL_ASSEMBLER_NAME cobol_set_decl_assembler_name

struct lang_hooks lang_hooks = LANG_HOOKS_INITIALIZER;

#include "gt-cobol-cobol1.h"
#include "gtype-cobol.h"