Commit d95ff0b9 authored by rdubner's avatar rdubner

Significantly improved the logic and accuracy of the cross referencing

parent 4493aade
......@@ -48,7 +48,7 @@
using namespace std;
#define DUMPING
//#define DUMPING
void
scan_test(const string &filename)
......@@ -291,12 +291,6 @@ main(int argc, char *argv[])
COB_DATA cob_data;
COB_FIELD_ATTRIBUTES cob_field_attributes;
COB_FIELDS cob_fields;
DATA_DESCRIPTIONS data_description;
string source = params.basename;
const string GLOBAL = "GLOBAL";
const string LOCAL = "LOCAL";
const string MAIN = "MAIN";
// We embark on a multi-step process, where we scan a number of
// different files separately. This is a bit like putting a jigsaw
......@@ -311,7 +305,7 @@ main(int argc, char *argv[])
variable_tree.ReadFromFile(params.cbl_lst);
variable_tree.BuildCanonicalName();
#if defined(DUMPING)
//variable_tree.Dump();
// variable_tree.Dump();
#endif
// Scan the .h files for working storage and local storage locations
......@@ -322,7 +316,6 @@ main(int argc, char *argv[])
cob_field_attributes,
cob_fields
);
// Scan the .c file for program identification and linkage information.
ScanForLinkage(params.c_file,
params.c_filename.GetFname(),
......@@ -330,6 +323,11 @@ main(int argc, char *argv[])
cob_data,
cob_fields
);
// The f_/b_ cross reference is as complete as we know how to make it.
// Scan cob_fields, updating the parent variables and building the lookup map
cob_fields.FixLookup(cob_data);
#if defined(DUMPING)
cout << endl;
//program_labels.Dump();
......@@ -339,12 +337,6 @@ main(int argc, char *argv[])
cob_fields.Dump();
#endif
// We now have a nice complete list of COB_DATA, COB_FIELD_ATTRIBUTES, COB_FIELDS
// Flatten them down into the composite table:
data_description.Flatten(cob_fields,cob_field_attributes,cob_pic_symbols,cob_data);
#if defined(DUMPING)
data_description.Dump();
#endif
// The most complete description of the COBOL variable identifiers is
// found in variable_tree. But as of right now, it knows nothing about
......@@ -354,7 +346,7 @@ main(int argc, char *argv[])
// The Consolidate() routine puts them together.
variable_tree.Consolidate(data_description,cob_data);
variable_tree.Consolidate(cob_fields);
#if defined(DUMPING)
variable_tree.Dump();
variable_tree.DumpFlatList();
......
......@@ -309,25 +309,25 @@ ProcessDataStorage(ifstream &ifs,
}
}
// Skip to the name_of_data
string name_of_data;
string b_name;
if( nfound < input.length() ) {
// Pick up the b
name_of_data += input[nfound++];
b_name += input[nfound++];
}
if( nfound < input.length() ) {
// Pick up the _
name_of_data += input[nfound++];
b_name += input[nfound++];
}
while( nfound < input.length() ) {
char ch = input[nfound++];
if( !isdigit(ch) ) {
break;
}
name_of_data += ch;
b_name += ch;
}
if(name_of_data EQ "NU") {
name_of_data = "";
if(b_name EQ "NU") {
b_name = "";
}
// Skip past the ampersand:
......@@ -339,21 +339,21 @@ ProcessDataStorage(ifstream &ifs,
}
// Pick up the pointer to the attribute
string name_of_attr;
string a_name;
if( nfound < input.length() ) {
// Pick up the a
name_of_attr += input[nfound++];
a_name += input[nfound++];
}
if( nfound < input.length() ) {
// Pick up the _
name_of_attr += input[nfound++];
a_name += input[nfound++];
}
while( nfound < input.length() ) {
char ch = input[nfound++];
if( !isdigit(ch) ) {
break;
}
name_of_attr += ch;
a_name += ch;
}
// See if there is an offset:
......@@ -380,18 +380,25 @@ ProcessDataStorage(ifstream &ifs,
// variables lined up with the .cbl.lst variable list.
map<string,string>::const_iterator it =
b_symbol_to_program_id.find(name_of_data);
b_symbol_to_program_id.find(b_name);
if(it != b_symbol_to_program_id.end()) {
mapped_program_id = it->second;
}
string parent;
const COB_DATUM *pdatum = cob_data.GetCobDatum(b_name);
if( pdatum ) {
parent = pdatum->cbl_name;
}
cob_fields.Insert(f_name,
name_of_data,
b_name,
offset,
size,
name_of_attr,
a_name,
mapped_program_id,
cbl_name);
parent,
cbl_name); // And this the child
continue;
}
......@@ -674,7 +681,7 @@ ScanForLinkage(ifstream &ifs,
nfound = param2.find(" + ",nfound+3);
}
}
cob_fields.Modify(f_symbol,"NULL",offset);
cob_fields.Modify(f_symbol,"",offset);
}
// Go see if there are more on this line
......@@ -684,42 +691,6 @@ ScanForLinkage(ifstream &ifs,
return;
}
void
DATA_DESCRIPTIONS::Flatten(const COB_FIELDS &cob_fields,
const COB_FIELD_ATTRIBUTES &cob_field_attributes,
const COB_PIC_SYMBOLS &cob_pic_symbols,
const COB_DATA &cob_data
)
{
// Run through the f_ fields first
for(V_COB_FIELDS::const_iterator it=cob_fields.v_cob_fields.begin();
it != cob_fields.v_cob_fields.end();
it++
) {
COB_FIELD field = *it;
DATA_DESCRIPTION ds;
ds.f_name = field.f_name ;
ds.size = field.size ;
ds.b_name = field.b_name;
ds.offset = field.offset ;
ds.program_id = field.program_id ;
ds.cobol_symbol = field.cbl_name ;
const COB_DATUM *cd = cob_data.GetCobDatum(ds.b_name);
if( cd ) {
ds.size = cd->size;
ds.storage_type = cd->storage_type;
}
if( ds.storage_type.empty() AND ds.b_name.empty() ) {
ds.storage_type = "Local";
}
DsInsert(ds);
}
}
void
COB_PROGRAM_LABELS::FormatProgramInfo(stringstream &ss) const
{
......@@ -769,34 +740,3 @@ COB_PROGRAM_LABELS::FormatProgramInfo(stringstream &ss) const
}
}
}
void
DATA_DESCRIPTIONS::Dump() const
{
cout << "DATA_DESCRIPTION:" << endl ;
cout << setw(20) << "program_id" ;
cout << setw(20) << "cobol_symbol" ;
cout << setw(13) << "storage_type" ;
cout << setw( 9) << "b_name" ;
cout << setw( 7) << "offset" ;
cout << setw( 6) << "size" ;
cout << setw( 9) << "f_symbol" ;
cout << setw(16) << "picture" ;
cout << endl;
for(VDATA_DESCRIPTION::const_iterator it=symbols.begin();
it!=symbols.end();
it++ ) {
DATA_DESCRIPTION symbol = *it;
cout << setw(20) << symbol.program_id ;
cout << setw(20) << symbol.cobol_symbol ;
cout << setw(13) << symbol.storage_type ;
cout << setw( 9) << symbol.b_name ;
cout << setw( 7) << symbol.offset ;
cout << setw( 6) << symbol.size ;
cout << setw( 9) << symbol.f_name ;
cout << setw(16) << symbol.picture_string ;
cout << endl;
}
cout << endl;
}
......@@ -53,6 +53,7 @@
#define nullptr NULL
#endif
#define NO_INDEX ((size_t)(-1))
//////////////////////////////////////////////////////////////////////////
// These data structures are based on those from GnuCOBOL3.1-dev/libcob/common.h
......@@ -379,8 +380,9 @@ public:
int offset; /* any internal + offset */
int size; /* Field size */
std::string a_name; /* name of attribute */
std::string program_id; /* Where defined */
std::string cbl_name; /* From the comment in the .h code */
std::string program_id;
std::string parent;
std::string child; /* child of B of C of parent of program_id */
public:
COB_FIELD(std::string f_name_,
......@@ -389,7 +391,8 @@ public:
int size_,
std::string a_name_,
std::string program_id_,
std::string cbl_name_)
std::string parent_,
std::string child_)
{
f_name = f_name_ ;
b_name = b_name_ ;
......@@ -397,7 +400,8 @@ public:
size = size_ ;
a_name = a_name_ ;
program_id = program_id_;
cbl_name = cbl_name_ ;
parent = parent_ ;
child = child_ ;
}
friend COB_FIELDS;
......@@ -409,10 +413,32 @@ class COB_FIELDS
{
private:
M_COB_FIELDS m_cob_fields;
M_COB_FIELDS m_from_cobol;
public:
V_COB_FIELDS v_cob_fields;
private:
// Compose the text key used by the m_from_cobol lookup map.
// The result has the form "prog|parent|child|offset", with the offset
// rendered as a decimal integer.
std::string
MakeKey(const std::string &prog,
        const std::string &parent,
        const std::string &child,
        int offset
        ) const
{
    // 32 bytes is ample for any decimal int plus the terminator
    char offset_text[32];
    sprintf(offset_text,"%d",offset);

    return prog + "|" + parent + "|" + child + "|" + offset_text;
}
public:
void
Insert(std::string f_name,
......@@ -421,7 +447,8 @@ public:
int size,
std::string a_name,
std::string program_id,
std::string cbl_name)
std::string parent,
std::string child)
{
M_COB_FIELDS::const_iterator it = m_cob_fields.find(f_name);
if( it != m_cob_fields.end() ) {
......@@ -434,11 +461,30 @@ public:
size,
a_name,
program_id,
cbl_name);
parent,
child);
m_cob_fields[f_name] = v_cob_fields.size();
v_cob_fields.push_back(cf);
}
size_t
FetchByNameAndOffset(const std::string &program_id_,
const std::string &parent_,
const std::string &child_,
int offset
) const
{
size_t retval = NO_INDEX;
std::string key = MakeKey(program_id_,parent_,child_,offset);
M_COB_FIELDS::const_iterator it = m_from_cobol.find(key);
if( it != m_from_cobol.end() ) {
retval = it->second;
}
return retval;
}
void
Modify(std::string f_name_,
std::string b_name_,
......@@ -454,6 +500,27 @@ public:
v_cob_fields[i].offset = offset_;
}
void
FixLookup(const COB_DATA &cob_data)
{
for( size_t i=0; i< v_cob_fields.size(); i++ ) {
if( !v_cob_fields[i].b_name.empty() && v_cob_fields[i].parent.empty() ) {
// The parent is empty, and b_name is not. It's possible that the linkage
// scan of the .c code populated the parent field. If so, we might be able
// to populate the parent field now:
const COB_DATUM *pdatum = cob_data.GetCobDatum(v_cob_fields[i].b_name);
if( pdatum ) {
v_cob_fields[i].parent = pdatum->cbl_name;
}
}
m_from_cobol[MakeKey(v_cob_fields[i].program_id,
v_cob_fields[i].parent,
v_cob_fields[i].child,
v_cob_fields[i].offset)] = i;
}
}
void
Dump() const
{
......@@ -464,7 +531,8 @@ public:
std::cout << std::setw(20) << "size";
std::cout << std::setw(20) << "a_name";
std::cout << std::setw(20) << "program_id";
std::cout << std::setw(20) << "cbl_name";
std::cout << std::setw(20) << "parent";
std::cout << std::setw(20) << "child";
std::cout << std::endl;
for( V_COB_FIELDS::const_iterator it = v_cob_fields.begin();
......@@ -477,7 +545,8 @@ public:
std::cout << std::setw(20) << field.size;
std::cout << std::setw(20) << field.a_name;
std::cout << std::setw(20) << field.program_id;
std::cout << std::setw(20) << field.cbl_name;
std::cout << std::setw(20) << field.parent;
std::cout << std::setw(20) << field.child;
std::cout << std::endl;
}
std::cout << std::endl;
......@@ -586,109 +655,6 @@ public:
}
};
class DATA_DESCRIPTIONS;

// One row of the flattened composite table kept by DATA_DESCRIPTIONS.
// All string members start out empty; offset and size start out at zero.
class DATA_DESCRIPTION
{
public:
    std::string program_id;
    std::string cobol_symbol;
    std::string storage_type;
    std::string b_name;
    int         offset;
    int         size;
    std::string f_name;
    std::string picture_string;

public:
    // The two integers are the only members without a default
    // constructor of their own, so they are zeroed explicitly.
    DATA_DESCRIPTION()
        : offset(0)
        , size(0)
    {
    }

    friend DATA_DESCRIPTIONS;
};
typedef std::vector<DATA_DESCRIPTION> VDATA_DESCRIPTION;
typedef std::map<std::string,size_t> M_VKEY;

// Container for the flattened DATA_DESCRIPTION rows, with a lookup map
// from a "program_id|symbol|offset" text key to the row's position.
class DATA_DESCRIPTIONS
{
private:
    VDATA_DESCRIPTION symbols;

    // Maps the MakeVKey() text key to an index into symbols
    M_VKEY symbols_key;

    // NOTE(review): nothing in the visible code adds entries to this map,
    // so IndexOfSymbol() reports -1 unless it is populated elsewhere.
    std::map<std::string,size_t>location_of_c_symbol;

public:
    // Number of rows currently stored
    int
    size() const
    {
        const size_t count = symbols.size();
        return static_cast<int>(count);
    }

    // Unchecked access to row n
    const DATA_DESCRIPTION &
    operator[](size_t n) const
    {
        return symbols[n];
    }

    // Position recorded for 'symbol' in location_of_c_symbol, or -1 when
    // the symbol is not present there.
    int
    IndexOfSymbol(const std::string &symbol) const
    {
        std::map<std::string,size_t>::const_iterator found =
            location_of_c_symbol.find(symbol);
        if(found != location_of_c_symbol.end()) {
            return static_cast<int>(found->second);
        }
        return -1;
    }

    // Build the "program_id|c_symbol|offset" key text
    std::string
    MakeVKey(std::string const &program_id, std::string const &c_symbol,int offset) const
    {
        char offset_text[20];
        sprintf(offset_text,"%d",offset);

        std::string key = program_id;
        key += "|";
        key += c_symbol;
        key += "|";
        key += offset_text;
        return key;
    }

    // Append a row and remember its position under its key.  A later row
    // with the same key replaces the remembered position.
    void
    DsInsert(const DATA_DESCRIPTION &ds )
    {
        const std::string key = MakeVKey(ds.program_id,ds.cobol_symbol,ds.offset);
        const size_t position = symbols.size();
        symbols_key[key] = position;
        symbols.push_back(ds);
    }

    // True when a row has been inserted under this key
    bool
    IsInDs(std::string const &program_id, std::string const &cobol_symbol,int offset) const
    {
        const std::string key = MakeVKey(program_id,cobol_symbol,offset);
        return symbols_key.find(key) != symbols_key.end();
    }

    // Position of the row stored under this key, or (size_t)(-1) when
    // there is none.
    size_t
    GetIndex(std::string const &program_id, std::string const &c_symbol,int offset) const
    {
        const std::string key = MakeVKey(program_id,c_symbol,offset);
        M_VKEY::const_iterator found = symbols_key.find(key);
        if(found == symbols_key.end()) {
            return (size_t)(-1);
        }
        return found->second;
    }

    void Dump() const;
    void Flatten(const COB_FIELDS &cob_fields,
                 const COB_FIELD_ATTRIBUTES &cob_field_attributes,
                 const COB_PIC_SYMBOLS &cob_pic_symbols,
                 const COB_DATA &cob_data
                 );
};
void ScanAllDotHFiles(const std::string &path,
const std::string &fname,
COB_PIC_SYMBOLS &cob_pic_symbols,
......
......@@ -47,14 +47,8 @@ VAR_NODE::VAR_NODE()
level = LEVEL_ROOT;
occurs = 0;
is_external = false;
is_global = false;
parent = nullptr;
data_description_index = NO_INDEX;
best_size = 0;
attr_type = 0;
attr_digits = 0;
attr_scale = 0;
attr_flags = 0;
}
void
......@@ -378,10 +372,6 @@ VARIABLE_TREE::ReadFromFile(std::ifstream &ifs)
// data we need in a formal way. RJD, just before version cobc 3.1 was
// released, 2020-06-26
if( tokens.back() EQ "GLOBAL" ) {
tokens.pop_back();
}
if(tokens.size() EQ 2 AND tokens[0] EQ "PROGRAM") {
// We are starting a brand new PROGRAM section, which has the
// root as a parent:
......@@ -459,6 +449,21 @@ VARIABLE_TREE::ReadFromFile(std::ifstream &ifs)
tokens.erase(it1,it2);
}
it = tokens.begin();
while(it != tokens.end()) {
if( *it EQ "GLOBAL" ) {
new_node->is_global = true;
break;
}
it++;
}
if( it != tokens.end() ) {
size_t index = it - tokens.begin();
vector<string>::iterator it1 = tokens.begin() + index;
vector<string>::iterator it2 = tokens.begin() + index + 1;
tokens.erase(it1,it2);
}
it = tokens.begin();
while(it != tokens.end()) {
if( *it EQ "OCCURS" ) {
......@@ -704,24 +709,16 @@ VARIABLE_TREE::DumpNode(ostream &sout, const VAR_NODE *node)
sout << "[";
sout << node->canonical_name;
if( !node->base_symbol.empty() OR node->offset > 0 ) {
if( !node->b_name.empty() OR node->offset > 0 ) {
sout << " ";
}
sout << node->base_symbol;
sout << node->b_name;
if(node->offset > 0) {
sout << "+";
sout << node->offset;
}
sout << "] ";
if( node->data_description_index != NO_INDEX ) {
sout << "[";
sout << node->data_description_index;
sout << "] ";
} else {
sout << "[no_index] ";
}
sout << endl;
}
}
......@@ -1051,39 +1048,6 @@ VARIABLE_TREE::BuildFlatList(VAR_NODE *node,VVAR_NODES &flat)
}
}
// Depth-first search of the variable tree rooted at 'node' for the first
// node whose section, program, name and offset all match the arguments.
// The node itself is tested before its children; children are visited in
// the order GetChildren() supplies them.  Returns nullptr when nothing in
// the subtree matches.
VAR_NODE *
DiveForVariable( VAR_NODE *node,
                 const string &section,
                 const string &program_id,
                 const string &cbl_symbol,
                 const int offset)
{
    const bool is_match = node->GetSection() EQ section
                          AND node->GetProgram() EQ program_id
                          AND node->GetName() EQ cbl_symbol
                          AND node->GetOffset() EQ offset;
    if( is_match ) {
        // This is the one we were asked for
        return node;
    }

    // Not this node; try each branch below it in turn
    VVAR_NODES offspring = node->GetChildren();
    VVAR_NODES::const_iterator walker = offspring.begin();
    while( walker != offspring.end() ) {
        VAR_NODE *branch = *walker;
        VAR_NODE *hit = DiveForVariable(branch,
                                        section,
                                        program_id,
                                        cbl_symbol,
                                        offset);
        if( hit != nullptr ) {
            return hit;
        }
        walker++;
    }
    return nullptr;
}
void
VARIABLE_TREE::AdjustFlatList()
{
......@@ -1096,68 +1060,101 @@ VARIABLE_TREE::AdjustFlatList()
for( size_t i=1; i<flat_list.size(); i++ ) {
if( flat_list[i-1]->level EQ 0
AND flat_list[i]->base_symbol.empty()
AND flat_list[i]->b_name.empty()
AND flat_list[i-1]->program EQ flat_list[i]->program
AND flat_list[i]->level EQ 1 ) {
// This is the situation: 01 has no base_symbol, but its 00
// parent does.
flat_list[i]->base_symbol = flat_list[i-1]->base_symbol;
flat_list[i]->b_name = flat_list[i-1]->b_name;
}
}
}
void
VARIABLE_TREE::Consolidive(VAR_NODE *node,const DATA_DESCRIPTIONS &data_description,string &base_symbol,const COB_DATA &cob_data)
VARIABLE_TREE::Consolidive(VAR_NODE *node,
string &base_symbol,
const COB_FIELDS &cob_fields)
{
// This routine was once more complicated; now it just propagates the top node b_xxx down through
// the whole branch:
if( node ) {
VSTRING tokens = Split(node->GetCanonicalName(),"/");
if( tokens.size() >= 2 ) {
string program_id = tokens.back();
string cbl_symbol = tokens[0];
int offset = node->GetOffset();
size_t index = NO_INDEX;
if(node->section != "LOCAL-STORAGE" AND node->level <= 1 OR node->level EQ 77 ) {
// We're starting work on a brand-new level-one(ish) variable.
// Let's check to see if there is a b_ buffer for this 01/77 symbol
if( node->level <= 1 OR node->level EQ 77 ) {
// We're starting a new variable:
base_symbol = "";
const COB_DATUM *pdatum = cob_data.GetCobDatumFromProgAndName(program_id,cbl_symbol);
if( pdatum ) {
// Pick up the base symbol
base_symbol = pdatum->name;
}
}
if( base_symbol.empty() ) {
// From the canonical name, get the program-id (the last entry)
string program_id = tokens.back();
}
// Get the parent entry, which is the 01/77 part of the canonical name;
// it's the one right below the program-id
string parent = tokens[tokens.size()-2];
// Get the "youngest" element, which is the first in the list
// It can be the same as the parent
string child = tokens[0];
if( !base_symbol.empty() ) {
// See if we have a data_descriptions entry for that cbl_symbol
if( data_description.IsInDs(program_id,base_symbol,offset) ) {
// We do! Save the index
index = data_description.GetIndex(program_id,base_symbol,offset);
node->data_description_index = index;
}