merge local cobol_testgen improvements into v3 shared modules
- cond.py: SQLCODE/SQLSTATE handling, alphanumeric >/< boundary fix
- output.py: termination tracking, db_input support, _is_field_assigned filter
- coverage.py: mark_from_gcov, THRU support, KeyError protection
- gcov.py: new file (dependency for coverage.py)
- grammar.lark: multi-segment PIC support
- read.py: SQL INCLUDE resolution, DECLARE TABLE parsing, * comment fix
- core.py: SQL parsing, blocked_names, keyword list
- design.py: multi-sentinel, THRU ranges, PERFORM VARYING last iteration
- __init__.py: local main() + v3 API functions, guarded imports

All 6 ZAN programs verified passing through v3 pipeline
This commit is contained in:
+169
-25
@@ -1,9 +1,12 @@
|
||||
"""??????? + COPYBOOK + DATA DIVISION?? + PIC"""
|
||||
"""Preprocessor + COPYBOOK + DATA DIVISION parse + PIC"""
|
||||
|
||||
import re
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from lark import Lark, Transformer, v_args
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .models import FieldDef, PicInfo
|
||||
|
||||
|
||||
@@ -85,6 +88,8 @@ def preprocess(source: str) -> str:
|
||||
if len(line) >= 7 and line[6].upper() == 'D':
|
||||
continue
|
||||
content = line[6:] if len(line) >= 7 else line
|
||||
if content.strip().startswith('*'):
|
||||
continue
|
||||
else:
|
||||
comment_pos = line.find('*>')
|
||||
if comment_pos >= 0:
|
||||
@@ -192,6 +197,125 @@ def resolve_copybooks(source: str, source_dir: str, _recursion_depth: int = 0,
|
||||
return '\n'.join(result)
|
||||
|
||||
|
||||
# ── EXEC SQL INCLUDE Resolution ──
|
||||
|
||||
_RE_SQL_INC = re.compile(
|
||||
r'EXEC\s+SQL\s+INCLUDE\s+(\w[\w-]*)\s+END-EXEC\.',
|
||||
re.IGNORECASE | re.DOTALL
|
||||
)
|
||||
|
||||
_BUILTIN_SQLCA = """\
|
||||
01 SQLCA.
|
||||
05 SQLCAID PIC X(8).
|
||||
05 SQLCABC PIC S9(9) COMP.
|
||||
05 SQLCODE PIC S9(9) COMP.
|
||||
05 SQLERRM.
|
||||
10 SQLERRML PIC S9(4) COMP.
|
||||
10 SQLERRMC PIC X(70).
|
||||
05 SQLERRP PIC X(8).
|
||||
05 SQLERRD OCCURS 6 TIMES PIC S9(9) COMP.
|
||||
05 SQLWARN.
|
||||
10 SQLWARN0 PIC X.
|
||||
10 SQLWARN1 PIC X.
|
||||
10 SQLWARN2 PIC X.
|
||||
10 SQLWARN3 PIC X.
|
||||
10 SQLWARN4 PIC X.
|
||||
10 SQLWARN5 PIC X.
|
||||
10 SQLWARN6 PIC X.
|
||||
10 SQLWARN7 PIC X.
|
||||
05 SQLSTATE PIC X(5).
|
||||
"""
|
||||
|
||||
|
||||
def resolve_sql_includes(source: str, source_dir: str) -> str:
    """Expand ``EXEC SQL INCLUDE name END-EXEC.`` directives, much like COPY.

    For every directive, ``source_dir`` is searched for a regular file named
    after the member, with no extension or one of ``.cpy``, ``.CPY``,
    ``.cbl``, ``.CBL``. The upper-cased member name is tried first, then the
    name as written, then its lower-cased form, so members stored under a
    different case are still found on case-sensitive filesystems. The first
    file found replaces the directive with its UTF-8 decoded text.

    When no file is found, ``SQLCA`` is replaced by the built-in definition;
    any other member is replaced by a ``NOT RESOLVED`` marker line and a
    warning is logged.

    Substitution is repeated so that directives inside included text are
    expanded too. The number of passes is capped so a member that includes
    itself (directly or through another member) cannot loop forever; when the
    cap is hit a warning is logged and the text expanded so far is returned.

    Args:
        source: Program text that may contain SQL INCLUDE directives.
        source_dir: Directory searched for include members.

    Returns:
        The source text with the directives replaced.
    """
    max_passes = 10  # bounds nesting depth and guards against include cycles
    base_dir = Path(source_dir)
    extensions = ('', '.cpy', '.CPY', '.cbl', '.CBL')

    def _resolve_one(m):
        written = m.group(1)
        name = written.upper()
        # dict.fromkeys drops duplicate spellings while keeping the order:
        # upper-case first, so anything found before is still found first.
        for stem in dict.fromkeys((name, written, written.lower())):
            for ext in extensions:
                p = base_dir / f"{stem}{ext}"
                # is_file() rather than exists(): a directory with the
                # member's name must not be handed to read_text().
                if p.is_file():
                    return p.read_text(encoding='utf-8')
        if name == 'SQLCA':
            return _BUILTIN_SQLCA
        logger.warning("SQL INCLUDE %s not found, injecting as comment", name)
        return f" * SQL INCLUDE {name} NOT RESOLVED\n"

    for _ in range(max_passes):
        new_source = _RE_SQL_INC.sub(_resolve_one, source)
        if new_source == source:
            return source
        source = new_source

    logger.warning(
        "SQL INCLUDE expansion stopped after %d passes; "
        "possible recursive include", max_passes
    )
    return source
|
||||
|
||||
|
||||
_RE_SQL_BLOCK = re.compile(
|
||||
r'EXEC\s+SQL\s+(.*?)\s+END-EXEC\.?',
|
||||
re.IGNORECASE | re.DOTALL
|
||||
)
|
||||
|
||||
_RE_DECLARE_TABLE = re.compile(
|
||||
r'EXEC\s+SQL\s+DECLARE\s+(\w[\w-]*)\s+TABLE\s*\((.*?)\)\s+END-EXEC\.?',
|
||||
re.IGNORECASE | re.DOTALL
|
||||
)
|
||||
|
||||
|
||||
def strip_exec_sql_from_data_div(source: str) -> tuple:
    """Replace each EXEC SQL block in ``source`` with a one-line ``*>`` comment.

    A block that is a ``DECLARE <name> TABLE (...)`` statement has its column
    list parsed first; the columns are recorded under the upper-cased table
    name and the comment notes how many were found. Every other block is
    replaced by a generic "skipped" comment.

    Returns:
        A ``(cleaned_source, declared_columns)`` pair, where
        ``declared_columns`` maps table name to its parsed column list.
    """
    tables = {}

    def _to_comment(match):
        declare = _RE_DECLARE_TABLE.match(match.group(0))
        if declare is None:
            return " *> SKIPPED EXEC SQL\n"
        table = declare.group(1).upper()
        columns = _parse_declare_table_columns(declare.group(2))
        tables[table] = columns
        return f" *> DECLARE {table} TABLE ({len(columns)} cols)\n"

    return _RE_SQL_BLOCK.sub(_to_comment, source), tables
|
||||
|
||||
|
||||
def _parse_declare_table_columns(col_text: str) -> list[dict]:
|
||||
"""Parse 'CUST_ID CHAR(5) NOT NULL, BALANCE PIC 9(6)' into column list."""
|
||||
cols = []
|
||||
for part in re.split(r',\s*', col_text):
|
||||
part = part.strip()
|
||||
if not part:
|
||||
continue
|
||||
m = re.match(
|
||||
r'(\w[\w-]*)\s+(CHAR\s*\(\s*(\d+)\s*\)'
|
||||
r'|VARCHAR\s*\(\s*(\d+)\s*\)'
|
||||
r'|INTEGER|SMALLINT'
|
||||
r'|DECIMAL\s*\(\s*(\d+)\s*(?:,\s*(\d+))?\s*\)'
|
||||
r'|DATE'
|
||||
r'|PIC\s+([\w().]+))'
|
||||
r'(?:\s+NOT\s+NULL|\s+NULL)?',
|
||||
part, re.IGNORECASE
|
||||
)
|
||||
if m:
|
||||
name = m.group(1).upper()
|
||||
if m.group(3):
|
||||
col_type = {'db_type': 'CHAR', 'size': int(m.group(3))}
|
||||
elif m.group(4):
|
||||
col_type = {'db_type': 'VARCHAR', 'size': int(m.group(4))}
|
||||
elif m.group(2).upper() == 'INTEGER':
|
||||
col_type = {'db_type': 'INTEGER'}
|
||||
elif m.group(2).upper() == 'SMALLINT':
|
||||
col_type = {'db_type': 'SMALLINT'}
|
||||
elif m.group(5):
|
||||
prec = int(m.group(5)) if m.group(5) else 0
|
||||
scale = int(m.group(6)) if m.group(6) else 0
|
||||
col_type = {'db_type': 'DECIMAL', 'precision': prec, 'scale': scale}
|
||||
elif m.group(2).upper() == 'DATE':
|
||||
col_type = {'db_type': 'DATE'}
|
||||
elif m.group(7):
|
||||
col_type = {'db_type': 'PIC', 'pic': m.group(7).upper()}
|
||||
else:
|
||||
col_type = {'db_type': 'CHAR', 'size': 1}
|
||||
cols.append({'name': name, **col_type})
|
||||
return cols
|
||||
|
||||
|
||||
# ── Lark Grammar ──
|
||||
|
||||
# Module-level cache slot, empty (None) at import time.
# NOTE(review): presumably filled with the compiled Lark grammar on first use;
# the code that assigns it is outside this view, so confirm there.
_GRAMMAR_CACHE = None
|
||||
@@ -464,7 +588,7 @@ def parse_file_control(source: str) -> dict:
|
||||
"""Parse FILE-CONTROL paragraph.
|
||||
|
||||
Returns dict:
|
||||
{filename: {"assign_to": str, "organization": str | None}}
|
||||
{filename: {"assign": str, "organization": str, "recording_mode": str}}
|
||||
"""
|
||||
m = re.search(r'FILE-CONTROL\.(.*?)(?=DATA\s+DIVISION|\Z)', source, re.DOTALL | re.IGNORECASE)
|
||||
if not m:
|
||||
@@ -472,21 +596,39 @@ def parse_file_control(source: str) -> dict:
|
||||
fc = m.group(1)
|
||||
result = {}
|
||||
for sel_m in re.finditer(
|
||||
r'SELECT\s+(\w[\w-]*)\s+[^.]*?\bASSIGN\s+TO\s+(["\'])(.*?)\2',
|
||||
r'SELECT\s+(\w[\w-]*)\s+[^.]*?\bASSIGN\s+TO\s+'
|
||||
r'(?:(["\'])(.*?)\2|(\w[\w-]*))'
|
||||
r'[^.]*\.',
|
||||
fc, re.IGNORECASE
|
||||
):
|
||||
fname = sel_m.group(1).upper()
|
||||
assign_to = sel_m.group(3).upper()
|
||||
# Extract ORGANIZATION clause within this SELECT statement
|
||||
org_m = re.search(
|
||||
r'ORGANIZATION\s+(?:IS\s+)?(\w[\w-]*)',
|
||||
sel_m.group(0), re.IGNORECASE
|
||||
)
|
||||
org = org_m.group(1).upper() if org_m else None
|
||||
result[fname] = {
|
||||
"assign_to": assign_to,
|
||||
"organization": org,
|
||||
}
|
||||
name = sel_m.group(1).upper()
|
||||
if sel_m.group(2):
|
||||
assign_to = sel_m.group(3).upper()
|
||||
else:
|
||||
assign_to = sel_m.group(4).upper()
|
||||
clause = sel_m.group(0)
|
||||
org_m = re.search(r'ORGANIZATION\s+(LINE\s+)?SEQUENTIAL', clause, re.IGNORECASE)
|
||||
if org_m and org_m.group(1):
|
||||
org = 'LINE SEQUENTIAL'
|
||||
elif org_m:
|
||||
org = 'SEQUENTIAL'
|
||||
else:
|
||||
org = 'SEQUENTIAL'
|
||||
result[name] = {'assign': assign_to, 'organization': org, 'recording_mode': 'F'}
|
||||
# Extract RECORDING MODE from FD blocks in FILE SECTION
|
||||
fd_sec_m = re.search(r'FILE\s+SECTION\.(.*?)(?=WORKING-STORAGE\s+SECTION|LINKAGE\s+SECTION|\Z)',
|
||||
source, re.DOTALL | re.IGNORECASE)
|
||||
if fd_sec_m:
|
||||
fs = fd_sec_m.group(1)
|
||||
for block in re.split(r'\n\s*(?=FD\s+)', fs.strip()):
|
||||
fd_m = re.match(r'FD\s+(\w[\w-]*)', block, re.IGNORECASE)
|
||||
if not fd_m:
|
||||
continue
|
||||
fd_name = fd_m.group(1).upper()
|
||||
if fd_name in result:
|
||||
rm_m = re.search(r'RECORDING\s+MODE\s+IS\s+(\w)', block, re.IGNORECASE)
|
||||
if rm_m:
|
||||
result[fd_name]['recording_mode'] = rm_m.group(1).upper()
|
||||
return result
|
||||
|
||||
|
||||
@@ -499,14 +641,12 @@ def parse_file_section(source: str) -> dict:
|
||||
fs = m.group(1)
|
||||
result = {}
|
||||
# FD 和 SD 条目
|
||||
blocks = re.split(r'\n\s*(?=(?:FD|SD)\s+)', fs.strip())
|
||||
for block in blocks:
|
||||
fd_blocks = re.split(r'\n\s*(?=(?:FD|SD)\s+)', fs.strip())
|
||||
for block in fd_blocks:
|
||||
m = re.match(r'(FD|SD)\s+(\w[\w-]*)', block, re.IGNORECASE)
|
||||
if not m:
|
||||
continue
|
||||
entry_type = m.group(1).upper() # "FD" or "SD"
|
||||
name = m.group(2).upper()
|
||||
# 找 01 层记录
|
||||
recs = re.findall(r'^\s*0{0,1}1\s+(\w[\w-]*)', block, re.MULTILINE)
|
||||
result[name] = [r.upper() for r in recs]
|
||||
return result
|
||||
@@ -521,11 +661,15 @@ def scan_open_statements(source: str) -> dict:
|
||||
source, re.IGNORECASE
|
||||
):
|
||||
full = m.group(1)
|
||||
for seg_m in re.finditer(
|
||||
r'(INPUT|OUTPUT|I-O)\s+([\w\s-]+)', full, re.IGNORECASE
|
||||
):
|
||||
direction = seg_m.group(1).upper()
|
||||
for fname in re.findall(r'\w[\w-]*', seg_m.group(2)):
|
||||
if fname.upper() not in ('INPUT', 'OUTPUT', 'I-O'):
|
||||
full = re.sub(r'\s+', ' ', full)
|
||||
tokens = re.split(r'\s+(?=(?:INPUT|OUTPUT|I-O)\s)', full)
|
||||
for seg in tokens:
|
||||
seg = seg.strip()
|
||||
if not seg:
|
||||
continue
|
||||
seg_m = re.match(r'(INPUT|OUTPUT|I-O)\s+([\w -]+)', seg, re.IGNORECASE)
|
||||
if seg_m:
|
||||
direction = seg_m.group(1).upper()
|
||||
for fname in re.findall(r'\w[\w-]*', seg_m.group(2)):
|
||||
dirs[fname.upper()] = direction
|
||||
return dirs
|
||||
|
||||
Reference in New Issue
Block a user