merge local cobol_testgen improvements into v3 shared modules

- cond.py: SQLCODE/SQLSTATE handling, alphanumeric >/< boundary fix
- output.py: termination tracking, db_input support, _is_field_assigned filter
- coverage.py: mark_from_gcov, THRU support, KeyError protection
- gcov.py: new file (dependency for coverage.py)
- grammar.lark: multi-segment PIC support
- read.py: SQL INCLUDE resolution, DECLARE TABLE parsing, * comment fix
- core.py: SQL parsing, blocked_names, keyword list
- design.py: multi-sentinel, THRU ranges, PERFORM VARYING last iteration
- __init__.py: local main() + v3 API functions, guarded imports

All 6 ZAN programs verified passing through v3 pipeline
This commit is contained in:
hangshuo652
2026-06-23 22:38:17 +08:00
parent e5ab3baa46
commit 7fb9304212
9 changed files with 1595 additions and 326 deletions
+169 -25
View File
@@ -1,9 +1,12 @@
"""??????? + COPYBOOK + DATA DIVISION?? + PIC"""
"""Preprocessor + COPYBOOK + DATA DIVISION parse + PIC"""
import re
import logging
from pathlib import Path
from lark import Lark, Transformer, v_args
logger = logging.getLogger(__name__)
from .models import FieldDef, PicInfo
@@ -85,6 +88,8 @@ def preprocess(source: str) -> str:
if len(line) >= 7 and line[6].upper() == 'D':
continue
content = line[6:] if len(line) >= 7 else line
if content.strip().startswith('*'):
continue
else:
comment_pos = line.find('*>')
if comment_pos >= 0:
@@ -192,6 +197,125 @@ def resolve_copybooks(source: str, source_dir: str, _recursion_depth: int = 0,
return '\n'.join(result)
# ── EXEC SQL INCLUDE Resolution ──
# Matches one `EXEC SQL INCLUDE <name> END-EXEC.` statement; group 1 is the
# member name. The terminating period is required by this pattern.
# re.DOTALL has no effect here because the pattern contains no unescaped '.';
# the `\s+` separators already match across line breaks.
_RE_SQL_INC = re.compile(
r'EXEC\s+SQL\s+INCLUDE\s+(\w[\w-]*)\s+END-EXEC\.',
re.IGNORECASE | re.DOTALL
)
# Fallback SQLCA record text that resolve_sql_includes() substitutes for
# `EXEC SQL INCLUDE SQLCA END-EXEC.` when no SQLCA member is found in the
# source directory.
# NOTE(review): fixed-format COBOL normally expects leading sequence/indicator
# columns on each line — confirm the whitespace inside this literal against
# what preprocess() expects.
_BUILTIN_SQLCA = """\
01 SQLCA.
05 SQLCAID PIC X(8).
05 SQLCABC PIC S9(9) COMP.
05 SQLCODE PIC S9(9) COMP.
05 SQLERRM.
10 SQLERRML PIC S9(4) COMP.
10 SQLERRMC PIC X(70).
05 SQLERRP PIC X(8).
05 SQLERRD OCCURS 6 TIMES PIC S9(9) COMP.
05 SQLWARN.
10 SQLWARN0 PIC X.
10 SQLWARN1 PIC X.
10 SQLWARN2 PIC X.
10 SQLWARN3 PIC X.
10 SQLWARN4 PIC X.
10 SQLWARN5 PIC X.
10 SQLWARN6 PIC X.
10 SQLWARN7 PIC X.
05 SQLSTATE PIC X(5).
"""
def resolve_sql_includes(source: str, source_dir: str) -> str:
    """Expand ``EXEC SQL INCLUDE name END-EXEC.`` statements in place, like COPY.

    For each statement the member is looked up in *source_dir* under its
    upper-cased name first and then under the spelling used in the source,
    each tried bare and with the extensions ``.cpy``, ``.CPY``, ``.cbl`` and
    ``.CBL``.  The first regular file found replaces the statement with its
    UTF-8 text.  When no file is found, ``SQLCA`` falls back to the built-in
    ``_BUILTIN_SQLCA`` text; any other name is replaced by a "NOT RESOLVED"
    comment line and a warning is logged.

    Included text may itself contain INCLUDE statements, so substitution is
    repeated until the text stops changing.  The number of passes is capped
    so that a member which includes itself (directly or through a cycle)
    cannot hang the caller; statements still unresolved at that point are
    left in the returned text.

    Args:
        source: COBOL source text.
        source_dir: Directory searched for include members.

    Returns:
        The source text with INCLUDE statements expanded.
    """
    # Kept small on purpose: a member that includes itself more than once
    # grows the text geometrically with every pass.
    max_passes = 10
    base_dir = Path(source_dir)

    def _resolve_one(m):
        written = m.group(1)
        name = written.upper()
        # dict.fromkeys de-duplicates while keeping the upper-cased name
        # first, so lookups that succeeded before still hit the same file.
        for stem in dict.fromkeys((name, written)):
            for ext in ('', '.cpy', '.CPY', '.cbl', '.CBL'):
                p = base_dir / f"{stem}{ext}"
                # is_file() rather than exists(): a directory with the same
                # name must not be handed to read_text().
                if p.is_file():
                    return p.read_text(encoding='utf-8')
        if name == 'SQLCA':
            return _BUILTIN_SQLCA
        logger.warning("SQL INCLUDE %s not found, injecting as comment", name)
        return f" * SQL INCLUDE {name} NOT RESOLVED\n"

    for _ in range(max_passes):
        new_source = _RE_SQL_INC.sub(_resolve_one, source)
        if new_source == source:
            return source
        source = new_source

    logger.warning(
        "SQL INCLUDE expansion stopped after %d passes; possible circular include",
        max_passes,
    )
    return source
# Matches any `EXEC SQL ... END-EXEC` block (trailing period optional).
# Group 1 is the statement text between the keywords; re.DOTALL lets the
# lazy `.*?` run across line breaks so multi-line blocks are matched.
_RE_SQL_BLOCK = re.compile(
r'EXEC\s+SQL\s+(.*?)\s+END-EXEC\.?',
re.IGNORECASE | re.DOTALL
)
# Matches `EXEC SQL DECLARE <name> TABLE ( ... ) END-EXEC` (period optional).
# Group 1 is the table name; group 2 is the column list, which ends at the
# first ')' that is followed by whitespace and END-EXEC.
_RE_DECLARE_TABLE = re.compile(
r'EXEC\s+SQL\s+DECLARE\s+(\w[\w-]*)\s+TABLE\s*\((.*?)\)\s+END-EXEC\.?',
re.IGNORECASE | re.DOTALL
)
def strip_exec_sql_from_data_div(source: str) -> tuple:
    """Replace every EXEC SQL block in *source* with a one-line ``*>`` comment.

    A block that is a ``DECLARE <name> TABLE (...)`` statement has its column
    list parsed and recorded under the upper-cased table name; every other
    block is replaced by a generic "SKIPPED" marker.

    Returns:
        ``(cleaned_source, declared_columns)`` where ``declared_columns`` maps
        each declared table name to its list of parsed column dicts.
    """
    tables = {}

    def _substitute(match):
        declare = _RE_DECLARE_TABLE.match(match.group(0))
        if declare is None:
            return " *> SKIPPED EXEC SQL\n"
        table = declare.group(1).upper()
        columns = _parse_declare_table_columns(declare.group(2))
        # A later DECLARE for the same table overwrites the earlier entry.
        tables[table] = columns
        return f" *> DECLARE {table} TABLE ({len(columns)} cols)\n"

    return _RE_SQL_BLOCK.sub(_substitute, source), tables
# One column definition: a name followed by a supported type and an optional
# NULL / NOT NULL suffix. Named groups carry the numeric arguments.
_RE_DECLARE_COLUMN = re.compile(
    r'(?P<name>\w[\w-]*)\s+(?P<type>'
    r'CHAR\s*\(\s*(?P<char>\d+)\s*\)'
    r'|VARCHAR\s*\(\s*(?P<varchar>\d+)\s*\)'
    r'|INTEGER|SMALLINT'
    r'|DECIMAL\s*\(\s*(?P<prec>\d+)\s*(?:,\s*(?P<scale>\d+))?\s*\)'
    r'|DATE'
    r'|PIC\s+(?P<pic>[\w().]+))'
    r'(?:\s+NOT\s+NULL|\s+NULL)?',
    re.IGNORECASE,
)

# Column separator: a comma that is NOT the precision/scale comma inside
# ``DECIMAL(p,s)`` (i.e. not followed by digits and a closing parenthesis).
_RE_COLUMN_SEPARATOR = re.compile(r',(?!\s*\d+\s*\))')


def _parse_declare_table_columns(col_text: str) -> list[dict]:
    """Parse the column list of an SQL ``DECLARE ... TABLE (...)`` statement.

    Example: ``'CUST_ID CHAR(5) NOT NULL, AMT DECIMAL(9,2)'`` yields
    ``[{'name': 'CUST_ID', 'db_type': 'CHAR', 'size': 5},
    {'name': 'AMT', 'db_type': 'DECIMAL', 'precision': 9, 'scale': 2}]``.

    Recognised types are CHAR(n), VARCHAR(n), INTEGER, SMALLINT,
    DECIMAL(p[,s]), DATE and ``PIC <picture>``.  Column definitions that do
    not match any of these are skipped.

    Args:
        col_text: Text between the parentheses of the DECLARE TABLE statement.

    Returns:
        One dict per recognised column, in source order.  Every dict has
        ``name`` (upper-cased) and ``db_type``; CHAR/VARCHAR add ``size``,
        DECIMAL adds ``precision`` and ``scale`` (0 when omitted), PIC adds
        ``pic`` (upper-cased picture string).
    """
    cols = []
    # Splitting on every comma would cut "DECIMAL(9,2)" in half and lose the
    # column, so the separator pattern leaves the precision/scale comma alone.
    for part in _RE_COLUMN_SEPARATOR.split(col_text):
        part = part.strip()
        if not part:
            continue
        m = _RE_DECLARE_COLUMN.match(part)
        if m is None:
            continue

        type_kw = m.group('type').upper()
        if m.group('char') is not None:
            col_type = {'db_type': 'CHAR', 'size': int(m.group('char'))}
        elif m.group('varchar') is not None:
            col_type = {'db_type': 'VARCHAR', 'size': int(m.group('varchar'))}
        elif type_kw in ('INTEGER', 'SMALLINT'):
            col_type = {'db_type': type_kw}
        elif m.group('prec') is not None:
            scale = m.group('scale')
            col_type = {
                'db_type': 'DECIMAL',
                'precision': int(m.group('prec')),
                'scale': int(scale) if scale is not None else 0,
            }
        elif type_kw == 'DATE':
            col_type = {'db_type': 'DATE'}
        elif m.group('pic') is not None:
            col_type = {'db_type': 'PIC', 'pic': m.group('pic').upper()}
        else:
            # Not reachable with the current pattern; kept as a guard in case
            # a new type alternative is added without a matching branch.
            continue
        cols.append({'name': m.group('name').upper(), **col_type})
    return cols
# ── Lark Grammar ──
_GRAMMAR_CACHE = None
@@ -464,7 +588,7 @@ def parse_file_control(source: str) -> dict:
"""Parse FILE-CONTROL paragraph.
Returns dict:
{filename: {"assign_to": str, "organization": str | None}}
{filename: {"assign": str, "organization": str, "recording_mode": str}}
"""
m = re.search(r'FILE-CONTROL\.(.*?)(?=DATA\s+DIVISION|\Z)', source, re.DOTALL | re.IGNORECASE)
if not m:
@@ -472,21 +596,39 @@ def parse_file_control(source: str) -> dict:
fc = m.group(1)
result = {}
for sel_m in re.finditer(
r'SELECT\s+(\w[\w-]*)\s+[^.]*?\bASSIGN\s+TO\s+(["\'])(.*?)\2',
r'SELECT\s+(\w[\w-]*)\s+[^.]*?\bASSIGN\s+TO\s+'
r'(?:(["\'])(.*?)\2|(\w[\w-]*))'
r'[^.]*\.',
fc, re.IGNORECASE
):
fname = sel_m.group(1).upper()
assign_to = sel_m.group(3).upper()
# Extract ORGANIZATION clause within this SELECT statement
org_m = re.search(
r'ORGANIZATION\s+(?:IS\s+)?(\w[\w-]*)',
sel_m.group(0), re.IGNORECASE
)
org = org_m.group(1).upper() if org_m else None
result[fname] = {
"assign_to": assign_to,
"organization": org,
}
name = sel_m.group(1).upper()
if sel_m.group(2):
assign_to = sel_m.group(3).upper()
else:
assign_to = sel_m.group(4).upper()
clause = sel_m.group(0)
org_m = re.search(r'ORGANIZATION\s+(LINE\s+)?SEQUENTIAL', clause, re.IGNORECASE)
if org_m and org_m.group(1):
org = 'LINE SEQUENTIAL'
elif org_m:
org = 'SEQUENTIAL'
else:
org = 'SEQUENTIAL'
result[name] = {'assign': assign_to, 'organization': org, 'recording_mode': 'F'}
# Extract RECORDING MODE from FD blocks in FILE SECTION
fd_sec_m = re.search(r'FILE\s+SECTION\.(.*?)(?=WORKING-STORAGE\s+SECTION|LINKAGE\s+SECTION|\Z)',
source, re.DOTALL | re.IGNORECASE)
if fd_sec_m:
fs = fd_sec_m.group(1)
for block in re.split(r'\n\s*(?=FD\s+)', fs.strip()):
fd_m = re.match(r'FD\s+(\w[\w-]*)', block, re.IGNORECASE)
if not fd_m:
continue
fd_name = fd_m.group(1).upper()
if fd_name in result:
rm_m = re.search(r'RECORDING\s+MODE\s+IS\s+(\w)', block, re.IGNORECASE)
if rm_m:
result[fd_name]['recording_mode'] = rm_m.group(1).upper()
return result
@@ -499,14 +641,12 @@ def parse_file_section(source: str) -> dict:
fs = m.group(1)
result = {}
# FD 和 SD 条目
blocks = re.split(r'\n\s*(?=(?:FD|SD)\s+)', fs.strip())
for block in blocks:
fd_blocks = re.split(r'\n\s*(?=(?:FD|SD)\s+)', fs.strip())
for block in fd_blocks:
m = re.match(r'(FD|SD)\s+(\w[\w-]*)', block, re.IGNORECASE)
if not m:
continue
entry_type = m.group(1).upper() # "FD" or "SD"
name = m.group(2).upper()
# 找 01 层记录
recs = re.findall(r'^\s*0{0,1}1\s+(\w[\w-]*)', block, re.MULTILINE)
result[name] = [r.upper() for r in recs]
return result
@@ -521,11 +661,15 @@ def scan_open_statements(source: str) -> dict:
source, re.IGNORECASE
):
full = m.group(1)
for seg_m in re.finditer(
r'(INPUT|OUTPUT|I-O)\s+([\w\s-]+)', full, re.IGNORECASE
):
direction = seg_m.group(1).upper()
for fname in re.findall(r'\w[\w-]*', seg_m.group(2)):
if fname.upper() not in ('INPUT', 'OUTPUT', 'I-O'):
full = re.sub(r'\s+', ' ', full)
tokens = re.split(r'\s+(?=(?:INPUT|OUTPUT|I-O)\s)', full)
for seg in tokens:
seg = seg.strip()
if not seg:
continue
seg_m = re.match(r'(INPUT|OUTPUT|I-O)\s+([\w -]+)', seg, re.IGNORECASE)
if seg_m:
direction = seg_m.group(1).upper()
for fname in re.findall(r'\w[\w-]*', seg_m.group(2)):
dirs[fname.upper()] = direction
return dirs