feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark

P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged to hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
hangshuo652
2026-06-19 23:51:55 +08:00
parent 63b5284715
commit bc1d56d1a4
129 changed files with 19378 additions and 261 deletions
+4
View File
@@ -0,0 +1,4 @@
__pycache__/
.pytest_cache/
*.pyc
test_output/
+237 -18
View File
@@ -1,4 +1,12 @@
"""COBOL Test Data Generator — 模块化版入口"""
"""COBOL Test Data Generator — 模块化版入口
from __future__ import annotations
公开 API:
extract_structure() — 解析 COBOL 控制流 → dict
generate_data() — 生成测试数据 → list[dict]
incremental_supplement — 差分补充数据 → list[dict]
check_coverage() — 覆盖率报告 → dict
"""
import sys
import re
@@ -11,14 +19,25 @@ from pathlib import Path
CONFIG = {}
from .read import preprocess, extract_data_division, extract_procedure_division
from .read import resolve_copybooks, parse_data_division, parse_file_section, scan_open_statements
from .read import resolve_copybooks, parse_data_division, parse_file_section, scan_open_statements, parse_file_control
from .core import build_branch_tree, classify_field_roles, _init_child_names
from .cond import parse_single_condition, is_field
from .cond import parse_single_condition, is_field, collect_leaves
from .design import enum_paths, generate_records, _filter_stop
from .output import output_json, output_input_files
from .coverage import run_coverage, generate_coverage_index
from .coverage import run_coverage, generate_coverage_index, check_coverage
from japanese_data import generate_fullwidth_text, generate_halfwidth_katakana, generate_wareki_date
logger = logging.getLogger(__name__)
# Names exported by ``from <package> import *``.
# The list was previously bound to the misspelled name ``n__all__``, which
# left ``__all__`` undefined, so this export list was never honoured.
# NOTE(review): generate_data / incremental_supplement are not visible in this
# part of the file — confirm they are defined in this module.
__all__ = [
    "extract_structure",
    "generate_data",
    "incremental_supplement",
    "check_coverage",
    "CONFIG",
    "generate_fullwidth_text",
    "generate_halfwidth_katakana",
    "generate_wareki_date",
]
# ── OCCURS 展开 ──
@@ -118,7 +137,7 @@ def main():
fh = logging.FileHandler(log_path, encoding="utf-8", mode="w")
fh.setLevel(logging.DEBUG)
fh.setFormatter(logging.Formatter(
"%(asctime)s [%(levelname)s] %(name)s: %(message)s"
"%(asctime)s [%(levelname)s] %(name)s: %(message)s"
))
sh = logging.StreamHandler()
sh.setLevel(logging.INFO)
@@ -353,7 +372,7 @@ def extract_structure(cobol_source: str) -> dict:
file_sec = parse_file_section(preprocessed)
open_dir = scan_open_statements(proc_div) if proc_div else {}
from .models import BrIf, BrEval, BrSeq
from .models import BrIf, BrEval, BrSeq, BrPerform, Assign, CondAnd, CondOr
decision_points = []
total_branches = 0
@@ -395,19 +414,219 @@ def extract_structure(cobol_source: str) -> dict:
if m:
paragraphs.add(m.group(1))
# ── 新增字段: select_files ──
select_files = parse_file_control(preprocessed)
# ── 新增字段: open_directions_detail (与 open_directions 一致) ──
open_directions_detail = open_dir
# ── 新增字段: has_divide / has_inspect / has_string ──
has_divide = bool(re.search(r'\bDIVIDE\b', cobol_source.upper()))
has_inspect = bool(re.search(r'\bINSPECT\b', cobol_source.upper()))
has_string = bool(re.search(r'\bSTRING\b', cobol_source.upper()))
# ── 新增字段: divide_constants ──
divide_constants = []
if has_divide and proc_div:
for dm in re.finditer(r'\bDIVIDE\s+([\d.]+)\b', proc_div, re.IGNORECASE):
val = dm.group(1)
try:
divide_constants.append(float(val))
except ValueError:
pass
# ── 新增字段: perform_patterns ──
perform_patterns = []
def _walk_performs(node):
    """Walk the branch tree and record every BrPerform in ``perform_patterns``.

    For each BrPerform a dict with its perf_type, target, condition, times
    and varying_var is appended, then its body is walked too. BrIf, BrEval
    and BrSeq nodes are only descended into; any other node is ignored.
    """
    if isinstance(node, BrPerform):
        perform_patterns.append({
            "type": node.perf_type,
            "target": node.target,
            "condition": node.condition,
            "times": node.times,
            "varying_var": node.varying_var,
        })
        successors = [node.body_seq]
    elif isinstance(node, BrIf):
        successors = [node.true_seq, node.false_seq]
    elif isinstance(node, BrEval):
        successors = [arm for _, arm in node.when_list]
        successors.append(node.other_seq)
    elif isinstance(node, BrSeq):
        successors = node.children
    else:
        successors = ()
    for successor in successors:
        _walk_performs(successor)
if branch_tree:
_walk_performs(branch_tree)
# ── 新增字段: main_loop ──
main_loop = None
def _find_main_loop(node, depth=0):
nonlocal main_loop
if main_loop is not None:
return
if isinstance(node, BrPerform):
if _perform_has_read(node):
main_loop = {
"type": node.perf_type,
"read_file": _perform_read_file(node),
"has_at_end": False,
}
return
_find_main_loop(node.body_seq, depth + 1)
elif isinstance(node, BrIf):
_find_main_loop(node.true_seq, depth + 1)
_find_main_loop(node.false_seq, depth + 1)
elif isinstance(node, BrEval):
for _, seq in node.when_list:
_find_main_loop(seq, depth + 1)
_find_main_loop(node.other_seq, depth + 1)
elif isinstance(node, BrSeq):
for c in node.children:
_find_main_loop(c, depth + 1)
def _perform_has_read(perf_node):
    """Return True if the PERFORM body holds an Assign of type 'read_into'.

    Only Assign nodes and (nested) BrSeq containers are inspected; other
    node kinds inside the body are not descended into.
    """
    def _contains_read(item):
        if isinstance(item, Assign):
            return item.source_info.get('type') == 'read_into'
        if isinstance(item, BrSeq):
            return any(_contains_read(member) for member in item.children)
        return False

    return _contains_read(perf_node.body_seq)
def _perform_read_file(perf_node):
    """Return the file name of the first 'read_into' Assign in the PERFORM body.

    Only Assign nodes and (nested) BrSeq containers are inspected. Returns
    None when no read with a non-empty file name is found inside a sequence.
    """
    def _first_file(item):
        if isinstance(item, Assign):
            if item.source_info.get('type') == 'read_into':
                return item.source_info.get('file', '')
            return None
        if isinstance(item, BrSeq):
            # First truthy result among the children, in order.
            return next(
                (found for found in map(_first_file, item.children) if found),
                None,
            )
        return None

    return _first_file(perf_node.body_seq)
if branch_tree:
_find_main_loop(branch_tree)
# ── 新增字段: if_types ──
if_types = {"total": 0, "comparison": 0, "equality": 0, "compound": 0, "nested_depth": 0}
def _walk_if_types(node, depth=0):
    """Accumulate IF statistics into the enclosing ``if_types`` dict.

    For every BrIf: bump "total", raise "nested_depth" to the current tree
    depth if larger, count it as "compound" when its condition root is a
    CondAnd/CondOr, and classify each leaf operator as "comparison"
    (>, <, >=, <=) or "equality" (=, <>). ``depth`` grows by one for every
    level descended, including BrSeq / BrEval / BrPerform containers.
    """
    if isinstance(node, BrIf):
        if_types["total"] += 1
        if depth > if_types["nested_depth"]:
            if_types["nested_depth"] = depth
        cond_root = node.cond_tree
        if cond_root:
            cond_leaves = collect_leaves(cond_root)
            # Compound means the root itself is AND/OR, not a single leaf.
            if isinstance(cond_root, (CondAnd, CondOr)):
                if_types["compound"] += 1
            for cond_leaf in cond_leaves:
                if cond_leaf.op in ('>', '<', '>=', '<='):
                    if_types["comparison"] += 1
                elif cond_leaf.op in ('=', '<>'):
                    if_types["equality"] += 1
        successors = [node.true_seq, node.false_seq]
    elif isinstance(node, BrEval):
        successors = [arm for _, arm in node.when_list]
        successors.append(node.other_seq)
    elif isinstance(node, BrPerform):
        successors = [node.body_seq]
    elif isinstance(node, BrSeq):
        successors = node.children
    else:
        successors = ()

    for successor in successors:
        _walk_if_types(successor, depth + 1)
if branch_tree:
_walk_if_types(branch_tree)
# ── 新增字段: variable_patterns ──
variable_patterns = {
"has_prev_key": False,
"has_accumulator": False,
"has_error_flag": False,
"has_switch": False,
"has_index": False,
"has_save_area": False,
"has_counter": False,
"has_work": False,
}
for f in fields_dict:
name = f.get('name', '')
if re.search(r'\bWS-PREV[-_]', name, re.IGNORECASE):
variable_patterns["has_prev_key"] = True
if re.search(r'[-_]CNT\b', name, re.IGNORECASE) or re.search(r'[-_]ACCUM\b', name, re.IGNORECASE):
variable_patterns["has_accumulator"] = True
if re.search(r'[-_]ERR\b', name, re.IGNORECASE) or re.search(r'[-_]ERROR[-_]', name, re.IGNORECASE):
variable_patterns["has_error_flag"] = True
if re.search(r'[-_]SW\b', name, re.IGNORECASE) or re.search(r'[-_]FLAG\b', name, re.IGNORECASE):
variable_patterns["has_switch"] = True
if re.search(r'[-_]IDX\b', name, re.IGNORECASE) or re.search(r'[-_]INDX\b', name, re.IGNORECASE) or re.search(r'[-_]SUB\b', name, re.IGNORECASE):
variable_patterns["has_index"] = True
if re.search(r'[-_]SAVE[-_]', name, re.IGNORECASE) or re.search(r'[-_]HOLD[-_]', name, re.IGNORECASE):
variable_patterns["has_save_area"] = True
if re.search(r'[-_]CNT\b', name, re.IGNORECASE) or re.search(r'[-_]COUNT\b', name, re.IGNORECASE):
variable_patterns["has_counter"] = True
if name.startswith('WS-') and not re.search(r'(?:CNT|ERR|SW|IDX|INDX|SUB|SAVE|HOLD|PREV|ACCUM)', name, re.IGNORECASE):
if re.search(r'[-_]W\b|[-_]WORK\b|[-_]WK\b|^WS-W[0O]\w', name, re.IGNORECASE):
variable_patterns["has_work"] = True
# ── 新增字段: open_pattern ──
open_pattern = "sequential"
if proc_div:
proc_upper = proc_div.upper()
open_positions = [m.start() for m in re.finditer(r'\bOPEN\b', proc_upper)]
close_positions = [m.start() for m in re.finditer(r'\bCLOSE\b', proc_upper)]
if open_positions and close_positions:
# Check OPEN ... CLOSE ... OPEN sequence
for i, opos in enumerate(open_positions):
for cpos in close_positions:
if cpos > opos:
for opos2 in open_positions:
if opos2 > cpos:
open_pattern = "open-close-open"
break
if open_pattern == "open-close-open":
break
if open_pattern == "open-close-open":
break
return {
"paragraphs": sorted(paragraphs) if paragraphs else [],
"decision_points": decision_points,
"branch_tree": branch_tree,
"file_count": len(file_sec) if file_sec else 0,
"open_directions": open_dir,
"has_search_all": any('SEARCH' in str(dp.get('label', '')) for dp in decision_points),
"has_evaluate": any(dp['kind'] == 'EVALUATE' for dp in decision_points),
"has_call": 'CALL' in cobol_source.upper(),
"has_break": any('KEY' in str(dp.get('label', '')).upper() for dp in decision_points),
"total_branches": total_branches,
"total_paragraphs": len(paragraphs),
"branch_tree_obj": branch_tree,
"paragraphs": sorted(paragraphs) if paragraphs else [],
"decision_points": decision_points,
"branch_tree": branch_tree,
"file_count": len(file_sec) if file_sec else 0,
"open_directions": open_dir,
"has_search_all": any('SEARCH' in str(dp.get('label', '')) for dp in decision_points),
"has_evaluate": any(dp['kind'] == 'EVALUATE' for dp in decision_points),
"has_call": 'CALL' in cobol_source.upper(),
"has_break": any('KEY' in str(dp.get('label', '')).upper() for dp in decision_points),
"total_branches": total_branches,
"total_paragraphs": len(paragraphs),
"branch_tree_obj": branch_tree,
# ── 新增 8 类结构特征 ──
"select_files": select_files,
"open_directions_detail": open_directions_detail,
"has_divide": has_divide,
"divide_constants": divide_constants,
"has_inspect": has_inspect,
"has_string": has_string,
"perform_patterns": perform_patterns,
"main_loop": main_loop,
"if_types": if_types,
"variable_patterns": variable_patterns,
"open_pattern": open_pattern,
}
+4
View File
@@ -0,0 +1,4 @@
"""允许 python -m cobol_testgen 直接运行"""
from cobol_testgen import main
main()
+258
View File
@@ -0,0 +1,258 @@
"""条件层:COBOL条件表达式解析 + MC/DC枚举 + 约束合并"""
import re
from .models import CondLeaf, CondAnd, CondOr, CondNot, PicInfo
# ── 条件解析 ──
def _split_at_operator(text, operator):
"""Split text on operator word, respecting parentheses."""
result = []
current = []
depth = 0
# Normalize so parentheses are space-delimited tokens
normalized = text.replace('(', ' ( ').replace(')', ' ) ')
for token in normalized.split():
if not token:
continue
if token == '(':
depth += 1
current.append(token)
elif token == ')':
depth -= 1
current.append(token)
elif token == operator and depth == 0:
result.append(' '.join(current).strip())
current = []
else:
current.append(token)
result.append(' '.join(current).strip())
return result
def parse_single_condition(text, fields=None):
    """Parse one simple condition into a ``(field, operator, value)`` tuple.

    Examples: ``AMOUNT > 1000`` -> ``('AMOUNT', '>', '1000')``; a subscripted
    left side such as ``WS-ITEM(SUB) = 'A'`` keeps its subscript with the
    whitespace around parentheses removed. If *fields* is given and *text*
    (upper-cased) equals the name of an entry flagged ``is_88``, the result
    is ``(parent, '=', value)`` taken from that entry. An arithmetic left
    side (``A + B > C``) is accepted as a fallback.

    Returns None when the text contains ' AND ' / ' OR ' or matches nothing.
    """
    if ' AND ' in text or ' OR ' in text:
        return None

    # 88-level condition names resolve to "<parent> = <value>".
    if fields:
        wanted = text.upper()
        for entry in fields:
            if entry.get('is_88') and entry['name'] == wanted:
                return (entry.get('parent', ''), '=', entry.get('value', ''))

    # Plain (optionally subscripted) field first, arithmetic expression second.
    patterns = (
        r"^(\w[\w-]*(?:\s*\([^)]*\))?)\s*(>=|<=|<>|>|<|=)\s*(.+)$",
        r"^(\w[\w\s+\-*/().-]+?)\s*(>=|<=|<>|>|<|=)\s*(.+)$",
    )
    for pattern in patterns:
        hit = re.match(pattern, text)
        if hit is None:
            continue
        lhs, op, rhs = hit.groups()
        lhs = re.sub(r'\s*([(),])\s*', r'\1', lhs).strip()
        rhs = rhs.strip().strip("'").strip('"')
        return (lhs, op, rhs)
    return None
def parse_compound_condition(text, fields=None):
    """Build a condition tree (CondOr / CondAnd / CondNot / CondLeaf) from text.

    The text is split on top-level OR first, then on top-level AND, so AND
    binds tighter than OR; a leading ``NOT`` is handled after both splits.
    One pair of parentheses enclosing the whole expression is peeled off
    when the inner text parses. Returns None for empty or unparseable text.
    """
    text = text.strip()
    if not text:
        return None

    # Make every parenthesis its own space-delimited token.
    text = text.replace('(', ' ( ').replace(')', ' ) ')
    text = re.sub(r'\s+', ' ', text).strip()

    # Peel an outer "( ... )" only if the opening paren closes at the very end.
    if text.startswith('(') and text.endswith(')'):
        level = 0
        encloses_all = True
        for ch in text[:-1]:
            if ch == '(':
                level += 1
            elif ch == ')':
                level -= 1
            if level == 0:
                encloses_all = False
                break
        if encloses_all:
            unwrapped = parse_compound_condition(text[1:-1], fields)
            if unwrapped:
                return unwrapped

    # Lowest precedence first: OR, then AND; chains are left-associative.
    for keyword, combine in (('OR', CondOr), ('AND', CondAnd)):
        segments = _split_at_operator(text, keyword)
        if len(segments) > 1:
            subtree = parse_compound_condition(segments[0], fields)
            for segment in segments[1:]:
                subtree = combine(subtree, parse_compound_condition(segment, fields))
            return subtree

    if text.upper().startswith('NOT '):
        negated = parse_compound_condition(text[4:].strip(), fields)
        return CondNot(negated) if negated else None

    leaf_parts = parse_single_condition(text, fields)
    if leaf_parts:
        return CondLeaf(*leaf_parts)
    return None
def collect_leaves(tree):
    """Return every CondLeaf of *tree* as a list, left to right.

    A CondNot contributes the leaves of its child; anything that is not a
    known condition node yields an empty list.
    """
    if isinstance(tree, CondLeaf):
        return [tree]
    if isinstance(tree, CondNot):
        return collect_leaves(tree.child)
    if isinstance(tree, (CondAnd, CondOr)):
        gathered = list(collect_leaves(tree.left))
        gathered.extend(collect_leaves(tree.right))
        return gathered
    return []
def evaluate_tree(tree, assignment):
    """Evaluate *tree* using *assignment*, a mapping from leaf to truth value.

    Leaves are looked up in *assignment*; AND/OR short-circuit like Python's
    ``and`` / ``or``. An unrecognised node evaluates to False.
    """
    if isinstance(tree, CondLeaf):
        return assignment[tree]
    if isinstance(tree, CondNot):
        return not evaluate_tree(tree.child, assignment)
    if isinstance(tree, CondAnd):
        lhs = evaluate_tree(tree.left, assignment)
        return lhs and evaluate_tree(tree.right, assignment)
    if isinstance(tree, CondOr):
        lhs = evaluate_tree(tree.left, assignment)
        return lhs or evaluate_tree(tree.right, assignment)
    return False
def is_field(name, fields):
    """Return True if *name* matches the ``'name'`` of an entry in *fields*.

    A trailing subscript is dropped and the rest upper-cased before the
    comparison, e.g. ``WS-ITEM-STATUS(WS-INDEX-VAR)`` -> ``WS-ITEM-STATUS``.
    """
    target = re.sub(r'\s*\(.*\)\s*$', '', name).strip().upper()
    return any(entry['name'] == target for entry in fields)
# ── MC/DC ──
def mcdc_sets(tree, fields=None):
    """Derive MC/DC constraint sets for a compound condition tree.

    Returns None when the tree has at most one leaf. Otherwise returns a list
    of ``(constraints, decision_outcome)`` pairs, where ``constraints`` holds
    one ``(field, op, value, want_true)`` tuple per leaf. For each leaf the
    first pair of truth assignments is used that differs only in that leaf
    and flips the decision; identical assignments are emitted once.
    ``fields`` is accepted but not used by this function.
    """
    leaves = collect_leaves(tree)
    if len(leaves) <= 1:
        return None

    # Full truth table: one (assignment, outcome) row per bit pattern.
    truth_table = []
    for mask in range(1 << len(leaves)):
        row = {leaf: bool(mask & (1 << pos)) for pos, leaf in enumerate(leaves)}
        truth_table.append((row, evaluate_tree(tree, row)))

    def _independence_pair(leaf):
        """First two rows differing only in *leaf* with different outcomes."""
        for row_a, out_a in truth_table:
            for row_b, out_b in truth_table:
                if row_a[leaf] == row_b[leaf] or out_a == out_b:
                    continue
                if all(row_a[o] == row_b[o] for o in leaves if o != leaf):
                    return (dict(row_a), out_a, dict(row_b), out_b)
        return None

    needed_pairs = {}
    for leaf in leaves:
        if leaf in needed_pairs:
            continue
        pair = _independence_pair(leaf)
        if pair is not None:
            needed_pairs[leaf] = pair

    # Turn the selected rows into constraint tuples, skipping duplicates.
    result = []
    seen = set()
    for first_row, first_out, second_row, second_out in needed_pairs.values():
        for row, outcome in ((first_row, first_out), (second_row, second_out)):
            signature = frozenset((leaf, row[leaf]) for leaf in leaves)
            if signature in seen:
                continue
            seen.add(signature)
            constraints = [
                (leaf.field, leaf.op, leaf.value, row[leaf]) for leaf in leaves
            ]
            result.append((constraints, outcome))
    return result
# ── 值计算 ──
def satisfying_value(field_info: dict, operator: str, value, want_true: bool) -> str:
    """Produce a field value that makes ``field <operator> value`` true or false.

    Numeric fields: *value* is scaled by ``10 ** decimal`` to an integer
    (0 if it cannot be converted), nudged according to *operator* and
    *want_true*, wrapped modulo ``10 ** (digits + decimal)`` and returned as
    zero-padded digits with no decimal point.

    Alphanumeric / alphabetic fields: for ``=``/``==`` and ``<>``/``!=`` the
    result is ``length`` copies of either the upper-cased first character of
    *value* ('A' if *value* is empty or not a string) or a different letter.

    Everything else falls back to zeros of width ``digits + decimal``
    (a single '0' when that width is 0).
    """
    ftype = field_info.get('type', 'unknown')
    digits = field_info.get('digits', 0)
    decimal = field_info.get('decimal', 0)
    total = digits + decimal

    if ftype == 'numeric':
        try:
            # +0.5 rounds the scaled value to the nearest integer.
            scaled = int(float(str(value)) * (10 ** decimal) + 0.5)
        except (ValueError, TypeError):
            scaled = 0

        if want_true:
            if operator == '>':
                scaled += 1
            elif operator == '<':
                scaled = max(0, scaled - 1)
            elif operator == '<>':
                scaled = (scaled + 1) % (10 ** total)
            # '>=', '=', '<=' are already satisfied by the value itself.
        elif operator in ('>', '>='):
            scaled = 0
        elif operator == '=':
            scaled = (scaled + 1) % (10 ** total)
        elif operator == '<=':
            scaled += 1
        # '<' and '<>' are already falsified by the value itself.

        scaled %= 10 ** total
        whole, fraction = divmod(scaled, 10 ** decimal)
        int_part = str(whole).zfill(digits)
        if decimal == 0:
            return int_part
        return int_part + str(fraction).zfill(decimal)

    if ftype in ('alphanumeric', 'alphabetic'):
        length = field_info.get('length', 1)
        if isinstance(value, str) and value:
            base_chr = value[0].upper()
        else:
            base_chr = 'A'
        is_equal = operator in ('=', '==')
        is_not_equal = operator in ('<>', '!=')
        if is_equal or is_not_equal:
            # Keep the base character when the comparison must hold as "equal".
            keep_base = is_equal if want_true else is_not_equal
            if keep_base:
                return base_chr.ljust(length, base_chr)
            other = chr(65 + (ord(base_chr) - 64) % 26)
            return other.ljust(length, other)

    return '0'.zfill(total)
+1649
View File
@@ -0,0 +1,1649 @@
"""核心层:PROCEDURE DIVISION解析 + 数据流追踪"""
import re
import logging
from datetime import datetime
from .models import BrSeq, BrIf, BrEval, BrPerform, BrSearch, BrSeq, CondLeaf, CondNot, ParseError, Assign, CallNode, ExitNode, GoTo
from .cond import parse_compound_condition, parse_single_condition, collect_leaves
logger = logging.getLogger(__name__)
# Single-word lines ending in a period that must NOT be taken for paragraph
# names (scope terminators and branch keywords).
_COBOL_SCOPE_ENDERS = {
    'END-IF', 'END-EVALUATE', 'END-PERFORM', 'END-EXEC', 'END-CALL',
    'END-READ', 'END-WRITE', 'END-DELETE', 'END-REWRITE', 'END-START',
    'END-SEARCH',
    'ELSE', 'WHEN', 'OTHER',
}


def scan_paragraphs(raw_lines):
    """Map each paragraph / section name to its body line range.

    A header is either ``NAME.`` alone on a line (upper-case, and not one of
    ``_COBOL_SCOPE_ENDERS``) or ``NAME SECTION`` with an optional period
    (case-insensitive; the name is upper-cased). The body runs from the line
    after the header up to the line before the next header.

    Args:
        raw_lines: list of source lines; each is stripped before matching.

    Returns:
        dict of ``name -> (first_body_index, last_body_index)``, both
        inclusive indexes into *raw_lines*. An empty body yields
        ``last == first - 1``.

    The same header test is used both to start a block and to end the
    previous one. Previously a section header without its period was accepted
    as a start but not as an end, so such a line was swallowed into the
    preceding paragraph and never registered.

    NOTE(review): a one-word statement on its own line such as ``GOBACK.``
    also matches the paragraph pattern — confirm callers expect that.
    """
    para_re = re.compile(r'^([A-Z0-9][A-Z0-9-]*)\.\s*$')
    section_re = re.compile(r'^([A-Z][A-Z0-9-]*)\s+SECTION\.?\s*$', re.IGNORECASE)

    def _header_name(text):
        """Return the paragraph/section name declared by *text*, else None."""
        para = para_re.match(text)
        if para and para.group(1) not in _COBOL_SCOPE_ENDERS:
            return para.group(1)
        section = section_re.match(text)
        if section:
            return section.group(1).upper()
        return None

    paragraphs = {}
    total = len(raw_lines)
    i = 0
    while i < total:
        name = _header_name(raw_lines[i].strip())
        if name is None:
            i += 1
            continue
        # Advance to the next header (or the end of input).
        j = i + 1
        while j < total and _header_name(raw_lines[j].strip()) is None:
            j += 1
        paragraphs[name] = (i + 1, j - 1)
        i = j
    return paragraphs
def build_branch_tree(proc_text, fields=None):
    """Parse PROCEDURE DIVISION text into a branch tree plus assignment map.

    The statements parsed as the "main" sequence are chosen as follows:
    if code precedes the first paragraph header (ignoring blank lines and
    lines containing 'PROCEDURE DIVISION'), that leading code is used;
    otherwise the body of the first paragraph; if there is no paragraph at
    all, every line. Blank lines are dropped before parsing.

    Returns:
        ``(tree, assignments)`` — the result of ``_BrParser.parse_seq`` and
        the dict the parser filled with recorded assignments.
    """
    raw_lines = proc_text.split('\n')
    paragraphs = scan_paragraphs(raw_lines)

    # Locate the first line that is a header of a known paragraph.
    first_para_name = None
    first_para_idx = None
    for idx, raw in enumerate(raw_lines):
        header = re.match(r'^([A-Z0-9][A-Z0-9-]*)\.\s*$', raw.strip())
        if header and header.group(1) in paragraphs:
            first_para_name = header.group(1)
            first_para_idx = idx
            break

    if not first_para_name:
        main_raw = raw_lines
    else:
        preamble = raw_lines[:first_para_idx]
        if any(l.strip() and 'PROCEDURE DIVISION' not in l for l in preamble):
            main_raw = preamble
        else:
            body_start, body_end = paragraphs[first_para_name]
            main_raw = raw_lines[body_start:body_end + 1]

    non_blank = [l for l in main_raw if l.strip()]
    assignments = {}
    parser = _BrParser(non_blank, paragraphs, raw_lines, assignments, fields)
    tree = parser.parse_seq(terminators={'GOBACK', 'STOP RUN', 'EXIT PROGRAM'})
    return tree, assignments
# ── 定数 ──
_FIGURATIVE_CONSTANTS = frozenset({
'ZERO', 'ZEROS', 'ZEROES',
'SPACE', 'SPACES',
'HIGH-VALUE', 'HIGH-VALUES',
'LOW-VALUE', 'LOW-VALUES',
})
# ── _BrParser ──
class _BrParser:
def __init__(self, lines, paragraphs=None, raw_lines=None, assignments=None, fields=None, goto_depth=0):
self.lines = lines
self.pos = 0
self.paragraphs = paragraphs or {}
self.raw_lines = raw_lines or lines
# assignments is a dict[str, list[dict]] — append, never overwrite
self.assignments = assignments if assignments is not None else {}
self.fields = fields
self._goto_depth = goto_depth
def peek(self):
if self.pos < len(self.lines):
return self.lines[self.pos].strip()
return ''
def clean(self):
return self.peek().rstrip('.').strip()
def advance(self):
self.pos += 1
def parse_seq(self, end_tokens=None, end_check=None, terminators=None):
    """Parse statements from the current position into a ``BrSeq``.

    Each loop iteration looks at the current cleaned line and dispatches on
    its leading keyword. Parsing stops, returning the sequence built so far,
    when:

    * ``_is_end`` reports the line as an end marker (line is NOT consumed);
    * the line is one of *terminators* (line IS consumed);
    * a ``GO TO`` or ``EXIT PARAGRAPH|PERFORM|SECTION`` was handled — the
      following lines up to the next end marker / scope ender are skipped;
    * the input is exhausted.

    Args:
        end_tokens: tokens that end this sequence (see ``_is_end``).
        end_check: optional predicate on the cleaned line that ends it.
        terminators: optional collection of whole lines that end it.

    Returns:
        The ``BrSeq`` holding the nodes parsed. Assignment-like statements
        are also appended to ``self.assignments`` under their target name.
    """
    if end_tokens is None:
        end_tokens = []
    seq = BrSeq()
    while self.pos < len(self.lines):
        line = self.clean()
        if self._is_end(line, end_tokens, end_check):
            return seq
        if terminators and line in terminators:
            self.advance()
            return seq
        # GO TO: record the jump, then discard the rest of this scope.
        m_goto = re.match(r'^GO\s+TO\s+(\w[\w-]*)\s*$', line)
        if m_goto:
            goto_node = self._parse_goto(m_goto.group(1))
            if goto_node:
                seq.add(goto_node)
            while self.pos < len(self.lines):
                cl = self.clean()
                if self._is_end(cl, end_tokens, end_check):
                    break
                if cl in _COBOL_SCOPE_ENDERS:
                    break
                self.advance()
            return seq
        # EXIT PARAGRAPH / PERFORM / SECTION: same skip-ahead as GO TO.
        m_exit = re.match(r'^EXIT\s+(PARAGRAPH|PERFORM|SECTION)\s*$', line)
        if m_exit:
            self.advance()
            seq.add(ExitNode(m_exit.group(1)))
            while self.pos < len(self.lines):
                cl = self.clean()
                if self._is_end(cl, end_tokens, end_check):
                    break
                if cl in _COBOL_SCOPE_ENDERS:
                    break
                self.advance()
            return seq
        # Structured statements: the dedicated sub-parser is called without
        # advancing here, so it is expected to consume its own lines.
        m = re.match(r'^IF\s+(.+?)(?:THEN)?\s*$', line)
        if m:
            seq.add(self._parse_if())
            continue
        m = re.match(r'^EVALUATE\s+(.+?)\s*$', line)
        if m:
            seq.add(self._parse_evaluate())
            continue
        m = re.match(r'^PERFORM\s+', line)
        if m:
            perf_node = self._parse_perform()
            if perf_node:
                seq.add(perf_node)
            continue
        m_search = re.match(r'^SEARCH\b(?:\s+(ALL))?\s+(\w[\w-]*)(?:\s+VARYING\s+(\w[\w-]*))?', line, re.IGNORECASE)
        if m_search:
            seq.add(self._parse_search(m_search))
            continue
        m = re.match(r'^INITIALIZE\s+', line)
        if m:
            init_seq = self._parse_initialize()
            if init_seq:
                seq.add(init_seq)
            continue
        m_str = re.match(r'^STRING\s+', line)
        if m_str:
            str_seq = self._parse_string()
            if str_seq:
                seq.add(str_seq)
            continue
        m_unstr = re.match(r'^UNSTRING\s+', line)
        if m_unstr:
            unstr_seq = self._parse_unstring()
            if unstr_seq:
                seq.add(unstr_seq)
            continue
        m = re.match(r'^CALL\s+', line)
        if m:
            seq.add(self._parse_call())
            continue
        # ACCEPT target [FROM source]: source defaults to 'USER'.
        m = re.match(
            r'^ACCEPT\s+(\w[\w-]*)(?:\s+FROM\s+(DATE|TIME|DAY|DAY-OF-WEEK|YEAR|YYYYMMDD|HHMMSS))?\s*$',
            line, re.IGNORECASE
        )
        if m:
            tgt = m.group(1).strip().upper()
            from_type = (m.group(2) or 'USER').upper()
            info = {'type': 'accept', 'from': from_type}
            self.assignments.setdefault(tgt, []).append(info)
            seq.add(Assign(tgt, info))
            self.advance()
            continue
        # READ file INTO record: the INTO target is recorded as assigned.
        m = re.match(r'^READ\s+(\w[\w-]*)\s+INTO\s+(\w[\w-]*)\s*$', line, re.IGNORECASE)
        if m:
            tgt = m.group(2).strip().upper()
            info = {'type': 'read_into', 'file': m.group(1).strip().upper(), 'source_vars': []}
            self.assignments.setdefault(tgt, []).append(info)
            seq.add(Assign(tgt, info))
            self.advance()
            # Skip the remaining lines of the READ statement
            # (AT END / NOT AT END / END-READ).
            # NOTE(review): this loop only stops at END-READ or end of input,
            # so a READ ... INTO without END-READ would consume every
            # remaining line — confirm inputs always carry END-READ.
            while self.pos < len(self.lines):
                cl = self.clean()
                if cl in ('END-READ', 'END-READ.'):
                    self.advance()
                    break
                self.advance()
            continue
        m_set_false = re.match(r'^SET\s+(\w[\w-]*)\s+TO\s+FALSE\s*$', line, re.IGNORECASE)
        if m_set_false:
            seq.add(self._parse_set_false(m_set_false.group(1)))
            continue
        # WRITE / REWRITE record [FROM source]: with FROM the source field is
        # the recorded key, otherwise the record name itself.
        m = re.match(r'^(?:WRITE|REWRITE)\s+(\w[\w-]*)(?:\s+FROM\s+(\w[\w-]*))?\s*$', line, re.IGNORECASE)
        if m:
            rec_name = m.group(1).strip().upper()
            if m.group(2):
                tgt = m.group(2).strip().upper()
                info = {'type': 'write_from', 'file': rec_name, 'source_vars': [tgt]}
                self.assignments.setdefault(tgt, []).append(info)
                seq.add(Assign(tgt, info))
            else:
                seq.add(Assign(rec_name, {'type': 'write_bare', 'file': rec_name}))
            self.advance()
            continue
        m_set = re.match(r'^SET\s+(\w[\w-]*)\s+TO\s+TRUE\s*$', line, re.IGNORECASE)
        if m_set:
            seq.add(self._parse_set_true(m_set.group(1)))
            continue
        # INSPECT: recorded only when at least one phrase could be parsed;
        # the line is consumed either way.
        m_insp = re.match(r'^INSPECT\s+', line, re.IGNORECASE)
        if m_insp:
            info = self._parse_inspect(line)
            if info:
                tgt = info.get('tgt', '')
                self.assignments.setdefault(tgt, []).append(info)
                seq.add(Assign(tgt, info))
            self.advance()
            continue
        # Fallback: try the line as a MOVE / COMPUTE / arithmetic statement;
        # unrecognised lines are skipped.
        assign_node = self._record_assignment(line)
        if assign_node:
            seq.add(assign_node)
        self.advance()
    return seq
def _is_end(self, line, end_tokens, end_check):
if end_check and end_check(line):
return True
for tok in end_tokens:
if line == tok or line.startswith(tok + ' '):
return True
return False
# ── INSPECT ──
_PIC_FIG_CONV = {'ZERO': '0', 'ZEROS': '0', 'ZEROES': '0',
'SPACE': ' ', 'SPACES': ' '}
@staticmethod
def _expand_figurative(val):
if val.upper() in _BrParser._PIC_FIG_CONV:
return _BrParser._PIC_FIG_CONV[val.upper()]
return val
def _parse_inspect_phrase(self, phrase):
m = re.match(
r'TALLYING\s+(\w[\w-]*)\s+FOR\s+'
r'(LEADING|TRAILING|CHARACTERS)'
r'(?:\s+([\'"])(.*?)\3)?'
r'(?:\s+(BEFORE|AFTER)\s+INITIAL\s+([\'"])(.*?)\6)?\s*$',
phrase, re.IGNORECASE
)
if m:
return ('tally', {
'count_var': m.group(1).upper(),
'kind': m.group(2).upper(),
'char': self._expand_figurative(m.group(4) or ''),
'before_after': (m.group(5) or '').upper(),
'delimiter': self._expand_figurative(m.group(7) or ''),
})
m = re.match(
r'REPLACING\s+'
r'(ALL|LEADING|FIRST|CHARACTERS)\s+'
r'([\'"])(.*?)\2\s+BY\s+'
r'([\'"])(.*?)\4'
r'(?:\s+(BEFORE|AFTER)\s+INITIAL\s+([\'"])(.*?)\7)?\s*$',
phrase, re.IGNORECASE
)
if m:
return ('replace', {
'kind': m.group(1).upper(),
'src': self._expand_figurative(m.group(3)),
'dst': self._expand_figurative(m.group(5)),
'before_after': (m.group(6) or '').upper(),
'delimiter': self._expand_figurative(m.group(8) or ''),
})
m = re.match(
r'CONVERTING\s+([\'"])(.*?)\1\s+TO\s+([\'"])(.*?)\3\s*$',
phrase, re.IGNORECASE
)
if m:
return ('convert', {
'from_chars': self._expand_figurative(m.group(2)),
'to_chars': self._expand_figurative(m.group(4)),
})
return None
def _parse_inspect(self, line):
m = re.match(r'^INSPECT\s+(\w[\w-]*)\s+(.+)$', line, re.IGNORECASE)
if not m:
return None
tgt = m.group(1).upper()
rest = m.group(2).strip()
phrases = re.split(r'\s+(?=(?:TALLYING|REPLACING|CONVERTING)\b)', rest, flags=re.IGNORECASE)
sub_ops = []
for phrase in phrases:
sub = self._parse_inspect_phrase(phrase.strip())
if sub:
sub_ops.append(sub)
if not sub_ops:
return None
return {
'type': 'inspect',
'tgt': tgt,
'source_vars': [tgt],
'sub_ops': sub_ops,
}
def _record_assignment(self, line):
if self.assignments is None:
return None
# MOVE
m = re.match(r'^MOVE\s+(.+?)\s+TO\s+(.+?)\s*$', line)
if m:
raw_src = m.group(1).strip()
tgt = m.group(2).strip()
# 保留下标:WS-CODE-VAL(1) → key='WS-CODE-VAL(1)'
m_tgt = re.match(r'^([A-Z][A-Z0-9-]*)(?:\s*\(([^)]*)\))?\s*$', tgt, re.IGNORECASE)
if not m_tgt:
return None
tgt_base = m_tgt.group(1).upper()
if m_tgt.group(2):
subscript = re.sub(r'\s*', '', m_tgt.group(2))
tgt_key = f"{tgt_base}({subscript})"
else:
tgt_key = tgt_base
src_clean = raw_src.strip("'").strip('"')
is_field_name = self.fields and any(f['name'] == src_clean for f in self.fields)
if is_field_name:
info = {'type': 'move', 'source_vars': [src_clean]}
else:
info = {'type': 'move_literal', 'literal': src_clean}
self.assignments.setdefault(tgt_key, []).append(info)
return Assign(tgt_key, info)
# COMPUTE
m = re.match(r'^COMPUTE\s+(.+?)(?:\s+ROUNDED)?\s*=\s*(.*)$', line)
if m:
tgt_raw = m.group(1).strip()
expr = m.group(2).strip()
m_tgt = re.match(r'^([A-Z][A-Z0-9-]*)(?:\s*\(([^)]*)\))?\s*$', tgt_raw, re.IGNORECASE)
tgt_key = tgt_raw
if m_tgt:
tgt_base = m_tgt.group(1).upper()
if m_tgt.group(2):
subscript = re.sub(r'\s*', '', m_tgt.group(2))
tgt_key = f"{tgt_base}({subscript})"
else:
tgt_key = tgt_base
if not expr:
peek_pos = self.pos + 1
if peek_pos < len(self.lines):
nxt = self.lines[peek_pos].strip().rstrip('.').strip()
if nxt and not re.match(r'^(PERFORM|END-|IF|ELSE|EVALUATE|WHEN|OTHER|MOVE|COMPUTE|ADD|SUBTRACT|MULTIPLY|DIVIDE|STRING|UNSTRING|READ|WRITE|INITIALIZE|ACCEPT|CALL|GO\s*TO|GOBACK|STOP|EXIT)', nxt, re.IGNORECASE):
expr = nxt
if expr:
info = self._parse_compute_expr(tgt_key, expr)
self.assignments.setdefault(tgt_key, []).append(info)
return Assign(tgt_key, info)
# ADD x TO y → y = y + x (支持变量和常量源)
m = re.match(r'^ADD\s+(\w[\w-]*)\s+TO\s+(\w[\w-]*?)(?:\s+ROUNDED)?\s*$', line)
if m:
src = m.group(1).strip()
tgt = m.group(2).strip()
is_field = self.fields and any(f['name'] == src for f in self.fields)
if is_field:
info = {'type': 'compute', 'source_vars': [tgt, src],
'op': '+', 'const': None, 'expr': f'{tgt} + {src}'}
else:
try:
const = float(src)
info = {'type': 'compute', 'source_vars': [tgt],
'op': '+', 'const': const, 'expr': f'{tgt} + {const}'}
except ValueError:
return None
self.assignments.setdefault(tgt, []).append(info)
return Assign(tgt, info)
# ADD x TO y GIVING z → z = y + x
m = re.match(r'^ADD\s+(.+?)\s+TO\s+(\w[\w-]*)\s+GIVING\s+(\w[\w-]*?)(?:\s+ROUNDED)?\s*$', line, re.IGNORECASE)
if m:
raw_a = m.group(1).strip()
src_b = m.group(2).strip()
tgt = m.group(3).strip()
is_field_a = self.fields and any(f['name'] == raw_a for f in self.fields)
if is_field_a:
info = {'type': 'compute', 'source_vars': [src_b, raw_a],
'op': '+', 'const': None, 'expr': f'{src_b} + {raw_a}'}
else:
try:
const = float(raw_a)
info = {'type': 'compute', 'source_vars': [src_b],
'op': '+', 'const': const, 'expr': f'{src_b} + {const}'}
except ValueError:
return None
self.assignments.setdefault(tgt, []).append(info)
return Assign(tgt, info)
# ADD a[, b[, c...]] GIVING z → z = a + b + c + ...
m = re.match(r'^ADD\s+(.+?)\s+GIVING\s+(\w[\w-]*?)(?:\s+ROUNDED)?\s*$', line, re.IGNORECASE)
if m:
raw_parts = re.findall(r'[A-Z][A-Z0-9-]*|\d+(?:\.\d+)?', m.group(1).upper())
fields_only = []
const_sum = 0.0
for p in raw_parts:
if self.fields and any(f['name'] == p for f in self.fields):
fields_only.append(p)
else:
try:
const_sum += float(p)
except ValueError:
pass
tgt = m.group(2).strip()
if not fields_only:
info = {'type': 'move_literal',
'literal': str(int(const_sum)) if const_sum == int(const_sum) else str(const_sum)}
else:
info = {'type': 'compute', 'source_vars': fields_only,
'op': '+', 'const': const_sum if const_sum != 0 else None,
'expr': '+'.join(fields_only) + (f' + {const_sum}' if const_sum else '')}
self.assignments.setdefault(tgt, []).append(info)
return Assign(tgt, info)
# SUBTRACT x FROM y → y = y - x
m = re.match(r'^SUBTRACT\s+([\d.]+)\s+FROM\s+(\w[\w-]*?)(?:\s+ROUNDED)?\s*$', line)
if m:
const = float(m.group(1))
tgt = m.group(2).strip()
info = {'type': 'compute', 'source_vars': [tgt],
'op': '-', 'const': const, 'expr': f'{tgt} - {const}'}
self.assignments.setdefault(tgt, []).append(info)
return Assign(tgt, info)
# SUBTRACT a FROM b GIVING z → z = b - a
m = re.match(r'^SUBTRACT\s+([\d.\w-]*)\s+FROM\s+(\w[\w-]*)\s+GIVING\s+(\w[\w-]*?)(?:\s+ROUNDED)?\s*$', line, re.IGNORECASE)
if m:
raw_a = m.group(1).strip()
src_b = m.group(2).strip()
tgt = m.group(3).strip()
is_field_a = self.fields and any(f['name'] == raw_a for f in self.fields)
if is_field_a:
info = {'type': 'compute', 'source_vars': [src_b, raw_a],
'op': '-', 'const': None, 'expr': f'{src_b} - {raw_a}'}
else:
try:
const = float(raw_a)
info = {'type': 'compute', 'source_vars': [src_b],
'op': '-', 'const': const, 'expr': f'{src_b} - {const}'}
except ValueError:
return None
self.assignments.setdefault(tgt, []).append(info)
return Assign(tgt, info)
# MULTIPLY x BY y → y = y * x
m = re.match(r'^MULTIPLY\s+([\d.]+)\s+BY\s+(\w[\w-]*?)(?:\s+ROUNDED)?\s*$', line)
if m:
const = float(m.group(1))
tgt = m.group(2).strip()
info = {'type': 'compute', 'source_vars': [tgt],
'op': '*', 'const': const, 'expr': f'{tgt} * {const}'}
self.assignments.setdefault(tgt, []).append(info)
return Assign(tgt, info)
# MULTIPLY a BY b GIVING z → z = a * b
m = re.match(r'^MULTIPLY\s+(\w[\w-]*)\s+BY\s+(\w[\w-]*)\s+GIVING\s+(\w[\w-]*?)(?:\s+ROUNDED)?\s*$', line, re.IGNORECASE)
if m:
src_a = m.group(1).strip()
src_b = m.group(2).strip()
tgt = m.group(3).strip()
is_field_a = self.fields and any(f['name'] == src_a for f in self.fields)
if is_field_a:
info = {'type': 'compute', 'source_vars': [src_a, src_b],
'op': '*', 'const': None, 'expr': f'{src_a} * {src_b}'}
else:
try:
const = float(src_a)
info = {'type': 'compute', 'source_vars': [src_b],
'op': '*', 'const': const, 'expr': f'{const} * {src_b}'}
except ValueError:
return None
self.assignments.setdefault(tgt, []).append(info)
return Assign(tgt, info)
# DIVIDE x INTO y → y = y / x
m = re.match(r'^DIVIDE\s+([\d.]+)\s+INTO\s+(\w[\w-]*?)(?:\s+ROUNDED)?\s*$', line)
if m:
const = float(m.group(1))
tgt = m.group(2).strip()
info = {'type': 'compute', 'source_vars': [tgt],
'op': '/', 'const': const, 'expr': f'{tgt} / {const}'}
self.assignments.setdefault(tgt, []).append(info)
return Assign(tgt, info)
# DIVIDE a INTO b GIVING z → z = b / a
# Optional REMAINDER r → r = b - (b / a) * a
m = re.match(r'^DIVIDE\s+(.+?)\s+INTO\s+(\w[\w-]*)\s+GIVING\s+(\w[\w-]*?)(?:\s+ROUNDED)?(?:\s+REMAINDER\s+(\w[\w-]*))?\s*$', line, re.IGNORECASE)
if m:
raw_a = m.group(1).strip()
src_b = m.group(2).strip()
tgt = m.group(3).strip()
rem_tgt = m.group(4).strip().upper() if m.group(4) else None
is_field_a = self.fields and any(f['name'] == raw_a for f in self.fields)
if is_field_a:
info = {'type': 'compute', 'source_vars': [src_b, raw_a],
'op': '/', 'const': None, 'expr': f'{src_b} / {raw_a}'}
rem_info = {'type': 'compute', 'source_vars': [src_b, raw_a],
'op': 'rem', 'const': None, 'expr': f'REM({src_b} / {raw_a})'}
else:
try:
const = float(raw_a)
info = {'type': 'compute', 'source_vars': [src_b],
'op': '/', 'const': const, 'expr': f'{src_b} / {const}'}
rem_info = {'type': 'compute', 'source_vars': [src_b],
'op': 'rem', 'const': const, 'expr': f'REM({src_b} / {const})'}
except ValueError:
return None
self.assignments.setdefault(tgt, []).append(info)
seq = BrSeq()
seq.add(Assign(tgt, info))
if rem_tgt:
self.assignments.setdefault(rem_tgt, []).append(rem_info)
seq.add(Assign(rem_tgt, rem_info))
return seq
# DIVIDE a BY b GIVING z → z = a / b
# Optional REMAINDER r → r = a - (a / b) * b
m = re.match(r'^DIVIDE\s+(\w[\w-]*)\s+BY\s+(\w[\w-]*)\s+GIVING\s+(\w[\w-]*?)(?:\s+ROUNDED)?(?:\s+REMAINDER\s+(\w[\w-]*))?\s*$', line, re.IGNORECASE)
if m:
src_a = m.group(1).strip()
src_b = m.group(2).strip()
tgt = m.group(3).strip()
rem_tgt = m.group(4).strip().upper() if m.group(4) else None
info = {'type': 'compute', 'source_vars': [src_a, src_b],
'op': '/', 'const': None, 'expr': f'{src_a} / {src_b}'}
rem_info = {'type': 'compute', 'source_vars': [src_a, src_b],
'op': 'rem', 'const': None, 'expr': f'REM({src_a} / {src_b})'}
self.assignments.setdefault(tgt, []).append(info)
seq = BrSeq()
seq.add(Assign(tgt, info))
if rem_tgt:
self.assignments.setdefault(rem_tgt, []).append(rem_info)
seq.add(Assign(rem_tgt, rem_info))
return seq
return None
def _parse_compute_expr(self, target, expr):
# const OP var
m = re.match(r'^\s*([\d.]+)\s*([+\-*/])\s*(\w[\w-]*)\s*$', expr)
if m:
const, op, var = float(m.group(1)), m.group(2), m.group(3)
return {'type': 'compute', 'source_vars': [var], 'op': op, 'const': const, 'expr': expr}
# var OP const
m = re.match(r'^\s*(\w[\w-]*)\s*([+\-*/])\s*([\d.]+)\s*$', expr)
if m:
var, op, const = m.group(1), m.group(2), float(m.group(3))
return {'type': 'compute', 'source_vars': [var], 'op': op, 'const': const, 'expr': expr}
# var OP var
m = re.match(r'^\s*(\w[\w-]*)\s*([+\-*/])\s*(\w[\w-]*)\s*$', expr)
if m:
var1, op, var2 = m.group(1), m.group(2), m.group(3)
return {'type': 'compute', 'source_vars': [var1, var2], 'op': op, 'expr': expr}
# complex expression — extract variable names only
vars_in = re.findall(r'[A-Z][A-Z0-9-]*', expr.upper())
return {'type': 'compute', 'source_vars': list(set(vars_in)), 'op': None, 'const': None, 'expr': expr}
# ── SEARCH / SEARCH ALL ──
def _parse_search(self, m):
    """Parse a SEARCH / SEARCH ALL statement into a BrSearch node.

    Args:
        m: Match object of the SEARCH header; group 1 is truthy for
            SEARCH ALL, group 2 is the table name and group 3 the optional
            VARYING index name.

    Returns:
        The BrSearch node.  ``at_end_seq`` / ``has_at_end`` are filled from an
        AT END clause; every WHEN contributes a ``(condition_text, body)``
        pair to ``when_list`` and its parsed condition to ``cond_trees``.
    """
    # Both spellings end the statement.  The clause bodies must stop on the
    # same set as the main loop, otherwise a body followed by "END-SEARCH."
    # would keep consuming the statements after the SEARCH.
    terminators = ('END-SEARCH', 'END-SEARCH.')

    def _ends_at_end_body(l):
        return bool(re.match(r'^WHEN\b', l, re.IGNORECASE)) or l in terminators

    def _ends_when_body(l):
        return bool(re.match(r'^(WHEN|AT\s+END)\b', l, re.IGNORECASE)) or l in terminators

    is_all = bool(m.group(1))
    table = m.group(2).upper()
    varying = m.group(3).upper() if m.group(3) else None
    node = BrSearch(table, is_all=is_all, varying=varying)
    self.advance()
    while self.pos < len(self.lines):
        line = self.clean()
        if line in terminators:
            self.advance()
            return node
        m_at = re.match(r'^AT\s+END(.+)?$', line, re.IGNORECASE)
        if m_at:
            self.advance()
            rest = m_at.group(1)
            if rest and rest.strip():
                # A statement sharing the AT END line is re-queued as its own
                # line so that parse_seq picks it up as the clause body.
                self.lines.insert(self.pos, rest.strip())
            node.at_end_seq = self.parse_seq(end_check=_ends_at_end_body)
            node.has_at_end = True
            continue
        m_when = re.match(r'^WHEN\s+(.+?)\s*$', line, re.IGNORECASE)
        if m_when:
            cond_text = m_when.group(1).strip()
            self.advance()
            cond_tree = parse_compound_condition(cond_text, self.fields)
            body_seq = self.parse_seq(end_check=_ends_when_body)
            node.when_list.append((cond_text, body_seq))
            node.cond_trees.append(cond_tree)
            continue
        # Unrecognised line inside the SEARCH: skip it.
        self.advance()
    return node
def _parse_if(self):
    """Parse an IF statement at the cursor into a BrIf node.

    The condition may continue over several lines; continuation stops at the
    first line that starts with a statement keyword or ends with a period.
    A THEN on its own line is consumed.  The true branch runs up to ELSE or
    END-IF, the optional false branch up to END-IF.

    Returns:
        The BrIf node, or None (after skipping the line) when the current
        line is not a well-formed ``IF <condition>`` header.
    """
    line = self.clean()
    m = re.match(r'^IF\s+(.+?)(?:THEN)?\s*$', line)
    if not m:
        # Same recovery as the other _parse_* helpers: skip the line instead
        # of failing on m.group().
        self.advance()
        return None
    cond_text = m.group(1).strip()
    self.advance()
    # Join continuation lines (multi-line IF conditions).  A line that opens
    # with a statement verb starts the THEN branch and must not be absorbed
    # into the condition.  The verbs added after EXIT use a name-boundary
    # lookahead so that data names such as SET-FLAG are still treated as
    # condition operands.
    stop_words = re.compile(
        r'^(THEN|ELSE|END-IF|MOVE|IF|PERFORM|EVALUATE|COMPUTE|CALL|STRING|UNSTRING'
        r'|INITIALIZE|ADD|SUBTRACT|MULTIPLY|DIVIDE|GO\b|EXIT\b'
        r'|(?:DISPLAY|ACCEPT|SET|READ|WRITE|REWRITE|DELETE|START|OPEN|CLOSE|INSPECT'
        r'|SEARCH|CONTINUE|STOP|GOBACK|NEXT\s+SENTENCE)(?![\w-]))',
        re.IGNORECASE)
    while self.pos < len(self.lines):
        peek = self.clean()
        if stop_words.match(peek):
            break
        if peek.endswith('.'):
            cond_text += ' ' + peek.rstrip('.')
            self.advance()
            break
        cond_text += ' ' + peek
        self.advance()
    # Consume optional THEN on its own line
    if self.pos < len(self.lines):
        peek = self.clean()
        if peek == 'THEN':
            self.advance()
    node = BrIf(cond_text)
    node.cond_tree = parse_compound_condition(node.condition, self.fields)
    node.true_seq = self.parse_seq(['ELSE', 'END-IF'])
    if self.clean() == 'ELSE':
        self.advance()
        node.false_seq = self.parse_seq(['END-IF'])
    if self.clean() == 'END-IF':
        self.advance()
    return node
def _parse_evaluate(self):
    """Parse an EVALUATE ... END-EVALUATE statement into a BrEval node.

    A subject containing ALSO is split into ``node.subjects``.  Each WHEN
    (with any following AND/OR continuation lines appended) yields one entry
    in ``node.when_list``; WHEN OTHER fills ``other_seq`` and sets
    ``has_other``.  For multi-subject statements the WHEN value is stored as
    a list split on ALSO, otherwise as the raw text.
    """
    def _unquote(text):
        return text.strip().strip("'").strip('"')

    def _branch_ends(l):
        return l.startswith('WHEN') or l == 'END-EVALUATE'

    def _evaluate_ends(l):
        return l == 'END-EVALUATE'

    header = re.match(r'^EVALUATE\s+(.+?)\s*$', self.clean())
    subject_text = header.group(1).strip()
    node = BrEval(subject_text)
    if ' ALSO ' in subject_text:
        node.subjects = [part.strip() for part in re.split(r'\s+ALSO\s+', subject_text)]
    self.advance()

    while self.pos < len(self.lines):
        current = self.clean()
        if current == 'END-EVALUATE':
            self.advance()
            return node
        when = re.match(r'^WHEN\s+(.+?)\s*$', current)
        if when is None:
            # Not a WHEN header: skip the line.
            self.advance()
            continue
        selector = _unquote(when.group(1))
        self.advance()
        # Fold AND/OR continuation lines into the selector text.
        while self.pos < len(self.lines):
            follow = self.clean()
            if not re.match(r'^(?:AND|OR)\b', follow, re.IGNORECASE):
                break
            selector += ' ' + follow
            self.advance()
        if selector == 'OTHER':
            node.other_seq = self.parse_seq(end_check=_evaluate_ends)
            node.has_other = True
            continue
        branch = self.parse_seq(end_check=_branch_ends)
        if node.subjects:
            values = [_unquote(piece) for piece in re.split(r'\s+ALSO\s+', selector)]
            node.when_list.append((values, branch))
        else:
            node.when_list.append((selector, branch))
    return node
def _parse_perform(self):
    """Parse the PERFORM statement at the cursor into a BrPerform node.

    Forms, tried in this order (the first argument of BrPerform in quotes):
      PERFORM UNTIL cond ... END-PERFORM               'until'
      PERFORM para UNTIL cond                          'para_until'
      PERFORM n TIMES                                  'times'
      PERFORM para THRU para                           'thru'
      PERFORM VARYING v FROM a BY b [UNTIL cond]       'varying'
      PERFORM VARYING v   (FROM/BY/UNTIL on next lines) 'varying'
      PERFORM para VARYING v FROM a BY b [UNTIL cond]  'para_varying'
      PERFORM para                                     'para'

    Inline forms parse their body up to END-PERFORM; paragraph forms expand
    the paragraph body through ``_inline_perform``.  When a VARYING form
    cannot be completed, the cursor is restored and later forms are tried.

    Returns:
        The BrPerform node, or None (after skipping the line) when no form
        matches.
    """
    line = self.clean()

    def _inline_body(node):
        # Body of an inline PERFORM: everything up to END-PERFORM, which is
        # consumed when present.
        node.body_seq = self.parse_seq(end_check=lambda l: l == 'END-PERFORM')
        if self.clean() == 'END-PERFORM':
            self.advance()
        return node

    m = re.match(r'^PERFORM\s+UNTIL\s+(.+?)\s*$', line)
    if m:
        node = BrPerform('until', condition=m.group(1).strip())
        self.advance()
        return _inline_body(node)

    m = re.match(r'^PERFORM\s+(\w[\w-]*)\s+UNTIL\s+(.+?)\s*$', line)
    if m:
        target = m.group(1).strip()
        node = BrPerform('para_until', target=target, condition=m.group(2).strip())
        self.advance()
        self._inline_perform(node, target)
        return node

    m = re.match(r'^PERFORM\s+(\d+)\s+TIMES\s*$', line)
    if m:
        node = BrPerform('times', times=int(m.group(1)))
        self.advance()
        return node

    m = re.match(r'^PERFORM\s+(\w[\w-]*)\s+THRU\s+(\w[\w-]*)\s*$', line)
    if m:
        node = BrPerform('thru', target=m.group(1).strip(), thru=m.group(2).strip())
        self.advance()
        self._inline_perform(node, node.target, node.thru)
        return node

    m = re.match(r'^PERFORM\s+VARYING\s+(\w[\w-]*)\s+FROM\s+(\S+)\s+BY\s+(\S+)(?:\s+UNTIL\s+(.+))?\s*$', line)
    if m:
        varying_var = m.group(1).strip()
        from_val = m.group(2).strip()
        by_val = m.group(3).strip()
        condition = m.group(4).strip() if m.group(4) else None
        save_pos = self.pos
        if not condition:
            # UNTIL (and possibly another FROM/BY) on the following lines.
            self.advance()
            while self.pos < len(self.lines):
                nxt = self.clean()
                cm = re.match(r'^UNTIL\s+(.+)$', nxt)
                if cm:
                    condition = cm.group(1).strip()
                    self.advance()
                    break
                fm = re.match(r'^FROM\s+(\S+)\s+BY\s+(\S+)$', nxt)
                if fm:
                    from_val = fm.group(1).strip()
                    by_val = fm.group(2).strip()
                    self.advance()
                    continue
                self.pos = save_pos
                break
        if condition:
            node = BrPerform('varying', condition=condition,
                             varying_var=varying_var,
                             varying_from=from_val,
                             varying_by=by_val)
            # Single-line condition: the PERFORM line is still current.
            # Multi-line condition: the scan above already moved past UNTIL.
            if m.group(4):
                self.advance()
            return _inline_body(node)
        self.pos = save_pos

    # PERFORM VARYING var, with FROM/BY/UNTIL all on subsequent lines
    m = re.match(r'^PERFORM\s+VARYING\s+(\w[\w-]*)\s*$', line)
    if m:
        varying_var = m.group(1).strip()
        save_pos = self.pos
        self.advance()
        from_val = by_val = condition = None
        while self.pos < len(self.lines):
            nxt = self.clean()
            fm = re.match(r'^FROM\s+(\S+)\s+BY\s+(\S+)$', nxt)
            if fm:
                from_val, by_val = fm.group(1).strip(), fm.group(2).strip()
                self.advance()
                continue
            um = re.match(r'^UNTIL\s+(.+)$', nxt)
            if um:
                condition = um.group(1).strip()
                self.advance()
            break
        if from_val and by_val and condition:
            node = BrPerform('varying', condition=condition,
                             varying_var=varying_var,
                             varying_from=from_val,
                             varying_by=by_val)
            return _inline_body(node)
        self.pos = save_pos

    m = re.match(r'^PERFORM\s+(\w[\w-]*)\s+VARYING\s+(\w[\w-]*)\s+FROM\s+(\S+)\s+BY\s+(\S+)(?:\s+UNTIL\s+(.+))?\s*$', line)
    if m:
        target = m.group(1).strip()
        varying_var = m.group(2).strip()
        from_val = m.group(3).strip()
        by_val = m.group(4).strip()
        condition = m.group(5).strip() if m.group(5) else None
        save_pos = self.pos
        if not condition:
            self.advance()
            if self.pos < len(self.lines):
                cm = re.match(r'^UNTIL\s+(.+)$', self.clean())
                if cm:
                    condition = cm.group(1).strip()
                    self.advance()
                else:
                    self.pos = save_pos
        if condition:
            node = BrPerform('para_varying', target=target,
                             condition=condition,
                             varying_var=varying_var,
                             varying_from=from_val,
                             varying_by=by_val)
            # Only step past the PERFORM line when UNTIL was on that line.
            # When UNTIL came from the next line the cursor is already past
            # it; advancing again would drop the statement that follows.
            if m.group(5):
                self.advance()
            self._inline_perform(node, node.target)
            return node
        self.pos = save_pos

    m = re.match(r'^PERFORM\s+(\w[\w-]*)\s*$', line)
    if m:
        target = m.group(1).strip()
        node = BrPerform('para', target=target)
        self.advance()
        self._inline_perform(node, target)
        return node

    # Unsupported PERFORM form: skip the line.
    self.advance()
    return None
def _inline_perform(self, node, target, thru=None):
    """Expand the performed paragraph(s) into ``node.body_seq``.

    With ``thru`` given, the lines of every paragraph whose span lies
    between the start of ``target`` and the end of ``thru`` are collected
    (both names must be known).  Otherwise only ``target`` is expanded.
    Unknown paragraph names leave ``node`` untouched.  Blank lines are
    dropped before the collected lines go to a nested ``_BrParser``.
    """
    known = self.paragraphs
    if thru:
        if target not in known or thru not in known:
            return
        first_line = known[target][0]
        last_line = known[thru][1]
        body = []
        for span_start, span_end in known.values():
            if first_line <= span_start and span_end <= last_line:
                body += self.raw_lines[span_start:span_end + 1]
    elif target in known:
        span_start, span_end = known[target]
        body = self.raw_lines[span_start:span_end + 1]
    else:
        return
    nested = _BrParser(
        [text for text in body if text.strip()],
        known, self.raw_lines, self.assignments, self.fields
    )
    node.body_seq = nested.parse_seq()
def _parse_initialize(self):
    """Parse an INITIALIZE statement into one 'initialize' assignment per target.

    An optional ``REPLACING <category> DATA BY <literal>`` clause is stored
    under the ``replacing`` key of each assignment (category upper-cased,
    literal with surrounding quotes removed).

    Returns:
        A BrSeq with one Assign per target, or None when the line does not
        match; the line is consumed in both cases.
    """
    stmt = re.match(r'^INITIALIZE\s+(.+?)\s*$', self.clean())
    if stmt is None:
        self.advance()
        return None

    # Separate the operand list from an optional REPLACING clause.
    operand_text, *replacing_text = re.split(
        r'\s+REPLACING\s+', stmt.group(1).strip(), maxsplit=1, flags=re.IGNORECASE)
    names = re.findall(r'[A-Z][A-Z0-9-]*', operand_text.strip())

    replacing = {}
    if replacing_text:
        clauses = re.findall(
            r'(NUMERIC|ALPHANUMERIC-EDITED|NUMERIC-EDITED|ALPHANUMERIC|ALPHABETIC)\s+DATA\s+BY\s+(\S+)',
            replacing_text[0], re.IGNORECASE
        )
        for category, literal in clauses:
            replacing[category.upper()] = literal.strip("'").strip('"')

    result = BrSeq()
    for name in names:
        info = {'type': 'initialize'}
        if replacing:
            info['replacing'] = replacing
        self.assignments.setdefault(name, []).append(info)
        result.add(Assign(name, info))
    self.advance()
    return result
def _parse_string(self):
    """Parse a STRING ... INTO target statement into a 'string_concat' assignment.

    Lines are gathered up to (and including) END-STRING and joined with
    spaces.  Every upper-case identifier-like token before INTO is recorded
    in ``source_vars``.

    Returns:
        A BrSeq holding the single Assign, or None when the joined text does
        not match ``STRING ... INTO <name>``.
    """
    pieces = [self.clean()]
    self.advance()
    while self.pos < len(self.lines):
        text = self.clean()
        self.advance()
        if text == 'END-STRING':
            break
        pieces.append(text)

    parsed = re.match(r'^STRING\s+(.+)\s+INTO\s+(\w[\w-]*)\s*$',
                      ' '.join(pieces), re.IGNORECASE | re.DOTALL)
    if parsed is None:
        return None

    target = parsed.group(2).strip()
    info = {
        'type': 'string_concat',
        'source_vars': re.findall(r'[A-Z][A-Z0-9-]*', parsed.group(1).strip()),
    }
    self.assignments.setdefault(target, []).append(info)
    result = BrSeq()
    result.add(Assign(target, info))
    return result
def _parse_unstring(self):
    """Parse an UNSTRING ... INTO ... statement into per-target assignments.

    Lines are gathered up to (and including) END-UNSTRING and joined with
    spaces.  The first identifier-like token before INTO is taken as the
    source; every identifier-like token after INTO becomes a target.

    Returns:
        A BrSeq with one 'unstring_split' Assign per target, where ``index``
        is the target's position in the INTO list, or None when the joined
        text does not match ``UNSTRING ... INTO ...``.
    """
    parts = [self.clean()]
    self.advance()
    while self.pos < len(self.lines):
        cl = self.clean()
        if cl == 'END-UNSTRING':
            self.advance()
            break
        parts.append(cl)
        self.advance()
    full = ' '.join(parts)
    m = re.match(r'^UNSTRING\s+(.+?)\s+INTO\s+(.+?)\s*$', full, re.IGNORECASE | re.DOTALL)
    if not m:
        return None
    source_vars = re.findall(r'[A-Z][A-Z0-9-]*', m.group(1).strip())
    targets = re.findall(r'[A-Z][A-Z0-9-]*', m.group(2).strip())
    source_var = source_vars[0] if source_vars else ''
    seq = BrSeq()
    # enumerate gives each occurrence its own position; list.index() would
    # report the first occurrence for a repeated name and costs O(n) per target.
    for position, tgt in enumerate(targets):
        info = {'type': 'unstring_split', 'source_vars': [source_var], 'index': position}
        self.assignments.setdefault(tgt, []).append(info)
        seq.add(Assign(tgt, info))
    return seq
def _parse_call(self):
    """Parse a CALL statement into a CallNode.

    The program name has surrounding quotes removed and is upper-cased.
    USING operands are split into segments at each ``BY REFERENCE`` /
    ``BY CONTENT`` / ``BY VALUE``; a mechanism stays in effect for the
    following names until the next BY phrase, starting with "reference".

    Returns:
        The CallNode, or an empty BrSeq when the line does not match; the
        line is consumed in both cases.
    """
    call = re.match(r'^CALL\s+(\S+?)(?:\s+USING\s+(.+))?\s*$', self.clean())
    if call is None:
        self.advance()
        return BrSeq()

    program = call.group(1).strip("'\"").upper()
    using_text = call.group(2)
    params = []
    if using_text:
        mechanism = "reference"  # COBOL default is BY REFERENCE
        segments = re.split(r'\s+(?=BY\s+(?:REFERENCE|CONTENT|VALUE)\s+)',
                            using_text, flags=re.IGNORECASE)
        for segment in segments:
            segment = segment.strip()
            tagged = re.match(
                r'BY\s+(REFERENCE|CONTENT|VALUE)\s+(.*)', segment, re.IGNORECASE
            )
            if tagged:
                mechanism = tagged.group(1).lower()
                segment = tagged.group(2)
            for word in re.findall(r'\w[\w-]*', segment):
                params.append({"name": word.upper(), "mechanism": mechanism})

    node = CallNode(program, using_params=params)
    self.advance()
    return node
def _parse_goto(self, target):
    """Build a GoTo node for ``target`` and expand its paragraph body.

    The target paragraph is parsed by a nested ``_BrParser`` with the GO TO
    depth increased by one; expansion is skipped once the depth has reached
    10 or when the paragraph is unknown.  The current line is consumed.
    """
    node = GoTo(target)
    expandable = self._goto_depth < 10 and target in self.paragraphs
    if expandable:
        first, last = self.paragraphs[target]
        body = [text for text in self.raw_lines[first:last + 1] if text.strip()]
        nested = _BrParser(
            body,
            self.paragraphs, self.raw_lines, self.assignments, self.fields,
            goto_depth=self._goto_depth + 1
        )
        node.body_seq = nested.parse_seq()
    self.advance()
    return node
def _parse_set_true(self, name):
    """Handle ``SET <88-name> TO TRUE``.

    The 88-level entry is looked up in ``self.fields``.  When it has a
    parent, a 'set_true' assignment is recorded on that parent.  The
    returned Assign targets the parent, or the upper-cased name itself when
    no parent is known.  The current line is consumed.
    """
    name = name.upper()
    entry = next(
        (f for f in (self.fields or ()) if f.get('is_88') and f['name'] == name),
        None,
    )
    if entry is None:
        parent = value = None
    else:
        parent = entry.get('parent', '')
        value = entry.get('value', '')

    info = {'type': 'set_true', '88_name': name, 'value': value}
    tgt = parent if parent else name
    if parent:
        self.assignments.setdefault(tgt, []).append(info)
    self.advance()
    return Assign(tgt, info)
def _parse_set_false(self, name):
    """Handle ``SET <88-name> TO FALSE`` as a literal move.

    The literal is derived from the 88-level VALUE: 'Y' becomes 'N', 'N'
    becomes 'Y', any other non-empty value becomes a blank, and a missing or
    empty value becomes 'N'.  The assignment is recorded on the parent
    field, or on the upper-cased name itself when no parent is known.  The
    current line is consumed.
    """
    name = name.upper()
    parent = value = None
    for entry in self.fields or ():
        if entry.get('is_88') and entry['name'] == name:
            parent, value = entry.get('parent', ''), entry.get('value', '')
            break

    # The FALSE value is the opposite of the 88-level VALUE.
    if not value:
        false_val = 'N'
    elif value == 'Y':
        false_val = 'N'
    elif value == 'N':
        false_val = 'Y'
    else:
        false_val = ' '

    info = {'type': 'move_literal', 'literal': false_val}
    tgt = parent if parent else name
    self.assignments.setdefault(tgt, []).append(info)
    self.advance()
    return Assign(tgt, info)
# ── 工具函数 ──
def _basename(name: str) -> str:
"""去除下标后缀,如 WS-TABLE(1) → WS-TABLE"""
return re.sub(r'\s*\(.*?\)\s*$', '', name).strip()
def _init_child_names(group_name: str, fields: list) -> list:
"""递归收集 group 下所有非 88 级子字段的扁平名列表"""
result = []
grp_level = None
found = False
for f in fields:
if not found and f['name'] == group_name:
grp_level = f.get('level', 0)
found = True
continue
if found:
if f.get('level', 0) <= grp_level or f.get('level') == 77:
break
if f.get('is_88') or f.get('redefines'):
continue
if not f.get('pic_info') or f['pic_info'].get('type') == 'unknown':
result.extend(_init_child_names(f['name'], fields))
else:
result.append(f['name'])
return result
# ── 数据流追踪 ──
def trace_to_root(field_name, assignments, fields, path_assign=None):
    """Follow assignments backwards from ``field_name`` to its source variable.

    At every step the assignment to follow is taken from ``path_assign``
    when it has an entry for the current variable, otherwise from
    ``assignments``.  Within a list of assignments the most recent one that
    is not a pure self-update (single source equal to the target) is
    preferred; if all are self-updates the last one is used.

    Tracing continues through single-source assignments and through
    multi-source additions (following the first source) and stops at a
    variable without assignments, at a self-reference, at an assignment
    without sources, or when a variable repeats.

    Args:
        field_name: Variable to start from.
        assignments: Mapping of target name to an assignment dict or a list
            of assignment dicts.
        fields: Unused here; kept for interface compatibility.
        path_assign: Optional mapping of the same shape that takes precedence.

    Returns:
        ``(root_var, chain)`` where ``chain`` lists the ``(variable,
        assignment)`` pairs visited, starting with ``field_name``.
    """
    def _select(var, recorded):
        # A bare dict is a single assignment; an empty list means there is
        # nothing to follow.
        if not isinstance(recorded, list):
            return recorded
        if not recorded:
            return None
        for candidate in reversed(recorded):
            sv = candidate.get('source_vars') or []
            if len(sv) == 1 and sv[0] == var:
                continue  # self-update such as ADD 1 TO X
            return candidate
        return recorded[-1]

    seen = set()
    var = field_name
    chain = []
    while var in assignments and var not in seen:
        seen.add(var)
        if path_assign and var in path_assign:
            asgn = _select(var, path_assign[var])
        else:
            asgn = _select(var, assignments[var])
        if asgn is None:
            break
        chain.append((var, asgn))
        sv = asgn.get('source_vars')
        if not sv:
            break
        if len(sv) == 1:
            if sv[0] == var:
                break
            var = sv[0]
        elif asgn.get('op') == '+':
            # Multi-source addition: keep tracing through the first source.
            var = sv[0]
        else:
            break
    return var, chain
def invert_through_chain(root_var, chain, operator, value):
op = operator
try:
val = float(value)
except (ValueError, TypeError):
return root_var, op, value
for var, asgn in reversed(chain):
if asgn['type'] == 'move':
continue
sv = asgn.get('source_vars', [])
if asgn['type'] == 'compute' and asgn['op'] is not None:
if len(sv) == 1:
c = asgn['const']
inv = {'+': '-', '-': '+', '*': '/', '/': '*'}[asgn['op']]
if inv == '/':
val = val / c if c != 0 else val
elif inv == '*':
val = val * c
elif inv == '-':
val = val - c
elif inv == '+':
val = val + c
elif len(sv) >= 2 and asgn['op'] == '+':
# 多源加法:追溯第一个源变量,值不变(忽略其他源)
pass
if val == int(val):
return root_var, op, str(int(val))
return root_var, op, str(val)
# Numeric value of each figurative constant when moved to a numeric field.
# None marks HIGH-VALUE(S); the lookup code visible in this module replaces it
# with the largest value the receiving field can hold.
FIGURATIVE_NUMERIC = {
    'ZERO': 0.0, 'ZEROS': 0.0, 'ZEROES': 0.0,
    'SPACE': 0.0, 'SPACES': 0.0,
    'HIGH-VALUE': None, 'HIGH-VALUES': None,
    'LOW-VALUE': 0.0, 'LOW-VALUES': 0.0,
}
# Fill character of each figurative constant when moved to an alphanumeric or
# alphabetic field.  ZERO/ZEROS/ZEROES fill with the character '0'; without
# these entries the word itself would be stored as literal text.
FIGURATIVE_ALPHA = {
    'ZERO': '0', 'ZEROS': '0', 'ZEROES': '0',
    'SPACE': ' ', 'SPACES': ' ',
    'HIGH-VALUE': chr(255), 'HIGH-VALUES': chr(255),
    'LOW-VALUE': chr(0), 'LOW-VALUES': chr(0),
}
def _resolve_subscript(key, rec):
"""将变量下标解析为具体值:WS-FIXED-KEY(WS-IDX) → WS-FIXED-KEY(1) if WS-IDX=1 in rec"""
m = re.match(r'^(\w[\w-]*)\((\w[\w-]*)\)$', key)
if m:
base, var = m.groups()
if var in rec:
try:
return f'{base}({int(rec[var])})'
except (ValueError, TypeError):
pass
return key
def _apply_before_after(val, before_after, delimiter):
if not delimiter:
return val
if before_after == 'BEFORE':
idx = val.find(delimiter)
return val[:idx] if idx >= 0 else val
if before_after == 'AFTER':
idx = val.find(delimiter)
return val[idx + len(delimiter):] if idx >= 0 else ''
return val
def propagate_assignments(rec, assignments, fields, file_sec=None):
    """Apply the recorded assignments to ``rec`` in place until it is stable.

    All assignments are replayed in fixed passes (variable MOVE, literal
    MOVE, INITIALIZE, READ INTO, COMPUTE, INSPECT, STRING/UNSTRING,
    READ INTO / WRITE FROM, ACCEPT, SET TO TRUE).  The passes are repeated
    until a full round leaves ``rec`` unchanged, at most ``_MAX_CONVERGE``
    rounds; a warning is logged when the limit is reached.

    Args:
        rec: Mapping of field name to stored value; modified in place.
        assignments: Mapping of target name to an assignment dict or a list
            of assignment dicts.
        fields: Field dicts (``name``, ``pic_info``, ...).
        file_sec: Optional mapping of file name to its record names.

    Returns:
        None.
    """
    def raw_to_float(val, pi):
        # Decode a stored value.  Numeric fields hold digits without a
        # decimal point; the PIC info says where the point belongs.
        if pi.get('type') == 'numeric':
            digits = pi.get('digits', 0)
            decimal = pi.get('decimal', 0)
            s = str(val)
            neg = s.startswith('-')
            if neg:
                s = s[1:]
            s = s.zfill(digits + decimal)
            int_part = s[:digits] if digits else '0'
            dec_part = s[digits:] if decimal > 0 else '0'
            try:
                result = float(int(int_part or '0') + int(dec_part or '0') / (10 ** decimal))
            except ValueError:
                # Non-digit content (e.g. blanks copied from a text field)
                # counts as zero, like the non-numeric fallback below.
                return 0.0
            return -result if neg else result
        try:
            return float(val)
        except (ValueError, TypeError):
            return 0.0

    def float_to_raw(val, pi):
        # Encode a number in the stored form of a numeric field: scaled,
        # truncated to the field capacity, zero padded, '-' prefix only for
        # signed fields.  Other fields get str(val).
        if pi.get('type') == 'numeric':
            digits = pi.get('digits', 0)
            decimal = pi.get('decimal', 0)
            signed = pi.get('signed', False)
            scaled = int(round(val * (10 ** decimal)))
            if not signed and scaled < 0:
                scaled = 0
            capped = abs(scaled) % (10 ** (digits + decimal))
            int_part = str(capped // (10 ** decimal)).zfill(digits)
            dec_part = str(capped % (10 ** decimal)).zfill(decimal)
            result = int_part + (dec_part if decimal > 0 else '')
            if signed and scaled < 0:
                result = '-' + result
            return result
        return str(val)

    def pad_alpha(text, pi):
        # Left-justify and cut to the declared length (own length if none).
        length = pi.get('length', len(text))
        return text.ljust(length)[:length]

    def literal_to_raw(literal, pi):
        # Encode a literal or figurative constant for the receiving field.
        ftype = pi.get('type', 'unknown')
        if ftype == 'numeric':
            key = literal.upper()
            if key in FIGURATIVE_NUMERIC:
                v = FIGURATIVE_NUMERIC[key]
                if v is None:
                    # HIGH-VALUE: largest value the field can hold.
                    v = 10 ** (pi.get('digits', 0) + pi.get('decimal', 0)) - 1
                return float_to_raw(v, pi)
            try:
                return float_to_raw(float(literal), pi)
            except ValueError:
                return float_to_raw(0.0, pi)
        if ftype in ('alphanumeric', 'alphabetic'):
            key = literal.upper()
            if key in FIGURATIVE_ALPHA:
                ch = FIGURATIVE_ALPHA[key]
                return ch[0].ljust(pi.get('length', 1), ch[0])
            return pad_alpha(literal, pi)
        return literal

    def arith(op, left, right):
        # Evaluate one binary step; None for an operator not handled here.
        if op == '+':
            return left + right
        if op == '-':
            return left - right
        if op == '*':
            return left * right
        if op == '/':
            return left / right if right != 0 else left
        if op == 'rem':
            quotient = int(left / right) if right != 0 else 0
            return left - quotient * right
        return None

    pi_map = {f['name']: f.get('pic_info', {}) for f in fields}
    if file_sec is None:
        file_sec = {}

    # Flatten: {tgt: [info1, info2]} -> [(tgt, info1), (tgt, info2)]
    flat_list = []
    for tgt, asgn_val in assignments.items():
        if isinstance(asgn_val, list):
            for asgn in asgn_val:
                flat_list.append((tgt, asgn))
        elif isinstance(asgn_val, dict):
            flat_list.append((tgt, asgn_val))

    _MAX_CONVERGE = 20

    # Targets with an "anchoring" assignment: anything that is not a compute
    # whose (first) source is the target itself, e.g. a MOVE.
    _anchored = set()
    for tgt, asgn in flat_list:
        if asgn.get('type') != 'compute':
            _anchored.add(tgt)
        else:
            sv = asgn.get('source_vars', [])
            if not (len(sv) == 1 and sv[0] == tgt) and not (len(sv) >= 2 and tgt == sv[0]):
                _anchored.add(tgt)

    # Values substituted for ACCEPT ... FROM <source>.
    accept_values = {
        'DATE': '20260603',
        'TIME': '120000',
        'DAY': '2026154',
        'DAY-OF-WEEK': '3',
        'YEAR': '2026',
    }

    for _converge_iter in range(_MAX_CONVERGE):
        _old = dict(rec)

        # Pass 1: variable-to-variable MOVE
        for tgt, asgn in flat_list:
            if asgn['type'] == 'move' and asgn['source_vars']:
                resolved_tgt = _resolve_subscript(tgt, rec)
                resolved_src = _resolve_subscript(asgn['source_vars'][0], rec)
                if resolved_src in rec:
                    rec[resolved_tgt] = rec[resolved_src]

        # Pass 2: literal MOVE
        for tgt, asgn in flat_list:
            if asgn['type'] == 'move_literal':
                resolved_tgt = _resolve_subscript(tgt, rec)
                pi = pi_map.get(resolved_tgt, {})
                rec[resolved_tgt] = literal_to_raw(asgn['literal'], pi)

        # Pass 3: INITIALIZE (REPLACING literal for the field category if
        # given, otherwise zero for numeric fields and SPACE for the rest)
        for tgt, asgn in flat_list:
            if asgn['type'] != 'initialize':
                continue
            resolved_tgt = _resolve_subscript(tgt, rec)
            pi = pi_map.get(resolved_tgt, {})
            ftype = pi.get('type', 'unknown')
            replacing = asgn.get('replacing', {})
            mapped = replacing.get(ftype.upper(), None) if replacing else None
            if mapped:
                rec[resolved_tgt] = literal_to_raw(mapped, pi)
            elif ftype == 'numeric':
                rec[resolved_tgt] = float_to_raw(0.0, pi)
            else:
                rec[resolved_tgt] = literal_to_raw('SPACE', pi)

        # Pass 3.5: READ INTO - copy record fields into the target's children,
        # matched by name without the "WS-" prefix, else by position.
        for tgt, asgn in flat_list:
            if asgn['type'] != 'read_into':
                continue
            fname = asgn.get('file', '')
            if fname not in file_sec:
                continue
            fd_children = _init_child_names(file_sec[fname][0], fields)
            ws_children = _init_child_names(tgt, fields)
            for idx, ws_c in enumerate(ws_children):
                fd_candidate = ws_c[3:] if ws_c.startswith('WS-') else ws_c
                if fd_candidate in rec:
                    rec[ws_c] = rec[fd_candidate]
                elif idx < len(fd_children) and fd_children[idx] in rec:
                    rec[ws_c] = rec[fd_children[idx]]
            rec[tgt] = ''.join(str(rec.get(c, '')) for c in ws_children)

        # Pass 4: COMPUTE / arithmetic statements
        for tgt, asgn in flat_list:
            if not (asgn['type'] == 'compute' and asgn['source_vars'] and asgn['op'] is not None):
                continue
            op = asgn['op']
            sources = asgn['source_vars']
            const = asgn.get('const')
            resolved_tgt = _resolve_subscript(tgt, rec)
            pi_tgt = pi_map.get(resolved_tgt, {})
            # An unanchored self-update (e.g. ADD 1 TO X with no MOVE to X)
            # is applied in the first round only; repeating it would never
            # converge.
            repeat_unanchored = tgt not in _anchored and _converge_iter > 0
            if len(sources) == 1:
                resolved_src = _resolve_subscript(sources[0], rec)
                if resolved_tgt == resolved_src and repeat_unanchored:
                    continue
                if resolved_src not in rec:
                    continue
                if const is None:
                    # No constant operand (e.g. ADD A GIVING Z): use the
                    # identity element.  A remainder has no sensible default.
                    if op == 'rem':
                        continue
                    const = 1.0 if op in ('*', '/') else 0.0
                sv = raw_to_float(rec[resolved_src], pi_map.get(resolved_src, {}))
                result = arith(op, sv, const)
                if result is None:
                    continue
                rec[resolved_tgt] = float_to_raw(result, pi_tgt)
            elif len(sources) == 2:
                resolved_v1 = _resolve_subscript(sources[0], rec)
                resolved_v2 = _resolve_subscript(sources[1], rec)
                if resolved_tgt == resolved_v1 and repeat_unanchored:
                    continue
                if resolved_v1 not in rec or resolved_v2 not in rec:
                    continue
                sv1 = raw_to_float(rec[resolved_v1], pi_map.get(resolved_v1, {}))
                sv2 = raw_to_float(rec[resolved_v2], pi_map.get(resolved_v2, {}))
                result = arith(op, sv1, sv2)
                if result is None:
                    continue
                if op == '+' and const:
                    # ADD a b <literal> GIVING z: literal operands are summed
                    # into 'const' and belong to the total.
                    result += const
                rec[resolved_tgt] = float_to_raw(result, pi_tgt)
            elif op == '+':
                # Three or more addends.
                total = const or 0
                all_found = True
                for v in sources:
                    resolved_v = _resolve_subscript(v, rec)
                    if resolved_v not in rec:
                        all_found = False
                        break
                    total += raw_to_float(rec[resolved_v], pi_map.get(resolved_v, {}))
                if all_found:
                    rec[resolved_tgt] = float_to_raw(total, pi_tgt)

        # Pass 4.5: INSPECT
        for tgt, asgn in flat_list:
            if asgn['type'] != 'inspect':
                continue
            resolved_tgt = _resolve_subscript(tgt, rec)
            if resolved_tgt not in rec:
                continue
            src_val = str(rec[resolved_tgt])
            for op_type, params in asgn.get('sub_ops', []):
                if op_type not in ('tally', 'replace', 'convert'):
                    continue
                before_after = params.get('before_after')
                delimiter = params.get('delimiter')
                region = _apply_before_after(src_val, before_after, delimiter)
                if op_type == 'tally':
                    cv = params['count_var'].upper()
                    cv_pi = pi_map.get(cv, {})
                    if params['kind'] == 'LEADING':
                        cnt = len(region) - len(region.lstrip(params['char']))
                    elif params['kind'] == 'TRAILING':
                        cnt = len(region) - len(region.rstrip(params['char']))
                    else:
                        cnt = len(region)
                    if cv_pi.get('type') == 'numeric':
                        rec[cv] = float_to_raw(float(cnt), cv_pi)
                    continue
                if op_type == 'replace':
                    kind = params['kind']
                    if kind == 'ALL':
                        new_region = region.replace(params['src'], params['dst'])
                    elif kind == 'LEADING':
                        # Replace each occurrence in the unbroken run at the
                        # start of the region; an empty pattern matches nothing.
                        src = params['src']
                        run = 0
                        while src and region.startswith(src, run * len(src)):
                            run += 1
                        new_region = params['dst'] * run + region[run * len(src):]
                    elif kind == 'FIRST':
                        new_region = region.replace(params['src'], params['dst'], 1)
                    else:
                        new_region = params['dst'] * len(region)
                else:
                    table = str.maketrans(params['from_chars'], params['to_chars'])
                    new_region = region.translate(table)
                # Only the selected region changes; the text outside the
                # BEFORE/AFTER boundary is kept.  AFTER selects a suffix,
                # everything else a prefix (or the whole value).
                if before_after == 'AFTER' and delimiter:
                    src_val = src_val[:len(src_val) - len(region)] + new_region
                else:
                    src_val = new_region + src_val[len(region):]
                # Later sub-operations work on the updated value.
                rec[resolved_tgt] = src_val

        # Pass 5: STRING / UNSTRING
        for tgt, asgn in flat_list:
            if asgn['type'] == 'string_concat':
                resolved_tgt = _resolve_subscript(tgt, rec)
                pi = pi_map.get(resolved_tgt, {})
                parts = []
                for v in asgn.get('source_vars', []):
                    resolved_v = _resolve_subscript(v, rec)
                    if resolved_v in rec:
                        parts.append(str(rec[resolved_v]))
                val = ''.join(parts)
                if pi.get('type') in ('alphanumeric', 'alphabetic'):
                    val = pad_alpha(val, pi)
                rec[resolved_tgt] = val
            elif asgn['type'] == 'unstring_split':
                resolved_tgt = _resolve_subscript(tgt, rec)
                pi = pi_map.get(resolved_tgt, {})
                src_var = asgn.get('source_vars', [None])[0]
                resolved_src = _resolve_subscript(src_var, rec) if src_var else None
                idx = asgn.get('index', 0)
                if resolved_src and resolved_src in rec:
                    ftype = pi.get('type', 'unknown')
                    is_text = ftype in ('alphanumeric', 'alphabetic')
                    # The first target receives the whole source value; the
                    # remaining targets get a blank or a zero.
                    if idx == 0:
                        val = str(rec[resolved_src])
                    else:
                        val = ' ' if is_text else '0'
                    if is_text:
                        val = pad_alpha(val, pi)
                    rec[resolved_tgt] = val

        # Pass 6: READ INTO / WRITE FROM
        for tgt, asgn in flat_list:
            if asgn['type'] == 'read_into':
                fname = asgn.get('file', '')
                if fname in file_sec:
                    children = _init_child_names(file_sec[fname][0], fields)
                    rec[tgt] = ''.join(str(rec.get(c, '')) for c in children)
            elif asgn['type'] == 'write_from':
                children = _init_child_names(asgn.get('file', ''), fields)
                if children:
                    # Cut the buffer into consecutive chunks, one per child.
                    src = str(rec.get(tgt, ''))
                    pos = 0
                    for c in children:
                        pi = pi_map.get(c, {})
                        length = pi.get('digits', 0) + pi.get('decimal', 0) or pi.get('length', 0)
                        if length > 0:
                            chunk = src[pos:pos + length]
                            if not chunk:
                                chunk = '0' if pi.get('type') == 'numeric' else ' '
                            rec[c] = chunk.ljust(length)
                            pos += length

        # Pass 7: ACCEPT (only the sources listed in accept_values)
        for tgt, asgn in flat_list:
            if asgn['type'] != 'accept':
                continue
            resolved_tgt = _resolve_subscript(tgt, rec)
            pi = pi_map.get(resolved_tgt, {})
            val = accept_values.get(asgn.get('from', 'USER'))
            if val is None:
                continue
            if pi.get('type', 'unknown') == 'numeric':
                rec[resolved_tgt] = val.zfill(pi.get('digits', 0) + pi.get('decimal', 0))
            else:
                length = pi.get('length', 0)
                rec[resolved_tgt] = val.ljust(length)[:length] if length else val

        # Pass 8: SET var TO TRUE (88-level)
        for tgt, asgn in flat_list:
            if asgn['type'] != 'set_true':
                continue
            resolved_tgt = _resolve_subscript(tgt, rec)
            val = asgn.get('value', '1')
            if val is None or val == '':
                # The 88 entry carried no VALUE: use the same default as for
                # a missing key instead of failing on an empty string.
                val = '1'
            pi = pi_map.get(resolved_tgt, {})
            if pi.get('type', 'unknown') in ('alphanumeric', 'alphabetic'):
                length = pi.get('length', len(str(val)))
                rec[resolved_tgt] = str(val)[0].ljust(length)[:length]
            else:
                total = pi.get('digits', 0) + pi.get('decimal', 0)
                rec[resolved_tgt] = str(val).zfill(max(total, 1))

        if rec == _old:
            break
    else:
        logger.warning(f"propagate_assignments 未收敛({_MAX_CONVERGE} 次迭代后仍有变化)")
def classify_field_roles(tree, assignments, fields, source=None, proc_text=None):
    """Classify every field as 'input', 'output', 'inout' or 'unused'.

    The branch tree is walked and read/write occurrences are counted per field
    base name. When both ``source`` and ``proc_text`` are given, record names
    found in the FILE SECTION get their role from the OPEN direction, and that
    role takes precedence over the counted result.

    Args:
        tree: root node of the branch tree (BrSeq / BrIf / ...).
        assignments: accepted for interface compatibility; not read here.
        fields: list of field dicts; ``name`` and ``section`` keys are read.
        source: full program text handed to the FILE-CONTROL / FILE SECTION
            parsers (optional).
        proc_text: PROCEDURE DIVISION text, scanned for OPEN, ACCEPT and
            DISPLAY statements (optional).

    Returns:
        dict mapping field name -> 'input' | 'output' | 'inout' | 'unused'.
    """
    # Phase 0: derive roles from FD records and the OPEN direction.
    fd_roles = {}
    if source and proc_text:
        from .read import parse_file_control, parse_file_section, scan_open_statements
        file_ctl = parse_file_control(source)
        file_sec = parse_file_section(source)
        open_dir = scan_open_statements(proc_text)
        for iname, direction in open_dir.items():
            if iname in file_sec:
                for rec_name in file_sec[iname]:
                    if direction == 'INPUT':
                        fd_roles[rec_name] = 'input'
                    elif direction == 'OUTPUT':
                        fd_roles[rec_name] = 'output'
                    elif direction == 'I-O':
                        fd_roles[rec_name] = 'inout'
        # Propagate each record's role to its child fields.
        for rec_name, role in list(fd_roles.items()):
            for child in _init_child_names(rec_name, fields):
                fd_roles[child] = role
    counts = {f['name']: {'read': 0, 'write': 0} for f in fields}

    def _walk(node):
        # Recursively count reads/writes; unknown node types are ignored.
        if isinstance(node, BrIf):
            if node.cond_tree:
                # Every leaf of the condition tree is a read of its field.
                for leaf in collect_leaves(node.cond_tree):
                    name = _basename(leaf.field)
                    if name in counts:
                        counts[name]['read'] += 1
            _walk(node.true_seq)
            _walk(node.false_seq)
        elif isinstance(node, BrEval):
            name = _basename(node.subject)
            if name in counts:
                counts[name]['read'] += 1
            for _, seq in node.when_list:
                _walk(seq)
            _walk(node.other_seq)
        elif isinstance(node, BrPerform):
            if node.condition:
                parsed = parse_single_condition(node.condition)
                if parsed:
                    name = _basename(parsed[0])
                    if name in counts:
                        counts[name]['read'] += 1
            if node.varying_var:
                # The VARYING variable is counted as written by the loop.
                name = _basename(node.varying_var)
                if name in counts:
                    counts[name]['write'] += 1
            _walk(node.body_seq)
        elif isinstance(node, CallNode):
            for p in node.using_params:
                name = _basename(p.get("name", ""))
                mechanism = p.get("mechanism", "reference")
                if name in counts:
                    counts[name]["read"] += 1
                    # BY REFERENCE parameters are additionally counted as written.
                    if mechanism.lower() == "reference":
                        counts[name]["write"] += 1
        elif isinstance(node, Assign):
            tgt_base = _basename(node.target)
            atype = node.source_info.get('type')
            if atype == 'read_into':
                if tgt_base in counts:
                    counts[tgt_base]['write'] += 1
            elif atype == 'write_from':
                if tgt_base in counts:
                    counts[tgt_base]['read'] += 1
            elif atype == 'set_true':
                if tgt_base in counts:
                    counts[tgt_base]['write'] += 1
            else:
                if tgt_base in counts:
                    counts[tgt_base]['write'] += 1
                for v in node.source_info.get('source_vars', []):
                    v_base = _basename(v)
                    if v_base in counts:
                        counts[v_base]['read'] += 1
                # INITIALIZE of a group also writes every child of that group.
                if atype == 'initialize' and tgt_base in counts:
                    for child in _init_child_names(tgt_base, fields):
                        if child in counts:
                            counts[child]['write'] += 1
        elif isinstance(node, BrSeq):
            for c in node.children:
                _walk(c)

    _walk(tree)
    # Extra phase: ACCEPT / DISPLAY, found by scanning proc_text.
    if proc_text:
        for m in re.finditer(r'ACCEPT\s+(\w[\w-]*)', proc_text):
            name = _basename(m.group(1).upper())
            if name in counts:
                counts[name]['write'] += 1
        for m in re.finditer(r'DISPLAY\s+(\w[\w-]*)', proc_text):
            name = _basename(m.group(1).upper())
            if name in counts:
                counts[name]['read'] += 1
    # LINKAGE fields that were never touched default to 'input'.
    for f in fields:
        if f.get('section') == 'LINKAGE':
            name = f['name']
            if name in counts and counts[name]['read'] == 0 and counts[name]['write'] == 0:
                counts[name]['read'] = 1
    result = {}
    for name, c in counts.items():
        # An FD/OPEN-derived role overrides the counted result.
        if name in fd_roles:
            result[name] = fd_roles[name]
            continue
        if c['read'] > 0 and c['write'] > 0:
            result[name] = 'inout'
        elif c['write'] > 0:
            result[name] = 'output'
        elif c['read'] > 0:
            result[name] = 'input'
        else:
            result[name] = 'unused'
    # Make sure FD record names appear even when they are missing from `fields`.
    for name, role in fd_roles.items():
        if name not in result:
            result[name] = role
    return result
+894
View File
@@ -0,0 +1,894 @@
"""设计层:路径枚举 + 值生成 + 约束应用"""
import re
import logging
from .models import BrSeq, BrIf, BrEval, BrPerform, BrSearch, Assign, CallNode, CondNot, CondLeaf, ExitNode, GoTo
from .cond import parse_single_condition, parse_compound_condition, is_field, collect_leaves, mcdc_sets, satisfying_value
from .core import trace_to_root, invert_through_chain, propagate_assignments, _basename
logger = logging.getLogger(__name__)
# Sentinel constraint marking a path that has terminated; it is always
# compared by identity (``is``), never by value.
_STOP = ('__STOP__', '', None, True)
# Upper bound on the number of paths kept by _cap_paths / _cap_paths_fair.
_MAX_PATHS = 10000
def _filter_stop(cons):
    """Return a new list of constraints with every _STOP sentinel removed."""
    kept = []
    for constraint in cons:
        if constraint is _STOP:
            continue
        kept.append(constraint)
    return kept
def _cap_paths(paths):
    """Truncate ``paths`` to at most _MAX_PATHS entries.

    The input list itself is returned when it is already within the limit.
    """
    return paths[:_MAX_PATHS] if len(paths) > _MAX_PATHS else paths
def _cap_paths_fair(new_active, child_paths):
    """Cap ``new_active`` at _MAX_PATHS entries in two fair rounds.

    Paths carrying the _STOP sentinel are kept first. The remaining entries
    are treated as consecutive groups of ``len(child_paths)``; round one takes
    one entry per group (rotating the offset inside the group), round two
    fills whatever quota is left with the entries not yet taken, in order.
    """
    if len(new_active) <= _MAX_PATHS:
        return new_active
    fanout = len(child_paths)
    if fanout <= 1:
        return new_active[:_MAX_PATHS]

    # Terminated paths do not take part in the combination; keep them as-is.
    halted, live = [], []
    for cons, assign in new_active:
        bucket = halted if any(c is _STOP for c in cons) else live
        bucket.append((cons, assign))

    kept = list(halted)
    n_pred = len(live) // fanout
    if n_pred <= 1:
        kept.extend(live[:_MAX_PATHS - len(kept)])
        return kept[:_MAX_PATHS]

    # Round 1: one entry per predecessor group, rotating through the offsets.
    first_round = [p * fanout + p % fanout
                   for p in range(min(n_pred, _MAX_PATHS - len(kept)))]
    kept.extend(live[i] for i in first_round)
    if len(kept) >= _MAX_PATHS:
        return kept[:_MAX_PATHS]

    # Round 2: spend the remaining quota on the entries not picked above.
    taken = set(first_round)
    room = _MAX_PATHS - len(kept)
    for i, entry in enumerate(live):
        if i in taken:
            continue
        kept.append(entry)
        room -= 1
        if room <= 0:
            break
    return kept[:_MAX_PATHS]
# ── 路径枚举 ──
def enum_paths(node, fields):
    """Enumerate the paths through ``node``.

    Every path is a ``(constraints, assignments)`` pair: ``constraints`` is a
    list of ``(field, op, value, want_true)`` tuples (plus the ``_STOP``
    sentinel once the path has terminated) and ``assignments`` maps a target
    name to a list of source-info dicts.

    Returns:
        list[tuple[list[tuple], dict]]; nodes that cannot be analysed yield a
        single empty path ``[([], {})]``.
    """
    if isinstance(node, Assign):
        return [([], {node.target: [node.source_info]})]
    if isinstance(node, BrSeq):
        if not node.children:
            return [([], {})]
        paths = [([], {})]
        for child in node.children:
            child_paths = _cap_paths(enum_paths(child, fields))
            new_active = []
            for p_cons, p_assign in paths:
                # A path that already hit _STOP is carried over unchanged.
                if any(c is _STOP for c in p_cons):
                    new_active.append((p_cons, p_assign))
                    continue
                for cp_cons, cp_assign in child_paths:
                    # Concatenate the assignment lists of prefix and child per target.
                    merged = {}
                    for d in (p_assign, cp_assign):
                        for k, v in d.items():
                            merged.setdefault(k, []).extend(v if isinstance(v, list) else [v])
                    merged_cons = p_cons + list(cp_cons)
                    new_active.append((merged_cons, merged))
            paths = _cap_paths_fair(new_active, child_paths)
        return paths
    elif isinstance(node, BrIf):
        parsed = parse_single_condition(node.condition, fields)
        if parsed and is_field(parsed[0], fields):
            field, op, val = parsed
            paths = []
            true_sub = _cap_paths(enum_paths(node.true_seq, fields))
            for sp_cons, sp_assign in (true_sub or [([], {})]):
                paths.append(([(field, op, val, True)] + sp_cons, sp_assign))
            false_sub = _cap_paths(enum_paths(node.false_seq, fields))
            for fp_cons, fp_assign in (false_sub or [([], {})]):
                paths.append(([(field, op, val, False)] + fp_cons, fp_assign))
            return paths
        # CondNot wrapping a single leaf (e.g., IF NOT WS-AMOUNT > 1000):
        # the wanted truth value of the leaf is inverted on each branch.
        if node.cond_tree and isinstance(node.cond_tree, CondNot):
            child = node.cond_tree.child
            if isinstance(child, CondLeaf) and is_field(child.field, fields):
                paths = []
                true_sub = _cap_paths(enum_paths(node.true_seq, fields))
                for sp_cons, sp_assign in (true_sub or [([], {})]):
                    paths.append(([(child.field, child.op, child.value, False)] + sp_cons, sp_assign))
                false_sub = _cap_paths(enum_paths(node.false_seq, fields))
                for fp_cons, fp_assign in (false_sub or [([], {})]):
                    paths.append(([(child.field, child.op, child.value, True)] + fp_cons, fp_assign))
                return paths
        if node.cond_tree:
            leaves = collect_leaves(node.cond_tree)
            if leaves and all(is_field(l.field, fields) for l in leaves):
                sets = mcdc_sets(node.cond_tree, fields)
                if sets:
                    paths = []
                    # Each MC/DC constraint set selects the branch matching its decision.
                    for constraints, decision in sets:
                        body = _cap_paths(enum_paths(
                            node.true_seq if decision else node.false_seq, fields
                        ))
                        for sp_cons, sp_assign in (body or [([], {})]):
                            paths.append((constraints + sp_cons, sp_assign))
                    return paths
                # CondLeaf fallback: a single leaf (including a condition tree
                # produced by 88-level resolution) for which MC/DC yields nothing.
                if len(leaves) == 1:
                    leaf = leaves[0]
                    paths = []
                    true_sub = _cap_paths(enum_paths(node.true_seq, fields))
                    for sp_cons, sp_assign in (true_sub or [([], {})]):
                        paths.append(([(leaf.field, leaf.op, leaf.value, True)] + sp_cons, sp_assign))
                    false_sub = _cap_paths(enum_paths(node.false_seq, fields))
                    for fp_cons, fp_assign in (false_sub or [([], {})]):
                        paths.append(([(leaf.field, leaf.op, leaf.value, False)] + fp_cons, fp_assign))
                    return paths
        # Fallback: parsed condition but non-field (e.g. arithmetic expr)
        if parsed:
            field, op, val = parsed
            paths = []
            true_sub = enum_paths(node.true_seq, fields)
            for sp_cons, sp_assign in (true_sub or [([], {})]):
                paths.append(([(field, op, val, True)] + sp_cons, sp_assign))
            false_sub = enum_paths(node.false_seq, fields)
            for fp_cons, fp_assign in (false_sub or [([], {})]):
                paths.append(([(field, op, val, False)] + fp_cons, fp_assign))
            return paths
        return [([], {})]
    elif isinstance(node, BrEval):
        if node.subjects:
            # Multi-subject (ALSO) form: one '=' constraint per subject.
            paths = []
            prior_false_cons = []
            for values, seq in node.when_list:
                sub = _cap_paths(enum_paths(seq, fields))
                for sp_cons, sp_assign in (sub or [([], {})]):
                    when_cons = [(node.subjects[i], '=', values[i], True)
                                 for i in range(len(node.subjects))]
                    constraints = list(prior_false_cons) + when_cons + sp_cons
                    paths.append((constraints, sp_assign))
                # Later WHEN branches additionally require this one to be false.
                for i in range(len(node.subjects)):
                    prior_false_cons.append((node.subjects[i], '=', values[i], False))
            if node.has_other:
                sub = _cap_paths(enum_paths(node.other_seq, fields))
                for sp_cons, sp_assign in (sub or [([], {})]):
                    paths.append((list(prior_false_cons) + sp_cons, sp_assign))
            return paths
        if node.subject == 'TRUE':
            # EVALUATE TRUE: every WHEN value is itself a condition.
            paths = []
            prior_false_sets = []  # list[list[Constraint]]
            for value, seq in node.when_list:
                cond = parse_compound_condition(value, fields)
                if cond and isinstance(cond, CondLeaf) and is_field(cond.field, fields):
                    sub = _cap_paths(enum_paths(seq, fields))
                    for sp_cons, sp_assign in (sub or [([], {})]):
                        constraints = [c for pf in prior_false_sets for c in pf]
                        constraints.append((cond.field, cond.op, cond.value, True))
                        paths.append((constraints + sp_cons, sp_assign))
                    prior_false_sets.append([(cond.field, cond.op, cond.value, False)])
                elif cond:
                    leaves = collect_leaves(cond)
                    if leaves and all(is_field(l.field, fields) for l in leaves):
                        sets = mcdc_sets(cond, fields)
                        if sets:
                            sub = _cap_paths(enum_paths(seq, fields))
                            new_false_sets = []
                            for cs, decision in sets:
                                if decision:
                                    if not prior_false_sets:
                                        for sp_cons, sp_assign in (sub or [([], {})]):
                                            paths.append((list(cs) + sp_cons, sp_assign))
                                    else:
                                        for pf_set in prior_false_sets:
                                            for sp_cons, sp_assign in (sub or [([], {})]):
                                                paths.append((list(pf_set) + list(cs) + sp_cons, sp_assign))
                                else:
                                    new_false_sets.append(cs)
                            if not new_false_sets:
                                prior_false_sets = []
                                break
                            # Combine every earlier false set with every new false set.
                            combined = []
                            for pf_set in prior_false_sets:
                                for nf_set in new_false_sets:
                                    combined.append(list(pf_set) + list(nf_set))
                            prior_false_sets = combined
                        else:
                            prior_false_sets = []
                            break
                    else:
                        prior_false_sets = []
                        break
                else:
                    # Unparseable WHEN condition: stop processing further branches.
                    prior_false_sets = []
                    break
            if node.has_other:
                sub = _cap_paths(enum_paths(node.other_seq, fields))
                for sp_cons, sp_assign in (sub or [([], {})]):
                    constraints = [c for pf in prior_false_sets for c in pf]
                    paths.append((constraints + sp_cons, sp_assign))
            return paths
        if not is_field(node.subject, fields):
            return [([], {})]
        paths = []
        for value, seq in node.when_list:
            sub = _cap_paths(enum_paths(seq, fields))
            for sp_cons, sp_assign in (sub or [([], {})]):
                paths.append(([(node.subject, '=', value, True)] + sp_cons, sp_assign))
        if node.has_other:
            # WHEN OTHER: the subject must differ from every listed case value.
            case_vals = [v for v, _ in node.when_list]
            sub = _cap_paths(enum_paths(node.other_seq, fields))
            for sp_cons, sp_assign in (sub or [([], {})]):
                paths.append(([(node.subject, 'not_in', case_vals, True)] + sp_cons, sp_assign))
        return paths
    elif isinstance(node, BrSearch):
        return _enum_search_paths(node, fields)
    elif isinstance(node, BrPerform):
        if node.perf_type in ('para', 'thru'):
            if node.body_seq:
                return enum_paths(node.body_seq, fields)
            return [([], {})]
        elif node.perf_type in ('until', 'para_until', 'varying', 'para_varying'):
            # First try a single condition.
            parsed = parse_single_condition(node.condition, fields)
            if parsed and is_field(parsed[0], fields):
                field, op, val = parsed
                paths = []
                false_sub = _cap_paths(enum_paths(node.body_seq, fields))
                for sp_cons, sp_assign in (false_sub or [([], {})]):
                    # PERFORM VARYING: add the FROM value to the loop-entering
                    # path as a MOVE assignment.
                    if node.varying_from and node.varying_var:
                        is_fld = any(f['name'] == node.varying_from for f in fields) if fields else False
                        from_asgn = {'type': 'move', 'source_vars': [node.varying_from]} if is_fld else {'type': 'move_literal', 'literal': node.varying_from}
                        from_assign = {node.varying_var: [from_asgn]}
                        merged = {}
                        for d in (from_assign, sp_assign):
                            for k, v in d.items():
                                merged.setdefault(k, []).extend(v if isinstance(v, list) else [v])
                        sp_assign = merged
                    # UNTIL condition false -> the body is executed.
                    paths.append(([(field, op, val, False)] + sp_cons, sp_assign))
                # UNTIL condition true -> the body is skipped.
                paths.append(([(field, op, val, True)], {}))
                return paths
            # Then try a compound (AND/OR) condition.
            cond_tree = parse_compound_condition(node.condition, fields)
            if cond_tree:
                leaves = collect_leaves(cond_tree)
                if leaves and all(is_field(l.field, fields) for l in leaves):
                    sets = mcdc_sets(cond_tree, fields)
                    if sets:
                        paths = []
                        false_sub = _cap_paths(enum_paths(node.body_seq, fields))
                        for sp_cons, sp_assign in (false_sub or [([], {})]):
                            # PERFORM VARYING: add the FROM value to the
                            # loop-entering path as a MOVE assignment.
                            if node.varying_from and node.varying_var:
                                is_fld = any(f['name'] == node.varying_from for f in fields) if fields else False
                                from_asgn = {'type': 'move', 'source_vars': [node.varying_from]} if is_fld else {'type': 'move_literal', 'literal': node.varying_from}
                                from_assign = {node.varying_var: [from_asgn]}
                                merged = {}
                                for d in (from_assign, sp_assign):
                                    for k, v in d.items():
                                        merged.setdefault(k, []).extend(v if isinstance(v, list) else [v])
                                sp_assign = merged
                            for constraints, decision in sets:
                                if not decision:
                                    paths.append((list(constraints) + sp_cons, sp_assign))
                        for constraints, decision in sets:
                            if decision:
                                paths.append((list(constraints), {}))
                        if paths:
                            return paths
        return [([], {})]
    elif isinstance(node, CallNode):
        return [([], {})]
    elif isinstance(node, ExitNode):
        return [([_STOP], {})]
    elif isinstance(node, GoTo):
        # The jump target's paths are enumerated, then marked as terminated.
        paths = enum_paths(node.body_seq, fields)
        return [([_STOP] + c, a) for c, a in paths]
    return [([], {})]
# ── 值生成 ──
def seq_numeric(seq_num: int, total_digits: int) -> str:
    """Return ``seq_num`` wrapped into ``total_digits`` digits, zero-padded.

    A remainder of zero is replaced by the largest value of that width, so
    the result is all zeros only in the degenerate zero-width case.
    """
    modulus = 10 ** total_digits
    wrapped = seq_num % modulus
    if not wrapped:
        wrapped = modulus - 1
    return f"{wrapped}".zfill(total_digits)
def seq_alpha(seq_num: int, length: int) -> str:
    """Return ``length`` copies of the letter for ``seq_num`` (1 -> 'A', cyclic)."""
    offset = (seq_num - 1) % 26
    return chr(ord('A') + offset) * length
def seq_date(seq_num: int) -> str:
    """Return the date ``seq_num - 1`` days after 2000-01-01 as 'YYYYMMDD'."""
    from datetime import datetime, timedelta
    offset = timedelta(days=seq_num - 1)
    return (datetime(2000, 1, 1) + offset).strftime('%Y%m%d')
def _is_date_field(name: str) -> bool:
patterns = [r'DATE', r'YYMMDD', r'YYYYMM', r'YEAR', r'MONTH', r'DAY']
for p in patterns:
if re.search(p, name.upper()):
return True
return False
_SPECIAL_VALUES = {
'ZERO': '0', 'ZEROS': '0', 'ZEROES': '0',
'SPACE': ' ', 'SPACES': ' ',
'HIGH-VALUE': '\xff', 'HIGH-VALUES': '\xff',
'LOW-VALUE': '\x00', 'LOW-VALUES': '\x00',
'QUOTE': "'", 'QUOTES': "'",
'ALL': '',
}
def _apply_value(field: dict, rec: dict) -> bool:
"""尝试应用 VALUE 子句的初始值。返回 True 表示已处理。"""
raw = field.get('value')
if raw is None:
return False
val = str(raw).strip("'\"").strip()
name = field['name']
pi = field.get('pic_info', {})
# 处理 COBOL 特殊值
if val.upper() in _SPECIAL_VALUES:
val = _SPECIAL_VALUES[val.upper()]
ftype = pi.get('type', 'unknown')
if ftype == 'numeric':
digits = pi.get('digits', 0) + pi.get('decimal', 0)
if digits:
rec[name] = val.zfill(digits)
else:
rec[name] = val
else:
length = pi.get('length', 0) or 1
rec[name] = val.ljust(length)[:length]
return True
def _children_of(group_name: str, fields: list) -> list:
"""返回组项目 group_name 在 fields 中的直属子字段列表(按声明顺序)。
终止条件:遇到同/更高级别(sibling/组边界)或 77 级(独立字段)。
"""
result = []
group_level = None
found = False
for f in fields:
if not found and f['name'] == group_name:
group_level = f['level']
found = True
continue
if found:
if f['level'] <= group_level or f['level'] == 77:
break
# 88-level 是条件名,不计为子字段
if f.get('is_88'):
continue
result.append(f)
return result
def _make_numeric_value(idx: int, record_num: int, total_digits: int) -> str:
for step in (100, 10, 1):
val = idx * step + record_num
if val < 10 ** total_digits:
return str(val).zfill(total_digits)
return str(record_num).zfill(total_digits)
def _make_alpha_value(idx: int, record_num: int, length: int) -> str:
if length == 1:
ch = chr(65 + (idx + record_num - 2) % 26)
return ch
letter = chr(65 + (idx - 1) % 26)
return letter + str(record_num).zfill(length - 1)
def make_base_record(seq_num: int, fields: list) -> dict:
    """Build the default record (field name -> string value) for record ``seq_num``.

    Fields are visited in declaration order. 88-level items are skipped,
    FILLER items are filled with 'x', a VALUE clause wins over generated
    data, and items without PIC are skipped. REDEFINES items are filled
    afterwards by copying from the item they redefine.
    """
    rec = {}
    redefines_map = {}  # scalar REDEFINES: parent_name -> [child_names]
    group_redefines = []  # group REDEFINES: [(redef_name, target_name)]
    filler_key_counter = 0
    numeric_idx = 0
    alpha_idx = 0
    record_num = seq_num
    for f in fields:
        name = f['name']
        if f.get('is_88'):
            continue
        if f.get('redefines'):
            parent = f['redefines']
            if f.get('pic'):
                # Scalar REDEFINES (has PIC, e.g. WS-AMOUNT-DISP REDEFINES WS-AMOUNT PIC X(9)).
                redefines_map.setdefault(parent, []).append(name)
                continue
            else:
                # Group REDEFINES (no PIC, e.g. CUST-ADDR2 REDEFINES CUST-ADDR).
                group_redefines.append((name, parent))
                # No `continue`: the group itself has no PIC and is skipped by
                # the group-item check below, while its children go through
                # the loop as ordinary fields.
        if f.get('is_filler'):
            # Repeated FILLER names get a numbered key so they do not collide.
            if name in rec:
                filler_key_counter += 1
                name = f'FILLER_{filler_key_counter + 1}'
            rec[name] = 'x' * (f.get('pic_info', {}).get('length', 0) or 1)
            continue
        # Pass 0: an initial value from a VALUE clause takes priority.
        if _apply_value(f, rec):
            continue
        # Group items (no PIC) are skipped.
        if not f.get('pic'):
            continue
        pi = f.get('pic_info', {})
        ftype = pi.get('type', 'unknown')
        digits = pi.get('digits', 0)
        decimal = pi.get('decimal', 0)
        length = pi.get('length', 0)
        if ftype == 'numeric':
            if _is_date_field(name):
                rec[name] = seq_date(record_num)
            else:
                numeric_idx += 1
                rec[name] = _make_numeric_value(numeric_idx, record_num, digits + decimal)
        elif ftype in ('alphanumeric', 'alphabetic'):
            alpha_idx += 1
            rec[name] = _make_alpha_value(alpha_idx, record_num, length or 1)
        elif ftype == 'numeric-edited':
            numeric_idx += 1
            raw = _make_numeric_value(numeric_idx, record_num, digits + decimal)
            rec[name] = raw.rjust(length)
        else:
            # Unknown type: fall back to an 8-character alphanumeric value.
            alpha_idx += 1
            rec[name] = _make_alpha_value(alpha_idx, record_num, 8)
    # Pass 2a: scalar REDEFINES copy the value of the redefined item.
    for parent_name, child_names in redefines_map.items():
        if parent_name in rec:
            for child_name in child_names:
                rec[child_name] = rec[parent_name]
    # Pass 2b: group REDEFINES copy sub-item values by position.
    for redef_name, target_name in group_redefines:
        redef_kids = _children_of(redef_name, fields)
        tgt_kids = _children_of(target_name, fields)
        tgt_idx = 0
        for i, rk in enumerate(redef_kids):
            if tgt_idx >= len(tgt_kids):
                break
            if i == len(redef_kids) - 1 and len(redef_kids) < len(tgt_kids):
                # Last redefining sub-item while the target has more sub-items:
                # concatenate all remaining target values.
                parts = [rec.get(tk['name'], '') for tk in tgt_kids[tgt_idx:]]
                rec[rk['name']] = ''.join(parts)
            elif i == len(redef_kids) - 1 and len(redef_kids) > len(tgt_kids):
                # More redefining sub-items than target sub-items: the last
                # redefining sub-item takes the last target value.
                rec[rk['name']] = rec.get(tgt_kids[-1]['name'], '')
            else:
                rec[rk['name']] = rec.get(tgt_kids[tgt_idx]['name'], '')
                tgt_idx += 1
    return rec
# ── 约束应用 ──
def _check_constraint_satisfied(rec, field_name, operator, value, want_true, fields):
"""检查 field_name 当前值是否满足该约束。满足返回 True。"""
for f in fields:
if f['name'] == field_name:
pi = f.get('pic_info', {})
ftype = pi.get('type', 'unknown')
val = rec.get(field_name)
if val is None:
return False
if operator == 'not_in':
cases = value if isinstance(value, list) else []
return str(val) not in cases
if ftype == 'numeric':
try:
num_val = int(float(str(val)))
num_target = int(float(str(value)))
except (ValueError, TypeError):
return False
if operator in ('>=', '>', '<', '<=', '=', '<>'):
if operator == '>=': ok = num_val >= num_target
elif operator == '>': ok = num_val > num_target
elif operator == '<': ok = num_val < num_target
elif operator == '<=': ok = num_val <= num_target
elif operator == '=': ok = num_val == num_target
elif operator == '<>': ok = num_val != num_target
return ok == want_true
return True
else:
s_val = str(val).strip().upper()
s_target = str(value).strip().upper()
eq = s_val == s_target
if operator == '=':
return eq == want_true
elif operator == '<>':
return (not eq) == want_true
return True
return False
# Operator groups used by _apply_arith_constraint to decide whether the
# left-hand fields of an arithmetic comparison should be steered to a large
# or a small value.
_ARITH_BOUNDS = {
    'left_big_ops': {'>', '>=', '<>'},
    'left_small_ops': {'<', '<='},
}
def _arith_pic_info(field_name, fields):
for f in fields:
if f['name'] == field_name.upper():
return f.get('pic_info', {})
return {}
def _arith_numeric_pick(field_name, want_big, fields):
"""为字段选一个大值或小值,返回字符串。"""
pi = _arith_pic_info(field_name, fields)
if pi.get('type') != 'numeric':
return None
digits = pi.get('digits', 0)
decimal = pi.get('decimal', 0)
total = digits + decimal
max_val = 10 ** total - 1
if want_big:
pick = int(max_val * 0.7)
else:
pick = 1
int_part = str(pick // (10 ** decimal)).zfill(digits)
dec_part = str(pick % (10 ** decimal)).zfill(decimal)
if decimal == 0:
return int_part
return int_part + dec_part
def _apply_arith_constraint(rec, field_name, operator, value, want_true, fields):
    """Heuristically steer field values so an arithmetic comparison holds.

    ``field_name`` is the left-hand expression text (e.g. ``A + B``) and
    ``value`` the right-hand side. For ``A + B > C`` with want_true=True the
    left-hand fields are set large and the right-hand field small; for
    ``A + B <= C`` the other way round. This is steering, not exact solving:
    it aims at making the branch reachable, not at hitting boundary values.
    """
    known_names = [f['name'] for f in fields]

    # 1. Collect every known field mentioned in the left-hand expression.
    candidates = re.findall(r'\b[A-Z][A-Z0-9-]*(?:\([^)]*\))?\b', field_name.upper())
    left_fields = [token for token in candidates if token in known_names]
    # 2. The right-hand side only matters when it is itself a field.
    right_field = value if value in known_names else None
    if not left_fields:
        logger.debug(f"算术表达式无法提取字段: {field_name}")
        return

    # 3. Direction: should the left side be large when the condition is wanted true?
    shrink_left = (operator not in _ARITH_BOUNDS['left_big_ops']
                   and operator in _ARITH_BOUNDS['left_small_ops'])
    left_big = (not want_true) if shrink_left else want_true

    # 4. Steer the left-hand fields.
    for left_name in left_fields:
        chosen = _arith_numeric_pick(left_name, left_big, fields)
        if chosen is not None:
            rec[left_name] = chosen

    # 5. Steer the right-hand field the opposite way, if there is one.
    if right_field:
        chosen = _arith_numeric_pick(right_field, not left_big, fields)
        if chosen is not None:
            rec[right_field] = chosen
def apply_constraint(rec, field_name, operator, value, want_true, fields, assignments=None, path_assign=None):
    """Modify ``rec`` in place so that ``field_name operator value`` evaluates to ``want_true``.

    The constraint may be redirected before it is applied: to a resolved
    subscript, to every subscripted variant of the field, to the item a
    REDEFINES field overlays, or to the root variable of an assignment chain.
    Nothing is changed when the current value already satisfies the
    constraint. Returns None.
    """
    # Normalise the field name: drop whitespace around parentheses and commas
    # (WS-CELL ( 1, 1 ) -> WS-CELL(1,1)).
    field_name = re.sub(r'\s*([(),])\s*', r'\1', field_name)
    # Resolve a variable subscript: WS-FIXED-VALUE(WS-IDX) -> WS-FIXED-VALUE(1).
    vm = re.match(r'^(\w[\w-]*)\((\w[\w-]*)\)$', field_name)
    if vm:
        base_var, subscript_var = vm.groups()
        if subscript_var in rec:
            try:
                resolved_name = f'{base_var}({int(rec[subscript_var])})'
                if any(f['name'] == resolved_name for f in fields):
                    apply_constraint(rec, resolved_name, operator, value, want_true, fields, assignments, path_assign)
                    return
            except (ValueError, TypeError):
                # Subscript value is not an integer: fall through unresolved.
                pass
    # Subscript fan-out: an unsubscripted constraint is applied to every
    # subscripted variant of the field.
    base = _basename(field_name)
    subscripted = [f for f in fields if f['name'] != base and _basename(f['name']) == base]
    if subscripted and field_name == base:
        for sf in subscripted:
            apply_constraint(rec, sf['name'], operator, value, want_true, fields, assignments, path_assign)
        return
    # A constraint on a REDEFINES field is redirected to the redefined item
    # (shared storage); constraints on FILLER are ignored.
    for f in fields:
        if f['name'] == field_name:
            if f.get('is_filler'):
                return
            if f.get('redefines'):
                parent_name = f['redefines']
                logger.debug(f"REDEFINES 约束重定向: {field_name}{parent_name}")
                apply_constraint(rec, parent_name, operator, value, want_true, fields, assignments, path_assign)
                return
            break
    if assignments:
        # Trace the field back through its assignment chain and, when the root
        # is a known field, constrain the root instead.
        root_var, chain = trace_to_root(field_name, assignments, fields, path_assign)
        if root_var != field_name:
            new_field_name, new_op, new_val = invert_through_chain(root_var, chain, operator, value)
            if any(f['name'] == new_field_name for f in fields):
                field_name, operator, value = new_field_name, new_op, new_val
    # If the current value already satisfies the constraint, do not overwrite
    # it (keeps earlier constraints consistent).
    if _check_constraint_satisfied(rec, field_name, operator, value, want_true, fields):
        return
    if operator == 'not_in':
        for f in fields:
            if f['name'] == field_name:
                pi = f.get('pic_info', {})
                cases = value if isinstance(value, list) else []
                ftype = pi.get('type', 'unknown')
                if ftype in ('alphanumeric', 'alphabetic'):
                    # First letter that is not a case value, repeated to the field length.
                    for c in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ':
                        if c not in cases:
                            rec[field_name] = c.ljust(pi.get('length', 1), c)
                            return
                else:
                    # First number in 1..99 whose text is not a case value.
                    for n in range(1, 100):
                        if str(n) not in cases:
                            rec[field_name] = str(n).zfill(pi.get('digits', 0) + pi.get('decimal', 0))
                            return
        return
    # Field-to-field comparison (the value side is also a field name).
    if any(f['name'] == value for f in fields):
        if re.search(r'[+\-*/]', field_name):
            _apply_arith_constraint(rec, field_name, operator, value, want_true, fields)
        else:
            logger.debug(f"字段间比较约束跳过:{field_name} {operator} {value}")
        return
    for f in fields:
        if f['name'] == field_name:
            pi = f.get('pic_info', {})
            val = satisfying_value(pi, operator, value, want_true)
            rec[field_name] = val
            return
# ── 记录生成入口 ──
def sync_redefined_fields(rec, fields):
    """Re-synchronise REDEFINES fields in ``rec`` after assignments/constraints.

    Scalar REDEFINES items (with PIC) receive the value of the item they
    redefine. Group REDEFINES items (no PIC) have their sub-items filled
    positionally from the sub-items of the redefined group. 88-level and
    FILLER entries are ignored. ``rec`` is modified in place.
    """
    scalar_aliases = {}
    group_aliases = []
    for f in fields:
        if f.get('is_88') or f.get('is_filler'):
            continue
        if not f.get('redefines'):
            continue
        if f.get('pic'):
            scalar_aliases.setdefault(f['redefines'], []).append(f['name'])
        else:
            group_aliases.append((f['name'], f['redefines']))

    for parent_name, aliases in scalar_aliases.items():
        if parent_name not in rec:
            continue
        for alias in aliases:
            rec[alias] = rec[parent_name]

    for redef_name, target_name in group_aliases:
        src_items = _children_of(target_name, fields)
        dst_items = _children_of(redef_name, fields)
        n_src, n_dst = len(src_items), len(dst_items)
        cursor = 0
        for pos, dst in enumerate(dst_items):
            if cursor >= n_src:
                break
            is_last = pos == n_dst - 1
            if is_last and n_dst < n_src:
                # Last redefining sub-item absorbs all remaining target values.
                rec[dst['name']] = ''.join(
                    [rec.get(src['name'], '') for src in src_items[cursor:]]
                )
            elif is_last and n_dst > n_src:
                rec[dst['name']] = rec.get(src_items[-1]['name'], '')
            else:
                rec[dst['name']] = rec.get(src_items[cursor]['name'], '')
                cursor += 1
def apply_occurs_depending(rec, fields):
    """Blank out table elements beyond the current OCCURS DEPENDING ON count.

    For every field with ``occurs_depending`` whose name ends in a numeric
    subscript ``(n)``, the value in ``rec`` is reset when ``n`` is greater
    than the integer value of the controlling variable (0 when it is absent
    from ``rec``): numeric fields become zeros, alphanumeric/alphabetic
    fields become spaces, other types become zeros of ``length``.

    A controlling value that is not an integer (e.g. blank) leaves the
    dependent elements untouched instead of raising. ``rec`` is modified in
    place.
    """
    for f in fields:
        dep_var = f.get('occurs_depending')
        if not dep_var:
            continue
        name = f['name']
        m = re.search(r'\((\d+)\)$', name)
        if not m:
            continue
        try:
            max_val = int(rec.get(dep_var, 0))
        except (ValueError, TypeError):
            # Count is unknown: do not guess which elements are out of range.
            continue
        if int(m.group(1)) <= max_val:
            continue
        pi = f.get('pic_info', {})
        ftype = pi.get('type', 'unknown')
        length = pi.get('length', 0) or 1
        if ftype == 'numeric':
            rec[name] = '0' * (pi.get('digits', 0) + pi.get('decimal', 0))
        elif ftype in ('alphanumeric', 'alphabetic'):
            rec[name] = ' ' * length
        else:
            rec[name] = '0' * length
def _non_match_for(cond_leaf, fields):
if not fields or not cond_leaf:
return None
base = re.sub(r'\s*\(.*?\)\s*$', '', cond_leaf.field)
for f in fields:
if re.sub(r'\s*\(.*?\)\s*$', '', f['name']) == base:
pic = f.get('pic_info', {})
if pic.get('type') == 'numeric':
return '0'
return ' '
return None
def _enum_search_paths(node, fields):
    """Enumerate the paths of a SEARCH node (one per WHEN sub-path, plus AT END).

    For the i-th WHEN whose condition is a single leaf, the path assigns the
    matching value to table element ``i + 1`` and a non-matching literal to
    all earlier elements. The AT END paths assign the non-matching literal to
    every element of the table (count inferred from subscripted field names).

    Returns:
        list of ``(constraints, assignments)`` pairs. Each path owns its
        assignment lists; they are not shared between paths.
    """
    def _merge(seed, extra):
        # Copy the seed lists so that extending one path never leaks into another.
        merged = {key: list(items) for key, items in seed.items()}
        for key, items in extra.items():
            merged.setdefault(key, []).extend(items if isinstance(items, list) else [items])
        return merged

    first_tree = node.cond_trees[0] if node.cond_trees else None

    # Infer the OCCURS count from the condition field, e.g.
    # WS-CODE-VAL(WS-IDX) -> largest j for which WS-CODE-VAL(j) exists.
    occurs_count = 1
    if node.when_list and first_tree:
        if isinstance(first_tree, CondLeaf):
            base = re.sub(r'\s*\(.*?\)\s*$', '', first_tree.field)
            for f in fields:
                m = re.match(rf'^{re.escape(base)}\((\d+)\)$', f['name'])
                if m:
                    occurs_count = max(occurs_count, int(m.group(1)))
    if occurs_count <= 1:
        # Otherwise look at the subscripts used with the table name itself.
        parent = node.table_name
        for f in fields:
            m = re.match(rf'^{re.escape(parent)}\((\d+)\)$', f['name'])
            if m:
                occurs_count = max(occurs_count, int(m.group(1)))

    paths = []
    for i, (_cond_text, body_seq) in enumerate(node.when_list):
        cond_tree = node.cond_trees[i] if i < len(node.cond_trees) else None
        sub = _cap_paths(enum_paths(body_seq, fields)) or [([], {})]
        extra_assign = {}
        if cond_tree and isinstance(cond_tree, CondLeaf):
            base = re.sub(r'\s*\(.*?\)\s*$', '', cond_tree.field)
            extra_assign[f'{base}({i + 1})'] = [{'type': 'move_literal', 'literal': cond_tree.value}]
            non_match = _non_match_for(cond_tree, fields) or ' '
            for j in range(i):
                extra_assign[f'{base}({j + 1})'] = [{'type': 'move_literal', 'literal': non_match}]
        for sp_cons, sp_assign in sub:
            paths.append((sp_cons, _merge(extra_assign, sp_assign)))

    if node.has_at_end:
        # The AT END seed does not depend on the sub-path, so build it once.
        extra_assign = {}
        if node.when_list and first_tree and isinstance(first_tree, CondLeaf):
            non_match = _non_match_for(first_tree, fields) or ' '
            base = re.sub(r'\s*\(.*?\)\s*$', '', first_tree.field)
            for j in range(max(occurs_count, 1)):
                extra_assign[f'{base}({j + 1})'] = [{'type': 'move_literal', 'literal': non_match}]
        sub = _cap_paths(enum_paths(node.at_end_seq, fields))
        for sp_cons, sp_assign in (sub or [([], {})]):
            paths.append((sp_cons, _merge(extra_assign, sp_assign)))
    return paths
def generate_records(branch_paths_with_assigns, data_fields, base_assignments=None, file_sec=None):
    """Generate one test-data record per enumerated path.

    Args:
        branch_paths_with_assigns: list of (constraints, path_assignments).
        data_fields: list of field dicts.
        base_assignments: global assignments dict (used for trace_to_root).
        file_sec: passed through to propagate_assignments.

    Returns:
        (records, kept_path_cons) where kept_path_cons holds, index by index,
        the constraints of each record. Paths judged impossible are dropped;
        when no record results, a single base record is returned.
    """
    records = []
    kept_path_cons = []
    if branch_paths_with_assigns:
        for seq, (path_cons, path_assign) in enumerate(branch_paths_with_assigns, start=1):
            path_cons = _filter_stop(path_cons)
            rec = make_base_record(seq, data_fields)
            # Pass A: propagate assignments first (MOVE/COMPUTE/READ INTO ...)
            # to model the program state before the decision point.
            if isinstance(path_assign, dict):
                propagate_assignments(rec, path_assign, data_fields, file_sec=file_sec)
            # Pass A.5: detect constraints whose chain traces back to a literal
            # assignment that contradicts them (impossible path).
            skip_impossible = False
            if base_assignments and isinstance(path_assign, dict):
                for c in path_cons:
                    if len(c) == 4 and not skip_impossible:
                        field, op, val, want = c
                        root_var, chain = trace_to_root(field, base_assignments, data_fields, path_assign)
                        if root_var != field:
                            new_fn, new_op, new_val = invert_through_chain(root_var, chain, op, val)
                            if any(f['name'] == new_fn for f in data_fields):
                                asgn_val = path_assign.get(root_var)
                                if asgn_val is not None:
                                    asgn_list = asgn_val if isinstance(asgn_val, list) else [asgn_val]
                                    # Only a trailing literal MOVE pins the root value.
                                    if asgn_list and asgn_list[-1]['type'] == 'move_literal' and root_var in rec:
                                        if not _check_constraint_satisfied(rec, root_var, new_op, new_val, want, data_fields):
                                            skip_impossible = True
                                            break
            if skip_impossible:
                continue
            # Pass B: apply constraints (make the decision conditions hold,
            # overriding values brought in by MOVE).
            for c in path_cons:
                if len(c) == 4:
                    field, op, val, want = c
                    apply_constraint(rec, field, op, val, want, data_fields, base_assignments, path_assign)
            # Pass B.5: re-propagate variable-to-variable MOVEs forward so the
            # chains stay consistent after constraints changed values.
            if isinstance(path_assign, dict):
                forward = {}
                for tgt, asgn_val in path_assign.items():
                    asgn_list = asgn_val if isinstance(asgn_val, list) else [asgn_val]
                    filtered = [a for a in asgn_list if a['type'] == 'move' and a.get('source_vars')]
                    if filtered:
                        forward[tgt] = filtered
                if forward:
                    propagate_assignments(rec, forward, data_fields, file_sec=file_sec)
            # Pass C: synchronise REDEFINES (keep shared storage consistent).
            sync_redefined_fields(rec, data_fields)
            # Pass D: OCCURS DEPENDING ON - reset out-of-range subscripted fields.
            apply_occurs_depending(rec, data_fields)
            records.append(rec)
            kept_path_cons.append(path_cons)
    if not records:
        # No usable path: fall back to one base record with the global assignments.
        rec = make_base_record(1, data_fields)
        if base_assignments:
            propagate_assignments(rec, base_assignments, data_fields, file_sec=file_sec)
        records.append(rec)
        kept_path_cons.append([])
    return records, kept_path_cons
+35
View File
@@ -0,0 +1,35 @@
// Lark grammar for the content of a COBOL DATA DIVISION:
// FILE SECTION, WORKING-STORAGE SECTION and LINKAGE SECTION in any order.
start: data_div_content
data_div_content: (file_section | working_storage | linkage)*
file_section: "FILE" "SECTION" DOT fd+
fd: "FD" NAME FD_SUFFIX data_item+
// Everything after the FD name up to and including the first period that is
// not inside a quoted string.
FD_SUFFIX: /(?:"[^"]*"|'[^']*'|[^.])*\./
working_storage: "WORKING-STORAGE" "SECTION" DOT data_item*
linkage: "LINKAGE" "SECTION" DOT data_item*
// One data description entry: level number, name or FILLER, clauses, period.
data_item: level_num (NAME | "FILLER") clause* DOT
level_num: LEVEL
clause: pic_clause | value_clause | occurs_clause | redefines_clause | usage_clause
| "SYNC" | "SYNCHRONIZED"
| "JUSTIFIED" "RIGHT"?
| "BLANK" "WHEN" "ZERO"
| "GLOBAL" | "EXTERNAL"
pic_clause: "PIC" "IS"? PICTURE_STRING
// One or more literals may follow VALUE.
value_clause: "VALUE" "IS"? value_literal+
value_literal: INT | SIGNED_NUMBER | STRING | SQSTRING
| "ZERO" | "ZEROS" | "ZEROES"
| "SPACE" | "SPACES"
| "HIGH-VALUE" | "HIGH-VALUES"
| "LOW-VALUE" | "LOW-VALUES"
// Single-quoted string without embedded single quotes.
SQSTRING: /'[^']*'/
redefines_clause: "REDEFINES" NAME
occurs_clause: "OCCURS" INT "TIMES"? ("DEPENDING" "ON" NAME)?
usage_clause: USAGE_VAL
USAGE_VAL: "COMP" | "COMP-3" | "COMP-5" | "BINARY" | "PACKED-DECIMAL" | "DISPLAY"
// Two-digit level numbers 01-49, plus 77 and 88.
LEVEL: /0[1-9]|[1-4][0-9]|49|77|88/
NAME: /[A-Z][A-Z0-9-]*/
PICTURE_STRING: /[0-9A-Z()+,\-*\/V]+/i
INT: /[0-9]+/
DOT: /\./
%import common.SIGNED_NUMBER
%import common.ESCAPED_STRING -> STRING
%import common.WS
%ignore WS
+163
View File
@@ -0,0 +1,163 @@
"""COBOL数据模型 — 所有层共享,无外部依赖"""
from dataclasses import dataclass, field
# ── 字段定义 ──
@dataclass
class PicInfo:
    """Parsed information about a PICTURE clause."""
    type: str = 'unknown'  # "numeric" | "alphanumeric" | "alphabetic"; defaults to 'unknown'
    digits: int = 0        # presumably the count of integer digit positions - confirm against the PIC parser
    decimal: int = 0       # presumably the count of decimal digit positions - confirm against the PIC parser
    length: int = 0        # presumably the field length in characters - confirm against the PIC parser
    signed: bool = False   # presumably True for a signed PIC - confirm against the PIC parser
@dataclass
class FieldDef:
    """One data description entry.

    Only ``name`` and ``level`` are required; every other attribute has a
    default.
    """
    name: str
    level: int
    pic: str | None = None                # PICTURE string, if any
    pic_info: PicInfo | None = None       # parsed PICTURE information, if any
    is_filler: bool = False
    occurs_count: int = 0
    occurs_depending: str | None = None   # presumably the DEPENDING ON variable name - confirm
    redefines: str | None = None          # presumably the name of the redefined item - confirm
    usage: str | None = None  # "COMP" | "COMP-3" | "BINARY" | "PACKED-DECIMAL" | ...
    value: str | None = None
    values: list[str] | None = None
    is_88: bool = False
    parent: str | None = None
    section: str | None = None
# ── 分支树 ──
class BrSeq:
    """Ordered container of branch-tree nodes."""

    def __init__(self):
        # Nodes in the order they were added.
        self.children = list()

    def add(self, child):
        """Append *child* to the end of this sequence."""
        nodes = self.children
        nodes.append(child)
class BrIf:
    """IF node holding a condition and its true/false branch sequences."""

    def __init__(self, condition):
        self.condition = condition
        # Assigned by core.py while parsing.
        self.cond_tree = None
        # Both branches start out as empty sequences.
        self.true_seq, self.false_seq = BrSeq(), BrSeq()
class BrEval:
    """EVALUATE node: subject, WHEN entries and the WHEN OTHER branch."""

    def __init__(self, subject):
        self.subject = subject
        # ALSO multi-subject form, e.g. ['WS-A', 'WS-B']; empty means plain mode.
        self.subjects = list()
        self.when_list = list()
        self.other_seq = BrSeq()
        # Starts False; the creator flips it when appropriate.
        self.has_other = False
class BrPerform:
    """PERFORM node; stores its options verbatim plus an empty body sequence."""

    def __init__(self, perf_type, condition=None, target=None, thru=None, times=None,
                 varying_var=None, varying_from=None, varying_by=None):
        self.perf_type, self.condition = perf_type, condition
        self.target, self.thru, self.times = target, thru, times
        self.varying_var, self.varying_from, self.varying_by = (
            varying_var, varying_from, varying_by,
        )
        # Body starts empty.
        self.body_seq = BrSeq()
class Assign:
    """Assignment node: MOVE/COMPUTE/ADD/SUBTRACT/MULTIPLY/DIVIDE."""

    def __init__(self, target: str, source_info: dict):
        # Both arguments are stored as given (no copy is made).
        self.target, self.source_info = target, source_info
class CallNode:
    """CALL node for a subprogram invocation (black-box mode)."""

    def __init__(self, program_name: str, using_params: list = None):
        self.program_name = program_name
        # A falsy argument (None or an empty list) is replaced by a new list.
        # using_params: [{"name": "WS-A", "mechanism": "reference"}, ...]
        # mechanism: "reference" | "content" | "value"
        self.using_params = using_params if using_params else []
# ── 条件树 ──
class CondLeaf:
    """Leaf of a condition tree: a single ``field op value`` comparison."""

    def __init__(self, field, op, value):
        self.field, self.op, self.value = field, op, value
class CondNot:
    """Condition-tree node wrapping a single child (logical NOT)."""

    def __init__(self, child):
        # The wrapped sub-condition, stored as given.
        self.child = child
class CondAnd:
    """Condition-tree node joining two sub-conditions with AND."""

    def __init__(self, left, right):
        self.left, self.right = left, right
class CondOr:
    """Condition-tree node joining two sub-conditions with OR."""

    def __init__(self, left, right):
        self.left, self.right = left, right
class BrSearch:
    """SEARCH / SEARCH ALL table lookup node."""

    def __init__(self, table_name, is_all=False, varying=None):
        self.table_name, self.is_all = table_name, is_all
        # VARYING name is upper-cased; any falsy value is stored as None.
        self.varying = None if not varying else varying.upper()
        self.at_end_seq = BrSeq()
        self.when_list = list()   # [(condition_text, BrSeq)]
        self.cond_trees = list()  # [cond_tree, ...]
        self.has_at_end = False
class GoTo:
    """GO TO node: unconditional jump to the named paragraph."""

    def __init__(self, target: str, body_seq: 'BrSeq' = None):
        self.target = target
        # A falsy ``body_seq`` (the default None) is replaced by a new BrSeq.
        self.body_seq = body_seq if body_seq else BrSeq()
class ExitNode:
    """Control-flow exit node: EXIT PARAGRAPH / EXIT PERFORM / EXIT SECTION / EXIT PROGRAM."""

    def __init__(self, exit_type: str):
        # Which kind of EXIT this node represents, stored as given.
        self.exit_type = exit_type
# ── Constraint paths ──
# A constraint is a plain tuple; the intended layout is given in the note below.
Constraint = tuple  # (field, op, value, want_true)
# A path is a list of constraints.
Path = list[Constraint]
# ── 解析错误 ──
@dataclass
class ParseError:
    """One diagnostic recorded while parsing."""
    line: int                  # presumably the source line number — confirm 0- or 1-based
    message: str
    severity: str = 'warning'  # defaults to 'warning'; other values are not shown here
@dataclass
class ProcParseResult:
    """Result bundle of a procedure parse.

    Each instance gets its own ``assignments`` dict and ``errors`` list
    (created through ``default_factory``).
    """
    tree: BrSeq | None = None  # root sequence; None by default
    assignments: dict = field(default_factory=dict)
    errors: list[ParseError] = field(default_factory=list)
    # NOTE(review): presumably signals that the caller should fall back to an
    # AI-based path when parsing was not possible — confirm with the producer.
    fallback_to_ai: bool = False
+118
View File
@@ -0,0 +1,118 @@
"""输出层:JSON输出(按文件分组入出力 + 工作存储区分)"""
import json
from pathlib import Path
_INVERSE_OP = {'>': '<=', '<': '>=', '=': '<>', '>=': '<', '<=': '>'}
def _scenario_text(path_cons):
parts = []
for c in path_cons:
if len(c) != 4:
continue
field, op, val, want = c
if op == 'not_in':
desc = f"{field} not in {val}" if want else f"{field} in {val}"
elif not want:
desc = f"{field} {_INVERSE_OP.get(op, '?' + op)} {val}"
else:
desc = f"{field} {op} {val}"
parts.append(desc)
return ', '.join(parts)
def output_json(records, outpath, roles=None, fd_fields=None, field_to_fd=None,
                open_dir=None, path_cons_list=None):
    """Write generated records to *outpath* as UTF-8 JSON.

    The parent directory of *outpath* is created when missing.

    Without *roles* the records are dumped unchanged. With *roles* every
    record becomes an entry with three keys:

    * ``input`` – per FD, fields whose role is ``input``/``inout`` when the FD
      direction is ``INPUT`` or ``I-O``;
    * ``expected_output`` – per FD, fields whose role is ``output``/``inout``
      when the FD direction is ``OUTPUT`` or ``I-O``;
    * ``working_storage`` – every field of the record that is not a key of
      *field_to_fd* (all fields when *field_to_fd* is empty).

    FD grouping only happens when both *fd_fields* and *field_to_fd* are
    non-empty. When *path_cons_list* has an entry at the record's index and
    its rendered text is non-empty, it is stored under ``scenario``.

    Args:
        records: list of ``{field_name: value}`` dicts.
        outpath: path-like object providing ``.parent.mkdir``; opened for writing.
        roles: ``{field_name: role}``; a missing field counts as ``'unused'``.
        fd_fields: ``{fd_name: iterable of field names}``.
        field_to_fd: mapping whose keys are the fields that belong to an FD.
        open_dir: ``{fd_name: direction}``; a missing FD has direction ``''``.
        path_cons_list: constraint paths, index-aligned with *records*.
    """
    outpath.parent.mkdir(parents=True, exist_ok=True)

    payload = records
    if roles:
        directions = open_dir or {}
        group_by_fd = bool(fd_fields and field_to_fd)
        payload = []
        for idx, rec in enumerate(records):
            inputs, expected = {}, {}
            if group_by_fd:
                for fd_name, members in fd_fields.items():
                    direction = directions.get(fd_name, '')
                    fd_in, fd_out = {}, {}
                    for fname in members:
                        if fname not in rec:
                            continue
                        role = roles.get(fname, 'unused')
                        val = rec[fname]
                        if direction in ('INPUT', 'I-O') and role in ('input', 'inout'):
                            fd_in[fname] = val
                        if direction in ('OUTPUT', 'I-O') and role in ('output', 'inout'):
                            fd_out[fname] = val
                    # Empty per-FD blocks are left out of the entry.
                    if fd_in:
                        inputs[fd_name] = fd_in
                    if fd_out:
                        expected[fd_name] = fd_out

            # Working storage: whatever does not belong to any FD.
            working = {
                name: val
                for name, val in rec.items()
                if not field_to_fd or name not in field_to_fd
            }

            entry = {
                'input': inputs,
                'expected_output': expected,
                'working_storage': working,
            }
            if path_cons_list and idx < len(path_cons_list):
                scenario = _scenario_text(path_cons_list[idx])
                if scenario:
                    entry['scenario'] = scenario
            payload.append(entry)

    with open(outpath, 'w', encoding='utf-8') as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)
def output_input_files(records, outdir, stem, roles, fd_fields, field_to_fd, open_dir):
    """Write one input JSON file per input-direction FD.

    An FD qualifies when its direction in *open_dir* is ``INPUT`` or ``I-O``
    and at least one of its fields has role ``input`` or ``inout``. Each
    qualifying FD produces ``{stem}_{fd_name}.json`` inside *outdir*. The file
    holds, for every record, a dict of that FD's input fields present in the
    record; records that contribute no field are omitted.

    Nothing is written, and *outdir* is not created, when no FD qualifies.
    *field_to_fd* is accepted but not used here.
    """
    directions = open_dir or {}

    def _is_input(fname):
        # A field with no role entry counts as 'unused'.
        return roles.get(fname, 'unused') in ('input', 'inout')

    input_fds = {
        fd_name: members
        for fd_name, members in fd_fields.items()
        if directions.get(fd_name, '') in ('INPUT', 'I-O')
        and any(_is_input(fname) for fname in members)
    }
    if not input_fds:
        return

    outdir.mkdir(parents=True, exist_ok=True)
    for fd_name, members in input_fds.items():
        fd_records = []
        for rec in records:
            picked = {
                fname: rec[fname]
                for fname in members
                if _is_input(fname) and fname in rec
            }
            if picked:
                fd_records.append(picked)
        target = outdir / f'{stem}_{fd_name}.json'
        with open(target, 'w', encoding='utf-8') as f:
            json.dump(fd_records, f, ensure_ascii=False, indent=2)
+18 -3
View File
@@ -388,17 +388,32 @@ def parse_data_division(data_div_text: str) -> list[FieldDef]:
def parse_file_control(source: str) -> dict:
    """Parse the FILE-CONTROL paragraph of a COBOL source text.

    Only SELECT entries whose ASSIGN TO operand is a quoted literal are
    recognised. Matching is case-insensitive and all returned strings are
    upper-cased.

    Args:
        source: Program text to scan. The paragraph is taken from
            ``FILE-CONTROL.`` up to ``DATA DIVISION`` or the end of the text.

    Returns:
        ``{file_name: {"assign_to": str, "organization": str | None}}``.
        ``organization`` is the value of the ORGANIZATION clause anywhere in
        the SELECT entry (before or after ASSIGN), with two-word forms kept
        whole (e.g. ``"LINE SEQUENTIAL"``), or ``None`` when the entry has no
        such clause. Returns ``{}`` when there is no FILE-CONTROL paragraph.
    """
    m = re.search(r'FILE-CONTROL\.(.*?)(?=DATA\s+DIVISION|\Z)', source, re.DOTALL | re.IGNORECASE)
    if not m:
        return {}
    fc = m.group(1)

    # Remainder of a SELECT entry after the ASSIGN literal: everything up to
    # the terminating period, treating quoted literals as opaque.
    tail_re = re.compile(r'(?:"[^"]*"|\'[^\']*\'|[^.])*')
    # Start of a following SELECT entry; guards against a missing period.
    next_select_re = re.compile(r'(?<![\w-])SELECT(?![\w-])', re.IGNORECASE)
    org_re = re.compile(
        r'\bORGANIZATION\s+(?:IS\s+)?((?:(?:LINE|RECORD)\s+)?\w[\w-]*)',
        re.IGNORECASE,
    )

    result = {}
    for sel_m in re.finditer(
        r'SELECT\s+(?:OPTIONAL\s+)?(\w[\w-]*)\s+[^.]*?\bASSIGN\s+TO\s+(["\'])(.*?)\2',
        fc, re.IGNORECASE
    ):
        fname = sel_m.group(1).upper()
        assign_to = sel_m.group(3).upper()

        # The SELECT match stops at the ASSIGN literal, but ORGANIZATION is
        # normally written after it, so extend the search to the entry's end.
        tail = tail_re.match(fc, sel_m.end()).group(0)
        next_sel = next_select_re.search(tail)
        if next_sel:
            tail = tail[:next_sel.start()]
        statement = sel_m.group(0) + tail

        org_m = org_re.search(statement)
        # Collapse internal whitespace so "LINE\n  SEQUENTIAL" is normalised.
        org = ' '.join(org_m.group(1).upper().split()) if org_m else None
        result[fname] = {
            "assign_to": assign_to,
            "organization": org,
        }
    return result