feat: complete INSPECT/SEARCH support, fix PERFORM/EVAL coverage marking

- Add INSPECT (TALLYING/REPLACING/CONVERTING) with BEFORE/AFTER INITIAL
- Add SEARCH/SEARCH ALL with element-assignment path enumeration
- Fix _mark_perform compound condition marking via evaluate_tree
- Fix EVALUATE TRUE prior_false to collect all MC/DC false sets
- Add impossible path filtering (Pass A.5) with trace-to-root conflict detection
- Fix multi-line PERFORM VARYING parsing (VARYING/FROM/BY/UNTIL on separate lines)
- Remove dead code: agents.py LLM parser (replaced by rule-based _BrParser)
- 59 unit tests passing, 5 integration programs verified
add cobol_testgen module
2026-06-10 22:56:22 +08:00 · 2026-06-08 21:07:16 +08:00
22 changed files with 3584 additions and 979 deletions
@@ -0,0 +1,18 @@
 # cobol-java-v3
 ## 工作目录
 C:\Users\marye\Desktop\2026技术大赛\cobol-java-v3
 ## 我的模块
 cobol_testgen/
 ## 远程仓库
 https://gittea.dev/hangshuo652/cobol-java-v3
 ## 工作流程
 ```powershell
 cd "C:\Users\marye\Desktop\2026技术大赛\cobol-java-v3"
 git add cobol_testgen/
 git commit -m "描述修改"
 git push
 ```
@@ -0,0 +1,4 @@
 __pycache__/
 .pytest_cache/
 *.pyc
 test_output/
@@ -1,7 +1,6 @@
 """COBOL Test Data Generator — 模块化版入口"""
 import sys
 import re
 import logging
 from datetime import datetime
 from pathlib import Path
@@ -300,213 +299,3 @@ def main():
    if programs:
        generate_coverage_index(programs, outdir)
        logger.info(f"\n覆盖率总览：{outdir / 'coverage' / 'index.html'}")
 # ════════════════════════════════════════════
 # Phase 1: 可编程 API（供 orchestrator.py 调用）
 # ════════════════════════════════════════════
 def extract_structure(cobol_source: str) -> dict:
    """Statically analyse COBOL source text and return a structural summary.

    No test data is generated here; the source is only parsed.

    Args:
        cobol_source: full text of the COBOL program.

    Returns:
        dict with: paragraphs, decision_points, branch_tree, file_count,
                   open_directions, has_search_all, has_evaluate,
                   has_call, has_break, total_branches, total_paragraphs,
                   branch_tree_obj (the same object as branch_tree)
    """
    preprocessed = preprocess(cobol_source)
    data_div = extract_data_division(preprocessed)
    data_fields = parse_data_division(data_div) if data_div else []
    fields_dict = []
    for idx, f in enumerate(data_fields):
        entry = {
            # FILLER items share one name, so number them by position.
            'name': f.name if f.name != 'FILLER' else f'FILLER_{idx + 1}',
            'level': f.level, 'pic': f.pic,
            'pic_info': {
                'type': f.pic_info.type if f.pic_info else 'unknown',
                'digits': f.pic_info.digits if f.pic_info else 0,
                'decimal': f.pic_info.decimal if f.pic_info else 0,
                'length': f.pic_info.length if f.pic_info else 0,
                'signed': f.pic_info.signed if f.pic_info else False,
            },
            'section': f.section, 'occurs': f.occurs_count,
            'occurs_depending': f.occurs_depending,
            'redefines': f.redefines, 'usage': f.usage,
        }
        if f.is_88:
            entry['is_88'] = True
            entry['parent'] = f.parent
            entry['value'] = f.value
            entry['values'] = f.values
        fields_dict.append(entry)
    fields_dict = expand_occurs(fields_dict)
    proc_div = extract_procedure_division(preprocessed)
    branch_tree = None
    if proc_div:
        # The assignment map returned alongside the tree is not needed here.
        branch_tree, _ = build_branch_tree(proc_div, fields_dict)
    file_sec = parse_file_section(preprocessed)
    open_dir = scan_open_statements(proc_div) if proc_div else {}
    from .models import BrIf, BrEval, BrSeq
    decision_points = []
    total_branches = 0

    def _walk(node, counter):
        # Pre-order walk; counter is a one-element list so the running
        # decision id is shared across recursive calls.
        nonlocal total_branches
        if isinstance(node, BrIf):
            counter[0] += 1
            branches = 2
            decision_points.append({
                "id": counter[0], "kind": "IF",
                "label": str(node.condition)[:80], "branches": branches,
            })
            total_branches += branches
            _walk(node.true_seq, counter)
            _walk(node.false_seq, counter)
        elif isinstance(node, BrEval):
            counter[0] += 1
            n = len(node.when_list) + (1 if node.has_other else 0)
            decision_points.append({
                "id": counter[0], "kind": "EVALUATE",
                "label": str(node.subject)[:80], "branches": n,
            })
            total_branches += n
            for _, seq in node.when_list:
                _walk(seq, counter)
            _walk(node.other_seq, counter)
        elif isinstance(node, BrSeq):
            for child in node.children:
                _walk(child, counter)

    if branch_tree:
        _walk(branch_tree, [0])
    lines = proc_div.split('\n') if proc_div else []
    paragraphs = set()
    for line in lines:
        m = re.match(r'^\s*([A-Z0-9][A-Z0-9-]*)\.\s*$', line.strip())
        if m:
            paragraphs.add(m.group(1))

    # decision_points only ever holds IF / EVALUATE entries, so statement
    # level features such as SEARCH ALL and CALL are detected on the source
    # text. The look-arounds stop longer names (RECALL, CALLER-ID,
    # SEARCH-ALL-FLAG) from matching.
    source_upper = cobol_source.upper()
    has_search_all = re.search(
        r'(?<![A-Z0-9-])SEARCH\s+ALL(?![A-Z0-9-])', source_upper) is not None
    has_call = re.search(
        r'(?<![A-Z0-9-])CALL(?![A-Z0-9-])', source_upper) is not None

    return {
        "paragraphs": sorted(paragraphs),
        "decision_points": decision_points,
        "branch_tree": branch_tree,
        "file_count": len(file_sec) if file_sec else 0,
        "open_directions": open_dir,
        "has_search_all": has_search_all,
        "has_evaluate": any(dp['kind'] == 'EVALUATE' for dp in decision_points),
        "has_call": has_call,
        "has_break": any('KEY' in str(dp.get('label', '')).upper() for dp in decision_points),
        "total_branches": total_branches,
        "total_paragraphs": len(paragraphs),
        "branch_tree_obj": branch_tree,
    }
 def generate_data(cobol_source: str, structure: dict = None) -> list[dict]:
    """Generate test-data records from the enumerated branch paths of a program.

    Args:
        cobol_source: text of the COBOL program.
        structure: optional result of extract_structure(); when omitted it
            is computed from cobol_source.

    Returns:
        list[dict]: the records produced by generate_records(); an empty
        list when the structure carries no branch tree.
    """
    summary = extract_structure(cobol_source) if structure is None else structure
    branch_tree = summary.get("branch_tree_obj")
    if branch_tree is None:
        return []

    def _describe(f):
        # Flatten one parsed data item into the dict shape used downstream.
        pic = f.pic_info
        described = {
            'name': f.name, 'level': f.level, 'pic': f.pic,
            'pic_info': {
                'type': pic.type if pic else 'unknown',
                'digits': pic.digits if pic else 0,
                'decimal': pic.decimal if pic else 0,
                'length': pic.length if pic else 0,
                'signed': pic.signed if pic else False,
            },
            'section': f.section, 'occurs': f.occurs_count,
            'occurs_depending': f.occurs_depending,
            'value': f.value, 'values': f.values,
            'redefines': f.redefines, 'usage': f.usage,
        }
        if f.is_88:
            described['is_88'] = True
            described['parent'] = f.parent
        return described

    preprocessed = preprocess(cobol_source)
    data_div = extract_data_division(preprocessed)
    parsed_fields = parse_data_division(data_div) if data_div else []
    fields_dict = expand_occurs([_describe(f) for f in parsed_fields])

    proc_div = extract_procedure_division(preprocessed)
    # Only the assignment map is needed; the tree comes from `structure`.
    _, assignments = build_branch_tree(proc_div, fields_dict)
    file_sec = parse_file_section(preprocessed)

    branch_paths = [
        (_filter_stop(constraints), path_assigns)
        for constraints, path_assigns in enum_paths(branch_tree, fields_dict)
    ]
    records, _ = generate_records(branch_paths, fields_dict, assignments, file_sec=file_sec)
    return records
 def incremental_supplement(branch_tree, decision_gaps: list[int]) -> list[dict]:
    """Describe the decision points whose ids appear in decision_gaps.

    Decisions are numbered in pre-order over IF and EVALUATE nodes, starting
    at 1, the same way extract_structure() numbers them.

    Args:
        branch_tree: the branch tree returned by extract_structure().
        decision_gaps: ids of uncovered decision points, e.g. [1, 3, 5].

    Returns:
        list[dict]: one entry per matched decision with the keys
        "_dec_id", "_kind" and "_label".
    """
    from .models import BrIf, BrEval, BrSeq
    wanted = set(decision_gaps)
    hits = []
    seen_decisions = 0
    # Explicit stack; children are pushed in reverse so they are popped in
    # source order, giving the same numbering as a recursive pre-order walk.
    pending = [branch_tree]
    while pending:
        node = pending.pop()
        if isinstance(node, BrIf):
            seen_decisions += 1
            if seen_decisions in wanted:
                hits.append(("IF", node.condition))
            pending.append(node.false_seq)
            pending.append(node.true_seq)
        elif isinstance(node, BrEval):
            seen_decisions += 1
            if seen_decisions in wanted:
                hits.append(("EVALUATE", node.subject))
            pending.append(node.other_seq)
            pending.extend([seq for _, seq in node.when_list][::-1])
        elif isinstance(node, BrSeq):
            pending.extend(list(node.children)[::-1])
    return [
        {
            "_dec_id": f"incr_{i}",
            "_kind": kind,
            "_label": str(label)[:60],
        }
        for i, (kind, label) in enumerate(hits)
    ]
@@ -0,0 +1,4 @@
 """Allow the package to be run directly with ``python -m cobol_testgen``."""
 from cobol_testgen import main
 main()
@@ -0,0 +1,258 @@
 """条件层：COBOL条件表达式解析 + MC/DC枚举 + 约束合并"""
 import re
 from .models import CondLeaf, CondAnd, CondOr, CondNot, PicInfo
 # ── 条件解析 ──
 def _split_at_operator(text, operator):
    """Split text on operator word, respecting parentheses."""
    result = []
    current = []
    depth = 0
    # Normalize so parentheses are space-delimited tokens
    normalized = text.replace('(', ' ( ').replace(')', ' ) ')
    for token in normalized.split():
        if not token:
            continue
        if token == '(':
            depth += 1
            current.append(token)
        elif token == ')':
            depth -= 1
            current.append(token)
        elif token == operator and depth == 0:
            result.append(' '.join(current).strip())
            current = []
        else:
            current.append(token)
    result.append(' '.join(current).strip())
    return result
 def parse_single_condition(text, fields=None):
    """Parse one relational condition into a (field, operator, value) tuple.

    'AMOUNT > 1000' becomes ('AMOUNT', '>', '1000'). Subscripted operands
    such as "WS-ITEM(SUB) = 'A'" are accepted, and the left side may be an
    arithmetic expression like 'A + B'. When `fields` is given and `text`
    is the name of an 88-level entry, the result is (parent, '=', value)
    taken from that entry.

    Returns None when the text contains ' AND ' / ' OR ' or does not look
    like a comparison.
    """
    if any(joiner in text for joiner in (' AND ', ' OR ')):
        return None

    # An 88-level condition name resolves to an equality on its parent.
    if fields:
        for candidate in fields:
            if candidate.get('is_88') and candidate['name'] == text.upper():
                return (candidate.get('parent', ''), '=', candidate.get('value', ''))

    shapes = (
        # plain (optionally subscripted) field on the left
        r"^(\w[\w-]*(?:\s*\([^)]*\))?)\s*(>=|<=|<>|>|<|=)\s*(.+)$",
        # arithmetic expression on the left, e.g. A + B > C
        r"^(\w[\w\s+\-*/().-]+?)\s*(>=|<=|<>|>|<|=)\s*(.+)$",
    )
    for shape in shapes:
        hit = re.match(shape, text)
        if hit is None:
            continue
        lhs, op, rhs = hit.groups()
        # Drop blanks around parentheses/commas so subscripts are compact.
        lhs = re.sub(r'\s*([(),])\s*', r'\1', lhs).strip()
        return (lhs, op, rhs.strip().strip("'").strip('"'))
    return None
 def parse_compound_condition(text, fields=None):
    """Turn a COBOL condition string into a tree of Cond* nodes.

    OR is split first and AND second, so AND binds tighter than OR; a NOT
    prefix is handled after both splits. One pair of parentheses wrapping
    the whole text is stripped. Returns None for empty or unparseable text.
    """
    text = text.strip()
    if not text:
        return None
    # Make parentheses stand-alone tokens and collapse runs of whitespace.
    text = re.sub(r'\s+', ' ', text.replace('(', ' ( ').replace(')', ' ) ')).strip()

    if text.startswith('(') and text.endswith(')'):
        # The outer pair wraps everything only if the nesting depth never
        # returns to zero before the final character.
        balance = 0
        encloses_all = True
        for ch in text[:-1]:
            if ch == '(':
                balance += 1
            elif ch == ')':
                balance -= 1
                if balance == 0:
                    encloses_all = False
                    break
        if encloses_all:
            unwrapped = parse_compound_condition(text[1:-1], fields)
            if unwrapped:
                return unwrapped

    # Lowest precedence first: OR, then AND. Operands fold left to right.
    for keyword, node_type in (('OR', CondOr), ('AND', CondAnd)):
        operands = _split_at_operator(text, keyword)
        if len(operands) > 1:
            subtrees = [parse_compound_condition(part, fields) for part in operands]
            combined = subtrees[0]
            for subtree in subtrees[1:]:
                combined = node_type(combined, subtree)
            return combined

    if text.upper().startswith('NOT '):
        negated = parse_compound_condition(text[4:].strip(), fields)
        return CondNot(negated) if negated else None

    leaf = parse_single_condition(text, fields)
    return CondLeaf(*leaf) if leaf else None
 def collect_leaves(tree):
    """Return every CondLeaf in the tree, left to right ([] for other input)."""
    if isinstance(tree, CondLeaf):
        return [tree]
    if isinstance(tree, CondNot):
        return collect_leaves(tree.child)
    if isinstance(tree, (CondAnd, CondOr)):
        return [*collect_leaves(tree.left), *collect_leaves(tree.right)]
    return []
 def evaluate_tree(tree, assignment):
    """Evaluate a condition tree using a dict that maps each leaf to a bool.

    Any node that is not a CondLeaf/CondNot/CondAnd/CondOr evaluates to False.
    """
    if isinstance(tree, CondLeaf):
        return assignment[tree]
    if isinstance(tree, CondNot):
        return not evaluate_tree(tree.child, assignment)
    if isinstance(tree, CondAnd):
        return evaluate_tree(tree.left, assignment) and evaluate_tree(tree.right, assignment)
    if isinstance(tree, CondOr):
        return evaluate_tree(tree.left, assignment) or evaluate_tree(tree.right, assignment)
    return False
 def is_field(name, fields):
    """Return True when `name`, minus any trailing subscript, names a field.

    Example: WS-ITEM-STATUS(WS-INDEX-VAR) is looked up as WS-ITEM-STATUS.
    The comparison is made against the upper-cased bare name.
    """
    bare = re.sub(r'\s*\(.*\)\s*$', '', name).strip()
    return any(entry['name'] == bare.upper() for entry in fields)
 # ── MC/DC ──
 def mcdc_sets(tree, fields=None):
    """Build MC/DC constraint sets for a compound condition.

    Returns None when the tree has at most one leaf. Otherwise returns a
    list of (constraints, decision_outcome) pairs, where each constraint is
    a (field, op, value, want_true) tuple covering every leaf of the tree.
    `fields` is accepted but not used.
    """
    leaves = collect_leaves(tree)
    n = len(leaves)
    if n <= 1:
        return None

    # Full truth table: one row per combination of leaf outcomes.
    truth_table = []
    for bits in range(1 << n):
        row = {leaf: bool(bits & (1 << i)) for i, leaf in enumerate(leaves)}
        truth_table.append((row, evaluate_tree(tree, row)))

    # For each leaf keep the first pair of rows that differ only in that
    # leaf and flip the decision (its independence pair).
    needed_pairs = {}
    for leaf in leaves:
        pair = next(
            (
                (dict(a1), r1, dict(a2), r2)
                for a1, r1 in truth_table
                for a2, r2 in truth_table
                if a1[leaf] != a2[leaf] and r1 != r2
                and all(a1[o] == a2[o] for o in leaves if o != leaf)
            ),
            None,
        )
        if pair is not None:
            needed_pairs[leaf] = pair

    # Emit each distinct row once, as constraint tuples.
    constraint_sets = []
    emitted = set()
    for a1, r1, a2, r2 in needed_pairs.values():
        for row, decision in ((a1, r1), (a2, r2)):
            signature = frozenset((l, row[l]) for l in leaves)
            if signature in emitted:
                continue
            emitted.add(signature)
            constraint_sets.append(
                ([(l.field, l.op, l.value, row[l]) for l in leaves], decision)
            )
    return constraint_sets
 # ── 值计算 ──
 def satisfying_value(field_info: dict, operator: str, value, want_true: bool) -> str:
    """Return a field value that makes `field <operator> value` true or false.

    Args:
        field_info: dict read for 'type', 'digits', 'decimal' and, for
            character fields, 'length'.
        operator: comparison operator ('>', '>=', '=', '<=', '<', '<>';
            character fields also accept '==' and '!=').
        value: the literal the field is compared with.
        want_true: True to satisfy the comparison, False to violate it.

    Returns:
        The value as a string. Numeric results are zero-padded digit strings
        of digits + decimal characters with no decimal point. Character
        results are `length` characters wide. Any combination not handled
        yields a zero string of digits + decimal characters.
    """
    ftype = field_info.get('type', 'unknown')
    digits = field_info.get('digits', 0)
    decimal = field_info.get('decimal', 0)
    total = digits + decimal
    if ftype == 'numeric':
        try:
            val_str = str(value)
            val_float = float(val_str)
            # Scale to an integer count of the smallest decimal unit.
            val_int = int(val_float * (10 ** decimal) + 0.5)
        except (ValueError, TypeError):
            val_int = 0
        if want_true:
            if operator == '>':
                val_int = val_int + 1
            elif operator in ('>=', '=', '<='):
                pass
            elif operator == '<':
                val_int = max(0, val_int - 1)
            elif operator == '<>':
                val_int = (val_int + 1) % (10 ** total)
        else:
            if operator in ('>', '>='):
                val_int = 0
            elif operator == '=':
                val_int = (val_int + 1) % (10 ** total)
            elif operator == '<':
                pass
            elif operator == '<=':
                val_int = val_int + 1
            elif operator == '<>':
                pass
        # Wrap into the field's capacity so the result always fits.
        val_int = val_int % (10 ** total)
        int_part = str(val_int // (10 ** decimal)).zfill(digits)
        dec_part = str(val_int % (10 ** decimal)).zfill(decimal)
        if decimal == 0:
            return int_part
        return int_part + dec_part
    elif ftype in ('alphanumeric', 'alphabetic'):
        length = field_info.get('length', 1)
        literal = value if isinstance(value, str) and value else 'A'
        # To equal the literal the field must hold the literal itself,
        # space-padded (or truncated) to the field width. COBOL compares
        # character operands of unequal size as if the shorter one were
        # extended with spaces. Repeating the first character, as before,
        # turned 'OK' into 'OO', which does not satisfy the condition.
        width = max(length, 1)
        matching = literal[:width].ljust(width)
        # A value certain to differ: another letter in every position.
        base_chr = literal[0].upper()
        other = chr(65 + (ord(base_chr) - 64) % 26)
        differing = other.ljust(length, other)
        if operator in ('=', '=='):
            return matching if want_true else differing
        if operator in ('<>', '!='):
            return differing if want_true else matching
    return '0'.zfill(total)
@@ -0,0 +1,1649 @@
 """核心层：PROCEDURE DIVISION解析 + 数据流追踪"""
 import re
 import logging
 from datetime import datetime
 from .models import BrSeq, BrIf, BrEval, BrPerform, BrSearch, BrSeq, CondLeaf, CondNot, ParseError, Assign, CallNode, ExitNode, GoTo
 from .cond import parse_compound_condition, parse_single_condition, collect_leaves
 logger = logging.getLogger(__name__)
 # Single-word tokens that can appear alone on a line followed by a period
 # but must never be taken for a paragraph name.
 _COBOL_SCOPE_ENDERS = {
    'END-IF', 'END-EVALUATE', 'END-PERFORM', 'END-EXEC', 'END-CALL',
    'END-READ', 'END-WRITE', 'END-DELETE', 'END-REWRITE', 'END-START',
    'END-SEARCH',
    'ELSE', 'WHEN', 'OTHER',
 }
 def scan_paragraphs(raw_lines):
    """Locate paragraphs and sections in PROCEDURE DIVISION lines.

    A header is either `NAME.` alone on a line (NAME not in
    _COBOL_SCOPE_ENDERS) or `NAME SECTION` with an optional trailing period.
    The same test is used both to start a paragraph and to end the previous
    one. Previously the end test required the period after SECTION, so a
    period-less section header was absorbed into the preceding paragraph
    and never registered.

    Args:
        raw_lines: list of source lines.

    Returns:
        dict mapping each header name to an inclusive (first_body_line,
        last_body_line) index pair into raw_lines. Section names are
        upper-cased. An empty body yields a pair whose end precedes its
        start.
    """
    para_re = re.compile(r'^([A-Z0-9][A-Z0-9-]*)\.\s*$')
    section_re = re.compile(r'^([A-Z][A-Z0-9-]*)\s+SECTION\.?\s*$', re.IGNORECASE)

    def _header_name(text):
        # Return the paragraph/section name declared by `text`, else None.
        para = para_re.match(text)
        if para and para.group(1) not in _COBOL_SCOPE_ENDERS:
            return para.group(1)
        section = section_re.match(text)
        if section:
            return section.group(1).upper()
        return None

    paragraphs = {}
    total = len(raw_lines)
    i = 0
    while i < total:
        name = _header_name(raw_lines[i].strip())
        if name is None:
            i += 1
            continue
        # The body runs until the next header or the end of input.
        j = i + 1
        while j < total and _header_name(raw_lines[j].strip()) is None:
            j += 1
        paragraphs[name] = (i + 1, j - 1)
        i = j
    return paragraphs
 def build_branch_tree(proc_text, fields=None):
    """Parse PROCEDURE DIVISION text into a branch tree and an assignment map.

    The entry code is whatever precedes the first paragraph header when that
    preamble holds any line other than blanks and the PROCEDURE DIVISION
    header; otherwise it is the body of the first paragraph. With no
    paragraph at all, every line is parsed.

    Returns:
        (tree, assignments): the sequence node built by _BrParser and the
        dict the parser filled with recorded assignments.
    """
    raw_lines = proc_text.split('\n')
    paragraphs = scan_paragraphs(raw_lines)

    # First line that is a paragraph header known to scan_paragraphs.
    first_para_name = None
    first_para_idx = None
    for idx, raw in enumerate(raw_lines):
        hit = re.match(r'^([A-Z0-9][A-Z0-9-]*)\.\s*$', raw.strip())
        if hit and hit.group(1) in paragraphs:
            first_para_name, first_para_idx = hit.group(1), idx
            break

    if not first_para_name:
        main_raw = raw_lines
    else:
        preamble = raw_lines[:first_para_idx]
        if any(l.strip() and 'PROCEDURE DIVISION' not in l for l in preamble):
            main_raw = preamble
        else:
            p_start, p_end = paragraphs[first_para_name]
            main_raw = raw_lines[p_start:p_end + 1]

    assignments = {}
    non_blank = [l for l in main_raw if l.strip()]
    parser = _BrParser(non_blank, paragraphs, raw_lines, assignments, fields)
    tree = parser.parse_seq(terminators={'GOBACK', 'STOP RUN', 'EXIT PROGRAM'})
    return tree, assignments
 # ── Constants ──
 # COBOL figurative constant keywords (singular and plural spellings).
 _FIGURATIVE_CONSTANTS = frozenset({
    'ZERO', 'ZEROS', 'ZEROES',
    'SPACE', 'SPACES',
    'HIGH-VALUE', 'HIGH-VALUES',
    'LOW-VALUE', 'LOW-VALUES',
 })
 # ── _BrParser ──
 class _BrParser:
    def __init__(self, lines, paragraphs=None, raw_lines=None, assignments=None, fields=None, goto_depth=0):
        self.lines = lines
        self.pos = 0
        self.paragraphs = paragraphs or {}
        self.raw_lines = raw_lines or lines
        # assignments is a dict[str, list[dict]] — append, never overwrite
        self.assignments = assignments if assignments is not None else {}
        self.fields = fields
        self._goto_depth = goto_depth
    def peek(self):
        if self.pos < len(self.lines):
            return self.lines[self.pos].strip()
        return ''
    def clean(self):
        return self.peek().rstrip('.').strip()
    def advance(self):
        self.pos += 1
    def parse_seq(self, end_tokens=None, end_check=None, terminators=None):
        """Parse statements from the cursor into a BrSeq.

        Parsing stops, without consuming the line, when _is_end() accepts
        the current line. It also stops after consuming a line listed in
        `terminators`, after a GO TO, or after an EXIT
        PARAGRAPH/PERFORM/SECTION. In the last two cases the following
        lines are skipped up to the next end token or scope ender.

        Args:
            end_tokens: tokens that close the enclosing construct.
            end_check: optional predicate on the cleaned line with the same
                effect as a matching end token.
            terminators: optional collection of cleaned lines that end the
                sequence and are consumed.

        Returns:
            BrSeq holding the nodes produced for the parsed statements.
        """
        if end_tokens is None:
            end_tokens = []
        seq = BrSeq()
        while self.pos < len(self.lines):
            line = self.clean()
            if self._is_end(line, end_tokens, end_check):
                return seq
            if terminators and line in terminators:
                self.advance()
                return seq
            m_goto = re.match(r'^GO\s+TO\s+(\w[\w-]*)\s*$', line)
            if m_goto:
                goto_node = self._parse_goto(m_goto.group(1))
                if goto_node:
                    seq.add(goto_node)
                # Lines after an unconditional jump are skipped up to the
                # end of the enclosing scope.
                while self.pos < len(self.lines):
                    cl = self.clean()
                    if self._is_end(cl, end_tokens, end_check):
                        break
                    if cl in _COBOL_SCOPE_ENDERS:
                        break
                    self.advance()
                return seq
            m_exit = re.match(r'^EXIT\s+(PARAGRAPH|PERFORM|SECTION)\s*$', line)
            if m_exit:
                self.advance()
                seq.add(ExitNode(m_exit.group(1)))
                # Same skipping rule as for GO TO.
                while self.pos < len(self.lines):
                    cl = self.clean()
                    if self._is_end(cl, end_tokens, end_check):
                        break
                    if cl in _COBOL_SCOPE_ENDERS:
                        break
                    self.advance()
                return seq
            m = re.match(r'^IF\s+(.+?)(?:THEN)?\s*$', line)
            if m:
                seq.add(self._parse_if())
                continue
            m = re.match(r'^EVALUATE\s+(.+?)\s*$', line)
            if m:
                seq.add(self._parse_evaluate())
                continue
            m = re.match(r'^PERFORM\s+', line)
            if m:
                perf_node = self._parse_perform()
                if perf_node:
                    seq.add(perf_node)
                continue
            m_search = re.match(r'^SEARCH\b(?:\s+(ALL))?\s+(\w[\w-]*)(?:\s+VARYING\s+(\w[\w-]*))?', line, re.IGNORECASE)
            if m_search:
                seq.add(self._parse_search(m_search))
                continue
            m = re.match(r'^INITIALIZE\s+', line)
            if m:
                init_seq = self._parse_initialize()
                if init_seq:
                    seq.add(init_seq)
                continue
            m_str = re.match(r'^STRING\s+', line)
            if m_str:
                str_seq = self._parse_string()
                if str_seq:
                    seq.add(str_seq)
                continue
            m_unstr = re.match(r'^UNSTRING\s+', line)
            if m_unstr:
                unstr_seq = self._parse_unstring()
                if unstr_seq:
                    seq.add(unstr_seq)
                continue
            m = re.match(r'^CALL\s+', line)
            if m:
                seq.add(self._parse_call())
                continue
            m = re.match(
                r'^ACCEPT\s+(\w[\w-]*)(?:\s+FROM\s+(DATE|TIME|DAY|DAY-OF-WEEK|YEAR|YYYYMMDD|HHMMSS))?\s*$',
                line, re.IGNORECASE
            )
            if m:
                tgt = m.group(1).strip().upper()
                from_type = (m.group(2) or 'USER').upper()
                info = {'type': 'accept', 'from': from_type}
                self.assignments.setdefault(tgt, []).append(info)
                seq.add(Assign(tgt, info))
                self.advance()
                continue
            m = re.match(r'^READ\s+(\w[\w-]*)\s+INTO\s+(\w[\w-]*)\s*$', line, re.IGNORECASE)
            if m:
                tgt = m.group(2).strip().upper()
                info = {'type': 'read_into', 'file': m.group(1).strip().upper(), 'source_vars': []}
                self.assignments.setdefault(tgt, []).append(info)
                seq.add(Assign(tgt, info))
                # A trailing period ends the READ sentence on this very
                # line; there is then nothing left to skip. Without this
                # check a one-line READ consumed every following line while
                # looking for an END-READ.
                sentence_done = self.peek().endswith('.')
                self.advance()
                # Skip the rest of the READ statement (AT END / NOT AT END /
                # END-READ), stopping after END-READ or after the line whose
                # period closes the sentence.
                while not sentence_done and self.pos < len(self.lines):
                    sentence_done = self.clean() == 'END-READ' or self.peek().endswith('.')
                    self.advance()
                continue
            m_set_false = re.match(r'^SET\s+(\w[\w-]*)\s+TO\s+FALSE\s*$', line, re.IGNORECASE)
            if m_set_false:
                seq.add(self._parse_set_false(m_set_false.group(1)))
                continue
            m = re.match(r'^(?:WRITE|REWRITE)\s+(\w[\w-]*)(?:\s+FROM\s+(\w[\w-]*))?\s*$', line, re.IGNORECASE)
            if m:
                rec_name = m.group(1).strip().upper()
                if m.group(2):
                    tgt = m.group(2).strip().upper()
                    info = {'type': 'write_from', 'file': rec_name, 'source_vars': [tgt]}
                    self.assignments.setdefault(tgt, []).append(info)
                    seq.add(Assign(tgt, info))
                else:
                    seq.add(Assign(rec_name, {'type': 'write_bare', 'file': rec_name}))
                self.advance()
                continue
            m_set = re.match(r'^SET\s+(\w[\w-]*)\s+TO\s+TRUE\s*$', line, re.IGNORECASE)
            if m_set:
                seq.add(self._parse_set_true(m_set.group(1)))
                continue
            m_insp = re.match(r'^INSPECT\s+', line, re.IGNORECASE)
            if m_insp:
                info = self._parse_inspect(line)
                if info:
                    tgt = info.get('tgt', '')
                    self.assignments.setdefault(tgt, []).append(info)
                    seq.add(Assign(tgt, info))
                self.advance()
                continue
            # Anything else: record it if it is an assignment statement,
            # then move on either way.
            assign_node = self._record_assignment(line)
            if assign_node:
                seq.add(assign_node)
            self.advance()
        return seq
    def _is_end(self, line, end_tokens, end_check):
        if end_check and end_check(line):
            return True
        for tok in end_tokens:
            if line == tok or line.startswith(tok + ' '):
                return True
        return False
    # ── INSPECT ──
    # Figurative constants that _expand_figurative() replaces with a single
    # character; other spellings are passed through unchanged.
    _PIC_FIG_CONV = {'ZERO': '0', 'ZEROS': '0', 'ZEROES': '0',
                     'SPACE': ' ', 'SPACES': ' '}
    @staticmethod
    def _expand_figurative(val):
        if val.upper() in _BrParser._PIC_FIG_CONV:
            return _BrParser._PIC_FIG_CONV[val.upper()]
        return val
    def _parse_inspect_phrase(self, phrase):
        m = re.match(
            r'TALLYING\s+(\w[\w-]*)\s+FOR\s+'
            r'(LEADING|TRAILING|CHARACTERS)'
            r'(?:\s+([\'"])(.*?)\3)?'
            r'(?:\s+(BEFORE|AFTER)\s+INITIAL\s+([\'"])(.*?)\6)?\s*$',
            phrase, re.IGNORECASE
        )
        if m:
            return ('tally', {
                'count_var': m.group(1).upper(),
                'kind': m.group(2).upper(),
                'char': self._expand_figurative(m.group(4) or ''),
                'before_after': (m.group(5) or '').upper(),
                'delimiter': self._expand_figurative(m.group(7) or ''),
            })
        m = re.match(
            r'REPLACING\s+'
            r'(ALL|LEADING|FIRST|CHARACTERS)\s+'
            r'([\'"])(.*?)\2\s+BY\s+'
            r'([\'"])(.*?)\4'
            r'(?:\s+(BEFORE|AFTER)\s+INITIAL\s+([\'"])(.*?)\7)?\s*$',
            phrase, re.IGNORECASE
        )
        if m:
            return ('replace', {
                'kind': m.group(1).upper(),
                'src': self._expand_figurative(m.group(3)),
                'dst': self._expand_figurative(m.group(5)),
                'before_after': (m.group(6) or '').upper(),
                'delimiter': self._expand_figurative(m.group(8) or ''),
            })
        m = re.match(
            r'CONVERTING\s+([\'"])(.*?)\1\s+TO\s+([\'"])(.*?)\3\s*$',
            phrase, re.IGNORECASE
        )
        if m:
            return ('convert', {
                'from_chars': self._expand_figurative(m.group(2)),
                'to_chars': self._expand_figurative(m.group(4)),
            })
        return None
    def _parse_inspect(self, line):
        m = re.match(r'^INSPECT\s+(\w[\w-]*)\s+(.+)$', line, re.IGNORECASE)
        if not m:
            return None
        tgt = m.group(1).upper()
        rest = m.group(2).strip()
        phrases = re.split(r'\s+(?=(?:TALLYING|REPLACING|CONVERTING)\b)', rest, flags=re.IGNORECASE)
        sub_ops = []
        for phrase in phrases:
            sub = self._parse_inspect_phrase(phrase.strip())
            if sub:
                sub_ops.append(sub)
        if not sub_ops:
            return None
        return {
            'type': 'inspect',
            'tgt': tgt,
            'source_vars': [tgt],
            'sub_ops': sub_ops,
        }
    def _record_assignment(self, line):
        """Recognise one assignment-like statement and record its data flow.

        The MOVE, COMPUTE, ADD, SUBTRACT, MULTIPLY and DIVIDE patterns below
        are tried in order against ``line``.  On the first match an info dict
        describing the assignment is appended to
        ``self.assignments[target]`` and returned wrapped in an ``Assign``
        node (the DIVIDE ... GIVING forms return a ``BrSeq`` holding one
        ``Assign`` for the quotient and, when REMAINDER is present, one for
        the remainder).

        Returns None when ``self.assignments`` is None, when no pattern
        matches, when a MOVE target is not a single (optionally subscripted)
        name, or when an arithmetic operand is neither a known field nor a
        numeric literal.
        """
        if self.assignments is None:
            return None
        # MOVE
        m = re.match(r'^MOVE\s+(.+?)\s+TO\s+(.+?)\s*$', line)
        if m:
            raw_src = m.group(1).strip()
            tgt = m.group(2).strip()
            # Keep the subscript: WS-CODE-VAL(1) → key='WS-CODE-VAL(1)'
            m_tgt = re.match(r'^([A-Z][A-Z0-9-]*)(?:\s*\(([^)]*)\))?\s*$', tgt, re.IGNORECASE)
            if not m_tgt:
                return None
            tgt_base = m_tgt.group(1).upper()
            if m_tgt.group(2):
                # Remove all whitespace inside the subscript so keys compare equal.
                subscript = re.sub(r'\s*', '', m_tgt.group(2))
                tgt_key = f"{tgt_base}({subscript})"
            else:
                tgt_key = tgt_base
            src_clean = raw_src.strip("'").strip('"')
            # A source that names a known field is a field-to-field move;
            # anything else is recorded as a literal.
            is_field_name = self.fields and any(f['name'] == src_clean for f in self.fields)
            if is_field_name:
                info = {'type': 'move', 'source_vars': [src_clean]}
            else:
                info = {'type': 'move_literal', 'literal': src_clean}
            self.assignments.setdefault(tgt_key, []).append(info)
            return Assign(tgt_key, info)
        # COMPUTE
        m = re.match(r'^COMPUTE\s+(.+?)(?:\s+ROUNDED)?\s*=\s*(.*)$', line)
        if m:
            tgt_raw = m.group(1).strip()
            expr = m.group(2).strip()
            m_tgt = re.match(r'^([A-Z][A-Z0-9-]*)(?:\s*\(([^)]*)\))?\s*$', tgt_raw, re.IGNORECASE)
            # Fall back to the raw target text when it is not a plain
            # (optionally subscripted) name.
            tgt_key = tgt_raw
            if m_tgt:
                tgt_base = m_tgt.group(1).upper()
                if m_tgt.group(2):
                    subscript = re.sub(r'\s*', '', m_tgt.group(2))
                    tgt_key = f"{tgt_base}({subscript})"
                else:
                    tgt_key = tgt_base
            if not expr:
                # Nothing after "=" on this line: peek at the next line and use
                # it as the expression unless it starts with a statement
                # keyword.  self.pos is not advanced here.
                peek_pos = self.pos + 1
                if peek_pos < len(self.lines):
                    nxt = self.lines[peek_pos].strip().rstrip('.').strip()
                    if nxt and not re.match(r'^(PERFORM|END-|IF|ELSE|EVALUATE|WHEN|OTHER|MOVE|COMPUTE|ADD|SUBTRACT|MULTIPLY|DIVIDE|STRING|UNSTRING|READ|WRITE|INITIALIZE|ACCEPT|CALL|GO\s*TO|GOBACK|STOP|EXIT)', nxt, re.IGNORECASE):
                        expr = nxt
            if expr:
                info = self._parse_compute_expr(tgt_key, expr)
                self.assignments.setdefault(tgt_key, []).append(info)
                return Assign(tgt_key, info)
        # ADD x TO y  → y = y + x  (the source may be a field or a constant)
        m = re.match(r'^ADD\s+(\w[\w-]*)\s+TO\s+(\w[\w-]*?)(?:\s+ROUNDED)?\s*$', line)
        if m:
            src = m.group(1).strip()
            tgt = m.group(2).strip()
            is_field = self.fields and any(f['name'] == src for f in self.fields)
            if is_field:
                info = {'type': 'compute', 'source_vars': [tgt, src],
                        'op': '+', 'const': None, 'expr': f'{tgt} + {src}'}
            else:
                try:
                    const = float(src)
                    info = {'type': 'compute', 'source_vars': [tgt],
                            'op': '+', 'const': const, 'expr': f'{tgt} + {const}'}
                except ValueError:
                    # Neither a known field nor a number: nothing to record.
                    return None
            self.assignments.setdefault(tgt, []).append(info)
            return Assign(tgt, info)
        # ADD x TO y GIVING z  → z = y + x
        m = re.match(r'^ADD\s+(.+?)\s+TO\s+(\w[\w-]*)\s+GIVING\s+(\w[\w-]*?)(?:\s+ROUNDED)?\s*$', line, re.IGNORECASE)
        if m:
            raw_a = m.group(1).strip()
            src_b = m.group(2).strip()
            tgt = m.group(3).strip()
            is_field_a = self.fields and any(f['name'] == raw_a for f in self.fields)
            if is_field_a:
                info = {'type': 'compute', 'source_vars': [src_b, raw_a],
                        'op': '+', 'const': None, 'expr': f'{src_b} + {raw_a}'}
            else:
                try:
                    const = float(raw_a)
                    info = {'type': 'compute', 'source_vars': [src_b],
                            'op': '+', 'const': const, 'expr': f'{src_b} + {const}'}
                except ValueError:
                    return None
            self.assignments.setdefault(tgt, []).append(info)
            return Assign(tgt, info)
        # ADD a[, b[, c...]] GIVING z  → z = a + b + c + ...
        m = re.match(r'^ADD\s+(.+?)\s+GIVING\s+(\w[\w-]*?)(?:\s+ROUNDED)?\s*$', line, re.IGNORECASE)
        if m:
            raw_parts = re.findall(r'[A-Z][A-Z0-9-]*|\d+(?:\.\d+)?', m.group(1).upper())
            fields_only = []
            const_sum = 0.0
            # Known fields become sources, numeric literals are summed into a
            # single constant; any other token is silently ignored.
            for p in raw_parts:
                if self.fields and any(f['name'] == p for f in self.fields):
                    fields_only.append(p)
                else:
                    try:
                        const_sum += float(p)
                    except ValueError:
                        pass
            tgt = m.group(2).strip()
            if not fields_only:
                # Only constants: the result is itself a literal (rendered
                # without a fractional part when it is a whole number).
                info = {'type': 'move_literal',
                        'literal': str(int(const_sum)) if const_sum == int(const_sum) else str(const_sum)}
            else:
                info = {'type': 'compute', 'source_vars': fields_only,
                        'op': '+', 'const': const_sum if const_sum != 0 else None,
                        'expr': '+'.join(fields_only) + (f' + {const_sum}' if const_sum else '')}
            self.assignments.setdefault(tgt, []).append(info)
            return Assign(tgt, info)
        # SUBTRACT x FROM y  → y = y - x  (x must be a numeric literal here)
        m = re.match(r'^SUBTRACT\s+([\d.]+)\s+FROM\s+(\w[\w-]*?)(?:\s+ROUNDED)?\s*$', line)
        if m:
            const = float(m.group(1))
            tgt = m.group(2).strip()
            info = {'type': 'compute', 'source_vars': [tgt],
                    'op': '-', 'const': const, 'expr': f'{tgt} - {const}'}
            self.assignments.setdefault(tgt, []).append(info)
            return Assign(tgt, info)
        # SUBTRACT a FROM b GIVING z  → z = b - a
        m = re.match(r'^SUBTRACT\s+([\d.\w-]*)\s+FROM\s+(\w[\w-]*)\s+GIVING\s+(\w[\w-]*?)(?:\s+ROUNDED)?\s*$', line, re.IGNORECASE)
        if m:
            raw_a = m.group(1).strip()
            src_b = m.group(2).strip()
            tgt = m.group(3).strip()
            is_field_a = self.fields and any(f['name'] == raw_a for f in self.fields)
            if is_field_a:
                info = {'type': 'compute', 'source_vars': [src_b, raw_a],
                        'op': '-', 'const': None, 'expr': f'{src_b} - {raw_a}'}
            else:
                try:
                    const = float(raw_a)
                    info = {'type': 'compute', 'source_vars': [src_b],
                            'op': '-', 'const': const, 'expr': f'{src_b} - {const}'}
                except ValueError:
                    return None
            self.assignments.setdefault(tgt, []).append(info)
            return Assign(tgt, info)
        # MULTIPLY x BY y  → y = y * x  (x must be a numeric literal here)
        m = re.match(r'^MULTIPLY\s+([\d.]+)\s+BY\s+(\w[\w-]*?)(?:\s+ROUNDED)?\s*$', line)
        if m:
            const = float(m.group(1))
            tgt = m.group(2).strip()
            info = {'type': 'compute', 'source_vars': [tgt],
                    'op': '*', 'const': const, 'expr': f'{tgt} * {const}'}
            self.assignments.setdefault(tgt, []).append(info)
            return Assign(tgt, info)
        # MULTIPLY a BY b GIVING z  → z = a * b
        m = re.match(r'^MULTIPLY\s+(\w[\w-]*)\s+BY\s+(\w[\w-]*)\s+GIVING\s+(\w[\w-]*?)(?:\s+ROUNDED)?\s*$', line, re.IGNORECASE)
        if m:
            src_a = m.group(1).strip()
            src_b = m.group(2).strip()
            tgt = m.group(3).strip()
            is_field_a = self.fields and any(f['name'] == src_a for f in self.fields)
            if is_field_a:
                info = {'type': 'compute', 'source_vars': [src_a, src_b],
                        'op': '*', 'const': None, 'expr': f'{src_a} * {src_b}'}
            else:
                try:
                    const = float(src_a)
                    info = {'type': 'compute', 'source_vars': [src_b],
                            'op': '*', 'const': const, 'expr': f'{const} * {src_b}'}
                except ValueError:
                    return None
            self.assignments.setdefault(tgt, []).append(info)
            return Assign(tgt, info)
        # DIVIDE x INTO y  → y = y / x  (x must be a numeric literal here)
        m = re.match(r'^DIVIDE\s+([\d.]+)\s+INTO\s+(\w[\w-]*?)(?:\s+ROUNDED)?\s*$', line)
        if m:
            const = float(m.group(1))
            tgt = m.group(2).strip()
            info = {'type': 'compute', 'source_vars': [tgt],
                    'op': '/', 'const': const, 'expr': f'{tgt} / {const}'}
            self.assignments.setdefault(tgt, []).append(info)
            return Assign(tgt, info)
        # DIVIDE a INTO b GIVING z  → z = b / a
        # Optional REMAINDER r → r = b - (b / a) * a
        m = re.match(r'^DIVIDE\s+(.+?)\s+INTO\s+(\w[\w-]*)\s+GIVING\s+(\w[\w-]*?)(?:\s+ROUNDED)?(?:\s+REMAINDER\s+(\w[\w-]*))?\s*$', line, re.IGNORECASE)
        if m:
            raw_a = m.group(1).strip()
            src_b = m.group(2).strip()
            tgt = m.group(3).strip()
            rem_tgt = m.group(4).strip().upper() if m.group(4) else None
            is_field_a = self.fields and any(f['name'] == raw_a for f in self.fields)
            # rem_info is always built but only recorded when REMAINDER is present.
            if is_field_a:
                info = {'type': 'compute', 'source_vars': [src_b, raw_a],
                        'op': '/', 'const': None, 'expr': f'{src_b} / {raw_a}'}
                rem_info = {'type': 'compute', 'source_vars': [src_b, raw_a],
                            'op': 'rem', 'const': None, 'expr': f'REM({src_b} / {raw_a})'}
            else:
                try:
                    const = float(raw_a)
                    info = {'type': 'compute', 'source_vars': [src_b],
                            'op': '/', 'const': const, 'expr': f'{src_b} / {const}'}
                    rem_info = {'type': 'compute', 'source_vars': [src_b],
                                'op': 'rem', 'const': const, 'expr': f'REM({src_b} / {const})'}
                except ValueError:
                    return None
            self.assignments.setdefault(tgt, []).append(info)
            seq = BrSeq()
            seq.add(Assign(tgt, info))
            if rem_tgt:
                self.assignments.setdefault(rem_tgt, []).append(rem_info)
                seq.add(Assign(rem_tgt, rem_info))
            return seq
        # DIVIDE a BY b GIVING z  → z = a / b
        # Optional REMAINDER r → r = a - (a / b) * b
        m = re.match(r'^DIVIDE\s+(\w[\w-]*)\s+BY\s+(\w[\w-]*)\s+GIVING\s+(\w[\w-]*?)(?:\s+ROUNDED)?(?:\s+REMAINDER\s+(\w[\w-]*))?\s*$', line, re.IGNORECASE)
        if m:
            src_a = m.group(1).strip()
            src_b = m.group(2).strip()
            tgt = m.group(3).strip()
            rem_tgt = m.group(4).strip().upper() if m.group(4) else None
            # Both operands are recorded as source variables in this form; no
            # field/literal distinction is made.
            info = {'type': 'compute', 'source_vars': [src_a, src_b],
                    'op': '/', 'const': None, 'expr': f'{src_a} / {src_b}'}
            rem_info = {'type': 'compute', 'source_vars': [src_a, src_b],
                        'op': 'rem', 'const': None, 'expr': f'REM({src_a} / {src_b})'}
            self.assignments.setdefault(tgt, []).append(info)
            seq = BrSeq()
            seq.add(Assign(tgt, info))
            if rem_tgt:
                self.assignments.setdefault(rem_tgt, []).append(rem_info)
                seq.add(Assign(rem_tgt, rem_info))
            return seq
        return None
    def _parse_compute_expr(self, target, expr):
        # const OP var
        m = re.match(r'^\s*([\d.]+)\s*([+\-*/])\s*(\w[\w-]*)\s*$', expr)
        if m:
            const, op, var = float(m.group(1)), m.group(2), m.group(3)
            return {'type': 'compute', 'source_vars': [var], 'op': op, 'const': const, 'expr': expr}
        # var OP const
        m = re.match(r'^\s*(\w[\w-]*)\s*([+\-*/])\s*([\d.]+)\s*$', expr)
        if m:
            var, op, const = m.group(1), m.group(2), float(m.group(3))
            return {'type': 'compute', 'source_vars': [var], 'op': op, 'const': const, 'expr': expr}
        # var OP var
        m = re.match(r'^\s*(\w[\w-]*)\s*([+\-*/])\s*(\w[\w-]*)\s*$', expr)
        if m:
            var1, op, var2 = m.group(1), m.group(2), m.group(3)
            return {'type': 'compute', 'source_vars': [var1, var2], 'op': op, 'expr': expr}
        # complex expression — extract variable names only
        vars_in = re.findall(r'[A-Z][A-Z0-9-]*', expr.upper())
        return {'type': 'compute', 'source_vars': list(set(vars_in)), 'op': None, 'const': None, 'expr': expr}
    # ── SEARCH / SEARCH ALL ──
    def _parse_search(self, m):
        """Parse a SEARCH / SEARCH ALL statement into a ``BrSearch`` node.

        Args:
            m: Regex match for the SEARCH header line.  Group 1 is truthy for
                SEARCH ALL, group 2 is the table name and group 3 the optional
                VARYING name.

        Returns:
            The ``BrSearch`` node.  Each WHEN contributes a
            ``(condition_text, body_seq)`` pair to ``node.when_list`` and the
            parsed condition to ``node.cond_trees``; an AT END clause sets
            ``node.at_end_seq`` and ``node.has_at_end``.  Parsing stops at
            END-SEARCH or at the end of the available lines.
        """
        is_all = bool(m.group(1))
        table = m.group(2).upper()
        varying = m.group(3).upper() if m.group(3) else None
        node = BrSearch(table, is_all=is_all, varying=varying)
        self.advance()

        def _is_end(l):
            # One definition of the terminator for the main loop and for the
            # nested bodies, so a body directly followed by "END-SEARCH."
            # stops there too.
            return l in ('END-SEARCH', 'END-SEARCH.')

        while self.pos < len(self.lines):
            line = self.clean()
            if _is_end(line):
                self.advance()
                return node
            m_at = re.match(r'^AT\s+END(.+)?$', line, re.IGNORECASE)
            if m_at:
                self.advance()
                rest = m_at.group(1)
                if rest and rest.strip():
                    # A statement on the AT END line itself is re-queued as
                    # its own line so parse_seq picks it up.
                    self.lines.insert(self.pos, rest.strip())
                node.at_end_seq = self.parse_seq(
                    end_check=lambda l: re.match(r'^WHEN\b', l, re.IGNORECASE) or _is_end(l)
                )
                node.has_at_end = True
                continue
            m_when = re.match(r'^WHEN\s+(.+?)\s*$', line, re.IGNORECASE)
            if m_when:
                cond_text = m_when.group(1).strip()
                self.advance()
                cond_tree = parse_compound_condition(cond_text, self.fields)
                body_seq = self.parse_seq(
                    end_check=lambda l: re.match(r'^(WHEN|AT\s+END)\b', l, re.IGNORECASE) or _is_end(l)
                )
                node.when_list.append((cond_text, body_seq))
                node.cond_trees.append(cond_tree)
                continue
            # Unrecognised line inside SEARCH: skip it.
            self.advance()
        return node
    def _parse_if(self):
        """Parse an IF ... [ELSE ...] END-IF statement into a ``BrIf`` node.

        The condition may span several lines: following lines are appended to
        it until a line that starts a statement (or THEN / ELSE / END-xxx) is
        reached, or until a line ending in a period has been appended.  An
        optional THEN, on the IF line or on a line of its own, is dropped.

        Returns:
            The ``BrIf`` node with ``cond_tree``, ``true_seq`` and, when an
            ELSE is present, ``false_seq`` filled in; None (after skipping the
            line) when the current line is not a well-formed IF header.
        """
        line = self.clean()
        # THEN is only stripped as a separate word, so a condition ending in
        # an identifier such as WS-THEN keeps its full name.
        m = re.match(r'^IF\s+(.+?)(?:\s+THEN)?\s*$', line)
        if not m:
            self.advance()
            return None
        cond_text = m.group(1).strip()
        self.advance()
        # A line starting with one of these words begins a new statement or
        # clause and therefore cannot continue the condition.  The lookahead
        # requires a whole word so that operands like MOVE-FLAG still count
        # as condition text.
        stmt_start = re.compile(
            r'^(?:THEN|ELSE|END-[A-Z]+|MOVE|IF|PERFORM|EVALUATE|COMPUTE|CALL|'
            r'STRING|UNSTRING|INITIALIZE|ADD|SUBTRACT|MULTIPLY|DIVIDE|GO|EXIT|'
            r'DISPLAY|SET|READ|WRITE|REWRITE|DELETE|OPEN|CLOSE|ACCEPT|CONTINUE|'
            r'NEXT|GOBACK|STOP|INSPECT|SEARCH|SORT|MERGE|RETURN|RELEASE|START)'
            r'(?![\w-])',
            re.IGNORECASE,
        )
        # Join continuation lines (multi-line IF conditions)
        while self.pos < len(self.lines):
            peek = self.clean()
            if stmt_start.match(peek):
                break
            if peek.endswith('.'):
                cond_text += ' ' + peek.rstrip('.')
                self.advance()
                break
            cond_text += ' ' + peek
            self.advance()
        # Consume optional THEN on its own line
        if self.pos < len(self.lines):
            peek = self.clean()
            if peek == 'THEN':
                self.advance()
        node = BrIf(cond_text)
        node.cond_tree = parse_compound_condition(node.condition, self.fields)
        node.true_seq = self.parse_seq(['ELSE', 'END-IF'])
        if self.clean() == 'ELSE':
            self.advance()
            node.false_seq = self.parse_seq(['END-IF'])
        if self.clean() == 'END-IF':
            self.advance()
        return node
    def _parse_evaluate(self):
        """Build a ``BrEval`` node from an EVALUATE ... END-EVALUATE statement.

        The subject text is split on ALSO into ``node.subjects`` when it
        contains several subjects.  Each ``WHEN`` (with any following lines
        that start with AND/OR appended) adds a ``(value, body_seq)`` pair to
        ``node.when_list``; the value is a list of per-subject values when
        ``node.subjects`` is set, otherwise the text itself.  ``WHEN OTHER``
        fills ``node.other_seq`` and sets ``node.has_other``.
        """
        def _unquote(text):
            # Trim whitespace, then surrounding single and double quotes.
            return text.strip().strip("'").strip('"')

        header = re.match(r'^EVALUATE\s+(.+?)\s*$', self.clean())
        subject_text = header.group(1).strip()
        node = BrEval(subject_text)
        if ' ALSO ' in subject_text:
            node.subjects = [part.strip() for part in re.split(r'\s+ALSO\s+', subject_text)]
        self.advance()

        while self.pos < len(self.lines):
            current = self.clean()
            if current == 'END-EVALUATE':
                self.advance()
                return node
            when = re.match(r'^WHEN\s+(.+?)\s*$', current)
            if when is None:
                # Not a WHEN clause: skip the line.
                self.advance()
                continue
            selector = _unquote(when.group(1))
            self.advance()
            # Capture multi-line WHEN conditions (AND/OR continuation)
            while self.pos < len(self.lines):
                follow = self.clean()
                if not re.match(r'^(?:AND|OR)\b', follow, re.IGNORECASE):
                    break
                selector += ' ' + follow
                self.advance()
            if selector == 'OTHER':
                node.other_seq = self.parse_seq(end_check=lambda l: l == 'END-EVALUATE')
                node.has_other = True
                continue
            branch = self.parse_seq(end_check=lambda l: l.startswith('WHEN') or l == 'END-EVALUATE')
            if node.subjects:
                entry = [_unquote(v) for v in re.split(r'\s+ALSO\s+', selector)]
            else:
                entry = selector
            node.when_list.append((entry, branch))
        return node
    def _parse_perform(self):
        """Parse the PERFORM statement at the current line into a ``BrPerform`` node.

        Forms recognised, tried in this order:

        * ``PERFORM UNTIL cond`` ... ``END-PERFORM`` (inline loop)
        * ``PERFORM para UNTIL cond``
        * ``PERFORM n TIMES``
        * ``PERFORM para THRU para`` (``THROUGH`` is accepted as well)
        * ``PERFORM VARYING v FROM a BY b [UNTIL cond]`` ... ``END-PERFORM``,
          where UNTIL (and FROM/BY) may sit on the following lines
        * ``PERFORM para VARYING v FROM a BY b [UNTIL cond]``, where UNTIL
          may sit on the following line
        * ``PERFORM para``

        Paragraph forms have the paragraph body inlined via
        ``_inline_perform``; inline forms parse their body up to END-PERFORM.

        Returns:
            The ``BrPerform`` node, or None (after skipping the line) when the
            statement matches none of the forms above.
        """
        line = self.clean()
        m = re.match(r'^PERFORM\s+UNTIL\s+(.+?)\s*$', line)
        if m:
            node = BrPerform('until', condition=m.group(1).strip())
            self.advance()
            node.body_seq = self.parse_seq(end_check=lambda l: l == 'END-PERFORM')
            if self.clean() == 'END-PERFORM':
                self.advance()
            return node
        m = re.match(r'^PERFORM\s+(\w[\w-]*)\s+UNTIL\s+(.+?)\s*$', line)
        if m:
            target = m.group(1).strip()
            node = BrPerform('para_until', target=target, condition=m.group(2).strip())
            self.advance()
            self._inline_perform(node, target)
            return node
        m = re.match(r'^PERFORM\s+(\d+)\s+TIMES\s*$', line)
        if m:
            node = BrPerform('times', times=int(m.group(1)))
            self.advance()
            return node
        # THRU and THROUGH are interchangeable spellings in COBOL.
        m = re.match(r'^PERFORM\s+(\w[\w-]*)\s+(?:THRU|THROUGH)\s+(\w[\w-]*)\s*$', line)
        if m:
            node = BrPerform('thru', target=m.group(1).strip(), thru=m.group(2).strip())
            self.advance()
            self._inline_perform(node, node.target, node.thru)
            return node
        m = re.match(r'^PERFORM\s+VARYING\s+(\w[\w-]*)\s+FROM\s+(\S+)\s+BY\s+(\S+)(?:\s+UNTIL\s+(.+))?\s*$', line)
        if m:
            varying_var = m.group(1).strip()
            from_val = m.group(2).strip()
            by_val = m.group(3).strip()
            condition = m.group(4).strip() if m.group(4) else None
            if not condition:
                # UNTIL is not on the PERFORM line: look ahead for it and
                # rewind to the PERFORM line if it is not found.
                save_pos = self.pos
                self.advance()
                while self.pos < len(self.lines):
                    nxt = self.clean()
                    cm = re.match(r'^UNTIL\s+(.+)$', nxt)
                    if cm:
                        condition = cm.group(1).strip()
                        self.advance()
                        break
                    fm = re.match(r'^FROM\s+(\S+)\s+BY\s+(\S+)$', nxt)
                    if fm:
                        from_val = fm.group(1).strip()
                        by_val = fm.group(2).strip()
                        self.advance()
                        continue
                    self.pos = save_pos
                    break
            if condition:
                node = BrPerform('varying', condition=condition,
                                 varying_var=varying_var,
                                 varying_from=from_val,
                                 varying_by=by_val)
                # condition from regex (single-line) → advance past PERFORM line
                # condition from while-loop (multi-line) → already advanced past FROM/BY/UNTIL
                if m.group(4):
                    self.advance()
                node.body_seq = self.parse_seq(end_check=lambda l: l == 'END-PERFORM')
                if self.clean() == 'END-PERFORM':
                    self.advance()
                return node
            self.pos = save_pos
        # PERFORM VARYING var — FROM/BY/UNTIL all on subsequent lines
        m = re.match(r'^PERFORM\s+VARYING\s+(\w[\w-]*)\s*$', line)
        if m:
            varying_var = m.group(1).strip()
            save_pos = self.pos
            self.advance()
            from_val = by_val = condition = None
            while self.pos < len(self.lines):
                nxt = self.clean()
                fm = re.match(r'^FROM\s+(\S+)\s+BY\s+(\S+)$', nxt)
                if fm:
                    from_val, by_val = fm.group(1).strip(), fm.group(2).strip()
                    self.advance()
                    continue
                um = re.match(r'^UNTIL\s+(.+)$', nxt)
                if um:
                    condition = um.group(1).strip()
                    self.advance()
                    break
                break
            if from_val and by_val and condition:
                node = BrPerform('varying', condition=condition,
                                 varying_var=varying_var,
                                 varying_from=from_val,
                                 varying_by=by_val)
                node.body_seq = self.parse_seq(end_check=lambda l: l == 'END-PERFORM')
                if self.clean() == 'END-PERFORM':
                    self.advance()
                return node
            # Incomplete VARYING clause: rewind so the line is skipped below.
            self.pos = save_pos
        m = re.match(r'^PERFORM\s+(\w[\w-]*)\s+VARYING\s+(\w[\w-]*)\s+FROM\s+(\S+)\s+BY\s+(\S+)(?:\s+UNTIL\s+(.+))?\s*$', line)
        if m:
            target = m.group(1).strip()
            varying_var = m.group(2).strip()
            from_val = m.group(3).strip()
            by_val = m.group(4).strip()
            condition = m.group(5).strip() if m.group(5) else None
            if not condition:
                save_pos = self.pos
                self.advance()
                while self.pos < len(self.lines):
                    nxt = self.clean()
                    cm = re.match(r'^UNTIL\s+(.+)$', nxt)
                    if cm:
                        condition = cm.group(1).strip()
                        self.advance()
                        break
                    self.pos = save_pos
                    break
            if condition:
                node = BrPerform('para_varying', target=target,
                                 condition=condition,
                                 varying_var=varying_var,
                                 varying_from=from_val,
                                 varying_by=by_val)
                # Only the single-line form is still positioned on the PERFORM
                # line.  When UNTIL came from the following line, the lookahead
                # above has already consumed both lines; advancing again would
                # silently drop the statement that follows.
                if m.group(5):
                    self.advance()
                self._inline_perform(node, node.target)
                return node
            self.pos = save_pos
        m = re.match(r'^PERFORM\s+(\w[\w-]*)\s*$', line)
        if m:
            target = m.group(1).strip()
            node = BrPerform('para', target=target)
            self.advance()
            self._inline_perform(node, target)
            return node
        # Unsupported PERFORM form: skip the line.
        self.advance()
        return None
    def _inline_perform(self, node, target, thru=None):
        """Parse the body of the performed paragraph(s) into ``node.body_seq``.

        Args:
            node: Node whose ``body_seq`` receives the parsed body.
            target: Name of the performed paragraph (first paragraph of a
                THRU range).
            thru: Name of the last paragraph of a THRU range, or None for a
                single paragraph.

        ``node`` is left untouched when ``target`` (or, for a range, either
        end) is not a known paragraph.
        """
        if thru:
            if target not in self.paragraphs or thru not in self.paragraphs:
                return
            start = self.paragraphs[target][0]
            end = self.paragraphs[thru][1]
            body_lines = []
            # Visit the paragraphs in source order regardless of how the
            # paragraph table happens to be ordered.
            for s, e in sorted(self.paragraphs.values(), key=lambda span: span[0]):
                if s >= start and e <= end:
                    body_lines.extend(self.raw_lines[s:e + 1])
        elif target in self.paragraphs:
            start, end = self.paragraphs[target]
            body_lines = self.raw_lines[start:end + 1]
        else:
            return
        # Hand the current GO TO nesting depth down to the sub-parser.  A
        # fresh depth here would reset the limit enforced when GO TO targets
        # are inlined, letting a PERFORM -> GO TO cycle recurse without bound.
        sub = _BrParser(
            [l for l in body_lines if l.strip()],
            self.paragraphs, self.raw_lines, self.assignments, self.fields,
            goto_depth=self._goto_depth
        )
        node.body_seq = sub.parse_seq()
    def _parse_initialize(self):
        """Parse an INITIALIZE statement into a ``BrSeq`` of ``Assign`` nodes.

        Every identifier before an optional REPLACING clause is a target.
        Each target gets an ``{'type': 'initialize'}`` info dict (plus a
        ``'replacing'`` mapping of data category → literal when REPLACING is
        present), recorded in ``self.assignments`` and added to the sequence.

        Returns None, after skipping the line, when the line is not a
        well-formed INITIALIZE.
        """
        stmt = re.match(r'^INITIALIZE\s+(.+?)\s*$', self.clean())
        if stmt is None:
            self.advance()
            return None
        operand_text = stmt.group(1).strip()
        # Split off REPLACING clause
        pieces = re.split(r'\s+REPLACING\s+', operand_text, maxsplit=1, flags=re.IGNORECASE)
        names = re.findall(r'[A-Z][A-Z0-9-]*', pieces[0].strip())
        # Parse REPLACING: (NUMERIC|ALPHANUMERIC|ALPHABETIC) DATA BY literal
        replacing = {}
        if len(pieces) > 1:
            found = re.findall(
                r'(NUMERIC|ALPHANUMERIC-EDITED|NUMERIC-EDITED|ALPHANUMERIC|ALPHABETIC)\s+DATA\s+BY\s+(\S+)',
                pieces[1], re.IGNORECASE
            )
            replacing = {category.upper(): literal.strip("'").strip('"')
                         for category, literal in found}
        seq = BrSeq()
        for name in names:
            if replacing:
                info = {'type': 'initialize', 'replacing': replacing}
            else:
                info = {'type': 'initialize'}
            self.assignments.setdefault(name, []).append(info)
            seq.add(Assign(name, info))
        self.advance()
        return seq
    def _parse_string(self):
        """Parse a STRING ... INTO target statement into a one-element ``BrSeq``.

        Lines are collected from the current one up to (and consuming)
        END-STRING, or to the end of the available lines, and joined with
        single spaces.  Every identifier-like token before INTO is recorded
        as a source of a ``'string_concat'`` assignment to the target.

        Returns None when the joined text does not end in ``INTO <name>``.
        """
        collected = [self.clean()]
        self.advance()
        while self.pos < len(self.lines):
            piece = self.clean()
            self.advance()
            if piece == 'END-STRING':
                break
            collected.append(piece)
        statement = ' '.join(collected)
        parsed = re.match(r'^STRING\s+(.+)\s+INTO\s+(\w[\w-]*)\s*$', statement, re.IGNORECASE | re.DOTALL)
        if parsed is None:
            return None
        sources_text = parsed.group(1).strip()
        receiver = parsed.group(2).strip()
        info = {
            'type': 'string_concat',
            'source_vars': re.findall(r'[A-Z][A-Z0-9-]*', sources_text),
        }
        self.assignments.setdefault(receiver, []).append(info)
        result = BrSeq()
        result.add(Assign(receiver, info))
        return result
    def _parse_unstring(self):
        """Parse an UNSTRING ... INTO targets statement into a ``BrSeq``.

        Lines are collected from the current one up to (and consuming)
        END-UNSTRING, or to the end of the available lines, and joined with
        single spaces.  The first identifier-like token before INTO is the
        source; every identifier-like token after INTO is treated as a
        receiving field.  Each receiver gets an ``'unstring_split'``
        assignment whose ``'index'`` is its position in the INTO list.

        Returns None when the joined text has no ``INTO`` clause.
        """
        parts = [self.clean()]
        self.advance()
        while self.pos < len(self.lines):
            cl = self.clean()
            if cl == 'END-UNSTRING':
                self.advance()
                break
            parts.append(cl)
            self.advance()
        full = ' '.join(parts)
        m = re.match(r'^UNSTRING\s+(.+?)\s+INTO\s+(.+?)\s*$', full, re.IGNORECASE | re.DOTALL)
        if not m:
            return None
        source_part = m.group(1).strip()
        targets_part = m.group(2).strip()
        source_vars = re.findall(r'[A-Z][A-Z0-9-]*', source_part)
        targets = re.findall(r'[A-Z][A-Z0-9-]*', targets_part)
        source_var = source_vars[0] if source_vars else ''
        seq = BrSeq()
        # enumerate gives each receiver its own position even when the same
        # name appears more than once (list.index would always report the
        # first occurrence) and avoids a linear search per receiver.
        for idx, tgt in enumerate(targets):
            info = {'type': 'unstring_split', 'source_vars': [source_var], 'index': idx}
            self.assignments.setdefault(tgt, []).append(info)
            seq.add(Assign(tgt, info))
        return seq
    def _parse_call(self):
        """Parse a CALL statement into a ``CallNode``.

        The program name has surrounding quotes removed and is upper-cased.
        Each name in the USING clause becomes ``{"name", "mechanism"}`` where
        the mechanism is the most recent ``BY REFERENCE|CONTENT|VALUE``
        phrase, starting from ``"reference"``.

        Returns an empty ``BrSeq`` (after skipping the line) when the line is
        not a well-formed CALL.
        """
        parsed = re.match(r'^CALL\s+(\S+?)(?:\s+USING\s+(.+))?\s*$', self.clean())
        if parsed is None:
            self.advance()
            return BrSeq()
        program = parsed.group(1).strip("'\"").upper()
        using_text = parsed.group(2)
        params = []
        if using_text:
            # Walk the clause segment by segment: BY <mechanism> names...
            mechanism = "reference"  # COBOL default is BY REFERENCE
            segments = re.split(r'\s+(?=BY\s+(?:REFERENCE|CONTENT|VALUE)\s+)',
                                using_text, flags=re.IGNORECASE)
            for segment in segments:
                segment = segment.strip()
                prefixed = re.match(
                    r'BY\s+(REFERENCE|CONTENT|VALUE)\s+(.*)', segment, re.IGNORECASE
                )
                if prefixed:
                    mechanism = prefixed.group(1).lower()
                    names_text = prefixed.group(2)
                else:
                    names_text = segment
                params.extend(
                    {"name": token.upper(), "mechanism": mechanism}
                    for token in re.findall(r'\w[\w-]*', names_text)
                )
        node = CallNode(program, using_params=params)
        self.advance()
        return node
    def _parse_goto(self, target):
        """Create a ``GoTo`` node for ``target`` and inline the target paragraph.

        The paragraph body is parsed into ``node.body_seq`` by a sub-parser
        running one GO TO level deeper.  Inlining is skipped once the current
        depth has reached 10 or when ``target`` is not a known paragraph.
        The current line is consumed in every case.
        """
        node = GoTo(target)
        can_inline = self._goto_depth < 10 and target in self.paragraphs
        if can_inline:
            first, last = self.paragraphs[target]
            body = [ln for ln in self.raw_lines[first:last + 1] if ln.strip()]
            nested = _BrParser(
                body,
                self.paragraphs, self.raw_lines, self.assignments, self.fields,
                goto_depth=self._goto_depth + 1
            )
            node.body_seq = nested.parse_seq()
        self.advance()
        return node
    def _parse_set_true(self, name):
        """Handle ``SET <88-name> TO TRUE`` and return the resulting ``Assign``.

        The condition name is looked up among the 88-level entries of
        ``self.fields``.  The assignment targets the entry's parent field
        when one is found, otherwise the condition name itself; it is only
        recorded in ``self.assignments`` when a parent is known.
        """
        name = name.upper()
        entry = None
        if self.fields:
            entry = next(
                (f for f in self.fields if f.get('is_88') and f['name'] == name),
                None,
            )
        if entry is not None:
            parent = entry.get('parent', '')
            value = entry.get('value', '')
        else:
            parent = None
            value = None
        info = {'type': 'set_true', '88_name': name, 'value': value}
        tgt = parent if parent else name
        if parent:
            self.assignments.setdefault(tgt, []).append(info)
        self.advance()
        return Assign(tgt, info)
    def _parse_set_false(self, name):
        """Handle ``SET <88-name> TO FALSE`` and return the resulting ``Assign``.

        The condition name is looked up among the 88-level entries of
        ``self.fields``.  The assignment is recorded as a literal move to the
        entry's parent field (or to the condition name when no parent is
        found), using the opposite of the 88-level VALUE.
        """
        name = name.upper()
        entry = None
        if self.fields:
            entry = next(
                (f for f in self.fields if f.get('is_88') and f['name'] == name),
                None,
            )
        if entry is not None:
            parent = entry.get('parent', '')
            value = entry.get('value', '')
        else:
            parent = None
            value = None
        # FALSE value = opposite of the 88-level VALUE: Y <-> N, a blank for
        # any other value, and 'N' when no value is known.
        if not value:
            false_val = 'N'
        elif value == 'Y':
            false_val = 'N'
        elif value == 'N':
            false_val = 'Y'
        else:
            false_val = ' '
        info = {'type': 'move_literal', 'literal': false_val}
        tgt = parent if parent else name
        self.assignments.setdefault(tgt, []).append(info)
        self.advance()
        return Assign(tgt, info)
 # ── 工具函数 ──
 def _basename(name: str) -> str:
    """去除下标后缀，如 WS-TABLE(1) → WS-TABLE"""
    return re.sub(r'\s*\(.*?\)\s*$', '', name).strip()
 def _init_child_names(group_name: str, fields: list) -> list:
    """递归收集 group 下所有非 88 级子字段的扁平名列表"""
    result = []
    grp_level = None
    found = False
    for f in fields:
        if not found and f['name'] == group_name:
            grp_level = f.get('level', 0)
            found = True
            continue
        if found:
            if f.get('level', 0) <= grp_level or f.get('level') == 77:
                break
            if f.get('is_88') or f.get('redefines'):
                continue
            if not f.get('pic_info') or f['pic_info'].get('type') == 'unknown':
                result.extend(_init_child_names(f['name'], fields))
            else:
                result.append(f['name'])
    return result
 # ── 数据流追踪 ──
 def trace_to_root(field_name, assignments, fields, path_assign=None):
    """Follow assignments backwards from ``field_name`` to its root variable.

    At each step the most recent assignment of the current variable that is
    not a pure self-update (a single source equal to the variable itself) is
    chosen, falling back to the most recent assignment when all of them are
    self-updates.  The walk then moves to that assignment's source: the only
    source for single-source assignments, the first source for multi-source
    additions.  It stops at a variable without recorded assignments, at an
    assignment without sources, at any other multi-source assignment, or
    when a variable would be visited twice.

    Args:
        field_name: Variable to start from.
        assignments: Mapping of variable → list of assignment info dicts
            (a single bare dict is accepted as well).
        fields: Not used by this function.
        path_assign: Optional mapping with the same shape that takes
            precedence over ``assignments`` for the variables it contains.

    Returns:
        ``(root_var, chain)`` where ``chain`` lists the ``(var, assignment)``
        pairs visited, starting at ``field_name``.
    """
    def _pick(entry, var):
        # Select the assignment to follow for ``var`` from a list of
        # assignments or from a single bare assignment.
        if not isinstance(entry, list):
            return entry
        if not entry:
            return None
        for cand in reversed(entry):
            sv = cand.get('source_vars') or []
            if len(sv) == 1 and sv[0] == var:
                continue
            return cand
        return entry[-1]

    seen = set()
    var = field_name
    chain = []
    while var in assignments and var not in seen:
        seen.add(var)
        if path_assign and var in path_assign:
            entry = path_assign[var]
        else:
            entry = assignments[var]
        asgn = _pick(entry, var)
        if asgn is None:
            # Nothing usable recorded for this variable: it is the root.
            break
        chain.append((var, asgn))
        sv = asgn.get('source_vars')
        if not sv:
            break
        if len(sv) == 1:
            if sv[0] == var:
                break
            var = sv[0]
        elif asgn.get('op') == '+':
            # Multi-source addition: keep tracing through the first source.
            var = sv[0]
        else:
            break
    return var, chain
 def invert_through_chain(root_var, chain, operator, value):
    """Translate a constraint on a derived field into one on its root variable.

    ``chain`` is expected in the order ``trace_to_root`` builds it: the
    first entry is the assignment of the constrained field and each later
    entry lies one step closer to ``root_var``.  The arithmetic is undone in
    that same order (outermost assignment first), which matters as soon as
    additive and multiplicative steps are mixed.

    Only single-source ``compute`` steps with a constant and one of the
    operators ``+ - * /`` change the value.  Moves, multi-source steps,
    steps without a constant and non-invertible operators such as ``rem``
    leave it untouched, as does a multiplication by zero.

    NOTE(review): the info dict does not record whether the constant was
    the left or the right operand, so ``const - var`` / ``const / var`` are
    inverted as if they were ``var - const`` / ``var / const``.  The
    comparison operator is also returned unchanged for negative factors.

    Args:
        root_var: Root variable the constraint is rewritten for.
        chain: List of ``(var, assignment)`` pairs.
        operator: Comparison operator, passed through unchanged.
        value: Constraint value; returned unchanged when it is not numeric.

    Returns:
        ``(root_var, operator, value_text)``; whole numbers are rendered
        without a fractional part.
    """
    try:
        val = float(value)
    except (ValueError, TypeError):
        return root_var, operator, value
    for _var, asgn in chain:
        if asgn.get('type') != 'compute':
            continue
        sv = asgn.get('source_vars') or []
        if len(sv) != 1:
            # Multi-source step: the value is carried over as-is.
            continue
        op = asgn.get('op')
        c = asgn.get('const')
        if c is None:
            continue
        if op == '+':
            val = val - c
        elif op == '-':
            val = val + c
        elif op == '*':
            if c != 0:
                val = val / c
        elif op == '/':
            val = val * c
        # Any other operator (e.g. 'rem') has no inverse here.
    if val.is_integer():
        return root_var, operator, str(int(val))
    return root_var, operator, str(val)
 # COBOL figurative constants mapped to a float.  HIGH-VALUE(S) maps to None;
 # NOTE(review): presumably "no fixed numeric equivalent" — the code reading
 # this table is outside this chunk, confirm how None is handled there.
 FIGURATIVE_NUMERIC = {
    'ZERO': 0.0, 'ZEROS': 0.0, 'ZEROES': 0.0,
    'SPACE': 0.0, 'SPACES': 0.0,
    'HIGH-VALUE': None, 'HIGH-VALUES': None,
    'LOW-VALUE': 0.0, 'LOW-VALUES': 0.0,
 }
 # COBOL figurative constants mapped to a single character: a blank,
 # chr(255) and chr(0).  ZERO/ZEROS/ZEROES have no entry in this table.
 FIGURATIVE_ALPHA = {
    'SPACE': ' ', 'SPACES': ' ',
    'HIGH-VALUE': chr(255), 'HIGH-VALUES': chr(255),
    'LOW-VALUE': chr(0), 'LOW-VALUES': chr(0),
 }
 def _resolve_subscript(key, rec):
    """将变量下标解析为具体值：WS-FIXED-KEY(WS-IDX) → WS-FIXED-KEY(1) if WS-IDX=1 in rec"""
    m = re.match(r'^(\w[\w-]*)\((\w[\w-]*)\)$', key)
    if m:
        base, var = m.groups()
        if var in rec:
            try:
                return f'{base}({int(rec[var])})'
            except (ValueError, TypeError):
                pass
    return key
 def _apply_before_after(val, before_after, delimiter):
    if not delimiter:
        return val
    if before_after == 'BEFORE':
        idx = val.find(delimiter)
        return val[:idx] if idx >= 0 else val
    if before_after == 'AFTER':
        idx = val.find(delimiter)
        return val[idx + len(delimiter):] if idx >= 0 else ''
    return val
 def propagate_assignments(rec, assignments, fields, file_sec=None):
    """Apply the recorded assignments to ``rec`` in place until it stabilises.

    The assignments are replayed in fixed passes (variable MOVE, literal MOVE,
    INITIALIZE, READ INTO, COMPUTE, INSPECT, STRING/UNSTRING, READ INTO /
    WRITE FROM, ACCEPT, SET ... TO TRUE). The whole sequence is repeated until
    ``rec`` no longer changes or ``_MAX_CONVERGE`` iterations have run, in
    which case a warning is logged.

    Args:
        rec: Mapping of field name to raw value; mutated in place.
        assignments: ``{target: info}`` or ``{target: [info, ...]}`` where each
            ``info`` dict carries at least a ``type`` key.
        fields: Field descriptors; ``name`` and ``pic_info`` are read here.
        file_sec: Optional ``{file_name: [record_name, ...]}`` mapping used by
            the READ INTO passes.

    Returns:
        None. The result is the mutated ``rec``.
    """
    def raw_to_float(val, pi):
        """Decode a raw field value to float using the picture info ``pi``."""
        if pi.get('type') == 'numeric':
            digits = pi.get('digits', 0)
            decimal = pi.get('decimal', 0)
            total = digits + decimal
            s = str(val)
            neg = s.startswith('-')
            if neg:
                s = s[1:]
            s = s.zfill(total)
            int_part = s[:digits] if digits else '0'
            dec_part = s[digits:] if decimal > 0 else '0'
            try:
                result = float(int(int_part or '0') + int(dec_part or '0') / (10 ** decimal))
            except ValueError:
                # A numeric field can hold non-digit content (e.g. after a
                # MOVE from an alphanumeric field); fall back to the generic
                # parsing below instead of aborting the whole propagation.
                pass
            else:
                return -result if neg else result
        try:
            return float(val)
        except (ValueError, TypeError):
            return 0.0

    def float_to_raw(val, pi):
        """Encode ``val`` as the zero-padded digit string described by ``pi``."""
        if pi.get('type') == 'numeric':
            digits = pi.get('digits', 0)
            decimal = pi.get('decimal', 0)
            signed = pi.get('signed', False)
            scaled = int(round(val * (10 ** decimal)))
            if not signed and scaled < 0:
                scaled = 0
            # High-order digits that do not fit the picture are dropped.
            capped = abs(scaled) % (10 ** (digits + decimal))
            int_part = str(capped // (10 ** decimal)).zfill(digits)
            dec_part = str(capped % (10 ** decimal)).zfill(decimal)
            result = int_part + (dec_part if decimal > 0 else '')
            if signed and scaled < 0:
                result = '-' + result
            return result
        return str(val)

    def literal_to_raw(literal, pi):
        """Convert a source literal / figurative constant to a raw value."""
        ftype = pi.get('type', 'unknown')
        if ftype == 'numeric':
            key = literal.upper()
            if key in FIGURATIVE_NUMERIC:
                v = FIGURATIVE_NUMERIC[key]
                if v is None:
                    # HIGH-VALUE(S): largest value the picture can hold.
                    digits = pi.get('digits', 0)
                    decimal = pi.get('decimal', 0)
                    v = 10 ** (digits + decimal) - 1
                return float_to_raw(v, pi)
            try:
                return float_to_raw(float(literal), pi)
            except ValueError:
                return float_to_raw(0.0, pi)
        if ftype in ('alphanumeric', 'alphabetic'):
            key = literal.upper()
            if key in FIGURATIVE_ALPHA:
                ch = FIGURATIVE_ALPHA[key]
                return ch[0].ljust(pi.get('length', 1), ch[0])
            return literal.ljust(pi.get('length', len(literal)))[:pi.get('length', len(literal))]
        return literal

    pi_map = {f['name']: f.get('pic_info', {}) for f in fields}
    if file_sec is None:
        file_sec = {}
    # Flatten: {tgt: [info1, info2]} -> [(tgt, info1), (tgt, info2)]
    flat_list = []
    for tgt, asgn_val in assignments.items():
        if isinstance(asgn_val, list):
            for asgn in asgn_val:
                flat_list.append((tgt, asgn))
        elif isinstance(asgn_val, dict):
            flat_list.append((tgt, asgn_val))
    _MAX_CONVERGE = 20
    # Targets with an "anchoring" assignment, i.e. anything other than a
    # self-referencing COMPUTE (a literal MOVE, a MOVE from another field, ...).
    _anchored = set()
    for tgt, asgn in flat_list:
        if asgn.get('type') != 'compute':
            _anchored.add(tgt)
        else:
            sv = asgn.get('source_vars', [])
            if not (len(sv) == 1 and sv[0] == tgt) and not (len(sv) >= 2 and tgt == sv[0]):
                _anchored.add(tgt)
    for _converge_iter in range(_MAX_CONVERGE):
        _old = dict(rec)
        # Pass 1: variable-to-variable MOVE
        for tgt, asgn in flat_list:
            if asgn['type'] == 'move' and asgn['source_vars']:
                src = asgn['source_vars'][0]
                resolved_tgt = _resolve_subscript(tgt, rec)
                resolved_src = _resolve_subscript(src, rec)
                if resolved_src in rec:
                    rec[resolved_tgt] = rec[resolved_src]
        # Pass 2: literal MOVE
        for tgt, asgn in flat_list:
            if asgn['type'] == 'move_literal':
                resolved_tgt = _resolve_subscript(tgt, rec)
                pi = pi_map.get(resolved_tgt, {})
                rec[resolved_tgt] = literal_to_raw(asgn['literal'], pi)
        # Pass 3: INITIALIZE
        for tgt, asgn in flat_list:
            if asgn['type'] == 'initialize':
                resolved_tgt = _resolve_subscript(tgt, rec)
                pi = pi_map.get(resolved_tgt, {})
                ftype = pi.get('type', 'unknown')
                replacing = asgn.get('replacing', {})
                mapped = replacing.get(ftype.upper(), None) if replacing else None
                if mapped:
                    # REPLACING <category> BY <literal> for this field's type.
                    rec[resolved_tgt] = literal_to_raw(mapped, pi)
                elif ftype == 'numeric':
                    rec[resolved_tgt] = float_to_raw(0.0, pi)
                else:
                    rec[resolved_tgt] = literal_to_raw('SPACE', pi)
        # Pass 3.5: READ INTO
        for tgt, asgn in flat_list:
            if asgn['type'] == 'read_into':
                fname = asgn.get('file', '')
                if fname in file_sec:
                    fd_children = _init_child_names(file_sec[fname][0], fields)
                    ws_children = _init_child_names(tgt, fields)
                    for ws_c in ws_children:
                        # Match by name with the WS- prefix removed first,
                        # then fall back to the child at the same position.
                        fd_candidate = ws_c
                        if ws_c.startswith('WS-'):
                            fd_candidate = ws_c[3:]
                        if fd_candidate in rec:
                            rec[ws_c] = rec[fd_candidate]
                        else:
                            idx = ws_children.index(ws_c)
                            if idx < len(fd_children) and fd_children[idx] in rec:
                                rec[ws_c] = rec[fd_children[idx]]
                    rec[tgt] = ''.join(str(rec.get(c, '')) for c in ws_children)
        # Pass 4: COMPUTE
        for tgt, asgn in flat_list:
            if asgn['type'] == 'compute' and asgn['source_vars'] and asgn['op'] is not None:
                resolved_tgt = _resolve_subscript(tgt, rec)
                pi_tgt = pi_map.get(resolved_tgt, {})
                if len(asgn['source_vars']) == 1:
                    src = asgn['source_vars'][0]
                    resolved_src = _resolve_subscript(src, rec)
                    # Un-anchored self-referencing COMPUTE (e.g. ADD 1 TO X):
                    # apply it once, on iteration 0 only, so it can converge.
                    if resolved_tgt == resolved_src and tgt not in _anchored and _converge_iter > 0:
                        continue
                    if resolved_src in rec:
                        sv = raw_to_float(rec[resolved_src], pi_map.get(resolved_src, {}))
                        c = asgn.get('const', 0)
                        if asgn['op'] == 'rem':
                            quotient = int(sv / c) if c != 0 else 0
                            result = sv - quotient * c
                        else:
                            result = {'+': sv + c, '-': sv - c, '*': sv * c, '/': sv / c if c != 0 else sv}[asgn['op']]
                        rec[resolved_tgt] = float_to_raw(result, pi_tgt)
                elif len(asgn['source_vars']) == 2:
                    v1, v2 = asgn['source_vars']
                    resolved_v1 = _resolve_subscript(v1, rec)
                    resolved_v2 = _resolve_subscript(v2, rec)
                    # Un-anchored self-referencing COMPUTE (e.g. ADD X TO Y
                    # where Y has no preceding MOVE): iteration 0 only.
                    if resolved_tgt == resolved_v1 and tgt not in _anchored and _converge_iter > 0:
                        continue
                    if resolved_v1 in rec and resolved_v2 in rec:
                        sv1 = raw_to_float(rec[resolved_v1], pi_map.get(resolved_v1, {}))
                        sv2 = raw_to_float(rec[resolved_v2], pi_map.get(resolved_v2, {}))
                        if asgn['op'] == 'rem':
                            quotient = int(sv1 / sv2) if sv2 != 0 else 0
                            result = sv1 - quotient * sv2
                        else:
                            result = {'+': sv1 + sv2, '-': sv1 - sv2, '*': sv1 * sv2, '/': sv1 / sv2 if sv2 != 0 else sv1}[asgn['op']]
                        rec[resolved_tgt] = float_to_raw(result, pi_tgt)
                elif len(asgn['source_vars']) >= 3 and asgn['op'] == '+':
                    total = 0
                    all_found = True
                    for v in asgn['source_vars']:
                        resolved_v = _resolve_subscript(v, rec)
                        if resolved_v in rec:
                            total += raw_to_float(rec[resolved_v], pi_map.get(resolved_v, {}))
                        else:
                            all_found = False
                            break
                    if all_found:
                        rec[resolved_tgt] = float_to_raw(total, pi_tgt)
        # Pass 4.5: INSPECT
        for tgt, asgn in flat_list:
            if asgn['type'] != 'inspect':
                continue
            resolved_tgt = _resolve_subscript(tgt, rec)
            if resolved_tgt not in rec:
                continue
            src_val = str(rec[resolved_tgt])
            for op_type, params in asgn.get('sub_ops', []):
                if op_type not in ('tally', 'replace', 'convert'):
                    continue
                # Only the region selected by BEFORE/AFTER is inspected; the
                # text outside it must survive a REPLACING/CONVERTING.
                region = _apply_before_after(src_val, params.get('before_after'), params.get('delimiter'))
                if params.get('before_after') == 'AFTER':
                    prefix, suffix = src_val[:len(src_val) - len(region)], ''
                else:
                    prefix, suffix = '', src_val[len(region):]
                if op_type == 'tally':
                    cv = params['count_var'].upper()
                    cv_pi = pi_map.get(cv, {})
                    kind = params['kind']
                    if kind == 'LEADING':
                        cnt = len(region) - len(region.lstrip(params['char']))
                    elif kind == 'TRAILING':
                        cnt = len(region) - len(region.rstrip(params['char']))
                    elif kind == 'ALL' and params.get('char'):
                        # FOR ALL <x> counts occurrences, not the region size.
                        cnt = region.count(params['char'])
                    else:
                        # FOR CHARACTERS: every character position counts.
                        cnt = len(region)
                    if cv_pi.get('type') == 'numeric':
                        rec[cv] = float_to_raw(float(cnt), cv_pi)
                elif op_type == 'replace':
                    kind = params['kind']
                    if kind == 'ALL':
                        new_region = region.replace(params['src'], params['dst'])
                    elif kind == 'LEADING':
                        # Replace each contiguous occurrence at the start of
                        # the region; an empty pattern matches nothing.
                        pattern = params['src']
                        hits = 0
                        if pattern:
                            while region.startswith(pattern, hits * len(pattern)):
                                hits += 1
                        new_region = params['dst'] * hits + region[hits * len(pattern):]
                    elif kind == 'FIRST':
                        new_region = region.replace(params['src'], params['dst'], 1)
                    else:
                        # CHARACTERS BY <dst>: overwrite every position.
                        new_region = params['dst'] * len(region)
                    # Later sub-operations see this result (sequential
                    # approximation of a multi-clause INSPECT).
                    src_val = prefix + new_region + suffix
                    rec[resolved_tgt] = src_val
                else:
                    table = str.maketrans(params['from_chars'], params['to_chars'])
                    src_val = prefix + region.translate(table) + suffix
                    rec[resolved_tgt] = src_val
        # Pass 5: STRING / UNSTRING
        for tgt, asgn in flat_list:
            if asgn['type'] == 'string_concat':
                resolved_tgt = _resolve_subscript(tgt, rec)
                pi = pi_map.get(resolved_tgt, {})
                parts = []
                for v in asgn.get('source_vars', []):
                    resolved_v = _resolve_subscript(v, rec)
                    if resolved_v in rec:
                        parts.append(str(rec[resolved_v]))
                val = ''.join(parts)
                if pi.get('type') in ('alphanumeric', 'alphabetic'):
                    val = val.ljust(pi.get('length', len(val)))[:pi.get('length', len(val))]
                rec[resolved_tgt] = val
            elif asgn['type'] == 'unstring_split':
                resolved_tgt = _resolve_subscript(tgt, rec)
                pi = pi_map.get(resolved_tgt, {})
                src_var = asgn.get('source_vars', [None])[0]
                resolved_src = _resolve_subscript(src_var, rec) if src_var else None
                idx = asgn.get('index', 0)
                if resolved_src and resolved_src in rec:
                    src_val = str(rec[resolved_src])
                    ftype = pi.get('type', 'unknown')
                    # The first receiving field gets the whole source; the
                    # remaining ones get a blank / zero placeholder.
                    if idx == 0:
                        val = src_val
                    else:
                        val = ' ' if ftype in ('alphanumeric', 'alphabetic') else '0'
                    if ftype in ('alphanumeric', 'alphabetic'):
                        val = val.ljust(pi.get('length', len(val)))[:pi.get('length', len(val))]
                    rec[resolved_tgt] = val
        # Pass 6: READ INTO / WRITE FROM
        for tgt, asgn in flat_list:
            if asgn['type'] == 'read_into':
                fname = asgn.get('file', '')
                if fname in file_sec:
                    children = _init_child_names(file_sec[fname][0], fields)
                    rec[tgt] = ''.join(str(rec.get(c, '')) for c in children)
            elif asgn['type'] == 'write_from':
                buf = tgt
                rec_name = asgn.get('file', '')
                children = _init_child_names(rec_name, fields)
                if children:
                    # Slice the buffer into the record's children by width.
                    src = str(rec.get(buf, ''))
                    pos = 0
                    for c in children:
                        pi = pi_map.get(c, {})
                        length = pi.get('digits', 0) + pi.get('decimal', 0) or pi.get('length', 0)
                        if length > 0:
                            chunk = src[pos:pos + length]
                            if not chunk:
                                chunk = '0' if pi.get('type') == 'numeric' else ' '
                            rec[c] = chunk.ljust(length)
                            pos += length
        # Pass 7: ACCEPT (fixed placeholder values for the system sources)
        accept_values = {
            'DATE': '20260603',
            'TIME': '120000',
            'DAY': '2026154',
            'DAY-OF-WEEK': '3',
            'YEAR': '2026',
        }
        for tgt, asgn in flat_list:
            if asgn['type'] == 'accept':
                resolved_tgt = _resolve_subscript(tgt, rec)
                pi = pi_map.get(resolved_tgt, {})
                ftype = pi.get('type', 'unknown')
                total = pi.get('digits', 0) + pi.get('decimal', 0)
                length = pi.get('length', 0)
                from_type = asgn.get('from', 'USER')
                val = accept_values.get(from_type)
                if val is not None:
                    if ftype == 'numeric':
                        rec[resolved_tgt] = val.zfill(total)
                    else:
                        rec[resolved_tgt] = val.ljust(length)[:length] if length else val
        # Pass 8: SET var TO TRUE (88-level)
        for tgt, asgn in flat_list:
            if asgn['type'] == 'set_true':
                resolved_tgt = _resolve_subscript(tgt, rec)
                val = asgn.get('value', '1')
                pi = pi_map.get(resolved_tgt, {})
                ftype = pi.get('type', 'unknown')
                if ftype in ('alphanumeric', 'alphabetic'):
                    length = pi.get('length', len(str(val)))
                    # Store the whole condition value, padded/truncated to the
                    # field length (not just its first character).
                    rec[resolved_tgt] = str(val).ljust(length)[:length]
                else:
                    total = pi.get('digits', 0) + pi.get('decimal', 0)
                    rec[resolved_tgt] = str(val).zfill(max(total, 1))
        if rec == _old:
            break
    else:
        logger.warning(f"propagate_assignments 未收敛（{_MAX_CONVERGE} 次迭代后仍有变化）")
 def classify_field_roles(tree, assignments, fields, source=None, proc_text=None):
    """Classify every field as input, output, inout or unused.

    Roles derived from FD records and their OPEN direction take precedence
    over the read/write counts gathered by walking the branch tree.

    Args:
        tree: Root node of the branch tree to walk.
        assignments: Accepted for interface compatibility; not read here.
        fields: Field descriptors (``name`` and ``section`` are used).
        source: Full program source; with ``proc_text`` enables FD/OPEN parsing.
        proc_text: Procedure-division text, also scanned for ACCEPT / DISPLAY.

    Returns:
        ``{field_name: 'input' | 'output' | 'inout' | 'unused'}``.
    """
    # Phase 0: roles implied by the OPEN direction of each file's records.
    fd_roles = {}
    if source and proc_text:
        from .read import parse_file_control, parse_file_section, scan_open_statements
        file_ctl = parse_file_control(source)  # result currently unused
        file_sec = parse_file_section(source)
        open_dir = scan_open_statements(proc_text)
        role_by_direction = {'INPUT': 'input', 'OUTPUT': 'output', 'I-O': 'inout'}
        for iname, direction in open_dir.items():
            if iname not in file_sec:
                continue
            for rec_name in file_sec[iname]:
                if direction in role_by_direction:
                    fd_roles[rec_name] = role_by_direction[direction]
        # Every child of a record inherits the record's role.
        for rec_name, role in list(fd_roles.items()):
            for child in _init_child_names(rec_name, fields):
                fd_roles[child] = role

    counts = {f['name']: {'read': 0, 'write': 0} for f in fields}

    def _tally(raw_name, access):
        """Bump the ``access`` counter of the base field behind ``raw_name``."""
        base = _basename(raw_name)
        if base in counts:
            counts[base][access] += 1

    def _walk(node):
        if isinstance(node, BrIf):
            if node.cond_tree:
                for leaf in collect_leaves(node.cond_tree):
                    _tally(leaf.field, 'read')
            _walk(node.true_seq)
            _walk(node.false_seq)
        elif isinstance(node, BrEval):
            _tally(node.subject, 'read')
            for _, seq in node.when_list:
                _walk(seq)
            _walk(node.other_seq)
        elif isinstance(node, BrPerform):
            if node.condition:
                parsed = parse_single_condition(node.condition)
                if parsed:
                    _tally(parsed[0], 'read')
            if node.varying_var:
                _tally(node.varying_var, 'write')
            _walk(node.body_seq)
        elif isinstance(node, CallNode):
            for p in node.using_params:
                name = _basename(p.get("name", ""))
                mechanism = p.get("mechanism", "reference")
                if name not in counts:
                    continue
                counts[name]["read"] += 1
                # A BY REFERENCE argument may also be modified by the callee.
                if mechanism.lower() == "reference":
                    counts[name]["write"] += 1
        elif isinstance(node, Assign):
            tgt_base = _basename(node.target)
            atype = node.source_info.get('type')
            known = tgt_base in counts
            if atype == 'write_from':
                # WRITE ... FROM only reads its buffer.
                if known:
                    counts[tgt_base]['read'] += 1
            else:
                if known:
                    counts[tgt_base]['write'] += 1
                if atype not in ('read_into', 'set_true'):
                    for v in node.source_info.get('source_vars', []):
                        _tally(v, 'read')
                    if atype == 'initialize' and known:
                        # INITIALIZE on a group also writes its children.
                        for child in _init_child_names(tgt_base, fields):
                            if child in counts:
                                counts[child]['write'] += 1
        elif isinstance(node, BrSeq):
            for c in node.children:
                _walk(c)

    _walk(tree)

    # Extra phase: ACCEPT / DISPLAY operands found by scanning proc_text.
    if proc_text:
        scans = (
            (r'ACCEPT\s+(\w[\w-]*)', 'write'),
            (r'DISPLAY\s+(\w[\w-]*)', 'read'),
        )
        for pattern, access in scans:
            for m in re.finditer(pattern, proc_text):
                _tally(m.group(1).upper(), access)

    # LINKAGE fields default to input when nothing touched them.
    for f in fields:
        if f.get('section') != 'LINKAGE':
            continue
        entry = counts.get(f['name'])
        if entry is not None and entry['read'] == 0 and entry['write'] == 0:
            entry['read'] = 1

    def _role_from(c):
        was_read, was_written = c['read'] > 0, c['write'] > 0
        if was_read and was_written:
            return 'inout'
        if was_written:
            return 'output'
        if was_read:
            return 'input'
        return 'unused'

    result = {
        name: fd_roles[name] if name in fd_roles else _role_from(c)
        for name, c in counts.items()
    }
    # Keep FD-derived names even when they are missing from ``fields``.
    for name, role in fd_roles.items():
        result.setdefault(name, role)
    return result
@@ -1205,32 +1205,3 @@ def run_coverage(branch_tree, branch_paths_with_assigns, fields,
        '_decision_points': decision_points,
        '_leaf_stats': leaf_stats,
    }
 def check_coverage(structure: dict, test_records: list[dict]) -> dict:
    """Report the static branch-structure totals of a COBOL program.

    Static analysis cannot tell which branches a given test record exercises
    at run time, so the branch and decision rates are always reported as 0.0;
    only the totals and the number of generated records are meaningful.

    Args:
        structure: Structure summary; ``total_paragraphs`` and
            ``total_branches`` are read (both default to 0).
        test_records: Generated test records.

    Returns:
        dict with the keys paragraph_rate, branch_rate, decision_rate,
        uncovered_decision_ids, total_branches, total_paragraphs,
        records_count and note. ``paragraph_rate`` is 1.0 when there is at
        least one paragraph and at least one record, otherwise 0.0.
    """
    paragraph_total = structure.get("total_paragraphs", 0)
    branch_total = structure.get("total_branches", 0)
    record_count = len(test_records)
    any_covered = paragraph_total > 0 and record_count > 0
    report = {
        "paragraph_rate": 1.0 if any_covered else 0.0,
        "branch_rate": 0.0,
        "decision_rate": 0.0,
        "uncovered_decision_ids": [],
        "total_branches": branch_total,
        "total_paragraphs": paragraph_total,
        "records_count": record_count,
        "note": "静态分析无法精确计算覆盖率。精确数据通过 gcov 获取（Phase 3）。",
    }
    return report
@@ -0,0 +1,894 @@
 """设计层：路径枚举 + 值生成 + 约束应用"""
 import re
 import logging
 from .models import BrSeq, BrIf, BrEval, BrPerform, BrSearch, Assign, CallNode, CondNot, CondLeaf, ExitNode, GoTo
 from .cond import parse_single_condition, parse_compound_condition, is_field, collect_leaves, mcdc_sets, satisfying_value
 from .core import trace_to_root, invert_through_chain, propagate_assignments, _basename
 logger = logging.getLogger(__name__)
 # Sentinel constraint tuple. It is recognised by identity (`is _STOP`), never
 # by value; enum_paths carries a path containing it forward unchanged instead
 # of combining it with later children.
 _STOP = ('__STOP__', '', None, True)
 # Upper bound on the number of paths kept when path lists are capped.
 _MAX_PATHS = 10000
 def _filter_stop(cons):
    return [c for c in cons if c is not _STOP]
 def _cap_paths(paths):
    """Truncate ``paths`` to its first ``_MAX_PATHS`` entries when too long.

    A list within the limit is returned as the same object (no copy).
    """
    overflow = len(paths) - _MAX_PATHS
    return paths if overflow <= 0 else paths[:_MAX_PATHS]
 def _cap_paths_fair(new_active, child_paths):
    """Cap ``new_active`` at ``_MAX_PATHS`` in two fair phases.

    Paths containing the ``_STOP`` sentinel are kept first. The remaining
    paths are treated as consecutive groups of ``len(child_paths)`` entries
    (one group per predecessor): phase 1 keeps one entry per group, rotating
    through the child index, and phase 2 fills whatever quota is left with
    the not-yet-selected entries in their original order.
    """
    if len(new_active) <= _MAX_PATHS:
        return new_active
    fanout = len(child_paths)
    if fanout <= 1:
        return new_active[:_MAX_PATHS]

    # STOP paths take no part in the grouping and are kept as they are.
    kept, live = [], []
    for cons, assigns in new_active:
        bucket = kept if any(c is _STOP for c in cons) else live
        bucket.append((cons, assigns))

    groups = len(live) // fanout
    if groups <= 1:
        kept.extend(live[:_MAX_PATHS - len(kept)])
        return kept[:_MAX_PATHS]

    # Phase 1: one entry per predecessor group, rotating the child index.
    budget = min(groups, _MAX_PATHS - len(kept))
    first_round = [g * fanout + g % fanout for g in range(budget)]
    kept.extend(live[pos] for pos in first_round)
    if len(kept) >= _MAX_PATHS:
        return kept[:_MAX_PATHS]

    # Phase 2: spend the remaining quota on entries not picked in phase 1.
    picked = set(first_round)
    room = _MAX_PATHS - len(kept)
    for pos, entry in enumerate(live):
        if pos in picked:
            continue
        kept.append(entry)
        room -= 1
        if room <= 0:
            break
    return kept[:_MAX_PATHS]
 # ── 路径枚举 ──
 def enum_paths(node, fields):
    """枚举路径，每条路径返回 (constraints, assignments).
    返回 list[tuple[list[tuple], dict]].
    """
    if isinstance(node, Assign):
        return [([], {node.target: [node.source_info]})]
    if isinstance(node, BrSeq):
        if not node.children:
            return [([], {})]
        paths = [([], {})]
        for child in node.children:
            child_paths = _cap_paths(enum_paths(child, fields))
            new_active = []
            for p_cons, p_assign in paths:
                if any(c is _STOP for c in p_cons):
                    new_active.append((p_cons, p_assign))
                    continue
                for cp_cons, cp_assign in child_paths:
                    merged = {}
                    for d in (p_assign, cp_assign):
                        for k, v in d.items():
                            merged.setdefault(k, []).extend(v if isinstance(v, list) else [v])
                    merged_cons = p_cons + list(cp_cons)
                    new_active.append((merged_cons, merged))
            paths = _cap_paths_fair(new_active, child_paths)
        return paths
    elif isinstance(node, BrIf):
        parsed = parse_single_condition(node.condition, fields)
        if parsed and is_field(parsed[0], fields):
            field, op, val = parsed
            paths = []
            true_sub = _cap_paths(enum_paths(node.true_seq, fields))
            for sp_cons, sp_assign in (true_sub or [([], {})]):
                paths.append(([(field, op, val, True)] + sp_cons, sp_assign))
            false_sub = _cap_paths(enum_paths(node.false_seq, fields))
            for fp_cons, fp_assign in (false_sub or [([], {})]):
                paths.append(([(field, op, val, False)] + fp_cons, fp_assign))
            return paths
        # CondNot wrapping a single leaf (e.g., IF NOT WS-AMOUNT > 1000)
        if node.cond_tree and isinstance(node.cond_tree, CondNot):
            child = node.cond_tree.child
            if isinstance(child, CondLeaf) and is_field(child.field, fields):
                paths = []
                true_sub = _cap_paths(enum_paths(node.true_seq, fields))
                for sp_cons, sp_assign in (true_sub or [([], {})]):
                    paths.append(([(child.field, child.op, child.value, False)] + sp_cons, sp_assign))
                false_sub = _cap_paths(enum_paths(node.false_seq, fields))
                for fp_cons, fp_assign in (false_sub or [([], {})]):
                    paths.append(([(child.field, child.op, child.value, True)] + fp_cons, fp_assign))
                return paths
        if node.cond_tree:
            leaves = collect_leaves(node.cond_tree)
            if leaves and all(is_field(l.field, fields) for l in leaves):
                sets = mcdc_sets(node.cond_tree, fields)
                if sets:
                    paths = []
                    for constraints, decision in sets:
                        body = _cap_paths(enum_paths(
                            node.true_seq if decision else node.false_seq, fields
                        ))
                        for sp_cons, sp_assign in (body or [([], {})]):
                            paths.append((constraints + sp_cons, sp_assign))
                    return paths
            # CondLeaf fallback: 单 leaf（含 88-level 解析后的条件树）MC/DC 不适用
            if len(leaves) == 1:
                leaf = leaves[0]
                paths = []
                true_sub = _cap_paths(enum_paths(node.true_seq, fields))
                for sp_cons, sp_assign in (true_sub or [([], {})]):
                    paths.append(([(leaf.field, leaf.op, leaf.value, True)] + sp_cons, sp_assign))
                false_sub = _cap_paths(enum_paths(node.false_seq, fields))
                for fp_cons, fp_assign in (false_sub or [([], {})]):
                    paths.append(([(leaf.field, leaf.op, leaf.value, False)] + fp_cons, fp_assign))
                return paths
        # Fallback: parsed condition but non-field (e.g. arithmetic expr)
        if parsed:
            field, op, val = parsed
            paths = []
            true_sub = enum_paths(node.true_seq, fields)
            for sp_cons, sp_assign in (true_sub or [([], {})]):
                paths.append(([(field, op, val, True)] + sp_cons, sp_assign))
            false_sub = enum_paths(node.false_seq, fields)
            for fp_cons, fp_assign in (false_sub or [([], {})]):
                paths.append(([(field, op, val, False)] + fp_cons, fp_assign))
            return paths
        return [([], {})]
    elif isinstance(node, BrEval):
        if node.subjects:
            paths = []
            prior_false_cons = []
            for values, seq in node.when_list:
                sub = _cap_paths(enum_paths(seq, fields))
                for sp_cons, sp_assign in (sub or [([], {})]):
                    when_cons = [(node.subjects[i], '=', values[i], True)
                                 for i in range(len(node.subjects))]
                    constraints = list(prior_false_cons) + when_cons + sp_cons
                    paths.append((constraints, sp_assign))
                for i in range(len(node.subjects)):
                    prior_false_cons.append((node.subjects[i], '=', values[i], False))
            if node.has_other:
                sub = _cap_paths(enum_paths(node.other_seq, fields))
                for sp_cons, sp_assign in (sub or [([], {})]):
                    paths.append((list(prior_false_cons) + sp_cons, sp_assign))
            return paths
        if node.subject == 'TRUE':
            paths = []
            prior_false_sets = []  # list[list[Constraint]]
            for value, seq in node.when_list:
                cond = parse_compound_condition(value, fields)
                if cond and isinstance(cond, CondLeaf) and is_field(cond.field, fields):
                    sub = _cap_paths(enum_paths(seq, fields))
                    for sp_cons, sp_assign in (sub or [([], {})]):
                        constraints = [c for pf in prior_false_sets for c in pf]
                        constraints.append((cond.field, cond.op, cond.value, True))
                        paths.append((constraints + sp_cons, sp_assign))
                    prior_false_sets.append([(cond.field, cond.op, cond.value, False)])
                elif cond:
                    leaves = collect_leaves(cond)
                    if leaves and all(is_field(l.field, fields) for l in leaves):
                        sets = mcdc_sets(cond, fields)
                        if sets:
                            sub = _cap_paths(enum_paths(seq, fields))
                            new_false_sets = []
                            for cs, decision in sets:
                                if decision:
                                    if not prior_false_sets:
                                        for sp_cons, sp_assign in (sub or [([], {})]):
                                            paths.append((list(cs) + sp_cons, sp_assign))
                                    else:
                                        for pf_set in prior_false_sets:
                                            for sp_cons, sp_assign in (sub or [([], {})]):
                                                paths.append((list(pf_set) + list(cs) + sp_cons, sp_assign))
                                else:
                                    new_false_sets.append(cs)
                            if not new_false_sets:
                                prior_false_sets = []
                                break
                            combined = []
                            for pf_set in prior_false_sets:
                                for nf_set in new_false_sets:
                                    combined.append(list(pf_set) + list(nf_set))
                            prior_false_sets = combined
                        else:
                            prior_false_sets = []
                            break
                    else:
                        prior_false_sets = []
                        break
                else:
                    prior_false_sets = []
                    break
            if node.has_other:
                sub = _cap_paths(enum_paths(node.other_seq, fields))
                for sp_cons, sp_assign in (sub or [([], {})]):
                    constraints = [c for pf in prior_false_sets for c in pf]
                    paths.append((constraints + sp_cons, sp_assign))
            return paths
        if not is_field(node.subject, fields):
            return [([], {})]
        paths = []
        for value, seq in node.when_list:
            sub = _cap_paths(enum_paths(seq, fields))
            for sp_cons, sp_assign in (sub or [([], {})]):
                paths.append(([(node.subject, '=', value, True)] + sp_cons, sp_assign))
        if node.has_other:
            case_vals = [v for v, _ in node.when_list]
            sub = _cap_paths(enum_paths(node.other_seq, fields))
            for sp_cons, sp_assign in (sub or [([], {})]):
                paths.append(([(node.subject, 'not_in', case_vals, True)] + sp_cons, sp_assign))
        return paths
    elif isinstance(node, BrSearch):
        return _enum_search_paths(node, fields)
    elif isinstance(node, BrPerform):
        if node.perf_type in ('para', 'thru'):
            if node.body_seq:
                return enum_paths(node.body_seq, fields)
            return [([], {})]
        elif node.perf_type in ('until', 'para_until', 'varying', 'para_varying'):
            # 尝试单条件（现有逻辑）
            parsed = parse_single_condition(node.condition, fields)
            if parsed and is_field(parsed[0], fields):
                field, op, val = parsed
                paths = []
                false_sub = _cap_paths(enum_paths(node.body_seq, fields))
                for sp_cons, sp_assign in (false_sub or [([], {})]):
                    # PERFORM VARYING: 将 FROM 值作为 MOVE 赋值加入 Enter 路径
                    if node.varying_from and node.varying_var:
                        is_fld = any(f['name'] == node.varying_from for f in fields) if fields else False
                        from_asgn = {'type': 'move', 'source_vars': [node.varying_from]} if is_fld else {'type': 'move_literal', 'literal': node.varying_from}
                        from_assign = {node.varying_var: [from_asgn]}
                        merged = {}
                        for d in (from_assign, sp_assign):
                            for k, v in d.items():
                                merged.setdefault(k, []).extend(v if isinstance(v, list) else [v])
                        sp_assign = merged
                    paths.append(([(field, op, val, False)] + sp_cons, sp_assign))
                paths.append(([(field, op, val, True)], {}))
                return paths
            # 尝试复合条件（AND/OR）
            cond_tree = parse_compound_condition(node.condition, fields)
            if cond_tree:
                leaves = collect_leaves(cond_tree)
                if leaves and all(is_field(l.field, fields) for l in leaves):
                    sets = mcdc_sets(cond_tree, fields)
                    if sets:
                        paths = []
                        false_sub = _cap_paths(enum_paths(node.body_seq, fields))
                        for sp_cons, sp_assign in (false_sub or [([], {})]):
                            # PERFORM VARYING: 将 FROM 值作为 MOVE 赋值加入 Enter 路径
                            if node.varying_from and node.varying_var:
                                is_fld = any(f['name'] == node.varying_from for f in fields) if fields else False
                                from_asgn = {'type': 'move', 'source_vars': [node.varying_from]} if is_fld else {'type': 'move_literal', 'literal': node.varying_from}
                                from_assign = {node.varying_var: [from_asgn]}
                                merged = {}
                                for d in (from_assign, sp_assign):
                                    for k, v in d.items():
                                        merged.setdefault(k, []).extend(v if isinstance(v, list) else [v])
                                sp_assign = merged
                            for constraints, decision in sets:
                                if not decision:
                                    paths.append((list(constraints) + sp_cons, sp_assign))
                        for constraints, decision in sets:
                            if decision:
                                paths.append((list(constraints), {}))
                        if paths:
                            return paths
        return [([], {})]
    elif isinstance(node, CallNode):
        return [([], {})]
    elif isinstance(node, ExitNode):
        return [([_STOP], {})]
    elif isinstance(node, GoTo):
        paths = enum_paths(node.body_seq, fields)
        return [([_STOP] + c, a) for c, a in paths]
    return [([], {})]
 # ── 值生成 ──
 def seq_numeric(seq_num: int, total_digits: int) -> str:
    """Render ``seq_num`` as a zero-padded string of ``total_digits`` digits.

    The number is wrapped modulo ``10 ** total_digits``. A wrapped result of
    zero is replaced by the largest value that fits (``10 ** total_digits - 1``).
    """
    modulus = 10 ** total_digits
    # ``or`` substitutes the maximum when the remainder is exactly zero.
    wrapped = seq_num % modulus or modulus - 1
    return str(wrapped).zfill(total_digits)
 def seq_alpha(seq_num: int, length: int) -> str:
    """Return one capital letter repeated ``length`` times.

    The letter cycles A..Z with ``seq_num`` (1 -> 'A', 26 -> 'Z', 27 -> 'A').
    """
    offset = (seq_num - 1) % 26
    return chr(ord('A') + offset) * length
 def seq_date(seq_num: int) -> str:
    """Return the day ``seq_num - 1`` days after 2000-01-01 as ``YYYYMMDD``."""
    from datetime import date, timedelta
    day = date(2000, 1, 1) + timedelta(days=seq_num - 1)
    return day.strftime('%Y%m%d')
 def _is_date_field(name: str) -> bool:
    patterns = [r'DATE', r'YYMMDD', r'YYYYMM', r'YEAR', r'MONTH', r'DAY']
    for p in patterns:
        if re.search(p, name.upper()):
            return True
    return False
 _SPECIAL_VALUES = {
    'ZERO': '0', 'ZEROS': '0', 'ZEROES': '0',
    'SPACE': ' ', 'SPACES': ' ',
    'HIGH-VALUE': '\xff', 'HIGH-VALUES': '\xff',
    'LOW-VALUE': '\x00', 'LOW-VALUES': '\x00',
    'QUOTE': "'", 'QUOTES': "'",
    'ALL': '',
 }
 def _apply_value(field: dict, rec: dict) -> bool:
    """尝试应用 VALUE 子句的初始值。返回 True 表示已处理。"""
    raw = field.get('value')
    if raw is None:
        return False
    val = str(raw).strip("'\"").strip()
    name = field['name']
    pi = field.get('pic_info', {})
    # 处理 COBOL 特殊值
    if val.upper() in _SPECIAL_VALUES:
        val = _SPECIAL_VALUES[val.upper()]
    ftype = pi.get('type', 'unknown')
    if ftype == 'numeric':
        digits = pi.get('digits', 0) + pi.get('decimal', 0)
        if digits:
            rec[name] = val.zfill(digits)
        else:
            rec[name] = val
    else:
        length = pi.get('length', 0) or 1
        rec[name] = val.ljust(length)[:length]
    return True
 def _children_of(group_name: str, fields: list) -> list:
    """返回组项目 group_name 在 fields 中的直属子字段列表（按声明顺序）。
    终止条件：遇到同/更高级别（sibling/组边界）或 77 级（独立字段）。
    """
    result = []
    group_level = None
    found = False
    for f in fields:
        if not found and f['name'] == group_name:
            group_level = f['level']
            found = True
            continue
        if found:
            if f['level'] <= group_level or f['level'] == 77:
                break
            # 88-level 是条件名，不计为子字段
            if f.get('is_88'):
                continue
            result.append(f)
    return result
 def _make_numeric_value(idx: int, record_num: int, total_digits: int) -> str:
    for step in (100, 10, 1):
        val = idx * step + record_num
        if val < 10 ** total_digits:
            return str(val).zfill(total_digits)
    return str(record_num).zfill(total_digits)
 def _make_alpha_value(idx: int, record_num: int, length: int) -> str:
    if length == 1:
        ch = chr(65 + (idx + record_num - 2) % 26)
        return ch
    letter = chr(65 + (idx - 1) % 26)
    return letter + str(record_num).zfill(length - 1)
 def make_base_record(seq_num: int, fields: list) -> dict:
    """Build the default (unconstrained) record for sequence number ``seq_num``.

    ``fields`` is walked once in declaration order:

    * 88-level entries are skipped;
    * scalar REDEFINES entries (with a PIC) are deferred and later copied
      from their parent;
    * FILLER entries are filled with ``'x'`` characters;
    * entries with a VALUE clause take that value (via ``_apply_value``);
    * group items (no PIC) are skipped;
    * everything else gets a generated value that depends on its PIC type,
      on how many fields of that kind preceded it and on ``seq_num``.

    Afterwards the children of group REDEFINES items are filled positionally
    from the children of the redefined group.

    Returns the record as a dict keyed by field name.
    """
    rec = {}
    redefines_map = {}       # scalar REDEFINES: parent_name -> [child_names]
    group_redefines = []     # group REDEFINES:  [(redef_name, target_name)]
    filler_key_counter = 0
    numeric_idx = 0
    alpha_idx = 0
    record_num = seq_num
    for f in fields:
        name = f['name']
        if f.get('is_88'):
            continue
        if f.get('redefines'):
            parent = f['redefines']
            if f.get('pic'):
                # Scalar REDEFINES (has a PIC, e.g. WS-AMOUNT-DISP REDEFINES WS-AMOUNT PIC X(9)):
                # remember it and copy the parent's value in pass 2a.
                redefines_map.setdefault(parent, []).append(name)
                continue
            else:
                # Group REDEFINES (no PIC, e.g. CUST-ADDR2 REDEFINES CUST-ADDR)
                group_redefines.append((name, parent))
                # No `continue` here: the group itself has no PIC and is dropped
                # by the "group item" check below, while its children are
                # processed by this loop like any other field.
        if f.get('is_filler'):
            if name in rec:
                # A filler key already exists: store this one under a numbered
                # key (the second filler becomes FILLER_2, the third FILLER_3, ...).
                filler_key_counter += 1
                name = f'FILLER_{filler_key_counter + 1}'
            rec[name] = 'x' * (f.get('pic_info', {}).get('length', 0) or 1)
            continue
        # Pass 0: an initial value from a VALUE clause takes precedence.
        if _apply_value(f, rec):
            continue
        # Group items (no PIC) carry no value of their own.
        if not f.get('pic'):
            continue
        pi = f.get('pic_info', {})
        ftype = pi.get('type', 'unknown')
        digits = pi.get('digits', 0)
        decimal = pi.get('decimal', 0)
        length = pi.get('length', 0)
        if ftype == 'numeric':
            if _is_date_field(name):
                # Date-like names get a calendar date derived from the record number.
                rec[name] = seq_date(record_num)
            else:
                numeric_idx += 1
                rec[name] = _make_numeric_value(numeric_idx, record_num, digits + decimal)
        elif ftype in ('alphanumeric', 'alphabetic'):
            alpha_idx += 1
            rec[name] = _make_alpha_value(alpha_idx, record_num, length or 1)
        elif ftype == 'numeric-edited':
            numeric_idx += 1
            raw = _make_numeric_value(numeric_idx, record_num, digits + decimal)
            # Right-align the digits inside the edited field's display length.
            rec[name] = raw.rjust(length)
        else:
            # Unknown PIC type: fall back to an 8-character alphanumeric value.
            alpha_idx += 1
            rec[name] = _make_alpha_value(alpha_idx, record_num, 8)
    # Pass 2a: copy values into scalar REDEFINES fields.
    for parent_name, child_names in redefines_map.items():
        if parent_name in rec:
            for child_name in child_names:
                rec[child_name] = rec[parent_name]
    # Pass 2b: group REDEFINES - copy child values by position.
    for redef_name, target_name in group_redefines:
        redef_kids = _children_of(redef_name, fields)
        tgt_kids = _children_of(target_name, fields)
        tgt_idx = 0
        for i, rk in enumerate(redef_kids):
            if tgt_idx >= len(tgt_kids):
                break
            if i == len(redef_kids) - 1 and len(redef_kids) < len(tgt_kids):
                # Last redefining child while the target has more children:
                # concatenate all remaining target values into it.
                parts = [rec.get(tk['name'], '') for tk in tgt_kids[tgt_idx:]]
                rec[rk['name']] = ''.join(parts)
            elif i == len(redef_kids) - 1 and len(redef_kids) > len(tgt_kids):
                # Redefining group has more children: the last one takes the last target value.
                # NOTE(review): tgt_idx always equals i, so in this situation the
                # guard at the top of the loop breaks first and this branch does
                # not appear to be reachable - confirm the intended behaviour.
                rec[rk['name']] = rec.get(tgt_kids[-1]['name'], '')
            else:
                rec[rk['name']] = rec.get(tgt_kids[tgt_idx]['name'], '')
            tgt_idx += 1
    return rec
 # ── 约束应用 ──
 def _check_constraint_satisfied(rec, field_name, operator, value, want_true, fields):
    """检查 field_name 当前值是否满足该约束。满足返回 True。"""
    for f in fields:
        if f['name'] == field_name:
            pi = f.get('pic_info', {})
            ftype = pi.get('type', 'unknown')
            val = rec.get(field_name)
            if val is None:
                return False
            if operator == 'not_in':
                cases = value if isinstance(value, list) else []
                return str(val) not in cases
            if ftype == 'numeric':
                try:
                    num_val = int(float(str(val)))
                    num_target = int(float(str(value)))
                except (ValueError, TypeError):
                    return False
                if operator in ('>=', '>', '<', '<=', '=', '<>'):
                    if operator == '>=':   ok = num_val >= num_target
                    elif operator == '>':  ok = num_val > num_target
                    elif operator == '<':  ok = num_val < num_target
                    elif operator == '<=': ok = num_val <= num_target
                    elif operator == '=':  ok = num_val == num_target
                    elif operator == '<>': ok = num_val != num_target
                    return ok == want_true
                return True
            else:
                s_val = str(val).strip().upper()
                s_target = str(value).strip().upper()
                eq = s_val == s_target
                if operator == '=':
                    return eq == want_true
                elif operator == '<>':
                    return (not eq) == want_true
                return True
    return False
 # Operator groups consulted by _apply_arith_constraint to choose a steering
 # direction: for 'left_big_ops' a true outcome wants large left-hand fields,
 # for 'left_small_ops' a true outcome wants small left-hand fields.
 _ARITH_BOUNDS = {
    'left_big_ops':   {'>', '>=', '<>'},
    'left_small_ops': {'<', '<='},
 }
 def _arith_pic_info(field_name, fields):
    for f in fields:
        if f['name'] == field_name.upper():
            return f.get('pic_info', {})
    return {}
 def _arith_numeric_pick(field_name, want_big, fields):
    """为字段选一个大值或小值，返回字符串。"""
    pi = _arith_pic_info(field_name, fields)
    if pi.get('type') != 'numeric':
        return None
    digits = pi.get('digits', 0)
    decimal = pi.get('decimal', 0)
    total = digits + decimal
    max_val = 10 ** total - 1
    if want_big:
        pick = int(max_val * 0.7)
    else:
        pick = 1
    int_part = str(pick // (10 ** decimal)).zfill(digits)
    dec_part = str(pick % (10 ** decimal)).zfill(decimal)
    if decimal == 0:
        return int_part
    return int_part + dec_part
 def _apply_arith_constraint(rec, field_name, operator, value, want_true, fields):
    """Steer field values so an arithmetic-expression condition can hold.

    ``field_name`` is the left-hand expression text and ``value`` the
    right-hand side. Examples:

    * ``A + B > C`` wanted true: the left fields (A, B) are set large and
      the right field (C) small.
    * ``A + B <= C`` wanted true: the left fields are set small and the
      right field large.

    This is a heuristic, not an exact solver: it aims at making the branch
    reachable and does not target boundary values. ``rec`` is updated in
    place; nothing is returned.
    """
    known_names = [f['name'] for f in fields]
    # 1. Known field names that occur in the (upper-cased) left expression.
    candidates = re.findall(r'\b[A-Z][A-Z0-9-]*(?:\([^)]*\))?\b', field_name.upper())
    left_fields = [token for token in candidates if token in known_names]
    if not left_fields:
        logger.debug(f"算术表达式无法提取字段: {field_name}")
        return
    # 2. The right-hand side only takes part when it is itself a field.
    right_field = value if any(known == value for known in known_names) else None
    # 3. Direction: only the "small" operators flip the wanted outcome.
    if operator in _ARITH_BOUNDS['left_small_ops']:
        left_big = not want_true
    else:
        left_big = want_true
    # 4./5. Left fields take one extreme, the right field the opposite one.
    targets = [(name, left_big) for name in left_fields]
    if right_field:
        targets.append((right_field, not left_big))
    for name, big in targets:
        pick = _arith_numeric_pick(name, big, fields)
        if pick is not None:
            rec[name] = pick
 def apply_constraint(rec, field_name, operator, value, want_true, fields, assignments=None, path_assign=None):
    """Modify ``rec`` in place so that ``field_name operator value`` evaluates to ``want_true``.

    Steps, in order:

    1. normalise the field name and resolve a variable subscript when the
       subscript variable has a value in ``rec``;
    2. fan an unsubscripted constraint out to every subscripted variant;
    3. ignore FILLER fields and redirect REDEFINES fields to their parent;
    4. when ``assignments`` is given, rewrite the constraint onto the root
       variable found by ``trace_to_root`` / ``invert_through_chain``;
    5. leave the record alone when the constraint already holds;
    6. otherwise write a value: a value outside the case list for
       ``not_in``, heuristic steering for arithmetic field-to-field
       comparisons, or the result of ``satisfying_value``.

    Plain field-to-field comparisons are only logged and skipped.
    Returns ``None``.
    """
    # Normalise the field name: drop whitespace around parentheses and commas
    # (WS-CELL ( 1, 1 ) -> WS-CELL(1,1)).
    field_name = re.sub(r'\s*([(),])\s*', r'\1', field_name)
    # Variable subscript resolution: WS-FIXED-VALUE(WS-IDX) -> WS-FIXED-VALUE(1)
    vm = re.match(r'^(\w[\w-]*)\((\w[\w-]*)\)$', field_name)
    if vm:
        base_var, subscript_var = vm.groups()
        if subscript_var in rec:
            try:
                resolved_name = f'{base_var}({int(rec[subscript_var])})'
                if any(f['name'] == resolved_name for f in fields):
                    apply_constraint(rec, resolved_name, operator, value, want_true, fields, assignments, path_assign)
                    return
            except (ValueError, TypeError):
                # The subscript value is not an integer: fall through and
                # treat the name as written.
                pass
    # Subscript propagation: a constraint without subscript is applied to all subscripted variants.
    base = _basename(field_name)
    subscripted = [f for f in fields if f['name'] != base and _basename(f['name']) == base]
    if subscripted and field_name == base:
        for sf in subscripted:
            apply_constraint(rec, sf['name'], operator, value, want_true, fields, assignments, path_assign)
        return
    # Constraints on a REDEFINES field are redirected to the parent field (shared storage).
    for f in fields:
        if f['name'] == field_name:
            if f.get('is_filler'):
                return
            if f.get('redefines'):
                parent_name = f['redefines']
                logger.debug(f"REDEFINES 约束重定向: {field_name} → {parent_name}")
                apply_constraint(rec, parent_name, operator, value, want_true, fields, assignments, path_assign)
                return
            break
    if assignments:
        # Express the constraint on the root variable the field was derived from;
        # the rewrite is only used when the resulting name is a known field.
        root_var, chain = trace_to_root(field_name, assignments, fields, path_assign)
        if root_var != field_name:
            new_field_name, new_op, new_val = invert_through_chain(root_var, chain, operator, value)
            if any(f['name'] == new_field_name for f in fields):
                field_name, operator, value = new_field_name, new_op, new_val
    # If the current value already satisfies the constraint, do not overwrite it
    # (keeps the record consistent with constraints applied earlier).
    if _check_constraint_satisfied(rec, field_name, operator, value, want_true, fields):
        return
    if operator == 'not_in':
        for f in fields:
            if f['name'] == field_name:
                pi = f.get('pic_info', {})
                cases = value if isinstance(value, list) else []
                ftype = pi.get('type', 'unknown')
                if ftype in ('alphanumeric', 'alphabetic'):
                    # First capital letter that is not a case value, repeated to the field length.
                    for c in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ':
                        if c not in cases:
                            rec[field_name] = c.ljust(pi.get('length', 1), c)
                            return
                else:
                    # First integer in 1..99 whose text is not a case value, zero-padded.
                    for n in range(1, 100):
                        if str(n) not in cases:
                            rec[field_name] = str(n).zfill(pi.get('digits', 0) + pi.get('decimal', 0))
                            return
        return
    # Field-to-field comparison (the value side is also a field name).
    if any(f['name'] == value for f in fields):
        if re.search(r'[+\-*/]', field_name):
            _apply_arith_constraint(rec, field_name, operator, value, want_true, fields)
        else:
            logger.debug(f"字段间比较约束跳过：{field_name} {operator} {value}")
        return
    for f in fields:
        if f['name'] == field_name:
            pi = f.get('pic_info', {})
            val = satisfying_value(pi, operator, value, want_true)
            rec[field_name] = val
            return
 # ── 记录生成入口 ──
 def sync_redefined_fields(rec, fields):
    """Re-synchronise REDEFINES fields after assignments or constraints.

    Scalar REDEFINES fields (with a PIC) receive a copy of their parent's
    value when the parent is present in ``rec``. For group REDEFINES items
    (no PIC) the children are filled positionally from the children of the
    redefined group; when the redefining group has fewer children, its last
    child receives the concatenation of all remaining target values.
    88-level and FILLER entries are ignored. ``rec`` is updated in place.
    """
    scalar_links = {}
    group_links = []
    for f in fields:
        if f.get('is_88') or f.get('is_filler'):
            continue
        target = f.get('redefines')
        if not target:
            continue
        if f.get('pic'):
            scalar_links.setdefault(target, []).append(f['name'])
        else:
            group_links.append((f['name'], target))
    for parent_name, child_names in scalar_links.items():
        if parent_name not in rec:
            continue
        for child_name in child_names:
            rec[child_name] = rec[parent_name]
    for redef_name, target_name in group_links:
        redef_kids = _children_of(redef_name, fields)
        tgt_kids = _children_of(target_name, fields)
        last_pos = len(redef_kids) - 1
        target_has_more = len(redef_kids) < len(tgt_kids)
        # zip stops at the shorter list, i.e. once the target children run out.
        for pos, (rk, tk) in enumerate(zip(redef_kids, tgt_kids)):
            if pos == last_pos and target_has_more:
                rec[rk['name']] = ''.join([rec.get(rest['name'], '') for rest in tgt_kids[pos:]])
            else:
                rec[rk['name']] = rec.get(tk['name'], '')
 def apply_occurs_depending(rec, fields):
    """Blank out subscripted fields that lie beyond their OCCURS DEPENDING ON count.

    For every field that names an ``occurs_depending`` variable and whose
    name ends in a numeric subscript ``(n)``, the field is reset when ``n``
    exceeds the current value of that variable in ``rec`` (0 when absent):
    numeric fields become all zeros, alphanumeric/alphabetic fields all
    spaces, any other type zeros of the field length. ``rec`` is updated in
    place.
    """
    for f in fields:
        counter_name = f.get('occurs_depending')
        subscript = re.search(r'\((\d+)\)$', f['name']) if counter_name else None
        if subscript is None:
            continue
        if int(subscript.group(1)) <= int(rec.get(counter_name, 0)):
            continue
        pi = f.get('pic_info', {})
        ftype = pi.get('type', 'unknown')
        if ftype == 'numeric':
            blank = '0' * (pi.get('digits', 0) + pi.get('decimal', 0))
        else:
            filler = ' ' if ftype in ('alphanumeric', 'alphabetic') else '0'
            blank = filler * (pi.get('length', 0) or 1)
        rec[f['name']] = blank
 def _non_match_for(cond_leaf, fields):
    if not fields or not cond_leaf:
        return None
    base = re.sub(r'\s*\(.*?\)\s*$', '', cond_leaf.field)
    for f in fields:
        if re.sub(r'\s*\(.*?\)\s*$', '', f['name']) == base:
            pic = f.get('pic_info', {})
            if pic.get('type') == 'numeric':
                return '0'
            return ' '
    return None
 def _enum_search_paths(node, fields):
    # 从条件字段名推断 OCCURS 数；如 WS-CODE-VAL(WS-IDX) → 查 WS-CODE-VAL(j) 最大 j
    occurs_count = 1
    if node.when_list and node.cond_trees and node.cond_trees[0]:
        ct = node.cond_trees[0]
        if isinstance(ct, CondLeaf):
            base = re.sub(r'\s*\(.*?\)\s*$', '', ct.field)
            for f in fields:
                m = re.match(rf'^{re.escape(base)}\((\d+)\)$', f['name'])
                if m:
                    occurs_count = max(occurs_count, int(m.group(1)))
            if occurs_count <= 1:
                # 再查父组名下各字段的后缀
                parent = node.table_name
                for f in fields:
                    m = re.match(rf'^{re.escape(parent)}\((\d+)\)$', f['name'])
                    if m:
                        occurs_count = max(occurs_count, int(m.group(1)))
    paths = []
    for i, (cond_text, body_seq) in enumerate(node.when_list):
        cond_tree = node.cond_trees[i] if i < len(node.cond_trees) else None
        sub = _cap_paths(enum_paths(body_seq, fields))
        if not sub:
            sub = [([], {})]
        extra_assign = {}
        if cond_tree and isinstance(cond_tree, CondLeaf):
            base = re.sub(r'\s*\(.*?\)\s*$', '', cond_tree.field)
            matching_val = cond_tree.value
            elem_key = f'{base}({i + 1})'
            extra_assign[elem_key] = [{'type': 'move_literal', 'literal': matching_val}]
            non_match = _non_match_for(cond_tree, fields) or ' '
            for j in range(i):
                prev_key = f'{base}({j + 1})'
                extra_assign[prev_key] = [{'type': 'move_literal', 'literal': non_match}]
        for sp_cons, sp_assign in (sub or [([], {})]):
            merged_assign = dict(extra_assign)
            for k, v in sp_assign.items():
                merged_assign.setdefault(k, []).extend(v if isinstance(v, list) else [v])
            paths.append((sp_cons, merged_assign))
    if node.has_at_end:
        sub = _cap_paths(enum_paths(node.at_end_seq, fields))
        for sp_cons, sp_assign in (sub or [([], {})]):
            extra_assign = {}
            non_match = ' '
            if node.when_list:
                ct = node.cond_trees[0]
                if ct and isinstance(ct, CondLeaf):
                    non_match = _non_match_for(ct, fields) or ' '
                    base = re.sub(r'\s*\(.*?\)\s*$', '', ct.field)
                    for j in range(max(occurs_count, 1)):
                        extra_assign[f'{base}({j + 1})'] = [{'type': 'move_literal', 'literal': non_match}]
            merged_assign = dict(extra_assign)
            for k, v in sp_assign.items():
                merged_assign.setdefault(k, []).extend(v if isinstance(v, list) else [v])
            paths.append((sp_cons, merged_assign))
    return paths
 def generate_records(branch_paths_with_assigns, data_fields, base_assignments=None, file_sec=None):
    """Generate test-data records, one per feasible branch path.

    Args:
        branch_paths_with_assigns: list of ``(constraints, path_assignments)``.
        data_fields: field descriptions used to build and constrain records.
        base_assignments: global assignments dict (used for ``trace_to_root``).
        file_sec: passed through to ``propagate_assignments``.

    Returns:
        ``(records, kept_path_cons)`` - ``kept_path_cons`` holds the
        constraints that correspond one-to-one to ``records``. When no path
        yields a record, a single base record with an empty constraint list
        is returned.
    """
    records = []
    kept_path_cons = []
    if branch_paths_with_assigns:
        for seq, (path_cons, path_assign) in enumerate(branch_paths_with_assigns, start=1):
            path_cons = _filter_stop(path_cons)
            rec = make_base_record(seq, data_fields)
            # Pass A: propagate assignments first (MOVE/COMPUTE/READ INTO, ...) to
            # simulate the program state right before the decision point.
            if isinstance(path_assign, dict):
                propagate_assignments(rec, path_assign, data_fields, file_sec=file_sec)
            # Pass A.5: detect impossible paths - a constraint that traces back
            # through the assignment chain to a variable fixed by a literal
            # whose value contradicts the constraint.
            skip_impossible = False
            if base_assignments and isinstance(path_assign, dict):
                for c in path_cons:
                    if len(c) == 4 and not skip_impossible:
                        field, op, val, want = c
                        root_var, chain = trace_to_root(field, base_assignments, data_fields, path_assign)
                        if root_var != field:
                            new_fn, new_op, new_val = invert_through_chain(root_var, chain, op, val)
                            if any(f['name'] == new_fn for f in data_fields):
                                asgn_val = path_assign.get(root_var)
                                if asgn_val is not None:
                                    asgn_list = asgn_val if isinstance(asgn_val, list) else [asgn_val]
                                    # Only the last assignment on the path counts, and only
                                    # when it is a literal MOVE.
                                    if asgn_list and asgn_list[-1]['type'] == 'move_literal' and root_var in rec:
                                        if not _check_constraint_satisfied(rec, root_var, new_op, new_val, want, data_fields):
                                            skip_impossible = True
                                            break
            if skip_impossible:
                # No record (and no constraint entry) is emitted for this path.
                continue
            # Pass B: apply the constraints (makes the decision conditions hold,
            # overriding values that came from MOVEs).
            for c in path_cons:
                if len(c) == 4:
                    field, op, val, want = c
                    apply_constraint(rec, field, op, val, want, data_fields, base_assignments, path_assign)
            # Pass B.5: re-propagate variable-to-variable MOVEs forward so chains
            # stay consistent after the constraints changed values.
            if isinstance(path_assign, dict):
                forward = {}
                for tgt, asgn_val in path_assign.items():
                    asgn_list = asgn_val if isinstance(asgn_val, list) else [asgn_val]
                    filtered = [a for a in asgn_list if a['type'] == 'move' and a.get('source_vars')]
                    if filtered:
                        forward[tgt] = filtered
                if forward:
                    propagate_assignments(rec, forward, data_fields, file_sec=file_sec)
            # Pass C: synchronise REDEFINES fields (shared storage must agree).
            sync_redefined_fields(rec, data_fields)
            # Pass D: OCCURS DEPENDING ON - reset subscripted fields beyond the current count.
            apply_occurs_depending(rec, data_fields)
            records.append(rec)
            kept_path_cons.append(path_cons)
    if not records:
        # Fallback: one base record with the global assignments applied.
        rec = make_base_record(1, data_fields)
        if base_assignments:
            propagate_assignments(rec, base_assignments, data_fields, file_sec=file_sec)
        records.append(rec)
        kept_path_cons.append([])
    return records, kept_path_cons
@@ -0,0 +1,35 @@
 // Lark grammar for the contents of a COBOL DATA DIVISION.
 // The input is any sequence of FILE, WORKING-STORAGE and LINKAGE sections.
 start: data_div_content
 data_div_content: (file_section | working_storage | linkage)*
 // FILE SECTION: one or more FD entries, each followed by its data items.
 file_section: "FILE" "SECTION" DOT fd+
 fd: "FD" NAME FD_SUFFIX data_item+
 // Everything after the FD name up to and including the first period that is
 // not inside a quoted string.
 FD_SUFFIX: /(?:"[^"]*"|'[^']*'|[^.])*\./
 working_storage: "WORKING-STORAGE" "SECTION" DOT data_item*
 linkage: "LINKAGE" "SECTION" DOT data_item*
 // One data description entry: level number, name or FILLER, clauses, period.
 data_item: level_num (NAME | "FILLER") clause* DOT
 level_num: LEVEL
 clause: pic_clause | value_clause | occurs_clause | redefines_clause | usage_clause
      | "SYNC" | "SYNCHRONIZED"
      | "JUSTIFIED" "RIGHT"?
      | "BLANK" "WHEN" "ZERO"
      | "GLOBAL" | "EXTERNAL"
 pic_clause: "PIC" "IS"? PICTURE_STRING
 // VALUE accepts one or more literals.
 value_clause: "VALUE" "IS"? value_literal+
 value_literal: INT | SIGNED_NUMBER | STRING | SQSTRING
             | "ZERO" | "ZEROS" | "ZEROES"
             | "SPACE" | "SPACES"
             | "HIGH-VALUE" | "HIGH-VALUES"
             | "LOW-VALUE" | "LOW-VALUES"
 // Single-quoted string literal (STRING below is the double-quoted form).
 SQSTRING: /'[^']*'/
 redefines_clause: "REDEFINES" NAME
 // Only a single occurrence count is accepted; there is no "n TO m" range form.
 occurs_clause: "OCCURS" INT "TIMES"? ("DEPENDING" "ON" NAME)?
 usage_clause: USAGE_VAL
 USAGE_VAL: "COMP" | "COMP-3" | "COMP-5" | "BINARY" | "PACKED-DECIMAL" | "DISPLAY"
 // Level numbers 01-49, 77 and 88 (the explicit 49 is already covered by [1-4][0-9]).
 LEVEL: /0[1-9]|[1-4][0-9]|49|77|88/
 // Names are upper-case only.
 NAME: /[A-Z][A-Z0-9-]*/
 // Matched case-insensitively (trailing /i flag).
 PICTURE_STRING: /[0-9A-Z()+,\-*\/V]+/i
 INT: /[0-9]+/
 DOT: /\./
 %import common.SIGNED_NUMBER
 %import common.ESCAPED_STRING -> STRING
 %import common.WS
 %ignore WS
@@ -0,0 +1,163 @@
 """COBOL数据模型 — 所有层共享，无外部依赖"""
 from dataclasses import dataclass, field
 # ── 字段定义 ──
@dataclass
 class PicInfo:
    """Summary of a field's PIC clause."""
    type: str = 'unknown'               # "numeric" | "alphanumeric" | "alphabetic"; 'unknown' until set
    digits: int = 0                     # presumably the number of integer digits - TODO confirm
    decimal: int = 0                    # presumably the number of decimal digits - TODO confirm
    length: int = 0                     # presumably the display length in characters - TODO confirm
    signed: bool = False                # presumably True for a signed picture - TODO confirm
@dataclass
 class FieldDef:
    """One data description entry (a field or group item)."""
    name: str
    level: int
    pic: str | None = None             # raw PIC string; None when the entry has no PIC
    pic_info: PicInfo | None = None    # parsed form of ``pic``
    is_filler: bool = False
    occurs_count: int = 0
    occurs_depending: str | None = None    # presumably the DEPENDING ON variable name - TODO confirm
    redefines: str | None = None       # presumably the name of the redefined item - TODO confirm
    usage: str | None = None           # "COMP" | "COMP-3" | "BINARY" | "PACKED-DECIMAL" | ...
    value: str | None = None
    values: list[str] | None = None
    is_88: bool = False                # presumably marks an 88-level condition name - TODO confirm
    parent: str | None = None
    section: str | None = None
 # ── 分支树 ──
 class BrSeq:
    """Ordered sequence of branch-tree nodes."""
    def __init__(self) -> None:
        self.children: list = []
    def add(self, child) -> None:
        """Append ``child`` as the last node of the sequence."""
        self.children.append(child)
 class BrIf:
    """IF node: a condition with a true branch and a false branch."""
    def __init__(self, condition):
        self.condition = condition
        # Left empty here; per the original note it is assigned by core.py
        # while parsing.
        self.cond_tree = None
        self.true_seq, self.false_seq = BrSeq(), BrSeq()
 class BrEval:
    """EVALUATE node with its WHEN clauses and optional WHEN OTHER branch."""
    def __init__(self, subject):
        self.subject = subject
        # ALSO multi-subject form, e.g. ['WS-A', 'WS-B']; empty = ordinary mode.
        self.subjects: list = []
        self.when_list: list = []
        self.has_other = False
        self.other_seq = BrSeq()
 class BrPerform:
    """PERFORM node; ``perf_type`` selects the form and ``body_seq`` holds the body."""
    def __init__(self, perf_type, condition=None, target=None, thru=None, times=None,
                 varying_var=None, varying_from=None, varying_by=None):
        self.perf_type = perf_type
        # Loop / target description.
        self.condition, self.target = condition, target
        self.thru, self.times = thru, times
        # VARYING clause parts.
        self.varying_var = varying_var
        self.varying_from = varying_from
        self.varying_by = varying_by
        self.body_seq = BrSeq()
 class Assign:
    """Assignment node: MOVE/COMPUTE/ADD/SUBTRACT/MULTIPLY/DIVIDE."""
    def __init__(self, target: str, source_info: dict):
        self.target, self.source_info = target, source_info
 class CallNode:
    """CALL of a subprogram, treated as a black box."""
    def __init__(self, program_name: str, using_params: list = None):
        self.program_name = program_name
        # Entries look like {"name": "WS-A", "mechanism": "reference"};
        # mechanism is "reference" | "content" | "value".
        self.using_params = using_params if using_params else []
 # ── 条件树 ──
 class CondLeaf:
    """Leaf of a condition tree: a single ``field op value`` comparison."""
    def __init__(self, field, op, value):
        self.field, self.op, self.value = field, op, value
 class CondNot:
    """Condition-tree node wrapping a single ``child`` condition (NOT)."""
    def __init__(self, child) -> None:
        self.child = child
 class CondAnd:
    """Condition-tree node holding a ``left`` and a ``right`` operand (AND)."""
    def __init__(self, left, right):
        self.left, self.right = left, right
 class CondOr:
    """Condition-tree node holding a ``left`` and a ``right`` operand (OR)."""
    def __init__(self, left, right):
        self.left, self.right = left, right
 class BrSearch:
    """SEARCH / SEARCH ALL table lookup."""
    def __init__(self, table_name, is_all=False, varying=None):
        self.table_name = table_name
        self.is_all = is_all
        # The VARYING name is stored upper-cased; None when absent or empty.
        self.varying = None if not varying else varying.upper()
        self.when_list = []       # [(condition_text, BrSeq)]
        self.cond_trees = []      # [cond_tree, ...]
        self.has_at_end = False
        self.at_end_seq = BrSeq()
 class GoTo:
    """GO TO node: unconditional jump to the named paragraph."""
    def __init__(self, target: str, body_seq: 'BrSeq' = None):
        self.target = target
        # A missing (or falsy) body is replaced by a fresh empty sequence.
        self.body_seq = body_seq if body_seq else BrSeq()
 class ExitNode:
    """Control-flow exit: EXIT PARAGRAPH / EXIT PERFORM / EXIT SECTION / EXIT PROGRAM."""
    def __init__(self, exit_type: str) -> None:
        self.exit_type = exit_type
 # ── 约束路径 ──
 # A constraint is a plain tuple; a path is a list of constraints.
 Constraint = tuple   # (field, op, value, want_true)
 Path = list[Constraint]
 # ── Parse errors ──
 # One parse diagnostic: a line number, a message, and a severity that
 # defaults to 'warning'.
@dataclass
 class ParseError:
    line: int
    message: str
    severity: str = 'warning'
# Result bundle with four defaulted fields. The mutable ones use
# default_factory so every instance gets its own dict / list.
@dataclass
 class ProcParseResult:
    # Parsed tree; None by default.
    tree: BrSeq | None = None
    assignments: dict = field(default_factory=dict)
    # ParseError records; empty by default.
    errors: list[ParseError] = field(default_factory=list)
    # NOTE(review): presumably tells the caller to fall back to an AI-based
    # parser — confirm with the consumer of this flag.
    fallback_to_ai: bool = False
@@ -0,0 +1,118 @@
 """输出层：JSON输出（按文件分组入出力 + 工作存储区分）"""
 import json
 from pathlib import Path
 _INVERSE_OP = {'>': '<=', '<': '>=', '=': '<>', '>=': '<', '<=': '>'}
 def _scenario_text(path_cons):
    parts = []
    for c in path_cons:
        if len(c) != 4:
            continue
        field, op, val, want = c
        if op == 'not_in':
            desc = f"{field} not in {val}" if want else f"{field} in {val}"
        elif not want:
            desc = f"{field} {_INVERSE_OP.get(op, '?' + op)} {val}"
        else:
            desc = f"{field} {op} {val}"
        parts.append(desc)
    return ', '.join(parts)
 def output_json(records, outpath, roles=None, fd_fields=None, field_to_fd=None,
                open_dir=None, path_cons_list=None):
    """Write generated records to *outpath* as JSON.

    Without *roles* the records are dumped unchanged. Otherwise each record
    becomes an entry with three sections:

    * ``input`` — per FD, fields with role input/inout when the FD is opened
      INPUT or I-O;
    * ``expected_output`` — per FD, fields with role output/inout when the FD
      is opened OUTPUT or I-O;
    * ``working_storage`` — every field that is not a key of *field_to_fd*.

    When *path_cons_list* has an entry for the record's index and it renders
    to a non-empty text, that text is stored under ``scenario``.
    The parent directory of *outpath* is created if necessary.
    """
    outpath.parent.mkdir(parents=True, exist_ok=True)

    def _dump(payload):
        with open(outpath, 'w', encoding='utf-8') as f:
            json.dump(payload, f, ensure_ascii=False, indent=2)

    if not roles:
        _dump(records)
        return

    directions = open_dir or {}
    group_by_fd = bool(fd_fields and field_to_fd)
    entries = []
    for idx, rec in enumerate(records):
        inp, out_exp = {}, {}
        if group_by_fd:
            for fd_name, members in fd_fields.items():
                direction = directions.get(fd_name, '')
                readable = direction in ('INPUT', 'I-O')
                writable = direction in ('OUTPUT', 'I-O')
                inp_block, out_block = {}, {}
                for fname in members:
                    if fname not in rec:
                        continue
                    role = roles.get(fname, 'unused')
                    if readable and role in ('input', 'inout'):
                        inp_block[fname] = rec[fname]
                    if writable and role in ('output', 'inout'):
                        out_block[fname] = rec[fname]
                # Empty per-FD blocks are left out of the entry.
                if inp_block:
                    inp[fd_name] = inp_block
                if out_block:
                    out_exp[fd_name] = out_block

        # Working storage: whatever does not belong to any FD.
        if field_to_fd:
            ws = {name: val for name, val in rec.items() if name not in field_to_fd}
        else:
            ws = dict(rec.items())

        entry = {
            'input': inp,
            'expected_output': out_exp,
            'working_storage': ws,
        }
        if path_cons_list and idx < len(path_cons_list):
            text = _scenario_text(path_cons_list[idx])
            if text:
                entry['scenario'] = text
        entries.append(entry)

    _dump(entries)
 def output_input_files(records, outdir, stem, roles, fd_fields, field_to_fd, open_dir):
    """Write one input JSON file per FD.

    An FD qualifies when it is opened INPUT or I-O and at least one of its
    fields has role input/inout. For each such FD the file
    ``{stem}_{fd_name}.json`` receives one object per record, holding only the
    FD's input/inout fields present in that record; records that contribute no
    field are dropped. Nothing is created when no FD qualifies.
    """
    directions = open_dir or {}
    readable = ('INPUT', 'I-O')
    input_roles = ('input', 'inout')

    selected = {}
    for fd_name, members in fd_fields.items():
        if directions.get(fd_name, '') not in readable:
            continue
        if any(roles.get(fname, 'unused') in input_roles for fname in members):
            selected[fd_name] = members
    if not selected:
        return

    outdir.mkdir(parents=True, exist_ok=True)
    for fd_name, members in selected.items():
        # The role filter does not depend on the record, so apply it once.
        wanted = [fname for fname in members
                  if roles.get(fname, 'unused') in input_roles]
        rows = []
        for rec in records:
            row = {fname: rec[fname] for fname in wanted if fname in rec}
            if row:
                rows.append(row)
        outpath = outdir / f'{stem}_{fd_name}.json'
        with open(outpath, 'w', encoding='utf-8') as f:
            json.dump(rows, f, ensure_ascii=False, indent=2)
@@ -0,0 +1,439 @@
 """Preprocessing + COPYBOOK resolution + DATA DIVISION parsing + PIC parsing"""
 import re
 from pathlib import Path
 from lark import Lark, Transformer, v_args
 from .models import FieldDef, PicInfo
 # ── Preprocessor ──
 def _is_fixed_format(source: str) -> bool:
    if re.search(r'>>SOURCE\s+FORMAT\s+IS\s+FREE', source, re.IGNORECASE):
        return False
    if re.search(r'>>SOURCE\s+FORMAT\s+IS\s+FIXED', source, re.IGNORECASE):
        return True
    lines = [l for l in source.splitlines() if l.strip()]
    fixed_hits = 0
    free_hits = 0
    for line in lines[:10]:
        if len(line) >= 72:
            free_hits += 1
        elif len(line) >= 7 and line[6] in ('*', '/', '-', 'D'):
            fixed_hits += 1
    return fixed_hits >= free_hits if (fixed_hits + free_hits) > 0 else True
 def preprocess(source: str) -> str:
    """Normalise COBOL source text into upper-case lines.

    Fixed format (as decided by ``_is_fixed_format``):
      * comment lines (``*`` or ``/`` in column 7) and debug lines (``D``) are
        dropped;
      * continuation lines (``-`` in column 7) are appended to the previous
        output line, separated by one space;
      * other lines keep everything from column 7 onwards.
    Free format: ``*>`` comments are cut off and lines that end up empty are
    dropped.

    Every emitted line is upper-cased and any ``FALSE <word>`` fragment is
    removed from it. Continuation text is upper-cased as well, so the merged
    line matches the case-sensitive patterns used further down the pipeline
    (previously it was appended in its original case).

    Returns:
        The processed lines joined with ``'\\n'``.
    """
    fixed = _is_fixed_format(source)
    lines = []
    for raw_line in source.splitlines():
        line = raw_line.rstrip()
        if not line:
            lines.append('')
            continue
        if fixed:
            has_indicator = len(line) >= 7
            indicator = line[6].upper() if has_indicator else ''
            if indicator in ('*', '/', 'D'):
                continue
            if indicator == '-':
                # A continuation with nothing before it is silently dropped.
                if lines:
                    lines[-1] = lines[-1] + ' ' + line[7:].lstrip().upper()
                continue
            content = line[6:] if has_indicator else line
        else:
            comment_pos = line.find('*>')
            if comment_pos >= 0:
                line = line[:comment_pos]
            line = line.strip()
            if not line:
                continue
            content = line
        lines.append(re.sub(r'\s+FALSE\s+[^\s.]+', '', content.upper()))
    return '\n'.join(lines)
 def extract_data_division(source: str) -> str:
    """Return the text between the DATA DIVISION header and PROCEDURE DIVISION.

    The header match is case-sensitive. When no PROCEDURE DIVISION follows,
    everything up to the end of *source* is returned. The result is stripped;
    an empty string is returned when there is no DATA DIVISION header.
    """
    header = re.search(r'DATA\s+DIVISION\s*\.', source)
    if header is None:
        return ''
    rest = source[header.end():]
    proc = re.search(r'PROCEDURE\s+DIVISION', rest)
    body = rest[:proc.start()] if proc else rest
    return body.strip()
 def extract_procedure_division(source: str) -> str:
    """Return *source* from the first PROCEDURE DIVISION onwards, stripped.

    The match is case-sensitive; an empty string is returned when the header
    is not found.
    """
    found = re.search(r'PROCEDURE\s+DIVISION', source)
    return source[found.start():].strip() if found else ''
 # ── COPYBOOK Resolution ──
 # Extensions tried, in order, when looking up a copybook by name.
 _COPYBOOK_EXTENSIONS = ['.cpy', '.cbl', '.cpb', '']


 def resolve_copybooks(source: str, source_dir: str) -> str:
    """Find COPY statements and replace them with the copybook content.

    Each line consisting of a ``COPY name [REPLACING ==a== BY ==b== ...]``
    statement is looked up in *source_dir* as ``NAME`` plus each extension of
    ``_COPYBOOK_EXTENSIONS``. When a file is found, the line is replaced by a
    ``* COPY NAME`` marker line followed by the file content (read as UTF-8),
    after applying the REPLACING pairs case-insensitively. Lines whose
    copybook cannot be found are kept unchanged.

    Replacement text is inserted literally: it is never interpreted as a
    regex replacement template, so backslashes in pseudo-text are safe.
    Pairs whose search text is blank are ignored, because an empty pattern
    would match between every character.
    """
    _RE_COPY = re.compile(
        r"^\s*COPY\s+(\w[\w-]*)(?:\s+REPLACING\s+(.+?))?\s*\.?\s*$",
        re.IGNORECASE
    )
    _RE_PAIR = re.compile(r"==(.+?)==\s+BY\s+==(.+?)==", re.IGNORECASE)

    result = []
    for line in source.split('\n'):
        m = _RE_COPY.match(line)
        found = None
        if m:
            name = m.group(1).upper()
            for ext in _COPYBOOK_EXTENSIONS:
                candidate = Path(source_dir, name + ext)
                # is_file() rather than exists(): with the empty extension a
                # directory of the same name must not be taken for a copybook.
                if candidate.is_file():
                    found = candidate
                    break
        if found is None:
            result.append(line)
            continue

        cb = found.read_text(encoding='utf-8')
        if m.group(2):
            for old, new in _RE_PAIR.findall(m.group(2)):
                needle = old.strip()
                if not needle:
                    continue
                replacement = new.strip()
                # A callable replacement is used verbatim by re.sub.
                cb = re.sub(
                    re.escape(needle), lambda _m, text=replacement: text,
                    cb, flags=re.IGNORECASE
                )
        result.append(f'      * COPY {name}')
        result.append(cb)
    return '\n'.join(result)
 # ── Lark Grammar ──
 # Grammar text, loaded on first use by _get_grammar().
 _GRAMMAR_CACHE = None


 def _get_grammar() -> str:
    """Return the content of ``grammar.lark`` located next to this module.

    The file is read (as UTF-8) only while the module-level cache is still
    None; afterwards the cached text is returned.
    """
    global _GRAMMAR_CACHE
    if _GRAMMAR_CACHE is not None:
        return _GRAMMAR_CACHE
    grammar_file = Path(__file__).parent / 'grammar.lark'
    _GRAMMAR_CACHE = grammar_file.read_text(encoding='utf-8')
    return _GRAMMAR_CACHE
 # ── Data Transformer ──
# Lark Transformer that turns a parsed DATA DIVISION tree into a flat list of
# field dicts. With v_args(inline=True) each callback receives the children of
# its node as positional arguments.
#
# State kept on the instance:
#   fields       - finished field dicts, in the order they were flushed
#   _pending     - field dicts collected since the last flush
#   _last_parent - name of the most recent item handled by a non-88 branch of
#                  data_item (used as 'parent' for 88-level entries)
#
# NOTE(review): the callback names (start, file_section, data_item, ...) are
# presumably the rule/terminal names of grammar.lark — the grammar is not
# visible here, so confirm against it.
@v_args(inline=True)
 class DataTransformer(Transformer):
    def __init__(self):
        super().__init__()
        self.fields = []
        self._last_parent = None
        self._pending = []
    # Final flush: pending items that have no 'section' yet get
    # 'WORKING-STORAGE'; the accumulated list is the transform result.
    def start(self, *items):
        for f in self._pending:
            f['section'] = f.get('section', 'WORKING-STORAGE')
            self.fields.append(f)
        self._pending = []
        return self.fields
    # Flush pending items, tagging them with section 'FILE'.
    def file_section(self, *args):
        for f in self._pending:
            f['section'] = 'FILE'
            self.fields.append(f)
        self._pending = []
        return None
    # Flush pending items, tagging them with section 'WORKING-STORAGE'.
    def working_storage(self, *args):
        for f in self._pending:
            f['section'] = 'WORKING-STORAGE'
            self.fields.append(f)
        self._pending = []
        return None
    # Flush pending items, tagging them with section 'LINKAGE'.
    def linkage(self, *args):
        for f in self._pending:
            f['section'] = 'LINKAGE'
            self.fields.append(f)
        self._pending = []
        return None
    # Build one field dict from a level number, a name and the clause dicts
    # produced by the clause callbacks, and queue it in _pending.
    def data_item(self, level_num, name, *clauses):
        level = int(str(level_num))
        name = str(name)
        is_filler = (name.upper() == 'FILLER')
        pic = None
        value = None
        values = None
        redefines = None
        usage = None
        occurs_count = 0
        occurs_depending = None
        # Only dict clauses contribute; when a key appears in several clauses
        # the last one wins.
        for c in clauses:
            if isinstance(c, dict):
                if 'pic' in c:
                    pic = c['pic']
                if 'value' in c:
                    value = c['value']
                if 'values' in c:
                    values = c['values']
                if 'redefines' in c:
                    redefines = c['redefines']
                if 'usage' in c:
                    usage = c['usage']
                if 'occurs' in c:
                    occurs_count = c['occurs']
                    # DEPENDING ON is only looked for inside an OCCURS clause.
                    if 'depends' in c:
                        occurs_depending = c['depends']
        base = {
            'level': level,
            'name': name,
            'pic': pic if pic else None,
            'value': value,
            'values': values,
            'is_filler': is_filler,
            'redefines': redefines,
            'usage': usage,
            'occurs': occurs_count,
            'occurs_depending': occurs_depending,
        }
        if pic is not None:
            # Elementary item with a PIC clause.
            self._pending.append(base)
            self._last_parent = name
        elif level == 88 and value is not None:
            # 88-level entry with a value: strip surrounding quotes and link
            # it to the most recently seen non-88 item. _last_parent is left
            # unchanged here.
            base.update({
                'pic': None,
                'value': value.strip("'").strip('"'),
                'values': [v.strip("'").strip('"') for v in values] if values else None,
                'is_88': True,
                'parent': self._last_parent or '',
            })
            self._pending.append(base)
        else:
            # Group item (no PIC; has subordinate fields). An 88-level entry
            # without a value also ends up in this branch.
            self._pending.append(base)
            self._last_parent = name
        return None
    def clause(self, *args):
        # Merge dict arguments into one dict; a bare string that names a
        # known usage keyword is recorded as the usage. Returns None when
        # nothing was collected.
        result = {}
        for a in args:
            if isinstance(a, dict):
                result.update(a)
            elif isinstance(a, str) and a.upper() in (
                'COMP', 'COMP-3', 'COMP-5', 'BINARY', 'PACKED-DECIMAL', 'DISPLAY',
            ):
                result['usage'] = a.upper()
        return result if result else None
    # The picture string is taken from the last child.
    def pic_clause(self, *args):
        return {'pic': str(args[-1])}
    def usage_clause(self, token):
        return {'usage': str(token)}
    # Collect every literal (skipping the words VALUE / IS) with surrounding
    # quotes stripped; 'value' is the first literal, 'values' the full list.
    def value_clause(self, *args):
        values = []
        for a in args:
            if isinstance(a, str) and a.upper() in ('VALUE', 'IS'):
                continue
            val = str(a).strip("'").strip('"')
            values.append(val)
        return {'value': values[0], 'values': values} if values else {'value': None}
    def value_literal(self, token):
        return str(token)
    # First child is the occurrence count; a second child, when present, is
    # stored as the DEPENDING ON name.
    def occurs_clause(self, *args):
        result = {'occurs': int(args[0])}
        if len(args) >= 2:
            result['depends'] = str(args[1])
        return result
    # The redefined name is taken from the last child.
    def redefines_clause(self, *args):
        return {'redefines': str(args[-1])}
    def level_num(self, token):
        return token
    # Terminal callbacks: convert tokens to plain str / int values.
    def NAME(self, token):
        return str(token)
    def PICTURE_STRING(self, token):
        return str(token)
    def INT(self, token):
        return int(token)
 # ── PIC Parser ──
 def _expand_pic(s: str) -> str:
    result = ''
    i = 0
    while i < len(s):
        if s[i] == '(':
            j = s.find(')', i)
            if j > i + 1:
                count = int(s[i + 1:j])
                if result:
                    result += result[-1] * (count - 1)
                i = j + 1
                continue
        result += s[i]
        i += 1
    return result
 def parse_pic(pic_str: str) -> PicInfo:
    """Classify a PICTURE string and fill in a PicInfo.

    A leading ``S`` marks the item as signed and is removed; repeat counts are
    expanded; the first remaining character then selects the type:

    * ``9`` or ``V``  -> ``numeric`` (digits before / after the ``V``)
    * ``X``           -> ``alphanumeric``
    * ``A``           -> ``alphabetic``
    * ``Z * $ + -``   -> ``numeric-edited``
    * trailing ``CR`` / ``DB`` -> ``numeric-edited``
    * anything else   -> ``alphanumeric``

    A picture that is empty, or empty once the sign is removed (a bare ``S``),
    returns the PicInfo as filled so far instead of raising IndexError. A
    picture that starts with the implied decimal point (``V99``, ``SV99``) is
    treated as numeric rather than falling through to alphanumeric.
    """
    info = PicInfo()
    s = pic_str.upper().strip()
    if not s:
        return info
    if s.startswith('S'):
        info.signed = True
        s = s[1:]
    expanded = _expand_pic(s)
    if not expanded:
        # Nothing left to classify, e.g. the picture was just 'S'.
        return info

    def _fraction_digits(text):
        # Count the 9s after an implied (V) or explicit (.) decimal point;
        # None means the picture has no decimal point at all.
        if 'V' in text:
            return text.split('V')[1].count('9')
        if '.' in text:
            return text.split('.')[1].count('9')
        return None

    def _fill_edited(text):
        # Shared by both numeric-edited branches; info.decimal is left
        # untouched when the picture has no decimal point.
        info.type = 'numeric-edited'
        info.digits = text.count('9')
        fraction = _fraction_digits(text)
        if fraction is not None:
            info.decimal = fraction
        info.length = len(expanded)

    lead = expanded[0]
    if lead in ('9', 'V'):
        info.type = 'numeric'
        if 'V' in expanded:
            parts = expanded.split('V')
            info.digits = parts[0].count('9')
            info.decimal = parts[1].count('9')
        else:
            info.digits = expanded.count('9')
            info.decimal = 0
    elif lead == 'X':
        info.type = 'alphanumeric'
        info.length = len(expanded)
    elif lead == 'A':
        info.type = 'alphabetic'
        info.length = len(expanded)
    elif lead in ('Z', '*', '$', '+', '-'):
        _fill_edited(expanded)
    elif expanded.endswith('CR') or expanded.endswith('DB'):
        # NOTE(review): this branch is only reached when the picture starts
        # with none of the characters tested above, so e.g. '999CR' is still
        # classified as plain numeric — confirm whether that is intended.
        _fill_edited(expanded[:-2])
    else:
        info.type = 'alphanumeric'
        info.length = len(expanded)
    return info
 # ── DATA DIVISION entry point ──
 def parse_data_division(data_div_text: str) -> list[FieldDef]:
    """Parse DATA DIVISION text into a list of FieldDef objects.

    The text is parsed with the Lark grammar (Earley parser, dynamic lexer),
    flattened by DataTransformer, and every resulting dict is converted to a
    FieldDef. ``pic_info`` is produced by ``parse_pic`` when the item has a
    PIC clause and is None otherwise.
    """
    parser = Lark(_get_grammar(), parser='earley', lexer='dynamic')
    tree = parser.parse(data_div_text)
    raw_fields = DataTransformer().transform(tree)

    def _to_field(r):
        pic = r.get('pic', '')
        return FieldDef(
            name=r['name'],
            level=r['level'],
            pic=pic,
            pic_info=parse_pic(pic) if pic else None,
            is_filler=r.get('is_filler', False),
            occurs_count=r.get('occurs', 0),
            occurs_depending=r.get('occurs_depending'),
            redefines=r.get('redefines'),
            usage=r.get('usage'),
            value=r.get('value'),
            values=r.get('values'),
            is_88=r.get('is_88', False),
            parent=r.get('parent'),
            section=r.get('section'),
        )

    return [_to_field(r) for r in raw_fields]
 # ── FILE-CONTROL / FILE SECTION / OPEN parsing ──
 def parse_file_control(source: str) -> dict:
    """Parse FILE-CONTROL and return ``{select name: assigned literal}``.

    Only ``SELECT name ... ASSIGN TO 'literal'`` entries with a quoted literal
    are picked up. Both the name and the literal are upper-cased. An empty
    dict is returned when there is no FILE-CONTROL paragraph.
    """
    section = re.search(r'FILE-CONTROL\.(.*?)(?=DATA\s+DIVISION|\Z)', source, re.DOTALL | re.IGNORECASE)
    if section is None:
        return {}
    select_re = r'SELECT\s+(\w[\w-]*)\s+[^.]*?\bASSIGN\s+TO\s+(["\'])(.*?)\2'
    # A name selected twice keeps the literal of its last occurrence.
    return {
        sel.group(1).upper(): sel.group(3).upper()
        for sel in re.finditer(select_re, section.group(1), re.IGNORECASE)
    }
 def parse_file_section(source: str) -> dict:
    """Parse FILE SECTION and return ``{FD name: [01-level record names]}``.

    The section runs up to WORKING-STORAGE SECTION, LINKAGE SECTION or the end
    of *source*. It is cut into one chunk per FD entry, and every level
    ``01`` / ``1`` item in a chunk is listed under that FD. All names are
    upper-cased. An empty dict is returned when there is no FILE SECTION.
    """
    section = re.search(r'FILE\s+SECTION\.(.*?)(?=WORKING-STORAGE\s+SECTION|LINKAGE\s+SECTION|\Z)',
                        source, re.DOTALL | re.IGNORECASE)
    if not section:
        return {}
    result = {}
    # Split in front of every line that starts an FD entry.
    for block in re.split(r'\n\s*(?=FD\s+)', section.group(1).strip()):
        header = re.match(r'FD\s+(\w[\w-]*)', block, re.IGNORECASE)
        if header:
            records = re.findall(r'^\s*0{0,1}1\s+(\w[\w-]*)', block, re.MULTILINE)
            result[header.group(1).upper()] = [name.upper() for name in records]
    return result
 def scan_open_statements(source: str) -> dict:
    """Scan OPEN statements and return ``{file name: 'INPUT'|'OUTPUT'|'I-O'}``.

    Every name that follows a direction keyword inherits that direction until
    the next direction keyword, so ``OPEN INPUT A B OUTPUT C`` yields
    ``A`` and ``B`` as INPUT and ``C`` as OUTPUT. (The previous implementation
    matched the operand list greedily and attributed every later name — and
    the later keywords themselves — to the first direction.) Names are
    upper-cased; a file opened several times keeps the direction seen last.
    """
    directions = ('INPUT', 'OUTPUT', 'I-O')
    dirs = {}
    for m in re.finditer(
        r'OPEN\s+((?:INPUT|OUTPUT|I-O)\s+[\w\s-]+'
        r'(?:\s+(?:INPUT|OUTPUT|I-O)\s+[\w\s-]+)*)',
        source, re.IGNORECASE
    ):
        current = None
        # Walk the operand list word by word; 'I-O' is a single word for
        # this token pattern.
        # NOTE(review): the operand list only ends at the first character
        # that is not a word character, whitespace or '-', so an OPEN that is
        # not followed by a period can still absorb the words of the next
        # statement; other words such as EXTEND are treated as file names.
        for token in re.findall(r'\w[\w-]*', m.group(1)):
            word = token.upper()
            if word in directions:
                current = word
            elif current is not None:
                dirs[word] = current
    return dirs
@@ -20,11 +20,6 @@ class Config:
    num_records: int = 1000
    branch_pass: float = 0.80
    max_llm_cost: float = 0.50
    quality_gate_mode: str = "warn"
    quality_gate_decision_threshold: float = 0.90
    quality_gate_paragraph_threshold: float = 1.0
    gcov_enabled: bool = False
    max_quality_retries: int = 4
    @classmethod
    def from_toml(cls, path="aurak.toml"):
@@ -28,15 +28,6 @@ class VerificationRun:
    field_results: list[FieldResult] = field(default_factory=list)
    runner: str = "native"
    branch_rate: float = 0.0
    paragraph_rate: float = 0.0          # 段落覆盖率
    decision_rate: float = 0.0            # 决策点覆盖率
    hina_type: str = ""                   # HINA 类型
    hina_confidence: float = 0.0          # HINA 确信度
    quality_score: float = 0.0            # 质量评分
    quality_warn: str = ""                # 质量警告信息
    heal_retry: int = 0                   # 自愈重试次数
    simple_retry: int = 0                 # 朴素重试次数
    total_retry: int = 0                  # 总重试次数
    llm_cost: float = 0.0
    report_path: str = ""
    debug: dict = field(default_factory=dict)
@@ -1 +0,0 @@
 # HINA 程序分类与质量门禁包
@@ -1,120 +0,0 @@
 """
 HINA 程序分类器 — L1 关键字规则 + 确信度计算。
 通过 COBOL 源码中的关键字匹配进行程序分类，支持多级确信度判定。
 """
 from __future__ import annotations
 from typing import Any
 # ── L1 rules ──────────────────────────────────────────────────────────────
 # Format: (category name, [keyword list], confidence)
 # detect_keyword() reports the third element as the confidence of a hit.
 L1_RULES: list[tuple[str, list[str], float]] = [
    ("DB操作", ["EXEC SQL"], 0.95),
    ("子程序调用", ["CALL", "LINKAGE SECTION"], 0.90),
    ("IS INITIAL", ["IS INITIAL"], 0.99),
    ("SYSIN", ["SYSIN"], 0.90),
    ("编码转换", ["ALPHABETIC", "ASCII", "EBCDIC"], 0.85),
    ("online", ["DFHCOMMAREA", "MAP"], 0.95),
    ("SORT", ["SORT ON KEY"], 0.95),
    ("MERGE", ["MERGE ON KEY"], 0.95),
    ("编辑输出", ["WRITE AFTER", "WRITE BEFORE"], 0.80),
    ("文件编成", ["ORGANIZATION IS"], 0.99),
    ("替代索引", ["ALTERNATE RECORD KEY"], 0.99),
 ]
 # ── Conflict-resolution rules ─────────────────────────────────────────────
 # Disambiguation strategy when L1 matches more than one category:
 #   value = "file_count"         → pick the category with more tests
 #   value = "has_accumulator"    → pick the category that has an accumulator
 # NOTE(review): the key pairs below use category names that do not occur in
 # L1_RULES above — confirm where these names are produced.
 CONFLICT_RULES: dict[tuple[str, str], str] = {
    ("マッチング", "キーブレイク"): "file_count",
    ("編集処理", "項目チェック"): "file_count",
    ("キーブレイク", "項目チェック(重複)"): "has_accumulator",
 }
 # ── 关键字检测 ───────────────────────────────────────────────────────────
 def detect_keyword(source: str) -> list[tuple[str, float, str]]:
    """Search COBOL source for the keywords defined in L1_RULES.

    The source is upper-cased and each keyword is tested as a plain substring.

    Args:
        source: COBOL program source text.

    Returns:
        One ``(category, confidence, matched keyword)`` tuple per rule that
        hit, in rule order. A rule contributes at most once: only its first
        matching keyword is reported.
    """
    haystack = source.upper()
    hits: list[tuple[str, float, str]] = []
    for category, keywords, confidence in L1_RULES:
        first_hit = next((kw for kw in keywords if kw in haystack), None)
        if first_hit is not None:
            hits.append((category, confidence, first_hit))
    return hits
 # ── 确信度计算 ───────────────────────────────────────────────────────────
 def compute_confidence(
    source: str,
    structure: dict[str, Any] | None = None,
    llm_result: dict[str, Any] | None = None,
 ) -> dict[str, Any]:
    """Compute the classification confidence for a program.

    Priority:
      1. An L1 keyword hit whose best confidence is >= 0.90 is returned
         directly.
      2. Otherwise, when an LLM result is supplied, its category and
         confidence are used.
      3. Otherwise the result is ``unknown``.

    Args:
        source: COBOL program source text.
        structure: optional program structure information (not used by this
            function).
        llm_result: optional LLM classification result, read through the keys
            ``"category"`` and ``"confidence"``.

    Returns:
        dict with:
            - "category": category name or "unknown"
            - "confidence": confidence value
            - "source": origin of the result ("l1" / "llm" / "unknown")
            - "matches": keyword hits (an empty list in the "unknown" case)
    """
    def _verdict(category, confidence, origin, found):
        return {
            "category": category,
            "confidence": confidence,
            "source": origin,
            "matches": found,
        }

    matches = detect_keyword(source)

    # 1. L1 keyword detection: take the highest-confidence hit.
    if matches:
        top_category, top_confidence, _ = max(matches, key=lambda hit: hit[1])
        if top_confidence >= 0.90:
            return _verdict(top_category, top_confidence, "l1", matches)

    # 2. LLM result.
    if llm_result is not None:
        return _verdict(
            llm_result.get("category", "unknown"),
            llm_result.get("confidence", 0.0),
            "llm",
            matches,
        )

    # 3. Unknown.
    return _verdict("unknown", 0.0, "unknown", [])
@@ -1,62 +0,0 @@
 """
 质量门禁 — 执行前检查测试数据是否满足覆盖率和边界要求。
 Phase 1 可用: 决策点覆盖、段落覆盖
 Phase 2 启用: HINA 必须项、字段覆盖
 """
 def check(
    complete_tests: list,
    hina_result: dict,
    coverage: dict,
    decision_threshold: float = 0.90,
    paragraph_threshold: float = 1.0,
 ) -> dict:
    """Quality-gate check.

    Args:
        complete_tests: the complete test data set.
        hina_result: HINA classification result (forwarded to the scorer).
        coverage: coverage data; read through the keys ``branch_rate``,
            ``paragraph_rate`` and ``uncovered_decision_ids``.
        decision_threshold: minimum ``branch_rate``.
        paragraph_threshold: minimum ``paragraph_rate``.

    Returns:
        dict with ``passed`` (True when no issue was recorded), ``score`` and
        ``issues``.
    """
    issues = {}

    if coverage.get("branch_rate", 0.0) < decision_threshold:
        issues["decision_gaps"] = coverage.get("uncovered_decision_ids", [])

    paragraph_rate = coverage.get("paragraph_rate", 0.0)
    if paragraph_rate < paragraph_threshold:
        issues["paragraph_gaps"] = [f"段落覆盖率不足: {paragraph_rate:.0%}"]

    if not complete_tests:
        issues["no_data"] = True

    return {
        "passed": not issues,
        "score": _compute_score(coverage, hina_result),
        "issues": issues,
    }
 def _compute_score(coverage: dict, hina_result: dict) -> float:
    """质量评分公式（COBOL 版）。
    评分 = 覆盖质量 × 0.6 + 边界质量 × 0.4
    覆盖质量 = 段落覆盖率 × 0.5 + 分支覆盖率 × 0.5
    边界质量 = HINA 必须项覆盖率（Phase 2 后启用，默认 1.0）
    """
    paragraph_rate = coverage.get("paragraph_rate", 0.0)
    branch_rate = coverage.get("branch_rate", 0.0)
    coverage_quality = paragraph_rate * 0.5 + branch_rate * 0.5
    boundary_quality = 1.0
    return round(coverage_quality * 0.6 + boundary_quality * 0.4, 2)
@@ -1,280 +0,0 @@
 """
 HINA 混淆组判定 — 基于 LLM 的 COBOL 程序结构分类。
 根据 extract_structure() 输出的结构特征，调用 LLM 将程序归类到
 混淆组（confusion group），并返回分类结果和策略参数。
 """
 import json
 import logging
 logger = logging.getLogger(__name__)
 # Prompt template for the confusion-group classification. It is filled in
 # with str.format(); the single-brace placeholders are the structure
 # features, and the doubled braces are literal braces of the JSON skeleton
 # the model is asked to return.
 CONFUSION_PROMPT = """你是一个 COBOL 程序混淆组分类专家。请根据以下程序结构特征，将其归类到合适的混淆组中。
 程序结构特征：
 - 段落数: {paragraph_count}
 - 决策点总数: {decision_count}
 - IF 语句数: {if_count}
 - EVALUATE 语句数: {evaluate_count}
 - 关联文件数: {file_count}
 - OPEN 方向: {open_directions}
 - SEARCH ALL: {has_search_all}
 - CALL 语句: {has_call}
 - KEY BREAK 关键词: {has_break}
 - 总分支数: {total_branches}
 混淆组定义：
 1. simple_sequential — 极少决策点（<=2），无 EVALUATE/SEARCH ALL/CALL，直接顺序执行
 2. condition_heavy — IF 语句占比高（>60% 的决策点），嵌套深，逻辑复杂
 3. evaluate_driven — EVALUATE 主导，多分支选择结构
 4. data_file_centric — 文件操作密集（>=2 文件），OPEN 方向多样（I-O/OUTPUT/INPUT）
 5. search_intensive — 包含 SEARCH ALL，表/数组查找为主
 6. call_based — 包含 CALL 语句，模块间调用为主
 7. mixed_complex — 同时具备多种复杂特征（决策点多且文件多且含 CALL/SEARCH 等）
 请按 JSON 格式输出分类结果，不要包含其他文字：
 ```json
 {{
  "category": "<混淆组类别>",
  "subtype": "<子类别，如 nested_if / flat_evaluate / multi_file 等>",
  "confidence": <0~1 置信度>,
  "features": {{
    "paragraph_count": {paragraph_count},
    "decision_count": {decision_count},
    "if_count": {if_count},
    "evaluate_count": {evaluate_count},
    "file_count": {file_count},
    "has_search_all": {has_search_all},
    "has_call": {has_call},
    "has_break": {has_break},
    "total_branches": {total_branches}
  }},
  "required_tests": <建议测试用例数，整数>,
  "strategy_params": {{
    "max_nesting_depth": <最大嵌套深度建议>,
    "coverage_target": "branch" 或 "path",
    "file_isolation": true 或 false,
    "supplement_strategy": "incremental" 或 "full" 或 "skip"
  }}
 }}
 ```"""
 def classify_with_llm(structure: dict, llm) -> dict:
    """Classify a program structure into a confusion group by calling an LLM.

    CONFUSION_PROMPT is filled from *structure* and sent together with a
    system message through ``llm.call(messages)``. The reply is parsed and
    normalised by ``_parse_llm_response``. If the call or the parsing raises,
    a warning is logged and the rule-based ``_fallback_classification`` is
    returned instead.

    Args:
        structure: structure dict; the keys read here are ``decision_points``
            (entries with a ``kind``), ``total_paragraphs`` / ``paragraphs``,
            ``open_directions``, ``has_search_all``, ``has_call``,
            ``has_break``, ``file_count`` and ``total_branches``.
        llm: client object whose ``call`` method takes the list of
            role/content message dicts built here; its return value is handed
            to ``_parse_llm_response``.

    Returns:
        dict with ``category``, ``subtype``, ``confidence``, ``features``,
        ``required_tests`` and ``strategy_params``.
    """
    decision_points = structure.get("decision_points", [])
    kinds = [dp.get("kind") for dp in decision_points]

    def _flag(key):
        # Booleans are rendered as lower-case text for the JSON skeleton.
        return str(structure.get(key, False)).lower()

    prompt = CONFUSION_PROMPT.format(
        paragraph_count=structure.get("total_paragraphs", len(structure.get("paragraphs", []))),
        decision_count=len(decision_points),
        if_count=kinds.count("IF"),
        evaluate_count=kinds.count("EVALUATE"),
        file_count=structure.get("file_count", 0),
        open_directions=json.dumps(structure.get("open_directions", {}), ensure_ascii=False),
        has_search_all=_flag("has_search_all"),
        has_call=_flag("has_call"),
        has_break=_flag("has_break"),
        total_branches=structure.get("total_branches", 0),
    )

    system_message = {"role": "system", "content": "你是一个 COBOL 程序混淆组分类专家。只输出 JSON，不要输出解释。"}
    user_message = {"role": "user", "content": prompt}

    try:
        result = _parse_llm_response(llm.call([system_message, user_message]))
        logger.info(
            "HINA classification: %s/%s (confidence=%.2f, tests=%s)",
            result.get("category", "?"),
            result.get("subtype", "?"),
            result.get("confidence", 0.0),
            result.get("required_tests", "?"),
        )
        return result
    except Exception as e:
        logger.warning("HINA LLM classification failed: %s", e)
        return _fallback_classification(structure)
 def _parse_llm_response(raw: str) -> dict:
    """Extract the JSON payload from an LLM reply and validate it.

    When the reply contains a fenced block, only the text between the opening
    fence and the next fence (or the end of the reply) is parsed. The JSON
    is decoded with ``json.loads`` and passed through ``_validate_result``.
    """
    text = raw.strip()
    # Prefer a fence tagged as json; otherwise accept an untagged fence.
    for fence in ("```json", "```"):
        opening = text.find(fence)
        if opening < 0:
            continue
        start = opening + len(fence)
        end = text.find("```", start)
        if end < 0:
            end = len(text)
        text = text[start:end].strip()
        break
    return _validate_result(json.loads(text))
 def _validate_result(parsed: dict) -> dict:
    """验证并规范化 LLM 返回的分类结果。"""
    defaults = {
        "category": "unknown",
        "subtype": "",
        "confidence": 0.0,
        "features": {},
        "required_tests": 1,
        "strategy_params": {
            "max_nesting_depth": 1,
            "coverage_target": "branch",
            "file_isolation": False,
            "supplement_strategy": "full",
        },
    }
    result = {}
    for key, default_value in defaults.items():
        value = parsed.get(key, default_value)
        if key == "confidence":
            try:
                value = float(value)
                value = max(0.0, min(1.0, value))
            except (ValueError, TypeError):
                value = 0.0
        elif key == "required_tests":
            try:
                value = int(value)
                value = max(1, value)
            except (ValueError, TypeError):
                value = 1
        result[key] = value
    return result
 def _fallback_classification(structure: dict) -> dict:
    """当 LLM 调用失败时，基于规则的兜底分类。"""
    decision_points = structure.get("decision_points", [])
    if_count = sum(1 for dp in decision_points if dp.get("kind") == "IF")
    evaluate_count = sum(1 for dp in decision_points if dp.get("kind") == "EVALUATE")
    total_decisions = len(decision_points)
    file_count = structure.get("file_count", 0)
    has_search_all = structure.get("has_search_all", False)
    has_call = structure.get("has_call", False)
    has_break = structure.get("has_break", False)
    # 规则优先级：从高到低
    if total_decisions == 0:
        category, subtype = "simple_sequential", "no_branch"
        required_tests = 1
        strategy = {"max_nesting_depth": 0, "coverage_target": "branch",
                     "file_isolation": False, "supplement_strategy": "skip"}
    elif has_search_all:
        category, subtype = "search_intensive", "table_lookup"
        required_tests = max(total_decisions, 3)
        strategy = {"max_nesting_depth": 3, "coverage_target": "path",
                     "file_isolation": True, "supplement_strategy": "incremental"}
    elif has_call:
        category, subtype = "call_based", "external_call"
        required_tests = max(total_decisions, 3)
        strategy = {"max_nesting_depth": 2, "coverage_target": "branch",
                     "file_isolation": False, "supplement_strategy": "full"}
    elif evaluate_count > if_count and evaluate_count >= 2:
        category, subtype = "evaluate_driven", "multi_way"
        required_tests = total_decisions + 1
        strategy = {"max_nesting_depth": evaluate_count, "coverage_target": "path",
                     "file_isolation": False, "supplement_strategy": "full"}
    elif file_count >= 2:
        category, subtype = "data_file_centric", "multi_file"
        required_tests = max(total_decisions, file_count * 2)
        strategy = {"max_nesting_depth": 2, "coverage_target": "branch",
                     "file_isolation": True, "supplement_strategy": "incremental"}
    elif if_count >= 5 or total_decisions >= 8:
        category, subtype = "condition_heavy", "nested_if"
        required_tests = total_decisions + 2
        strategy = {"max_nesting_depth": 4, "coverage_target": "path",
                     "file_isolation": False, "supplement_strategy": "incremental"}
    elif if_count >= 2:
        category, subtype = "condition_heavy", "simple_if"
        required_tests = total_decisions + 1
        strategy = {"max_nesting_depth": 2, "coverage_target": "branch",
                     "file_isolation": False, "supplement_strategy": "incremental"}
    else:
        category, subtype = "simple_sequential", "minimal"
        required_tests = 1
        strategy = {"max_nesting_depth": 0, "coverage_target": "branch",
                     "file_isolation": False, "supplement_strategy": "skip"}
    # 检查是否应升级为 mixed_complex
    complexity_flags = sum([
        has_search_all,
        has_call,
        has_break,
        file_count >= 2,
        if_count >= 5,
        evaluate_count >= 3,
    ])
    if complexity_flags >= 3:
        category, subtype = "mixed_complex", f"{subtype}_plus"
        required_tests = max(required_tests, 10)
        strategy["max_nesting_depth"] = max(strategy.get("max_nesting_depth", 2), 5)
        strategy["coverage_target"] = "path"
        strategy["supplement_strategy"] = "full"
    return {
        "category": category,
        "subtype": subtype,
        "confidence": 0.6,
        "features": {
            "paragraph_count": structure.get("total_paragraphs", len(structure.get("paragraphs", []))),
            "decision_count": total_decisions,
            "if_count": if_count,
            "evaluate_count": evaluate_count,
            "file_count": file_count,
            "has_search_all": has_search_all,
            "has_call": has_call,
            "has_break": has_break,
            "total_branches": structure.get("total_branches", 0),
        },
        "required_tests": required_tests,
        "strategy_params": strategy,
    }
@@ -1,82 +0,0 @@
 """
 分层重试 — 部署在 orchestrator 调用者层（main.py / worker.py）。
 """
 import logging
 import os
 from typing import Callable
 from data.diff_result import VerificationRun
 logger = logging.getLogger(__name__)
 # Registry of self-healing recipes, keyed by a short failure name.
 # Each entry provides two callables:
 #   "detect": predicate over the build log text (None is treated as "").
 #   "fix":    zero-argument callable that applies the remedy.
 # RetryHandler.run walks the entries in insertion order and applies the first
 # one whose "detect" returns True.
 HEALING_FIXES = {
    # Build log contains "not found" (case-insensitive): set COB_LIBRARY_PATH
    # unless the variable already has a value (see _try_set_env).
    # NOTE(review): the path below is an absolute, machine-specific Windows
    # location (presumably a GnuCOBOL library directory) — confirm it exists on
    # the target host or make it configurable.
    "compile_error": {
        "detect": lambda log: "not found" in (log or "").lower(),
        "fix": lambda: _try_set_env(
            "COB_LIBRARY_PATH",
            "D:\\360安全浏览器下载\\GC32-BDB-SP1-rename-7z-to-exe\\lib\\gnucobol",
        ),
    },
    # Build log contains the literal "S0C7" (case-sensitive). Nothing is
    # repaired automatically: the "fix" only logs a warning saying the numeric
    # fields in the test data need manual correction.
    "s0c7": {
        "detect": lambda log: "S0C7" in (log or ""),
        "fix": lambda: logger.warning("[Retry] S0C7 需要人工修正测试数据中的数值字段"),
    },
 }
 def _try_set_env(key: str, value: str) -> None:
    """Set environment variable ``key`` to ``value`` unless it already has a value.

    A variable that is missing or set to the empty string counts as unset.
    Either outcome is reported through the module logger at INFO level.

    Args:
        key: Name of the environment variable.
        value: Value to store when the variable is currently unset.
    """
    if os.environ.get(key):
        # Respect whatever is already configured in the environment.
        logger.info(f"[Retry] {key} 已存在，跳过")
        return
    os.environ[key] = value
    logger.info(f"[Retry] 已设置环境变量 {key}={value}")
 class RetryHandler:
    """Re-run a pipeline callable until it succeeds or the retry budget is spent.

    Two counters are kept: ``heal_count`` (retries made after applying an entry
    of ``HEALING_FIXES``) and ``simple_count`` (plain retries). The loop stops
    once their sum reaches ``max_heal + max_simple``. Only healing is capped
    individually (by ``max_heal``); plain retries may use up the rest of the
    combined budget. The counters and ``history`` are never reset by ``run``,
    so they carry over if the same handler is used again.
    """

    def __init__(self, max_heal: int = 2, max_simple: int = 3):
        """Store the two retry limits and start with empty counters/history."""
        self.max_heal, self.max_simple = max_heal, max_simple
        self.heal_count = self.simple_count = 0
        self.history: list[VerificationRun] = []

    def _record_counts(self, vr):
        """Copy the current retry counters onto ``vr`` and return it."""
        vr.heal_retry = self.heal_count
        vr.simple_retry = self.simple_count
        vr.total_retry = self.heal_count + self.simple_count
        return vr

    def _apply_first_fix(self, vr) -> bool:
        """Apply the first healing recipe matching the COBOL build log.

        Returns True when a recipe was applied (``heal_count`` is incremented
        after its "fix" callable returns), False when none matched.
        """
        build_log = vr.debug.get("cobol_build", {}).get("log", "")
        for name, fix_def in HEALING_FIXES.items():
            if not fix_def["detect"](build_log):
                continue
            fix_def["fix"]()
            self.heal_count += 1
            logger.info(
                f"[Retry] 自愈修复应用: {name} (heal_retry={self.heal_count})"
            )
            return True
        return False

    def run(self, pipeline_fn: Callable[[], VerificationRun]) -> VerificationRun:
        """Call ``pipeline_fn`` repeatedly and return the decisive result.

        Args:
            pipeline_fn: Zero-argument callable producing one verification run.

        Returns:
            The first run whose status is "PASS" or "QUALITY_WARN", or, once
            the budget is exhausted, the last recorded run (a new one if none
            was recorded) with status forced to "FATAL" and exit code 4.
            The retry counters are written onto the returned object.
        """
        while (self.heal_count + self.simple_count) < (self.max_heal + self.max_simple):
            outcome = pipeline_fn()
            self.history.append(outcome)

            if outcome.status in ("PASS", "QUALITY_WARN"):
                return self._record_counts(outcome)

            # Healing is attempted only for BLOCKED/ERROR runs while the heal
            # budget lasts; a successful match retries immediately.
            may_heal = (
                outcome.status in ("BLOCKED", "ERROR")
                and self.heal_count < self.max_heal
            )
            if may_heal and self._apply_first_fix(outcome):
                continue

            self.simple_count += 1
            logger.info(f"[Retry] 朴素重试 (simple_retry={self.simple_count})")

        logger.error("[Retry] 重试次数超过上限，标记 FATAL")
        if self.history:
            final = self.history[-1]
        else:
            final = VerificationRun(status="FATAL", exit_code=4)
        final.status = "FATAL"
        final.exit_code = 4
        return self._record_counts(final)
@@ -1,103 +0,0 @@
 """
 HINA 策略模板 — 根据程序分类定义必须的测试项和边界条件。
 Task 2.2: 必须项模板 + supplement 函数
 """
 # Strategy templates keyed by HINA program category name (Japanese labels).
 # Each template lists the test-item codes under "required" and, for some
 # categories only, additional codes under "boundary". Consumers must treat
 # "boundary" as optional.
 STRATEGY_TEMPLATES: dict[str, dict] = {
    # "マッチング": matching
    "マッチング": {
        "required": [
            "COM-N001", "COM-N002", "COM-A002", "COM-A003",
            "MT-N001", "MT-N002", "MT-N004", "MT-N005", "MT-N006",
        ],
        "boundary": ["MT-B001", "MT-B002"],
    },
    # "キーブレイク": key break
    "キーブレイク": {
        "required": [
            "COM-N001", "COM-A002",
            "KB-N001", "KB-N004", "KB-N005", "KB-A001",
        ],
        "boundary": ["KB-B001", "KB-B002"],
    },
    # "条件分岐": conditional branching (no boundary list)
    "条件分岐": {
        "required": [
            "B-N001", "B-N003", "B-N006", "B-N009",
        ],
    },
    # "内部表検索": internal table search (no boundary list)
    "内部表検索": {
        "required": [
            "T-N001", "T-N002", "T-A001", "T-A002",
        ],
    },
    # "項目チェック": item check (no boundary list)
    "項目チェック": {
        "required": [
            "VF-N001", "VF-N002", "VF-N004", "VF-A001",
        ],
    },
 }
 def get_strategy(hina_type: str) -> dict:
    """Return the strategy template registered for a HINA program category.

    The result is always a fresh dict whose lists are copies, so callers may
    extend or reorder them without altering ``STRATEGY_TEMPLATES``.

    Args:
        hina_type: HINA category name (e.g. "マッチング").

    Returns:
        dict: the ``required`` code list plus, when the template defines one,
        the ``boundary`` code list. An unknown category yields the empty
        template ``{"required": [], "boundary": []}``.
    """
    template = STRATEGY_TEMPLATES.get(hina_type)
    if template is None:
        return {"required": [], "boundary": []}
    # Copy each list: handing out the registry's own objects would let a
    # caller's append() leak into every later lookup.
    return {section: list(codes) for section, codes in template.items()}
 def _make_marker(code: str, prefix: str = "REQ") -> dict:
    """生成一条标记记录。"""
    return {
        "id": f"{prefix}-{code}",
        "coverage_targets": [code],
        "fields": {},
    }
 def supplement(base_tests: list[dict], hina_result: dict) -> list[dict]:
    """Append the template's marker records for the classified HINA category.

    The category is read from ``hina_result["category"]`` (falling back to
    "unknown") and resolved through ``get_strategy``. Every ``required`` code
    becomes a marker with the default prefix and every ``boundary`` code a
    marker with the "BND" prefix. Markers are appended without checking whether
    an equivalent record is already present.

    Args:
        base_tests: Existing test records; the list itself is not modified.
        hina_result: Classification result; only the "category" key is read.

    Returns:
        list[dict]: a new list holding ``base_tests`` followed by the required
        markers and then the boundary markers.
    """
    category = hina_result.get("category", "unknown")
    template = get_strategy(category)

    combined = list(base_tests)
    combined.extend(
        _make_marker(code) for code in template.get("required", [])
    )
    combined.extend(
        _make_marker(code, prefix="BND") for code in template.get("boundary", [])
    )
    return combined
 def supplement_only(base_tests: list[dict], hina_gaps: list[str]) -> list[dict]:
    """Append marker records for an explicit list of codes.

    No template lookup takes place: exactly the codes passed in are turned
    into markers, each with the default "REQ" prefix.

    Args:
        base_tests: Existing test records; the list itself is not modified.
        hina_gaps: Codes of the required items that still need a marker.

    Returns:
        list[dict]: a new list holding ``base_tests`` followed by one marker
        per entry of ``hina_gaps``, in the given order.
    """
    return list(base_tests) + [_make_marker(code) for code in hina_gaps]
@@ -15,9 +15,6 @@ def main():
    p.add_argument("--verbose", action="store_true")
    p.add_argument("--dry-run", action="store_true")
    p.add_argument("--output-dir", default="./reports")
    p.add_argument("--quality-gate-mode", choices=["warn", "off"], default="warn",
                   help="质量门禁模式: warn=记录警告, off=关闭")
    p.add_argument("--gcov", action="store_true", help="启用 gcov 覆盖率采集")
    args = p.parse_args()
    if args.dry_run:
@@ -38,8 +35,6 @@ def main():
    c.runner_mode = args.runner
    c.coverage_default = args.coverage
    c.tolerance = args.tolerance
    c.quality_gate_mode = args.quality_gate_mode
    c.gcov_enabled = args.gcov
    vr = run_pipeline(c, args.copybook, args.cobol_src, args.java_src, args.mapping)
    t = vr.fields_matched + vr.fields_mismatched
    print(f"{vr.program}: {vr.status} ({vr.fields_matched}/{t}, {vr.duration_s:.0f}s)" if t else f"{vr.program}: {vr.status}")
@@ -1,7 +1,7 @@
-import shutil, time, logging
+import shutil, time
 from pathlib import Path
 from data.field_tree import FieldTree
-from data.test_case import TestSuite, SparkConfig, TestCase
+from data.test_case import TestSuite, SparkConfig
 from data.diff_result import VerificationRun, FieldResult
 from runners.runner import Runner
 from runners.native_java_runner import NativeJavaRunner
@@ -18,14 +18,6 @@ from comparator.cobol_binary_reader import CobolBinaryReader
 from report.generator import ReportGenerator
 from storage.bundle import TestDataBundle
 from config import Config
 from cobol_testgen import extract_structure, generate_data, incremental_supplement
 from cobol_testgen.coverage import check_coverage
 from hina.gate import check as gate_check
 from hina.classifier import compute_confidence
 from hina.hina_agent import classify_with_llm
 from hina.strategy import supplement as strategy_supplement
 logger = logging.getLogger(__name__)
 def run_pipeline(cfg: Config, cpath: str, cbl: str, java: str, map_path: str) -> VerificationRun:
@@ -48,68 +40,6 @@ def run_pipeline(cfg: Config, cpath: str, cbl: str, java: str, map_path: str) ->
        if vr.llm_cost > cfg.max_llm_cost:
            return _done(vr, t0, "BLOCKED", 3)
            # ── Phase 1+2: cobol_testgen + HINA Agent + 策略 Agent + 质量门禁 ──
        try:
            cobol_src_text = Path(cbl).read_text(encoding="utf-8")
            structure = extract_structure(cobol_src_text)
            # HINA Agent 类型判定
            hina_result = {}
            try:
                hina_result = compute_confidence(cobol_src_text, structure)
                if hina_result.get("confidence", 0) < 0.7 and structure:
                    llm_hina = classify_with_llm(structure, llm)
                    if llm_hina.get("confidence", 0) > hina_result.get("confidence", 0):
                        hina_result = llm_hina
                vr.hina_type = hina_result.get("category", "")
                vr.hina_confidence = hina_result.get("confidence", 0.0)
                vr.debug["hina_result"] = hina_result
            except Exception as e:
                vr.debug["hina_agent_error"] = str(e)
                logger.warning(f"[orchestrator] HINA Agent 判定失败: {e}")
            # cobol_testgen 路径枚举 + 基础数据生成
            base_records = generate_data(cobol_src_text, structure)
            vr.debug["cobol_testgen_records"] = len(base_records)
            vr.debug["total_branches"] = structure.get("total_branches", 0)
            base_testcases = []
            for i, rec in enumerate(base_records):
                base_testcases.append(TestCase(id=f"CTG-{i+1:04d}", fields=dict(rec)))
            # 策略 Agent 补充
            strategy_tests = strategy_supplement(base_testcases, hina_result)
            complete_tests = base_testcases + strategy_tests
            # 质量门禁循环
            cov = check_coverage(structure, base_records)
            for attempt in range(cfg.max_quality_retries):
                gate_result = gate_check(
                    complete_tests, hina_result, cov,
                    decision_threshold=cfg.quality_gate_decision_threshold,
                    paragraph_threshold=cfg.quality_gate_paragraph_threshold,
                )
                if gate_result.get("passed"):
                    break
                gaps = gate_result.get("issues", {}).get("decision_gaps", [])
                if gaps and structure.get("branch_tree_obj"):
                    delta = incremental_supplement(structure["branch_tree_obj"], gaps)
                    base_records.extend(delta)
                    cov = check_coverage(structure, base_records)
                else:
                    break
            vr.paragraph_rate = cov.get("paragraph_rate", 0.0)
            vr.branch_rate = cov.get("branch_rate", 0.0)
            vr.decision_rate = cov.get("decision_rate", 0.0)
            if cfg.quality_gate_mode != "off" and not gate_result.get("passed", True):
                vr.quality_warn = f"质量门禁未完全通过 (尝试 {attempt+1} 次)"
                vr.debug["quality_issues"] = gate_result.get("issues", {})
        except Exception as e:
            vr.debug["cobol_testgen_error"] = str(e)
            logger.warning(f"[orchestrator] cobol_testgen 分析失败: {e}")
        suite = Agent2Data(llm).design(tree, cfg.coverage_default, cfg.runner_mode == "spark")
        vr.llm_cost += 0.002
        vr.debug["test_cases"] = [{"id":tc.id,"fields":tc.fields,"targets":tc.coverage_targets} for tc in suite.test_cases]