merge local cobol_testgen improvements into v3 shared modules

- cond.py: SQLCODE/SQLSTATE handling, alphanumeric >/< boundary fix
- output.py: termination tracking, db_input support, _is_field_assigned filter
- coverage.py: mark_from_gcov, THRU support, KeyError protection
- gcov.py: new file (dependency for coverage.py)
- grammar.lark: multi-segment PIC support
- read.py: SQL INCLUDE resolution, DECLARE TABLE parsing, * comment fix
- core.py: SQL parsing, blocked_names, keyword list
- design.py: multi-sentinel, THRU ranges, PERFORM VARYING last iteration
- __init__.py: local main() + v3 API functions, guarded imports

All 6 ZAN programs verified passing through v3 pipeline
2026-06-23 22:38:17 +08:00
parent e5ab3baa46
commit 7fb9304212
9 changed files with 1595 additions and 326 deletions
@@ -1,14 +1,14 @@
 """COBOL Test Data Generator — 模块化版入口

-from __future__ import annotations
 公开 API:
  extract_structure()    — 解析 COBOL 控制流 → dict
  generate_data()        — 生成测试数据 → list[dict]
  incremental_supplement — 差分补充数据 → list[dict]
-  check_coverage()       — 覆盖率报告 → dict
 """

+import os
 import sys
+import json
 import re
 import logging
 from datetime import datetime
@@ -16,25 +16,45 @@ from pathlib import Path

 # ── 配置（必须放在本地模块导入之前，避免循环导入） ──

-CONFIG = {}
+CONFIG = {
+    'abend_programs': ['SUB03END'],
+}

 from .read import preprocess, extract_data_division, extract_procedure_division
-from .read import resolve_copybooks, parse_data_division, parse_file_section, scan_open_statements, parse_file_control
-from .core import classify_field_roles, _init_child_names
-from .pipeline_bridge import build_branch_tree_fallback
+from .read import resolve_copybooks, parse_data_division, parse_file_section, scan_open_statements
+from .read import parse_file_control, resolve_sql_includes, strip_exec_sql_from_data_div
+from .core import build_branch_tree, classify_field_roles, _init_child_names, sql_register_virtual_fields, _find_multi_write_fds
 from .cond import parse_single_condition, is_field, collect_leaves
-from .design_mcdc import enum_paths, _filter_stop
-from .design import generate_records
+from .pipeline_bridge import build_branch_tree_fallback
+from .design_mcdc import enum_paths as mcdc_enum_paths, _filter_stop
+from .design import enum_paths, generate_records, get_term_type, extend_abend_programs
 from .output import output_json, output_input_files
-from .coverage import run_coverage, generate_coverage_index, check_coverage
+from .coverage import run_coverage, generate_coverage_index
 from japanese_data import generate_fullwidth_text, generate_halfwidth_katakana, generate_wareki_date

+try:
+    from .runner import run_and_compare, run_all, GroupInfo, GroupResult
+    _HAVE_RUNNER = True
+except ImportError:
+    _HAVE_RUNNER = False
+
+try:
+    from .gcov import run_gcov
+    _HAVE_GCOV = True
+except ImportError:
+    _HAVE_GCOV = False
+
+try:
+    from .to_sql import collect_sql_meta, build_db_input
+    _HAVE_TOSQL = True
+except ImportError:
+    _HAVE_TOSQL = False
+
 logger = logging.getLogger(__name__)
-n__all__ = [
+__all__ = [
    "extract_structure",
    "generate_data",
    "incremental_supplement",
-    "check_coverage",
    "CONFIG",
    "generate_fullwidth_text",
    "generate_halfwidth_katakana",
@@ -107,6 +127,149 @@ def expand_occurs(fields):
    return result


+# ── PREV 连锁 ──
+
+
+def _constraint_in(cons, field, op, value, want):
+    """Report whether ``cons`` contains the 4-item constraint (field, op, value, want).
+
+    Entries whose length is not exactly 4 are ignored.
+    """
+    return any(
+        len(entry) == 4
+        and entry[0] == field
+        and entry[1] == op
+        and entry[2] == value
+        and entry[3] == want
+        for entry in cons
+    )
+
+
+def _inc_str(s, length):
+    """Return a value one step above ``s``, formatted to ``length`` characters.
+
+    If ``s`` parses as an integer, the result is ``int(s) + 1`` zero-filled to
+    ``length`` (it may be longer than ``length`` when the increment adds a digit).
+
+    Otherwise ``s`` is space-padded/truncated to ``length`` and scanned from the
+    right: a '9' is reset to '0' and the carry moves left; 'Z', 'z' and '\\xff'
+    are left untouched and the carry moves left; a space becomes '0'; any other
+    character is replaced by its successor code point.
+
+    When the carry never lands (every position was '9', 'Z', 'z' or '\\xff'),
+    the padded input is returned unchanged rather than a string whose '9's were
+    reset to '0' -- that wrapped value would compare lower than the input.
+    """
+    try:
+        return str(int(s) + 1).zfill(length)
+    except ValueError:
+        padded = str(s).ljust(length)[:length]
+        chars = list(padded)
+        for pos in range(len(chars) - 1, -1, -1):
+            ch = chars[pos]
+            if ch == '9':
+                # Tentative carry: only kept if a position further left absorbs it.
+                chars[pos] = '0'
+                continue
+            if ch in 'Zz\xff':
+                continue
+            chars[pos] = '0' if ch == ' ' else chr(ord(ch) + 1)
+            return ''.join(chars)
+        # Nothing could be incremented: saturate instead of wrapping around.
+        return padded
+
+
+def _dec_str(s, length):
+    """Return a value one step below ``s``, formatted to ``length`` characters.
+
+    If ``s`` parses as an integer, the result is ``max(0, int(s) - 1)``
+    zero-filled to ``length``.
+
+    Otherwise ``s`` is space-padded/truncated to ``length`` and scanned from the
+    right: a '0' becomes '9' and the borrow moves left; 'A', 'a' and '\\x00' are
+    left untouched and the borrow moves left; a space stops the scan; any other
+    character is replaced by its predecessor code point.
+
+    When the borrow never lands (a space was reached, or every position was
+    '0', 'A', 'a' or '\\x00'), the padded input is returned unchanged. This
+    mirrors the clamp-at-zero of the numeric branch and avoids returning a
+    string whose '0's were flipped to '9', which would compare higher than
+    the input.
+    """
+    try:
+        n = max(0, int(s) - 1)
+        return str(n).zfill(length)
+    except ValueError:
+        padded = str(s).ljust(length)[:length]
+        chars = list(padded)
+        for pos in range(len(chars) - 1, -1, -1):
+            ch = chars[pos]
+            if ch == ' ':
+                break
+            if ch == '0':
+                # Tentative borrow: only kept if a position further left absorbs it.
+                chars[pos] = '9'
+                continue
+            if ch in 'Aa\x00':
+                continue
+            chars[pos] = chr(ord(ch) - 1)
+            return ''.join(chars)
+        # Nothing could be decremented: saturate instead of wrapping around.
+        return padded
+
+
+def _field_length(fname, fields):
+    """Return the character width of the field named ``fname``.
+
+    The first entry of ``fields`` whose ``'name'`` equals ``fname`` decides the
+    result: ``digits + decimal`` from its ``'pic_info'`` when that sum is
+    non-zero, else ``pic_info['length']`` when non-zero, else 1. Returns 1 when
+    no field matches.
+
+    A missing or ``None`` ``'pic_info'``, and ``None`` values for its
+    ``'digits'`` / ``'decimal'`` / ``'length'`` entries, are treated as absent
+    instead of raising.
+    """
+    for f in fields:
+        if f['name'] == fname:
+            # ``.get('pic_info', {})`` would still yield None for an explicit None value.
+            pi = f.get('pic_info') or {}
+            numeric_width = (pi.get('digits') or 0) + (pi.get('decimal') or 0)
+            return numeric_width or pi.get('length') or 1
+    return 1
+
+
+def _chain_prev(records, path_infos, fields, fd_fields, field_to_fd, open_dir):
+    """Chain "previous record" (PREV) values across consecutive records, in place.
+
+    Modifies ``records`` so that the path taken when the records are run as one
+    batch agrees with the comparisons the program actually performs.
+
+    The constraints of path ``k-1`` (PREV op CURRENT) correspond to the real
+    comparison made in loop iteration ``k-1`` of the batch:
+      PREV    = the R01 fields of ``records[prev_src]`` (previous value kept
+                inside the program)
+      CURRENT = the R01 fields of ``records[k]`` (value read now)
+    This function adjusts the fields of ``records[k]`` so that the cross-record
+    comparison satisfies the path constraints.
+
+    Args:
+        records: list of dicts keyed by field name; mutated in place.
+        path_infos: sequence whose element ``k-1`` has the constraint list of
+            path ``k-1`` at index 0.
+        fields: field definitions, used only to look up field widths via
+            ``_field_length``.
+        fd_fields, field_to_fd, open_dir: accepted but not used in this body.
+
+    Returns:
+        None. Besides field values, each ``records[k]`` (k >= 1) that is
+        processed gains the keys ``'_w02_path'`` and ``'_overlap_path'``.
+    """
+    N = len(records)
+    if N < 2:
+        return

+    # Discover R01* fields (excluding R01INNREC*) that have a WRK-PREV-*
+    # counterpart in the first record, and classify them by name substring.
+    key_fields = []
+    time_start_field = None
+    time_end_field = None
+    for fname in records[0]:
+        if fname.startswith('R01') and not fname.startswith('R01INNREC'):
+            base = fname[3:]
+            prev_name = 'WRK-PREV-' + base
+            if prev_name in records[0]:
+                if 'EMP-ID' in fname or 'APPL-DATE' in fname:
+                    key_fields.append(fname)
+                if 'END-TIME' in fname:
+                    time_end_field = fname
+                if 'START-TIME' in fname:
+                    time_start_field = fname

+    # Index of the record whose R01 values act as PREV for the next record.
+    prev_src = 0
+    for k in range(1, N):
+        # Stop as soon as there is no path entry for this record pair.
+        if k - 1 >= len(path_infos):
+            break
+        cons = path_infos[k - 1][0]

+        # Same key: every key field is constrained as WRK-PREV-x = R01x (true).
+        is_same_key = all(
+            _constraint_in(cons, f'WRK-PREV-{fn[3:]}', '=', fn, True)
+            for fn in key_fields
+        ) if key_fields else False
+        # NOTE: these are `and` chains, so they evaluate to None (not False)
+        # when the key matches but a time field was not discovered above.
+        is_overlap = is_same_key and time_end_field and time_start_field and \
+            _constraint_in(cons, f'WRK-PREV-{time_end_field[3:]}', '>', time_start_field, True)
+        is_normal = is_same_key and time_end_field and time_start_field and \
+            (_constraint_in(cons, f'WRK-PREV-{time_end_field[3:]}', '<=', time_start_field, True) or
+             _constraint_in(cons, f'WRK-PREV-{time_end_field[3:]}', '>', time_start_field, False))

+        # Copy the PREV source's R01 values into this record's WRK-PREV-* fields
+        # (only for WRK-PREV-* keys the record already has).
+        for fname in records[prev_src]:
+            if fname.startswith('R01') and not fname.startswith('R01INNREC'):
+                base = fname[3:]
+                prev_name = 'WRK-PREV-' + base
+                if prev_name in records[k]:
+                    records[k][prev_name] = records[prev_src][fname]

+        if is_same_key:
+            # Force the key fields to equal those of the PREV source.
+            for kf in key_fields:
+                if kf in records[k] and kf in records[prev_src]:
+                    records[k][kf] = records[prev_src][kf]

+            # Non-overlap path: current start must be above the previous end.
+            if is_normal and time_end_field and time_start_field:
+                prev_end = records[prev_src].get(time_end_field, '')
+                curr_start = records[k].get(time_start_field, '')
+                if prev_end >= curr_start:
+                    length = _field_length(time_start_field, fields)
+                    records[k][time_start_field] = _inc_str(prev_end, length)

+            # Overlap path: current start must be below the previous end.
+            if is_overlap and time_end_field and time_start_field:
+                prev_end = records[prev_src].get(time_end_field, '')
+                curr_start = records[k].get(time_start_field, '')
+                if prev_end <= curr_start:
+                    length = _field_length(time_start_field, fields)
+                    records[k][time_start_field] = _dec_str(prev_end, length) if prev_end else '0' * length

+        else:
+            # Different-key path: bump any key field that happens to equal the
+            # PREV source's value so the keys differ.
+            for kf in key_fields:
+                if kf in records[k] and kf in records[prev_src]:
+                    if records[k][kf] == records[prev_src][kf]:
+                        length = _field_length(kf, fields)
+                        records[k][kf] = _inc_str(str(records[k][kf]), length)

+        # Bookkeeping flags stored on the record (values may be None/False/True,
+        # see the note on the `and` chains above).
+        records[k]['_w02_path'] = is_same_key and time_end_field and time_start_field and not is_overlap
+        records[k]['_overlap_path'] = is_overlap

+        # Mirror each R01* value into the matching W01* field when one exists.
+        for fn in list(records[k].keys()):
+            if fn.startswith('R01') and not fn.startswith('R01INNREC'):
+                wfn = 'W01' + fn[3:]
+                if wfn in records[k]:
+                    records[k][wfn] = records[k][fn]

+        # On an overlap path the PREV source is left where it is; otherwise the
+        # current record becomes the PREV source for the next iteration.
+        if is_overlap:
+            pass
+        else:
+            prev_src = k
+
+
 # ── 入口 ──

 def main():
@@ -116,7 +279,32 @@ def main():

    args = sys.argv[1:]

-    # 分离 cobol 文件与输出目录
+    do_run = False
+    gcov_mode = False
+    temp_dir = None
+    if '--run' in args:
+        do_run = True
+        args.remove('--run')
+    if '--gcov' in args:
+        gcov_mode = True
+        args.remove('--gcov')
+    i = 0
+    while i < len(args):
+        if args[i] == '--temp-dir':
+            if i + 1 < len(args):
+                temp_dir = args[i + 1]
+                args.pop(i + 1)
+                args.pop(i)
+            else:
+                args.pop(i)
+            break
+        elif args[i].startswith('--temp-dir='):
+            temp_dir = args[i].split('=', 1)[1]
+            args.pop(i)
+            break
+        else:
+            i += 1
+
    cobol_files = []
    outdir = None
    for a in args:
@@ -133,13 +321,13 @@ def main():
    if outdir is None:
        outdir = cobol_files[0].parent

-    # 配置全局 Logger
    outdir.mkdir(parents=True, exist_ok=True)
-    log_path = outdir / f"cobol_testgen_{datetime.now():%Y%m%d_%H%M%S}.log"
+    (outdir / 'logs').mkdir(parents=True, exist_ok=True)
+    log_path = outdir / 'logs' / f"cobol_testgen_{datetime.now():%Y%m%d_%H%M%S}.log"
    fh = logging.FileHandler(log_path, encoding="utf-8", mode="w")
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(logging.Formatter(
-"%(asctime)s [%(levelname)s] %(name)s: %(message)s"
+        "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
    ))
    sh = logging.StreamHandler()
    sh.setLevel(logging.INFO)
@@ -157,12 +345,20 @@ def main():
            continue

        source = filepath.read_text(encoding='utf-8')
-        source = resolve_copybooks(source, str(filepath.parent))
+        source = resolve_copybooks(
+            source,
+            str(filepath.parent),
+            extra_search_paths=[str(filepath.parent / '..' / 'cpy')],
+        )
+        source = resolve_sql_includes(source, str(filepath.parent))
        preprocessed = preprocess(source)
        file_sec = parse_file_section(preprocessed)

-        # DATA DIVISION解析
        data_div = extract_data_division(preprocessed)
+        if data_div:
+            data_div, declared_columns = strip_exec_sql_from_data_div(data_div)
+        else:
+            declared_columns = {}
        if not data_div:
            logger.error(f"错误：{filepath.name} 中没有 DATA DIVISION。")
            continue
@@ -172,7 +368,6 @@ def main():
            logger.error(f"错误：{filepath.name} 中没有找到含 PIC 的字段。")
            continue

-        # FieldDef → dict
        fields_dict = []
        parent_pic = {}
        filler_counter = 0
@@ -206,7 +401,6 @@ def main():
            if f.is_88:
                entry['is_88'] = True
                entry['parent'] = f.parent
-                # Copy parent's pic_info for value generation
                if f.parent and f.parent in parent_pic:
                    entry['pic_info'] = dict(parent_pic[f.parent])
            else:
@@ -215,7 +409,8 @@ def main():

        fields_dict = expand_occurs(fields_dict)

-        # Build FD→children 和 field→FD 映射
+        sql_register_virtual_fields(fields_dict)
+
        fd_fields = {}
        field_to_fd = {}
        if file_sec:
@@ -245,13 +440,12 @@ def main():
            pic_display = str(f.get('pic', '')) if f.get('pic') else ('88-level' if f.get('is_88') else '')
            logger.info(f"{f['level']:<6} {f['name']:<25} {pic_display:<15} {t:<12} {l:<5}")

-        # PROCEDURE DIVISION解析
        proc_div = extract_procedure_division(preprocessed)
        branch_paths = []
        assignments = {}

        if proc_div:
-            branch_tree, assignments = build_branch_tree_fallback(proc_div, fields_dict)
+            branch_tree, assignments = build_branch_tree(proc_div, fields_dict, full_source=preprocessed)

            roles = classify_field_roles(branch_tree, assignments, fields_dict,
                                         source=preprocessed, proc_text=proc_div)
@@ -261,12 +455,32 @@ def main():
                    continue
                logger.info(f"  {f['name']:<30} {roles.get(f['name'], '?')}")

+        abend_list = CONFIG.get('abend_programs', [])
+        if abend_list:
+            extend_abend_programs(abend_list)
            branch_paths_with_assigns = enum_paths(branch_tree, fields_dict)
-            branch_paths_with_assigns = [
-                (_filter_stop(c), a) for c, a in branch_paths_with_assigns
-            ]
+            path_infos = []
+            for c, a in branch_paths_with_assigns:
+                filtered_c, term = get_term_type(c)
+                path_infos.append((filtered_c, a, term))
+
+            def _is_skip(cons):
+                """Tell whether a path's WRK-R01EOF constraints are all "= 1" (true).
+
+                Returns True only when at least one 4-item WRK-R01EOF constraint
+                exists and every such constraint has operator '=', a value that
+                reads '1' once surrounding quotes are stripped, and a truthy
+                expected outcome.
+                """
+                eof_cons = [c for c in cons if len(c) == 4 and c[0] == 'WRK-R01EOF']
+                if not eof_cons:
+                    return False
+                return all(
+                    bool(str(c[2]).strip("'\"") == '1' and c[1] == '=' and c[3])
+                    for c in eof_cons
+                )
+
+            before = len(path_infos)
+            path_infos = [p for p in path_infos if not _is_skip(p[0])]
+            after = len(path_infos)
+            logger.info(f"  SKIP 过滤: {before} -> {after} 条路径（预期减少 1）")

-        # OPEN 方向解析
        open_dir = scan_open_statements(proc_div) if proc_div else {}

        if proc_div:
@@ -284,26 +498,104 @@ def main():
        else:
            logger.warning("\n没有找到 PROCEDURE DIVISION。")
            branch_paths_with_assigns = [([], {})]
+            path_infos = [([], {}, 'normal')]
            roles = {f['name']: 'unused' for f in fields_dict}

-        # 覆盖率报告（传入原始源文本用于行号定位）
-        cov_prefix = str(outdir / filepath.stem)
-        index_relpath = 'coverage/index.html'
-        cov_result = run_coverage(branch_tree, branch_paths_with_assigns, fields_dict,
-                                  source, cov_prefix, index_relpath=index_relpath)
+        records, _, term_types = generate_records(path_infos, fields_dict, assignments, file_sec=file_sec)

-        records, kept_path_cons = generate_records(branch_paths_with_assigns, fields_dict, assignments, file_sec=file_sec)
+        def _is_eof_path(cons):
+            """Tell whether the last WRK-R01EOF constraint of a path is "= 1" (true).
+
+            Scans from the end: the first 4-item WRK-R01EOF constraint found is
+            the last one on the path, and it alone decides the answer. A path
+            with no such constraint is not an EOF path.
+            """
+            for c in reversed(cons):
+                if len(c) == 4 and c[0] == 'WRK-R01EOF':
+                    unquoted = str(c[2]).strip("'\"")
+                    return bool(unquoted == '1' and c[1] == '=' and c[3])
+            return False
+        eof_mask = [_is_eof_path(c) for c, a, t in path_infos]
+        eof_count = sum(eof_mask)
+        if eof_count:
+            term_types = ['eof' if e else t for e, t in zip(eof_mask, term_types)]
+            logger.info(f"  EOF 路径: {eof_count} 条（将单独执行）")

-        # 输出 JSON（完整文件）
-        outpath = outdir / (filepath.stem + '.json')
+        multi_write_fds = _find_multi_write_fds(branch_tree, field_to_fd) if proc_div and branch_tree else set()
+        if multi_write_fds:
+            logger.info(f"  检测到多 WRITE FD: {', '.join(sorted(multi_write_fds))}")
+            _chain_prev(records, path_infos, fields_dict, fd_fields, field_to_fd, open_dir)
+
+        if _HAVE_TOSQL:
+            sql_meta = collect_sql_meta(assignments, declared_columns)
+            db_input = build_db_input(
+                branch_paths_with_assigns, fields_dict, assignments, sql_meta, declared_columns,
+                records=records,
+            )
+        else:
+            db_input = None
+
+        (outdir / 'json').mkdir(parents=True, exist_ok=True)
+        outpath = outdir / 'json' / (filepath.stem + '.json')
        output_json(records, outpath, roles,
                    fd_fields=fd_fields, field_to_fd=field_to_fd,
                    open_dir=open_dir,
-                    path_cons_list=kept_path_cons)
+                    term_types=term_types,
+                    db_input=db_input if db_input else None,
+                    data_fields=fields_dict)

-        # 输出入力 JSON（按 FD 拆分）
-        output_input_files(records, outdir, filepath.stem, roles,
-                           fd_fields, field_to_fd, open_dir)
+        output_input_files(records, outdir / 'input', filepath.stem, roles,
+                           fd_fields, field_to_fd, open_dir,
+                           term_types=term_types)
+
+        gcov_data = None
+        if gcov_mode and proc_div and _HAVE_GCOV:
+            select_info = parse_file_control(preprocessed)
+            _temp = temp_dir or str(outdir / '.gcov_cache')
+            source_dir = str(filepath.parent)
+            expected_records: list[dict] = [{}] * len(records)
+            if file_sec and os.path.exists(outpath):
+                with open(outpath, encoding='utf-8') as f:
+                    full_json = json.load(f)
+                json_records = full_json.get('records', [])
+                for i in range(len(records)):
+                    exp = {}
+                    if i < len(json_records):
+                        json_rec = json_records[i]
+                        for fd_name in file_sec:
+                            eo = json_rec.get('expected_output', {})
+                            if fd_name in eo:
+                                exp.update(eo[fd_name])
+                    expected_records[i] = exp
+
+            group_results = run_all(
+                filepath.stem, str(outdir), _temp,
+                fields_dict, fd_fields, select_info, open_dir,
+                term_types, records, expected_records=expected_records,
+                source_dir=source_dir, path_infos=path_infos,
+                multi_write_fds=multi_write_fds,
+            )
+            gcov_data = run_gcov(filepath.stem, _temp)
+
+            passed = sum(1 for r in group_results if r.passed)
+            total = len(group_results)
+            logger.info(f"\n  执行验证: {passed}/{total} 组通过")
+            if passed < total:
+                for r in group_results:
+                    if not r.passed and r.details:
+                        fails = [d for d in r.details if not d.match][:3]
+                        for d in fails:
+                            logger.warning(f"    [{r.name}] {d.field}: "
+                                           f"期望={d.expected!r}, 实际={d.actual!r}")
+
+        if do_run and proc_div and _HAVE_RUNNER:
+            select_info = parse_file_control(preprocessed)
+            run_and_compare(
+                filepath.stem, str(outdir), fields_dict,
+                fd_fields, select_info, open_dir,
+                term_types, records,
+            )

        logger.info(f"\n输出：{outpath}（{len(records)} 条记录）")
        logger.debug(f"\n记录明细：")
@@ -315,11 +607,17 @@ def main():
                vals.append(f"{marker}{f['name']}={rec.get(f['name'], '?')}")
            logger.debug(f"  记录 {i}: {' | '.join(vals)}")

+        (outdir / 'coverage').mkdir(parents=True, exist_ok=True)
+        cov_prefix = str(outdir / 'coverage' / filepath.stem)
+        index_relpath = 'index.html'
+        cov_result = run_coverage(branch_tree, branch_paths_with_assigns, fields_dict,
+                                  source, cov_prefix, index_relpath=index_relpath,
+                                  gcov_data=gcov_data)
+
        programs.append(cov_result)

-    # 生成覆盖率总括索引页
    if programs:
-        generate_coverage_index(programs, outdir)
+        generate_coverage_index(programs, outdir / 'coverage')
        logger.info(f"\n覆盖率总览：{outdir / 'coverage' / 'index.html'}")


@@ -429,18 +727,14 @@ def extract_structure(cobol_source: str) -> dict:
        if m:
            paragraphs.add(m.group(1))

-    # ── 新增字段: select_files ──
    select_files = parse_file_control(preprocessed)

-    # ── 新增字段: open_directions_detail (与 open_directions 一致) ──
    open_directions_detail = open_dir

-    # ── 新增字段: has_divide / has_inspect / has_string ──
    has_divide = bool(re.search(r'\bDIVIDE\b', cobol_source.upper()))
    has_inspect = bool(re.search(r'\bINSPECT\b', cobol_source.upper()))
    has_string = bool(re.search(r'\bSTRING\b', cobol_source.upper()))

-    # ── 新增字段: divide_constants ──
    divide_constants = []
    if has_divide and proc_div:
        for dm in re.finditer(r'\bDIVIDE\s+([\d.]+)\b', proc_div, re.IGNORECASE):
@@ -450,7 +744,6 @@ def extract_structure(cobol_source: str) -> dict:
            except ValueError:
                pass

-    # ── 新增字段: perform_patterns ──
    perform_patterns = []

    def _walk_performs(node):
@@ -478,7 +771,6 @@ def extract_structure(cobol_source: str) -> dict:
    if branch_tree:
        _walk_performs(branch_tree)

-    # ── 新增字段: main_loop ──
    main_loop = None

    def _find_main_loop(node, depth=0):
@@ -533,7 +825,6 @@ def extract_structure(cobol_source: str) -> dict:
    if branch_tree:
        _find_main_loop(branch_tree)

-    # ── 新增字段: if_types ──
    if_types = {"total": 0, "comparison": 0, "equality": 0, "compound": 0, "nested_depth": 0}

    def _walk_if_types(node, depth=0):
@@ -543,7 +834,6 @@ def extract_structure(cobol_source: str) -> dict:
            ct = node.cond_tree
            if ct:
                leaves = collect_leaves(ct)
-                # Check compound: cond_tree is CondAnd or CondOr (not just CondLeaf)
                if isinstance(ct, (CondAnd, CondOr)):
                    if_types["compound"] += 1
                for leaf in leaves:
@@ -566,7 +856,6 @@ def extract_structure(cobol_source: str) -> dict:
    if branch_tree:
        _walk_if_types(branch_tree)

-    # ── 新增字段: variable_patterns ──
    variable_patterns = {
        "has_prev_key": False,
        "has_accumulator": False,
@@ -597,14 +886,12 @@ def extract_structure(cobol_source: str) -> dict:
            if re.search(r'[-_]W\b|[-_]WORK\b|[-_]WK\b|^WS-W[0O]\w', name, re.IGNORECASE):
                variable_patterns["has_work"] = True

-    # ── 新增字段: open_pattern ──
    open_pattern = "sequential"
    if proc_div:
        proc_upper = proc_div.upper()
        open_positions = [m.start() for m in re.finditer(r'\bOPEN\b', proc_upper)]
        close_positions = [m.start() for m in re.finditer(r'\bCLOSE\b', proc_upper)]
        if open_positions and close_positions:
-            # Check OPEN ... CLOSE ... OPEN sequence
            for i, opos in enumerate(open_positions):
                for cpos in close_positions:
                    if cpos > opos:
@@ -618,30 +905,29 @@ def extract_structure(cobol_source: str) -> dict:
                    break

    return {
-"paragraphs": sorted(paragraphs) if paragraphs else [],
-"decision_points": decision_points,
-"branch_tree": branch_tree,
-"file_count": len(file_sec) if file_sec else 0,
-"open_directions": open_dir,
-"has_search_all": any('SEARCH' in str(dp.get('label', '')) for dp in decision_points),
-"has_evaluate": any(dp['kind'] == 'EVALUATE' for dp in decision_points),
-"has_call": 'CALL' in cobol_source.upper(),
-"has_break": any('KEY' in str(dp.get('label', '')).upper() for dp in decision_points),
-"total_branches": total_branches,
-"total_paragraphs": len(paragraphs),
-"branch_tree_obj": branch_tree,
-# ── 新增 8 类结构特征 ──
-"select_files": select_files,
-"open_directions_detail": open_directions_detail,
-"has_divide": has_divide,
-"divide_constants": divide_constants,
-"has_inspect": has_inspect,
-"has_string": has_string,
-"perform_patterns": perform_patterns,
-"main_loop": main_loop,
-"if_types": if_types,
-"variable_patterns": variable_patterns,
-"open_pattern": open_pattern,
+        "paragraphs": sorted(paragraphs) if paragraphs else [],
+        "decision_points": decision_points,
+        "branch_tree": branch_tree,
+        "file_count": len(file_sec) if file_sec else 0,
+        "open_directions": open_dir,
+        "has_search_all": any('SEARCH' in str(dp.get('label', '')) for dp in decision_points),
+        "has_evaluate": any(dp['kind'] == 'EVALUATE' for dp in decision_points),
+        "has_call": 'CALL' in cobol_source.upper(),
+        "has_break": any('KEY' in str(dp.get('label', '')).upper() for dp in decision_points),
+        "total_branches": total_branches,
+        "total_paragraphs": len(paragraphs),
+        "branch_tree_obj": branch_tree,
+        "select_files": select_files,
+        "open_directions_detail": open_directions_detail,
+        "has_divide": has_divide,
+        "divide_constants": divide_constants,
+        "has_inspect": has_inspect,
+        "has_string": has_string,
+        "perform_patterns": perform_patterns,
+        "main_loop": main_loop,
+        "if_types": if_types,
+        "variable_patterns": variable_patterns,
+        "open_pattern": open_pattern,
    }


@@ -693,11 +979,12 @@ def generate_data(cobol_source: str, structure: dict = None) -> list[dict]:

    file_sec = parse_file_section(preprocessed)

-    branch_paths = enum_paths(branch_tree, fields_dict)
-    branch_paths = [(_filter_stop(c), a) for c, a in branch_paths]
+    branch_paths_unfiltered = mcdc_enum_paths(branch_tree, fields_dict)
+    path_infos = []
+    for c, a in branch_paths_unfiltered:
+        filtered_c, term = get_term_type(c)
+        path_infos.append((filtered_c, a, term))

-    # Filter: remove constraints whose field doesn't exist in fields_dict.
-    # Resolve OF-qualified names and subscripts for matching.
    _fdict_names = {f['name'] for f in fields_dict}
    def _resolve_field(fn: str) -> str:
        ufn = fn.upper()
@@ -708,7 +995,7 @@ def generate_data(cobol_source: str, structure: dict = None) -> list[dict]:
            return m.group(1)
        return fn
    filtered_paths = []
-    for cons_list, asgn in branch_paths:
+    for cons_list, asgn, term in path_infos:
        clean = []
        for c in cons_list:
            if len(c) >= 4:
@@ -718,12 +1005,11 @@ def generate_data(cobol_source: str, structure: dict = None) -> list[dict]:
                    clean.append(tuple(c))
            else:
                clean.append(c)
-        filtered_paths.append((clean, asgn))
-    branch_paths = filtered_paths
+        filtered_paths.append((clean, asgn, term))
+    path_infos = filtered_paths

-    records, kept_paths = generate_records(branch_paths, fields_dict, assignments, file_sec=file_sec)
+    records, kept_paths, term_types = generate_records(path_infos, fields_dict, assignments, file_sec=file_sec)

-    # Cross-file KEY alignment for matching programs
    if records:
        import re as _re
        proc_upper = (proc_div or "").upper()