merge local cobol_testgen improvements into v3 shared modules

- cond.py: SQLCODE/SQLSTATE handling, alphanumeric >/< boundary fix - output.py: termination tracking, db_input support, _is_field_assigned filter - coverage.py: mark_from_gcov, THRU support, KeyError protection - gcov.py: new file (dependency for coverage.py) - grammar.lark: multi-segment PIC support - read.py: SQL INCLUDE resolution, DECLARE TABLE parsing, * comment fix - core.py: SQL parsing, blocked_names, keyword list - design.py: multi-sentinel, THRU ranges, PERFORM VARYING last iteration - __init__.py: local main() + v3 API functions, guarded imports All 6 ZAN programs verified passing through v3 pipeline
2026-06-23 22:38:17 +08:00
parent e5ab3baa46
commit 7fb9304212
9 changed files with 1595 additions and 326 deletions
@@ -1,14 +1,14 @@
 """COBOL Test Data Generator — 模块化版入口

-from __future__ import annotations
 公开 API:
  extract_structure()    — 解析 COBOL 控制流 → dict
  generate_data()        — 生成测试数据 → list[dict]
  incremental_supplement — 差分补充数据 → list[dict]
-  check_coverage()       — 覆盖率报告 → dict
 """

+import os
 import sys
+import json
 import re
 import logging
 from datetime import datetime
@@ -16,25 +16,45 @@ from pathlib import Path

 # ── 配置（必须放在本地模块导入之前，避免循环导入） ──

-CONFIG = {}
+CONFIG = {
+    'abend_programs': ['SUB03END'],
+}

 from .read import preprocess, extract_data_division, extract_procedure_division
-from .read import resolve_copybooks, parse_data_division, parse_file_section, scan_open_statements, parse_file_control
-from .core import classify_field_roles, _init_child_names
-from .pipeline_bridge import build_branch_tree_fallback
+from .read import resolve_copybooks, parse_data_division, parse_file_section, scan_open_statements
+from .read import parse_file_control, resolve_sql_includes, strip_exec_sql_from_data_div
+from .core import build_branch_tree, classify_field_roles, _init_child_names, sql_register_virtual_fields, _find_multi_write_fds
 from .cond import parse_single_condition, is_field, collect_leaves
-from .design_mcdc import enum_paths, _filter_stop
-from .design import generate_records
+from .pipeline_bridge import build_branch_tree_fallback
+from .design_mcdc import enum_paths as mcdc_enum_paths, _filter_stop
+from .design import enum_paths, generate_records, get_term_type, extend_abend_programs
 from .output import output_json, output_input_files
-from .coverage import run_coverage, generate_coverage_index, check_coverage
+from .coverage import run_coverage, generate_coverage_index
 from japanese_data import generate_fullwidth_text, generate_halfwidth_katakana, generate_wareki_date

+try:
+    from .runner import run_and_compare, run_all, GroupInfo, GroupResult
+    _HAVE_RUNNER = True
+except ImportError:
+    _HAVE_RUNNER = False
+
+try:
+    from .gcov import run_gcov
+    _HAVE_GCOV = True
+except ImportError:
+    _HAVE_GCOV = False
+
+try:
+    from .to_sql import collect_sql_meta, build_db_input
+    _HAVE_TOSQL = True
+except ImportError:
+    _HAVE_TOSQL = False
+
 logger = logging.getLogger(__name__)
-n__all__ = [
+__all__ = [
    "extract_structure",
    "generate_data",
    "incremental_supplement",
-    "check_coverage",
    "CONFIG",
    "generate_fullwidth_text",
    "generate_halfwidth_katakana",
@@ -107,6 +127,149 @@ def expand_occurs(fields):
    return result


+# ── PREV 连锁 ──
+
+
+def _constraint_in(cons, field, op, value, want):
+    for c in cons:
+        if len(c) == 4 and c[0] == field and c[1] == op and c[2] == value and c[3] == want:
+            return True
+    return False
+
+
+def _inc_str(s, length):
+    try:
+        return str(int(s) + 1).zfill(length)
+    except ValueError:
+        c = list(str(s).ljust(length)[:length])
+        for i in range(len(c) - 1, -1, -1):
+            if c[i] not in ' 9Zz\xff':
+                c[i] = chr(ord(c[i]) + 1)
+                break
+            if c[i] == ' ':
+                c[i] = '0'
+                break
+            if c[i] == '9':
+                c[i] = '0'
+        return ''.join(c)
+
+
+def _dec_str(s, length):
+    try:
+        n = max(0, int(s) - 1)
+        return str(n).zfill(length)
+    except ValueError:
+        c = list(str(s).ljust(length)[:length])
+        for i in range(len(c) - 1, -1, -1):
+            if c[i] not in ' 0Aa\x00':
+                c[i] = chr(ord(c[i]) - 1)
+                break
+            if c[i] == ' ':
+                break
+            if c[i] == '0':
+                c[i] = '9'
+        return ''.join(c)
+
+
+def _field_length(fname, fields):
+    for f in fields:
+        if f['name'] == fname:
+            pi = f.get('pic_info', {})
+            return pi.get('digits', 0) + pi.get('decimal', 0) or pi.get('length', 0) or 1
+    return 1
+
+
+def _chain_prev(records, path_infos, fields, fd_fields, field_to_fd, open_dir):
+    """跨记录 PREV 连锁。修改 records 使批次执行的路径与实际比较一致。
+    
+    每个路径 k-1 的约束（PREV OP CURRENT）对应批次中 loop iter k-1 的实际比较：
+      PREV = records[prev_src].R01  (程序内部保持的前值)
+      CURRENT = records[k].R01      (当前读入值)
+    本函数调整 records[k] 的字段以保证交叉记录比较满足路径约束。
+    """
+    N = len(records)
+    if N < 2:
+        return
+
+    key_fields = []
+    time_start_field = None
+    time_end_field = None
+    for fname in records[0]:
+        if fname.startswith('R01') and not fname.startswith('R01INNREC'):
+            base = fname[3:]
+            prev_name = 'WRK-PREV-' + base
+            if prev_name in records[0]:
+                if 'EMP-ID' in fname or 'APPL-DATE' in fname:
+                    key_fields.append(fname)
+                if 'END-TIME' in fname:
+                    time_end_field = fname
+                if 'START-TIME' in fname:
+                    time_start_field = fname
+
+    prev_src = 0
+    for k in range(1, N):
+        if k - 1 >= len(path_infos):
+            break
+        cons = path_infos[k - 1][0]
+
+        is_same_key = all(
+            _constraint_in(cons, f'WRK-PREV-{fn[3:]}', '=', fn, True)
+            for fn in key_fields
+        ) if key_fields else False
+        is_overlap = is_same_key and time_end_field and time_start_field and \
+            _constraint_in(cons, f'WRK-PREV-{time_end_field[3:]}', '>', time_start_field, True)
+        is_normal = is_same_key and time_end_field and time_start_field and \
+            (_constraint_in(cons, f'WRK-PREV-{time_end_field[3:]}', '<=', time_start_field, True) or
+             _constraint_in(cons, f'WRK-PREV-{time_end_field[3:]}', '>', time_start_field, False))
+
+        for fname in records[prev_src]:
+            if fname.startswith('R01') and not fname.startswith('R01INNREC'):
+                base = fname[3:]
+                prev_name = 'WRK-PREV-' + base
+                if prev_name in records[k]:
+                    records[k][prev_name] = records[prev_src][fname]
+
+        if is_same_key:
+            for kf in key_fields:
+                if kf in records[k] and kf in records[prev_src]:
+                    records[k][kf] = records[prev_src][kf]
+
+            if is_normal and time_end_field and time_start_field:
+                prev_end = records[prev_src].get(time_end_field, '')
+                curr_start = records[k].get(time_start_field, '')
+                if prev_end >= curr_start:
+                    length = _field_length(time_start_field, fields)
+                    records[k][time_start_field] = _inc_str(prev_end, length)
+
+            if is_overlap and time_end_field and time_start_field:
+                prev_end = records[prev_src].get(time_end_field, '')
+                curr_start = records[k].get(time_start_field, '')
+                if prev_end <= curr_start:
+                    length = _field_length(time_start_field, fields)
+                    records[k][time_start_field] = _dec_str(prev_end, length) if prev_end else '0' * length
+
+        else:
+            for kf in key_fields:
+                if kf in records[k] and kf in records[prev_src]:
+                    if records[k][kf] == records[prev_src][kf]:
+                        length = _field_length(kf, fields)
+                        records[k][kf] = _inc_str(str(records[k][kf]), length)
+
+        records[k]['_w02_path'] = is_same_key and time_end_field and time_start_field and not is_overlap
+        records[k]['_overlap_path'] = is_overlap
+
+        for fn in list(records[k].keys()):
+            if fn.startswith('R01') and not fn.startswith('R01INNREC'):
+                wfn = 'W01' + fn[3:]
+                if wfn in records[k]:
+                    records[k][wfn] = records[k][fn]
+
+        if is_overlap:
+            pass
+        else:
+            prev_src = k
+
+
 # ── 入口 ──

 def main():
@@ -116,7 +279,32 @@ def main():

    args = sys.argv[1:]

-    # 分离 cobol 文件与输出目录
+    do_run = False
+    gcov_mode = False
+    temp_dir = None
+    if '--run' in args:
+        do_run = True
+        args.remove('--run')
+    if '--gcov' in args:
+        gcov_mode = True
+        args.remove('--gcov')
+    i = 0
+    while i < len(args):
+        if args[i] == '--temp-dir':
+            if i + 1 < len(args):
+                temp_dir = args[i + 1]
+                args.pop(i + 1)
+                args.pop(i)
+            else:
+                args.pop(i)
+            break
+        elif args[i].startswith('--temp-dir='):
+            temp_dir = args[i].split('=', 1)[1]
+            args.pop(i)
+            break
+        else:
+            i += 1
+
    cobol_files = []
    outdir = None
    for a in args:
@@ -133,13 +321,13 @@ def main():
    if outdir is None:
        outdir = cobol_files[0].parent

-    # 配置全局 Logger
    outdir.mkdir(parents=True, exist_ok=True)
-    log_path = outdir / f"cobol_testgen_{datetime.now():%Y%m%d_%H%M%S}.log"
+    (outdir / 'logs').mkdir(parents=True, exist_ok=True)
+    log_path = outdir / 'logs' / f"cobol_testgen_{datetime.now():%Y%m%d_%H%M%S}.log"
    fh = logging.FileHandler(log_path, encoding="utf-8", mode="w")
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(logging.Formatter(
-"%(asctime)s [%(levelname)s] %(name)s: %(message)s"
+        "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
    ))
    sh = logging.StreamHandler()
    sh.setLevel(logging.INFO)
@@ -157,12 +345,20 @@ def main():
            continue

        source = filepath.read_text(encoding='utf-8')
-        source = resolve_copybooks(source, str(filepath.parent))
+        source = resolve_copybooks(
+            source,
+            str(filepath.parent),
+            extra_search_paths=[str(filepath.parent / '..' / 'cpy')],
+        )
+        source = resolve_sql_includes(source, str(filepath.parent))
        preprocessed = preprocess(source)
        file_sec = parse_file_section(preprocessed)

-        # DATA DIVISION解析
        data_div = extract_data_division(preprocessed)
+        if data_div:
+            data_div, declared_columns = strip_exec_sql_from_data_div(data_div)
+        else:
+            declared_columns = {}
        if not data_div:
            logger.error(f"错误：{filepath.name} 中没有 DATA DIVISION。")
            continue
@@ -172,7 +368,6 @@ def main():
            logger.error(f"错误：{filepath.name} 中没有找到含 PIC 的字段。")
            continue

-        # FieldDef → dict
        fields_dict = []
        parent_pic = {}
        filler_counter = 0
@@ -206,7 +401,6 @@ def main():
            if f.is_88:
                entry['is_88'] = True
                entry['parent'] = f.parent
-                # Copy parent's pic_info for value generation
                if f.parent and f.parent in parent_pic:
                    entry['pic_info'] = dict(parent_pic[f.parent])
            else:
@@ -215,7 +409,8 @@ def main():

        fields_dict = expand_occurs(fields_dict)

-        # Build FD→children 和 field→FD 映射
+        sql_register_virtual_fields(fields_dict)
+
        fd_fields = {}
        field_to_fd = {}
        if file_sec:
@@ -245,13 +440,12 @@ def main():
            pic_display = str(f.get('pic', '')) if f.get('pic') else ('88-level' if f.get('is_88') else '')
            logger.info(f"{f['level']:<6} {f['name']:<25} {pic_display:<15} {t:<12} {l:<5}")

-        # PROCEDURE DIVISION解析
        proc_div = extract_procedure_division(preprocessed)
        branch_paths = []
        assignments = {}

        if proc_div:
-            branch_tree, assignments = build_branch_tree_fallback(proc_div, fields_dict)
+            branch_tree, assignments = build_branch_tree(proc_div, fields_dict, full_source=preprocessed)

            roles = classify_field_roles(branch_tree, assignments, fields_dict,
                                         source=preprocessed, proc_text=proc_div)
@@ -261,12 +455,32 @@ def main():
                    continue
                logger.info(f"  {f['name']:<30} {roles.get(f['name'], '?')}")

+        abend_list = CONFIG.get('abend_programs', [])
+        if abend_list:
+            extend_abend_programs(abend_list)
            branch_paths_with_assigns = enum_paths(branch_tree, fields_dict)
-            branch_paths_with_assigns = [
-                (_filter_stop(c), a) for c, a in branch_paths_with_assigns
-            ]
+            path_infos = []
+            for c, a in branch_paths_with_assigns:
+                filtered_c, term = get_term_type(c)
+                path_infos.append((filtered_c, a, term))
+
+            def _is_skip(cons):
+                eq1_true = 0
+                other = 0
+                for c in cons:
+                    if len(c) == 4 and c[0] == 'WRK-R01EOF':
+                        val = str(c[2]).strip("'\"")
+                        if val == '1' and c[1] == '=' and c[3]:
+                            eq1_true += 1
+                        else:
+                            other += 1
+                return eq1_true > 0 and other == 0
+
+            before = len(path_infos)
+            path_infos = [p for p in path_infos if not _is_skip(p[0])]
+            after = len(path_infos)
+            logger.info(f"  SKIP 过滤: {before} -> {after} 条路径（预期减少 1）")

-        # OPEN 方向解析
        open_dir = scan_open_statements(proc_div) if proc_div else {}

        if proc_div:
@@ -284,26 +498,104 @@ def main():
        else:
            logger.warning("\n没有找到 PROCEDURE DIVISION。")
            branch_paths_with_assigns = [([], {})]
+            path_infos = [([], {}, 'normal')]
            roles = {f['name']: 'unused' for f in fields_dict}

-        # 覆盖率报告（传入原始源文本用于行号定位）
-        cov_prefix = str(outdir / filepath.stem)
-        index_relpath = 'coverage/index.html'
-        cov_result = run_coverage(branch_tree, branch_paths_with_assigns, fields_dict,
-                                  source, cov_prefix, index_relpath=index_relpath)
+        records, _, term_types = generate_records(path_infos, fields_dict, assignments, file_sec=file_sec)

-        records, kept_path_cons = generate_records(branch_paths_with_assigns, fields_dict, assignments, file_sec=file_sec)
+        def _is_eof_path(cons):
+            last_eq1_true = -1
+            for i, c in enumerate(cons):
+                if len(c) == 4 and c[0] == 'WRK-R01EOF':
+                    val = str(c[2]).strip("'\"")
+                    if val == '1' and c[1] == '=' and c[3]:
+                        last_eq1_true = i
+            if last_eq1_true < 0:
+                return False
+            for i in range(last_eq1_true + 1, len(cons)):
+                if len(cons[i]) == 4 and cons[i][0] == 'WRK-R01EOF':
+                    return False
+            return True
+        eof_mask = [_is_eof_path(c) for c, a, t in path_infos]
+        eof_count = sum(eof_mask)
+        if eof_count:
+            term_types = ['eof' if e else t for e, t in zip(eof_mask, term_types)]
+            logger.info(f"  EOF 路径: {eof_count} 条（将单独执行）")

-        # 输出 JSON（完整文件）
-        outpath = outdir / (filepath.stem + '.json')
+        multi_write_fds = _find_multi_write_fds(branch_tree, field_to_fd) if proc_div and branch_tree else set()
+        if multi_write_fds:
+            logger.info(f"  检测到多 WRITE FD: {', '.join(sorted(multi_write_fds))}")
+            _chain_prev(records, path_infos, fields_dict, fd_fields, field_to_fd, open_dir)
+
+        if _HAVE_TOSQL:
+            sql_meta = collect_sql_meta(assignments, declared_columns)
+            db_input = build_db_input(
+                branch_paths_with_assigns, fields_dict, assignments, sql_meta, declared_columns,
+                records=records,
+            )
+        else:
+            db_input = None
+
+        (outdir / 'json').mkdir(parents=True, exist_ok=True)
+        outpath = outdir / 'json' / (filepath.stem + '.json')
        output_json(records, outpath, roles,
                    fd_fields=fd_fields, field_to_fd=field_to_fd,
                    open_dir=open_dir,
-                    path_cons_list=kept_path_cons)
+                    term_types=term_types,
+                    db_input=db_input if db_input else None,
+                    data_fields=fields_dict)

-        # 输出入力 JSON（按 FD 拆分）
-        output_input_files(records, outdir, filepath.stem, roles,
-                           fd_fields, field_to_fd, open_dir)
+        output_input_files(records, outdir / 'input', filepath.stem, roles,
+                           fd_fields, field_to_fd, open_dir,
+                           term_types=term_types)
+
+        gcov_data = None
+        if gcov_mode and proc_div and _HAVE_GCOV:
+            select_info = parse_file_control(preprocessed)
+            _temp = temp_dir or str(outdir / '.gcov_cache')
+            source_dir = str(filepath.parent)
+            expected_records: list[dict] = [{}] * len(records)
+            if file_sec and os.path.exists(outpath):
+                with open(outpath, encoding='utf-8') as f:
+                    full_json = json.load(f)
+                json_records = full_json.get('records', [])
+                for i in range(len(records)):
+                    exp = {}
+                    if i < len(json_records):
+                        json_rec = json_records[i]
+                        for fd_name in file_sec:
+                            eo = json_rec.get('expected_output', {})
+                            if fd_name in eo:
+                                exp.update(eo[fd_name])
+                    expected_records[i] = exp
+
+            group_results = run_all(
+                filepath.stem, str(outdir), _temp,
+                fields_dict, fd_fields, select_info, open_dir,
+                term_types, records, expected_records=expected_records,
+                source_dir=source_dir, path_infos=path_infos,
+                multi_write_fds=multi_write_fds,
+            )
+            gcov_data = run_gcov(filepath.stem, _temp)
+
+            passed = sum(1 for r in group_results if r.passed)
+            total = len(group_results)
+            logger.info(f"\n  执行验证: {passed}/{total} 组通过")
+            if passed < total:
+                for r in group_results:
+                    if not r.passed and r.details:
+                        fails = [d for d in r.details if not d.match][:3]
+                        for d in fails:
+                            logger.warning(f"    [{r.name}] {d.field}: "
+                                           f"期望={d.expected!r}, 实际={d.actual!r}")
+
+        if do_run and proc_div and _HAVE_RUNNER:
+            select_info = parse_file_control(preprocessed)
+            run_and_compare(
+                filepath.stem, str(outdir), fields_dict,
+                fd_fields, select_info, open_dir,
+                term_types, records,
+            )

        logger.info(f"\n输出：{outpath}（{len(records)} 条记录）")
        logger.debug(f"\n记录明细：")
@@ -315,11 +607,17 @@ def main():
                vals.append(f"{marker}{f['name']}={rec.get(f['name'], '?')}")
            logger.debug(f"  记录 {i}: {' | '.join(vals)}")

+        (outdir / 'coverage').mkdir(parents=True, exist_ok=True)
+        cov_prefix = str(outdir / 'coverage' / filepath.stem)
+        index_relpath = 'index.html'
+        cov_result = run_coverage(branch_tree, branch_paths_with_assigns, fields_dict,
+                                  source, cov_prefix, index_relpath=index_relpath,
+                                  gcov_data=gcov_data)
+
        programs.append(cov_result)

-    # 生成覆盖率总括索引页
    if programs:
-        generate_coverage_index(programs, outdir)
+        generate_coverage_index(programs, outdir / 'coverage')
        logger.info(f"\n覆盖率总览：{outdir / 'coverage' / 'index.html'}")


@@ -429,18 +727,14 @@ def extract_structure(cobol_source: str) -> dict:
        if m:
            paragraphs.add(m.group(1))

-    # ── 新增字段: select_files ──
    select_files = parse_file_control(preprocessed)

-    # ── 新增字段: open_directions_detail (与 open_directions 一致) ──
    open_directions_detail = open_dir

-    # ── 新增字段: has_divide / has_inspect / has_string ──
    has_divide = bool(re.search(r'\bDIVIDE\b', cobol_source.upper()))
    has_inspect = bool(re.search(r'\bINSPECT\b', cobol_source.upper()))
    has_string = bool(re.search(r'\bSTRING\b', cobol_source.upper()))

-    # ── 新增字段: divide_constants ──
    divide_constants = []
    if has_divide and proc_div:
        for dm in re.finditer(r'\bDIVIDE\s+([\d.]+)\b', proc_div, re.IGNORECASE):
@@ -450,7 +744,6 @@ def extract_structure(cobol_source: str) -> dict:
            except ValueError:
                pass

-    # ── 新增字段: perform_patterns ──
    perform_patterns = []

    def _walk_performs(node):
@@ -478,7 +771,6 @@ def extract_structure(cobol_source: str) -> dict:
    if branch_tree:
        _walk_performs(branch_tree)

-    # ── 新增字段: main_loop ──
    main_loop = None

    def _find_main_loop(node, depth=0):
@@ -533,7 +825,6 @@ def extract_structure(cobol_source: str) -> dict:
    if branch_tree:
        _find_main_loop(branch_tree)

-    # ── 新增字段: if_types ──
    if_types = {"total": 0, "comparison": 0, "equality": 0, "compound": 0, "nested_depth": 0}

    def _walk_if_types(node, depth=0):
@@ -543,7 +834,6 @@ def extract_structure(cobol_source: str) -> dict:
            ct = node.cond_tree
            if ct:
                leaves = collect_leaves(ct)
-                # Check compound: cond_tree is CondAnd or CondOr (not just CondLeaf)
                if isinstance(ct, (CondAnd, CondOr)):
                    if_types["compound"] += 1
                for leaf in leaves:
@@ -566,7 +856,6 @@ def extract_structure(cobol_source: str) -> dict:
    if branch_tree:
        _walk_if_types(branch_tree)

-    # ── 新增字段: variable_patterns ──
    variable_patterns = {
        "has_prev_key": False,
        "has_accumulator": False,
@@ -597,14 +886,12 @@ def extract_structure(cobol_source: str) -> dict:
            if re.search(r'[-_]W\b|[-_]WORK\b|[-_]WK\b|^WS-W[0O]\w', name, re.IGNORECASE):
                variable_patterns["has_work"] = True

-    # ── 新增字段: open_pattern ──
    open_pattern = "sequential"
    if proc_div:
        proc_upper = proc_div.upper()
        open_positions = [m.start() for m in re.finditer(r'\bOPEN\b', proc_upper)]
        close_positions = [m.start() for m in re.finditer(r'\bCLOSE\b', proc_upper)]
        if open_positions and close_positions:
-            # Check OPEN ... CLOSE ... OPEN sequence
            for i, opos in enumerate(open_positions):
                for cpos in close_positions:
                    if cpos > opos:
@@ -618,30 +905,29 @@ def extract_structure(cobol_source: str) -> dict:
                    break

    return {
-"paragraphs": sorted(paragraphs) if paragraphs else [],
-"decision_points": decision_points,
-"branch_tree": branch_tree,
-"file_count": len(file_sec) if file_sec else 0,
-"open_directions": open_dir,
-"has_search_all": any('SEARCH' in str(dp.get('label', '')) for dp in decision_points),
-"has_evaluate": any(dp['kind'] == 'EVALUATE' for dp in decision_points),
-"has_call": 'CALL' in cobol_source.upper(),
-"has_break": any('KEY' in str(dp.get('label', '')).upper() for dp in decision_points),
-"total_branches": total_branches,
-"total_paragraphs": len(paragraphs),
-"branch_tree_obj": branch_tree,
-# ── 新增 8 类结构特征 ──
-"select_files": select_files,
-"open_directions_detail": open_directions_detail,
-"has_divide": has_divide,
-"divide_constants": divide_constants,
-"has_inspect": has_inspect,
-"has_string": has_string,
-"perform_patterns": perform_patterns,
-"main_loop": main_loop,
-"if_types": if_types,
-"variable_patterns": variable_patterns,
-"open_pattern": open_pattern,
+        "paragraphs": sorted(paragraphs) if paragraphs else [],
+        "decision_points": decision_points,
+        "branch_tree": branch_tree,
+        "file_count": len(file_sec) if file_sec else 0,
+        "open_directions": open_dir,
+        "has_search_all": any('SEARCH' in str(dp.get('label', '')) for dp in decision_points),
+        "has_evaluate": any(dp['kind'] == 'EVALUATE' for dp in decision_points),
+        "has_call": 'CALL' in cobol_source.upper(),
+        "has_break": any('KEY' in str(dp.get('label', '')).upper() for dp in decision_points),
+        "total_branches": total_branches,
+        "total_paragraphs": len(paragraphs),
+        "branch_tree_obj": branch_tree,
+        "select_files": select_files,
+        "open_directions_detail": open_directions_detail,
+        "has_divide": has_divide,
+        "divide_constants": divide_constants,
+        "has_inspect": has_inspect,
+        "has_string": has_string,
+        "perform_patterns": perform_patterns,
+        "main_loop": main_loop,
+        "if_types": if_types,
+        "variable_patterns": variable_patterns,
+        "open_pattern": open_pattern,
    }


@@ -693,11 +979,12 @@ def generate_data(cobol_source: str, structure: dict = None) -> list[dict]:

    file_sec = parse_file_section(preprocessed)

-    branch_paths = enum_paths(branch_tree, fields_dict)
-    branch_paths = [(_filter_stop(c), a) for c, a in branch_paths]
+    branch_paths_unfiltered = mcdc_enum_paths(branch_tree, fields_dict)
+    path_infos = []
+    for c, a in branch_paths_unfiltered:
+        filtered_c, term = get_term_type(c)
+        path_infos.append((filtered_c, a, term))

-    # Filter: remove constraints whose field doesn't exist in fields_dict.
-    # Resolve OF-qualified names and subscripts for matching.
    _fdict_names = {f['name'] for f in fields_dict}
    def _resolve_field(fn: str) -> str:
        ufn = fn.upper()
@@ -708,7 +995,7 @@ def generate_data(cobol_source: str, structure: dict = None) -> list[dict]:
            return m.group(1)
        return fn
    filtered_paths = []
-    for cons_list, asgn in branch_paths:
+    for cons_list, asgn, term in path_infos:
        clean = []
        for c in cons_list:
            if len(c) >= 4:
@@ -718,12 +1005,11 @@ def generate_data(cobol_source: str, structure: dict = None) -> list[dict]:
                    clean.append(tuple(c))
            else:
                clean.append(c)
-        filtered_paths.append((clean, asgn))
-    branch_paths = filtered_paths
+        filtered_paths.append((clean, asgn, term))
+    path_infos = filtered_paths

-    records, kept_paths = generate_records(branch_paths, fields_dict, assignments, file_sec=file_sec)
+    records, kept_paths, term_types = generate_records(path_infos, fields_dict, assignments, file_sec=file_sec)

-    # Cross-file KEY alignment for matching programs
    if records:
        import re as _re
        proc_upper = (proc_div or "").upper()
@@ -44,12 +44,34 @@ def parse_single_condition(text, fields=None):
    - Bare:   WS-EOF                 → (WS-EOF, '=', 'Y')
    - NOT bare: NOT WS-EOF           → (WS-EOF, '<>', 'Y')
    - NOT arith: A+B NOT = C         → ('A+B', '<>', 'C')
+    - SQLCODE: SQLCODE = 100         → ('SQLCODE', '=', '100')
+    - SQLSTATE: SQLSTATE <> '02000'  → ('SQLSTATE', '<>', '02000')

    Returns None for compound (AND/OR) conditions.
    """
    if ' AND ' in text or ' OR ' in text:
        return None
    text = text.strip()
+    field_name = text.split()[0] if text else ''
+
+    # SQLCODE special handling
+    if field_name.upper() == 'SQLCODE':
+        text_upper = text.upper()
+        if 'GREATER THAN 0' in text_upper or 'GREATER THAN ZERO' in text_upper:
+            return ('SQLCODE', '>', '0')
+        if 'LESS THAN 0' in text_upper:
+            return ('SQLCODE', '<', '0')
+        if '= 100' in text_upper:
+            return ('SQLCODE', '=', '100')
+        if 'NOT = 100' in text_upper:
+            return ('SQLCODE', '<>', '100')
+
+    # SQLSTATE special handling
+    if field_name.upper() == 'SQLSTATE':
+        normalized_sql = re.sub(r'\bNOT\s*=', '<>', text, flags=re.IGNORECASE)
+        m = re.match(r"SQLSTATE\s*(>=|<=|<>|>|<|=)\s*['\"]?(.+?)['\"]?\s*$", normalized_sql, re.IGNORECASE)
+        if m:
+            return ('SQLSTATE', m.group(1), m.group(2).strip().strip("'\""))

    # Resolve 88-level condition names
    if fields:
@@ -62,9 +84,9 @@ def parse_single_condition(text, fields=None):

    # Bare NOT field reference (no operator): NOT WS-EOF → WS-EOF <> 'Y'
    if text.upper().startswith('NOT ') and not re.search(r'(>=|<=|<>|>|<|=)', text):
-        field_name = text[4:].strip()
-        if re.match(r'^[A-Z][A-Z0-9-]*(?:\([^)]*\))?$', field_name, re.IGNORECASE):
-            return (field_name, '<>', 'Y')
+        fn = text[4:].strip()
+        if re.match(r'^[A-Z][A-Z0-9-]*(?:\([^)]*\))?$', fn, re.IGNORECASE):
+            return (fn, '<>', 'Y')

    # Normalize COBOL NOT-operators: X NOT = Y → X <> Y
    normalized = text
@@ -292,11 +314,31 @@ def satisfying_value(field_info: dict, operator: str, value, want_true: bool) ->
            elif operator in ('<>', '!='):
                other = chr(65 + (ord(base_chr) - 64) % 26)
                return other.ljust(length, other)
+            elif operator == '>':
+                sv = str(value)[:length].ljust(length)
+                chars = list(sv)
+                last = chars[-1]
+                if last not in '9Zz':
+                    chars[-1] = chr(ord(last) + 1)
+                return ''.join(chars)
+            elif operator == '<':
+                sv = str(value)[:length].ljust(length)
+                chars = list(sv)
+                last = chars[-1]
+                if last == ' ':
+                    pass
+                elif last in '0Aa':
+                    chars[-1] = ' '
+                else:
+                    chars[-1] = chr(ord(last) - 1)
+                return ''.join(chars)
        else:
            if operator in ('=', '=='):
                other = chr(65 + (ord(base_chr) - 64) % 26)
                return other.ljust(length, other)
            elif operator in ('<>', '!='):
                return base_chr.ljust(length, base_chr)
+            elif operator in ('>', '<'):
+                return str(value)[:length].ljust(length)

    return '0'.zfill(total)
@@ -15,16 +15,29 @@ _COBOL_SCOPE_ENDERS = {
    'END-SEARCH',
    'ELSE', 'WHEN', 'OTHER',
 }
+_COBOL_KEYWORDS = {
+    'GOBACK', 'EXIT', 'STOP', 'CONTINUE',
+    'ACCEPT', 'DISPLAY', 'MOVE', 'COMPUTE', 'INITIALIZE',
+    'ADD', 'SUBTRACT', 'MULTIPLY', 'DIVIDE',
+    'STRING', 'UNSTRING', 'SET', 'INSPECT',
+    'OPEN', 'CLOSE', 'READ', 'WRITE', 'REWRITE', 'DELETE', 'START',
+    'PERFORM', 'CALL', 'IF', 'EVALUATE', 'SEARCH', 'SORT', 'MERGE',
+    'COMMIT', 'ROLLBACK', 'GO',
+}


-def scan_paragraphs(raw_lines):
+def scan_paragraphs(raw_lines, blocked_names=None):
    paragraphs = {}
    i = 0
+    blocked = set()
+    if blocked_names:
+        for n in blocked_names:
+            blocked.add(n.upper())
    while i < len(raw_lines):
        line = raw_lines[i].strip()
        m = re.match(r'^([A-Z0-9][A-Z0-9-]*)\.\s*$', line)
-        sec_m = re.match(r'^([A-Z][A-Z0-9-]*)\s+SECTION\.?\s*$', line, re.IGNORECASE)
-        if m and m.group(1) not in _COBOL_SCOPE_ENDERS:
+        sec_m = re.match(r'^([A-Z0-9][A-Z0-9-]*)\s+SECTION\.?\s*$', line, re.IGNORECASE)
+        if m and m.group(1) not in _COBOL_SCOPE_ENDERS and m.group(1) not in _COBOL_KEYWORDS and m.group(1) not in blocked:
            name = m.group(1)
        elif sec_m:
            name = sec_m.group(1).upper()
@@ -36,9 +49,9 @@ def scan_paragraphs(raw_lines):
        while j < len(raw_lines):
            nline = raw_lines[j].strip()
            nm = re.match(r'^([A-Z0-9][A-Z0-9-]*)\.\s*$', nline)
-            if nm and nm.group(1) not in _COBOL_SCOPE_ENDERS:
+            if nm and nm.group(1) not in _COBOL_SCOPE_ENDERS and nm.group(1) not in _COBOL_KEYWORDS and nm.group(1) not in blocked:
                break
-            if re.match(r'^[A-Z][A-Z0-9-]*\s+SECTION\.\s*$', nline, re.IGNORECASE):
+            if re.match(r'^[A-Z0-9][A-Z0-9-]*\s+SECTION\.\s*$', nline, re.IGNORECASE):
                break
            j += 1
        paragraphs[name] = (start, j - 1)
@@ -46,9 +59,47 @@ def scan_paragraphs(raw_lines):
    return paragraphs


-def build_branch_tree(proc_text, fields=None):
+def sql_register_virtual_fields(fields_dict: list[dict]) -> list[dict]:
+    """Inject SQLCODE, SQLSTATE as virtual fields if not already present."""
+    virtual = []
+    if not any(f['name'] == 'SQLCODE' for f in fields_dict):
+        virtual.append({
+            'name': 'SQLCODE',
+            'level': 77, 'pic': 'S9(9)',
+            'pic_info': {'type': 'numeric', 'digits': 9, 'decimal': 0,
+                         'length': 4, 'signed': True},
+            'section': 'WORKING-STORAGE', 'is_filler': False, 'redefines': None,
+            'usage': 'COMP', 'occurs': 0, 'occurs_depending': None,
+            'value': None, 'values': None,
+        })
+    if not any(f['name'] == 'SQLSTATE' for f in fields_dict):
+        virtual.append({
+            'name': 'SQLSTATE',
+            'level': 77, 'pic': 'X(5)',
+            'pic_info': {'type': 'alphanumeric', 'length': 5},
+            'section': 'WORKING-STORAGE', 'is_filler': False, 'redefines': None,
+            'usage': 'DISPLAY', 'occurs': 0, 'occurs_depending': None,
+            'value': None, 'values': None,
+        })
+    fields_dict.extend(virtual)
+    return fields_dict
+
+
+def build_branch_tree(proc_text, fields=None, full_source=None):
    raw_lines = proc_text.split('\n')
-    paragraphs = scan_paragraphs(raw_lines)
+    # Collect data names (FD names, record names, field names) to block paragraph detection
+    blocked_names = set()
+    if fields:
+        for f in fields:
+            if isinstance(f, dict):
+                blocked_names.add(f['name'].upper())
+            else:
+                blocked_names.add(f.name.upper())
+    # Extract FD names from full source if available (includes DATA DIVISION)
+    src = full_source or proc_text
+    for m in re.finditer(r'\bFD\s+(\w[\w-]*)\b', src, re.IGNORECASE):
+        blocked_names.add(m.group(1).upper())
+    paragraphs = scan_paragraphs(raw_lines, blocked_names=blocked_names)

    first_para_name = None
    first_para_idx = None
@@ -169,6 +220,13 @@ class _BrParser:
            if m_search:
                seq.add(self._parse_search(m_search))
                continue
+            m_exec = re.match(r'^EXEC\s+SQL\s*$', line, re.IGNORECASE)
+            if m_exec:
+                sql_block = self._parse_sql_block()
+                assign_node = self._parse_sql(sql_block)
+                if assign_node:
+                    seq.add(assign_node)
+                continue
            m = re.match(r'^INITIALIZE\s+', line)
            if m:
                init_seq = self._parse_initialize()
@@ -192,7 +250,7 @@ class _BrParser:
                seq.add(self._parse_call())
                continue
            m = re.match(
-                r'^ACCEPT\s+(\w[\w-]*)(?:\s+FROM\s+(DATE|TIME|DAY|DAY-OF-WEEK|YEAR|YYYYMMDD|HHMMSS))?\s*$',
+                r'^ACCEPT\s+(\w[\w-]*)(?:\s+FROM\s+(DATE|TIME|DAY|DAY-OF-WEEK|YEAR|YYYYMMDD|HHMMSS|SYSIN|COMMAND-LINE|SYSERR|SYSOUT|ENVIRONMENT-NAME|ENVIRONMENT-VALUE))?\s*$',
                line, re.IGNORECASE
            )
            if m:
@@ -211,21 +269,11 @@ class _BrParser:
                seq.add(Assign(tgt, info))
                self.advance()
                # 跳过 READ 语句剩余行（AT END / NOT AT END / END-READ）
-                # 遇到新的语句关键词时停止，避免贪婪吞咽后续内容
-                _stmt_boundary = re.compile(
-                    r'^(IF |EVALUATE |PERFORM |SEARCH |INITIALIZE |STRING |'
-                    r'UNSTRING |CALL |ACCEPT |READ |WRITE |REWRITE |SET |'
-                    r'INSPECT |MOVE |COMPUTE |ADD |SUBTRACT |MULTIPLY |DIVIDE |'
-                    r'GO\s+TO |GOBACK |STOP\s+RUN|EXIT\s|CLOSE |OPEN |DISPLAY |'
-                    r'DELETE |START |'
-                    r'END-IF|END-PERFORM|END-EVALUATE|END-READ)', re.IGNORECASE)
                while self.pos < len(self.lines):
                    cl = self.clean()
                    if cl in ('END-READ', 'END-READ.'):
                        self.advance()
                        break
-                    if _stmt_boundary.match(cl):
-                        break
                    self.advance()
                continue
            m_set_false = re.match(r'^SET\s+(\w[\w-]*)\s+TO\s+FALSE\s*$', line, re.IGNORECASE)
@@ -366,11 +414,34 @@ class _BrParser:
            else:
                tgt_key = tgt_base
            src_clean = raw_src.strip("'").strip('"')
-            is_field_name = self.fields and any(f['name'] == src_clean for f in self.fields)
-            if is_field_name:
-                info = {'type': 'move', 'source_vars': [src_clean]}
+            # 检测引用修饰 FIELD(start:length)
+            rm = re.match(r'^(\w[\w-]*)\(\s*(\d+)\s*:\s*(\d+)\s*\)$', src_clean, re.IGNORECASE)
+            if rm:
+                base_src = rm.group(1)
+                refmod_start = int(rm.group(2))
+                refmod_length = int(rm.group(3))
+                is_field_name = self.fields and any(
+                    (f['name'] if isinstance(f, dict) else f.name) == base_src
+                    for f in self.fields
+                )
+                if is_field_name:
+                    info = {
+                        'type': 'move',
+                        'source_vars': [base_src],
+                        'refmod_start': refmod_start,
+                        'refmod_length': refmod_length,
+                    }
+                else:
+                    info = {'type': 'move_literal', 'literal': src_clean}
            else:
-                info = {'type': 'move_literal', 'literal': src_clean}
+                is_field_name = self.fields and any(
+                    (f['name'] if isinstance(f, dict) else f.name) == src_clean
+                    for f in self.fields
+                )
+                if is_field_name:
+                    info = {'type': 'move', 'source_vars': [src_clean]}
+                else:
+                    info = {'type': 'move_literal', 'literal': src_clean}
            self.assignments.setdefault(tgt_key, []).append(info)
            return Assign(tgt_key, info)

@@ -648,40 +719,11 @@ class _BrParser:
        line = self.clean()
        m = re.match(r'^IF\s+(.+?)(?:THEN)?\s*$', line)
        cond_text = m.group(1).strip()
-        # Truncate at COBOL statement keywords (single-line IF body after condition)
-        _stmt_pat = (r'\s(?:MOVE|DISPLAY|COMPUTE|ADD|SUBTRACT|MULTIPLY|DIVIDE|STRING|UNSTRING|'
-                     r'INITIALIZE|ACCEPT|CALL|PERFORM|EVALUATE|READ|WRITE|REWRITE|DELETE|START|'
-                     r'INSPECT|SET|IF|ELSE|END-IF|GO\b|EXIT\b|STOP\s+RUN|GOBACK|CLOSE|OPEN|SEARCH)\b')
-        _stmt_starts = re.compile(_stmt_pat, re.IGNORECASE)
-        rest = ""  # remaining text after condition truncation (single-line IF body)
-        sm = _stmt_starts.search(cond_text)
-        if sm:
-            rest = cond_text[sm.start():]
-            cond_text = cond_text[:sm.start()]
        self.advance()
-        if rest:
-            rest = rest.strip()
-            if rest.endswith('.'):
-                rest = rest[:-1]
-            # Split on ELSE but keep ELSE as its own line for parse_seq boundary
-            else_parts = re.split(r'(\s+ELSE\s+)', rest, maxsplit=1, flags=re.IGNORECASE)
-            parts = [p.strip() for p in else_parts if p.strip()]
-            insert_parts = []
-            for p in parts:
-                if p.upper() == 'ELSE':
-                    insert_parts.append('ELSE')
-                else:
-                    insert_parts.append(p if '.' in p else p + '.')
-            for part in reversed(insert_parts):
-                self.lines.insert(self.pos, part)
        # Join continuation lines (multi-line IF conditions)
-        _cont_keywords = (r'THEN|ELSE|END-IF|MOVE|DISPLAY|COMPUTE|ADD|SUBTRACT|MULTIPLY|'
-                          r'DIVIDE|STRING|UNSTRING|INITIALIZE|ACCEPT|CALL|PERFORM|EVALUATE|'
-                          r'READ|WRITE|REWRITE|DELETE|START|INSPECT|SET|IF|GO\b|EXIT\b|'
-                          r'STOP\s+RUN|GOBACK|CLOSE|OPEN|SEARCH')
        while self.pos < len(self.lines):
            peek = self.clean()
-            if re.match(r'^(' + _cont_keywords + r')', peek, re.IGNORECASE):
+            if re.match(r'^(THEN|ELSE|END-IF|EXEC|MOVE|IF|PERFORM|EVALUATE|COMPUTE|CALL|STRING|UNSTRING|INITIALIZE|ADD|SUBTRACT|MULTIPLY|DIVIDE|GO\b|EXIT\b|DISPLAY|ACCEPT|STOP|READ|WRITE|REWRITE|DELETE|SET|SEARCH|OPEN|CLOSE|INSPECT|CONTINUE|GOBACK|COMMIT|ROLLBACK|MERGE|SORT)', peek, re.IGNORECASE):
                break
            if peek.endswith('.'):
                cond_text += ' ' + peek.rstrip('.')
@@ -697,16 +739,8 @@ class _BrParser:
        node = BrIf(cond_text)
        node.cond_tree = parse_compound_condition(node.condition, self.fields)
        node.true_seq = self.parse_seq(['ELSE', 'END-IF'])
-        clean = self.clean()
-        if clean.startswith('ELSE'):
-            self.advance()  # consume ELSE keyword
-            rest = clean[4:].strip() if len(clean) > 4 else ''
-            # ELSE IF → reinsert IF statement as next line for recursive parse
-            if rest.upper().startswith('IF '):
-                self.lines.insert(self.pos, rest)
-            elif rest:
-                # Regular ELSE body text on same line as ELSE: reinsert
-                self.lines.insert(self.pos, rest if '.' in rest else rest + '.')
+        if self.clean() == 'ELSE':
+            self.advance()
            node.false_seq = self.parse_seq(['END-IF'])
        if self.clean() == 'END-IF':
            self.advance()
@@ -728,13 +762,6 @@ class _BrParser:
            m = re.match(r'^WHEN\s+(.+?)\s*$', line)
            if m:
                raw_val = m.group(1).strip().strip("'").strip('"')
-                # Truncate at COBOL statement keywords (single-line WHEN body after condition)
-                _eval_pat = (r'\s(?:MOVE|DISPLAY|COMPUTE|ADD|SUBTRACT|MULTIPLY|DIVIDE|STRING|UNSTRING|'
-                             r'INITIALIZE|ACCEPT|CALL|PERFORM|EVALUATE|READ|WRITE|REWRITE|DELETE|START|'
-                             r'INSPECT|SET|IF|ELSE|END-IF|GO\b|EXIT\b|STOP\b|GOBACK|CLOSE|OPEN|SEARCH)\b')
-                _eval_stmt = re.search(_eval_pat, raw_val, re.IGNORECASE)
-                if _eval_stmt:
-                    raw_val = raw_val[:_eval_stmt.start()]
                self.advance()
                # Capture multi-line WHEN conditions (AND/OR continuation)
                while self.pos < len(self.lines):
@@ -848,6 +875,14 @@ class _BrParser:
                if um:
                    condition = um.group(1).strip()
                    self.advance()
+                    # Join continuation lines (AND/OR on next lines)
+                    while self.pos < len(self.lines):
+                        peek = self.clean()
+                        if re.match(r'^(AND|OR)\s', peek, re.IGNORECASE):
+                            condition += ' ' + peek
+                            self.advance()
+                        else:
+                            break
                    break
                break
            if from_val and by_val and condition:
@@ -894,6 +929,30 @@ class _BrParser:
        m = re.match(r'^PERFORM\s+(\w[\w-]*)\s*$', line)
        if m:
            target = m.group(1).strip()
+            save_pos = self.pos
+            condition = None
+            self.advance()
+            while self.pos < len(self.lines):
+                nxt = self.clean()
+                um = re.match(r'^UNTIL\s+(.+)$', nxt)
+                if um:
+                    condition = um.group(1).strip()
+                    self.advance()
+                    # Join continuation lines (AND/OR on next lines)
+                    while self.pos < len(self.lines):
+                        peek = self.clean()
+                        if re.match(r'^(AND|OR)\s', peek, re.IGNORECASE):
+                            condition += ' ' + peek
+                            self.advance()
+                        else:
+                            break
+                    break
+                break
+            if condition:
+                node = BrPerform('para_until', target=target, condition=condition)
+                self._inline_perform(node, target)
+                return node
+            self.pos = save_pos
            node = BrPerform('para', target=target)
            self.advance()
            self._inline_perform(node, target)
@@ -962,12 +1021,18 @@ class _BrParser:
        parts = [self.clean()]
        self.advance()
        while self.pos < len(self.lines):
+            peek = self.peek()
            cl = self.clean()
            if cl == 'END-STRING':
                self.advance()
                break
+            # Stop when a new COBOL statement keyword is encountered
+            if re.match(r'^(MOVE|IF|PERFORM|EVALUATE|COMPUTE|CALL|STRING|UNSTRING|INITIALIZE|ADD|SUBTRACT|MULTIPLY|DIVIDE|GO\b|EXIT\b|DISPLAY|ACCEPT|STOP|READ|WRITE|REWRITE|DELETE|SET|SEARCH|OPEN|CLOSE|INSPECT|CONTINUE|GOBACK|COMMIT|ROLLBACK|MERGE|SORT|ELSE|END-IF|END-EVALUATE|END-PERFORM|END-READ|END-WRITE|END-STRING)', peek, re.IGNORECASE):
+                break
            parts.append(cl)
            self.advance()
+            if peek.rstrip().endswith('.'):
+                break
        full = ' '.join(parts)
        m = re.match(r'^STRING\s+(.+)\s+INTO\s+(\w[\w-]*)\s*$', full, re.IGNORECASE | re.DOTALL)
        if not m:
@@ -985,12 +1050,17 @@ class _BrParser:
        parts = [self.clean()]
        self.advance()
        while self.pos < len(self.lines):
+            peek = self.peek()
            cl = self.clean()
            if cl == 'END-UNSTRING':
                self.advance()
                break
+            if re.match(r'^(MOVE|IF|PERFORM|EVALUATE|COMPUTE|CALL|STRING|UNSTRING|INITIALIZE|ADD|SUBTRACT|MULTIPLY|DIVIDE|GO\b|EXIT\b|DISPLAY|ACCEPT|STOP|READ|WRITE|REWRITE|DELETE|SET|SEARCH|OPEN|CLOSE|INSPECT|CONTINUE|GOBACK|COMMIT|ROLLBACK|MERGE|SORT|ELSE|END-IF|END-EVALUATE|END-PERFORM|END-READ|END-WRITE|END-UNSTRING)', peek, re.IGNORECASE):
+                break
            parts.append(cl)
            self.advance()
+            if peek.rstrip().endswith('.'):
+                break
        full = ' '.join(parts)
        m = re.match(r'^UNSTRING\s+(.+?)\s+INTO\s+(.+?)\s*$', full, re.IGNORECASE | re.DOTALL)
        if not m:
@@ -1088,6 +1158,75 @@ class _BrParser:
        self.advance()
        return Assign(tgt, info)

+    # ── EXEC SQL parsing ──
+
+    _RE_SELECT_INTO = re.compile(
+        r'SELECT\s+(.*?)\s+INTO\s+(:\w[\w-]*(?:\s*,\s*:\w[\w-]*(?::\w[\w-]*)?)*)'
+        r'\s+FROM\s+(\w[\w-]*)',
+        re.IGNORECASE
+    )
+
+    _RE_WHERE = re.compile(r'\bWHERE\b\s+(.*)', re.IGNORECASE)
+
+    def _parse_sql_block(self) -> str:
+        """Consume lines from EXEC SQL until END-EXEC. Returns SQL text."""
+        texts = []
+        self.advance()
+        while self.pos < len(self.lines):
+            line = self.lines[self.pos].rstrip('.')
+            m = re.match(r'(.*?)END-EXEC\.?\s*$', line, re.IGNORECASE)
+            if m:
+                before = m.group(1).strip()
+                if before:
+                    texts.append(before)
+                self.advance()
+                break
+            texts.append(line)
+            self.advance()
+        result = ' '.join(texts)
+        result = re.sub(r'\s+', ' ', result)
+        return result
+
+    def _parse_sql(self, sql_text: str):
+        """Parse SQL text from EXEC SQL block. Returns Assign node or None."""
+        m = self._RE_SELECT_INTO.search(sql_text)
+        if not m:
+            return None
+
+        select_list = m.group(1).strip()
+        into_raw = m.group(2).strip()
+        from_table = m.group(3).strip().upper()
+        remaining = sql_text[m.end():].strip()
+
+        # Parse INTO variables (handle indicator vars: :host:indicator)
+        into_vars = []
+        for v in re.split(r'\s*,\s*', into_raw):
+            v = v.strip().lstrip(':')
+            parts = v.split(':')
+            into_vars.append(parts[0].upper())
+            if len(parts) > 1:
+                into_vars.append(parts[1].upper())
+
+        # Extract WHERE clause
+        where_clause = ''
+        wm = self._RE_WHERE.search(remaining)
+        if wm:
+            where_clause = wm.group(1).strip()
+
+        info = {
+            'type': 'exec_sql_select',
+            'table': from_table,
+            'select_list': select_list,
+            'into_vars': into_vars,
+            'where': where_clause,
+            'sql_text': sql_text,
+        }
+
+        for var in into_vars:
+            self.assignments.setdefault(var, []).append(info)
+
+        return Assign(into_vars[0], info)
+

 # ── 工具函数 ──

@@ -1141,8 +1280,6 @@ def trace_to_root(field_name, assignments, fields, path_assign=None):
                asgn = asgn_list
        else:
            asgn_list = assignments[var]
-            if not asgn_list:
-                break
            asgn = asgn_list[-1]
            if isinstance(asgn_list, list):
                for a in reversed(asgn_list):
@@ -1152,6 +1289,8 @@ def trace_to_root(field_name, assignments, fields, path_assign=None):
                    asgn = a
                    break
        chain.append((var, asgn))
+        if asgn.get('type') in ('unstring_split',):
+            break
        if not asgn.get('source_vars'):
            break
        sv = asgn['source_vars']
@@ -1332,8 +1471,36 @@ def propagate_assignments(rec, assignments, fields, file_sec=None):
                src = asgn['source_vars'][0]
                resolved_tgt = _resolve_subscript(tgt, rec)
                resolved_src = _resolve_subscript(src, rec)
-                if resolved_src in rec:
-                    rec[resolved_tgt] = rec[resolved_src]
+                tgt_children = _init_child_names(resolved_tgt, fields)
+                if tgt_children:
+                    # Group MOVE: propagate to child fields by position
+                    src_children = _init_child_names(resolved_src, fields)
+                    if src_children:
+                        src_str = ''.join(str(rec.get(c, '')) for c in src_children)
+                    elif resolved_src in rec:
+                        src_str = str(rec[resolved_src])
+                    else:
+                        src_str = ''
+                    if src_str:
+                        rec[resolved_tgt] = src_str
+                        pos = 0
+                        for tgt_c in tgt_children:
+                            child_len = 0
+                            for f in fields:
+                                if f['name'] == tgt_c:
+                                    pi = f.get('pic_info', {})
+                                    child_len = pi.get('digits', 0) + pi.get('decimal', 0) or pi.get('length', 0)
+                                    break
+                            if child_len > 0:
+                                rec[tgt_c] = src_str[pos:pos + child_len] if pos < len(src_str) else ('0' if child_len else '')
+                                pos += child_len
+                elif resolved_src in rec:
+                    src_val = str(rec[resolved_src])
+                    if asgn.get('refmod_start') and asgn.get('refmod_length'):
+                        start = asgn['refmod_start'] - 1
+                        end = start + asgn['refmod_length']
+                        src_val = src_val[start:end]
+                    rec[resolved_tgt] = src_val

        # Pass 2: literal MOVE
        for tgt, asgn in flat_list:
@@ -1439,9 +1606,7 @@ def propagate_assignments(rec, assignments, fields, file_sec=None):
            resolved_tgt = _resolve_subscript(tgt, rec)
            if resolved_tgt not in rec:
                continue
-            inspect_src = asgn.get('tgt', tgt)
-            resolved_src = _resolve_subscript(inspect_src, rec)
-            src_val = str(rec.get(resolved_src, ''))
+            src_val = str(rec[resolved_tgt])
            for op_type, params in asgn.get('sub_ops', []):
                if op_type == 'tally':
                    cv = params['count_var'].upper()
@@ -1495,6 +1660,10 @@ def propagate_assignments(rec, assignments, fields, file_sec=None):
                src_var = asgn.get('source_vars', [None])[0]
                resolved_src = _resolve_subscript(src_var, rec) if src_var else None
                idx = asgn.get('index', 0)
+                if resolved_src and resolved_src not in rec:
+                    children = _init_child_names(resolved_src, fields)
+                    if children:
+                        resolved_src = children[0]
                if resolved_src and resolved_src in rec:
                    src_val = str(rec[resolved_src])
                    ftype = pi.get('type', 'unknown')
@@ -1556,6 +1725,23 @@ def propagate_assignments(rec, assignments, fields, file_sec=None):
                    else:
                        rec[resolved_tgt] = val.ljust(length)[:length] if length else val

+        # Pass 9: EXEC SQL SELECT INTO
+        for tgt, asgn in flat_list:
+            if asgn.get('type') == 'exec_sql_select':
+                resolved_tgt = _resolve_subscript(tgt, rec)
+                if resolved_tgt not in rec:
+                    continue
+                src_val = rec.get(resolved_tgt, '')
+                pi = pi_map.get(resolved_tgt, {})
+                if pi.get('type') == 'numeric':
+                    total = pi.get('digits', 0) + pi.get('decimal', 0)
+                    if total > 0:
+                        rec[resolved_tgt] = str(src_val).zfill(total)
+                elif pi.get('type') in ('alphanumeric', 'alphabetic'):
+                    length = pi.get('length', 0)
+                    if length > 0:
+                        rec[resolved_tgt] = str(src_val).ljust(length)[:length]
+
        # Pass 8: SET var TO TRUE (88-level)
        for tgt, asgn in flat_list:
            if asgn['type'] == 'set_true':
@@ -1649,6 +1835,13 @@ def classify_field_roles(tree, assignments, fields, source=None, proc_text=None)
            elif atype == 'write_from':
                if tgt_base in counts:
                    counts[tgt_base]['read'] += 1
+            elif atype == 'exec_sql_select':
+                if tgt_base in counts:
+                    counts[tgt_base]['write'] += 1
+                for v in node.source_info.get('into_vars', []):
+                    v_base = _basename(v)
+                    if v_base in counts:
+                        counts[v_base]['write'] += 1
            elif atype == 'set_true':
                if tgt_base in counts:
                    counts[tgt_base]['write'] += 1
@@ -1705,3 +1898,52 @@ def classify_field_roles(tree, assignments, fields, source=None, proc_text=None)
        if name not in result:
            result[name] = role
    return result
+
+
+# ── 多 WRITE 检测 ──
+
+
+def _collect_write_fds(node, fds_set, field_to_fd):
+    """Recursively collect output FD names from WRITE Assigns."""
+    if isinstance(node, Assign):
+        st = node.source_info.get('type', '')
+        if st in ('write_bare', 'write_from'):
+            fname = node.target
+            if fname in field_to_fd:
+                fds_set.add(field_to_fd[fname])
+    elif isinstance(node, BrSeq):
+        for c in node.children:
+            _collect_write_fds(c, fds_set, field_to_fd)
+    elif isinstance(node, BrIf):
+        _collect_write_fds(node.true_seq, fds_set, field_to_fd)
+        _collect_write_fds(node.false_seq, fds_set, field_to_fd)
+    elif isinstance(node, BrEval):
+        for _, seq in node.when_list:
+            _collect_write_fds(seq, fds_set, field_to_fd)
+        _collect_write_fds(node.other_seq, fds_set, field_to_fd)
+    elif isinstance(node, BrPerform):
+        _collect_write_fds(node.body_seq, fds_set, field_to_fd)
+    elif isinstance(node, BrSearch):
+        _collect_write_fds(node.at_end_seq, fds_set, field_to_fd)
+        for _, seq in node.when_list:
+            _collect_write_fds(seq, fds_set, field_to_fd)
+
+
+def _find_multi_write_fds(tree, field_to_fd):
+    """返回在 INIT 段（主循环前）和循环内部都有 WRITE 的 FD 名集合。
+    主循环 = 顶层 BrSeq 中最后一个 UNTIL 型 BrPerform（包含 para_until）。
+    """
+    if not isinstance(tree, BrSeq):
+        return set()
+    main_loop_idx = -1
+    for i, child in enumerate(tree.children):
+        if isinstance(child, BrPerform) and child.perf_type in ('until', 'para_until', 'varying', 'para_varying'):
+            main_loop_idx = i
+    if main_loop_idx < 0:
+        return set()
+    pre_write = set()
+    for child in tree.children[:main_loop_idx]:
+        _collect_write_fds(child, pre_write, field_to_fd)
+    loop_write = set()
+    _collect_write_fds(tree.children[main_loop_idx], loop_write, field_to_fd)
+    return pre_write & loop_write
@@ -8,6 +8,7 @@ from pathlib import Path
 logger = logging.getLogger(__name__)
 from .models import BrSeq, BrIf, BrEval, BrPerform, BrSearch, CondLeaf
 from .cond import parse_single_condition, parse_compound_condition, is_field, collect_leaves, evaluate_tree
+from .gcov import mark_from_gcov


 # ── 数据模型 ──
@@ -190,11 +191,14 @@ def _mark_if(dp, cons):
                if _match_leaf(c, leaf):
                    assignment[leaf] = c[3]
                    break
-        if len(assignment) == len(dp.cond_leaves):
-            if evaluate_tree(dp.cond_tree, assignment):
-                dp.active_branches.add('T')
-            else:
-                dp.active_branches.add('F')
+        if assignment:
+            try:
+                if evaluate_tree(dp.cond_tree, assignment):
+                    dp.active_branches.add('T')
+                else:
+                    dp.active_branches.add('F')
+            except KeyError:
+                pass
    else:
        matched = 0
        for leaf in dp.leaves:
@@ -253,6 +257,15 @@ def _mark_eval(dp, cons, fields=None):
                dp.active_branches.add(name)
        elif c[0] == dp.label and c[1] == 'not_in':
            dp.active_branches.add('OTHER')
+    thru_lows = {c[2] for c in cons if c[0] == dp.label and c[1] == '>=' and c[3]}
+    thru_highs = {c[2] for c in cons if c[0] == dp.label and c[1] == '<=' and c[3]}
+    if thru_lows or thru_highs:
+        for when_val, _ in dp.when_list:
+            thru_m = re.match(r'^(\d+)\s+THRU\s+(\d+)$', str(when_val), re.IGNORECASE)
+            if thru_m and thru_m.group(1) in thru_lows and thru_m.group(2) in thru_highs:
+                name = f"WHEN {when_val}"
+                if name in dp.branch_names:
+                    dp.active_branches.add(name)


 def _mark_search(dp, cons, fields=None):
@@ -309,11 +322,14 @@ def _mark_perform(dp, cons):
                if _match_leaf(c, leaf):
                    assignment[leaf] = c[3]
                    break
-        if len(assignment) == len(dp.cond_leaves):
-            if evaluate_tree(dp.cond_tree, assignment):
-                dp.active_branches.add('Skip')
-            else:
-                dp.active_branches.add('Enter')
+        if assignment:
+            try:
+                if evaluate_tree(dp.cond_tree, assignment):
+                    dp.active_branches.add('Skip')
+                else:
+                    dp.active_branches.add('Enter')
+            except KeyError:
+                pass
    else:
        for c in cons:
            if c[0] == dp.label or any(c[0] == f for f in _get_fields_in_cond(dp.label)):
@@ -330,7 +346,6 @@ def _get_fields_in_cond(cond_text):
 # ── 行号定位（基于原始源文本）──

 def locate_decision_lines(decision_points, raw_source):
-    """在原始源文本中搜索每个决策点的近似行号"""
    lines = raw_source.upper().splitlines()
    for dp in decision_points:
        patterns = _build_search_patterns(dp)
@@ -344,7 +359,6 @@ def locate_decision_lines(decision_points, raw_source):


 def _normalize(text):
-    """标准化条件文本用于比较：去多余空白、标准化引号"""
    t = re.sub(r'\s+', ' ', text).strip()
    t = t.replace('"', "'")
    return t
@@ -360,14 +374,13 @@ def _build_search_patterns(dp):
        texts.append((r'\bUNTIL\b', dp.condition if hasattr(dp, 'condition') else dp.label
                       if dp.label else ''))
    else:
-        return [r'$^']  # 永不匹配
+        return [r'$^']

    patterns = []
    for keyword, condition in texts:
        if not condition:
            continue
        norm_cond = _normalize(condition)
-        # 转义正则特殊字符，但保留空格（替换为\s+）
        esc = re.escape(norm_cond)
        esc = esc.replace(r'\ ', r'\s+')
        esc = esc.replace(r'\'', r"['\"]")
@@ -411,7 +424,6 @@ _DETAIL_HTML = '''<!DOCTYPE html>
  }}
  .section h2 {{ font-size: 16px; font-weight: 600; color: #1a237e; margin-bottom: 16px; padding-bottom: 8px; border-bottom: 2px solid #e8eaf6; }}

-  /* 统计卡片行 */
  .stats-row {{ display: flex; gap: 16px; flex-wrap: wrap; }}
  .stat-card {{
    flex: 1; min-width: 140px; background: #f5f7fa; border-radius: 8px; padding: 14px 18px;
@@ -430,7 +442,6 @@ _DETAIL_HTML = '''<!DOCTYPE html>
  .dot-red {{ background: #ffcdd2; }}
  .dot-amber {{ background: #fff9c4; }}

-  /* 进度条 */
  .prog-bar-detail {{
    width: 100%; height: 12px; border-radius: 6px; background: #ffcdd2; overflow: hidden; margin: 10px 0 6px 0;
  }}
@@ -440,20 +451,17 @@ _DETAIL_HTML = '''<!DOCTYPE html>
  .prog-fill-detail.amber {{ background: linear-gradient(90deg, #ffca28, #ff8f00); }}
  .prog-fill-detail.red {{ background: linear-gradient(90deg, #ef5350, #ff1744); }}

-  /* 表格 */
  table {{ width: 100%; border-collapse: collapse; table-layout: fixed; }}
  th, td {{ padding: 10px 14px; text-align: left; border-bottom: 1px solid #eceff1; word-break: break-all; }}
  th {{ background: #f5f7fa; font-weight: 600; font-size: 12px; color: #78909c; text-transform: uppercase; letter-spacing: 0.5px; }}
  tbody tr:hover {{ background: #e8eaf6; }}
  tbody tr:last-child td {{ border-bottom: none; }}

-  /* 决策表列宽 */
  .dp-table th:nth-child(1), .dp-table td:nth-child(1) {{ width: 50px; }}
  .dp-table th:nth-child(2), .dp-table td:nth-child(2) {{ width: 70px; }}
  .dp-table th:nth-child(3), .dp-table td:nth-child(3) {{ width: 50px; }}
  .dp-table th:nth-child(5), .dp-table td:nth-child(5) {{ width: 160px; }}

-  /* 叶条件表列宽 */
  .leaf-table th:nth-child(1), .leaf-table td:nth-child(1) {{ width: 110px; }}
  .leaf-table th:nth-child(2), .leaf-table td:nth-child(2) {{ width: 60px; }}
  .leaf-table th:nth-child(4), .leaf-table td:nth-child(4),
@@ -468,7 +476,6 @@ _DETAIL_HTML = '''<!DOCTYPE html>
  .cond-ok {{ color: #00c853; }}
  .cond-miss {{ color: #ff5252; }}

-  /* 源码 */
  .source-section {{ font-family: "Cascadia Code","Fira Code","JetBrains Mono",Consolas,monospace; font-size: 13px; }}
  .source-line {{ display: flex; padding: 1px 0; }}
  .source-line:hover {{ background: #f5f5f5; }}
@@ -534,20 +541,22 @@ _DETAIL_HTML = '''<!DOCTYPE html>

  {source_section}

+  {source_note}
+
 </div>
 </body>
 </html>'''


 def generate_html_report(decision_points, leaf_stats, source_lines, outpath,
-                         filename='', index_relpath=None, covered_lines=None):
+                         filename='', index_relpath=None, covered_lines=None,
+                         source_note=''):
    title = f"覆盖率报告 — {filename}" if filename else "覆盖率报告"

    total_branches = sum(len(dp.branch_names) for dp in decision_points)
    covered_branches = sum(len(dp.active_branches) for dp in decision_points)
    implied_branches = sum(len(dp.implied_branches) for dp in decision_points)
    if covered_lines:
-        # 无分支程序：隐式 100%
        total_branches = max(total_branches, 1)
        covered_branches = max(covered_branches, 1)

@@ -555,15 +564,13 @@ def generate_html_report(decision_points, leaf_stats, source_lines, outpath,
    covered_leaves = (sum(1 for l in leaf_stats if l.covered_true) +
                      sum(1 for l in leaf_stats if l.covered_false))

-    # 计算数值
-    is_implicit = bool(covered_lines)  # 无分支程序，隐式 100%
+    is_implicit = bool(covered_lines)
    dec_pct_val = (covered_branches / total_branches * 100) if total_branches else 0
    dec_pct_text = "100% ✓" if is_implicit else (f"{dec_pct_val:.1f}%" if total_branches else "无")
    dec_frac = "全部覆盖" if is_implicit else (f"{covered_branches}/{total_branches}" if total_branches else "—")
    cond_frac = f"{covered_leaves}/{total_leaves}" if total_leaves else "—"
    implied_text = f'（+{implied_branches - covered_branches} 推断）' if implied_branches > covered_branches else ''

-    # 颜色
    if is_implicit or not total_branches or dec_pct_val >= 100:
        dec_val_cls = 'val-green'
        bar_cls = ''
@@ -581,7 +588,6 @@ def generate_html_report(decision_points, leaf_stats, source_lines, outpath,
    else:
        cond_val_cls = 'val-red'

-    # 决策点表格
    if decision_points:
        dp_rows = []
        for dp in decision_points:
@@ -608,7 +614,6 @@ def generate_html_report(decision_points, leaf_stats, source_lines, outpath,
    else:
        decision_table = ''

-    # 叶条件表格
    if leaf_stats:
        leaf_rows = []
        for leaf in leaf_stats:
@@ -627,7 +632,6 @@ def generate_html_report(decision_points, leaf_stats, source_lines, outpath,
    else:
        leaf_table = ''

-    # 源码标注
    if source_lines:
        line_cov = {}
        for dp in decision_points:
@@ -643,7 +647,6 @@ def generate_html_report(decision_points, leaf_stats, source_lines, outpath,
                else:
                    line_cov[dp.source_line].append('hl-amber')

-        # 无分支程序：所有 PD 行标记为已覆盖
        if covered_lines:
            for ln in covered_lines:
                line_cov.setdefault(ln, []).append('hl-green')
@@ -677,6 +680,7 @@ def generate_html_report(decision_points, leaf_stats, source_lines, outpath,
        leaf_table=leaf_table,
        source_section=source_section,
        dp_count_text=('—' if is_implicit else str(len(decision_points))),
+        source_note=source_note,
    )

    outpath = Path(outpath)
@@ -699,7 +703,6 @@ _INDEX_HTML = '''<!DOCTYPE html>
    background: #f0f2f5; color: #37474f; font-size: 14px; line-height: 1.6;
  }}

-  /* 顶栏 */
  .topbar {{
    background: linear-gradient(135deg, #1a237e, #283593);
    color: #fff; padding: 18px 32px;
@@ -711,7 +714,6 @@ _INDEX_HTML = '''<!DOCTYPE html>

  .container {{ max-width: 1200px; margin: 0 auto; padding: 28px 24px; }}

-  /* 统计卡片 */
  .cards {{ display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 16px; margin-bottom: 28px; }}
  .card {{
    background: #fff; border-radius: 10px; padding: 20px 22px;
@@ -725,7 +727,6 @@ _INDEX_HTML = '''<!DOCTYPE html>
  .num-red {{ color: #ff1744; }}
  .num-blue {{ color: #1a237e; }}

-  /* 图表行 */
  .charts-row {{
    display: flex; gap: 32px; justify-content: center; flex-wrap: wrap;
    background: #fff; border-radius: 10px; padding: 28px 20px;
@@ -744,7 +745,6 @@ _INDEX_HTML = '''<!DOCTYPE html>
  .legend .dot-red {{ background: #ff5252; }}
  .legend .dot-amber {{ background: #ffd740; }}

-  /* 工具栏 */
  .toolbar {{
    display: flex; justify-content: space-between; align-items: center;
    margin-bottom: 14px; flex-wrap: wrap; gap: 10px;
@@ -764,7 +764,6 @@ _INDEX_HTML = '''<!DOCTYPE html>
  .toolbar .sort-btn:hover {{ background: #eceff1; }}
  .toolbar .sort-btn.active {{ background: #e8eaf6; border-color: #3f51b5; color: #1a237e; font-weight: 500; }}

-  /* 表格 */
  .table-wrap {{
    background: #fff; border-radius: 10px; overflow: hidden;
    box-shadow: 0 1px 4px rgba(0,0,0,0.06);
@@ -789,7 +788,6 @@ _INDEX_HTML = '''<!DOCTYPE html>
  .prog-name a {{ color: #283593; text-decoration: none; }}
  .prog-name a:hover {{ text-decoration: underline; color: #1a237e; }}

-  /* 进度条 */
  .prog-wrap {{
    display: inline-flex; align-items: center; gap: 10px; width: 100%;
  }}
@@ -812,7 +810,6 @@ _INDEX_HTML = '''<!DOCTYPE html>
  .prog-fill.full {{ border-radius: 10px; }}
  .prog-text {{ font-family: "Cascadia Code","Fira Code","JetBrains Mono",Consolas,monospace; font-size: 13px; white-space: nowrap; min-width: 48px; }}

-  /* 状态徽标 */
  .badge {{
    display: inline-block; padding: 3px 10px; border-radius: 12px;
    font-size: 12px; font-weight: 600; letter-spacing: 0.3px;
@@ -821,10 +818,8 @@ _INDEX_HTML = '''<!DOCTYPE html>
  .badge-warn {{ background: #fff8e1; color: #e65100; }}
  .badge-fail {{ background: #ffebee; color: #c62828; }}

-  /* 条件覆盖列 */
  .cond-cell {{ font-family: "Cascadia Code","Fira Code","JetBrains Mono",Consolas,monospace; font-size: 13px; }}

-  /* 响应式 */
  @media (max-width: 680px) {{
    .topbar {{ flex-direction: column; align-items: flex-start; gap: 6px; padding: 14px 18px; }}
    .container {{ padding: 16px 12px; }}
@@ -968,7 +963,6 @@ function filterTable() {{


 def _ring_svg(pct, color_stops):
-    """生成 SVG 圆环 HTML。pct: 0-100 浮点数。"""
    r = 54
    circ = 2 * 3.14159265 * r
    offset = circ * (1 - pct / 100) if pct > 0 else circ
@@ -995,7 +989,6 @@ def _ring_svg(pct, color_stops):


 def generate_coverage_index(programs, outdir):
-    """生成覆盖率总括索引页。"""
    from datetime import datetime
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')

@@ -1038,7 +1031,6 @@ def generate_coverage_index(programs, outdir):
        cond_text = f"{cc}/{tc}" if tc else "—"
        bar_pct = int(pct_dec)

-        # 进度条颜色
        if imp or pct_dec >= 100:
            bar_cls = ''
        elif pct_dec >= 80:
@@ -1046,7 +1038,6 @@ def generate_coverage_index(programs, outdir):
        else:
            bar_cls = ' red'

-        # 状态徽标
        if tb == 0 or (cb == tb and not (ib > cb)):
            badge = '<span class="badge badge-pass">&#10003; 完全</span>'
        elif cb == tb and ib > cb:
@@ -1056,7 +1047,6 @@ def generate_coverage_index(programs, outdir):
        else:
            badge = '<span class="badge badge-fail">&#10007; 欠缺</span>'

-        # 条件覆盖数字颜色
        if tc:
            cond_pct = cc / tc * 100
            cond_color = 'num-green' if cond_pct == 100 else ('num-amber' if cond_pct >= 80 else 'num-red')
@@ -1107,7 +1097,6 @@ def generate_coverage_index(programs, outdir):
 # ── PROCEDURE DIVISION 行范围定位（用于无分支程序标记）──

 def _find_proc_range(raw_source: str):
-    """返回 PROCEDURE DIVISION 的行范围 (start_line, end_line) 1-indexed，或 None。"""
    lines = raw_source.splitlines()
    proc_start = None
    for i, line in enumerate(lines):
@@ -1116,26 +1105,36 @@ def _find_proc_range(raw_source: str):
            break
    if proc_start is None:
        return None
-    # 找下一个 DIVISION 作为结束边界（或文件尾）
    for i in range(proc_start, len(lines)):
        if re.search(r'(IDENTIFICATION|DATA|ENVIRONMENT)\s+DIVISION', lines[i].upper()):
-            return (proc_start, i)  # 不包含下一个 DIVISION
+            return (proc_start, i)
    return (proc_start, len(lines) + 1)


 # ── 接入入口 ──

 def run_coverage(branch_tree, branch_paths_with_assigns, fields,
-                 raw_source, output_prefix, index_relpath=None):
-    """完整覆盖率流程：收集 → 标记 → 定位 → 输出。
-
-    Returns:
-        dict: 汇总数据，用于总括页聚合
-    """
+                 raw_source, output_prefix, index_relpath=None,
+                 gcov_data=None):
    decision_points, leaf_stats = collect_decision_points(branch_tree, fields)

    mark_coverage(decision_points, leaf_stats, branch_paths_with_assigns, fields)

+    if gcov_data:
+        mark_from_gcov(decision_points, gcov_data, branch_tree)
+        for ls in leaf_stats:
+            ls.covered_true = False
+            ls.covered_false = False
+
+    _source_note = ''
+    if gcov_data:
+        _source_note = (
+            '<div style="margin-top:16px;font-size:12px;color:#90a4ae;'
+            'text-align:center;border-top:1px solid #eceff1;padding-top:12px;">'
+            '覆盖率基于 gcov 运行时数据'
+            '</div>'
+        )
+
    if raw_source:
        locate_decision_lines(decision_points, raw_source)

@@ -1146,7 +1145,6 @@ def run_coverage(branch_tree, branch_paths_with_assigns, fields,
                     sum(1 for l in leaf_stats if l.covered_false))
    leaf_total = len(leaf_stats) * 2

-    # 无决策点但有路径 → PROCEDURE DIVISION 全部覆盖
    covered_lines = set()
    if total == 0 and branch_paths_with_assigns and raw_source:
        proc_range = _find_proc_range(raw_source)
@@ -1161,9 +1159,9 @@ def run_coverage(branch_tree, branch_paths_with_assigns, fields,
                             f"{output_prefix}_coverage.html",
                             Path(output_prefix).stem,
                             index_relpath=index_relpath,
-                             covered_lines=covered_lines)
+                             covered_lines=covered_lines,
+                             source_note=_source_note)

-    # 控制台摘要
    if total or leaf_total:
        logger.info(f"\n=== 分支覆盖率 ===")
        if covered_lines and not decision_points:
@@ -1194,7 +1192,7 @@ def run_coverage(branch_tree, branch_paths_with_assigns, fields,
    implicit_100 = bool(covered_lines)
    return {
        'name': Path(output_prefix).stem if output_prefix else '',
-        'detail_relpath': ('../' + Path(output_prefix).stem + '_coverage.html'
+        'detail_relpath': (Path(output_prefix).stem + '_coverage.html'
                           if output_prefix else ''),
        'total_branches': total,
        'covered_branches': covered,
@@ -1208,15 +1206,6 @@ def run_coverage(branch_tree, branch_paths_with_assigns, fields,


 def check_coverage(structure: dict, test_records: list[dict]) -> dict:
-    """报告 COBOL 源码的静态分支结构信息。
-
-    注意: 静态分析无法精确判断每条测试数据运行时覆盖了哪些分支。
-    精确的路径追踪依赖 gcov（Phase 3）。此处仅报告总分支数和记录生成情况。
-
-    Returns:
-        dict with: paragraph_rate, branch_rate, decision_rate, total_branches,
-                   total_paragraphs, records_count, note
-    """
    total_paragraphs = structure.get("total_paragraphs", 0)
    total_branches = structure.get("total_branches", 0)
    decision_points = structure.get("decision_points", [])
@@ -8,12 +8,52 @@ from .core import trace_to_root, invert_through_chain, propagate_assignments, _b

 logger = logging.getLogger(__name__)

-_STOP = ('__STOP__', '', None, True)
-_MAX_PATHS = 500
+_STOP_EXIT_PERFORM = ('__STOP_EXIT_PERFORM__', '', None, True)
+_STOP_SENTINEL = ('__STOP__', '', None, True)
+_ABEND_SENTINEL = ('__ABEND__', '', None, True)
+_SENTINELS_ALL = {_STOP_EXIT_PERFORM, _STOP_SENTINEL, _ABEND_SENTINEL}
+_ABEND_PROGRAMS = {'ABENDPGM'}
+
+def extend_abend_programs(names: list[str]):
+    _ABEND_PROGRAMS.update(n.upper() for n in names)
+_MAX_PATHS = 10000
+
+
+def _is_sentinel(c):
+    return c is _STOP_EXIT_PERFORM or c is _STOP_SENTINEL or c is _ABEND_SENTINEL
+
+
+def _hashable_cons(cons):
+    """将约束列表转为可哈希形式（列表值转tuple）用于签名去重。"""
+    result = []
+    for c in cons:
+        if len(c) == 4:
+            field, op, val, want = c
+            if isinstance(val, list):
+                val = tuple(val)
+            result.append((field, op, val, want))
+        else:
+            result.append(c)
+    return result


 def _filter_stop(cons):
-    return [c for c in cons if c is not _STOP]
+    """Legacy: strip all sentinel markers. 供旧测试代码使用。"""
+    return [c for c in cons if not _is_sentinel(c)]
+
+
+def get_term_type(cons):
+    """提取终止类型，返回 (filtered_cons, term_type)."""
+    remaining = []
+    term = 'normal'
+    for c in cons:
+        if c is _ABEND_SENTINEL:
+            term = 'abend'
+        elif _is_sentinel(c):
+            pass
+        else:
+            remaining.append(c)
+    return remaining, term


 def _cap_paths(paths):
@@ -29,11 +69,11 @@ def _cap_paths_fair(new_active, child_paths):
    k = len(child_paths)
    if k <= 1:
        return new_active[:_MAX_PATHS]
-    # 分离 STOP 路径（不参与组合，直接保留）
-    stop_paths = [(p, a) for p, a in new_active if any(c is _STOP for c in p)]
-    combined = [(p, a) for p, a in new_active if not any(c is _STOP for c in p)]
+    # 分离 sentinel 路径（不参与组合，直接保留）
+    stop_paths = [(p, a) for p, a in new_active if any(_is_sentinel(c) for c in p)]
+    combined = [(p, a) for p, a in new_active if not any(_is_sentinel(c) for c in p)]
    n_pred = len(combined) // k
-    result = list(stop_paths)
+    result = []
    if n_pred <= 1:
        result.extend(combined[:_MAX_PATHS - len(result)])
        return result[:_MAX_PATHS]
@@ -75,24 +115,29 @@ def enum_paths(node, fields):
        for child in node.children:
            child_paths = _cap_paths(enum_paths(child, fields))
            if not child_paths:
-                break
+                continue
            new_active = []
+            covered_sigs = set()
            for p_cons, p_assign in paths:
-                if any(c is _STOP for c in p_cons):
+                if any(_is_sentinel(c) for c in p_cons):
                    new_active.append((p_cons, p_assign))
                    continue
                for cp_cons, cp_assign in child_paths:
-                    merged = {}
-                    for d in (p_assign, cp_assign):
-                        for k, v in d.items():
-                            merged.setdefault(k, []).extend(v if isinstance(v, list) else [v])
                    merged_cons = p_cons + list(cp_cons)
-                    new_active.append((merged_cons, merged))
-                    if len(new_active) >= _MAX_PATHS:
+                    sig = frozenset(_hashable_cons(merged_cons))
+                    if sig not in covered_sigs:
+                        covered_sigs.add(sig)
+                        merged = {}
+                        for d in (p_assign, cp_assign):
+                            for k, v in d.items():
+                                merged.setdefault(k, []).extend(v if isinstance(v, list) else [v])
+                        new_active.append((merged_cons, merged))
+            if not new_active:
+                for pc, pa in paths:
+                    if not any(_is_sentinel(c) for c in pc):
+                        new_active.append((pc, dict(pa)))
                        break
-                if len(new_active) >= _MAX_PATHS:
-                    break
-            paths = _cap_paths_fair(new_active, child_paths)
+            paths = new_active
        return paths

    elif isinstance(node, BrIf):
@@ -186,6 +231,14 @@ def enum_paths(node, fields):
                        constraints.append((cond.field, cond.op, cond.value, True))
                        paths.append((constraints + sp_cons, sp_assign))
                    prior_false_sets.append([(cond.field, cond.op, cond.value, False)])
+                elif cond and isinstance(cond, CondNot) and isinstance(cond.child, CondLeaf) and is_field(cond.child.field, fields):
+                    leaf = cond.child
+                    sub = _cap_paths(enum_paths(seq, fields))
+                    for sp_cons, sp_assign in (sub or [([], {})]):
+                        constraints = [c for pf in prior_false_sets for c in pf]
+                        constraints.append((leaf.field, leaf.op, leaf.value, False))
+                        paths.append((constraints + sp_cons, sp_assign))
+                    prior_false_sets.append([(leaf.field, leaf.op, leaf.value, True)])
                elif cond:
                    leaves = collect_leaves(cond)
                    if leaves and all(is_field(l.field, fields) for l in leaves):
@@ -232,13 +285,36 @@ def enum_paths(node, fields):
        paths = []
        for value, seq in node.when_list:
            sub = _cap_paths(enum_paths(seq, fields))
-            for sp_cons, sp_assign in (sub or [([], {})]):
-                paths.append(([(node.subject, '=', value, True)] + sp_cons, sp_assign))
+            thru_m = re.match(r'^(\d+)\s+THRU\s+(\d+)$', str(value), re.IGNORECASE)
+            if thru_m and not node.subjects:
+                low, high = thru_m.group(1), thru_m.group(2)
+                for sp_cons, sp_assign in (sub or [([], {})]):
+                    paths.append(([(node.subject, '>=', low, True), (node.subject, '<=', high, True)] + sp_cons, sp_assign))
+                    paths.append(([(node.subject, '<=', high, True), (node.subject, '>=', low, True)] + sp_cons, sp_assign))
+            else:
+                for sp_cons, sp_assign in (sub or [([], {})]):
+                    paths.append(([(node.subject, '=', value, True)] + sp_cons, sp_assign))
        if node.has_other:
-            case_vals = [v for v, _ in node.when_list]
            sub = _cap_paths(enum_paths(node.other_seq, fields))
-            for sp_cons, sp_assign in (sub or [([], {})]):
-                paths.append(([(node.subject, 'not_in', case_vals, True)] + sp_cons, sp_assign))
+            thru_found = False
+            for v, _ in node.when_list:
+                thru_m = re.match(r'^(\d+)\s+THRU\s+(\d+)$', str(v), re.IGNORECASE)
+                if thru_m and not node.subjects:
+                    thru_found = True
+                    low_int, high_int = int(thru_m.group(1)), int(thru_m.group(2))
+                    for sp_cons, sp_assign in (sub or [([], {})]):
+                        a_low = dict(sp_assign)
+                        a_low[node.subject] = [{'type': 'move_literal', 'literal': str(max(0, low_int - 1))}]
+                        low_cons = [(node.subject, 'not_in', [thru_m.group(1), thru_m.group(2)], True)]
+                        paths.append((low_cons + sp_cons, a_low))
+                        a_high = dict(sp_assign)
+                        a_high[node.subject] = [{'type': 'move_literal', 'literal': str(high_int + 1)}]
+                        high_cons = [(node.subject, 'not_in', [thru_m.group(1), thru_m.group(2)], True)]
+                        paths.append((high_cons + sp_cons, a_high))
+            if not thru_found:
+                case_vals = [v for v, _ in node.when_list]
+                for sp_cons, sp_assign in (sub or [([], {})]):
+                    paths.append(([(node.subject, 'not_in', case_vals, True)] + sp_cons, sp_assign))
        return paths

    elif isinstance(node, BrSearch):
@@ -247,7 +323,10 @@ def enum_paths(node, fields):
    elif isinstance(node, BrPerform):
        if node.perf_type in ('para', 'thru'):
            if node.body_seq:
-                return enum_paths(node.body_seq, fields)
+                paths = enum_paths(node.body_seq, fields)
+                # EXIT PERFORM 只在 PERFORM 体内有效，剥离后不影响后续 BrSeq 组合
+                paths = [([c for c in cons if c is not _STOP_EXIT_PERFORM], a) for cons, a in paths]
+                return paths
            return [([], {})]
        elif node.perf_type in ('until', 'para_until', 'varying', 'para_varying'):
            # 尝试单条件（现有逻辑）
@@ -256,7 +335,9 @@ def enum_paths(node, fields):
                field, op, val = parsed
                paths = []
                false_sub = _cap_paths(enum_paths(node.body_seq, fields))
+                false_sub = [([c for c in cons if c is not _STOP_EXIT_PERFORM], a) for cons, a in false_sub]
                for sp_cons, sp_assign in (false_sub or [([], {})]):
+                    body_assign = dict(sp_assign)
                    # PERFORM VARYING: 将 FROM 值作为 MOVE 赋值加入 Enter 路径
                    if node.varying_from and node.varying_var:
                        is_fld = any(f['name'] == node.varying_from for f in fields) if fields else False
@@ -268,6 +349,40 @@ def enum_paths(node, fields):
                                merged.setdefault(k, []).extend(v if isinstance(v, list) else [v])
                        sp_assign = merged
                    paths.append(([(field, op, val, False)] + sp_cons, sp_assign))
+                    # PERFORM VARYING: 末次迭代路径（下标=MAX）
+                    if node.varying_from and node.varying_var and op in ('>', '>=', '<', '<=', '='):
+                        try:
+                            if op == '>':
+                                max_val = int(val)
+                            elif op == '>=':
+                                max_val = int(val) - 1
+                            elif op == '<':
+                                max_val = int(val)
+                            elif op == '<=':
+                                max_val = int(val) + 1
+                            elif op == '=':
+                                by_str = str(node.varying_by or '1')
+                                if by_str.lstrip('-').isdigit() and int(by_str) < 0:
+                                    max_val = int(val) + 1
+                                else:
+                                    max_val = int(val) - 1
+                            from_val = int(node.varying_from)
+                            by_str = str(node.varying_by or '1')
+                            if by_str.lstrip('-').isdigit() and int(by_str) < 0:
+                                ok = max_val <= from_val
+                            else:
+                                ok = max_val >= from_val
+                            if ok:
+                                max_asgn = {'type': 'move_literal', 'literal': str(max_val)}
+                                max_assign = {node.varying_var: [max_asgn]}
+                                merged_max = {}
+                                for d in (max_assign, body_assign):
+                                    for k, v in d.items():
+                                        merged_max.setdefault(k, []).extend(v if isinstance(v, list) else [v])
+                                the_cons = [(field, op, val, False)]
+                                paths.append((the_cons + sp_cons, merged_max))
+                        except (ValueError, TypeError):
+                            pass
                paths.append(([(field, op, val, True)], {}))
                return paths
            # 尝试复合条件（AND/OR）
@@ -279,6 +394,7 @@ def enum_paths(node, fields):
                    if sets:
                        paths = []
                        false_sub = _cap_paths(enum_paths(node.body_seq, fields))
+                        false_sub = [([c for c in cons if c is not _STOP_EXIT_PERFORM], a) for cons, a in false_sub]
                        for sp_cons, sp_assign in (false_sub or [([], {})]):
                            # PERFORM VARYING: 将 FROM 值作为 MOVE 赋值加入 Enter 路径
                            if node.varying_from and node.varying_var:
@@ -301,14 +417,18 @@ def enum_paths(node, fields):
        return [([], {})]

    elif isinstance(node, CallNode):
+        if node.program_name in _ABEND_PROGRAMS:
+            return [([_ABEND_SENTINEL], {})]
        return [([], {})]

    elif isinstance(node, ExitNode):
-        return [([_STOP], {})]
+        if node.exit_type == 'PERFORM':
+            return [([_STOP_EXIT_PERFORM], {})]
+        return [([_STOP_SENTINEL], {})]

    elif isinstance(node, GoTo):
        paths = enum_paths(node.body_seq, fields)
-        return [([_STOP] + c, a) for c, a in paths]
+        return [([_STOP_SENTINEL] + c, a) for c, a in paths]

    return [([], {})]

@@ -335,7 +455,7 @@ def seq_date(seq_num: int) -> str:


 def _is_date_field(name: str) -> bool:
-    patterns = [r'DATE', r'YYMMDD', r'YYYYMM', r'YEAR', r'MONTH', r'DAY']
+    patterns = [r'DATE', r'YYMMDD', r'YYYYMM']
    for p in patterns:
        if re.search(p, name.upper()):
            return True
@@ -401,13 +521,12 @@ def _children_of(group_name: str, fields: list) -> list:


 def _make_numeric_value(idx: int, record_num: int, total_digits: int) -> str:
-    max_val = 10 ** total_digits - 1
+    max_val = 10 ** total_digits
    for step in (100, 10, 1):
        val = idx * step + record_num
-        if val < 10 ** total_digits:
-            return str(min(val, max_val)).zfill(total_digits)
-    return str(min(record_num, max_val)).zfill(total_digits)
-    return str(record_num).zfill(total_digits)
+        if val < max_val:
+            return str(val).zfill(total_digits)
+    return str(record_num % max_val).zfill(total_digits)


 def _make_alpha_value(idx: int, record_num: int, length: int) -> str:
@@ -548,6 +667,16 @@ def _check_constraint_satisfied(rec, field_name, operator, value, want_true, fie
                    return eq == want_true
                elif operator == '<>':
                    return (not eq) == want_true
+                elif operator in ('>', '<', '>=', '<='):
+                    if operator == '>':
+                        ok = s_val > s_target
+                    elif operator == '<':
+                        ok = s_val < s_target
+                    elif operator == '>=':
+                        ok = s_val >= s_target
+                    elif operator == '<=':
+                        ok = s_val <= s_target
+                    return ok == want_true
                return True
    return False

@@ -625,6 +754,95 @@ def _apply_arith_constraint(rec, field_name, operator, value, want_true, fields)
            rec[right_field] = pick


+def _inc_str(s, length):
+    s = str(s).strip()
+    try:
+        r = str(int(s) + 1).zfill(length)
+        return r if len(r) <= length else '9' * length
+    except ValueError:
+        c = list(str(s).ljust(length)[:length])
+        for i in range(len(c) - 1, -1, -1):
+            if c[i] not in ' 9Zz\xff':
+                c[i] = chr(ord(c[i]) + 1)
+                break
+            if c[i] == ' ':
+                c[i] = '0'
+                break
+            if c[i] == '9':
+                c[i] = '0'
+            elif c[i] == 'Z':
+                c[i] = 'A'
+            elif c[i] == 'z':
+                c[i] = 'a'
+        return ''.join(c)
+
+
+def _dec_str(s, length):
+    s = str(s).strip()
+    try:
+        n = max(0, int(s) - 1)
+        return str(n).zfill(length)
+    except ValueError:
+        c = list(str(s).ljust(length)[:length])
+        for i in range(len(c) - 1, -1, -1):
+            if c[i] not in ' 0Aa\x00':
+                c[i] = chr(ord(c[i]) - 1)
+                break
+            if c[i] == ' ':
+                break
+            if c[i] == '0':
+                c[i] = '9'
+            elif c[i] == 'A':
+                c[i] = ' '
+            elif c[i] == 'a':
+                c[i] = ' '
+        return ''.join(c)
+
+
+def _reconcile_unstring_fields(rec, left_field, operator, right_field, want_true,
+                                fields, left_chain, assignments, path_assign):
+    right_root, right_chain = trace_to_root(right_field, assignments, fields, path_assign)
+    if right_root not in rec:
+        logger.debug(f"字段间比较协调：右侧根 {right_root} 不在 rec，跳过")
+        return
+    all_entries = (left_chain or []) + (right_chain or [])
+    for _, asgn in all_entries:
+        if asgn.get('type') not in ('move', 'unstring_split'):
+            logger.debug(f"字段间比较协调：链含非 MOVE 类型 {asgn.get('type')}，跳过")
+            return
+    left_val = str(rec.get(left_field, ''))
+    if not left_val.strip():
+        logger.debug(f"字段间比较协调：左侧 {left_field} 无值，跳过")
+        return
+    length = 0
+    for f in fields:
+        if f['name'] == right_root:
+            length = f.get('pic_info', {}).get('length', 0)
+            break
+    if length == 0:
+        length = len(left_val)
+
+    if operator in ('>=', '<='):
+        if want_true:
+            right_val = left_val
+        else:
+            right_val = _inc_str(left_val, length) if operator == '>=' else _dec_str(left_val, length)
+    elif operator in ('>', '<'):
+        if want_true:
+            right_val = _dec_str(left_val, length) if operator == '>' else _inc_str(left_val, length)
+        else:
+            right_val = left_val
+    elif operator == '=':
+        right_val = left_val if want_true else _inc_str(left_val, length)
+    elif operator == '<>':
+        right_val = _inc_str(left_val, length) if want_true else left_val
+    else:
+        return
+
+    rec[right_root] = right_val[:length] if right_val else right_val
+    logger.debug(f"字段间比较协调：{left_field}={left_val} {operator} {right_field} -> {right_root}={rec[right_root]} (want={want_true})")
+
+
 def apply_constraint(rec, field_name, operator, value, want_true, fields, assignments=None, path_assign=None):
    # 标准化字段名：去除括号内空格（WS-CELL ( 1, 1 ) → WS-CELL(1,1)）
    field_name = re.sub(r'\s*([(),])\s*', r'\1', field_name)
@@ -659,6 +877,7 @@ def apply_constraint(rec, field_name, operator, value, want_true, fields, assign
                apply_constraint(rec, parent_name, operator, value, want_true, fields, assignments, path_assign)
                return
            break
+    chain = None
    if assignments:
        root_var, chain = trace_to_root(field_name, assignments, fields, path_assign)
        if root_var != field_name:
@@ -666,8 +885,41 @@ def apply_constraint(rec, field_name, operator, value, want_true, fields, assign
            if any(f['name'] == new_field_name for f in fields):
                field_name, operator, value = new_field_name, new_op, new_val

+    # 字段间比较：在 satisfied check 前解析/处理
+    if any(f['name'] == value for f in fields):
+        resolved_literal = None
+        for f in fields:
+            if f['name'] == value and f.get('value') is not None:
+                resolved_literal = str(f['value']).strip("'").strip('"')
+                break
+        if resolved_literal is not None:
+            value = resolved_literal
+        elif chain is not None and assignments:
+            _reconcile_unstring_fields(rec, field_name, operator, value, want_true,
+                                        fields, chain, assignments, path_assign)
+            return
+        elif re.search(r'[+\-*/]', field_name):
+            _apply_arith_constraint(rec, field_name, operator, value, want_true, fields)
+            return
+        else:
+            logger.debug(f"字段间比较约束跳过：{field_name} {operator} {value}")
+            return
+
    # 如果当前值已满足该约束，跳过覆盖（保持先前约束的一致性）
+    # 但零值时强制使用边界值（非 0/非 min）
    if _check_constraint_satisfied(rec, field_name, operator, value, want_true, fields):
+        cur = str(rec.get(field_name, '')).strip('0')
+        if (cur == '' or cur == '.') and (
+            (operator in ('>', '>=') and not want_true) or
+            (operator in ('<', '<=') and want_true)
+        ):
+            for f in fields:
+                if f['name'] == field_name:
+                    pi = f.get('pic_info', {})
+                    if pi.get('type') == 'numeric':
+                        val = satisfying_value(pi, operator, value, want_true)
+                        rec[field_name] = val
+                    return
        return

    if operator == 'not_in':
@@ -687,13 +939,6 @@ def apply_constraint(rec, field_name, operator, value, want_true, fields, assign
                            rec[field_name] = str(n).zfill(pi.get('digits', 0) + pi.get('decimal', 0))
                            return
        return
-    # 字段间比较（值侧也是字段名）
-    if any(f['name'] == value for f in fields):
-        if re.search(r'[+\-*/]', field_name):
-            _apply_arith_constraint(rec, field_name, operator, value, want_true, fields)
-        else:
-            logger.debug(f"字段间比较约束跳过：{field_name} {operator} {value}")
-        return
    for f in fields:
        if f['name'] == field_name:
            pi = f.get('pic_info', {})
@@ -738,6 +983,31 @@ def sync_redefined_fields(rec, fields):

 def apply_occurs_depending(rec, fields):
    """根据 OCCURS DEPENDING ON 变量的当前值，清零超范围的下标字段。"""
+    # Phase 1: 将零值的 DEPENDING ON 变量设为最大下标
+    dep_max = {}
+    for f in fields:
+        dep_var = f.get('occurs_depending')
+        if not dep_var:
+            continue
+        m = re.search(r'\((\d+)\)$', f['name'])
+        if m:
+            sub = int(m.group(1))
+            if sub > dep_max.get(dep_var, 0):
+                dep_max[dep_var] = sub
+    for dep_var, max_sub in dep_max.items():
+        try:
+            cur_val = int(float(str(rec.get(dep_var, '0'))))
+        except (ValueError, TypeError):
+            cur_val = 0
+        if cur_val == 0:
+            for f in fields:
+                if f['name'] == dep_var:
+                    pi = f.get('pic_info', {})
+                    digits = pi.get('digits', 0) + pi.get('decimal', 0)
+                    if digits > 0:
+                        rec[dep_var] = str(max_sub).zfill(digits)
+                    break
+    # Phase 2: 清零超范围的下标字段
    for f in fields:
        dep_var = f.get('occurs_depending')
        if not dep_var:
@@ -805,7 +1075,10 @@ def _enum_search_paths(node, fields):
            base = re.sub(r'\s*\(.*?\)\s*$', '', cond_tree.field)
            matching_val = cond_tree.value
            elem_key = f'{base}({i + 1})'
-            extra_assign[elem_key] = [{'type': 'move_literal', 'literal': matching_val}]
+            if any(f['name'] == matching_val for f in fields):
+                extra_assign[elem_key] = [{'type': 'move', 'source_vars': [matching_val]}]
+            else:
+                extra_assign[elem_key] = [{'type': 'move_literal', 'literal': matching_val}]
            non_match = _non_match_for(cond_tree, fields) or ' '
            for j in range(i):
                prev_key = f'{base}({j + 1})'
@@ -815,7 +1088,10 @@ def _enum_search_paths(node, fields):
            merged_assign = dict(extra_assign)
            for k, v in sp_assign.items():
                merged_assign.setdefault(k, []).extend(v if isinstance(v, list) else [v])
-            paths.append((sp_cons, merged_assign))
+            if cond_tree and isinstance(cond_tree, CondLeaf):
+                paths.append(([(elem_key, cond_tree.op, matching_val, True)] + sp_cons, merged_assign))
+            else:
+                paths.append((sp_cons, merged_assign))

    if node.has_at_end:
        sub = _cap_paths(enum_paths(node.at_end_seq, fields))
@@ -837,16 +1113,20 @@ def _enum_search_paths(node, fields):
    return paths


-def generate_records(branch_paths_with_assigns, data_fields, base_assignments=None, file_sec=None):
+def generate_records(path_infos, data_fields, base_assignments=None, file_sec=None):
    """生成测试数据记录。
-    branch_paths_with_assigns: list of (constraints, path_assignments).
+    path_infos: list of (constraints, path_assignments) 或 (constraints, path_assignments, term_type).
    base_assignments: 全局 assignments dict (用于 trace_to_root).
-    返回: (records, kept_path_cons) — kept_path_cons 是与 records 一一对应的约束。
+    返回: (records, kept_path_cons, term_types).
    """
+    # 自动兼容旧 2-tuple 格式
+    if path_infos and len(path_infos[0]) == 2:
+        path_infos = [(c, a, 'normal') for c, a in path_infos]
    records = []
    kept_path_cons = []
-    if branch_paths_with_assigns:
-        for seq, (path_cons, path_assign) in enumerate(branch_paths_with_assigns, start=1):
+    term_types = []
+    if path_infos:
+        for seq, (path_cons, path_assign, term_type) in enumerate(path_infos, start=1):
            path_cons = _filter_stop(path_cons)
            rec = make_base_record(seq, data_fields)
            # Pass A: 先传播赋值（MOVE/COMPUTE/READ INTO 等），模拟到决策点前的程序状态
@@ -869,6 +1149,26 @@ def generate_records(branch_paths_with_assigns, data_fields, base_assignments=No
                                        if not _check_constraint_satisfied(rec, root_var, new_op, new_val, want, data_fields):
                                            skip_impossible = True
                                            break
+                        elif field in rec:
+                            asgn_val = path_assign.get(field)
+                            if asgn_val is not None:
+                                asgn_list = asgn_val if isinstance(asgn_val, list) else [asgn_val]
+                                if asgn_list and asgn_list[-1]['type'] == 'move_literal':
+                                    cur_val = str(rec.get(field, ''))
+                                    if cur_val != '':
+                                        pi = next((f.get('pic_info', {}) for f in data_fields if f['name'] == field), {})
+                                        if pi.get('type') == 'numeric':
+                                            try:
+                                                nv = int(float(cur_val))
+                                                tv = int(float(str(val)))
+                                                ops = {'>': lambda a,b: a > b, '<': lambda a,b: a < b, '=': lambda a,b: a == b, '<>': lambda a,b: a != b, '>=': lambda a,b: a >= b, '<=': lambda a,b: a <= b}
+                                                if op in ops:
+                                                    satisfied = ops[op](nv, tv) == want
+                                                    if not satisfied:
+                                                        skip_impossible = True
+                                                        break
+                                            except (ValueError, TypeError):
+                                                pass
            if skip_impossible:
                continue
            # Pass B: 约束覆盖（确保决策条件满足，覆盖 MOVE 带来的值）
@@ -886,17 +1186,121 @@ def generate_records(branch_paths_with_assigns, data_fields, base_assignments=No
                        forward[tgt] = filtered
                if forward:
                    propagate_assignments(rec, forward, data_fields, file_sec=file_sec)
+            # Pass B.75: COMPUTE 重算（约束修改了 COMPUTE 源字段的值）
+            if isinstance(path_assign, dict):
+                compute_only = {}
+                for tgt, asgn_val in path_assign.items():
+                    asgn_list = asgn_val if isinstance(asgn_val, list) else [asgn_val]
+                    filtered = [a for a in asgn_list if a['type'] == 'compute']
+                    if filtered:
+                        compute_only[tgt] = filtered
+                if compute_only:
+                    propagate_assignments(rec, compute_only, data_fields, file_sec=file_sec)
+            # Pass B.8: UNSTRING source reconstruction (targets → source)
+            if base_assignments:
+                _reconstruct_unstring_sources(rec, base_assignments, data_fields)
            # Pass C: 同步 REDEFINES（确保共享存储一致）
            sync_redefined_fields(rec, data_fields)
            # Pass D: OCCURS DEPENDING ON — 清零超范围的下标字段
            apply_occurs_depending(rec, data_fields)

+            # Pass E: PIC 长度约束 — 模拟 COBOL 截断语义
+            for f in data_fields:
+                name = f['name']
+                if name in rec and not f.get('is_88') and not f.get('is_filler'):
+                    pi = f.get('pic_info', {})
+                    ftype = pi.get('type', 'unknown')
+                    val = str(rec[name])
+                    if ftype == 'numeric':
+                        total = pi.get('digits', 0) + pi.get('decimal', 0)
+                        if total > 0 and len(val) > total:
+                            rec[name] = val[-total:].zfill(total)
+                    elif ftype in ('alphanumeric', 'alphabetic'):
+                        length = pi.get('length', 0)
+                        if length > 0 and len(val) > length:
+                            rec[name] = val[:length]
+
            records.append(rec)
            kept_path_cons.append(path_cons)
+            term_types.append(term_type)
+            # Track which fields were explicitly assigned in this path
+            if isinstance(path_assign, dict):
+                rec['_assigned_fields'] = set(path_assign.keys())
+            else:
+                rec['_assigned_fields'] = set()
    if not records:
        rec = make_base_record(1, data_fields)
        if base_assignments:
            propagate_assignments(rec, base_assignments, data_fields, file_sec=file_sec)
+        if base_assignments:
+            _reconstruct_unstring_sources(rec, base_assignments, data_fields)
+        rec['_assigned_fields'] = set()
        records.append(rec)
        kept_path_cons.append([])
-    return records, kept_path_cons
+        term_types.append('normal')
+    return records, kept_path_cons, term_types
+
+
+def _reconstruct_unstring_sources(rec, base_assignments, data_fields):
+    """Build UNSTRING source field value from comma-separated target values.
+    After constraints determine target field values, construct the source
+    string so the COBOL UNSTRING can correctly parse it.
+    """
+    groups = {}
+    for tgt, asgn_list in base_assignments.items():
+        for asgn in asgn_list:
+            if asgn.get('type') == 'unstring_split' and asgn.get('source_vars'):
+                src = asgn['source_vars'][0]
+                idx = asgn.get('index', 0)
+                groups.setdefault(src, []).append((idx, tgt))
+
+    for src_var, targets in groups.items():
+        targets.sort(key=lambda x: x[0])
+        # Resolve group→child name if source not directly in rec
+        resolved_src = src_var
+        if resolved_src not in rec:
+            grp_level = None
+            found = False
+            for f in data_fields:
+                if not found and f['name'] == resolved_src:
+                    grp_level = f.get('level', 0)
+                    found = True
+                    continue
+                if found:
+                    if f.get('level', 0) <= grp_level or f.get('level') == 77:
+                        break
+                    if f.get('pic'):
+                        resolved_src = f['name']
+                        break
+        if resolved_src not in rec:
+            continue
+        csv_parts = []
+        for idx, tgt in targets:
+            val = rec.get(tgt, '')
+            csv_parts.append(val if val is not None else '')
+        csv_value = ','.join(csv_parts)
+        src_len = 0
+        for f in data_fields:
+            if f['name'] == resolved_src:
+                pi = f.get('pic_info', {})
+                if pi:
+                    src_len = pi.get('length', 0)
+                break
+        if src_len > 0:
+            csv_value = csv_value.ljust(src_len)[:src_len]
+        rec[resolved_src] = csv_value
+        # Also sync to child fields (group→elementary) for FD output consistency
+        if resolved_src == src_var:
+            grp_level = None
+            found = False
+            for f in data_fields:
+                if not found and f['name'] == resolved_src:
+                    grp_level = f.get('level', 0)
+                    found = True
+                    continue
+                if found:
+                    if f.get('level', 0) <= grp_level or f.get('level') == 77:
+                        break
+                    if f.get('pic'):
+                        rec[f['name']] = csv_value
+                        break
@@ -0,0 +1,119 @@
+"""gcov 覆盖率数据解析和分支标记"""
+
+import re
+import logging
+import subprocess
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
+def parse_cbl_gcov(gcov_path: str) -> dict[int, int]:
+    """解析 .cbl.gcov 文件，返回 {COBOL行号: 执行次数}。
+
+    gcov 行格式：
+      #####:  6: 源码行   → 未执行（0 次）
+      75*:   12: 源码行   → 执行 75 次
+      1*:    14: 源码行   → 执行 1 次
+      -:     17: 源码行   → 不可执行（注释/声明行，跳过）
+    """
+    counts = {}
+    with open(gcov_path, encoding='utf-8') as f:
+        for line in f:
+            m = re.match(r'^\s*(#####|\d+\*?|-):\s*(\d+):', line)
+            if not m:
+                continue
+            count_str = m.group(1)
+            lineno = int(m.group(2))
+            if count_str == '#####':
+                counts[lineno] = 0
+            elif count_str == '-':
+                continue
+            else:
+                counts[lineno] = int(count_str.rstrip('*'))
+    return counts
+
+
+def run_gcov(program_name: str, work_dir: str) -> dict[int, int]:
+    """在 work_dir 中通过 WSL 执行 gcov 并解析 COBOL 行计数。
+
+    Args:
+        program_name: 程序名（不含扩展名），如 "ALLCMDS"
+        work_dir: 包含 .gcda/.gcno 的目录（Windows 路径）
+
+    Returns:
+        {COBOL行号: 执行次数} 字典。失败时返回空 dict。
+    """
+    wsl_work = _wsl_path(work_dir)
+    cmd = ['wsl', 'sh', '-c', f'cd {wsl_work} && gcov {program_name}.c']
+    result = subprocess.run(
+        cmd,
+        capture_output=True, text=True,
+        encoding='utf-8', errors='replace',
+        timeout=30,
+    )
+    if result.returncode != 0:
+        logger.warning(f"gcov 失败 (exit={result.returncode}): {result.stderr.strip()}")
+        return {}
+
+    cbl_gcov = Path(work_dir) / f'{program_name}.cbl.gcov'
+    if not cbl_gcov.exists():
+        logger.warning(f"gcov 输出不存在: {cbl_gcov}")
+        return {}
+
+    gcov_data = parse_cbl_gcov(str(cbl_gcov))
+    logger.info(f"gcov 解析: {len(gcov_data)} 行, "
+                f"{sum(1 for v in gcov_data.values() if v > 0)} 行已执行")
+    return gcov_data
+
+
+def _wsl_path(windows_path: str) -> str:
+    path = Path(windows_path).resolve()
+    drive = path.drive.lower().rstrip(':')
+    rest = str(path.relative_to(path.anchor)).replace('\\', '/')
+    return f'/mnt/{drive}/{rest}'
+
+
+def mark_from_gcov(decision_points: list, gcov_data: dict[int, int],
+                   branch_tree) -> None:
+    """用 gcov 行执行计数推断决策点分支覆盖，直接修改 decision_points 的 active_branches。
+
+    推断规则（简化版，先覆盖主要场景）：
+
+    IF  (条件行 L):
+      - 条件行 L 在 gcov 中 count == 0 → 不可到达，不标记
+      - 条件行 L 在 gcov 中 count > 0 → 标记 T 和 F 都覆盖
+
+    EVALUATE:
+      - subject 行 count > 0 → 标记所有 WHEN 为已覆盖
+
+    PERFORM UNTIL (条件行 L):
+      - count == 1 → 条件初始即为真，循环体未进入 → Skip 覆盖
+      - count > 1  → 循环体至少进入一次 → Enter 覆盖
+      - Skip 总视为覆盖（无论进入与否，最终都会跳出）
+    """
+    for dp in decision_points:
+        ln = dp.source_line
+        if ln <= 0 or ln not in gcov_data:
+            continue
+
+        count = gcov_data.get(ln)
+        if count is None:
+            continue
+
+        if dp.kind == 'IF':
+            if count == 0:
+                continue
+            dp.active_branches.add('T')
+            dp.active_branches.add('F')
+
+        elif dp.kind == 'EVALUATE':
+            if count == 0:
+                continue
+            for bn in dp.branch_names:
+                dp.active_branches.add(bn)
+
+        elif dp.kind == 'PERFORM':
+            if count > 1:
+                dp.active_branches.add('Enter')
+            dp.active_branches.add('Skip')
@@ -13,7 +13,7 @@ clause: pic_clause | value_clause | occurs_clause | redefines_clause | usage_cla
      | "JUSTIFIED" "RIGHT"?
      | "BLANK" "WHEN" "ZERO"
      | "GLOBAL" | "EXTERNAL"
-pic_clause: "PIC" "IS"? PICTURE_STRING
+pic_clause: "PIC" "IS"? PICTURE_STRING ("." PICTURE_STRING)*
 value_clause: "VALUE" "IS"? value_literal+
 value_literal: INT | SIGNED_NUMBER | STRING | SQSTRING
             | "ZERO" | "ZEROS" | "ZEROES"
@@ -23,27 +23,68 @@ def _scenario_text(path_cons):
    return ', '.join(parts)


+def _write_json(entries, outpath):
+    if not entries:
+        return
+    outpath.parent.mkdir(parents=True, exist_ok=True)
+    with open(outpath, 'w', encoding='utf-8') as f:
+        json.dump(entries, f, ensure_ascii=False, indent=2)
+
+
+def _is_field_assigned(fname, assigned_set, fields, fd_fields_lookup):
+    if not assigned_set:
+        return False
+    if fname in assigned_set:
+        return True
+    level_map = {}
+    name_order = []
+    for f in fields:
+        fn = f['name']
+        lv = f.get('level', 77)
+        level_map[fn] = lv
+        name_order.append((lv, fn))
+    flv = level_map.get(fname, 77)
+    ancestor = None
+    for lv, fn in name_order:
+        if fn == fname:
+            break
+        if lv < flv:
+            ancestor = fn
+    if ancestor and ancestor in assigned_set:
+        return True
+    return False
+
+
 def output_json(records, outpath, roles=None, fd_fields=None, field_to_fd=None,
-                open_dir=None, path_cons_list=None):
+                open_dir=None, term_types=None, db_input=None, data_fields=None):
    outpath.parent.mkdir(parents=True, exist_ok=True)
    if not roles:
+        out = []
+        for i, rec in enumerate(records):
+            entry = dict(rec)
+            entry['termination'] = (term_types or ['normal'] * len(records))[i]
+            out.append(entry)
+        obj = {'program': outpath.stem, 'records': out}
+        if db_input:
+            obj['db_input'] = db_input
        with open(outpath, 'w', encoding='utf-8') as f:
-            json.dump(records, f, ensure_ascii=False, indent=2)
+            json.dump(obj, f, ensure_ascii=False, indent=2)
        return

-    # FD direction lookup
+    term_types = term_types or ['normal'] * len(records)
+
    out = []
    for i, rec in enumerate(records):
        inp = {}
        out_exp = {}
        ws = {}

-        # Group by FD
        if fd_fields and field_to_fd:
            for fd_name, fds_set in fd_fields.items():
                direction = (open_dir or {}).get(fd_name, '')
                inp_block = {}
                out_block = {}
+                assigned_set = rec.get('_assigned_fields', set())
                for fname in fds_set:
                    if fname not in rec:
                        continue
@@ -52,13 +93,13 @@ def output_json(records, outpath, roles=None, fd_fields=None, field_to_fd=None,
                    if direction in ('INPUT', 'I-O') and r in ('input', 'inout'):
                        inp_block[fname] = val
                    if direction in ('OUTPUT', 'I-O') and r in ('output', 'inout'):
-                        out_block[fname] = val
+                        if _is_field_assigned(fname, assigned_set, data_fields or [], fd_fields):
+                            out_block[fname] = val
                if inp_block:
                    inp[fd_name] = inp_block
                if out_block:
                    out_exp[fd_name] = out_block

-        # Working-storage: not belonging to any FD
        for name, val in rec.items():
            if not field_to_fd or name not in field_to_fd:
                ws[name] = val
@@ -66,25 +107,21 @@ def output_json(records, outpath, roles=None, fd_fields=None, field_to_fd=None,
        entry = {
            'input': inp,
            'expected_output': out_exp,
-            'working_storage': ws,
+            'working_storage': {k: v for k, v in ws.items() if k != '_assigned_fields'},
+            'termination': term_types[i] if i < len(term_types) else 'normal',
        }

-        if path_cons_list and i < len(path_cons_list):
-            text = _scenario_text(path_cons_list[i])
-            if text:
-                entry['scenario'] = text
-
        out.append(entry)

-    with open(outpath, 'w', encoding='utf-8') as f:
-        json.dump(out, f, ensure_ascii=False, indent=2)
+    obj = {'program': outpath.stem, 'records': out}
+    if db_input:
+        obj['db_input'] = db_input
+    _write_json(obj, outpath)


-def output_input_files(records, outdir, stem, roles, fd_fields, field_to_fd, open_dir):
-    """按 FD 名拆分出力入力 JSON 文件。
-    每个 INPUT / I-O 方向 FD 生成一个文件：{stem}_{fd_name}.json
-    内容为路径数 × 记录，每条只含该 FD 的入力字段值。
-    """
+def output_input_files(records, outdir, stem, roles, fd_fields, field_to_fd, open_dir,
+                       term_types=None):
+    term_types = term_types or ['normal'] * len(records)
    input_fds = {}
    for fd_name, fds_set in fd_fields.items():
        direction = (open_dir or {}).get(fd_name, '')
@@ -101,9 +138,11 @@ def output_input_files(records, outdir, stem, roles, fd_fields, field_to_fd, ope
    outdir.mkdir(parents=True, exist_ok=True)

    for fd_name, fds_set in input_fds.items():
-        fd_records = []
+        normals = []
+        abends = []
        direction = (open_dir or {}).get(fd_name, '')
-        for rec in records:
+        for i, rec in enumerate(records):
+            term = term_types[i] if i < len(term_types) else 'normal'
            fd_rec = {}
            for fname in fds_set:
                r = roles.get(fname, 'unused')
@@ -111,8 +150,12 @@ def output_input_files(records, outdir, stem, roles, fd_fields, field_to_fd, ope
                    if fname in rec:
                        fd_rec[fname] = rec[fname]
            if fd_rec:
-                fd_records.append(fd_rec)
+                if term == 'abend':
+                    abends.append(fd_rec)
+                else:
+                    normals.append(fd_rec)

-        outpath = outdir / f'{stem}_{fd_name}.json'
-        with open(outpath, 'w', encoding='utf-8') as f:
-            json.dump(fd_records, f, ensure_ascii=False, indent=2)
+        if normals:
+            _write_json(normals, outdir / f'{stem}_{fd_name}.json')
+        if abends:
+            _write_json(abends, outdir / f'{stem}_abend_{fd_name}.json')
@@ -1,9 +1,12 @@
-"""??????? + COPYBOOK + DATA DIVISION?? + PIC"""
+"""Preprocessor + COPYBOOK + DATA DIVISION parse + PIC"""

 import re
+import logging
 from pathlib import Path
 from lark import Lark, Transformer, v_args

+logger = logging.getLogger(__name__)
+
 from .models import FieldDef, PicInfo


@@ -85,6 +88,8 @@ def preprocess(source: str) -> str:
            if len(line) >= 7 and line[6].upper() == 'D':
                continue
            content = line[6:] if len(line) >= 7 else line
+            if content.strip().startswith('*'):
+                continue
        else:
            comment_pos = line.find('*>')
            if comment_pos >= 0:
@@ -192,6 +197,125 @@ def resolve_copybooks(source: str, source_dir: str, _recursion_depth: int = 0,
    return '\n'.join(result)


+# ── EXEC SQL INCLUDE Resolution ──
+
+_RE_SQL_INC = re.compile(
+    r'EXEC\s+SQL\s+INCLUDE\s+(\w[\w-]*)\s+END-EXEC\.',
+    re.IGNORECASE | re.DOTALL
+)
+
+_BUILTIN_SQLCA = """\
+        01 SQLCA.
+            05 SQLCAID      PIC X(8).
+            05 SQLCABC      PIC S9(9) COMP.
+            05 SQLCODE      PIC S9(9) COMP.
+            05 SQLERRM.
+                10 SQLERRML PIC S9(4) COMP.
+                10 SQLERRMC PIC X(70).
+            05 SQLERRP      PIC X(8).
+            05 SQLERRD      OCCURS 6 TIMES PIC S9(9) COMP.
+            05 SQLWARN.
+                10 SQLWARN0 PIC X.
+                10 SQLWARN1 PIC X.
+                10 SQLWARN2 PIC X.
+                10 SQLWARN3 PIC X.
+                10 SQLWARN4 PIC X.
+                10 SQLWARN5 PIC X.
+                10 SQLWARN6 PIC X.
+                10 SQLWARN7 PIC X.
+            05 SQLSTATE     PIC X(5).
+"""
+
+
+def resolve_sql_includes(source: str, source_dir: str) -> str:
+    """Resolve EXEC SQL INCLUDE name END-EXEC. like COPY. Injects built-in SQLCA if not found."""
+    def _resolve_one(m):
+        name = m.group(1).upper()
+        for ext in ('', '.cpy', '.CPY', '.cbl', '.CBL'):
+            p = Path(source_dir) / f"{name}{ext}"
+            if p.exists():
+                return p.read_text(encoding='utf-8')
+        if name == 'SQLCA':
+            return _BUILTIN_SQLCA
+        logger.warning(f"SQL INCLUDE {name} not found, injecting as comment")
+        return f"      * SQL INCLUDE {name} NOT RESOLVED\n"
+    while True:
+        new_source = _RE_SQL_INC.sub(_resolve_one, source)
+        if new_source == source:
+            break
+        source = new_source
+    return source
+
+
+_RE_SQL_BLOCK = re.compile(
+    r'EXEC\s+SQL\s+(.*?)\s+END-EXEC\.?',
+    re.IGNORECASE | re.DOTALL
+)
+
+_RE_DECLARE_TABLE = re.compile(
+    r'EXEC\s+SQL\s+DECLARE\s+(\w[\w-]*)\s+TABLE\s*\((.*?)\)\s+END-EXEC\.?',
+    re.IGNORECASE | re.DOTALL
+)
+
+
+def strip_exec_sql_from_data_div(source: str) -> tuple:
+    """Strip EXEC SQL blocks from DATA DIVISION. Returns (cleaned_source, declared_columns)."""
+    declared_columns = {}
+    def _repl(m):
+        full = m.group(0)
+        dm = _RE_DECLARE_TABLE.match(full)
+        if dm:
+            table_name = dm.group(1).upper()
+            col_text = dm.group(2)
+            cols = _parse_declare_table_columns(col_text)
+            declared_columns[table_name] = cols
+            return f"      *> DECLARE {table_name} TABLE ({len(cols)} cols)\n"
+        return "      *> SKIPPED EXEC SQL\n"
+    cleaned = _RE_SQL_BLOCK.sub(_repl, source)
+    return cleaned, declared_columns
+
+
+def _parse_declare_table_columns(col_text: str) -> list[dict]:
+    """Parse 'CUST_ID CHAR(5) NOT NULL, BALANCE PIC 9(6)' into column list."""
+    cols = []
+    for part in re.split(r',\s*', col_text):
+        part = part.strip()
+        if not part:
+            continue
+        m = re.match(
+            r'(\w[\w-]*)\s+(CHAR\s*\(\s*(\d+)\s*\)'
+            r'|VARCHAR\s*\(\s*(\d+)\s*\)'
+            r'|INTEGER|SMALLINT'
+            r'|DECIMAL\s*\(\s*(\d+)\s*(?:,\s*(\d+))?\s*\)'
+            r'|DATE'
+            r'|PIC\s+([\w().]+))'
+            r'(?:\s+NOT\s+NULL|\s+NULL)?',
+            part, re.IGNORECASE
+        )
+        if m:
+            name = m.group(1).upper()
+            if m.group(3):
+                col_type = {'db_type': 'CHAR', 'size': int(m.group(3))}
+            elif m.group(4):
+                col_type = {'db_type': 'VARCHAR', 'size': int(m.group(4))}
+            elif m.group(2).upper() == 'INTEGER':
+                col_type = {'db_type': 'INTEGER'}
+            elif m.group(2).upper() == 'SMALLINT':
+                col_type = {'db_type': 'SMALLINT'}
+            elif m.group(5):
+                prec = int(m.group(5)) if m.group(5) else 0
+                scale = int(m.group(6)) if m.group(6) else 0
+                col_type = {'db_type': 'DECIMAL', 'precision': prec, 'scale': scale}
+            elif m.group(2).upper() == 'DATE':
+                col_type = {'db_type': 'DATE'}
+            elif m.group(7):
+                col_type = {'db_type': 'PIC', 'pic': m.group(7).upper()}
+            else:
+                col_type = {'db_type': 'CHAR', 'size': 1}
+            cols.append({'name': name, **col_type})
+    return cols
+
+
 # 鈹€鈹€ Lark Grammar 鈹€鈹€

 _GRAMMAR_CACHE = None
@@ -464,7 +588,7 @@ def parse_file_control(source: str) -> dict:
    """Parse FILE-CONTROL paragraph.

    Returns dict:
-        {filename: {"assign_to": str, "organization": str | None}}
+        {filename: {"assign": str, "organization": str, "recording_mode": str}}
    """
    m = re.search(r'FILE-CONTROL\.(.*?)(?=DATA\s+DIVISION|\Z)', source, re.DOTALL | re.IGNORECASE)
    if not m:
@@ -472,21 +596,39 @@ def parse_file_control(source: str) -> dict:
    fc = m.group(1)
    result = {}
    for sel_m in re.finditer(
-        r'SELECT\s+(\w[\w-]*)\s+[^.]*?\bASSIGN\s+TO\s+(["\'])(.*?)\2',
+        r'SELECT\s+(\w[\w-]*)\s+[^.]*?\bASSIGN\s+TO\s+'
+        r'(?:(["\'])(.*?)\2|(\w[\w-]*))'
+        r'[^.]*\.',
        fc, re.IGNORECASE
    ):
-        fname = sel_m.group(1).upper()
-        assign_to = sel_m.group(3).upper()
-        # Extract ORGANIZATION clause within this SELECT statement
-        org_m = re.search(
-            r'ORGANIZATION\s+(?:IS\s+)?(\w[\w-]*)',
-            sel_m.group(0), re.IGNORECASE
-        )
-        org = org_m.group(1).upper() if org_m else None
-        result[fname] = {
-            "assign_to": assign_to,
-            "organization": org,
-        }
+        name = sel_m.group(1).upper()
+        if sel_m.group(2):
+            assign_to = sel_m.group(3).upper()
+        else:
+            assign_to = sel_m.group(4).upper()
+        clause = sel_m.group(0)
+        org_m = re.search(r'ORGANIZATION\s+(LINE\s+)?SEQUENTIAL', clause, re.IGNORECASE)
+        if org_m and org_m.group(1):
+            org = 'LINE SEQUENTIAL'
+        elif org_m:
+            org = 'SEQUENTIAL'
+        else:
+            org = 'SEQUENTIAL'
+        result[name] = {'assign': assign_to, 'organization': org, 'recording_mode': 'F'}
+    # Extract RECORDING MODE from FD blocks in FILE SECTION
+    fd_sec_m = re.search(r'FILE\s+SECTION\.(.*?)(?=WORKING-STORAGE\s+SECTION|LINKAGE\s+SECTION|\Z)',
+                          source, re.DOTALL | re.IGNORECASE)
+    if fd_sec_m:
+        fs = fd_sec_m.group(1)
+        for block in re.split(r'\n\s*(?=FD\s+)', fs.strip()):
+            fd_m = re.match(r'FD\s+(\w[\w-]*)', block, re.IGNORECASE)
+            if not fd_m:
+                continue
+            fd_name = fd_m.group(1).upper()
+            if fd_name in result:
+                rm_m = re.search(r'RECORDING\s+MODE\s+IS\s+(\w)', block, re.IGNORECASE)
+                if rm_m:
+                    result[fd_name]['recording_mode'] = rm_m.group(1).upper()
    return result


@@ -499,14 +641,12 @@ def parse_file_section(source: str) -> dict:
    fs = m.group(1)
    result = {}
    # FD 和 SD 条目
-    blocks = re.split(r'\n\s*(?=(?:FD|SD)\s+)', fs.strip())
-    for block in blocks:
+    fd_blocks = re.split(r'\n\s*(?=(?:FD|SD)\s+)', fs.strip())
+    for block in fd_blocks:
        m = re.match(r'(FD|SD)\s+(\w[\w-]*)', block, re.IGNORECASE)
        if not m:
            continue
-        entry_type = m.group(1).upper()  # "FD" or "SD"
        name = m.group(2).upper()
-        # 找 01 层记录
        recs = re.findall(r'^\s*0{0,1}1\s+(\w[\w-]*)', block, re.MULTILINE)
        result[name] = [r.upper() for r in recs]
    return result
@@ -521,11 +661,15 @@ def scan_open_statements(source: str) -> dict:
        source, re.IGNORECASE
    ):
        full = m.group(1)
-        for seg_m in re.finditer(
-            r'(INPUT|OUTPUT|I-O)\s+([\w\s-]+)', full, re.IGNORECASE
-        ):
-            direction = seg_m.group(1).upper()
-            for fname in re.findall(r'\w[\w-]*', seg_m.group(2)):
-                if fname.upper() not in ('INPUT', 'OUTPUT', 'I-O'):
+        full = re.sub(r'\s+', ' ', full)
+        tokens = re.split(r'\s+(?=(?:INPUT|OUTPUT|I-O)\s)', full)
+        for seg in tokens:
+            seg = seg.strip()
+            if not seg:
+                continue
+            seg_m = re.match(r'(INPUT|OUTPUT|I-O)\s+([\w -]+)', seg, re.IGNORECASE)
+            if seg_m:
+                direction = seg_m.group(1).upper()
+                for fname in re.findall(r'\w[\w-]*', seg_m.group(2)):
                    dirs[fname.upper()] = direction
    return dirs