merge local cobol_testgen improvements into v3 shared modules

- cond.py: SQLCODE/SQLSTATE handling, alphanumeric >/< boundary fix
- output.py: termination tracking, db_input support, _is_field_assigned filter
- coverage.py: mark_from_gcov, THRU support, KeyError protection
- gcov.py: new file (dependency for coverage.py)
- grammar.lark: multi-segment PIC support
- read.py: SQL INCLUDE resolution, DECLARE TABLE parsing, * comment fix
- core.py: SQL parsing, blocked_names, keyword list
- design.py: multi-sentinel, THRU ranges, PERFORM VARYING last iteration
- __init__.py: local main() + v3 API functions, guarded imports

All 6 ZAN programs verified passing through v3 pipeline
2026-06-23 22:38:17 +08:00
parent e5ab3baa46
commit 7fb9304212
9 changed files with 1595 additions and 326 deletions
@@ -8,6 +8,7 @@ from pathlib import Path
 logger = logging.getLogger(__name__)
 from .models import BrSeq, BrIf, BrEval, BrPerform, BrSearch, CondLeaf
 from .cond import parse_single_condition, parse_compound_condition, is_field, collect_leaves, evaluate_tree
+from .gcov import mark_from_gcov


 # ── 数据模型 ──
@@ -190,11 +191,14 @@ def _mark_if(dp, cons):
                if _match_leaf(c, leaf):
                    assignment[leaf] = c[3]
                    break
-        if len(assignment) == len(dp.cond_leaves):
-            if evaluate_tree(dp.cond_tree, assignment):
-                dp.active_branches.add('T')
-            else:
-                dp.active_branches.add('F')
+        if assignment:
+            try:
+                if evaluate_tree(dp.cond_tree, assignment):
+                    dp.active_branches.add('T')
+                else:
+                    dp.active_branches.add('F')
+            except KeyError:
+                pass
    else:
        matched = 0
        for leaf in dp.leaves:
@@ -253,6 +257,15 @@ def _mark_eval(dp, cons, fields=None):
                dp.active_branches.add(name)
        elif c[0] == dp.label and c[1] == 'not_in':
            dp.active_branches.add('OTHER')
+    thru_lows = {c[2] for c in cons if c[0] == dp.label and c[1] == '>=' and c[3]}
+    thru_highs = {c[2] for c in cons if c[0] == dp.label and c[1] == '<=' and c[3]}
+    if thru_lows or thru_highs:
+        for when_val, _ in dp.when_list:
+            thru_m = re.match(r'^(\d+)\s+THRU\s+(\d+)$', str(when_val), re.IGNORECASE)
+            if thru_m and thru_m.group(1) in thru_lows and thru_m.group(2) in thru_highs:
+                name = f"WHEN {when_val}"
+                if name in dp.branch_names:
+                    dp.active_branches.add(name)


 def _mark_search(dp, cons, fields=None):
@@ -309,11 +322,14 @@ def _mark_perform(dp, cons):
                if _match_leaf(c, leaf):
                    assignment[leaf] = c[3]
                    break
-        if len(assignment) == len(dp.cond_leaves):
-            if evaluate_tree(dp.cond_tree, assignment):
-                dp.active_branches.add('Skip')
-            else:
-                dp.active_branches.add('Enter')
+        if assignment:
+            try:
+                if evaluate_tree(dp.cond_tree, assignment):
+                    dp.active_branches.add('Skip')
+                else:
+                    dp.active_branches.add('Enter')
+            except KeyError:
+                pass
    else:
        for c in cons:
            if c[0] == dp.label or any(c[0] == f for f in _get_fields_in_cond(dp.label)):
@@ -330,7 +346,6 @@ def _get_fields_in_cond(cond_text):
 # ── 行号定位（基于原始源文本）──

 def locate_decision_lines(decision_points, raw_source):
-    """在原始源文本中搜索每个决策点的近似行号"""
    lines = raw_source.upper().splitlines()
    for dp in decision_points:
        patterns = _build_search_patterns(dp)
@@ -344,7 +359,6 @@ def locate_decision_lines(decision_points, raw_source):


 def _normalize(text):
-    """标准化条件文本用于比较：去多余空白、标准化引号"""
    t = re.sub(r'\s+', ' ', text).strip()
    t = t.replace('"', "'")
    return t
@@ -360,14 +374,13 @@ def _build_search_patterns(dp):
        texts.append((r'\bUNTIL\b', dp.condition if hasattr(dp, 'condition') else dp.label
                       if dp.label else ''))
    else:
-        return [r'$^']  # 永不匹配
+        return [r'$^']

    patterns = []
    for keyword, condition in texts:
        if not condition:
            continue
        norm_cond = _normalize(condition)
-        # 转义正则特殊字符，但保留空格（替换为\s+）
        esc = re.escape(norm_cond)
        esc = esc.replace(r'\ ', r'\s+')
        esc = esc.replace(r'\'', r"['\"]")
@@ -411,7 +424,6 @@ _DETAIL_HTML = '''<!DOCTYPE html>
  }}
  .section h2 {{ font-size: 16px; font-weight: 600; color: #1a237e; margin-bottom: 16px; padding-bottom: 8px; border-bottom: 2px solid #e8eaf6; }}

-  /* 统计卡片行 */
  .stats-row {{ display: flex; gap: 16px; flex-wrap: wrap; }}
  .stat-card {{
    flex: 1; min-width: 140px; background: #f5f7fa; border-radius: 8px; padding: 14px 18px;
@@ -430,7 +442,6 @@ _DETAIL_HTML = '''<!DOCTYPE html>
  .dot-red {{ background: #ffcdd2; }}
  .dot-amber {{ background: #fff9c4; }}

-  /* 进度条 */
  .prog-bar-detail {{
    width: 100%; height: 12px; border-radius: 6px; background: #ffcdd2; overflow: hidden; margin: 10px 0 6px 0;
  }}
@@ -440,20 +451,17 @@ _DETAIL_HTML = '''<!DOCTYPE html>
  .prog-fill-detail.amber {{ background: linear-gradient(90deg, #ffca28, #ff8f00); }}
  .prog-fill-detail.red {{ background: linear-gradient(90deg, #ef5350, #ff1744); }}

-  /* 表格 */
  table {{ width: 100%; border-collapse: collapse; table-layout: fixed; }}
  th, td {{ padding: 10px 14px; text-align: left; border-bottom: 1px solid #eceff1; word-break: break-all; }}
  th {{ background: #f5f7fa; font-weight: 600; font-size: 12px; color: #78909c; text-transform: uppercase; letter-spacing: 0.5px; }}
  tbody tr:hover {{ background: #e8eaf6; }}
  tbody tr:last-child td {{ border-bottom: none; }}

-  /* 决策表列宽 */
  .dp-table th:nth-child(1), .dp-table td:nth-child(1) {{ width: 50px; }}
  .dp-table th:nth-child(2), .dp-table td:nth-child(2) {{ width: 70px; }}
  .dp-table th:nth-child(3), .dp-table td:nth-child(3) {{ width: 50px; }}
  .dp-table th:nth-child(5), .dp-table td:nth-child(5) {{ width: 160px; }}

-  /* 叶条件表列宽 */
  .leaf-table th:nth-child(1), .leaf-table td:nth-child(1) {{ width: 110px; }}
  .leaf-table th:nth-child(2), .leaf-table td:nth-child(2) {{ width: 60px; }}
  .leaf-table th:nth-child(4), .leaf-table td:nth-child(4),
@@ -468,7 +476,6 @@ _DETAIL_HTML = '''<!DOCTYPE html>
  .cond-ok {{ color: #00c853; }}
  .cond-miss {{ color: #ff5252; }}

-  /* 源码 */
  .source-section {{ font-family: "Cascadia Code","Fira Code","JetBrains Mono",Consolas,monospace; font-size: 13px; }}
  .source-line {{ display: flex; padding: 1px 0; }}
  .source-line:hover {{ background: #f5f5f5; }}
@@ -534,20 +541,22 @@ _DETAIL_HTML = '''<!DOCTYPE html>

  {source_section}

+  {source_note}
+
 </div>
 </body>
 </html>'''


 def generate_html_report(decision_points, leaf_stats, source_lines, outpath,
-                         filename='', index_relpath=None, covered_lines=None):
+                         filename='', index_relpath=None, covered_lines=None,
+                         source_note=''):
    title = f"覆盖率报告 — {filename}" if filename else "覆盖率报告"

    total_branches = sum(len(dp.branch_names) for dp in decision_points)
    covered_branches = sum(len(dp.active_branches) for dp in decision_points)
    implied_branches = sum(len(dp.implied_branches) for dp in decision_points)
    if covered_lines:
-        # 无分支程序：隐式 100%
        total_branches = max(total_branches, 1)
        covered_branches = max(covered_branches, 1)

@@ -555,15 +564,13 @@ def generate_html_report(decision_points, leaf_stats, source_lines, outpath,
    covered_leaves = (sum(1 for l in leaf_stats if l.covered_true) +
                      sum(1 for l in leaf_stats if l.covered_false))

-    # 计算数值
-    is_implicit = bool(covered_lines)  # 无分支程序，隐式 100%
+    is_implicit = bool(covered_lines)
    dec_pct_val = (covered_branches / total_branches * 100) if total_branches else 0
    dec_pct_text = "100% ✓" if is_implicit else (f"{dec_pct_val:.1f}%" if total_branches else "无")
    dec_frac = "全部覆盖" if is_implicit else (f"{covered_branches}/{total_branches}" if total_branches else "—")
    cond_frac = f"{covered_leaves}/{total_leaves}" if total_leaves else "—"
    implied_text = f'（+{implied_branches - covered_branches} 推断）' if implied_branches > covered_branches else ''

-    # 颜色
    if is_implicit or not total_branches or dec_pct_val >= 100:
        dec_val_cls = 'val-green'
        bar_cls = ''
@@ -581,7 +588,6 @@ def generate_html_report(decision_points, leaf_stats, source_lines, outpath,
    else:
        cond_val_cls = 'val-red'

-    # 决策点表格
    if decision_points:
        dp_rows = []
        for dp in decision_points:
@@ -608,7 +614,6 @@ def generate_html_report(decision_points, leaf_stats, source_lines, outpath,
    else:
        decision_table = ''

-    # 叶条件表格
    if leaf_stats:
        leaf_rows = []
        for leaf in leaf_stats:
@@ -627,7 +632,6 @@ def generate_html_report(decision_points, leaf_stats, source_lines, outpath,
    else:
        leaf_table = ''

-    # 源码标注
    if source_lines:
        line_cov = {}
        for dp in decision_points:
@@ -643,7 +647,6 @@ def generate_html_report(decision_points, leaf_stats, source_lines, outpath,
                else:
                    line_cov[dp.source_line].append('hl-amber')

-        # 无分支程序：所有 PD 行标记为已覆盖
        if covered_lines:
            for ln in covered_lines:
                line_cov.setdefault(ln, []).append('hl-green')
@@ -677,6 +680,7 @@ def generate_html_report(decision_points, leaf_stats, source_lines, outpath,
        leaf_table=leaf_table,
        source_section=source_section,
        dp_count_text=('—' if is_implicit else str(len(decision_points))),
+        source_note=source_note,
    )

    outpath = Path(outpath)
@@ -699,7 +703,6 @@ _INDEX_HTML = '''<!DOCTYPE html>
    background: #f0f2f5; color: #37474f; font-size: 14px; line-height: 1.6;
  }}

-  /* 顶栏 */
  .topbar {{
    background: linear-gradient(135deg, #1a237e, #283593);
    color: #fff; padding: 18px 32px;
@@ -711,7 +714,6 @@ _INDEX_HTML = '''<!DOCTYPE html>

  .container {{ max-width: 1200px; margin: 0 auto; padding: 28px 24px; }}

-  /* 统计卡片 */
  .cards {{ display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 16px; margin-bottom: 28px; }}
  .card {{
    background: #fff; border-radius: 10px; padding: 20px 22px;
@@ -725,7 +727,6 @@ _INDEX_HTML = '''<!DOCTYPE html>
  .num-red {{ color: #ff1744; }}
  .num-blue {{ color: #1a237e; }}

-  /* 图表行 */
  .charts-row {{
    display: flex; gap: 32px; justify-content: center; flex-wrap: wrap;
    background: #fff; border-radius: 10px; padding: 28px 20px;
@@ -744,7 +745,6 @@ _INDEX_HTML = '''<!DOCTYPE html>
  .legend .dot-red {{ background: #ff5252; }}
  .legend .dot-amber {{ background: #ffd740; }}

-  /* 工具栏 */
  .toolbar {{
    display: flex; justify-content: space-between; align-items: center;
    margin-bottom: 14px; flex-wrap: wrap; gap: 10px;
@@ -764,7 +764,6 @@ _INDEX_HTML = '''<!DOCTYPE html>
  .toolbar .sort-btn:hover {{ background: #eceff1; }}
  .toolbar .sort-btn.active {{ background: #e8eaf6; border-color: #3f51b5; color: #1a237e; font-weight: 500; }}

-  /* 表格 */
  .table-wrap {{
    background: #fff; border-radius: 10px; overflow: hidden;
    box-shadow: 0 1px 4px rgba(0,0,0,0.06);
@@ -789,7 +788,6 @@ _INDEX_HTML = '''<!DOCTYPE html>
  .prog-name a {{ color: #283593; text-decoration: none; }}
  .prog-name a:hover {{ text-decoration: underline; color: #1a237e; }}

-  /* 进度条 */
  .prog-wrap {{
    display: inline-flex; align-items: center; gap: 10px; width: 100%;
  }}
@@ -812,7 +810,6 @@ _INDEX_HTML = '''<!DOCTYPE html>
  .prog-fill.full {{ border-radius: 10px; }}
  .prog-text {{ font-family: "Cascadia Code","Fira Code","JetBrains Mono",Consolas,monospace; font-size: 13px; white-space: nowrap; min-width: 48px; }}

-  /* 状态徽标 */
  .badge {{
    display: inline-block; padding: 3px 10px; border-radius: 12px;
    font-size: 12px; font-weight: 600; letter-spacing: 0.3px;
@@ -821,10 +818,8 @@ _INDEX_HTML = '''<!DOCTYPE html>
  .badge-warn {{ background: #fff8e1; color: #e65100; }}
  .badge-fail {{ background: #ffebee; color: #c62828; }}

-  /* 条件覆盖列 */
  .cond-cell {{ font-family: "Cascadia Code","Fira Code","JetBrains Mono",Consolas,monospace; font-size: 13px; }}

-  /* 响应式 */
  @media (max-width: 680px) {{
    .topbar {{ flex-direction: column; align-items: flex-start; gap: 6px; padding: 14px 18px; }}
    .container {{ padding: 16px 12px; }}
@@ -968,7 +963,6 @@ function filterTable() {{


 def _ring_svg(pct, color_stops):
-    """生成 SVG 圆环 HTML。pct: 0-100 浮点数。"""
    r = 54
    circ = 2 * 3.14159265 * r
    offset = circ * (1 - pct / 100) if pct > 0 else circ
@@ -995,7 +989,6 @@ def _ring_svg(pct, color_stops):


 def generate_coverage_index(programs, outdir):
-    """生成覆盖率总括索引页。"""
    from datetime import datetime
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')

@@ -1038,7 +1031,6 @@ def generate_coverage_index(programs, outdir):
        cond_text = f"{cc}/{tc}" if tc else "—"
        bar_pct = int(pct_dec)

-        # 进度条颜色
        if imp or pct_dec >= 100:
            bar_cls = ''
        elif pct_dec >= 80:
@@ -1046,7 +1038,6 @@ def generate_coverage_index(programs, outdir):
        else:
            bar_cls = ' red'

-        # 状态徽标
        if tb == 0 or (cb == tb and not (ib > cb)):
            badge = '<span class="badge badge-pass">&#10003; 完全</span>'
        elif cb == tb and ib > cb:
@@ -1056,7 +1047,6 @@ def generate_coverage_index(programs, outdir):
        else:
            badge = '<span class="badge badge-fail">&#10007; 欠缺</span>'

-        # 条件覆盖数字颜色
        if tc:
            cond_pct = cc / tc * 100
            cond_color = 'num-green' if cond_pct == 100 else ('num-amber' if cond_pct >= 80 else 'num-red')
@@ -1107,7 +1097,6 @@ def generate_coverage_index(programs, outdir):
 # ── PROCEDURE DIVISION 行范围定位（用于无分支程序标记）──

 def _find_proc_range(raw_source: str):
-    """返回 PROCEDURE DIVISION 的行范围 (start_line, end_line) 1-indexed，或 None。"""
    lines = raw_source.splitlines()
    proc_start = None
    for i, line in enumerate(lines):
@@ -1116,26 +1105,36 @@ def _find_proc_range(raw_source: str):
            break
    if proc_start is None:
        return None
-    # 找下一个 DIVISION 作为结束边界（或文件尾）
    for i in range(proc_start, len(lines)):
        if re.search(r'(IDENTIFICATION|DATA|ENVIRONMENT)\s+DIVISION', lines[i].upper()):
-            return (proc_start, i)  # 不包含下一个 DIVISION
+            return (proc_start, i)
    return (proc_start, len(lines) + 1)


 # ── 接入入口 ──

 def run_coverage(branch_tree, branch_paths_with_assigns, fields,
-                 raw_source, output_prefix, index_relpath=None):
-    """完整覆盖率流程：收集 → 标记 → 定位 → 输出。
-
-    Returns:
-        dict: 汇总数据，用于总括页聚合
-    """
+                 raw_source, output_prefix, index_relpath=None,
+                 gcov_data=None):
    decision_points, leaf_stats = collect_decision_points(branch_tree, fields)

    mark_coverage(decision_points, leaf_stats, branch_paths_with_assigns, fields)

+    if gcov_data:
+        mark_from_gcov(decision_points, gcov_data, branch_tree)
+        for ls in leaf_stats:
+            ls.covered_true = False
+            ls.covered_false = False
+
+    _source_note = ''
+    if gcov_data:
+        _source_note = (
+            '<div style="margin-top:16px;font-size:12px;color:#90a4ae;'
+            'text-align:center;border-top:1px solid #eceff1;padding-top:12px;">'
+            '覆盖率基于 gcov 运行时数据'
+            '</div>'
+        )
+
    if raw_source:
        locate_decision_lines(decision_points, raw_source)

@@ -1146,7 +1145,6 @@ def run_coverage(branch_tree, branch_paths_with_assigns, fields,
                     sum(1 for l in leaf_stats if l.covered_false))
    leaf_total = len(leaf_stats) * 2

-    # 无决策点但有路径 → PROCEDURE DIVISION 全部覆盖
    covered_lines = set()
    if total == 0 and branch_paths_with_assigns and raw_source:
        proc_range = _find_proc_range(raw_source)
@@ -1161,9 +1159,9 @@ def run_coverage(branch_tree, branch_paths_with_assigns, fields,
                             f"{output_prefix}_coverage.html",
                             Path(output_prefix).stem,
                             index_relpath=index_relpath,
-                             covered_lines=covered_lines)
+                             covered_lines=covered_lines,
+                             source_note=_source_note)

-    # 控制台摘要
    if total or leaf_total:
        logger.info(f"\n=== 分支覆盖率 ===")
        if covered_lines and not decision_points:
@@ -1194,7 +1192,7 @@ def run_coverage(branch_tree, branch_paths_with_assigns, fields,
    implicit_100 = bool(covered_lines)
    return {
        'name': Path(output_prefix).stem if output_prefix else '',
-        'detail_relpath': ('../' + Path(output_prefix).stem + '_coverage.html'
+        'detail_relpath': (Path(output_prefix).stem + '_coverage.html'
                           if output_prefix else ''),
        'total_branches': total,
        'covered_branches': covered,
@@ -1208,15 +1206,6 @@ def run_coverage(branch_tree, branch_paths_with_assigns, fields,


 def check_coverage(structure: dict, test_records: list[dict]) -> dict:
-    """报告 COBOL 源码的静态分支结构信息。
-
-    注意: 静态分析无法精确判断每条测试数据运行时覆盖了哪些分支。
-    精确的路径追踪依赖 gcov（Phase 3）。此处仅报告总分支数和记录生成情况。
-
-    Returns:
-        dict with: paragraph_rate, branch_rate, decision_rate, total_branches,
-                   total_paragraphs, records_count, note
-    """
    total_paragraphs = structure.get("total_paragraphs", 0)
    total_branches = structure.get("total_branches", 0)
    decision_points = structure.get("decision_points", [])