chore: SETUP.md + 测试报告脚本 + 文档更新

- SETUP.md: 完整环境搭建指南（同事用）
- SETUP_QUICK.md: 快速搭环境（4步）
- s22~s26: TNA端到端、覆盖率报告、回归检查
- procedure_grammar.lark: 实验性Lark语法

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-25 08:50:17 +08:00
parent 56d1cf5e78
commit 50995d3335
25 changed files with 6861 additions and 0 deletions
@@ -0,0 +1,182 @@
+"""S24: Full final report — program classification + test benchmarks + branch coverage + line coverage"""
+import sys, os, re, time
+# Put the parent of this script's directory first on the import path;
+# presumably that is where the cobol_testgen package lives — verify.
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+# Hard-coded, machine-specific input locations (drive D:). Adjust these
+# when running the report on another machine.
+ROOT_BENCH = "D:/cobol-java/cobol-test-programs/"
+COPYBOOKS_BENCH = os.path.join(ROOT_BENCH, "common", "copybooks")
+ROOT_TNA = "D:/cobol-java/cobol-tna-system/"
+COPYBOOKS_TNA = os.path.join(ROOT_TNA, "cpy")
+
+# These imports come after the sys.path change above; presumably they rely on it.
+from cobol_testgen import extract_structure, generate_data
+from cobol_testgen.read import preprocess, resolve_copybooks
+from cobol_testgen.flatfile import analyze_fd_layout, write_all_files
+
+def find_main(d):
+    """Choose the COBOL source file in directory *d* that the report should analyze.
+
+    Only names ending in ".cbl" are considered. If any of them start with
+    "main-NN-" (two digits, matched case-insensitively) the choice is limited
+    to those; otherwise every ".cbl" file is eligible. Among the eligible
+    files the one with the largest size on disk wins.
+
+    Returns the bare file name (not a full path), or None when *d* holds no
+    ".cbl" file.
+    """
+    sources = []
+    for entry in os.listdir(d):
+        if entry.endswith(".cbl"):
+            sources.append(entry)
+    if not sources:
+        return None
+
+    def size_on_disk(fname):
+        return os.path.getsize(os.path.join(d, fname))
+
+    mains = [s for s in sources if re.match(r"main-\d{2}-", s, re.IGNORECASE)]
+    pool = mains if mains else sources
+    return max(pool, key=size_on_disk)
+
+# ── Program classification based on directory/content ──
+# Each row is (program key, type, subtype, benchmark tag). The key is the
+# benchmark directory name or the TNA program name; the report looks
+# programs up in CLASS_MAP by that key.
+_BENCH_TAG = "S18/S19"
+_TNA_TAG = "S22/TNA"
+_CLASS_ROWS = (
+    # Benchmark programs
+    ("01-matching-1-1", "Matching", "1:1照合", _BENCH_TAG),
+    ("02-matching-1-N", "Matching", "1:N照合", _BENCH_TAG),
+    ("03-matching-N-1", "Matching", "N:1照合", _BENCH_TAG),
+    ("04-edit-getput", "Edit/Output", "请求书编辑", _BENCH_TAG),
+    ("05-branch-if", "ControlFlow", "IF判定", _BENCH_TAG),
+    ("06-branch-evaluate", "ControlFlow", "EVALUATE多分岐", _BENCH_TAG),
+    ("07-keybreak-summary", "KeyBreak", "キーブレイク集計", _BENCH_TAG),
+    ("08-keybreak-aggregate", "KeyBreak", "キーブレイク集計2", _BENCH_TAG),
+    ("09-db-update", "DB/SQL", "DB更新", _BENCH_TAG),
+    ("10-divide-50", "Division", "50件分割", _BENCH_TAG),
+    ("11-divide-25", "Division", "25件分割", _BENCH_TAG),
+    ("12-divide-100", "Division", "100件分割", _BENCH_TAG),
+    ("13-validation-nodup", "Validation", "重複無チェック", _BENCH_TAG),
+    ("14-online-cics", "CICS/Online", "CICSオンライン", _BENCH_TAG),
+    ("15-csv-fb-nolf", "CSV", "CSV→FB改行無", _BENCH_TAG),
+    ("16-matching-2stage-1-1", "Matching", "2段階1:1照合", _BENCH_TAG),
+    ("17-matching-2stage-N-1", "Matching", "2段階N:1照合", _BENCH_TAG),
+    ("18-matching-MN-to-M", "Matching", "MN→M照合", _BENCH_TAG),
+    ("19-matching-MN-to-N", "Matching", "MN→N照合", _BENCH_TAG),
+    ("20-matching-MN-to-MxN", "Matching", "MN→MxN照合", _BENCH_TAG),
+    ("21-csv-fb-lf", "CSV", "CSV→FB改行有", _BENCH_TAG),
+    ("22-matching-2stage-MN", "Matching", "2段階MN照合", _BENCH_TAG),
+    ("23-select-condition", "DB/SQL", "条件抽出", _BENCH_TAG),
+    ("24-table-search", "Table/Search", "内部表検索", _BENCH_TAG),
+    ("25-subprogram", "Subprogram", "CALLサブプログラム", _BENCH_TAG),
+    ("26-db-search", "DB/SQL", "DB検索", _BENCH_TAG),
+    ("27-validation-halfwidth", "Validation", "半角チェック", _BENCH_TAG),
+    ("28-sysin", "ControlFlow", "SYSINパラメータ", _BENCH_TAG),
+    ("29-ascii-ebcdic", "Encoding", "ASCII/EBCDIC変換", _BENCH_TAG),
+    ("30-keybreak-other", "KeyBreak", "キーブレイク別", _BENCH_TAG),
+    ("31-validation-withdup", "Validation", "重複有チェック", _BENCH_TAG),
+    ("32-mix-1N-samekeybreak", "Matching", "混合1N同KEY", _BENCH_TAG),
+    ("33-mix-1N-diffkeybreak", "Matching", "混合1N別KEY", _BENCH_TAG),
+    ("34-sort", "Sort/Merge", "SORT処理", _BENCH_TAG),
+    ("35-merge", "Sort/Merge", "MERGE処理", _BENCH_TAG),
+    ("36-billing-calc", "Division", "料金計算", _BENCH_TAG),
+    ("pipeline", "Pipeline", "パイプラインドライバ", "S19"),
+    # TNA programs
+    ("ZAN01CHK", "Matching", "残業申請振分", _TNA_TAG),
+    ("ZAN02CHK", "Validation", "重複チェック", _TNA_TAG),
+    ("ZAN03CHK", "Matching", "残業申請照合", _TNA_TAG),
+    ("ZAN04MAT", "Matching", "残業実績照合", _TNA_TAG),
+    ("ZAN05CAL", "Division", "残業計算", _TNA_TAG),
+    ("ZAN06UPD", "DB/SQL", "DB更新処理", _TNA_TAG),
+)
+# Every entry gets its own dict, in the same order as the rows above.
+CLASS_MAP = {
+    key: {"type": kind, "subtype": subtype, "benchmark": tag}
+    for key, kind, subtype, tag in _CLASS_ROWS
+}
+
+def analyze_one(name, fpath, source_dir, copybook_dirs):
+    """Analyze one COBOL program and collect the metrics shown in the report.
+
+    Args:
+        name: Label stored under "name"; callers also use it as the CLASS_MAP key.
+        fpath: Path of the COBOL source file, read as UTF-8 (a leading BOM is dropped).
+        source_dir: Source directory handed to resolve_copybooks.
+        copybook_dirs: Extra copybook search paths handed to resolve_copybooks.
+
+    Returns:
+        A dict that always has the same keys:
+        "lines" / "code_lines" (all lines; non-blank lines whose first
+        non-space character is not "*"), "branches", "dpoints", "records",
+        "covered", "cov_total", "cov_pct", "flat_files", "time_ms" and "error".
+        No Exception escapes: on failure the first 80 characters of the
+        message go into "error" and metrics gathered before the failure are
+        kept. "time_ms" covers the analysis steps only (not the file read)
+        and stays 0 when the analysis fails.
+    """
+    result = {"name": name, "branches": 0, "covered": 0, "dpoints": 0, "records": 0,
+              "flat_files": 0, "lines": 0, "code_lines": 0, "error": "", "time_ms": 0,
+              "cov_total": 0, "cov_pct": 0}
+    try:
+        # Context manager so the handle is closed even if read() fails.
+        with open(fpath, encoding="utf-8-sig") as fh:
+            src = fh.read()
+        source_lines = src.split("\n")
+        result["lines"] = len(source_lines)
+        stripped = (line.strip() for line in source_lines)
+        result["code_lines"] = sum(1 for text in stripped if text and not text.startswith("*"))
+        t0 = time.time()
+        st = extract_structure(src)
+        result["branches"] = st.get("total_branches", 0)
+        result["dpoints"] = len(st.get("decision_points", []))
+        # Pass RAW source to generate_data (it internally calls preprocess)
+        recs = generate_data(src, st)
+        result["records"] = len(recs)
+        # Read coverage only after generate_data; presumably that call fills
+        # st["coverage"] — verify against cobol_testgen.
+        cov = st.get("coverage", {})
+        result["covered"] = cov.get("covered", 0)
+        result["cov_total"] = cov.get("total", 0)
+        result["cov_pct"] = cov.get("pct", 0)
+        pp2 = preprocess(resolve_copybooks(src, source_dir, extra_search_paths=copybook_dirs))
+        layouts = analyze_fd_layout(pp2)
+        result["flat_files"] = len(layouts)
+        result["time_ms"] = int((time.time()-t0)*1000)
+    except Exception as e:
+        # Best-effort by design: one broken program must not abort the report.
+        result["error"] = str(e)[:80]
+    return result
+
+# ── Run ALL programs ──
+def _print_program_row(r):
+    """Print the table row for one analyze_one() result *r*.
+
+    The classification is looked up in CLASS_MAP by r["name"] ("?" when the
+    program is not listed). The trailing status column shows the first 10
+    characters of the error message, or nothing when the analysis succeeded.
+    """
+    cls = CLASS_MAP.get(r["name"], {"type":"?", "subtype":"?"})
+    status = r.get("error", "")[:10]
+    print(f"  {r['name']:<28} {cls['type']:<16} {cls['subtype']:<18} {r['branches']:>4} {r['covered']:>4} {r.get('cov_pct',0):>4.0f}% {r['dpoints']:>4} {r['records']:>4} {r['flat_files']:>4} {r['code_lines']:>5} {r['code_lines']/max(r['branches'],1):>5.0f} {r.get('time_ms',0):>5}ms {status}")
+
+
+print("=" * 130)
+print("PROGRAM CLASSIFICATION & COVERAGE REPORT")
+print("=" * 130)
+print(f"{'Program':<28} {'Type':<16} {'Subtype':<18} {'Br':>4} {'Cov':>4} {'C%':>5} {'DPs':>4} {'Recs':>4} {'Flats':>4} {'CodeL':>5} {'Lns/Br':>6} {'Time':>6}")
+print("-" * 130)
+
+results = []
+# Benchmark programs: one sub-directory per program under ROOT_BENCH.
+if os.path.isdir(ROOT_BENCH):
+    for d in sorted(os.listdir(ROOT_BENCH)):
+        dp = os.path.join(ROOT_BENCH, d)
+        # Skip plain files and the shared, non-program directories.
+        if not os.path.isdir(dp) or d in ("common","docs","cross-cutting"):
+            continue
+        fn = find_main(dp)
+        if not fn:
+            continue
+        r = analyze_one(d, os.path.join(dp, fn), dp, [COPYBOOKS_BENCH])
+        results.append(r)
+        _print_program_row(r)
+else:
+    # Same policy as the TNA loop below: a missing input is skipped, not fatal.
+    print(f"  (benchmark root not found, skipped: {ROOT_BENCH})")
+
+print("-" * 130)
+# TNA programs: a fixed list of sources under ROOT_TNA/src.
+for f in ["ZAN01CHK","ZAN02CHK","ZAN03CHK","ZAN04MAT","ZAN05CAL","ZAN06UPD"]:
+    fpath = os.path.join(ROOT_TNA, "src", f + ".cbl")
+    if not os.path.exists(fpath):
+        continue
+    r = analyze_one(f, fpath, os.path.join(ROOT_TNA, "src"), [COPYBOOKS_TNA])
+    results.append(r)
+    _print_program_row(r)
+
+print("=" * 130)
+
+# ── Summary by classification ──
+from collections import defaultdict
+
+# One running-total bucket per classification type; programs that are not
+# listed in CLASS_MAP are pooled under "?".
+by_type = defaultdict(lambda: dict(count=0, branches=0, covered=0, records=0, lines=0))
+for r in results:
+    bucket = by_type[CLASS_MAP.get(r["name"], {"type": "?"})["type"]]
+    bucket["count"] += 1
+    bucket["branches"] += r["branches"]
+    bucket["covered"] += r.get("covered", 0)
+    bucket["records"] += r["records"]
+    bucket["lines"] += r["code_lines"]
+
+print("\n" + "=" * 100)
+print("COVERAGE BY CLASSIFICATION")
+print("=" * 100)
+print(f"{'Type':<20} {'Count':>5} {'Branches':>10} {'Covered':>8} {'Cov%':>6} {'Records':>8} {'CodeLines':>10}")
+print("-" * 70)
+# Types with the most branches come first; ties keep first-seen order.
+ranked = sorted(by_type.items(), key=lambda item: item[1]["branches"], reverse=True)
+for t, data in ranked:
+    # Clamp the denominator so a type with no detected branches shows 0%.
+    pct = data["covered"] / max(data["branches"], 1) * 100
+    print(f"  {t:<20} {data['count']:>5} {data['branches']:>10} {data['covered']:>8} {pct:>5.0f}% {data['records']:>8} {data['lines']:>10}")
+print("-" * 70)
+
+# ── Totals ──
+# System-wide sums over every analyzed program (benchmark and TNA alike).
+total_br = sum(r["branches"] for r in results)
+total_cov = sum(r.get("covered",0) for r in results)
+total_recs = sum(r["records"] for r in results)
+total_lines = sum(r["code_lines"] for r in results)
+total_flats = sum(r["flat_files"] for r in results)
+total_time = sum(r.get("time_ms",0) for r in results)
+with_br = sum(1 for r in results if r["branches"] > 0)  # programs with at least one detected branch
+with_err = sum(1 for r in results if r.get("error"))    # programs whose analysis recorded an error
+print(f"\n{'='*100}")
+print("SYSTEM SUMMARY")
+print(f"{'='*100}")
+# Every ratio below clamps its denominator to 1, so an empty or fully failed
+# run prints zeros instead of raising ZeroDivisionError.
+print(f"  Total programs:           {len(results)}")
+print(f"  With branch detection:    {with_br}")
+print(f"  With errors:              {with_err}")
+print(f"  Total decision branches:  {total_br}")
+print(f"  Covered branches:         {total_cov}")
+print(f"  Branch coverage rate:     {total_cov/max(total_br,1)*100:.1f}%")
+print(f"  Total test records:       {total_recs}")
+print(f"  Flat file layouts:        {total_flats}")
+print(f"  Code lines (non-comment): {total_lines}")
+print(f"  Test density:             {total_recs/max(total_lines,1):.2f} recs/code-line")
+print(f"  Total execution time:     {total_time/1000:.1f}s")
+print(f"  Avg per program:          {total_time/max(len(results),1)/1000:.2f}s")
+print(f"{'='*100}")
+# Legend for the column abbreviations used in the per-program table.
+print("NOTES:")
+print("  Br   = Static decision branches (2 per IF/EVAL/PERFORM)")
+print("  Cov  = Branches covered by generated test data")
+print("  C%   = Branch coverage rate")
+print("  DPs  = Decision points (IF/EVAL/PERFORM count)")
+print("  Recs = Generated test data records")
+print("  CodeL= Source lines (non-comment, non-empty)")
+print("  Lns/Br = Code density (lines per decision branch)")
+print("  All values are REAL from extract_structure + generate_data + mark_coverage")
+print(f"{'='*100}")