chore: SETUP.md + 测试报告脚本 + 文档更新

- SETUP.md: 完整环境搭建指南(同事用)
- SETUP_QUICK.md: 快速搭环境(4步)
- s22~s26: TNA端到端、覆盖率报告、回归检查
- procedure_grammar.lark: 实验性Lark语法

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
NB-076
2026-06-25 08:50:17 +08:00
parent 56d1cf5e78
commit 50995d3335
25 changed files with 6861 additions and 0 deletions
+182
View File
@@ -0,0 +1,182 @@
"""S24: 全量最终报告 — 程序分类 + 测试基准 + 分支覆盖率 + 行覆盖率"""
import sys, os, re, time
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
# Root directories of the two COBOL corpora analysed by this report.
ROOT_BENCH = "D:/cobol-java/cobol-test-programs/"
ROOT_TNA = "D:/cobol-java/cobol-tna-system/"

# Copybook search directories, one per corpus, derived from the roots above.
COPYBOOKS_BENCH = os.path.join(ROOT_BENCH, "common", "copybooks")
COPYBOOKS_TNA = os.path.join(ROOT_TNA, "cpy")
from cobol_testgen import extract_structure, generate_data
from cobol_testgen.read import preprocess, resolve_copybooks
from cobol_testgen.flatfile import analyze_fd_layout, write_all_files
def find_main(d):
    """Return the file name of the main COBOL source in directory *d*.

    Candidates are the directory entries whose name ends in ``.cbl``
    (compared case-insensitively, to agree with the case-insensitive
    ``main-NN-`` name check below).  If any candidate is named
    ``main-NN-...`` the largest of those is returned; otherwise the largest
    candidate overall.  Returns ``None`` when the directory holds no
    ``.cbl`` entry.

    Raises whatever ``os.listdir`` / ``os.path.getsize`` raise for an
    unreadable directory or entry.
    """
    def size_of(fname):
        return os.path.getsize(os.path.join(d, fname))

    # Sorted so that equally sized files are resolved the same way on every
    # run (os.listdir order is arbitrary; max() keeps the first maximum).
    cbls = sorted(f for f in os.listdir(d) if f.lower().endswith(".cbl"))
    if not cbls:
        return None
    mains = [f for f in cbls if re.match(r"main-\d{2}-", f, re.IGNORECASE)]
    return max(mains or cbls, key=size_of)
# ── Program classification based on directory/content ──
# Each row is (program key, type, subtype, benchmark tag).  The key is the
# benchmark directory name or the TNA program name; it is looked up with
# CLASS_MAP.get(name, ...) when the report rows are printed and grouped.
_BENCH_TAG = "S18/S19"
_TNA_TAG = "S22/TNA"
_CLASS_ROWS = (
    # Benchmark programs
    ("01-matching-1-1", "Matching", "1:1照合", _BENCH_TAG),
    ("02-matching-1-N", "Matching", "1:N照合", _BENCH_TAG),
    ("03-matching-N-1", "Matching", "N:1照合", _BENCH_TAG),
    ("04-edit-getput", "Edit/Output", "请求书编辑", _BENCH_TAG),
    ("05-branch-if", "ControlFlow", "IF判定", _BENCH_TAG),
    ("06-branch-evaluate", "ControlFlow", "EVALUATE多分岐", _BENCH_TAG),
    ("07-keybreak-summary", "KeyBreak", "キーブレイク集計", _BENCH_TAG),
    ("08-keybreak-aggregate", "KeyBreak", "キーブレイク集計2", _BENCH_TAG),
    ("09-db-update", "DB/SQL", "DB更新", _BENCH_TAG),
    ("10-divide-50", "Division", "50件分割", _BENCH_TAG),
    ("11-divide-25", "Division", "25件分割", _BENCH_TAG),
    ("12-divide-100", "Division", "100件分割", _BENCH_TAG),
    ("13-validation-nodup", "Validation", "重複無チェック", _BENCH_TAG),
    ("14-online-cics", "CICS/Online", "CICSオンライン", _BENCH_TAG),
    ("15-csv-fb-nolf", "CSV", "CSV→FB改行無", _BENCH_TAG),
    ("16-matching-2stage-1-1", "Matching", "2段階1:1照合", _BENCH_TAG),
    ("17-matching-2stage-N-1", "Matching", "2段階N:1照合", _BENCH_TAG),
    ("18-matching-MN-to-M", "Matching", "MN→M照合", _BENCH_TAG),
    ("19-matching-MN-to-N", "Matching", "MN→N照合", _BENCH_TAG),
    ("20-matching-MN-to-MxN", "Matching", "MN→MxN照合", _BENCH_TAG),
    ("21-csv-fb-lf", "CSV", "CSV→FB改行有", _BENCH_TAG),
    ("22-matching-2stage-MN", "Matching", "2段階MN照合", _BENCH_TAG),
    ("23-select-condition", "DB/SQL", "条件抽出", _BENCH_TAG),
    ("24-table-search", "Table/Search", "内部表検索", _BENCH_TAG),
    ("25-subprogram", "Subprogram", "CALLサブプログラム", _BENCH_TAG),
    ("26-db-search", "DB/SQL", "DB検索", _BENCH_TAG),
    ("27-validation-halfwidth", "Validation", "半角チェック", _BENCH_TAG),
    ("28-sysin", "ControlFlow", "SYSINパラメータ", _BENCH_TAG),
    ("29-ascii-ebcdic", "Encoding", "ASCII/EBCDIC変換", _BENCH_TAG),
    ("30-keybreak-other", "KeyBreak", "キーブレイク別", _BENCH_TAG),
    ("31-validation-withdup", "Validation", "重複有チェック", _BENCH_TAG),
    ("32-mix-1N-samekeybreak", "Matching", "混合1N同KEY", _BENCH_TAG),
    ("33-mix-1N-diffkeybreak", "Matching", "混合1N別KEY", _BENCH_TAG),
    ("34-sort", "Sort/Merge", "SORT処理", _BENCH_TAG),
    ("35-merge", "Sort/Merge", "MERGE処理", _BENCH_TAG),
    ("36-billing-calc", "Division", "料金計算", _BENCH_TAG),
    ("pipeline", "Pipeline", "パイプラインドライバ", "S19"),
    # TNA programs
    ("ZAN01CHK", "Matching", "残業申請振分", _TNA_TAG),
    ("ZAN02CHK", "Validation", "重複チェック", _TNA_TAG),
    ("ZAN03CHK", "Matching", "残業申請照合", _TNA_TAG),
    ("ZAN04MAT", "Matching", "残業実績照合", _TNA_TAG),
    ("ZAN05CAL", "Division", "残業計算", _TNA_TAG),
    ("ZAN06UPD", "DB/SQL", "DB更新処理", _TNA_TAG),
)
CLASS_MAP = {
    key: {"type": kind, "subtype": subtype, "benchmark": tag}
    for key, kind, subtype, tag in _CLASS_ROWS
}
def analyze_one(name, fpath, source_dir, copybook_dirs):
    """Analyse one COBOL program and return its report metrics as a dict.

    Args:
        name: Label stored under ``"name"`` in the result.
        fpath: Path of the COBOL source file (read as UTF-8, BOM tolerated).
        source_dir: Directory passed to ``resolve_copybooks`` as the source
            location.
        copybook_dirs: Passed to ``resolve_copybooks`` as
            ``extra_search_paths``.

    Returns:
        A dict with the keys ``name``, ``branches``, ``covered``,
        ``cov_total``, ``cov_pct``, ``dpoints``, ``records``, ``flat_files``,
        ``lines``, ``code_lines``, ``error`` and ``time_ms``.  Any exception
        raised during analysis is caught: ``error`` then holds its message
        (at most 80 characters, or the exception class name if the message
        is empty) and the metrics not yet computed keep their zero defaults.
    """
    result = {"name": name, "branches": 0, "covered": 0, "cov_total": 0, "cov_pct": 0,
              "dpoints": 0, "records": 0, "flat_files": 0, "lines": 0, "code_lines": 0,
              "error": "", "time_ms": 0}
    try:
        # Context manager so the handle is closed even if reading fails.
        with open(fpath, encoding="utf-8-sig") as fh:
            src = fh.read()
        src_lines = src.split("\n")
        result["lines"] = len(src_lines)
        # "Code" lines: non-blank lines whose first non-space character is not "*".
        result["code_lines"] = sum(
            1 for line in src_lines
            if line.strip() and not line.strip().startswith("*")
        )

        # Only the analysis itself is timed, not the file read above.
        t0 = time.perf_counter()
        st = extract_structure(src)
        result["branches"] = st.get("total_branches", 0)
        result["dpoints"] = len(st.get("decision_points", []))
        # Pass RAW source to generate_data (it internally calls preprocess)
        recs = generate_data(src, st)
        result["records"] = len(recs)
        # Coverage is read from `st` only after generate_data has run;
        # presumably generate_data fills st["coverage"] in place — TODO confirm.
        cov = st.get("coverage", {})
        result["covered"] = cov.get("covered", 0)
        result["cov_total"] = cov.get("total", 0)
        result["cov_pct"] = cov.get("pct", 0)
        pp2 = preprocess(resolve_copybooks(src, source_dir, extra_search_paths=copybook_dirs))
        layouts = analyze_fd_layout(pp2)
        result["flat_files"] = len(layouts)
        result["time_ms"] = int((time.perf_counter() - t0) * 1000)
    except Exception as e:
        # Best-effort report: one failing program must not abort the whole run.
        # Fall back to the class name so an empty message still counts as an error.
        result["error"] = str(e)[:80] or type(e).__name__
    return result
# ── Run ALL programs ──
def _print_row(r):
    """Print one table row for the result dict *r* returned by analyze_one.

    The classification columns come from CLASS_MAP, keyed by r["name"];
    unknown programs are shown as "?".  The trailing status column is the
    first 10 characters of the error message, or empty on success.
    """
    cls = CLASS_MAP.get(r["name"], {"type":"?", "subtype":"?"})
    status = r.get("error","")[:10] if r.get("error") else ""
    print(f" {r['name']:<28} {cls['type']:<16} {cls['subtype']:<18} {r['branches']:>4} {r['covered']:>4} {r.get('cov_pct',0):>4.0f}% {r['dpoints']:>4} {r['records']:>4} {r['flat_files']:>4} {r['code_lines']:>5} {r['code_lines']/max(r['branches'],1):>5.0f} {r.get('time_ms',0):>5}ms {status}")


print("=" * 130)
print("PROGRAM CLASSIFICATION & COVERAGE REPORT")
print("=" * 130)
print(f"{'Program':<28} {'Type':<16} {'Subtype':<18} {'Br':>4} {'Cov':>4} {'C%':>5} {'DPs':>4} {'Recs':>4} {'Flats':>4} {'CodeL':>5} {'Lns/Br':>6} {'Time':>6}")
print("-" * 130)
results = []
# Benchmark programs: one sub-directory per program, main source chosen by find_main.
if os.path.isdir(ROOT_BENCH):
    for d in sorted(os.listdir(ROOT_BENCH)):
        dp = os.path.join(ROOT_BENCH, d)
        # Shared/support directories contain no program of their own.
        if not os.path.isdir(dp) or d in ("common","docs","cross-cutting"):
            continue
        fn = find_main(dp)
        if not fn:
            continue
        r = analyze_one(d, os.path.join(dp, fn), dp, [COPYBOOKS_BENCH])
        results.append(r)
        _print_row(r)
else:
    # A missing benchmark root is skipped like a missing TNA file below,
    # instead of aborting the whole report with FileNotFoundError.
    print(f"WARNING: benchmark root not found, skipped: {ROOT_BENCH}", file=sys.stderr)
print("-" * 130)
# TNA programs: fixed list of sources under <ROOT_TNA>/src; missing files are skipped.
for f in ["ZAN01CHK","ZAN02CHK","ZAN03CHK","ZAN04MAT","ZAN05CAL","ZAN06UPD"]:
    fpath = os.path.join(ROOT_TNA, "src", f + ".cbl")
    if not os.path.exists(fpath):
        continue
    r = analyze_one(f, fpath, os.path.join(ROOT_TNA, "src"), [COPYBOOKS_TNA])
    results.append(r)
    _print_row(r)
print("=" * 130)
# ── Summary by classification ──
from collections import defaultdict


def _empty_bucket():
    """Fresh per-type accumulator with all counters at zero."""
    return {"count":0, "branches":0, "covered":0, "records":0, "lines":0}


# Aggregate every analysed program into the bucket of its CLASS_MAP type
# ("?" for programs that have no CLASS_MAP entry).
by_type = defaultdict(_empty_bucket)
for r in results:
    bucket = by_type[CLASS_MAP.get(r["name"], {"type":"?"})["type"]]
    bucket["count"] += 1
    bucket["branches"] += r["branches"]
    bucket["covered"] += r.get("covered",0)
    bucket["records"] += r["records"]
    bucket["lines"] += r["code_lines"]

print("\n" + "=" * 100)
print("COVERAGE BY CLASSIFICATION")
print("=" * 100)
print(f"{'Type':<20} {'Count':>5} {'Branches':>10} {'Covered':>8} {'Cov%':>6} {'Records':>8} {'CodeLines':>10}")
print("-" * 70)
# Types with the most branches first.
for type_name, agg in sorted(by_type.items(), key=lambda item: -item[1]["branches"]):
    pct = agg["covered"] / max(agg["branches"], 1) * 100
    print(f" {type_name:<20} {agg['count']:>5} {agg['branches']:>10} {agg['covered']:>8} {pct:>5.0f}% {agg['records']:>8} {agg['lines']:>10}")
print("-" * 70)
# ── Totals ──
# System-wide sums over every analysed program in `results`.
total_br = sum(r["branches"] for r in results)
total_cov = sum(r.get("covered",0) for r in results)
total_recs = sum(r["records"] for r in results)
total_lines = sum(r["code_lines"] for r in results)
total_flats = sum(r["flat_files"] for r in results)
total_time = sum(r.get("time_ms",0) for r in results)
with_br = sum(1 for r in results if r["branches"] > 0)
with_err = sum(1 for r in results if r.get("error"))
print(f"\n{'='*100}")
print("SYSTEM SUMMARY")
print(f"{'='*100}")
print(f" Total programs: {len(results)}")
print(f" With branch detection: {with_br}")
print(f" With errors: {with_err}")
print(f" Total decision branches: {total_br}")
print(f" Covered branches: {total_cov}")
print(f" Branch coverage rate: {total_cov/max(total_br,1)*100:.1f}%")
print(f" Total test records: {total_recs}")
print(f" Flat file layouts: {total_flats}")
print(f" Code lines (non-comment): {total_lines}")
# max(..., 1) like every other ratio here: with no programs found (or all of
# them failing before the line count) total_lines is 0 and a bare division
# would abort the report with ZeroDivisionError.
print(f" Test density: {total_recs/max(total_lines,1):.2f} recs/code-line")
print(f" Total execution time: {total_time/1000:.1f}s")
print(f" Avg per program: {total_time/max(len(results),1)/1000:.2f}s")
print(f"{'='*100}")
# Legend for the column abbreviations used in the per-program table.
print("NOTES:")
print(" Br = Static decision branches (2 per IF/EVAL/PERFORM)")
print(" Cov = Branches covered by generated test data")
print(" C% = Branch coverage rate")
print(" DPs = Decision points (IF/EVAL/PERFORM count)")
print(" Recs = Generated test data records")
print(" CodeL= Source lines (non-comment, non-empty)")
print(" Lns/Br = Code density (lines per decision branch)")
print(" All values are REAL from extract_structure + generate_data + mark_coverage")
print(f"{'='*100}")