# cobol-java-v3/test-data/s24_final_report.py

"""S24: 全量最终报告 — 程序分类 + 测试基准 + 分支覆盖率 + 行覆盖率"""
import sys, os, re, time
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

ROOT_BENCH = "D:/cobol-java/cobol-test-programs/"
COPYBOOKS_BENCH = os.path.join(ROOT_BENCH, "common", "copybooks")
ROOT_TNA = "D:/cobol-java/cobol-tna-system/"
COPYBOOKS_TNA = os.path.join(ROOT_TNA, "cpy")

from cobol_testgen import extract_structure, generate_data
from cobol_testgen.read import preprocess, resolve_copybooks
from cobol_testgen.flatfile import analyze_fd_layout, write_all_files

def find_main(d):
    """Pick the main ``.cbl`` source file inside directory ``d``.

    Files whose name starts with ``main-NN-`` (two digits, matched
    case-insensitively) are preferred; among the candidates the largest
    file on disk wins.  When no such file exists the largest ``.cbl`` file
    is returned, and ``None`` when the directory holds no ``.cbl`` file.
    """
    def size_of(entry):
        return os.path.getsize(os.path.join(d, entry))

    sources = []
    preferred = []
    for entry in os.listdir(d):
        if not entry.endswith(".cbl"):
            continue
        sources.append(entry)
        if re.match(r"main-\d{2}-", entry, re.IGNORECASE):
            preferred.append(entry)

    # Fall back to every .cbl file when nothing follows the main-NN- naming.
    pool = preferred or sources
    if not pool:
        return None
    return max(pool, key=size_of)

# ── Program classification based on directory/content ──
# Maps a program key (benchmark directory name or TNA program id) to its
# classification: {"type", "subtype", "benchmark"}.  Rows are grouped by the
# benchmark label they share; insertion order follows the listing below.
CLASS_MAP = {
    key: {"type": kind, "subtype": subtype, "benchmark": bench}
    for bench, rows in (
        # Benchmark programs
        ("S18/S19", (
            ("01-matching-1-1", "Matching", "1:1照合"),
            ("02-matching-1-N", "Matching", "1:N照合"),
            ("03-matching-N-1", "Matching", "N:1照合"),
            ("04-edit-getput", "Edit/Output", "请求书编辑"),
            ("05-branch-if", "ControlFlow", "IF判定"),
            ("06-branch-evaluate", "ControlFlow", "EVALUATE多分岐"),
            ("07-keybreak-summary", "KeyBreak", "キーブレイク集計"),
            ("08-keybreak-aggregate", "KeyBreak", "キーブレイク集計2"),
            ("09-db-update", "DB/SQL", "DB更新"),
            ("10-divide-50", "Division", "50件分割"),
            ("11-divide-25", "Division", "25件分割"),
            ("12-divide-100", "Division", "100件分割"),
            ("13-validation-nodup", "Validation", "重複無チェック"),
            ("14-online-cics", "CICS/Online", "CICSオンライン"),
            ("15-csv-fb-nolf", "CSV", "CSV→FB改行無"),
            ("16-matching-2stage-1-1", "Matching", "2段階1:1照合"),
            ("17-matching-2stage-N-1", "Matching", "2段階N:1照合"),
            ("18-matching-MN-to-M", "Matching", "MN→M照合"),
            ("19-matching-MN-to-N", "Matching", "MN→N照合"),
            ("20-matching-MN-to-MxN", "Matching", "MN→MxN照合"),
            ("21-csv-fb-lf", "CSV", "CSV→FB改行有"),
            ("22-matching-2stage-MN", "Matching", "2段階MN照合"),
            ("23-select-condition", "DB/SQL", "条件抽出"),
            ("24-table-search", "Table/Search", "内部表検索"),
            ("25-subprogram", "Subprogram", "CALLサブプログラム"),
            ("26-db-search", "DB/SQL", "DB検索"),
            ("27-validation-halfwidth", "Validation", "半角チェック"),
            ("28-sysin", "ControlFlow", "SYSINパラメータ"),
            ("29-ascii-ebcdic", "Encoding", "ASCII/EBCDIC変換"),
            ("30-keybreak-other", "KeyBreak", "キーブレイク別"),
            ("31-validation-withdup", "Validation", "重複有チェック"),
            ("32-mix-1N-samekeybreak", "Matching", "混合1N同KEY"),
            ("33-mix-1N-diffkeybreak", "Matching", "混合1N別KEY"),
            ("34-sort", "Sort/Merge", "SORT処理"),
            ("35-merge", "Sort/Merge", "MERGE処理"),
            ("36-billing-calc", "Division", "料金計算"),
        )),
        ("S19", (
            ("pipeline", "Pipeline", "パイプラインドライバ"),
        )),
        # TNA programs
        ("S22/TNA", (
            ("ZAN01CHK", "Matching", "残業申請振分"),
            ("ZAN02CHK", "Validation", "重複チェック"),
            ("ZAN03CHK", "Matching", "残業申請照合"),
            ("ZAN04MAT", "Matching", "残業実績照合"),
            ("ZAN05CAL", "Division", "残業計算"),
            ("ZAN06UPD", "DB/SQL", "DB更新処理"),
        )),
    )
    for key, kind, subtype in rows
}

def analyze_one(name, fpath, source_dir, copybook_dirs):
    """Analyze one program source file and collect its report metrics.

    Args:
        name: Label stored under ``"name"`` in the result.
        fpath: Path of the source file; read as UTF-8 (a leading BOM is dropped).
        source_dir: Directory handed to ``resolve_copybooks`` together with the source.
        copybook_dirs: Passed to ``resolve_copybooks`` as ``extra_search_paths``.

    Returns:
        A dict with the keys ``name``, ``branches``, ``covered``, ``dpoints``,
        ``records``, ``flat_files``, ``lines``, ``code_lines``, ``error``,
        ``time_ms``, ``cov_total`` and ``cov_pct``.  Any ``Exception`` raised
        while analyzing is caught: its message (at most 80 characters) is
        stored in ``error`` and the fields computed before the failure keep
        their values, the rest stay at their zero defaults.
    """
    result = {"name": name, "branches": 0, "covered": 0, "dpoints": 0, "records": 0,
              "flat_files": 0, "lines": 0, "code_lines": 0, "error": "", "time_ms": 0,
              "cov_total": 0, "cov_pct": 0}
    try:
        # Context manager so the handle is closed even when a later step fails.
        with open(fpath, encoding="utf-8-sig") as fh:
            src = fh.read()
        source_lines = src.split("\n")
        result["lines"] = len(source_lines)
        # Code lines: non-blank lines whose first non-space character is not "*".
        result["code_lines"] = sum(
            1 for line in source_lines
            if line.strip() and not line.strip().startswith("*")
        )
        # Timing covers analysis only, not the file read above.
        t0 = time.time()
        st = extract_structure(src)
        result["branches"] = st.get("total_branches", 0)
        result["dpoints"] = len(st.get("decision_points", []))
        # Pass RAW source to generate_data (it internally calls preprocess)
        recs = generate_data(src, st)
        result["records"] = len(recs)
        # "coverage" is read only after generate_data has run
        # (presumably generate_data fills it in on ``st`` — TODO confirm).
        cov = st.get("coverage", {})
        result["covered"] = cov.get("covered", 0)
        result["cov_total"] = cov.get("total", 0)
        result["cov_pct"] = cov.get("pct", 0)
        pp2 = preprocess(resolve_copybooks(src, source_dir, extra_search_paths=copybook_dirs))
        layouts = analyze_fd_layout(pp2)
        result["flat_files"] = len(layouts)
        result["time_ms"] = int((time.time()-t0)*1000)
    except Exception as e:
        # An exception with an empty message must still register as an error,
        # otherwise callers that test ``result["error"]`` count it as a success.
        result["error"] = (str(e) or type(e).__name__)[:80]
    return result

# ── Run ALL programs ──
def _print_row(r):
    """Print one report table row for a result dict produced by ``analyze_one``.

    The classification columns come from ``CLASS_MAP`` keyed by ``r["name"]``
    ("?" when the program is not listed); the trailing status column shows
    the first 10 characters of the error message, if any.
    """
    cls = CLASS_MAP.get(r["name"], {"type": "?", "subtype": "?"})
    status = r.get("error", "")[:10]
    # max(..., 1) keeps the ratio defined for programs with no detected branches.
    lines_per_branch = r["code_lines"] / max(r["branches"], 1)
    print(f"  {r['name']:<28} {cls['type']:<16} {cls['subtype']:<18} {r['branches']:>4} {r['covered']:>4} {r.get('cov_pct',0):>4.0f}% {r['dpoints']:>4} {r['records']:>4} {r['flat_files']:>4} {r['code_lines']:>5} {lines_per_branch:>5.0f} {r.get('time_ms',0):>5}ms {status}")


print("=" * 130)
print("PROGRAM CLASSIFICATION & COVERAGE REPORT")
print("=" * 130)
print(f"{'Program':<28} {'Type':<16} {'Subtype':<18} {'Br':>4} {'Cov':>4} {'C%':>5} {'DPs':>4} {'Recs':>4} {'Flats':>4} {'CodeL':>5} {'Lns/Br':>6} {'Time':>6}")
print("-" * 130)

results = []
# Benchmark programs: one sub-directory per program under ROOT_BENCH.
if os.path.isdir(ROOT_BENCH):
    bench_dirs = sorted(os.listdir(ROOT_BENCH))
else:
    # Missing TNA sources are skipped below; treat a missing benchmark root
    # the same way instead of aborting the whole report.
    print(f"WARNING: benchmark root not found: {ROOT_BENCH}", file=sys.stderr)
    bench_dirs = []
for d in bench_dirs:
    dp = os.path.join(ROOT_BENCH, d)
    # Shared/support directories hold no program of their own.
    if not os.path.isdir(dp) or d in ("common", "docs", "cross-cutting"):
        continue
    fn = find_main(dp)
    if not fn:
        continue
    r = analyze_one(d, os.path.join(dp, fn), dp, [COPYBOOKS_BENCH])
    results.append(r)
    _print_row(r)

print("-" * 130)
# TNA programs: fixed list of sources under ROOT_TNA/src.
for f in ["ZAN01CHK", "ZAN02CHK", "ZAN03CHK", "ZAN04MAT", "ZAN05CAL", "ZAN06UPD"]:
    fpath = os.path.join(ROOT_TNA, "src", f + ".cbl")
    if not os.path.exists(fpath):
        continue
    r = analyze_one(f, fpath, os.path.join(ROOT_TNA, "src"), [COPYBOOKS_TNA])
    results.append(r)
    _print_row(r)

print("=" * 130)

# ── Summary by classification ──
from collections import defaultdict

# One accumulator per classification type; unseen types start at all zeros.
by_type = defaultdict(lambda: dict(count=0, branches=0, covered=0, records=0, lines=0))
for entry in results:
    # Programs missing from CLASS_MAP are grouped under "?".
    type_name = CLASS_MAP.get(entry["name"], {"type": "?"})["type"]
    bucket = by_type[type_name]
    bucket["count"] += 1
    bucket["branches"] += entry["branches"]
    bucket["covered"] += entry.get("covered", 0)
    bucket["records"] += entry["records"]
    bucket["lines"] += entry["code_lines"]

banner = "=" * 100
rule = "-" * 70
print("\n" + banner)
print("COVERAGE BY CLASSIFICATION")
print(banner)
print(f"{'Type':<20} {'Count':>5} {'Branches':>10} {'Covered':>8} {'Cov%':>6} {'Records':>8} {'CodeLines':>10}")
print(rule)
# Types with the most branches are listed first.
ranked = sorted(by_type.items(), key=lambda pair: -pair[1]["branches"])
for type_name, bucket in ranked:
    pct = bucket["covered"] / max(bucket["branches"], 1) * 100
    print(f"  {type_name:<20} {bucket['count']:>5} {bucket['branches']:>10} {bucket['covered']:>8} {pct:>5.0f}% {bucket['records']:>8} {bucket['lines']:>10}")
print(rule)

# ── Totals ──
# Aggregate every per-program result into system-wide figures.
total_br = sum(r["branches"] for r in results)
total_cov = sum(r.get("covered",0) for r in results)
total_recs = sum(r["records"] for r in results)
total_lines = sum(r["code_lines"] for r in results)
total_flats = sum(r["flat_files"] for r in results)
total_time = sum(r.get("time_ms",0) for r in results)
with_br = sum(1 for r in results if r["branches"] > 0)
with_err = sum(1 for r in results if r.get("error"))
print(f"\n{'='*100}")
print("SYSTEM SUMMARY")
print(f"{'='*100}")
print(f"  Total programs:           {len(results)}")
print(f"  With branch detection:    {with_br}")
print(f"  With errors:              {with_err}")
print(f"  Total decision branches:  {total_br}")
print(f"  Covered branches:         {total_cov}")
print(f"  Branch coverage rate:     {total_cov/max(total_br,1)*100:.1f}%")
print(f"  Total test records:       {total_recs}")
print(f"  Flat file layouts:        {total_flats}")
print(f"  Code lines (non-comment): {total_lines}")
# Guard the denominator like the other ratios: total_lines is 0 when no
# program was found or every source failed to load.
print(f"  Test density:             {total_recs/max(total_lines,1):.2f} recs/code-line")
print(f"  Total execution time:     {total_time/1000:.1f}s")
print(f"  Avg per program:          {total_time/max(len(results),1)/1000:.2f}s")
print(f"{'='*100}")
# Legend for the column abbreviations used in the per-program table.
print("NOTES:")
print("  Br   = Static decision branches (2 per IF/EVAL/PERFORM)")
print("  Cov  = Branches covered by generated test data")
print("  C%   = Branch coverage rate")
print("  DPs  = Decision points (IF/EVAL/PERFORM count)")
print("  Recs = Generated test data records")
print("  CodeL= Source lines (non-comment, non-empty)")
print("  Lns/Br = Code density (lines per decision branch)")
print("  All values are REAL from extract_structure + generate_data + mark_coverage")
print(f"{'='*100}")