"""S14: External benchmark suite — 58 telecom billing COBOL programs"""
import sys, os, time, json
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
# Running tallies shared by every check in this script.
P = 0      # number of checks that passed
F = 0      # number of checks that failed
BUGS = []  # failure messages, plus ad-hoc notes appended directly by sections


def ck(v, m=""):
    """Record the outcome of one check.

    Args:
        v: Check result; any truthy value counts as a pass.
        m: Message appended to ``BUGS`` when the check fails.

    Returns:
        None. The module-level ``P``/``F`` counters are updated in place.
    """
    global P, F
    if v:
        P += 1
    else:
        F += 1
        BUGS.append(m)
def sec(n):
    """Print a section banner for *n*, preceded by a blank line."""
    banner = f"--- {n} ---"
    print("\n" + banner)

# Directory scanned for benchmark programs: each immediate subdirectory is
# searched for *.cbl files. Hard-coded absolute path, so the suite only runs
# on a machine where this location exists.
ROOT = "D:/cobol-java/cobol-test-programs/"
from cobol_testgen import extract_structure, generate_data
from hina.pipeline.pipeline import classify_program
from hina.classifier import detect_keyword

# Collect every *.cbl file that sits directly inside a subdirectory of ROOT,
# ordered by directory name and then by file name.
progs = []
for entry in sorted(os.listdir(ROOT)):
    subdir = os.path.join(ROOT, entry)
    if not os.path.isdir(subdir):
        continue  # plain files at the top level of ROOT are ignored
    progs.extend(
        os.path.join(subdir, fname)
        for fname in sorted(os.listdir(subdir))
        if fname.endswith(".cbl")
    )
print(f"Total: {len(progs)} programs")

sec("PARSE: Extract structure for all 58 programs")
# Every discovered program must pass through extract_structure() without
# raising. Each failure is recorded individually, then once more in aggregate.
parse_ok = 0
parse_fail = 0
for fp in progs:
    # Report paths relative to ROOT, with forward slashes on every platform.
    name = os.path.relpath(fp, ROOT).replace("\\", "/")
    # utf-8-sig drops a leading BOM if present; the context manager closes
    # the handle immediately instead of leaving it to the garbage collector.
    with open(fp, encoding="utf-8-sig") as fh:
        src = fh.read()
    try:
        extract_structure(src)
    except Exception as e:
        parse_fail += 1
        # Only the first 40 characters of the error are kept in the report.
        ck(False, f"PARSE: {name} -> {str(e)[:40]}")
    else:
        parse_ok += 1
ck(parse_fail == 0, f"Parse: {parse_fail}/{len(progs)} FAIL")

sec("CLASSIFY: Directory name vs classification match")
# Expected program type per directory id. Reference data only: the checks
# below use their own id sets, which cover a subset of these types.
# NOTE(review): "32" and "33" are listed as matching here but are not part of
# the matching check below -- confirm whether that omission is intentional.
expected_map = {
    "matching": ["01","02","03","16","17","18","19","20","22","32","33"],
    "keybreak": ["07","08","30"],
    "divide": ["10","11","12"],
    "validation": ["13","27","31"],
    "csv": ["15","21"],
    "select": ["23"],
    "search": ["24","26"],
    "subprogram": ["25"],
    "sort": ["34"],
    "merge": ["35"],
    "evaluate": ["06"],
    "branch": ["05"],
    "edit": ["04"],
    "cics": ["14"],
    "sysin": ["28"],
    "ascii": ["29"],
    "pipeline": ["pipeline"],
}
# Directory ids whose classification is actually verified.
matching_ids = {"01", "02", "03", "16", "17", "18", "19", "20", "22"}
divide_ids = {"10", "11", "12"}
sort_id = "34"

for fp in progs:
    name = os.path.relpath(fp, ROOT).replace("\\", "/")
    # Context manager so the file handle is closed right after reading.
    with open(fp, encoding="utf-8-sig") as fh:
        src = fh.read()
    try:
        cp = classify_program(src)
        cat = cp.get("category", "?")
    except Exception:
        # Best effort: any classifier failure becomes the "ERROR" category.
        # `Exception` (not a bare except) lets Ctrl-C and SystemExit through.
        cat = "ERROR"
    cat_text = str(cat)
    cat_lower = cat_text.lower()  # for case-insensitive keyword checks

    # The id is the text before the first "-" of the relative path, or its
    # first two characters when the path contains no "-".
    dir_id = name.split("-")[0] if "-" in name else name[:2]

    # Matching programs should say マッチング (or "matching", any case).
    if dir_id in matching_ids:
        if "マッチング" not in cat_text and "matching" not in cat_lower:
            # Recorded twice on purpose: the MISCLASSIFY entry feeds the
            # aggregate count below, the ck() call registers a failed check.
            BUGS.append(f"MISCLASSIFY: {name} -> {cat}")
            ck(False, f"CLASSIFY: {name} expected matching, got {cat}")
    # Division programs should mention DIVIDE; noted only, no failed check.
    if dir_id in divide_ids and "divide" not in cat_lower:
        BUGS.append(f"MISCLASSIFY: {name} (divide) -> {cat}")
    # Sort programs should mention SORT; noted only, no failed check.
    if dir_id == sort_id and "sort" not in cat_lower:
        BUGS.append(f"MISCLASSIFY: {name} (sort) -> {cat}")

# Up to 10 misclassification notes are tolerated before this check fails.
misclassified = sum(1 for b in BUGS if "MISCLASSIFY" in b)
ck(misclassified <= 10, f"Classification mismatch count: {misclassified} (max 10)")

sec("GENERATE: Non-zero data produce")
# Count programs for which no records are generated, and track the largest
# record count seen together with the program that produced it.
zero_data = 0
max_recs = 0
max_name = ""
for fp in progs:
    name = os.path.relpath(fp, ROOT).replace("\\", "/")
    # Context manager so the file handle is closed right after reading.
    with open(fp, encoding="utf-8-sig") as fh:
        src = fh.read()
    try:
        st = extract_structure(src)
        recs = generate_data(src, st)
        n_recs = len(recs)
    except Exception:
        # Best effort: a program that cannot be parsed or generated counts as
        # producing no data. `Exception` (not a bare except) keeps Ctrl-C and
        # SystemExit working.
        zero_data += 1
        continue
    if n_recs == 0:
        zero_data += 1
    if n_recs > max_recs:
        max_recs = n_recs
        max_name = name
# At most half of the programs may end up without generated data.
ck(zero_data <= len(progs) * 0.5, f"Generate: {zero_data}/{len(progs)} zero records")
ck(max_recs < 10000, f"Max records: {max_recs} ({max_name}) - path explosion risk")

sec("PERF: Average performance")
# Time extract_structure() on the first 10 discovered programs.
times = []
for fp in progs[:10]:
    # Context manager so the file handle is closed right after reading.
    with open(fp, encoding="utf-8-sig") as fh:
        src = fh.read()
    # perf_counter() is monotonic and high resolution, unlike wall-clock time().
    t0 = time.perf_counter()
    try:
        extract_structure(src)
    except Exception:
        # The PARSE section already reports extraction failures; skip the
        # sample here so one bad program cannot abort the run before SUMMARY.
        continue
    times.append(time.perf_counter() - t0)
if times:
    avg = sum(times) / len(times)
    ck(avg < 5.0, f"Avg extract time: {avg:.3f}s (max 5s)")
else:
    # No successful sample: record a failed check instead of dividing by zero.
    ck(False, "Avg extract time: no programs could be measured")

sec("SUMMARY")
# Final report: pass/fail totals followed by every recorded message, framed
# by two 55-character rules.
rule = "=" * 55
print("\n" + rule)
print(f"S14: {P} PASS / {F} FAIL")
print(f"Bugs found: {len(BUGS)}")
for bug in BUGS:
    print(f"  {bug}")
print(rule)
# A non-zero exit status signals that at least one check failed.
if F > 0:
    sys.exit(1)