# cobol-java-v3/test-data/s20v2_runtime_gcov.py

"""S20v2: Runtime branch coverage via gcov — no source modification

Approach:
1. Scan the COBOL source → list of IF/ELSE/EVALUATE/WHEN/AT END/INVALID KEY line numbers (our expected decision points)
2. Generate test data, then compile with --coverage
3. Run the program
4. Run gcov -b → get per-line hit counts
5. Verify: the decision lines identified by our scan are actually hit at runtime
6. If gcov shows 0 hits on more than half of the lines we claim to cover, we have a bug.

This is INDEPENDENT verification — gcov is GnuCOBOL's own tool.
"""
import sys, os, re, subprocess
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
# Running totals of passed / failed checks, reported at the end of the script.
P = 0
F = 0


def ck(v, m=""):
    """Record one check result.

    A truthy *v* bumps the pass counter ``P``; anything else bumps the fail
    counter ``F`` and prints *m* as the failure message.
    """
    global P, F
    if v:
        P += 1
        return
    F += 1
    print(f"  FAIL {m}")


def sec(n):
    """Print *n* as a section banner framed by two 60-character '=' rules."""
    rule = '=' * 60
    print(f"\n{rule}\n{n}\n{rule}")

# Base directory; each test program lives in a sub-directory joined onto it.
# NOTE(review): hard-coded absolute Windows-style path — presumably only valid
# on the original author's machine; confirm before running elsewhere.
ROOT = "D:/cobol-java/cobol-test-programs/"
# Shared copybook directory: passed to the copybook resolver as an extra
# search path and to cobc via -I.
COPYBOOKS = os.path.join(ROOT, "common", "copybooks")

from cobol_testgen import extract_structure, generate_data
from cobol_testgen.read import preprocess, resolve_copybooks
from cobol_testgen.flatfile import analyze_fd_layout, write_all_files

def find_main(d):
    """Pick the main COBOL source file in directory *d*.

    Only names ending in ``.cbl`` are considered. Files whose name starts
    with ``main-NN-`` (two digits, case-insensitive) take priority; within
    the chosen group the file with the largest size on disk wins.

    Returns the bare file name, or None when *d* holds no ``.cbl`` file.
    """
    def size_on_disk(name):
        return os.path.getsize(os.path.join(d, name))

    all_sources = []
    main_sources = []
    for entry in os.listdir(d):
        if not entry.endswith('.cbl'):
            continue
        all_sources.append(entry)
        if re.match(r'main-\d{2}-', entry, re.IGNORECASE):
            main_sources.append(entry)

    pool = main_sources or all_sources
    if not pool:
        return None
    return max(pool, key=size_on_disk)

def get_decision_lines(source: str) -> list[dict]:
    """Find all decision-point lines in a COBOL source by lineno.

    Every line is upper-cased and stripped, then classified by the keyword(s)
    it starts with. Recognised kinds: IF, ELSE, EVALUATE, WHEN, WHEN_OTHER,
    AT_END, NOT_AT_END, INVALID_KEY. A line starting with ``ELSE IF`` is
    skipped entirely (recorded as neither ELSE nor IF).

    Args:
        source: COBOL source text; lines are separated by newline characters.

    Returns: list of {line, kind, text} where ``line`` is the 1-based line
    number, ``kind`` is one of the names above and ``text`` is the first 60
    characters of the upper-cased, stripped line.
    """
    decisions = []
    for lineno, raw in enumerate(source.split('\n'), start=1):
        stripped = raw.upper().strip()
        words = stripped.split()

        if stripped == 'IF' or stripped.startswith('IF '):
            # Covers plain IF as well as "IF NOT ..." and "IF (...)".
            kind = "IF"
        elif stripped == 'ELSE' or stripped.startswith('ELSE '):
            kind = None if stripped.startswith('ELSE IF') else "ELSE"
        elif stripped.startswith('EVALUATE'):
            kind = "EVALUATE"
        elif words[:2] == ['WHEN', 'OTHER']:
            # Must be tested before the generic WHEN prefix, otherwise that
            # prefix swallows it and WHEN_OTHER can never be reported.
            kind = "WHEN_OTHER"
        elif stripped.startswith('WHEN '):
            kind = "WHEN"
        elif stripped.startswith('AT END'):
            # Also matches "AT END-PAGE" / "AT END-OF-PAGE".
            kind = "AT_END"
        elif stripped.startswith('NOT AT END'):
            kind = "NOT_AT_END"
        elif stripped.startswith(('INVALID', 'NOT INVALID')):
            kind = "INVALID_KEY"
        else:
            kind = None

        if kind is not None:
            decisions.append({"line": lineno, "kind": kind, "text": stripped[:60]})
    return decisions

def parse_gcov_line_hits(gcov_path: str) -> dict[int, str]:
    """Read a gcov report and map each source line number to its count field.

    A report line is expected to look like ``exec_count:lineno:source``.
    The count field is kept verbatim as a string, e.g. "#####" (never
    executed), "N" (executed N times) or "-" (non-executable). Lines that do
    not start with that two-field prefix are ignored; if a line number
    occurs more than once, the last occurrence wins.
    """
    prefix = re.compile(r'\s*(\S+):\s*(\d+):')
    hits = {}
    with open(gcov_path, encoding='utf-8', errors='replace') as report:
        for record in report:
            found = prefix.match(record)
            if found is None:
                continue
            count_field, line_field = found.groups()
            hits[int(line_field)] = count_field
    return hits

# ── Test: pick 3 diverse programs ──
test_progs = [
    ('01-matching-1-1', 'Simple 1:1 matching'),
    ('34-sort', 'SORT with many IFs'),
    ('28-sysin', 'SYSIN param dispatch'),
]

# Build/coverage artefacts from a previous run, deleted before compiling.
_STALE_ARTIFACT_EXTS = ('.exe', '.gcno', '.gcda', '.gcov')
# Coverage artefacts deleted again once a program has been analysed.
_COVERAGE_EXTS = ('.gcno', '.gcda', '.gcov')
# .dat/.txt files whose name starts with one of these are never deleted.
_KEPT_DATA_PREFIXES = ('MASTER', 'DETAIL', 'sort-input', 'SORT-INPUT')
# gcov writes "#####" for lines never executed and "=====" for unexecuted
# lines only reachable through exceptional paths; both count as a miss.
_UNEXECUTED_MARKS = ('#', '=')


def _remove_quietly(path):
    """Best-effort delete: a locked or already-removed file is not an error."""
    try:
        os.remove(path)
    except OSError:
        pass


for dirname, desc in test_progs:
    sec(f"{dirname}: {desc}")
    dp = os.path.join(ROOT, dirname)
    if not os.path.isdir(dp):
        # Report a FAIL instead of crashing in os.listdir().
        ck(False, f"Program directory not found: {dp}")
        continue
    fn = find_main(dp)
    if not fn:
        ck(False, "No main file")
        continue
    fpath = os.path.join(dp, fn)

    # ── 1. Our static analysis ──
    print("[1/4] Our static analysis...")
    with open(fpath, encoding='utf-8') as src_file:
        src = src_file.read()
    st = extract_structure(src)
    static_br = st.get('total_branches', 0)
    print(f"  Our parser: {static_br} branches")

    # ── 2. Generate data + write flat files ──
    print("[2/4] Generate test data + flat files...")
    pp = resolve_copybooks(src, dp, extra_search_paths=[COPYBOOKS])
    pp_str = preprocess(pp)
    recs = generate_data(pp_str, st)
    layouts = analyze_fd_layout(pp_str)
    # Clean old non-supplied files
    for f in os.listdir(dp):
        ffn = os.path.join(dp, f)
        if f.endswith(_STALE_ARTIFACT_EXTS):
            # Deliberately not best-effort: leftover coverage data would
            # distort this run's counts, so a failed delete must be loud.
            os.remove(ffn)
        elif f.endswith(('.dat', '.txt')):
            # Only remove if we're going to re-generate it
            if not f.startswith(_KEPT_DATA_PREFIXES):
                _remove_quietly(ffn)
    written = write_all_files(recs, pp_str, dp)
    print(f"  {len(recs)} records, {len(written)} flat files")

    # ── 3. Compile with --coverage + run ──
    print("[3/4] Compile with --coverage + run...")
    exe = os.path.join(dp, f"test-gcov-{dirname}.exe")
    try:
        r = subprocess.run(['cobc', '-x', '-Wall', '--coverage', fpath, '-o', exe,
                            '-I', COPYBOOKS, '-I', dp],
                           capture_output=True, timeout=30, cwd=dp)
    except (OSError, subprocess.TimeoutExpired) as exc:
        # cobc missing from PATH, or the compiler hung.
        ck(False, f"Compile FAIL: {exc}")
        continue
    if r.returncode != 0:
        err = r.stderr.decode('utf-8', 'replace') if r.stderr else ''
        ck(False, f"Compile FAIL: {err[:100]}")
        continue
    print(f"  Compile OK: {os.path.getsize(exe)} bytes")

    try:
        # Run the binary directly (no shell): with a shell in between, the
        # timeout would only kill the shell and leave the program running.
        run = subprocess.run([exe], capture_output=True, timeout=30, cwd=dp)
    except (OSError, subprocess.TimeoutExpired) as exc:
        ck(False, f"Run FAIL: {exc}")
        continue
    rc = run.returncode
    run_out = run.stdout.decode('utf-8', 'replace') if run.stdout else ''
    print(f"  Run RC={rc}, stdout={len(run_out)} chars")

    # ── 4. gcov analysis ──
    print("[4/4] gcov branch coverage analysis...")
    # Run gcov on the compiled program
    try:
        gcov_r = subprocess.run(['gcov', '-b', fpath], capture_output=True,
                                text=True, timeout=10, cwd=dp)
    except (OSError, subprocess.TimeoutExpired) as exc:
        ck(False, f"gcov FAIL: {exc}")
        continue
    print(f"  gcov output: {gcov_r.stdout[:200]}")

    # Find the .cbl.gcov file
    # gcov creates <filename>.cbl.gcov
    cbl_gcov = os.path.join(dp, os.path.basename(fpath) + '.gcov')
    if not os.path.exists(cbl_gcov):
        # Try different naming
        for f in os.listdir(dp):
            if f.endswith('.cbl.gcov'):
                cbl_gcov = os.path.join(dp, f)
                break
        else:
            ck(False, "No .cbl.gcov file produced")
            continue

    print(f"  gcov file: {cbl_gcov}")
    line_hits = parse_gcov_line_hits(cbl_gcov)

    # Get decision lines from source
    dec_lines = get_decision_lines(src)
    print(f"  Decision lines found: {len(dec_lines)}")

    # Check coverage: only decision lines that gcov reports on are counted.
    hit_count = 0
    miss_count = 0
    total_checked = 0
    missed_lines = []

    for dl in dec_lines:
        status = line_hits.get(dl["line"])
        if status is None:
            continue
        total_checked += 1
        if status.startswith(_UNEXECUTED_MARKS):
            miss_count += 1
            missed_lines.append(dl)
        else:
            hit_count += 1

    # Also aggregate: our parser claims to cover N branches,
    # gcov shows how many IF/ELSE lines were actually hit
    print("\n  Gcov line hits at decision points:")
    print(f"    Hit:     {hit_count}")
    print(f"    Missed:  {miss_count}")
    print(f"    Total:   {total_checked}")

    # The detailed list is only shown when it is short.
    if missed_lines and miss_count <= 5:
        print("    Missed lines:")
        for ml in missed_lines:
            print(f"      Line {ml['line']}: {ml['kind']}  {ml['text'][:40]}")

    # Compare with our static analysis
    coverage_pct = hit_count / max(total_checked, 1) * 100
    print(f"\n  Our #{static_br} branches vs gcov {hit_count}/{total_checked} lines hit ({coverage_pct:.0f}%)")

    ck(miss_count <= total_checked * 0.5,
       f"gcov missed {miss_count}/{total_checked} decision lines ({100-miss_count/max(total_checked,1)*100:.0f}% hit)")
    ck(hit_count >= static_br * 0.2,
       f"gcov line hits {hit_count} vs our branches {static_br} (ratio: {hit_count/max(static_br,1):.2f})")

    # Cleanup: every coverage artefact, plus the executable built above.
    for f in os.listdir(dp):
        is_coverage_file = f.endswith(_COVERAGE_EXTS)
        is_our_exe = f.startswith('test-gcov-') and f.endswith('.exe')
        if is_coverage_file or is_our_exe:
            _remove_quietly(os.path.join(dp, f))

print(f"\n{'='*55}")
print(f"S20v2: {P} PASS / {F} FAIL")
print(f"{'='*55}")
if F > 0:
    sys.exit(1)