Files
cobol-java-v3/test-data/s16_benchmark_e2e.py
T
NB-076 e5ab3baa46 提升:37/37基准程序全量解析+O(N)路径枚举+运行时gcov验证
## 核心变更

### 1. 新PROCEDURE DIVISION解析器(procedure_parser.py)
- 行级状态机替换旧的BrParser regex解析器
- 覆盖:IF/ELSE/END-IF(嵌套)、EVALUATE/WHEN/ALSO、
  PERFORM UNTIL/VARYING、READ/AT END/NOT AT END、
  SORT/MERGE、GO TO DEPENDING ON
- 之前:3/37程序有分支检测  →  现在:37/37全部有分支
- 速度:~20ms/程序,纯规则引擎

### 2. 桥接层(pipeline_bridge.py)
- 新解析器为主,旧解析器3秒超时兜底
- 自动选取分支数更多的结果

### 3. 线性路径枚举(design_mcdc.py)
- 替换旧的Cartesian积路径枚举(O(2^N))为每决策点独立枚举(O(N))
- 28-sysin: 162分支仅163条路径(之前需截断到60DP)
- 消除了500路径硬上限和60DP截断

### 4. 条件解析修复(cond.py)
- NOT运算符规范化:X NOT = 5 → X <> 5
- 88-level反向:NOT WS-EOF-Y → parent <> value
- 裸字段引用:NOT WS-EOF → WS-EOF <> 'Y'
- 验证:1182个IF条件中0个NOT污染

### 5. 约束字段过滤(__init__.py)
- OF限定词剥离:STD-KEY OF MASTER-REC → STD-KEY
- 下标字段解析:WS-ITEM(SUB) → WS-ITEM
- 跳过不在fields_dict中的字段(group item/伪影)

### 6. 预处理器增强(read.py)
- VALUE ALL剥离(VALUE ALL '*' → VALUE '*')
- &续行合并(COBOL多行字符串拼接)
- PIC小数点转换:点(.)→V(Z(9)9.99. → Z(9)9V99.)
- 缺少点号补全

### 7. Grammar修复(grammar.lark)
- OCCURS 1 TIME支持(原只认TIMES)
- USAGE IS COMP支持(可选IS)
- $符号在PICTURE_STRING中
- 无NAME条款支持(clause+)

### 8. Flatfile写入(flatfile.py)
- 多记录FD支持(选字段最多的记录)
- Path类型强制转换
- 回退零值记录

### 9. Bug修复
- trace_to_root空列表保护(core.py)

### 10. 测试套件(S16-S21)
- S16: 全量基准程序端到端
- S17: gcov运行时对比
- S18/S19: 桥接器验证
- S20: DISPLAY插桩运行时验证+gcov分支覆盖率
- S21: 条件解析修复验证
- 全部17/17回归测试通过

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-22 23:41:22 +08:00

132 lines
5.7 KiB
Python

"""S16: External benchmark E2E — focused on parse → generate → compile"""
import sys, os, subprocess, re
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
# Script-wide tallies of passed (P) and failed (F) checks, updated by ck().
P = 0
F = 0


def ck(v, m=""):
    """Record the outcome of one check in the global P/F counters.

    A truthy ``v`` adds one to ``P``.  A falsy ``v`` adds one to ``F`` and
    prints ``m`` prefixed with " FAIL ".  Nothing is returned.
    """
    global P, F
    if v:
        P += 1
        return
    F += 1
    print(f" FAIL {m}")
def sec(n):
    """Print ``n`` as a section title framed by two 60-character '=' rules."""
    rule = '=' * 60
    print(f"\n{rule}\n{n}\n{rule}")
# Directory whose sub-directories are scanned for benchmark COBOL programs.
ROOT = "D:/cobol-java/cobol-test-programs/"
# Shared copybook directory: used as an extra copybook search path when
# resolving copybooks and as a `-I` include path for the compiler.
COPYBOOKS = os.path.join(ROOT, "common", "copybooks")
# Compiler command invoked in the compile phase.
COBC = "cobc"
from cobol_testgen import extract_structure, generate_data
from cobol_testgen.read import preprocess, resolve_copybooks
from cobol_testgen.flatfile import analyze_fd_layout, write_all_files
def find_main(directory):
    """Pick the main COBOL source file inside ``directory``.

    Candidates are regular files whose name ends in ``.cbl`` and does not
    start with a dot.  If any candidate name matches ``main-NN-``
    (case-insensitive, two digits) only those wrapper files are considered.
    The largest remaining file wins.

    Args:
        directory: Path of the directory to scan.

    Returns:
        The bare file name of the chosen source, or ``None`` when the
        directory holds no candidate.

    Raises:
        OSError: If ``directory`` cannot be listed.
    """
    sizes = {}
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # winner deterministic when several files have the same size.
    for name in sorted(os.listdir(directory)):
        if name.startswith('.') or not name.endswith('.cbl'):
            continue
        path = os.path.join(directory, name)
        # A directory that merely happens to be named "*.cbl" is not a source.
        if os.path.isfile(path):
            sizes[name] = os.path.getsize(path)
    if not sizes:
        return None
    wrappers = [name for name in sizes if re.match(r'main-\d{2}-', name, re.IGNORECASE)]
    candidates = wrappers or list(sizes)
    # max() keeps the first maximal element, i.e. the alphabetically first on ties.
    return max(candidates, key=sizes.get)
# Discover benchmark programs: one (directory name, main file name, main file
# path) tuple per sub-directory of ROOT that yields a main source file.
progs = []
all_results = {}
for entry in sorted(os.listdir(ROOT)):
    entry_path = os.path.join(ROOT, entry)
    # These directories are excluded from the scan by name.
    if entry in ('common', 'docs', 'cross-cutting') or not os.path.isdir(entry_path):
        continue
    main_file = find_main(entry_path)
    if main_file:
        progs.append((entry, main_file, os.path.join(entry_path, main_file)))
print(f"Found {len(progs)} programs")
# ── PHASE 1: Parse + Generate + Flatfiles ──
# For every discovered program: extract its structure, resolve copybooks,
# preprocess, generate records and (when FD layouts exist) write flat files.
# Per-program outcomes are stored in all_results[dirname].
sec("PHASE 1: Parse → Generate → Flat files")
parse_ok = 0
gen_ok = 0
flat_written = 0
for dirname, fname, fpath in progs:
    dp = os.path.join(ROOT, dirname)
    try:
        # Read through a context manager so the handle is closed right away.
        with open(fpath, encoding='utf-8') as src_file:
            src = src_file.read()
        st = extract_structure(src)
        pp_path = resolve_copybooks(src, dp, extra_search_paths=[COPYBOOKS])
        pp = preprocess(pp_path)
        # Count the parse as soon as the front-end steps succeed, so a later
        # generation or flat-file failure is charged to gen_ok only and the
        # tolerance in the generate check below can actually apply.
        # NOTE(review): "parse" is taken to cover structure extraction,
        # copybook resolution and preprocessing — confirm.
        parse_ok += 1
        recs = generate_data(pp, st)
        layouts = analyze_fd_layout(pp)
        flats = write_all_files(recs, pp, dp) if layouts else []
        gen_ok += 1
        flat_written += len(flats)
        all_results[dirname] = {"recs": len(recs), "fds": len(layouts), "flats": len(flats)}
        print(f" {dirname:30s} {len(recs):3d} recs {len(layouts)} FDs {len(flats)} files")
    except Exception as e:
        # Harness boundary: one failing program must not stop the others.
        all_results[dirname] = {"status": "fail", "error": str(e)[:80]}
        print(f" {dirname:30s} FAIL: {str(e)[:60]}")
ck(parse_ok == len(progs), f"Parse: {parse_ok}/{len(progs)}")
ck(gen_ok >= len(progs) - 3, f"Generate: {gen_ok}/{len(progs)}")
print(f"\n Flat files written: {flat_written} total")
# ── PHASE 2: Compile ──
# Compile each program's main source to an executable next to it and record
# "ok" / "fail" / "timeout" under all_results[dirname]["compile"].
sec("PHASE 2: Compile with GnuCOBOL")
compile_ok = 0
compile_fail = 0
skipped = []
for dirname, fname, fpath in progs:
    dp = os.path.join(ROOT, dirname)
    exe = os.path.join(dp, fname.replace('.cbl', '.exe'))
    # This directory is never compiled.
    # NOTE(review): presumably because it needs a CICS environment — confirm.
    if dirname in ('14-online-cics',):
        skipped.append(dirname)
        continue
    cmd = [COBC, '-x', '-Wall', fpath, '-o', exe, '-I', COPYBOOKS, '-I', dp]
    try:
        p = subprocess.run(cmd, capture_output=True, timeout=45, cwd=dp)
    except subprocess.TimeoutExpired:
        compile_fail += 1
        all_results[dirname]["compile"] = "timeout"
    except OSError as e:
        # The compiler could not be started at all (e.g. not on PATH).
        # Count it as a failure instead of aborting the whole run.
        compile_fail += 1
        all_results[dirname]["compile"] = "fail"
        all_results[dirname]["compile_err"] = str(e)[:120]
    else:
        out = p.stdout.decode('utf-8', errors='replace') if p.stdout else ''
        err = p.stderr.decode('utf-8', errors='replace') if p.stderr else ''
        if p.returncode == 0:
            compile_ok += 1
            all_results[dirname]["compile"] = "ok"
            all_results[dirname]["exe_size"] = os.path.getsize(exe) if os.path.exists(exe) else 0
        else:
            compile_fail += 1
            all_results[dirname]["compile"] = "fail"
            # Keep only the head of the diagnostics; stderr is preferred over stdout.
            all_results[dirname]["compile_err"] = (err or out or "")[:120]
    print(f" {dirname:30s} {all_results[dirname].get('compile','N/A'):>5} {all_results[dirname].get('exe_size',0):>6}B")
print(f"\nCompile: {compile_ok} OK / {compile_fail} FAIL / {len(skipped)} skipped")
ck(compile_fail < 10, f"Compile: {compile_fail} failures")
# ── PHASE 3: Run ──
# Execute every program that was compiled successfully in this run and record
# "ok" / "fail(<code>)" / "timeout" under all_results[dirname]["run"].
sec("PHASE 3: Run (compiled programs)")
run_ok = 0
run_fail = 0
run_timeout = 0
for dirname, fname, _ in progs:
    dp = os.path.join(ROOT, dirname)
    exe = os.path.join(dp, fname.replace('.cbl', '.exe'))
    if dirname in ('14-online-cics',):
        continue
    # Only run binaries produced by a successful compile in this run; an
    # executable left over from an earlier run would otherwise be executed
    # even though the current compile failed or timed out.
    if all_results[dirname].get("compile") != "ok" or not os.path.exists(exe):
        continue
    try:
        # Start the binary directly (no shell): the path needs no quoting and
        # a timeout terminates the program itself rather than a wrapper shell.
        p = subprocess.run([exe], capture_output=True, timeout=10, cwd=dp)
    except subprocess.TimeoutExpired:
        run_timeout += 1
        all_results[dirname]["run"] = "timeout"
    except OSError as e:
        # The binary could not be launched; treat it as a failed run.
        run_fail += 1
        all_results[dirname]["run"] = f"fail({e.errno})"
    else:
        if p.returncode == 0:
            run_ok += 1
            all_results[dirname]["run"] = "ok"
            # Non-empty .dat files in the program directory, excluding names
            # that contain 'file-in'.
            out_files = [
                fn for fn in os.listdir(dp)
                if fn.endswith('.dat')
                and os.path.getsize(os.path.join(dp, fn)) > 0
                and 'file-in' not in fn.lower()
            ]
            all_results[dirname]["out_files"] = out_files
        else:
            run_fail += 1
            all_results[dirname]["run"] = f"fail({p.returncode})"
print(f" Run: {run_ok} OK / {run_fail} FAIL / {run_timeout} timeout")
ck(run_fail + run_timeout < compile_ok, f"Run failures: {run_fail} + {run_timeout} timeout")
# ── Summary ──
# Print the aggregate counters, then one line per program, then the overall
# pass/fail tally; exit with status 1 if any check failed.
sec("SUMMARY")
for line in (
    f"Programs: {len(progs)}",
    f"Parse OK: {parse_ok}",
    f"Generate OK: {gen_ok}",
    f"Compile OK: {compile_ok}",
    f"Compile FAIL: {compile_fail}",
    f"Run OK: {run_ok}",
    f"Run FAIL: {run_fail}",
    f"Run TIMEOUT: {run_timeout}",
):
    print(line)
print()
for dirname, r in all_results.items():
    if r.get("status", "") == "fail":
        # Programs that failed in phase 1 only report their error text.
        print(f" {dirname:<28} FAIL: {r.get('error','')[:50]}")
    else:
        recs = r.get("recs", 0)
        flats = r.get("flats", 0)
        comp = r.get("compile", "-")
        run_st = r.get("run", "-")
        outs = len(r.get("out_files", []))
        print(f" {dirname:<28} {recs:3d} rec C={comp:<5} R={run_st:<8} {flats}fl/{outs}out")
print(f"\nS16: {P} PASS / {F} FAIL")
if F > 0:
    sys.exit(1)