Files
cobol-java-v3/test-data/r16_bug_hunt.py
T
NB-076 50995d3335 chore: SETUP.md + 测试报告脚本 + 文档更新
- SETUP.md: 完整环境搭建指南(同事用)
- SETUP_QUICK.md: 快速搭环境(4步)
- s22~s26: TNA端到端、覆盖率报告、回归检查
- procedure_grammar.lark: 实验性Lark语法

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-25 08:50:17 +08:00

90 lines
3.9 KiB
Python

"""R16: Real bug hunting — classification accuracy + data generation correctness"""
import sys, glob, json
from pathlib import Path
sys.path.insert(0, ".")
# Module-level tallies of passed (P) and failed (F) checks; mutated by ck().
P = 0
F = 0


def ck(v, m=""):
    """Record the outcome of one check in the global P/F counters.

    Args:
        v: Value whose truthiness decides the outcome.
        m: Message appended to the failure line when ``v`` is falsy.

    A truthy ``v`` increments ``P`` silently. A falsy ``v`` increments ``F``
    and then prints one failure line containing ``m``. Always returns None.
    """
    global P, F
    if v:
        P += 1
        return
    # Counter is bumped before printing, matching the original evaluation order.
    F += 1
    print(f" FAIL {m}")
def sec(n):
    """Print a section banner for ``n``: a blank line, then ``--- n ---``."""
    banner = f"\n--- {n} ---"
    print(banner)
from cobol_testgen import extract_structure, generate_data
from hina.pipeline.pipeline import classify_program
from hina.rule_engine.confusion_groups import resolve_matching_vs_keybreak
# Root folder holding the COBOL fixtures, relative to the working directory.
BASE = Path("test-data/cobol")

# Fixture sub-folders probed, in order, when looking a program up by file name.
_SEARCH_DIRS = (
    "category_matching", "category_validation", "category_csv", "category_division",
    "category_cics", "category_db", "statement", "adversarial", "matching",
)


def load(name, subdir=None):
    """Return the text of fixture ``name``, or None when it cannot be found.

    Args:
        name: File name of the fixture, e.g. ``"MT01_1TO1.cbl"``.
        subdir: Optional sub-folder of ``BASE`` to try before the default
            folders. Previously this argument was accepted but ignored.

    Returns:
        The file content decoded as UTF-8 (a leading BOM is dropped by the
        ``utf-8-sig`` codec), taken from the first folder that contains the
        file; None if no folder contains it.
    """
    # An explicit subdir wins; the default folders remain as a fallback so
    # existing callers that pass no subdir behave exactly as before.
    search_dirs = ([subdir] if subdir else []) + list(_SEARCH_DIRS)
    for sd in search_dirs:
        p = BASE / sd / name
        # is_file() rather than exists(): a directory of the same name must
        # not be handed to read_text().
        if p.is_file():
            return p.read_text(encoding="utf-8-sig")
    return None
sec("BUG#1: MT32 mixed same key -> falsely dedup")
# Classify the MT32 sample and check that it exposes at least one matching
# signal: a previous-key variable pattern or two or more files.
src = load("MT32_MIXED_SAME_KEY.cbl")
if src:
    cp = classify_program(src)
    st = extract_structure(src)
    vpat = st.get("variable_patterns", {})
    file_count = st.get("file_count", 0)
    ck(vpat.get("has_prev_key") or file_count >= 2, "mt32 has matching signals")
    gr = resolve_matching_vs_keybreak({
        "file_count": file_count,
        "if_types": st.get("if_types", {}),
        "variable_patterns": vpat,
    })
    # Default the confidence to 0, as the other sections do: formatting a
    # missing value (None) with ':.3f' would raise TypeError and abort the run.
    print(f" MT32: cat={cp.get('category')} conf={cp.get('confidence', 0):.3f} vpat={vpat} grp={gr.get('type')}")
sec("BUG#2: VL02 no-dup -> keybreak")
# Print the classification and extracted variable patterns for the VL02 sample.
src = load("VL02_CHECK_NO_DUP.cbl")
if src:
    cp = classify_program(src)
    st = extract_structure(src)
    # Default the confidence to 0, as the other sections do: formatting a
    # missing value (None) with ':.3f' would raise TypeError and abort the run.
    print(f" VL02: cat={cp.get('category')} conf={cp.get('confidence', 0):.3f} vpat={st.get('variable_patterns')}")
sec("BUG#3: Low confidence on statement programs")
# Report category, confidence and method for each statement sample found on disk.
for nm in ("ST-ADD-TO", "ST-SUB-FROM", "ST-MUL-BY", "ST-DIV-BY-GIVING", "ST-IF-COMP"):
    src = load(f"{nm}.cbl")
    if not src:
        # Missing or empty fixture: nothing to classify.
        continue
    cp = classify_program(src)
    print(f" {nm:20s} cat={cp.get('category','?'):20s} conf={cp.get('confidence',0):.3f} meth={cp.get('method','?')}")
sec("BUG#4: generate_data on real COBOL")
# Generate records for each sample and show which of the first few fields vary.
for nm in ("ST-IF-COMP", "ST-EVAL-ALSO", "ST-SET-88", "ST-PERF-UNTIL", "ST-SEARCH-ALL"):
    src = load(f"{nm}.cbl")
    if not src:
        continue
    structure = extract_structure(src)
    recs = generate_data(src, structure)
    print(f" {nm:20s} {len(recs)} records")
    if not recs:
        continue
    # Only the first five keys of the first record are inspected.
    leading_keys = list(recs[0].keys())[:5]
    for k in leading_keys:
        # Distinct stringified values, ignoring records where the field is falsy.
        vals = {str(r.get(k, "")) for r in recs if r.get(k)}
        if len(vals) > 1:
            print(f" {k}: {sorted(vals)[:5]}")
sec("BUG#5: Matching subtype detection")
# Print category, subtype and confidence for each matching sample found on disk.
for nm in ("MT01_1TO1", "MT02_1TON", "MT03_NTO1", "MT16_TWO_STAGE_1TO1", "MT20_MN_TO_MXN"):
    src = load(f"{nm}.cbl")
    if not src:
        continue
    cp = classify_program(src)
    # The extracted structure is not printed in this section; the call is kept
    # so the extractor still runs on these samples.
    st = extract_structure(src)
    print(f" {nm:20s} cat={cp.get('category','?'):15s} subtype={cp.get('subtype','?'):10s} conf={cp.get('confidence',0):.3f}")
sec("BUG#6: Adversarial false positive detection")
# Print the classification and variable patterns for each adversarial sample.
for nm in ("ADV-FALSE-KEY", "ADV-PREVKEY-FAKE", "ADV-KEY-IN-COMMENT", "ADV-ASCII-KEY"):
    src = load(f"{nm}.cbl")
    if not src:
        continue
    cp = classify_program(src)
    st = extract_structure(src)
    print(f" {nm:20s} cat={cp.get('category','?'):20s} conf={cp.get('confidence',0):.3f} vpat={st.get('variable_patterns',{})}")
sec("BUG#7: Keyword detection false positive/negative")
from hina.classifier import detect_keyword

# Each entry is ("<program>.<suffix>", description). Only the program part is
# used below to locate the fixture.
# NOTE(review): the suffix and the description look like expectations, but
# nothing here asserts them; results are only printed. Confirm whether ck()
# checks were intended.
kw_tests = [
    ("MT01_1TO1.matching", "should have matching kw"),
    ("CI01_CICS.cics", "should have online kw"),
    ("DB01_SELECT_UPDATE.db", "should have DB kw"),
    ("ST01_SORT.statement", "should have SORT kw"),
    ("ADV-FALSE-KEY.*", "false KEY should not trigger"),
]
for nm, desc in kw_tests:
    parts = nm.split(".")
    src_file = load(f"{parts[0]}.cbl")
    if not src_file:
        continue
    # Detection is run on the upper-cased source text.
    kw = detect_keyword(src_file.upper())
    # Collect the first element of every detected item; empty set when nothing was detected.
    cat_kw = {k[0] for k in kw} if kw else set()
    print(f" {parts[0]:25s} keywords={cat_kw}")
# Final tally; exit with status 1 when at least one check failed.
summary = f"\n{'='*55}\nR16: {P} PASS / {F} FAIL\n{'='*55}"
print(summary)
if F > 0:
    sys.exit(1)