50995d3335
- SETUP.md: 完整环境搭建指南(同事用) - SETUP_QUICK.md: 快速搭环境(4步) - s22~s26: TNA端到端、覆盖率报告、回归检查 - procedure_grammar.lark: 实验性Lark语法 Co-Authored-By: Claude <noreply@anthropic.com>
90 lines
3.9 KiB
Python
90 lines
3.9 KiB
Python
"""R16: Real bug hunting — classification accuracy + data generation correctness"""
|
|
import sys, glob, json
|
|
from pathlib import Path
|
|
sys.path.insert(0, ".")
|
|
# Running tallies for the whole script: P counts checks that passed,
# F counts checks that failed. Both are updated by ck().
P = 0
F = 0
|
|
def ck(v, m=""):
    """Record the outcome of one check in the module-level counters.

    A truthy ``v`` increments the pass counter ``P``. A falsy ``v``
    increments the fail counter ``F`` and prints a FAIL line containing
    the message ``m``. Always returns None.
    """
    global P, F
    if v:
        P += 1
        return
    # Count the failure first, then report it.
    F += 1
    print(f" FAIL {m}")
|
|
def sec(n):
    """Print a section banner for ``n``, preceded by an empty line."""
    banner = f"--- {n} ---"
    print("", banner, sep="\n")
|
|
|
|
from cobol_testgen import extract_structure, generate_data
|
|
from hina.pipeline.pipeline import classify_program
|
|
from hina.rule_engine.confusion_groups import resolve_matching_vs_keybreak
|
|
|
|
# Root directory of the COBOL fixtures, relative to the current working
# directory; load() looks for files in sub-directories of this path.
BASE = Path("test-data") / "cobol"
|
|
|
|
def load(name, subdir=None):
    """Return the text of fixture file ``name``, or None if it is not found.

    Args:
        name: File name to look for (e.g. ``"MT01_1TO1.cbl"``).
        subdir: Optional sub-directory of ``BASE`` to search before the
            default directories. Previously this argument was accepted
            but never consulted.

    Returns:
        The file content decoded as ``utf-8-sig`` (a leading BOM, if any,
        is dropped), taken from the first directory that contains the
        file; None when no directory contains it.
    """
    search_dirs = [
        "category_matching", "category_validation", "category_csv",
        "category_division", "category_cics", "category_db",
        "statement", "adversarial", "matching",
    ]
    if subdir:
        # Honour the caller's hint first, then fall back to the defaults.
        search_dirs.insert(0, subdir)
    for sd in search_dirs:
        p = BASE / sd / name
        # is_file() rather than exists(): a directory that happens to have
        # the same name must not be handed to read_text().
        if p.is_file():
            return p.read_text(encoding="utf-8-sig")
    return None
|
|
|
|
# BUG#1: classify MT32_MIXED_SAME_KEY.cbl, check that its extracted
# structure carries a matching signal, and print how the
# matching-vs-keybreak resolver sees it. Skipped when the fixture is absent.
sec("BUG#1: MT32 mixed same key -> falsely dedup")
src = load("MT32_MIXED_SAME_KEY.cbl")
if src:
    cp = classify_program(src)
    st = extract_structure(src)
    vpat = st.get("variable_patterns", {})
    # Either a previous-key variable or at least two files counts as a
    # matching signal for this check.
    ck(vpat.get("has_prev_key") or st.get("file_count", 0) >= 2,
       "mt32 has matching signals")
    gr = resolve_matching_vs_keybreak({
        "file_count": st.get("file_count", 0),
        "if_types": st.get("if_types", {}),
        "variable_patterns": vpat,
    })
    # Default the confidence to 0 (as the later sections do) so a missing
    # key cannot make the ':.3f' format raise TypeError on None.
    print(f" MT32: cat={cp.get('category')} conf={cp.get('confidence', 0):.3f} vpat={vpat} grp={gr.get('type')}")
|
|
|
|
# BUG#2: print the classification and extracted variable patterns for
# VL02_CHECK_NO_DUP.cbl. Skipped when the fixture is absent.
sec("BUG#2: VL02 no-dup -> keybreak")
src = load("VL02_CHECK_NO_DUP.cbl")
if src:
    cp = classify_program(src)
    st = extract_structure(src)
    # Default the confidence to 0 (as the later sections do) so a missing
    # key cannot make the ':.3f' format raise TypeError on None.
    print(f" VL02: cat={cp.get('category')} conf={cp.get('confidence', 0):.3f} vpat={st.get('variable_patterns')}")
|
|
|
|
# BUG#3: print category, confidence and method for a handful of
# single-statement programs. Fixtures that cannot be loaded are skipped.
sec("BUG#3: Low confidence on statement programs")
for nm in ("ST-ADD-TO", "ST-SUB-FROM", "ST-MUL-BY", "ST-DIV-BY-GIVING", "ST-IF-COMP"):
    src = load(f"{nm}.cbl")
    if not src:
        continue
    cp = classify_program(src)
    category = cp.get("category", "?")
    confidence = cp.get("confidence", 0)
    method = cp.get("method", "?")
    print(f" {nm:20s} cat={category:20s} conf={confidence:.3f} meth={method}")
|
|
|
|
# BUG#4: run generate_data on real programs, print the record count, and
# for the first five keys of the first record print up to five distinct
# values when a key takes more than one value across records.
sec("BUG#4: generate_data on real COBOL")
for nm in ("ST-IF-COMP", "ST-EVAL-ALSO", "ST-SET-88", "ST-PERF-UNTIL", "ST-SEARCH-ALL"):
    src = load(f"{nm}.cbl")
    if not src:
        continue
    structure = extract_structure(src)
    recs = generate_data(src, structure)
    print(f" {nm:20s} {len(recs)} records")
    if not recs:
        continue
    first_keys = list(recs[0].keys())[:5]
    for k in first_keys:
        # Falsy values (missing, empty, zero) are left out of the sample.
        vals = {str(r.get(k, "")) for r in recs if r.get(k)}
        if len(vals) > 1:
            print(f" {k}: {sorted(vals)[:5]}")
|
|
|
|
# BUG#5: print category, subtype and confidence for several matching
# programs. Fixtures that cannot be loaded are skipped.
sec("BUG#5: Matching subtype detection")
for nm in ("MT01_1TO1", "MT02_1TON", "MT03_NTO1", "MT16_TWO_STAGE_1TO1", "MT20_MN_TO_MXN"):
    src = load(f"{nm}.cbl")
    if not src:
        continue
    cp = classify_program(src)
    # NOTE(review): `st` is not read in this section; the call is kept so
    # that extract_structure still runs on these programs - confirm intent.
    st = extract_structure(src)
    category = cp.get("category", "?")
    subtype = cp.get("subtype", "?")
    confidence = cp.get("confidence", 0)
    print(f" {nm:20s} cat={category:15s} subtype={subtype:10s} conf={confidence:.3f}")
|
|
|
|
# BUG#6: print category, confidence and extracted variable patterns for
# the adversarial programs. Fixtures that cannot be loaded are skipped.
sec("BUG#6: Adversarial false positive detection")
for nm in ("ADV-FALSE-KEY", "ADV-PREVKEY-FAKE", "ADV-KEY-IN-COMMENT", "ADV-ASCII-KEY"):
    src = load(f"{nm}.cbl")
    if not src:
        continue
    cp = classify_program(src)
    st = extract_structure(src)
    category = cp.get("category", "?")
    confidence = cp.get("confidence", 0)
    patterns = st.get("variable_patterns", {})
    print(f" {nm:20s} cat={category:20s} conf={confidence:.3f} vpat={patterns}")
|
|
|
|
# BUG#7: run detect_keyword on the upper-cased source of each listed
# program and print the set of first elements of the returned entries.
sec("BUG#7: Keyword detection false positive/negative")
from hina.classifier import detect_keyword

# Each entry is ("<program stem>.<tag>", description). Only the stem is
# used below, to build the file name.
# NOTE(review): the tag and the description are never checked or printed,
# and no ck() assertion is made here - confirm whether checks are missing.
kw_tests = [
    ("MT01_1TO1.matching", "should have matching kw"),
    ("CI01_CICS.cics", "should have online kw"),
    ("DB01_SELECT_UPDATE.db", "should have DB kw"),
    ("ST01_SORT.statement", "should have SORT kw"),
    ("ADV-FALSE-KEY.*", "false KEY should not trigger"),
]
for nm, desc in kw_tests:
    stem = nm.split(".")[0]
    src_file = load(f"{stem}.cbl")
    if not src_file:
        continue
    kw = detect_keyword(src_file.upper())
    # An empty or missing result is shown as an empty set.
    cat_kw = {k[0] for k in kw} if kw else set()
    print(f" {stem:25s} keywords={cat_kw}")
|
|
|
|
# Final tally; a non-zero exit status signals that at least one check failed.
print(f"\n{'='*55}\nR16: {P} PASS / {F} FAIL\n{'='*55}")
if F > 0:
    raise SystemExit(1)
|