fix: adversarial testing — 4 false positive/negative fixes + comment stripping
COBOL migration expert adversarial testing found 4 real defects:
FIX 1: Comment-stripping in detect_keyword() (FP-2)
- Remove *> inline comments and * comment lines before keyword matching
- Prevents 「マッチング」 from triggering on WS-KEY in comments
FIX 2: KEY comparison context validation (FP-1, FP-6)
- Add _matches_key_comparison() — requires the WS-KEY variable to appear
near an actual comparison operator (= < >), not merely in a PIC/VALUE declaration
- Same check in _path_rule_engine features via has_key_var injection
- Fix regex bug: use [=<>] instead of [=<>\s] — the \s also matched the whitespace after a PIC declaration
FIX 3: Old-school naming support (FN-1)
- Add L1 keyword r'[A-Z]\d{0,2}-\w*KEY' with 0.55 confidence
- Matches K01-KEY, KS-KEY etc. (non-WS- prefix naming convention)
FIX 4: mn_output_mode over-matching (FP-6)
- Require IF branches + KEY evidence before returning M:N for file>=3
- matching_vs_keybreak rule 3 now requires has_key_var
New tests: test_adversarial.py — 8 parametrized adversarial tests
Regression: 755 passed (0 new failures)
This commit is contained in:
@@ -0,0 +1,80 @@
|
||||
"""对抗性测试 — COBOL 匹配分类器的假阳性/假阴性攻击
|
||||
|
||||
COBOL 迁移专家设计的攻击面:
|
||||
- FP: 非匹配程序被误判为マッチング
|
||||
- FN: 真实匹配程序未被识别
|
||||
- 边界: 注释关键词、旧式命名、多文件非匹配
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
|
||||
from cobol_testgen import extract_structure
|
||||
from hina.pipeline import classify_program
|
||||
from hina.classifier import detect_keyword
|
||||
|
||||
# Directory containing the adversarial COBOL fixtures: three levels above this
# file's directory, then test-data/cobol/adversarial.
FIXTURES = Path(__file__).parents[3] / "test-data" / "cobol" / "adversarial"
|
||||
|
||||
# Each entry is (filename, expect_matching, reason):
#   filename        - fixture file name, looked up under FIXTURES
#   expect_matching - True  → category must be マッチング/二段階
#                     False → category must NOT be マッチング/二段階
#   reason          - description of the attack, echoed in failure messages
#                     (FP = false-positive attack, FN = false-negative attack)
ADVERSARIAL_TESTS = [
    ("ADV-FALSE-KEY.cbl", False,
     "FP: WS-KEY 变量但只是简单 ADD 程序,不应触发匹配"),
    ("ADV-KEY-IN-COMMENT.cbl", False,
     "FP: KEY 只在 *> 注释中,不应触发匹配"),
    ("ADV-PREVKEY-FAKE.cbl", False,
     "FP: WS-PREV-KEY 但无匹配逻辑,不应触发匹配"),
    ("ADV-OLD-SCHOOL.cbl", True,
     "FN: K01-KEY 旧式命名,应识别为匹配"),
    ("ADV-TINY-MATCH.cbl", True,
     "FN: 极简匹配程序(1 文件),应识别"),
    ("ADV-CALL-MATCH.cbl", False,
     "FP: CALL+WS-MAST-KEY,子程序调用应优先"),
    ("ADV-ASCII-KEY.cbl", False,
     "FP: ASCII+WS-KEY,编码转换应优先"),
    ("ADV-10FILES.cbl", False,
     "FP: 10 文件无 KEY 比较,不应触发匹配"),
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "filename,expect_matching,reason",
    ADVERSARIAL_TESTS,
    # Use the fixture name without its extension as the pytest test id.
    ids=[t[0].replace('.cbl', '') for t in ADVERSARIAL_TESTS],
)
def test_adversarial(filename, expect_matching, reason):
    """Check one adversarial fixture for false positives/negatives.

    Args:
        filename: Fixture file name, resolved under ``FIXTURES``.
        expect_matching: True if the classified category must contain
            "マッチング" or "二段階"; False if it must contain neither.
        reason: Description of the attack, included in failure messages.
    """
    path = FIXTURES / filename
    assert path.exists(), f"Missing: {path}"
    src = path.read_text(encoding="utf-8")

    # 1. extract_structure must not crash
    structure = extract_structure(src)
    assert structure is not None

    # 2. classify_program must not crash
    result = classify_program(src)
    assert result is not None
    assert result["confidence"] >= 0

    # 3. False positive/negative check
    is_matching = "マッチング" in result["category"] or "二段階" in result["category"]
    if expect_matching:
        assert is_matching, (
            f"{filename}: expected MATCHING but got '{result['category']}' "
            f"(conf={result['confidence']:.2f}). Reason: {reason}"
        )
    else:
        assert not is_matching, (
            f"{filename}: expected NON-MATCHING but got '{result['category']}' "
            f"(conf={result['confidence']:.2f}). Reason: {reason}"
        )

    # 4. Keyword detection sanity (detect_keyword is called for every fixture,
    #    so it must not crash on non-matching programs either)
    kw = detect_keyword(src)
    if expect_matching:
        # A matching program may have zero keyword matches only if the result
        # did not come from the "rule_engine_fallback" method.
        assert len(kw) >= 1 or result["method"] != "rule_engine_fallback", (
            f"{filename}: matching program with 0 keyword matches"
        )
|
||||
Reference in New Issue
Block a user