"""对抗性测试 — COBOL 匹配分类器的假阳性/假阴性攻击 COBOL 迁移专家设计的攻击面: - FP: 非匹配程序被误判为マッチング - FN: 真实匹配程序未被识别 - 边界: 注释关键词、旧式命名、多文件非匹配 """ from pathlib import Path import pytest from cobol_testgen import extract_structure from hina.pipeline import classify_program from hina.classifier import detect_keyword FIXTURES = Path(__file__).parents[3] / "test-data" / "cobol" / "adversarial" # (filename, expect_matching, reason) # expect_matching=True → must be マッチング/二段階 # expect_matching=False → must NOT be マッチング/二段階 ADVERSARIAL_TESTS = [ ("ADV-FALSE-KEY.cbl", False, "FP: WS-KEY 变量但只是简单 ADD 程序,不应触发匹配"), ("ADV-KEY-IN-COMMENT.cbl", False, "FP: KEY 只在 *> 注释中,不应触发匹配"), ("ADV-PREVKEY-FAKE.cbl", False, "FP: WS-PREV-KEY 但无匹配逻辑,不应触发匹配"), ("ADV-OLD-SCHOOL.cbl", True, "FN: K01-KEY 旧式命名,应识别为匹配"), ("ADV-TINY-MATCH.cbl", True, "FN: 极简匹配程序(1 文件),应识别"), ("ADV-CALL-MATCH.cbl", False, "FP: CALL+WS-MAST-KEY,子程序调用应优先"), ("ADV-ASCII-KEY.cbl", False, "FP: ASCII+WS-KEY,编码转换应优先"), ("ADV-10FILES.cbl", False, "FP: 10 文件无 KEY 比较,不应触发匹配"), ] @pytest.mark.parametrize( "filename,expect_matching,reason", ADVERSARIAL_TESTS, ids=[t[0].replace('.cbl','') for t in ADVERSARIAL_TESTS], ) def test_adversarial(filename, expect_matching, reason): """对抗性测试:验证明假阳性/假阴性""" path = FIXTURES / filename assert path.exists(), f"Missing: {path}" src = path.read_text("utf-8") # 1. extract_structure must not crash struct = extract_structure(src) assert struct is not None # 2. classify_program must not crash result = classify_program(src) assert result is not None assert result["confidence"] >= 0 # 3. False positive/negative check is_matching = "マッチング" in result["category"] or "二段階" in result["category"] if expect_matching: assert is_matching, ( f"{filename}: expected MATCHING but got '{result['category']}' " f"(conf={result['confidence']:.2f}). Reason: {reason}" ) else: assert not is_matching, ( f"{filename}: expected NON-MATCHING but got '{result['category']}' " f"(conf={result['confidence']:.2f}). Reason: {reason}" ) # 4. Keyword detection sanity kw = detect_keyword(src) if expect_matching: # Matching programs should have at least 1 keyword match assert len(kw) >= 1 or result["method"] != "rule_engine_fallback", ( f"{filename}: matching program with 0 keyword matches" )