"""Adversarial tests: false-positive / false-negative attacks on the COBOL matching classifier.

Attack surface designed by a COBOL migration expert:

- FP: a non-matching program is misclassified as マッチング (matching).
- FN: a genuine matching program is not recognised.
- Edge cases: keywords inside comments, old-school naming, multi-file
  non-matching programs, AT END split across lines, GO TO style,
  ``NOT =`` comparisons, variable names without hyphens.
"""

from pathlib import Path

import pytest

from cobol_testgen import extract_structure
from hina.pipeline import classify_program
from hina.classifier import detect_keyword

FIXTURES = Path(__file__).parents[3] / "test-data" / "cobol" / "adversarial"

# Substrings of a result's "category" value that count as a matching verdict
# (マッチング = "matching", 二段階 = "two-stage").
_MATCHING_MARKERS = ("マッチング", "二段階")


def _is_matching_category(category: str) -> bool:
    """Return True when *category* contains any of the matching markers."""
    return any(marker in category for marker in _MATCHING_MARKERS)


# ── Adversarial FP/FN tests (driven by COBOL sample files) ──
# Each entry is (fixture file name, expect_matching, reason shown on failure).
ADVERSARIAL_TESTS = [
    (
        "ADV-FALSE-KEY.cbl",
        False,
        "FP: WS-KEY variable but only simple ADD, should NOT trigger matching",
    ),
    (
        "ADV-KEY-IN-COMMENT.cbl",
        False,
        "FP: KEY only in *> comments, should NOT trigger matching",
    ),
    (
        "ADV-PREVKEY-FAKE.cbl",
        False,
        "FP: WS-PREV-KEY without matching logic, should NOT trigger",
    ),
    (
        "ADV-OLD-SCHOOL.cbl",
        True,
        "FN: K01-KEY old-school naming, should detect matching",
    ),
    (
        "ADV-TINY-MATCH.cbl",
        False,
        "FP: 1 file + SPACES compare is not real matching. Use WS-KEY-A = WS-KEY-B for matching.",
    ),
    (
        "ADV-CALL-MATCH.cbl",
        False,
        "FP: CALL+WS-MAST-KEY, subprogram call should win",
    ),
    (
        "ADV-ASCII-KEY.cbl",
        False,
        "FP: ASCII+WS-KEY, encoding conversion should win",
    ),
    (
        "ADV-10FILES.cbl",
        False,
        "FP: 10 files no KEY comparison, should NOT trigger matching",
    ),
]


@pytest.mark.parametrize(
    "filename,expect_matching,reason",
    ADVERSARIAL_TESTS,
    # Test id is the fixture name without its ".cbl" extension.
    ids=[Path(name).stem for name, _, _ in ADVERSARIAL_TESTS],
)
def test_adversarial(filename: str, expect_matching: bool, reason: str) -> None:
    """Check one adversarial fixture for a false positive / false negative.

    Args:
        filename: Name of the COBOL fixture under ``FIXTURES``.
        expect_matching: Whether the classifier's category is expected to
            contain one of the matching markers.
        reason: Human-readable rationale, included in the failure message.
    """
    path = FIXTURES / filename
    assert path.exists(), f"Missing: {path}"
    src = path.read_text(encoding="utf-8")

    # Structure extraction must produce something for every fixture.
    struct = extract_structure(src)
    assert struct is not None

    result = classify_program(src)
    assert result is not None
    assert result["confidence"] >= 0

    is_matching = _is_matching_category(result["category"])
    if expect_matching:
        assert is_matching, (
            f"{filename}: expected MATCHING but got '{result['category']}' "
            f"(conf={result['confidence']:.2f}). Reason: {reason}"
        )
    else:
        assert not is_matching, (
            f"{filename}: expected NON-MATCHING but got '{result['category']}' "
            f"(conf={result['confidence']:.2f}). Reason: {reason}"
        )

    # detect_keyword runs for every fixture, so an exception raised on any
    # adversarial input fails the test; its result is only checked for
    # fixtures that are expected to match.
    kw = detect_keyword(src)
    if expect_matching:
        assert len(kw) >= 1 or result["method"] != "rule_engine_fallback", (
            f"{filename}: matching program with 0 keyword matches"
        )


# ── COBOL expert "top 10 attack surface" tests ──
# Inline COBOL programs as (name, source) pairs; filled in by _add() below.
COBOL_ATTACK_SOURCES = []


def _add(name: str, src: str) -> None:
    """Register an inline COBOL program as a ``(name, src)`` attack case."""
    COBOL_ATTACK_SOURCES.append((name, src))


# Attack 1: AT END clause written separately from its READ statement.
_add(
    "attack1: 跨行AT END",
    " IDENTIFICATION DIVISION. PROGRAM-ID. ATEND1."
    " ENVIRONMENT DIVISION. INPUT-OUTPUT SECTION. FILE-CONTROL."
    " SELECT FILE-A ASSIGN TO 'A.DAT'."
    " SELECT FILE-B ASSIGN TO 'B.DAT'."
    " DATA DIVISION. FILE SECTION."
    " FD FILE-A. 01 REC-A PIC X(80)."
    " FD FILE-B. 01 REC-B PIC X(80)."
    " WORKING-STORAGE SECTION."
    " 01 WS-KEY-A PIC X(10). 01 WS-KEY-B PIC X(10)."
    " 01 WS-EOF-A PIC X VALUE 'N'. 01 WS-EOF-B PIC X VALUE 'N'."
    " PROCEDURE DIVISION. MAIN."
    " OPEN INPUT FILE-A FILE-B."
    " READ FILE-A INTO REC-A"
    " AT END MOVE 'Y' TO WS-EOF-A."
    " READ FILE-B INTO REC-B"
    " AT END MOVE 'Y' TO WS-EOF-B."
    " PERFORM UNTIL WS-EOF-A = 'Y' OR WS-EOF-B = 'Y'"
    " IF WS-KEY-A = WS-KEY-B DISPLAY 'M'"
    " ELSE IF WS-KEY-A < WS-KEY-B"
    " READ FILE-A AT END MOVE 'Y' TO WS-EOF-A"
    " ELSE READ FILE-B AT END MOVE 'Y' TO WS-EOF-B"
    " END-IF"
    " END-PERFORM."
    " CLOSE FILE-A FILE-B. STOP RUN.",
)

# Attack 4: key / EOF variables named without hyphens (WSKEY1, WSEOF1, ...).
_add(
    "attack4: 无连字符WSKEY",
    " IDENTIFICATION DIVISION. PROGRAM-ID. NOHYF."
    " ENVIRONMENT DIVISION. INPUT-OUTPUT SECTION. FILE-CONTROL."
    " SELECT FILE-A ASSIGN TO 'A.DAT'."
    " SELECT FILE-B ASSIGN TO 'B.DAT'."
    " DATA DIVISION. FILE SECTION."
    " FD FILE-A. 01 REC-A PIC X(80)."
    " FD FILE-B. 01 REC-B PIC X(80)."
    " WORKING-STORAGE SECTION."
    " 01 WSKEY1 PIC X(10). 01 WSKEY2 PIC X(10)."
    " 01 WSEOF1 PIC X VALUE 'N'. 01 WSEOF2 PIC X VALUE 'N'."
    " PROCEDURE DIVISION. MAIN."
    " OPEN INPUT FILE-A FILE-B."
    " READ FILE-A AT END MOVE 'Y' TO WSEOF1."
    " READ FILE-B AT END MOVE 'Y' TO WSEOF2."
    " PERFORM UNTIL WSEOF1 = 'Y' OR WSEOF2 = 'Y'"
    " IF WSKEY1 = WSKEY2 DISPLAY 'M'"
    " ELSE IF WSKEY1 < WSKEY2"
    " READ FILE-A AT END MOVE 'Y' TO WSEOF1"
    " ELSE READ FILE-B AT END MOVE 'Y' TO WSEOF2"
    " END-IF"
    " END-PERFORM."
    " CLOSE FILE-A FILE-B. STOP RUN.",
)

# Attack 5: loop built from a paragraph label and GO TO instead of PERFORM.
_add(
    "attack5: GO TO风格",
    " IDENTIFICATION DIVISION. PROGRAM-ID. GOTOM."
    " ENVIRONMENT DIVISION. INPUT-OUTPUT SECTION. FILE-CONTROL."
    " SELECT FILE-A ASSIGN TO 'A.DAT'."
    " SELECT FILE-B ASSIGN TO 'B.DAT'."
    " DATA DIVISION. FILE SECTION."
    " FD FILE-A. 01 REC-A PIC X(80)."
    " FD FILE-B. 01 REC-B PIC X(80)."
    " WORKING-STORAGE SECTION."
    " 01 WS-KEY-A PIC X(10). 01 WS-KEY-B PIC X(10)."
    " 01 WS-EOF-A PIC X VALUE 'N'. 01 WS-EOF-B PIC X VALUE 'N'."
    " PROCEDURE DIVISION. MAIN."
    " OPEN INPUT FILE-A FILE-B."
    " READ FILE-A AT END MOVE 'Y' TO WS-EOF-A."
    " READ FILE-B AT END MOVE 'Y' TO WS-EOF-B."
    " LOOP."
    " IF WS-EOF-A = 'Y' OR WS-EOF-B = 'Y' GO TO EXIT-PGM."
    " IF WS-KEY-A = WS-KEY-B"
    " DISPLAY 'M'"
    " READ FILE-A AT END MOVE 'Y' TO WS-EOF-A"
    " READ FILE-B AT END MOVE 'Y' TO WS-EOF-B"
    " ELSE IF WS-KEY-A < WS-KEY-B"
    " READ FILE-A AT END MOVE 'Y' TO WS-EOF-A"
    " ELSE READ FILE-B AT END MOVE 'Y' TO WS-EOF-B"
    " END-IF."
    " GO TO LOOP."
    " EXIT-PGM. CLOSE FILE-A FILE-B. STOP RUN.",
)

# Attack 10: key comparison expressed with NOT = (match handled in the ELSE).
_add(
    "attack10: NOT = 比较",
    " IDENTIFICATION DIVISION. PROGRAM-ID. NOTEQ."
    " ENVIRONMENT DIVISION. INPUT-OUTPUT SECTION. FILE-CONTROL."
    " SELECT FILE-A ASSIGN TO 'A.DAT'."
    " SELECT FILE-B ASSIGN TO 'B.DAT'."
    " DATA DIVISION. FILE SECTION."
    " FD FILE-A. 01 REC-A PIC X(80)."
    " FD FILE-B. 01 REC-B PIC X(80)."
    " WORKING-STORAGE SECTION."
    " 01 WS-KEY-A PIC X(10). 01 WS-KEY-B PIC X(10)."
    " 01 WS-EOF-A PIC X VALUE 'N'. 01 WS-EOF-B PIC X VALUE 'N'."
    " PROCEDURE DIVISION. MAIN."
    " OPEN INPUT FILE-A FILE-B."
    " READ FILE-A AT END MOVE 'Y' TO WS-EOF-A."
    " READ FILE-B AT END MOVE 'Y' TO WS-EOF-B."
    " PERFORM UNTIL WS-EOF-A = 'Y' OR WS-EOF-B = 'Y'"
    " IF WS-KEY-A NOT = WS-KEY-B"
    " IF WS-KEY-A < WS-KEY-B"
    " READ FILE-A AT END MOVE 'Y' TO WS-EOF-A"
    " ELSE READ FILE-B AT END MOVE 'Y' TO WS-EOF-B"
    " END-IF"
    " ELSE"
    " DISPLAY 'MATCH'"
    " READ FILE-A AT END MOVE 'Y' TO WS-EOF-A"
    " READ FILE-B AT END MOVE 'Y' TO WS-EOF-B"
    " END-IF"
    " END-PERFORM."
    " CLOSE FILE-A FILE-B. STOP RUN.",
)


@pytest.mark.parametrize(
    "name,source_text",
    COBOL_ATTACK_SOURCES,
    ids=[n for n, _ in COBOL_ATTACK_SOURCES],
)
def test_cobol_expert_attacks(name: str, source_text: str) -> None:
    """Every registered structural matching program must be detected.

    Args:
        name: Attack label, used as the test id and in failure messages.
        source_text: Inline COBOL source passed to ``classify_program``.
    """
    result = classify_program(source_text)
    # Same guard as test_adversarial: fail with a clear message rather than a
    # TypeError from indexing None.
    assert result is not None, f"{name}: classify_program returned None"

    assert _is_matching_category(result["category"]), (
        f"{name}: 漏检! got {result['category']} conf={result['confidence']:.2f}"
    )
    assert result["confidence"] > 0.30, (
        f"{name}: 确信度过低 {result['confidence']:.2f}"
    )