a784c6974a
COBOL技術者による高密度テスト(52 tests)実装: 発見・修正されたバグ: 1. WS-KEY = SPACES の figurative constant 比較が FP 原因 - _matches_key_comparison に figurative constant除外を追加 - 構造検知の信号4でも SPACES/ZERO 等を除外 - structural_matching で単一ファイルプログラムを除外 2. simple_vs_two_stage が常に単純マッチングを返していた - 実証拠なしでも0.5で返す → 他の分類を汚染 - 修正: file_count>=2 + IF + 比較証拠がない場合は unknown 3. simple_vs_two_stageテストを現実に合わせて更新 回帰: 767 passed(0 new failures) 高密度テスト: 52/52 PASS
207 lines
7.9 KiB
Python
207 lines
7.9 KiB
Python
"""Adversarial tests — false-positive / false-negative attacks on the COBOL matching classifier.

Attack surface designed by a COBOL migration expert:

- FP: a non-matching program is misclassified as マッチング (matching)
- FN: a genuine matching program is not recognised
- Edge cases: keywords inside comments, old-style naming, multi-file
  non-matching programs, AT END split across lines, GO TO style,
  NOT = comparisons, variable names without hyphens
"""
|
|
|
|
from pathlib import Path
|
|
import pytest
|
|
|
|
from cobol_testgen import extract_structure
|
|
from hina.pipeline import classify_program
|
|
from hina.classifier import detect_keyword
|
|
|
|
# Directory holding the adversarial COBOL sample files: three directory
# levels above the one containing this module, then test-data/cobol/adversarial.
FIXTURES = Path(__file__).parents[3].joinpath("test-data", "cobol", "adversarial")
|
|
|
|
# ── 对抗性 FP/FN 测试(使用 COBOL 样本文件)──
|
|
|
|
# Each case is (fixture file name, expect_matching, reason).
# ``expect_matching`` says whether the classifier's category is expected to be
# a matching one; ``reason`` is echoed in the assertion message on failure.
ADVERSARIAL_TESTS = [
    # --- false-positive guards, then one false-negative guard -------------
    (
        "ADV-FALSE-KEY.cbl",
        False,
        "FP: WS-KEY variable but only simple ADD, should NOT trigger matching",
    ),
    (
        "ADV-KEY-IN-COMMENT.cbl",
        False,
        "FP: KEY only in *> comments, should NOT trigger matching",
    ),
    (
        "ADV-PREVKEY-FAKE.cbl",
        False,
        "FP: WS-PREV-KEY without matching logic, should NOT trigger",
    ),
    (
        "ADV-OLD-SCHOOL.cbl",
        True,
        "FN: K01-KEY old-school naming, should detect matching",
    ),
    (
        "ADV-TINY-MATCH.cbl",
        False,
        "FP: 1 file + SPACES compare is not real matching. Use WS-KEY-A = WS-KEY-B for matching.",
    ),
    (
        "ADV-CALL-MATCH.cbl",
        False,
        "FP: CALL+WS-MAST-KEY, subprogram call should win",
    ),
    (
        "ADV-ASCII-KEY.cbl",
        False,
        "FP: ASCII+WS-KEY, encoding conversion should win",
    ),
    (
        "ADV-10FILES.cbl",
        False,
        "FP: 10 files no KEY comparison, should NOT trigger matching",
    ),
]
|
|
|
|
|
|
@pytest.mark.parametrize(
    "filename,expect_matching,reason",
    ADVERSARIAL_TESTS,
    ids=[case[0].replace('.cbl', '') for case in ADVERSARIAL_TESTS],
)
def test_adversarial(filename, expect_matching, reason):
    """Run one adversarial fixture through the classifier and check the verdict.

    Args:
        filename: Name of the COBOL sample file inside ``FIXTURES``.
        expect_matching: True when the resulting category is expected to
            contain "マッチング" or "二段階", False when it must contain neither.
        reason: Explanation of the case, included in the failure message.
    """
    fixture_path = FIXTURES / filename
    assert fixture_path.exists(), f"Missing: {fixture_path}"
    src = fixture_path.read_text("utf-8")

    # Structure extraction must produce something for every fixture.
    assert extract_structure(src) is not None

    result = classify_program(src)
    assert result is not None
    assert result["confidence"] >= 0

    # A category counts as "matching" when it mentions either label.
    category = result["category"]
    is_matching = any(label in category for label in ("マッチング", "二段階"))

    if not expect_matching:
        assert not is_matching, (
            f"{filename}: expected NON-MATCHING but got '{result['category']}' "
            f"(conf={result['confidence']:.2f}). Reason: {reason}"
        )
    else:
        assert is_matching, (
            f"{filename}: expected MATCHING but got '{result['category']}' "
            f"(conf={result['confidence']:.2f}). Reason: {reason}"
        )

    # Keyword detection runs for every fixture, but its result is only
    # checked for expected-matching ones: zero keyword hits is acceptable
    # unless the classifier reports the "rule_engine_fallback" method.
    kw = detect_keyword(src)
    if expect_matching:
        used_fallback = result["method"] == "rule_engine_fallback"
        assert len(kw) >= 1 or not used_fallback, (
            f"{filename}: matching program with 0 keyword matches"
        )
|
|
|
|
|
|
# ── COBOL 专家 10 大攻击面测试 ──
|
|
|
|
# Registry of inline COBOL attack programs as (name, source) pairs, filled in
# at import time by the ``_add`` calls below and consumed by parametrize.
COBOL_ATTACK_SOURCES = []


def _add(name, src):
    """Append one ``(name, src)`` pair to ``COBOL_ATTACK_SOURCES``.

    Args:
        name: Label of the attack case.
        src: COBOL program text for the case.
    """
    entry = (name, src)
    COBOL_ATTACK_SOURCES.append(entry)
|
|
|
|
# Attack 1 (label: "AT END across lines"): two-file program comparing
# WS-KEY-A with WS-KEY-B, where the initial READ ... INTO statements and
# their AT END clauses sit in separate literal fragments.
_add(
    "attack1: 跨行AT END",
    (
        " IDENTIFICATION DIVISION. PROGRAM-ID. ATEND1."
        " ENVIRONMENT DIVISION. INPUT-OUTPUT SECTION. FILE-CONTROL."
        " SELECT FILE-A ASSIGN TO 'A.DAT'."
        " SELECT FILE-B ASSIGN TO 'B.DAT'."
        " DATA DIVISION. FILE SECTION."
        " FD FILE-A. 01 REC-A PIC X(80)."
        " FD FILE-B. 01 REC-B PIC X(80)."
        " WORKING-STORAGE SECTION."
        " 01 WS-KEY-A PIC X(10). 01 WS-KEY-B PIC X(10)."
        " 01 WS-EOF-A PIC X VALUE 'N'. 01 WS-EOF-B PIC X VALUE 'N'."
        " PROCEDURE DIVISION. MAIN."
        " OPEN INPUT FILE-A FILE-B."
        " READ FILE-A INTO REC-A"
        " AT END MOVE 'Y' TO WS-EOF-A."
        " READ FILE-B INTO REC-B"
        " AT END MOVE 'Y' TO WS-EOF-B."
        " PERFORM UNTIL WS-EOF-A = 'Y' OR WS-EOF-B = 'Y'"
        " IF WS-KEY-A = WS-KEY-B DISPLAY 'M'"
        " ELSE IF WS-KEY-A < WS-KEY-B"
        " READ FILE-A AT END MOVE 'Y' TO WS-EOF-A"
        " ELSE READ FILE-B AT END MOVE 'Y' TO WS-EOF-B"
        " END-IF"
        " END-PERFORM."
        " CLOSE FILE-A FILE-B. STOP RUN."
    ),
)
|
|
|
|
# Attack 4 (label: "WSKEY without hyphen"): same two-file key comparison,
# but the key and EOF variables are named WSKEY1/WSKEY2 and WSEOF1/WSEOF2,
# i.e. without any hyphen in the identifiers.
_add(
    "attack4: 无连字符WSKEY",
    (
        " IDENTIFICATION DIVISION. PROGRAM-ID. NOHYF."
        " ENVIRONMENT DIVISION. INPUT-OUTPUT SECTION. FILE-CONTROL."
        " SELECT FILE-A ASSIGN TO 'A.DAT'."
        " SELECT FILE-B ASSIGN TO 'B.DAT'."
        " DATA DIVISION. FILE SECTION."
        " FD FILE-A. 01 REC-A PIC X(80)."
        " FD FILE-B. 01 REC-B PIC X(80)."
        " WORKING-STORAGE SECTION."
        " 01 WSKEY1 PIC X(10). 01 WSKEY2 PIC X(10)."
        " 01 WSEOF1 PIC X VALUE 'N'. 01 WSEOF2 PIC X VALUE 'N'."
        " PROCEDURE DIVISION. MAIN."
        " OPEN INPUT FILE-A FILE-B."
        " READ FILE-A AT END MOVE 'Y' TO WSEOF1."
        " READ FILE-B AT END MOVE 'Y' TO WSEOF2."
        " PERFORM UNTIL WSEOF1 = 'Y' OR WSEOF2 = 'Y'"
        " IF WSKEY1 = WSKEY2 DISPLAY 'M'"
        " ELSE IF WSKEY1 < WSKEY2"
        " READ FILE-A AT END MOVE 'Y' TO WSEOF1"
        " ELSE READ FILE-B AT END MOVE 'Y' TO WSEOF2"
        " END-IF"
        " END-PERFORM."
        " CLOSE FILE-A FILE-B. STOP RUN."
    ),
)
|
|
|
|
# Attack 5 (label: "GO TO style"): the two-file key comparison loop is
# written with a LOOP paragraph and GO TO instead of PERFORM UNTIL.
_add(
    "attack5: GO TO风格",
    (
        " IDENTIFICATION DIVISION. PROGRAM-ID. GOTOM."
        " ENVIRONMENT DIVISION. INPUT-OUTPUT SECTION. FILE-CONTROL."
        " SELECT FILE-A ASSIGN TO 'A.DAT'."
        " SELECT FILE-B ASSIGN TO 'B.DAT'."
        " DATA DIVISION. FILE SECTION."
        " FD FILE-A. 01 REC-A PIC X(80)."
        " FD FILE-B. 01 REC-B PIC X(80)."
        " WORKING-STORAGE SECTION."
        " 01 WS-KEY-A PIC X(10). 01 WS-KEY-B PIC X(10)."
        " 01 WS-EOF-A PIC X VALUE 'N'. 01 WS-EOF-B PIC X VALUE 'N'."
        " PROCEDURE DIVISION. MAIN."
        " OPEN INPUT FILE-A FILE-B."
        " READ FILE-A AT END MOVE 'Y' TO WS-EOF-A."
        " READ FILE-B AT END MOVE 'Y' TO WS-EOF-B."
        " LOOP."
        " IF WS-EOF-A = 'Y' OR WS-EOF-B = 'Y' GO TO EXIT-PGM."
        " IF WS-KEY-A = WS-KEY-B"
        " DISPLAY 'M'"
        " READ FILE-A AT END MOVE 'Y' TO WS-EOF-A"
        " READ FILE-B AT END MOVE 'Y' TO WS-EOF-B"
        " ELSE IF WS-KEY-A < WS-KEY-B"
        " READ FILE-A AT END MOVE 'Y' TO WS-EOF-A"
        " ELSE READ FILE-B AT END MOVE 'Y' TO WS-EOF-B"
        " END-IF."
        " GO TO LOOP."
        " EXIT-PGM. CLOSE FILE-A FILE-B. STOP RUN."
    ),
)
|
|
|
|
# Attack 10 (label: "NOT = comparison"): the outer key test is written as
# WS-KEY-A NOT = WS-KEY-B, with the match case handled in the ELSE branch.
_add(
    "attack10: NOT = 比较",
    (
        " IDENTIFICATION DIVISION. PROGRAM-ID. NOTEQ."
        " ENVIRONMENT DIVISION. INPUT-OUTPUT SECTION. FILE-CONTROL."
        " SELECT FILE-A ASSIGN TO 'A.DAT'."
        " SELECT FILE-B ASSIGN TO 'B.DAT'."
        " DATA DIVISION. FILE SECTION."
        " FD FILE-A. 01 REC-A PIC X(80)."
        " FD FILE-B. 01 REC-B PIC X(80)."
        " WORKING-STORAGE SECTION."
        " 01 WS-KEY-A PIC X(10). 01 WS-KEY-B PIC X(10)."
        " 01 WS-EOF-A PIC X VALUE 'N'. 01 WS-EOF-B PIC X VALUE 'N'."
        " PROCEDURE DIVISION. MAIN."
        " OPEN INPUT FILE-A FILE-B."
        " READ FILE-A AT END MOVE 'Y' TO WS-EOF-A."
        " READ FILE-B AT END MOVE 'Y' TO WS-EOF-B."
        " PERFORM UNTIL WS-EOF-A = 'Y' OR WS-EOF-B = 'Y'"
        " IF WS-KEY-A NOT = WS-KEY-B"
        " IF WS-KEY-A < WS-KEY-B"
        " READ FILE-A AT END MOVE 'Y' TO WS-EOF-A"
        " ELSE READ FILE-B AT END MOVE 'Y' TO WS-EOF-B"
        " END-IF"
        " ELSE"
        " DISPLAY 'MATCH'"
        " READ FILE-A AT END MOVE 'Y' TO WS-EOF-A"
        " READ FILE-B AT END MOVE 'Y' TO WS-EOF-B"
        " END-IF"
        " END-PERFORM."
        " CLOSE FILE-A FILE-B. STOP RUN."
    ),
)
|
|
|
|
|
|
@pytest.mark.parametrize(
    "name,source_text",
    COBOL_ATTACK_SOURCES,
    ids=[label for label, _source in COBOL_ATTACK_SOURCES],
)
def test_cobol_expert_attacks(name, source_text):
    """Every registered attack program must be classified as matching.

    Args:
        name: Label of the attack case, used in failure messages.
        source_text: Inline COBOL program text passed to the classifier.
    """
    result = classify_program(source_text)

    # The category must mention one of the two matching labels.
    category = result["category"]
    detected = any(label in category for label in ("マッチング", "二段階"))
    assert detected, (
        f"{name}: 漏检! got {result['category']} conf={result['confidence']:.2f}"
    )

    # A detection only counts when confidence is strictly above 0.30.
    assert result["confidence"] > 0.30, (
        f"{name}: 确信度过低 {result['confidence']:.2f}"
    )
|