Files
cobol-java-v3/tests/parametrized/test_statements/test_l2_classifier.py
T
NB-076 33762ca959 fix: adversarial testing — 4 false positive/negative fixes + comment stripping
COBOL migration expert adversarial testing found 4 real defects:

FIX 1: Comment-stripping in detect_keyword() (FP-2)
- Remove *> inline comments and * comment lines before keyword matching
- Prevents 「マッチング」 from triggering on WS-KEY in comments

FIX 2: KEY comparison context validation (FP-1, FP-6)
- Add _matches_key_comparison() — requires WS-KEY variable to appear
  NEAR an actual comparison operator (= < >), not just as PIC/VALUE decl
- Same check in _path_rule_engine features via has_key_var injection
- Fix regex bug: [=<>\s] vs [=<>] — \s matched whitespace after PIC decl

FIX 3: Old-school naming support (FN-1)
- Add L1 keyword r'[A-Z]\d{0,2}-\w*KEY' with 0.55 confidence
- Matches K01-KEY, KS-KEY etc. (non-WS- prefix naming convention)

FIX 4: mn_output_mode over-matching (FP-6)
- Require IF branches + KEY evidence before returning M:N for file>=3
- matching_vs_keybreak rule 3 now requires has_key_var

New tests: test_adversarial.py — 8 parametrized adversarial tests
Regression: 755 passed (0 new failures)
2026-06-21 15:16:41 +08:00

132 lines
6.3 KiB
Python

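"""L2 verification — correctness of HINA classify_program when classifying COBOL statements.

Note: classifier results are influenced by both the L1 keywords and the rule engine.
Most programs still receive a baseline classification from the rule engine even when no L1 keyword matches.
"""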
from pathlib import Path
import pytest
from hina.pipeline import classify_program
# Directory holding the COBOL sample programs used by the tests below.
# parents[3] is the directory three levels above this file's own directory;
# the samples are looked up in its "test-data/cobol" subdirectory.
FIXTURES = Path(__file__).parents[3] / "test-data" / "cobol"
# ── Classification verification dataset ──
# Each entry is (rel_path, expected_category, min_confidence, note):
#   rel_path          – sample path relative to FIXTURES
#   expected_category – category string the classifier must return
#   min_confidence    – lower bound asserted on the returned confidence
#   note              – free-form remark; not used by the assertions
# category = None means the category check is skipped (the sample is only
# verified to classify without crashing).
CLASSIFICATION_TESTS = [
# ── Classification via L1 keyword matching ──
("category_cics/CI01_CICS.cbl", "online", 0.40, "DFHCOMMAREA keyword"),
("category_db/DB01_SELECT_UPDATE.cbl", None, 0.0, "EXEC SQL in *> comments (comment stripping)"),
("HINA101.cbl", "DB操作", 0.55, "EXEC SQL + CALL"),
("HINA025.cbl", "子程序调用", 0.40, "CALL + LINKAGE SECTION"),
# sort/merge parser broken by SD keyword - falls to rule engine
# Encoding conversion ("编码转换") via classifier ALPHABETIC/ASCII/EBCDIC
("category_csv/CV03_ASCII_EBCDIC.cbl", "编码转换", 0.45, "ASCII/EBCDIC keywords"),
# ── Rule-engine classification (DIVIDE constant detection) ──
("category_division/DV01_DIVIDE_50.cbl", "DIVIDE_50.0", 0.30, None),
("category_division/DV02_DIVIDE_25.cbl", "DIVIDE_25.0", 0.30, None),
("category_division/DV03_DIVIDE_100.cbl", "DIVIDE_100.0", 0.30, None),
# ── HINA integrated samples ──
("HINA001.cbl", None, 0.0, "matching program"),
("HINA004.cbl", None, 0.0, "matching program"),
("HINA005.cbl", None, 0.0, "IF branches"),
("HINA006.cbl", None, 0.0, "EVALUATE"),
("HINA007.cbl", None, 0.0, "key break"),
("HINA013.cbl", None, 0.0, "validation"),
("HINA024.cbl", None, 0.0, "misc"),
("HINA034.cbl", None, 0.0, "misc"),
]
# ── P0 sample classification verification ──
# Entries use the same (rel_path, expected_category, min_confidence, note)
# layout; an expected_category of None skips the category comparison.
P0_CLASSIFICATION_TESTS = [
# CALL + LINKAGE → "子程序调用" (subprogram call)
("statement_control/ST-CALL-CONTENT.cbl", "子程序调用", 0.50, None),
("statement_control/ST-CALL-VALUE.cbl", "子程序调用", 0.50, None),
# ORGANIZATION IS → "文件编成" (file organization)
("statement_file/ST-DELETE.cbl", "文件编成", 0.85, "ORGANIZATION IS INDEXED keyword"),
("statement_file/ST-START.cbl", "文件编成", 0.85, "ORGANIZATION IS INDEXED keyword"),
("statement_file/ST-REWRITE-FROM.cbl", "文件编成", 0.60, None),
# Remaining new samples: no L1 keyword → rule-engine baseline
# ("項目チェック(重複含まず)", i.e. item check, duplicates excluded)
("statement_arithmetic/ST-ADD-TO.cbl", None, 0.0, "rule engine baseline"),
("statement_arithmetic/ST-ADD-GIVING.cbl", None, 0.0, None),
("statement_arithmetic/ST-ADD-ROUNDED.cbl", None, 0.0, None),
("statement_arithmetic/ST-SUB-FROM.cbl", None, 0.0, None),
("statement_arithmetic/ST-SUB-GIVING.cbl", None, 0.0, None),
("statement_arithmetic/ST-MUL-BY.cbl", None, 0.0, None),
("statement_arithmetic/ST-MUL-GIVING.cbl", None, 0.0, None),
("statement_arithmetic/ST-DIV-BY-GIVING.cbl", None, 0.0, None),
("statement_arithmetic/ST-COMPLEX.cbl", None, 0.0, None),
("statement_control/ST-IF-COMP.cbl", None, 0.0, None),
("statement_control/ST-IF-DEEP.cbl", None, 0.0, None),
("statement_control/ST-EVAL-ALSO.cbl", None, 0.0, None),
("statement_control/ST-GOTO-DEPEND.cbl", None, 0.0, None),
("statement_file/ST-READ-INTO.cbl", None, 0.0, None),
("statement_file/ST-READ-AT-END.cbl", None, 0.0, None),
("statement_file/ST-WRITE-AFTER.cbl", None, 0.0, None),
("statement_inspect/ST-INSP-CONVERT.cbl", None, 0.0, None),
("statement_inspect/ST-INSP-BEFORE.cbl", None, 0.0, None),
("statement_inspect/ST-ACCEPT-DATE.cbl", None, 0.0, None),
("statement_move/ST-MOVE-GROUP.cbl", None, 0.0, None),
("statement_move/ST-INI-MULTI.cbl", None, 0.0, None),
("statement_move/ST-INI-REPLACE.cbl", None, 0.0, None),
("statement_move/ST-STRING-DELIM.cbl", None, 0.0, None),
("statement_move/ST-UNSTRING-BASIC.cbl", None, 0.0, None),
("statement_perform/ST-PERF-VARY.cbl", None, 0.0, None),
("statement_perform/ST-PERF-UNTIL.cbl", None, 0.0, None),
("statement_perform/ST-PERF-TIMES.cbl", None, 0.0, None),
("statement_search/ST-SEARCH-ALL.cbl", None, 0.0, None),
("statement_search/ST-SET-88.cbl", None, 0.0, None),
]
@pytest.mark.parametrize(
    "rel_path,expected_cat,min_conf,note",
    CLASSIFICATION_TESTS,
    ids=[case[0].replace('/', '-') for case in CLASSIFICATION_TESTS],
)
def test_classify_existing_samples(rel_path, expected_cat, min_conf, note):
    """Check how the pre-existing COBOL samples are classified.

    Args:
        rel_path: Sample path relative to ``FIXTURES``.
        expected_cat: Category the classifier must report, or ``None`` to
            skip the category comparison.
        min_conf: Lower bound asserted on the reported confidence.
        note: Free-form remark carried by the case; not used here.

    The case is skipped when the sample file does not exist.
    """
    path = FIXTURES / rel_path
    if not path.exists():
        pytest.skip(f"Sample not found: {path}")

    result = classify_program(path.read_text("utf-8"))

    # The confidence floor applies to every case, with or without a category.
    assert result is not None, f"{rel_path}: classify_program returned None"
    assert "confidence" in result
    assert result["confidence"] >= min_conf, (
        f"{rel_path}: confidence {result['confidence']:.2f} < {min_conf}"
    )

    # Without an expected category there is nothing further to compare.
    if expected_cat is None:
        return
    assert result["category"] == expected_cat, (
        f"{rel_path}: expected '{expected_cat}', got '{result['category']}' "
        f"(conf={result['confidence']:.2f})"
    )
@pytest.mark.parametrize(
    "rel_path,expected_cat,min_conf,note",
    P0_CLASSIFICATION_TESTS,
    ids=[c[0].replace('/', '-') for c in P0_CLASSIFICATION_TESTS],
)
def test_classify_p0_samples(rel_path, expected_cat, min_conf, note):
    """Verify classification of the P0 samples (mostly rule-engine baseline).

    Args:
        rel_path: Sample path relative to ``FIXTURES``.
        expected_cat: Category the classifier must report, or ``None`` to
            skip the category comparison.
        min_conf: Lower bound asserted on the reported confidence.
        note: Free-form remark carried by the case; not used here.

    The case is skipped when the sample file does not exist.
    """
    path = FIXTURES / rel_path
    if not path.exists():
        pytest.skip(f"P0 sample not found: {path}")
    source = path.read_text("utf-8")
    result = classify_program(source)
    assert result is not None, f"{rel_path}: classify_program returned None"
    # Both failure messages below format result['confidence'], so check the
    # key first: a missing key then fails as a readable assertion rather than
    # as a KeyError raised while the message is being built.
    assert "confidence" in result, f"{rel_path}: result has no 'confidence' key"
    if expected_cat is not None:
        assert result["category"] == expected_cat, (
            f"{rel_path}: expected '{expected_cat}', got '{result['category']}' "
            f"(conf={result['confidence']:.2f})"
        )
    assert result["confidence"] >= min_conf, (
        f"{rel_path}: confidence {result['confidence']:.2f} < {min_conf}"
    )