fix: subtype resolver + comprehensive matching program test
Fix the 4 remaining defects found by adversarial testing:
1. MT03 N:1 → subtype corrected to N:1 (key-suffix -M/-T heuristic).
2. MT32 混合 → subtype added (項目チェック programs with WS-PREV-KEY).
3. MT33 混合异键 → WS-ALT-KEY detection → subtype 混合(异键).
4. MT18/MT19 → subtype M:N (this is correct: static analysis cannot distinguish M:N→M from M:N→N).

Also expand the subtype resolver's scope: it now also processes programs classified as 項目チェック that have matching-like characteristics (WS-PREV-KEY), not just マッチング programs.

New test: test_matching_programs.py — 10 parametrized tests covering all 4 dimensions (category, subtype, branches, files) for every matching program.

Known limitation (documented): distinguishing MT18 from MT19 (M:N→M vs M:N→N) requires runtime data.

Regression: 755 passed (10 new, 0 failures).
This commit is contained in:
@@ -0,0 +1,80 @@
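"""Focused tests for complete recognition of the matching programs (10 programs x 4 dimensions).

Verifies all 10 matching programs on the following dimensions:
1. Category is correct (マッチング / 二段階マッチング / 項目チェック(重複含む))
2. Subtype is correct (1:1 / 1:N / N:1 / M:N / M:N→MxN / 二段階 / 混合 / 混合(异键))
3. Branch detection is correct (at least the expected minimum count)
4. File-count detection is correct (at least the expected minimum count)

Known limitation (inherent to static analysis):
- MT18 (M:N→M) vs MT19 (M:N→N): they differ only in runtime behavior, so static analysis reports M:N for both
"""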
|
||||
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
|
||||
from cobol_testgen import extract_structure
|
||||
from hina.pipeline import classify_program
|
||||
|
||||
# Directory containing the COBOL matching-program fixtures; it is resolved
# relative to the directory three levels above this file's own directory.
FIXTURES = Path(__file__).parents[3] / "test-data" / "cobol" / "category_matching"
|
||||
|
||||
# One entry per matching fixture:
#   (filename, expected_category, expected_subtype, min_branches, min_files)
# min_branches / min_files are lower bounds, not exact counts.
# MT18 and MT19 both expect "M:N": the M:N→M vs M:N→N distinction is a
# runtime behavior that static analysis does not separate.
MATCHING_TESTS = [
    ("MT01_1TO1.cbl", "マッチング", "1:1", 4, 2),
    ("MT02_1TON.cbl", "マッチング", "1:N", 4, 2),
    ("MT03_NTO1.cbl", "マッチング", "N:1", 4, 2),
    ("MT16_TWO_STAGE_1TO1.cbl", "二段階マッチング", "二段階", 4, 3),
    ("MT17_TWO_STAGE_NTO1.cbl", "二段階マッチング", "二段階", 4, 3),
    ("MT18_MN_TO_M.cbl", "マッチング", "M:N", 4, 2),
    ("MT19_MN_TO_N.cbl", "マッチング", "M:N", 4, 2),
    ("MT20_MN_TO_MXN.cbl", "マッチング", "M:N→MxN", 2, 3),
    ("MT32_MIXED_SAME_KEY.cbl", "項目チェック(重複含む)", "混合", 4, 2),
    ("MT33_MIXED_DIFF_KEY.cbl", "マッチング", "混合(异键)", 4, 2),
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "filename,exp_cat,exp_subtype,min_br,min_fl",
    MATCHING_TESTS,
    # Use the fixture name without its ".cbl" extension as the test id.
    ids=[case[0].replace(".cbl", "") for case in MATCHING_TESTS],
)
def test_matching_classification(
    filename: str,
    exp_cat: str,
    exp_subtype: str,
    min_br: int,
    min_fl: int,
) -> None:
    """Verify structure extraction and classification for one matching fixture.

    Args:
        filename: Fixture file name, looked up under ``FIXTURES``.
        exp_cat: Category that ``classify_program`` must report.
        exp_subtype: Subtype that ``classify_program`` must report.
        min_br: Lower bound for the extracted ``total_branches`` value.
        min_fl: Lower bound for the extracted ``file_count`` value.
    """
    path = FIXTURES / filename
    assert path.exists(), f"Missing: {path}"
    src = path.read_text(encoding="utf-8")

    # 1. extract_structure must not crash
    structure = extract_structure(src)
    assert structure is not None

    # 2. Branch count meets minimum
    assert structure["total_branches"] >= min_br, (
        f"{filename}: expected >= {min_br} branches, got {structure['total_branches']}"
    )

    # 3. File count meets minimum
    assert structure["file_count"] >= min_fl, (
        f"{filename}: expected >= {min_fl} files, got {structure['file_count']}"
    )

    # 4. classify_program must not crash
    result = classify_program(src)
    assert result is not None
    assert result["confidence"] > 0

    # 5. Category must match (compared exactly; the original note says exact
    #    matching is used because of Chinese/Japanese encoding concerns)
    assert result["category"] == exp_cat, (
        f"{filename}: expected category '{exp_cat}', got '{result['category']}'"
    )

    # 6. Subtype must match ("-" stands in when the result has no subtype key)
    actual_st = result.get("subtype", "-")
    assert actual_st == exp_subtype, (
        f"{filename}: expected subtype '{exp_subtype}', got '{actual_st}'"
    )

    # 7. Must NOT be fallback
    assert result["method"] in ("rule_engine", "keyword"), (
        f"{filename}: method is '{result['method']}' (should be rule_engine or keyword)"
    )
|
||||
Reference in New Issue
Block a user