"""专项测试 — 匹配程序完整识别 (10 个程序 × 4 维度) 验证所有 10 个匹配程序在以下维度上的正确性: 1. 分类正确(マッチング/二段階/項目チェック) 2. 子类型正确(1:1/1:N/N:1/M:N/二段階/混合) 3. 分支检测正确 4. 文件数检测正确 已知缺陷(静态分析固有限制): - MT18 (M:N→M) vs MT19 (M:N→N): 运行时行为区分,静态都输出 M:N """ from pathlib import Path import pytest from cobol_testgen import extract_structure from hina.pipeline import classify_program FIXTURES = Path(__file__).parents[3] / "test-data" / "cobol" / "category_matching" # (filename, expected_category, expected_subtype, min_branches, min_files) MATCHING_TESTS = [ ("MT01_1TO1.cbl", "マッチング", "1:1", 4, 2), ("MT02_1TON.cbl", "マッチング", "1:N", 4, 2), ("MT03_NTO1.cbl", "マッチング", "N:1", 4, 2), ("MT16_TWO_STAGE_1TO1.cbl", "二段階マッチング", "二段階", 4, 3), ("MT17_TWO_STAGE_NTO1.cbl", "二段階マッチング", "二段階", 4, 3), ("MT18_MN_TO_M.cbl", "マッチング", "M:N", 4, 2), ("MT19_MN_TO_N.cbl", "マッチング", "M:N", 4, 2), ("MT20_MN_TO_MXN.cbl", "マッチング", "M:N→MxN", 2, 3), ("MT32_MIXED_SAME_KEY.cbl", "項目チェック(重複含む)", "混合", 4, 2), ("MT33_MIXED_DIFF_KEY.cbl", "マッチング", "混合(异键)", 4, 2), ] @pytest.mark.parametrize( "filename,exp_cat,exp_subtype,min_br,min_fl", MATCHING_TESTS, ids=[t[0].replace('.cbl','') for t in MATCHING_TESTS], ) def test_matching_classification(filename, exp_cat, exp_subtype, min_br, min_fl): """匹配程序分类 + 子类型验证""" path = FIXTURES / filename assert path.exists(), f"Missing: {path}" src = path.read_text("utf-8") # 1. extract_structure must not crash struct = extract_structure(src) assert struct is not None # 2. Branch count meets minimum assert struct["total_branches"] >= min_br, ( f"{filename}: expected >= {min_br} branches, got {struct['total_branches']}" ) # 3. File count meets minimum assert struct["file_count"] >= min_fl, ( f"{filename}: expected >= {min_fl} files, got {struct['file_count']}" ) # 4. classify_program must not crash result = classify_program(src) assert result is not None assert result["confidence"] > 0 # 5. Category must match (中文/日文编码问题用精确匹配) assert result["category"] == exp_cat, ( f"{filename}: expected category '{exp_cat}', got '{result['category']}'" ) # 6. Subtype must match actual_st = result.get("subtype", "-") assert actual_st == exp_subtype, ( f"{filename}: expected subtype '{exp_subtype}', got '{actual_st}'" ) # 7. Must NOT be fallback assert result["method"] in ("rule_engine", "keyword"), ( f"{filename}: method is '{result['method']}' (should be rule_engine or keyword)" )