NOTE(review): recovered from a copy of this patch whose line breaks were collapsed.
Indentation of context/removed lines is reconstructed and the index hashes are the
original ones; regenerate with `git diff` before applying.

diff --git a/hina/pipeline/pipeline.py b/hina/pipeline/pipeline.py
index 7e614da..428e098 100644
--- a/hina/pipeline/pipeline.py
+++ b/hina/pipeline/pipeline.py
@@ -429,8 +429,8 @@ def _resolve_matching_subtype(
         更新后的 result,增加 "subtype" 字段。
     """
     category = result.get("category", "")
-    if "マッチング" not in category and "キーブレイク" not in category:
-        return result  # 非匹配程序不做子类型区分
+    if not any(tag in category for tag in ("マッチング", "キーブレイク", "項目チェック")):
+        return result  # non-matching / non-check programs get no subtype
 
     src_upper = cobol_source.upper()
     import re
@@ -445,12 +445,18 @@ def _resolve_matching_subtype(
         result["subtype"] = "M:N→MxN"
         return result
 
-    # 2. 混合匹配 (WS-PREV-KEY 存在)
+    # 2. Mixed matching (WS-PREV-KEY present); also covers the 項目チェック category
     if 'WS-PREV-KEY' in src_upper:
         result["subtype"] = "混合"
         return result
 
-    # 3. 检查键变量命名模式
+    # 3. WS-ALT-KEY or ALTERNATE anywhere in the source → mixed matching on different keys
+    # NOTE(review): substring test; also fires on e.g. ALTERNATE RECORD KEY clauses. Confirm.
+    if 'WS-ALT-KEY' in src_upper or 'ALTERNATE' in src_upper:
+        result["subtype"] = "混合(异键)"
+        return result
+
+    # 4. Inspect key-variable naming patterns
     key_vars = set(re.findall(r'WS-[\w-]*KEY[A-Z0-9-]*', src_upper))
 
     # 不对称键名 → 1:N 或 N:1 (WS-MAST-KEY + WS-TRAN-KEY)
@@ -460,7 +466,20 @@ def _resolve_matching_subtype(
         result["subtype"] = "1:N"
         return result
 
-    # ── 第 2 层: 静态规则+LLM 辅助 ──
+    # 5. Naming heuristic on the last '-'-separated segment of each key variable:
+    #    WS-KEY-M + WS-KEY-T → N:1, WS-KEY-M + WS-KEY-N → M:N.
+    #    Symmetric names (WS-KEY-A + WS-KEY-B) are NOT resolved by this step.
+    key_suffixes = {k.rsplit('-', 1)[-1] for k in key_vars}
+    if {'M', 'T'} <= key_suffixes:
+        # Master/Transaction pair; tested first, so M+T+N also resolves to N:1
+        result["subtype"] = "N:1"
+        return result
+    if {'M', 'N'} <= key_suffixes:
+        # Multi-file M:N; naming alone cannot separate M:N→M from M:N→N
+        result["subtype"] = "M:N"
+        return result
+
+    # ── Layer 2: LLM-assisted ──
     # 多个键变量 + 多文件 → 可能是 M:N→M 或 M:N→N,需要 LLM 分辨
     needs_llm = (
         len(key_vars) >= 3 or
diff --git a/tests/parametrized/test_statements/test_matching_programs.py b/tests/parametrized/test_statements/test_matching_programs.py
new file mode 100644
index 0000000..ab1ef0c
--- /dev/null
+++ b/tests/parametrized/test_statements/test_matching_programs.py
@@ -0,0 +1,81 @@
+"""Focused tests: full recognition of matching programs (10 programs × 4 dimensions).
+
+Verifies all 10 matching programs on the following dimensions:
+1. Category is correct (マッチング / 二段階 / 項目チェック)
+2. Subtype is correct (1:1 / 1:N / N:1 / M:N / 二段階 / 混合)
+3. Branch detection is correct
+4. File-count detection is correct
+
+Known limitation (inherent to static analysis):
+- MT18 (M:N→M) vs MT19 (M:N→N): they differ only in runtime behavior; both are expected as M:N
+"""
+
+from pathlib import Path
+
+import pytest
+
+from cobol_testgen import extract_structure
+from hina.pipeline import classify_program
+
+FIXTURES = Path(__file__).parents[3] / "test-data" / "cobol" / "category_matching"
+
+# (filename, expected_category, expected_subtype, min_branches, min_files)
+MATCHING_TESTS = [
+    ("MT01_1TO1.cbl", "マッチング", "1:1", 4, 2),
+    ("MT02_1TON.cbl", "マッチング", "1:N", 4, 2),
+    ("MT03_NTO1.cbl", "マッチング", "N:1", 4, 2),
+    ("MT16_TWO_STAGE_1TO1.cbl", "二段階マッチング", "二段階", 4, 3),
+    ("MT17_TWO_STAGE_NTO1.cbl", "二段階マッチング", "二段階", 4, 3),
+    ("MT18_MN_TO_M.cbl", "マッチング", "M:N", 4, 2),
+    ("MT19_MN_TO_N.cbl", "マッチング", "M:N", 4, 2),
+    ("MT20_MN_TO_MXN.cbl", "マッチング", "M:N→MxN", 2, 3),
+    ("MT32_MIXED_SAME_KEY.cbl", "項目チェック(重複含む)", "混合", 4, 2),
+    ("MT33_MIXED_DIFF_KEY.cbl", "マッチング", "混合(异键)", 4, 2),
+]
+
+
+@pytest.mark.parametrize(
+    "filename,exp_cat,exp_subtype,min_br,min_fl",
+    MATCHING_TESTS,
+    ids=[Path(t[0]).stem for t in MATCHING_TESTS],
+)
+def test_matching_classification(filename, exp_cat, exp_subtype, min_br, min_fl):
+    """Check structure extraction, category, subtype and method for one fixture."""
+    path = FIXTURES / filename
+    assert path.exists(), f"Missing: {path}"
+    src = path.read_text(encoding="utf-8")
+
+    # 1. extract_structure must not crash
+    struct = extract_structure(src)
+    assert struct is not None
+
+    # 2. Branch count meets minimum
+    assert struct["total_branches"] >= min_br, (
+        f"{filename}: expected >= {min_br} branches, got {struct['total_branches']}"
+    )
+
+    # 3. File count meets minimum
+    assert struct["file_count"] >= min_fl, (
+        f"{filename}: expected >= {min_fl} files, got {struct['file_count']}"
+    )
+
+    # 4. classify_program must not crash
+    result = classify_program(src)
+    assert result is not None
+    assert result["confidence"] > 0
+
+    # 5. Category must match (exact comparison, because of Chinese/Japanese encoding issues)
+    assert result["category"] == exp_cat, (
+        f"{filename}: expected category '{exp_cat}', got '{result['category']}'"
+    )
+
+    # 6. Subtype must match
+    actual_st = result.get("subtype", "-")
+    assert actual_st == exp_subtype, (
+        f"{filename}: expected subtype '{exp_subtype}', got '{actual_st}'"
+    )
+
+    # 7. Must NOT be fallback
+    assert result["method"] in ("rule_engine", "keyword"), (
+        f"{filename}: method is '{result['method']}' (should be rule_engine or keyword)"
+    )