feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark

P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged to hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-19 23:51:55 +08:00
parent 63b5284715
commit bc1d56d1a4
129 changed files with 19378 additions and 261 deletions
@@ -0,0 +1,314 @@
+"""Tests for hina/pipeline/pipeline.py — classify_program 完整管道。
+
+覆盖路径:
+  - 路径 A: keyword confidence >= 90% -> 直接输出
+  - 路径 B: keyword 50-89% -> 规则引擎 + 矛盾回溯
+  - 路径 C: keyword < 50% -> LLM 辅助
+  - 无矛盾场景
+  - orchestrator 集成契约
+  - 空源码边界
+"""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from hina import classify_program
+from hina.pipeline.pipeline import _get_best_keyword_match
+
+
+# ── _get_best_keyword_match 单元测试 ────────────────────────────────────────────
+
+
+class TestGetBestKeywordMatch:
+    """Unit tests for the ``_get_best_keyword_match`` helper.
+
+    Each input match is a ``(category, confidence, keyword)`` tuple.
+    """
+
+    def test_empty_matches(self) -> None:
+        """An empty match list yields ``None``."""
+        assert _get_best_keyword_match([]) is None
+
+    def test_single_match(self) -> None:
+        """A lone match is returned with its category, confidence and keyword."""
+        result = _get_best_keyword_match([("DB操作", 0.95, "EXEC SQL")])
+        assert result is not None
+        assert result["category"] == "DB操作"
+        assert result["confidence"] == 0.95
+        assert result["keyword"] == "EXEC SQL"
+
+    def test_multiple_matches_picks_highest(self) -> None:
+        """The highest confidence wins; a tie goes to the earliest top entry."""
+        matches = [
+            ("子程序调用", 0.90, "CALL"),
+            ("DB操作", 0.95, "EXEC SQL"),
+            ("SORT", 0.95, "SORT ON KEY"),
+        ]
+        result = _get_best_keyword_match(matches)
+        assert result is not None
+        assert result["confidence"] == 0.95
+        # Two entries share the top confidence. The first of them is expected
+        # to win, so pin the winner itself rather than only its score.
+        assert result["category"] == "DB操作"
+        assert result["keyword"] == "EXEC SQL"
+        # Every input match is still reported alongside the winner.
+        assert "all_matches" in result
+        assert len(result["all_matches"]) == 3
+
+
+# ── classify_program 管道测试 (模拟依赖) ──────────────────────────────────────
+
+
+def _make_mock_structure(**overrides) -> dict:
+    """Return the baseline structure dict used to mock ``extract_structure``.
+
+    Keyword arguments replace (or add) top-level keys of the baseline.
+    Nested dicts such as ``variable_patterns`` are replaced wholesale,
+    not merged key by key.
+    """
+    # Every variable-pattern flag is off in the baseline structure.
+    pattern_flags = (
+        "has_prev_key",
+        "has_accumulator",
+        "has_error_flag",
+        "has_switch",
+        "has_index",
+        "has_save_area",
+        "has_counter",
+        "has_work",
+    )
+    defaults = {
+        "total_paragraphs": 5,
+        "file_count": 2,
+        "decision_points": [
+            {"id": 1, "kind": "IF", "label": "A > B", "branches": 2},
+        ],
+        "if_types": {
+            "total": 1,
+            "comparison": 1,
+            "equality": 0,
+            "compound": 0,
+            "nested_depth": 0,
+        },
+        "branch_tree_obj": MagicMock(),
+        "has_call": False,
+        "has_divide": False,
+        "has_string": False,
+        "has_inspect": False,
+        "open_pattern": "sequential",
+        "select_files": {"FILE1": ["REC1"], "FILE2": ["REC2"]},
+        "variable_patterns": dict.fromkeys(pattern_flags, False),
+        "divide_constants": [],
+        "open_directions": {},
+    }
+    # Later entries win, so overrides shadow the defaults of the same name.
+    return {**defaults, **overrides}
+
+
+class TestClassifyProgramPipeline:
+    """Pipeline tests for ``classify_program`` with its collaborators patched.
+
+    ``detect_keyword`` and ``extract_structure`` are patched inside
+    ``hina.pipeline.pipeline``, so each test steers the pipeline only
+    through the keyword matches and the structure dict it supplies.
+    """
+
+    # ── Path A: keyword >= 90% ──
+
+    @patch("hina.pipeline.pipeline.detect_keyword")
+    @patch("hina.pipeline.pipeline.extract_structure")
+    def test_pipeline_keyword_high_confidence(
+        self, mock_extract: MagicMock, mock_detect: MagicMock
+    ) -> None:
+        """Path A: keyword confidence >= 90% outputs the keyword result directly."""
+        mock_extract.return_value = _make_mock_structure()
+        mock_detect.return_value = [("DB操作", 0.95, "EXEC SQL")]
+
+        outcome = classify_program("SOME COBOL SOURCE")
+
+        assert outcome["category"] == "DB操作"
+        assert outcome["confidence"] >= 0.0
+        assert outcome["method"] == "keyword"
+        assert outcome["source"] == "l1"
+        assert outcome["judgment"] in ("auto", "review")
+        reported = outcome["matches"]
+        assert len(reported) == 1
+        assert reported[0][0] == "DB操作"
+
+    @patch("hina.pipeline.pipeline.detect_keyword")
+    @patch("hina.pipeline.pipeline.extract_structure")
+    def test_pipeline_keyword_high_confidence_sysin(
+        self, mock_extract: MagicMock, mock_detect: MagicMock
+    ) -> None:
+        """Path A variant: the SYSIN keyword (confidence 0.90) is also output directly."""
+        mock_extract.return_value = _make_mock_structure()
+        mock_detect.return_value = [("SYSIN", 0.90, "SYSIN")]
+
+        outcome = classify_program("SOME COBOL SOURCE")
+
+        assert outcome["category"] == "SYSIN"
+        assert outcome["confidence"] >= 0.0
+        assert outcome["method"] == "keyword"
+
+    # ── Path B: keyword 50-89% ──
+
+    @patch("hina.pipeline.pipeline.detect_keyword")
+    @patch("hina.pipeline.pipeline.extract_structure")
+    def test_pipeline_rule_engine(
+        self, mock_extract: MagicMock, mock_detect: MagicMock
+    ) -> None:
+        """Path B: keyword confidence 50-89% triggers the rule engine and confidence calculation."""
+        structure = _make_mock_structure(
+            file_count=2,
+            select_files={"FILE1": ["REC1"], "FILE2": ["REC2"]},
+            variable_patterns={
+                "has_prev_key": True,
+                "has_accumulator": True,
+                "has_error_flag": False,
+                "has_switch": False,
+                "has_index": False,
+                "has_save_area": False,
+                "has_counter": False,
+                "has_work": False,
+            },
+        )
+        mock_extract.return_value = structure
+        mock_detect.return_value = [("编码转换", 0.85, "ALPHABETIC")]
+
+        outcome = classify_program("SOME COBOL SOURCE")
+
+        assert outcome["method"] in ("rule_engine", "rule_engine_fallback")
+        # The confidence is expected to come from the v2 calculation.
+        assert outcome["confidence"] >= 0.0
+        assert "category" in outcome
+        assert "resolved_types" in outcome
+        assert "contradictions" in outcome
+        assert "v2_confidence" in outcome
+        assert outcome["v2_confidence"]["base"] >= 0.0
+
+    @patch("hina.pipeline.pipeline.detect_keyword")
+    @patch("hina.pipeline.pipeline.extract_structure")
+    def test_pipeline_rule_engine_with_contradiction(
+        self, mock_extract: MagicMock, mock_detect: MagicMock
+    ) -> None:
+        """Path B variant: the rule engine detects a contradiction and resolves it."""
+        # The structure carries features of both the matching (マッチング) and
+        # the key-break (キーブレイク) program types, which is meant to produce
+        # a contradiction.
+        structure = _make_mock_structure(
+            file_count=3,
+            select_files={"F1": ["R1"], "F2": ["R2"], "F3": ["R3"]},
+            if_types={
+                "total": 3,
+                "comparison": 3,
+                "equality": 3,
+                "compound": 0,
+                "nested_depth": 2,
+            },
+            variable_patterns={
+                "has_prev_key": True,
+                "has_accumulator": True,
+                "has_error_flag": False,
+                "has_switch": False,
+                "has_index": False,
+                "has_save_area": False,
+                "has_counter": True,
+                "has_work": False,
+            },
+        )
+        mock_extract.return_value = structure
+        mock_detect.return_value = [("编码转换", 0.85, "ALPHABETIC")]
+
+        outcome = classify_program("SOME COBOL SOURCE")
+
+        assert "contradiction_resolution" in outcome
+        # NOTE(review): ">= 0" also holds when no contradiction was found, so
+        # this only checks that the resolution summary is present.
+        assert outcome["contradiction_resolution"]["total_count"] >= 0
+        # Even with contradictions the result must be complete.
+        assert "category" in outcome
+        assert outcome["confidence"] >= 0.0
+
+    # ── Path C: keyword < 50% ──
+
+    @patch("hina.pipeline.pipeline.detect_keyword")
+    @patch("hina.pipeline.pipeline.extract_structure")
+    def test_pipeline_llm_fallback(
+        self, mock_extract: MagicMock, mock_detect: MagicMock
+    ) -> None:
+        """Path C: keyword confidence < 50% falls back to LLM-assisted classification."""
+        mock_extract.return_value = _make_mock_structure()
+        mock_detect.return_value = []  # no keyword match -> confidence = 0
+
+        llm_stub = MagicMock()
+        llm_stub.call.return_value = (
+            '{"category": "simple_sequential", "subtype": "no_branch", '
+            '"confidence": 0.88, "features": {}, "required_tests": 1, '
+            '"strategy_params": {}}'
+        )
+
+        outcome = classify_program("SOME COBOL SOURCE", llm=llm_stub)
+
+        assert outcome["method"] == "llm"
+        assert "category" in outcome
+        # The LLM path must actually invoke the LLM.
+        assert llm_stub.call.called
+
+    @patch("hina.pipeline.pipeline.detect_keyword")
+    @patch("hina.pipeline.pipeline.extract_structure")
+    def test_pipeline_llm_unavailable_fallback_to_rule_engine(
+        self, mock_extract: MagicMock, mock_detect: MagicMock
+    ) -> None:
+        """Path C safety net: without an LLM the pipeline degrades to the rule engine."""
+        mock_extract.return_value = _make_mock_structure()
+        mock_detect.return_value = []
+
+        outcome = classify_program("SOME COBOL SOURCE", llm=None)
+
+        # No LLM was supplied, so the rule-engine fallback is expected.
+        assert outcome["method"] == "rule_engine_fallback"
+        assert "category" in outcome
+        assert outcome["confidence"] >= 0.0
+
+    # ── No-contradiction scenario ──
+
+    @patch("hina.pipeline.pipeline.detect_keyword")
+    @patch("hina.pipeline.pipeline.extract_structure")
+    def test_pipeline_no_contradiction(
+        self, mock_extract: MagicMock, mock_detect: MagicMock
+    ) -> None:
+        """A simple structure yields a result whose ``contradictions`` entry is empty."""
+        # NOTE(review): this test was originally described as a Path B
+        # (rule engine) variant, but the mocked match ("SYSIN", 0.90) is the
+        # same one the Path A SYSIN test feeds in and sits at the >= 90%
+        # threshold. Confirm which path is meant to be exercised here.
+        mock_detect.return_value = [("SYSIN", 0.90, "SYSIN")]
+        # A deliberately simple structure that should not trigger any of the
+        # complex confusion groups.
+        mock_extract.return_value = _make_mock_structure(
+            file_count=1,
+            select_files={"F1": ["R1"]},
+            if_types={
+                "total": 0,
+                "comparison": 0,
+                "equality": 0,
+                "compound": 0,
+                "nested_depth": 0,
+            },
+            variable_patterns={
+                "has_prev_key": False,
+                "has_accumulator": False,
+                "has_error_flag": False,
+                "has_switch": False,
+                "has_index": False,
+                "has_save_area": False,
+                "has_counter": False,
+                "has_work": False,
+            },
+        )
+
+        outcome = classify_program("SOME COBOL SOURCE")
+
+        assert "contradictions" in outcome
+        assert len(outcome["contradictions"]) == 0
+
+    # ── Orchestrator integration contract ──
+
+    @patch("hina.pipeline.pipeline.detect_keyword")
+    @patch("hina.pipeline.pipeline.extract_structure")
+    def test_pipeline_with_orchestrator_integration(
+        self, mock_extract: MagicMock, mock_detect: MagicMock
+    ) -> None:
+        """The ``classify_program`` output satisfies the orchestrator's integration contract."""
+        mock_extract.return_value = _make_mock_structure()
+        mock_detect.return_value = [("DB操作", 0.95, "EXEC SQL")]
+
+        outcome = classify_program("SOME COBOL SOURCE")
+
+        # Mirror how the orchestrator consumes the result.
+        vr_type = outcome["category"]
+        vr_confidence = outcome["confidence"]
+        vr_debug_classification = outcome
+        vr_quality_warn = (
+            f"类型判定确信度过低({outcome['confidence']:.0%})"
+            if outcome["needs_review"]
+            else None
+        )
+
+        # Fields the orchestrator relies on.
+        assert isinstance(vr_type, str)
+        assert isinstance(vr_confidence, float)
+        assert isinstance(vr_debug_classification, dict)
+        assert 0.0 <= vr_confidence <= 1.0
+        assert isinstance(outcome["needs_review"], bool)
+
+        # NOTE(review): needs_review depends on the v2 confidence, so its
+        # value is not pinned here. The check below holds for either value
+        # (the warning is either None or the message built above).
+        assert vr_quality_warn is None or "过低" in str(vr_quality_warn)
+
+    # ── Empty-source boundary ──
+
+    def test_pipeline_empty_source(self) -> None:
+        """Empty COBOL source returns ``unknown`` with ``needs_review=True``."""
+        outcome = classify_program("")
+        assert outcome["category"] == "unknown"
+        assert outcome["confidence"] == 0.0
+        assert outcome["needs_review"] is True
+        assert outcome["method"] == "none"
+        assert outcome["source"] == "error"
+        assert outcome["judgment"] == "impossible"
+
+    def test_pipeline_whitespace_source(self) -> None:
+        """Whitespace-only source also returns ``unknown``."""
+        outcome = classify_program("   \n  \t  ")
+        assert outcome["category"] == "unknown"
+        assert outcome["needs_review"] is True
+
+    # ── Import check ──
+
+    def test_import_from_hina(self) -> None:
+        """``classify_program`` is the only name exported by the ``hina`` package."""
+        from hina import __all__ as exported_names
+
+        assert "classify_program" in exported_names
+        assert len(exported_names) == 1  # the single external entry point