"""Tests for hina/pipeline/pipeline.py — classify_program 完整管道。 覆盖路径: - 路径 A: keyword confidence >= 90% -> 直接输出 - 路径 B: keyword 50-89% -> 规则引擎 + 矛盾回溯 - 路径 C: keyword < 50% -> LLM 辅助 - 无矛盾场景 - orchestrator 集成契约 - 空源码边界 """ from __future__ import annotations from unittest.mock import MagicMock, patch import pytest from hina import classify_program from hina.pipeline.pipeline import _get_best_keyword_match # ── _get_best_keyword_match 单元测试 ──────────────────────────────────────────── class TestGetBestKeywordMatch: def test_empty_matches(self) -> None: assert _get_best_keyword_match([]) is None def test_single_match(self) -> None: result = _get_best_keyword_match([("DB操作", 0.95, "EXEC SQL")]) assert result is not None assert result["category"] == "DB操作" assert result["confidence"] == 0.95 assert result["keyword"] == "EXEC SQL" def test_multiple_matches_picks_highest(self) -> None: matches = [ ("子程序调用", 0.90, "CALL"), ("DB操作", 0.95, "EXEC SQL"), ("SORT", 0.95, "SORT ON KEY"), ] result = _get_best_keyword_match(matches) assert result is not None assert result["confidence"] == 0.95 # 置信度相同时取第一个最高值 assert "all_matches" in result assert len(result["all_matches"]) == 3 # ── classify_program 管道测试 (模拟依赖) ────────────────────────────────────── def _make_mock_structure(**overrides) -> dict: """生成用于 mock 的标准 structure dict。""" base = { "total_paragraphs": 5, "file_count": 2, "decision_points": [{"id": 1, "kind": "IF", "label": "A > B", "branches": 2}], "if_types": {"total": 1, "comparison": 1, "equality": 0, "compound": 0, "nested_depth": 0}, "branch_tree_obj": MagicMock(), "has_call": False, "has_divide": False, "has_string": False, "has_inspect": False, "open_pattern": "sequential", "select_files": {"FILE1": ["REC1"], "FILE2": ["REC2"]}, "variable_patterns": { "has_prev_key": False, "has_accumulator": False, "has_error_flag": False, "has_switch": False, "has_index": False, "has_save_area": False, "has_counter": False, "has_work": False, }, "divide_constants": [], "open_directions": {}, } base.update(overrides) return base class TestClassifyProgramPipeline: # ── 路径 A: keyword >= 90% ── @patch("hina.pipeline.pipeline.detect_keyword") @patch("hina.pipeline.pipeline.extract_structure") def test_pipeline_keyword_high_confidence( self, mock_extract: MagicMock, mock_detect: MagicMock ) -> None: """路径 A: keyword confidence >= 90%, 直接输出关键词结果。""" mock_detect.return_value = [("DB操作", 0.95, "EXEC SQL")] mock_extract.return_value = _make_mock_structure() result = classify_program("SOME COBOL SOURCE") assert result["category"] == "DB操作" assert result["confidence"] >= 0.0 assert result["method"] == "keyword" assert result["source"] == "l1" assert result["judgment"] in ("auto", "review") assert len(result["matches"]) == 1 assert result["matches"][0][0] == "DB操作" @patch("hina.pipeline.pipeline.detect_keyword") @patch("hina.pipeline.pipeline.extract_structure") def test_pipeline_keyword_high_confidence_sysin( self, mock_extract: MagicMock, mock_detect: MagicMock ) -> None: """路径 A 变体: SYSIN 关键字 (置信度 0.90) 也走直接输出。""" mock_detect.return_value = [("SYSIN", 0.90, "SYSIN")] mock_extract.return_value = _make_mock_structure() result = classify_program("SOME COBOL SOURCE") assert result["category"] == "SYSIN" assert result["confidence"] >= 0.0 assert result["method"] == "keyword" # ── 路径 B: keyword 50-89% ── @patch("hina.pipeline.pipeline.detect_keyword") @patch("hina.pipeline.pipeline.extract_structure") def test_pipeline_rule_engine( self, mock_extract: MagicMock, mock_detect: MagicMock ) -> None: """路径 B: keyword 50-89%, 触发规则引擎 + 确信度计算。""" mock_detect.return_value = [("编码转换", 0.85, "ALPHABETIC")] mock_extract.return_value = _make_mock_structure( variable_patterns={ "has_prev_key": True, "has_accumulator": True, "has_error_flag": False, "has_switch": False, "has_index": False, "has_save_area": False, "has_counter": False, "has_work": False, }, file_count=2, select_files={"FILE1": ["REC1"], "FILE2": ["REC2"]}, ) result = classify_program("SOME COBOL SOURCE") assert result["method"] in ("rule_engine", "rule_engine_fallback") # 确信度应由 v2 计算给出合理的值 assert result["confidence"] >= 0.0 assert "category" in result assert "resolved_types" in result assert "contradictions" in result assert "v2_confidence" in result assert result["v2_confidence"]["base"] >= 0.0 @patch("hina.pipeline.pipeline.detect_keyword") @patch("hina.pipeline.pipeline.extract_structure") def test_pipeline_rule_engine_with_contradiction( self, mock_extract: MagicMock, mock_detect: MagicMock ) -> None: """路径 B 变体: 规则引擎检测到矛盾并解决。""" mock_detect.return_value = [("编码转换", 0.85, "ALPHABETIC")] # 构建同时匹配マッチング和キーブレイク特征的结构, 产生矛盾 mock_extract.return_value = _make_mock_structure( file_count=3, select_files={"F1": ["R1"], "F2": ["R2"], "F3": ["R3"]}, if_types={"total": 3, "comparison": 3, "equality": 3, "compound": 0, "nested_depth": 2}, variable_patterns={ "has_prev_key": True, "has_accumulator": True, "has_error_flag": False, "has_switch": False, "has_index": False, "has_save_area": False, "has_counter": True, "has_work": False, }, ) result = classify_program("SOME COBOL SOURCE") assert "contradiction_resolution" in result assert result["contradiction_resolution"]["total_count"] >= 0 # 即使有矛盾, 结果应该是完整的 assert "category" in result assert result["confidence"] >= 0.0 # ── 路径 C: keyword < 50% ── @patch("hina.pipeline.pipeline.detect_keyword") @patch("hina.pipeline.pipeline.extract_structure") def test_pipeline_llm_fallback( self, mock_extract: MagicMock, mock_detect: MagicMock ) -> None: """路径 C: keyword < 50%, LLM 辅助分类。""" mock_detect.return_value = [] # 无关键字匹配 -> confidence = 0 mock_extract.return_value = _make_mock_structure() mock_llm = MagicMock() mock_llm.call.return_value = ( '{"category": "simple_sequential", "subtype": "no_branch", ' '"confidence": 0.88, "features": {}, "required_tests": 1, ' '"strategy_params": {}}' ) result = classify_program("SOME COBOL SOURCE", llm=mock_llm) assert result["method"] == "llm" assert "category" in result # LLM 路径应调用 LLM assert mock_llm.call.called @patch("hina.pipeline.pipeline.detect_keyword") @patch("hina.pipeline.pipeline.extract_structure") def test_pipeline_llm_unavailable_fallback_to_rule_engine( self, mock_extract: MagicMock, mock_detect: MagicMock ) -> None: """路径 C 兜底: LLM 不可用时退化为规则引擎。""" mock_detect.return_value = [] mock_extract.return_value = _make_mock_structure() result = classify_program("SOME COBOL SOURCE", llm=None) # 没有 LLM, 使用规则引擎兜底 assert result["method"] == "rule_engine_fallback" assert "category" in result assert result["confidence"] >= 0.0 # ── 无矛盾场景 ── @patch("hina.pipeline.pipeline.detect_keyword") @patch("hina.pipeline.pipeline.extract_structure") def test_pipeline_no_contradiction( self, mock_extract: MagicMock, mock_detect: MagicMock ) -> None: """路径 B 变体: 规则引擎处理后无矛盾。""" mock_detect.return_value = [("SYSIN", 0.90, "SYSIN")] mock_extract.return_value = _make_mock_structure( # 简单的结构, 不会触发复杂混淆组 file_count=1, select_files={"F1": ["R1"]}, if_types={"total": 0, "comparison": 0, "equality": 0, "compound": 0, "nested_depth": 0}, variable_patterns={ "has_prev_key": False, "has_accumulator": False, "has_error_flag": False, "has_switch": False, "has_index": False, "has_save_area": False, "has_counter": False, "has_work": False, }, ) result = classify_program("SOME COBOL SOURCE") assert "contradictions" in result assert len(result["contradictions"]) == 0 # ── orchestrator 集成契约 ── @patch("hina.pipeline.pipeline.detect_keyword") @patch("hina.pipeline.pipeline.extract_structure") def test_pipeline_with_orchestrator_integration( self, mock_extract: MagicMock, mock_detect: MagicMock ) -> None: """验证 classify_program 输出满足 orchestrator 的集成契约。""" mock_detect.return_value = [("DB操作", 0.95, "EXEC SQL")] mock_extract.return_value = _make_mock_structure() result = classify_program("SOME COBOL SOURCE") # 模拟 orchestrator 的用法: vr_type = result["category"] vr_confidence = result["confidence"] vr_debug_classification = result vr_quality_warn = None if result["needs_review"]: vr_quality_warn = f"类型判定确信度过低({result['confidence']:.0%})" # 断言 orchestrator 需要的字段 assert isinstance(vr_type, str) assert isinstance(vr_confidence, float) assert isinstance(vr_debug_classification, dict) assert 0.0 <= vr_confidence <= 1.0 assert isinstance(result["needs_review"], bool) # 高确信度不需要 review # needs_review depends on v2 confidence assert vr_quality_warn is None or "过低" in str(vr_quality_warn) # ── 空源码边界 ── def test_pipeline_empty_source(self) -> None: """空 COBOL 源码返回 unknown 且 needs_review=True。""" result = classify_program("") assert result["category"] == "unknown" assert result["confidence"] == 0.0 assert result["needs_review"] is True assert result["method"] == "none" assert result["source"] == "error" assert result["judgment"] == "impossible" def test_pipeline_whitespace_source(self) -> None: """纯空白源码也返回 unknown。""" result = classify_program(" \n \t ") assert result["category"] == "unknown" assert result["needs_review"] is True # ── import 验证 ── def test_import_from_hina(self) -> None: """验证 classify_program 是 hina 包唯一导出的函数。""" from hina import __all__ as hina_all assert "classify_program" in hina_all assert len(hina_all) == 1 # 唯一外部入口