"""Tests for hina/pipeline/pipeline.py — classify_program 完整管道。

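Covered paths:
  - Path A: keyword confidence >= 90% -> direct output
  - Path B: keyword 50-89% -> rule engine + contradiction backtracking
  - Path C: keyword < 50% -> LLM-assisted (rule-engine fallback when no LLM is given)
  - No-contradiction scenario
  - Orchestrator integration contract
  - Empty / whitespace-only source edge cases
  - _get_best_keyword_match unit tests
  - Public import contract of the hina package (__all__)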
"""

from __future__ import annotations

from unittest.mock import MagicMock, patch

import pytest

from hina import classify_program
from hina.pipeline.pipeline import _get_best_keyword_match


# ── _get_best_keyword_match 单元测试 ────────────────────────────────────────────


class TestGetBestKeywordMatch:
    """Unit tests for the ``_get_best_keyword_match`` helper.

    Matches are passed as ``(category, confidence, keyword)`` tuples, which is
    the shape every call in this class uses.
    """

    def test_empty_matches(self) -> None:
        """An empty match list yields ``None``."""
        best = _get_best_keyword_match([])
        assert best is None

    def test_single_match(self) -> None:
        """A lone match is echoed back under category/confidence/keyword keys."""
        only_match = ("DB操作", 0.95, "EXEC SQL")

        best = _get_best_keyword_match([only_match])

        assert best is not None
        assert best["category"] == "DB操作"
        assert best["confidence"] == 0.95
        assert best["keyword"] == "EXEC SQL"

    def test_multiple_matches_picks_highest(self) -> None:
        """With several matches, the reported confidence is the highest one."""
        candidates = [
            ("子程序调用", 0.90, "CALL"),
            ("DB操作", 0.95, "EXEC SQL"),
            ("SORT", 0.95, "SORT ON KEY"),
        ]

        best = _get_best_keyword_match(candidates)

        assert best is not None
        assert best["confidence"] == 0.95
        # Two candidates tie at 0.95. The original author's note says the
        # first of the tied entries is the one picked, but the winning
        # category is not asserted here.
        # NOTE(review): consider pinning best["category"] once the tie-break
        # rule is confirmed against the implementation.
        assert "all_matches" in best
        assert len(best["all_matches"]) == 3


# ── classify_program 管道测试 (模拟依赖) ──────────────────────────────────────


def _make_mock_structure(**overrides) -> dict:
    """生成用于 mock 的标准 structure dict。"""
    base = {
        "total_paragraphs": 5,
        "file_count": 2,
        "decision_points": [{"id": 1, "kind": "IF", "label": "A > B", "branches": 2}],
        "if_types": {"total": 1, "comparison": 1, "equality": 0, "compound": 0, "nested_depth": 0},
        "branch_tree_obj": MagicMock(),
        "has_call": False,
        "has_divide": False,
        "has_string": False,
        "has_inspect": False,
        "open_pattern": "sequential",
        "select_files": {"FILE1": ["REC1"], "FILE2": ["REC2"]},
        "variable_patterns": {
            "has_prev_key": False,
            "has_accumulator": False,
            "has_error_flag": False,
            "has_switch": False,
            "has_index": False,
            "has_save_area": False,
            "has_counter": False,
            "has_work": False,
        },
        "divide_constants": [],
        "open_directions": {},
    }
    base.update(overrides)
    return base


class TestClassifyProgramPipeline:
    """Pipeline-level tests for ``classify_program``.

    The patched tests replace ``detect_keyword`` and ``extract_structure`` in
    ``hina.pipeline.pipeline``; nothing else is patched by this class. Note
    that stacked ``@patch`` decorators inject their mocks bottom-up, so the
    ``extract_structure`` mock is the first extra argument.
    """

    # Placeholder source text; the patched detector/extractor never parse it.
    _SOURCE = "SOME COBOL SOURCE"

    @staticmethod
    def _variable_patterns(*enabled: str) -> dict:
        """Return the full ``variable_patterns`` dict with only *enabled* set True."""
        flags = dict.fromkeys(
            (
                "has_prev_key",
                "has_accumulator",
                "has_error_flag",
                "has_switch",
                "has_index",
                "has_save_area",
                "has_counter",
                "has_work",
            ),
            False,
        )
        flags.update(dict.fromkeys(enabled, True))
        return flags

    # ── Path A: keyword >= 90% ──

    @patch("hina.pipeline.pipeline.detect_keyword")
    @patch("hina.pipeline.pipeline.extract_structure")
    def test_pipeline_keyword_high_confidence(
        self, mock_extract: MagicMock, mock_detect: MagicMock
    ) -> None:
        """Path A: keyword confidence >= 90% is reported as a keyword result."""
        mock_extract.return_value = _make_mock_structure()
        mock_detect.return_value = [("DB操作", 0.95, "EXEC SQL")]

        result = classify_program(self._SOURCE)

        assert result["category"] == "DB操作"
        assert result["confidence"] >= 0.0
        assert result["method"] == "keyword"
        assert result["source"] == "l1"
        assert result["judgment"] in ("auto", "review")
        reported = result["matches"]
        assert len(reported) == 1
        assert reported[0][0] == "DB操作"

    @patch("hina.pipeline.pipeline.detect_keyword")
    @patch("hina.pipeline.pipeline.extract_structure")
    def test_pipeline_keyword_high_confidence_sysin(
        self, mock_extract: MagicMock, mock_detect: MagicMock
    ) -> None:
        """Path A variant: a SYSIN keyword at exactly 0.90 also takes the direct route."""
        mock_extract.return_value = _make_mock_structure()
        mock_detect.return_value = [("SYSIN", 0.90, "SYSIN")]

        result = classify_program(self._SOURCE)

        assert result["category"] == "SYSIN"
        assert result["confidence"] >= 0.0
        assert result["method"] == "keyword"

    # ── Path B: keyword 50-89% ──

    @patch("hina.pipeline.pipeline.detect_keyword")
    @patch("hina.pipeline.pipeline.extract_structure")
    def test_pipeline_rule_engine(
        self, mock_extract: MagicMock, mock_detect: MagicMock
    ) -> None:
        """Path B: a 50-89% keyword hit goes through the rule engine and confidence calculation."""
        mock_detect.return_value = [("编码转换", 0.85, "ALPHABETIC")]
        mock_extract.return_value = _make_mock_structure(
            variable_patterns=self._variable_patterns("has_prev_key", "has_accumulator"),
            file_count=2,
            select_files={"FILE1": ["REC1"], "FILE2": ["REC2"]},
        )

        result = classify_program(self._SOURCE)

        assert result["method"] in ("rule_engine", "rule_engine_fallback")
        # The confidence is expected to come from the v2 calculation; only a
        # non-negative value is required here.
        assert result["confidence"] >= 0.0
        for field in ("category", "resolved_types", "contradictions", "v2_confidence"):
            assert field in result
        assert result["v2_confidence"]["base"] >= 0.0

    @patch("hina.pipeline.pipeline.detect_keyword")
    @patch("hina.pipeline.pipeline.extract_structure")
    def test_pipeline_rule_engine_with_contradiction(
        self, mock_extract: MagicMock, mock_detect: MagicMock
    ) -> None:
        """Path B variant: the result carries contradiction-resolution details."""
        mock_detect.return_value = [("编码转换", 0.85, "ALPHABETIC")]
        # Per the original author's note, this structure is meant to look like
        # both a matching (マッチング) and a key-break (キーブレイク) program so
        # that a contradiction arises. The assertions below only require
        # total_count >= 0, so an actual contradiction is not enforced.
        mock_extract.return_value = _make_mock_structure(
            file_count=3,
            select_files={"F1": ["R1"], "F2": ["R2"], "F3": ["R3"]},
            if_types={"total": 3, "comparison": 3, "equality": 3, "compound": 0, "nested_depth": 2},
            variable_patterns=self._variable_patterns(
                "has_prev_key", "has_accumulator", "has_counter"
            ),
        )

        result = classify_program(self._SOURCE)

        assert "contradiction_resolution" in result
        resolution = result["contradiction_resolution"]
        assert resolution["total_count"] >= 0
        # Even when contradictions exist, the result must still be complete.
        assert "category" in result
        assert result["confidence"] >= 0.0

    # ── Path C: keyword < 50% ──

    @patch("hina.pipeline.pipeline.detect_keyword")
    @patch("hina.pipeline.pipeline.extract_structure")
    def test_pipeline_llm_fallback(
        self, mock_extract: MagicMock, mock_detect: MagicMock
    ) -> None:
        """Path C: with no keyword match, classification is delegated to the LLM."""
        mock_extract.return_value = _make_mock_structure()
        mock_detect.return_value = []  # no keyword match at all

        llm_stub = MagicMock()
        llm_stub.call.return_value = (
            '{"category": "simple_sequential", "subtype": "no_branch", '
            '"confidence": 0.88, "features": {}, "required_tests": 1, '
            '"strategy_params": {}}'
        )

        result = classify_program(self._SOURCE, llm=llm_stub)

        assert result["method"] == "llm"
        assert "category" in result
        # The LLM path must actually invoke the LLM.
        llm_stub.call.assert_called()

    @patch("hina.pipeline.pipeline.detect_keyword")
    @patch("hina.pipeline.pipeline.extract_structure")
    def test_pipeline_llm_unavailable_fallback_to_rule_engine(
        self, mock_extract: MagicMock, mock_detect: MagicMock
    ) -> None:
        """Path C safety net: without an LLM the pipeline degrades to the rule engine."""
        mock_extract.return_value = _make_mock_structure()
        mock_detect.return_value = []

        result = classify_program(self._SOURCE, llm=None)

        # No LLM supplied, so the rule-engine fallback is expected.
        assert result["method"] == "rule_engine_fallback"
        assert "category" in result
        assert result["confidence"] >= 0.0

    # ── No-contradiction scenario ──

    @patch("hina.pipeline.pipeline.detect_keyword")
    @patch("hina.pipeline.pipeline.extract_structure")
    def test_pipeline_no_contradiction(
        self, mock_extract: MagicMock, mock_detect: MagicMock
    ) -> None:
        """A minimal structure must produce an empty ``contradictions`` list."""
        # NOTE(review): the original description called this a "path B
        # variant", yet it feeds the same 0.90 SYSIN match that
        # test_pipeline_keyword_high_confidence_sysin expects to be reported
        # with method == "keyword" (path A). Confirm which path is intended.
        mock_detect.return_value = [("SYSIN", 0.90, "SYSIN")]
        # Deliberately simple structure: one file, no IFs, no pattern flags.
        mock_extract.return_value = _make_mock_structure(
            file_count=1,
            select_files={"F1": ["R1"]},
            if_types={"total": 0, "comparison": 0, "equality": 0, "compound": 0, "nested_depth": 0},
            variable_patterns=self._variable_patterns(),
        )

        result = classify_program(self._SOURCE)

        assert "contradictions" in result
        assert len(result["contradictions"]) == 0

    # ── Orchestrator integration contract ──

    @patch("hina.pipeline.pipeline.detect_keyword")
    @patch("hina.pipeline.pipeline.extract_structure")
    def test_pipeline_with_orchestrator_integration(
        self, mock_extract: MagicMock, mock_detect: MagicMock
    ) -> None:
        """The output of ``classify_program`` satisfies what the orchestrator reads."""
        mock_extract.return_value = _make_mock_structure()
        mock_detect.return_value = [("DB操作", 0.95, "EXEC SQL")]

        result = classify_program(self._SOURCE)

        # Mirror how the orchestrator consumes the result.
        vr_type = result["category"]
        vr_confidence = result["confidence"]
        vr_debug_classification = result
        vr_quality_warn = (
            f"类型判定确信度过低({result['confidence']:.0%})"
            if result["needs_review"]
            else None
        )

        # Fields the orchestrator relies on.
        assert isinstance(vr_type, str)
        assert isinstance(vr_confidence, float)
        assert isinstance(vr_debug_classification, dict)
        assert 0.0 <= vr_confidence <= 1.0
        assert isinstance(result["needs_review"], bool)

        # needs_review depends on the v2 confidence, so either outcome is
        # accepted here.
        # NOTE(review): vr_quality_warn is built above as either None or a
        # string containing "过低", so this assertion holds by construction.
        assert vr_quality_warn is None or "过低" in str(vr_quality_warn)

    # ── Empty-source edge cases ──

    def test_pipeline_empty_source(self) -> None:
        """Empty COBOL source yields ``unknown`` with ``needs_review=True``."""
        result = classify_program("")

        expected_fields = {
            "category": "unknown",
            "confidence": 0.0,
            "method": "none",
            "source": "error",
            "judgment": "impossible",
        }
        for field, expected in expected_fields.items():
            assert result[field] == expected
        assert result["needs_review"] is True

    def test_pipeline_whitespace_source(self) -> None:
        """Whitespace-only source is also classified as ``unknown``."""
        result = classify_program("   \n  \t  ")

        assert result["category"] == "unknown"
        assert result["needs_review"] is True

    # ── Import check ──

    def test_import_from_hina(self) -> None:
        """``classify_program`` is the only name listed in ``hina.__all__``."""
        from hina import __all__ as hina_all

        assert len(hina_all) == 1  # single public entry point
        assert "classify_program" in hina_all