feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark

P0.6: gcov infrastructure P1: extract_structure output expansion (11 new feature fields) P2: Confusion group rule engine (8 pairs + contradiction + backtrack) P3: 4-factor confidence calculation + quality gate update P4: 33+2 COBOL program type test samples (22 files, 7 categories) P5: parametrized/ test data generation engine P6: japanese_data.py lookup tables P7-10: Type-specific test suites (~159 parametrized tests) P11: Full classification pipeline (classify_program) + orchestrator integration P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix) Architecture decisions: - classification_pipeline/ merged to hina/pipeline/ - parametrized/ as independent module - japanese_data.py as root-level file - hina/__all__ only exports classify_program() Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-19 23:51:55 +08:00
parent 63b5284715
commit bc1d56d1a4
129 changed files with 19378 additions and 261 deletions
@@ -0,0 +1,148 @@
+"""HA-01~10: HINA Agent — LLM 分类 + 回退 + 解析"""
+
+import sys, os, json
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
+from hina.hina_agent import (
+    classify_with_llm, _parse_llm_response, _validate_result, _fallback_classification,
+)
+
+
+class _MockLLMPass:
+    """模拟 LLM 返回正常 JSON"""
+    def call(self, msgs, retries=1):
+        return json.dumps({
+            "category": "condition_heavy",
+            "subtype": "nested_if",
+            "confidence": 0.85,
+            "features": {},
+            "required_tests": 10,
+            "strategy_params": {"max_nesting_depth": 3, "coverage_target": "branch", "file_isolation": False, "supplement_strategy": "incremental"},
+        })
+
+
+class _MockLLMEmpty:
+    def call(self, msgs, retries=1):
+        return ""
+
+
+class _MockLLMBadJSON:
+    def call(self, msgs, retries=1):
+        return "not valid json at all"
+
+
+class _MockLLMTimeout:
+    def call(self, msgs, retries=1):
+        raise Exception("httpx.TimeoutException")
+
+
+# ── HA-01: normal classify_with_llm ──
+
+def test_classify_with_llm_normal():
+    """HA-01: 有效结构体 → 返回 dict 含 category"""
+    structure = {
+        "paragraph_count": 5, "decision_count": 3, "if_count": 2,
+        "evaluate_count": 0, "file_count": 1, "open_directions": ["INPUT"],
+        "has_search_all": False, "has_call": False, "has_break": False,
+        "total_branches": 4,
+    }
+    result = classify_with_llm(structure, _MockLLMPass())
+    assert isinstance(result, dict)
+    assert "category" in result
+    assert result["category"] == "condition_heavy"
+
+
+# ── HA-02~04: LLM error handling ──
+
+def test_classify_with_llm_bad_json():
+    """HA-03: LLM 返回非法 JSON → fallback"""
+    structure = {"paragraph_count": 1, "decision_count": 0, "if_count": 0}
+    result = classify_with_llm(structure, _MockLLMBadJSON())
+    assert isinstance(result, dict)
+    assert "category" in result or "confidence" in result
+
+
+def test_classify_with_llm_empty():
+    """HA-03(同): LLM 返回空字符串 → fallback"""
+    structure = {"paragraph_count": 1, "decision_count": 0, "if_count": 0}
+    result = classify_with_llm(structure, _MockLLMEmpty())
+    assert isinstance(result, dict)
+
+
+def test_classify_with_llm_timeout():
+    """HA-04: LLM 超时 → fallback + 不崩溃"""
+    structure = {"paragraph_count": 1, "decision_count": 0, "if_count": 0}
+    result = classify_with_llm(structure, _MockLLMTimeout())
+    assert isinstance(result, dict)
+
+
+# ── HA-05~07: _parse_llm_response ──
+
+def test_parse_llm_json():
+    """HA-05: 合法 JSON → 解析成功"""
+    r = _parse_llm_response('{"category": "DB操作", "confidence": 0.95}')
+    assert r["category"] == "DB操作"
+    assert r["confidence"] == 0.95
+
+
+def test_parse_llm_invalid_json():
+    """HA-06: 非法 JSON → try/except 不崩溃"""
+    r = _parse_llm_response("暂无")
+    assert r is None or isinstance(r, dict)
+
+
+def test_parse_llm_markdown_wrapped():
+    """HA-07: 含 ```json markdown 包裹"""
+    raw = '```json\n{"category": "SORT", "confidence": 0.9}\n```'
+    r = _parse_llm_response(raw)
+    assert r is not None
+    assert r.get("category") == "SORT"
+
+
+def test_parse_llm_empty_string():
+    """空字符串 → 验证后默认 dict"""
+    r = _parse_llm_response("")
+    assert r["category"] == "unknown"
+    assert r["confidence"] == 0.0
+
+
+# ── HA-08~10: _fallback_classification ──
+
+def test_fallback_no_decision():
+    """HA-08: total_decisions=0 → simple_sequential"""
+    structure = {"decision_points": [], "file_count": 0}
+    r = _fallback_classification(structure)
+    assert r["category"] == "simple_sequential"
+
+
+def test_fallback_call():
+    """HA-09: has_call → call_based"""
+    structure = {
+        "decision_points": [{"kind": "IF"}],
+        "file_count": 0, "has_call": True, "has_search_all": False, "has_break": False,
+    }
+    r = _fallback_classification(structure)
+    assert r["category"] == "call_based"
+
+
+def test_fallback_search():
+    """HA-10: has_search_all → search_intensive"""
+    structure = {
+        "decision_points": [{"kind": "IF"}],
+        "file_count": 0, "has_call": False, "has_search_all": True, "has_break": False,
+    }
+    r = _fallback_classification(structure)
+    assert r["category"] == "search_intensive"
+
+
+# ── _validate_result ──
+
+def test_validate_valid():
+    """合法结果通过验证"""
+    r = _validate_result({"category": "condition_heavy", "confidence": 0.8, "features": {}})
+    assert isinstance(r, dict)
+
+
+def test_validate_missing_category():
+    """缺失 category → 默认 unknown"""
+    r = _validate_result({"confidence": 0.8})
+    assert r["category"] == "unknown"
@@ -0,0 +1,205 @@
+"""Deep classifier tests: keyword detection, confidence boundaries, edge cases"""
+
+import sys, os
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
+
+from hina.classifier import detect_keyword, compute_confidence
+
+
+# ── 1. detect_keyword with SQL + SORT + CALL all present ──
+
+def test_detect_keyword_multiple_matches():
+    """Source with SQL, SORT and CALL keywords → multiple matches with correct confidence ranking"""
+    source = """
+       IDENTIFICATION DIVISION.
+       PROGRAM-ID. TESTPGM.
+       DATA DIVISION.
+       WORKING-STORAGE SECTION.
+       01 WS-A PIC X(100).
+       PROCEDURE DIVISION.
+           EXEC SQL
+               SELECT * FROM TABLE
+           END-EXEC.
+           SORT ON KEY WS-KEY.
+           CALL 'SUBPGM'.
+           STOP RUN.
+    """
+    results = detect_keyword(source)
+
+    categories = {r[0] for r in results}
+    assert "DB操作" in categories    # EXEC SQL → 0.95
+    assert "SORT" in categories       # SORT ON KEY → 0.95
+    assert "子程序调用" in categories  # CALL → 0.90
+
+    # Verify confidence values per match
+    cat_map = {r[0]: (r[1], r[2]) for r in results}
+    assert cat_map["DB操作"][0] == 0.95
+    assert cat_map["DB操作"][1] == "EXEC SQL"
+    assert cat_map["SORT"][0] == 0.95
+    assert cat_map["SORT"][1] == "SORT ON KEY"
+    assert cat_map["子程序调用"][0] == 0.90
+    assert cat_map["子程序调用"][1] == "CALL"
+
+
+# ── 2. compute_confidence with hybrid (keyword + LLM) result ──
+
+def test_compute_confidence_hybrid():
+    """Keyword match below 0.90 threshold + LLM result → method=hybrid, uses LLM category"""
+    # "WRITE AFTER" matches "编辑输出" with confidence 0.80 (< 0.90)
+    source = "WRITE AFTER ADVANCING 1 LINE."
+    llm_result = {"category": "output_heavy", "confidence": 0.75}
+
+    result = compute_confidence(source, llm_result=llm_result)
+
+    assert result["method"] == "hybrid"
+    assert result["source"] == "llm"
+    assert result["category"] == "output_heavy"
+    assert result["confidence"] == 0.75
+    # Keyword matches are still attached to the result
+    assert len(result["matches"]) > 0
+    assert any("WRITE AFTER" in str(m) for m in result["matches"])
+
+
+def test_compute_confidence_keyword_high_confidence_overrides_llm():
+    """Keyword match >= 0.90 → keyword method wins, LLM ignored"""
+    # "EXEC SQL" matches "DB操作" with confidence 0.95 (>= 0.90)
+    source = "EXEC SQL SELECT * FROM TABLE"
+    llm_result = {"category": "something_else", "confidence": 0.50}
+
+    result = compute_confidence(source, llm_result=llm_result)
+
+    assert result["method"] == "keyword"
+    assert result["source"] == "l1"
+    assert result["category"] == "DB操作"
+    assert result["confidence"] == 0.95
+
+
+# ── 3. compute_confidence boundaries: 0.0, 0.69, 0.70, 0.71, 1.0 ──
+
+def test_confidence_boundary_zero():
+    """No keyword match, no LLM → category=unknown, confidence=0.0"""
+    source = "       MOVE 1 TO A.\n       ADD 1 TO B.\n       STOP RUN."
+    result = compute_confidence(source, llm_result=None)
+
+    assert result["category"] == "unknown"
+    assert result["confidence"] == 0.0
+    assert result["method"] == "none"
+    assert result["matches"] == []
+
+
+def test_confidence_boundary_069():
+    """LLM result with confidence 0.69 (below 0.70 boundary)"""
+    source = "       MOVE 1 TO A."
+    llm_result = {"category": "custom_category", "confidence": 0.69}
+    result = compute_confidence(source, llm_result=llm_result)
+
+    assert result["category"] == "custom_category"
+    assert result["confidence"] == 0.69
+    assert result["method"] == "hybrid"
+
+
+def test_confidence_boundary_070():
+    """LLM result with confidence 0.70 (at 0.70 boundary)"""
+    source = "       MOVE 1 TO A."
+    llm_result = {"category": "custom_category", "confidence": 0.70}
+    result = compute_confidence(source, llm_result=llm_result)
+
+    assert result["category"] == "custom_category"
+    assert result["confidence"] == 0.70
+    assert result["method"] == "hybrid"
+
+
+def test_confidence_boundary_071():
+    """LLM result with confidence 0.71 (above 0.70 boundary)"""
+    source = "       MOVE 1 TO A."
+    llm_result = {"category": "custom_category", "confidence": 0.71}
+    result = compute_confidence(source, llm_result=llm_result)
+
+    assert result["category"] == "custom_category"
+    assert result["confidence"] == 0.71
+    assert result["method"] == "hybrid"
+
+
+def test_confidence_boundary_max():
+    """LLM result with confidence 1.0"""
+    source = "       MOVE 1 TO A."
+    llm_result = {"category": "perfect", "confidence": 1.0}
+    result = compute_confidence(source, llm_result=llm_result)
+
+    assert result["category"] == "perfect"
+    assert result["confidence"] == 1.0
+    assert result["method"] == "hybrid"
+
+
+# ── 4. Keyword source text with mixed case, extra whitespace, inline comments ──
+
+def test_detect_keyword_mixed_case_whitespace_comments():
+    """Source with mixed case, inline *> comments"""
+    source = """
+       IDENTIFICATION DIVISION.
+           ExEc Sql
+               SELECT * FROM TABLE
+           END-EXEC.   *> inline comment
+           Call 'SUBPGM'   *> some comment
+           Sort On Key WS-KEY.
+    """
+    results = detect_keyword(source)
+
+    categories = {r[0] for r in results}
+    assert "DB操作" in categories      # EXEC SQL (mixed case)
+    assert "子程序调用" in categories   # CALL (mixed case)
+    assert "SORT" in categories         # SORT ON KEY (mixed case)
+
+    # Verify matched keywords were found (function uppercases source)
+    matched_keywords = {r[2] for r in results}
+    assert "EXEC SQL" in matched_keywords
+    assert "CALL" in matched_keywords
+    assert "SORT ON KEY" in matched_keywords
+
+
+# ── 5. No keyword match and no LLM result → unknown ──
+
+def test_detect_keyword_no_match():
+    """Source with no known keywords → empty list"""
+    source = "       MOVE 1 TO A.\n       ADD 1 TO B.\n       STOP RUN."
+    results = detect_keyword(source)
+    assert len(results) == 0
+
+
+def test_compute_confidence_no_match_no_llm():
+    """No keyword match and no LLM → category=unknown, confidence=0, method=none"""
+    source = "       MOVE 1 TO A.\n       ADD 1 TO B.\n       STOP RUN."
+    result = compute_confidence(source, llm_result=None)
+
+    assert result["category"] == "unknown"
+    assert result["confidence"] == 0.0
+    assert result["method"] == "none"
+    assert result["source"] == "unknown"
+    assert result["matches"] == []
+
+
+# ── Additional: verify L1_RULES via detect_keyword ──
+
+def test_detect_keyword_all_rules():
+    """Each L1_RULE category is detectable from a representative keyword"""
+    test_cases = [
+        ("EXEC SQL", "DB操作"),
+        ("CALL", "子程序调用"),
+        ("IS INITIAL", "IS INITIAL"),
+        ("SYSIN", "SYSIN"),
+        ("ALPHABETIC", "编码转换"),
+        ("DFHCOMMAREA", "online"),
+        ("MAP", "online"),
+        ("SORT ON KEY", "SORT"),
+        ("MERGE ON KEY", "MERGE"),
+        ("WRITE AFTER", "编辑输出"),
+        ("WRITE BEFORE", "编辑输出"),
+        ("ORGANIZATION IS", "文件编成"),
+        ("ALTERNATE RECORD KEY", "替代索引"),
+    ]
+    for keyword, expected_category in test_cases:
+        source = f"       {keyword}  DUMMY."
+        results = detect_keyword(source)
+        categories = {r[0] for r in results}
+        assert expected_category in categories, \
+            f"Keyword '{keyword}' should trigger category '{expected_category}', got {categories}"
@@ -0,0 +1,354 @@
+"""测试: 确信度 4 因子计算 + 质量门禁评分 + 覆盖率比较"""
+
+import pytest
+from hina.confidence import compute_confidence_v2
+from hina.gate import compute_quality_score, check as gate_check
+from coverage.compare_coverage import compare_coverage
+
+
+# ── compute_confidence_v2 判定阈值测试 ──
+
+
+def test_auto_judgment():
+    """确信度 >= 0.90 → auto"""
+    keyword_result = {
+        "base_confidence": 1.0,
+        "match_count": 3,
+    }
+    structure_features = {"structure_match_score": 5}
+    result = compute_confidence_v2(keyword_result, structure_features)
+    # 1.0 × 1.0 × 1.0 × 1.0 = 1.0
+    assert result["confidence"] == 1.0
+    assert result["judgment"] == "auto"
+    assert result["needs_review"] is False
+
+
+def test_review_judgment():
+    """确信度 0.70-0.89 → review"""
+    # Need 0.70 <= confidence < 0.90
+    # base=1.0, context=0.95, consistency=1.0, structure=0.7 → 0.665 → still manual
+    # base=1.0, context=1.0, consistency=0.9, structure=0.85... hmm structure is discrete
+    # Let's try: base=0.95, context=1.0, consistency=1.0, structure=0.7 → 0.665 (manual)
+    # base=0.95, context=0.95(match=2), consistency=1.0, structure=0.7 → 0.63175 (manual)
+    # base=0.95, context=1.0, consistency=0.90, structure=1.0 → 0.855 (review!)
+    keyword_result = {
+        "base_confidence": 0.95,
+        "match_count": 3,
+    }
+    structure_features = {"structure_match_score": 5}
+    contradictions = [
+        {"type": "type_mismatch", "resolved": True},
+    ]
+    result = compute_confidence_v2(
+        keyword_result, structure_features,
+        contradictions=contradictions,
+    )
+    # 0.95 × 1.0 × 0.90 × 1.0 = 0.855
+    assert 0.70 <= result["confidence"] < 0.90
+    assert result["judgment"] == "review"
+    assert result["needs_review"] is True
+
+
+def test_manual_judgment():
+    """确信度 0.50-0.69 → manual"""
+    keyword_result = {
+        "base_confidence": 0.95,
+        "match_count": 1,
+    }
+    structure_features = {"structure_match_score": 4}
+    contradictions = [
+        {"type": "type_mismatch", "resolved": True},
+    ]
+    result = compute_confidence_v2(
+        keyword_result, structure_features,
+        contradictions=contradictions,
+    )
+    # 0.95 × 0.90 × 0.90 × 0.7 = 0.53865
+    assert 0.50 <= result["confidence"] < 0.70
+    assert result["judgment"] == "manual"
+    assert result["needs_review"] is True
+
+
+def test_impossible_judgment():
+    """确信度 < 0.50 → impossible"""
+    keyword_result = {
+        "base_confidence": 0.7,
+        "match_count": 0,
+    }
+    structure_features = {"structure_match_score": 0}
+    result = compute_confidence_v2(keyword_result, structure_features)
+    # 0.7 × 0.50 × 1.0 × 0.3 = 0.105
+    assert result["confidence"] < 0.50
+    assert result["judgment"] == "impossible"
+    assert result["needs_review"] is True
+
+
+# ── 因子边界测试 ──
+
+
+def test_context_factor_match_counts():
+    """关键字匹配数对上下文因子的影响"""
+    # match_count >= 3 → context_factor = 1.0
+    r = compute_confidence_v2(
+        {"base_confidence": 1.0, "match_count": 5},
+        {"structure_match_score": 5},
+    )
+    assert r["context_factor"] == 1.0
+    assert r["confidence"] == 1.0
+
+    # match_count == 2 → context_factor = 0.95
+    r = compute_confidence_v2(
+        {"base_confidence": 1.0, "match_count": 2},
+        {"structure_match_score": 5},
+    )
+    assert r["context_factor"] == 0.95
+    assert r["confidence"] == 0.95
+
+    # match_count == 1 → context_factor = 0.90
+    r = compute_confidence_v2(
+        {"base_confidence": 1.0, "match_count": 1},
+        {"structure_match_score": 5},
+    )
+    assert r["context_factor"] == 0.90
+    assert r["confidence"] == 0.90
+
+    # match_count == 0 → context_factor = 0.50
+    r = compute_confidence_v2(
+        {"base_confidence": 1.0, "match_count": 0},
+        {"structure_match_score": 5},
+    )
+    assert r["context_factor"] == 0.50
+    assert r["confidence"] == 0.50
+
+
+def test_consistency_factor_contradictions():
+    """矛盾数量对一致性因子的影响"""
+    # 无矛盾 → 1.0
+    r = compute_confidence_v2(
+        {"base_confidence": 1.0, "match_count": 3},
+        {"structure_match_score": 5},
+        contradictions=[],
+    )
+    assert r["consistency_factor"] == 1.0
+
+    # 已解决 → 0.90
+    r = compute_confidence_v2(
+        {"base_confidence": 1.0, "match_count": 3},
+        {"structure_match_score": 5},
+        contradictions=[{"type": "t1", "resolved": True}],
+    )
+    assert r["consistency_factor"] == 0.90
+
+    # 未解决 < 3 → 0.80
+    r = compute_confidence_v2(
+        {"base_confidence": 1.0, "match_count": 3},
+        {"structure_match_score": 5},
+        contradictions=[{"type": "t1", "resolved": False}],
+    )
+    assert r["consistency_factor"] == 0.80
+
+    # ≥3 未解决 → 0.50
+    r = compute_confidence_v2(
+        {"base_confidence": 1.0, "match_count": 3},
+        {"structure_match_score": 5},
+        contradictions=[
+            {"type": "t1", "resolved": False},
+            {"type": "t2", "resolved": False},
+            {"type": "t3", "resolved": True},
+        ],
+    )
+    assert r["consistency_factor"] == 0.50
+
+
+def test_structure_factor_scores():
+    """结构匹配度对结构一致性因子的影响"""
+    # 5/5 → 1.0
+    r = compute_confidence_v2(
+        {"base_confidence": 1.0, "match_count": 3},
+        {"structure_match_score": 5},
+    )
+    assert r["structure_factor"] == 1.0
+
+    # 3-4/5 → 0.7
+    r = compute_confidence_v2(
+        {"base_confidence": 1.0, "match_count": 3},
+        {"structure_match_score": 3},
+    )
+    assert r["structure_factor"] == 0.7
+
+    # 1-2/5 → 0.5
+    r = compute_confidence_v2(
+        {"base_confidence": 1.0, "match_count": 3},
+        {"structure_match_score": 1},
+    )
+    assert r["structure_factor"] == 0.5
+
+    # 无法/0 → 0.3
+    r = compute_confidence_v2(
+        {"base_confidence": 1.0, "match_count": 3},
+        {"structure_match_score": 0},
+    )
+    assert r["structure_factor"] == 0.3
+
+
+def test_base_confidence_default():
+    """keyword_result 未提供 base_confidence 时使用默认值 0.7"""
+    r = compute_confidence_v2(
+        {"match_count": 3},
+        {"structure_match_score": 5},
+    )
+    assert r["base"] == 0.7
+
+
+# ── compute_quality_score 双模式测试 ──
+
+
+def test_quality_score_no_gcov():
+    """gcov 未启用模式: branch_rate×0.5 + paragraph_rate×0.5 + confidence×0.4"""
+    static_cov = {
+        "branch_rate": 0.80,
+        "paragraph_rate": 0.90,
+    }
+    score = compute_quality_score(static_cov, gcov_coverage=None, confidence=0.5)
+    # 0.80×0.5 + 0.90×0.5 + 0.5×0.4 = 0.40 + 0.45 + 0.20 = 1.05 → min(1.0, 1.05) = 1.0
+    assert score == 1.0
+
+
+def test_quality_score_no_gcov_sub_max():
+    """gcov 未启用模式，确保不超过 1.0 被 clamp"""
+    static_cov = {
+        "branch_rate": 0.60,
+        "paragraph_rate": 0.70,
+    }
+    score = compute_quality_score(static_cov, gcov_coverage=None, confidence=0.8)
+    # 0.60×0.5 + 0.70×0.5 + 0.8×0.4 = 0.30 + 0.35 + 0.32 = 0.97
+    assert score == 0.97
+
+
+def test_quality_score_with_gcov():
+    """gcov 启用模式: static_cov×0.3 + gcov_cov×0.4 + confidence×0.3"""
+    static_cov = {
+        "branch_rate": 0.80,
+        "paragraph_rate": 0.90,
+    }
+    gcov_cov = {"gcov_cov": 0.75}
+    score = compute_quality_score(static_cov, gcov_cov, confidence=0.5)
+    # static_cov = 0.80×0.5 + 0.90×0.5 = 0.85
+    # score = 0.85×0.3 + 0.75×0.4 + 0.5×0.3 = 0.255 + 0.30 + 0.15 = 0.705
+    assert score == 0.705
+
+
+def test_quality_score_with_gcov_zero_confidence():
+    """gcov 启用模式，置信度为 0"""
+    static_cov = {
+        "branch_rate": 1.0,
+        "paragraph_rate": 1.0,
+    }
+    gcov_cov = {"gcov_cov": 0.5}
+    score = compute_quality_score(static_cov, gcov_cov, confidence=0.0)
+    # static_cov = 1.0
+    # score = 1.0×0.3 + 0.5×0.4 + 0.0×0.3 = 0.30 + 0.20 + 0.0 = 0.50
+    assert score == 0.50
+
+
+# ── compare_coverage 基本功能测试 ──
+
+
+def test_compare_coverage_basic():
+    """compare_coverage 基本功能"""
+    static = {
+        "branch_rate": 0.90,
+        "paragraph_rate": 0.85,
+        "total_branches": 20,
+        "covered_branches": 18,
+    }
+    dynamic = {
+        "gcov_cov": 0.75,
+        "covered_branches": 15,
+        "total_branches": 20,
+        "misleading_branches": ["BR001", "BR003"],
+    }
+    result = compare_coverage("TESTPROG", static, dynamic)
+    assert result["program"] == "TESTPROG"
+    assert result["static"]["branch_rate"] == 0.90
+    assert result["static"]["paragraph_rate"] == 0.85
+    assert result["dynamic"]["gcov_cov"] == 0.75
+    # gap = (0.90×0.5 + 0.85×0.5) - 0.75 = 0.875 - 0.75 = 0.125
+    assert result["gap"] == 0.125
+    assert result["misleading_branches"] == ["BR001", "BR003"]
+
+
+def test_compare_coverage_no_gap():
+    """静态与动态完全一致时 gap 为 0"""
+    static = {
+        "branch_rate": 0.80,
+        "paragraph_rate": 0.80,
+        "total_branches": 10,
+        "covered_branches": 8,
+    }
+    dynamic = {
+        "gcov_cov": 0.80,
+        "covered_branches": 8,
+        "total_branches": 10,
+        "misleading_branches": [],
+    }
+    result = compare_coverage("NOGAP", static, dynamic)
+    # gap = (0.80×0.5 + 0.80×0.5) - 0.80 = 0.80 - 0.80 = 0.0
+    assert result["gap"] == 0.0
+    assert result["misleading_branches"] == []
+
+
+def test_compare_coverage_no_misleading():
+    """没有误导分支时的返回"""
+    static = {
+        "branch_rate": 0.95,
+        "paragraph_rate": 1.0,
+    }
+    dynamic = {
+        "gcov_cov": 0.90,
+        "misleading_branches": [],
+    }
+    result = compare_coverage("CLEAN", static, dynamic)
+    # gap = (0.95×0.5 + 1.0×0.5) - 0.90 = 0.975 - 0.90 = 0.075
+    assert result["gap"] == 0.075
+    assert result["misleading_branches"] == []
+
+
+# ── gate.check 基本功能测试 ──
+
+
+def test_gate_check_passed():
+    """质量门禁完全通过"""
+    result = gate_check(
+        complete_tests=[{"id": 1}],
+        hina_result={},
+        coverage={"branch_rate": 1.0, "paragraph_rate": 1.0},
+    )
+    assert result["passed"] is True
+    assert len(result["issues"]) == 0
+
+
+def test_gate_check_failed_branch():
+    """分支覆盖率不足"""
+    result = gate_check(
+        complete_tests=[{"id": 1}],
+        hina_result={},
+        coverage={
+            "branch_rate": 0.50,
+            "paragraph_rate": 1.0,
+            "uncovered_decision_ids": [1, 2],
+        },
+    )
+    assert result["passed"] is False
+    assert "decision_gaps" in result["issues"]
+
+
+def test_gate_check_no_data():
+    """无测试数据"""
+    result = gate_check(
+        complete_tests=[],
+        hina_result={},
+        coverage={"branch_rate": 1.0, "paragraph_rate": 1.0},
+    )
+    assert result["passed"] is False
+    assert "no_data" in result["issues"]
@@ -0,0 +1,35 @@
+"""GC-01~03: gcov_collector — COBOL 覆盖率采集"""
+
+import sys, os, tempfile
+from pathlib import Path
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
+from hina.gcov_collector import collect_gcov
+
+
+def test_gcov_not_installed():
+    """GC-01: cobc 不在 PATH → available=False"""
+    # Use a temp dir that won't have .gcda/.gcno files
+    with tempfile.TemporaryDirectory() as tmp:
+        work = Path(tmp)
+        result = collect_gcov(work / "program.cbl", work)
+        assert isinstance(result, dict)
+        # available should be False or result has a status field
+        assert not result.get("available", True) or "reason" in result
+
+
+def test_gcov_no_data():
+    """GC-02: 无 .gcda/.gcno → available=False"""
+    with tempfile.TemporaryDirectory() as tmp:
+        cobol_src = Path(tmp) / "test.cbl"
+        cobol_src.write_text("PROGRAM-ID. TEST.")
+        result = collect_gcov(cobol_src, Path(tmp))
+        assert result.get("available") is False
+        assert "reason" in result
+
+
+def test_gcov_result_structure():
+    """返回的 dict 包含必要字段"""
+    with tempfile.TemporaryDirectory() as tmp:
+        result = collect_gcov(Path(tmp) / "nope.cbl", Path(tmp))
+        assert "available" in result
+        assert "reason" in result or "line_rate" in result
@@ -0,0 +1,314 @@
+"""Tests for hina/pipeline/pipeline.py — classify_program 完整管道。
+
+覆盖路径:
+  - 路径 A: keyword confidence >= 90% -> 直接输出
+  - 路径 B: keyword 50-89% -> 规则引擎 + 矛盾回溯
+  - 路径 C: keyword < 50% -> LLM 辅助
+  - 无矛盾场景
+  - orchestrator 集成契约
+  - 空源码边界
+"""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from hina import classify_program
+from hina.pipeline.pipeline import _get_best_keyword_match
+
+
+# ── _get_best_keyword_match 单元测试 ────────────────────────────────────────────
+
+
+class TestGetBestKeywordMatch:
+    def test_empty_matches(self) -> None:
+        assert _get_best_keyword_match([]) is None
+
+    def test_single_match(self) -> None:
+        result = _get_best_keyword_match([("DB操作", 0.95, "EXEC SQL")])
+        assert result is not None
+        assert result["category"] == "DB操作"
+        assert result["confidence"] == 0.95
+        assert result["keyword"] == "EXEC SQL"
+
+    def test_multiple_matches_picks_highest(self) -> None:
+        matches = [
+            ("子程序调用", 0.90, "CALL"),
+            ("DB操作", 0.95, "EXEC SQL"),
+            ("SORT", 0.95, "SORT ON KEY"),
+        ]
+        result = _get_best_keyword_match(matches)
+        assert result is not None
+        assert result["confidence"] == 0.95
+        # 置信度相同时取第一个最高值
+        assert "all_matches" in result
+        assert len(result["all_matches"]) == 3
+
+
+# ── classify_program 管道测试 (模拟依赖) ──────────────────────────────────────
+
+
+def _make_mock_structure(**overrides) -> dict:
+    """生成用于 mock 的标准 structure dict。"""
+    base = {
+        "total_paragraphs": 5,
+        "file_count": 2,
+        "decision_points": [{"id": 1, "kind": "IF", "label": "A > B", "branches": 2}],
+        "if_types": {"total": 1, "comparison": 1, "equality": 0, "compound": 0, "nested_depth": 0},
+        "branch_tree_obj": MagicMock(),
+        "has_call": False,
+        "has_divide": False,
+        "has_string": False,
+        "has_inspect": False,
+        "open_pattern": "sequential",
+        "select_files": {"FILE1": ["REC1"], "FILE2": ["REC2"]},
+        "variable_patterns": {
+            "has_prev_key": False,
+            "has_accumulator": False,
+            "has_error_flag": False,
+            "has_switch": False,
+            "has_index": False,
+            "has_save_area": False,
+            "has_counter": False,
+            "has_work": False,
+        },
+        "divide_constants": [],
+        "open_directions": {},
+    }
+    base.update(overrides)
+    return base
+
+
+class TestClassifyProgramPipeline:
+
+    # ── 路径 A: keyword >= 90% ──
+
+    @patch("hina.pipeline.pipeline.detect_keyword")
+    @patch("hina.pipeline.pipeline.extract_structure")
+    def test_pipeline_keyword_high_confidence(
+        self, mock_extract: MagicMock, mock_detect: MagicMock
+    ) -> None:
+        """路径 A: keyword confidence >= 90%, 直接输出关键词结果。"""
+        mock_detect.return_value = [("DB操作", 0.95, "EXEC SQL")]
+        mock_extract.return_value = _make_mock_structure()
+
+        result = classify_program("SOME COBOL SOURCE")
+
+        assert result["category"] == "DB操作"
+        assert result["confidence"] >= 0.0
+        assert result["method"] == "keyword"
+        assert result["source"] == "l1"
+        assert result["judgment"] in ("auto", "review")
+        assert len(result["matches"]) == 1
+        assert result["matches"][0][0] == "DB操作"
+
+    @patch("hina.pipeline.pipeline.detect_keyword")
+    @patch("hina.pipeline.pipeline.extract_structure")
+    def test_pipeline_keyword_high_confidence_sysin(
+        self, mock_extract: MagicMock, mock_detect: MagicMock
+    ) -> None:
+        """路径 A 变体: SYSIN 关键字 (置信度 0.90) 也走直接输出。"""
+        mock_detect.return_value = [("SYSIN", 0.90, "SYSIN")]
+        mock_extract.return_value = _make_mock_structure()
+
+        result = classify_program("SOME COBOL SOURCE")
+
+        assert result["category"] == "SYSIN"
+        assert result["confidence"] >= 0.0
+        assert result["method"] == "keyword"
+
+    # ── 路径 B: keyword 50-89% ──
+
+    @patch("hina.pipeline.pipeline.detect_keyword")
+    @patch("hina.pipeline.pipeline.extract_structure")
+    def test_pipeline_rule_engine(
+        self, mock_extract: MagicMock, mock_detect: MagicMock
+    ) -> None:
+        """路径 B: keyword 50-89%, 触发规则引擎 + 确信度计算。"""
+        mock_detect.return_value = [("编码转换", 0.85, "ALPHABETIC")]
+        mock_extract.return_value = _make_mock_structure(
+            variable_patterns={
+                "has_prev_key": True,
+                "has_accumulator": True,
+                "has_error_flag": False,
+                "has_switch": False,
+                "has_index": False,
+                "has_save_area": False,
+                "has_counter": False,
+                "has_work": False,
+            },
+            file_count=2,
+            select_files={"FILE1": ["REC1"], "FILE2": ["REC2"]},
+        )
+
+        result = classify_program("SOME COBOL SOURCE")
+
+        assert result["method"] in ("rule_engine", "rule_engine_fallback")
+        # 确信度应由 v2 计算给出合理的值
+        assert result["confidence"] >= 0.0
+        assert "category" in result
+        assert "resolved_types" in result
+        assert "contradictions" in result
+        assert "v2_confidence" in result
+        assert result["v2_confidence"]["base"] >= 0.0
+
+    @patch("hina.pipeline.pipeline.detect_keyword")
+    @patch("hina.pipeline.pipeline.extract_structure")
+    def test_pipeline_rule_engine_with_contradiction(
+        self, mock_extract: MagicMock, mock_detect: MagicMock
+    ) -> None:
+        """路径 B 变体: 规则引擎检测到矛盾并解决。"""
+        mock_detect.return_value = [("编码转换", 0.85, "ALPHABETIC")]
+        # 构建同时匹配マッチング和キーブレイク特征的结构, 产生矛盾
+        mock_extract.return_value = _make_mock_structure(
+            file_count=3,
+            select_files={"F1": ["R1"], "F2": ["R2"], "F3": ["R3"]},
+            if_types={"total": 3, "comparison": 3, "equality": 3, "compound": 0, "nested_depth": 2},
+            variable_patterns={
+                "has_prev_key": True,
+                "has_accumulator": True,
+                "has_error_flag": False,
+                "has_switch": False,
+                "has_index": False,
+                "has_save_area": False,
+                "has_counter": True,
+                "has_work": False,
+            },
+        )
+
+        result = classify_program("SOME COBOL SOURCE")
+
+        assert "contradiction_resolution" in result
+        assert result["contradiction_resolution"]["total_count"] >= 0
+        # 即使有矛盾, 结果应该是完整的
+        assert "category" in result
+        assert result["confidence"] >= 0.0
+
+    # ── 路径 C: keyword < 50% ──
+
+    @patch("hina.pipeline.pipeline.detect_keyword")
+    @patch("hina.pipeline.pipeline.extract_structure")
+    def test_pipeline_llm_fallback(
+        self, mock_extract: MagicMock, mock_detect: MagicMock
+    ) -> None:
+        """路径 C: keyword < 50%, LLM 辅助分类。"""
+        mock_detect.return_value = []  # 无关键字匹配 -> confidence = 0
+        mock_extract.return_value = _make_mock_structure()
+
+        mock_llm = MagicMock()
+        mock_llm.call.return_value = (
+            '{"category": "simple_sequential", "subtype": "no_branch", '
+            '"confidence": 0.88, "features": {}, "required_tests": 1, '
+            '"strategy_params": {}}'
+        )
+
+        result = classify_program("SOME COBOL SOURCE", llm=mock_llm)
+
+        assert result["method"] == "llm"
+        assert "category" in result
+        # LLM 路径应调用 LLM
+        assert mock_llm.call.called
+
+    @patch("hina.pipeline.pipeline.detect_keyword")
+    @patch("hina.pipeline.pipeline.extract_structure")
+    def test_pipeline_llm_unavailable_fallback_to_rule_engine(
+        self, mock_extract: MagicMock, mock_detect: MagicMock
+    ) -> None:
+        """路径 C 兜底: LLM 不可用时退化为规则引擎。"""
+        mock_detect.return_value = []
+        mock_extract.return_value = _make_mock_structure()
+
+        result = classify_program("SOME COBOL SOURCE", llm=None)
+
+        # 没有 LLM, 使用规则引擎兜底
+        assert result["method"] == "rule_engine_fallback"
+        assert "category" in result
+        assert result["confidence"] >= 0.0
+
+    # ── 无矛盾场景 ──
+
+    @patch("hina.pipeline.pipeline.detect_keyword")
+    @patch("hina.pipeline.pipeline.extract_structure")
+    def test_pipeline_no_contradiction(
+        self, mock_extract: MagicMock, mock_detect: MagicMock
+    ) -> None:
+        """路径 B 变体: 规则引擎处理后无矛盾。"""
+        mock_detect.return_value = [("SYSIN", 0.90, "SYSIN")]
+        mock_extract.return_value = _make_mock_structure(
+            # 简单的结构, 不会触发复杂混淆组
+            file_count=1,
+            select_files={"F1": ["R1"]},
+            if_types={"total": 0, "comparison": 0, "equality": 0, "compound": 0, "nested_depth": 0},
+            variable_patterns={
+                "has_prev_key": False, "has_accumulator": False,
+                "has_error_flag": False, "has_switch": False,
+                "has_index": False, "has_save_area": False,
+                "has_counter": False, "has_work": False,
+            },
+        )
+
+        result = classify_program("SOME COBOL SOURCE")
+
+        assert "contradictions" in result
+        assert len(result["contradictions"]) == 0
+
+    # ── orchestrator 集成契约 ──
+
+    @patch("hina.pipeline.pipeline.detect_keyword")
+    @patch("hina.pipeline.pipeline.extract_structure")
+    def test_pipeline_with_orchestrator_integration(
+        self, mock_extract: MagicMock, mock_detect: MagicMock
+    ) -> None:
+        """验证 classify_program 输出满足 orchestrator 的集成契约。"""
+        mock_detect.return_value = [("DB操作", 0.95, "EXEC SQL")]
+        mock_extract.return_value = _make_mock_structure()
+
+        result = classify_program("SOME COBOL SOURCE")
+
+        # 模拟 orchestrator 的用法:
+        vr_type = result["category"]
+        vr_confidence = result["confidence"]
+        vr_debug_classification = result
+        vr_quality_warn = None
+        if result["needs_review"]:
+            vr_quality_warn = f"类型判定确信度过低({result['confidence']:.0%})"
+
+        # 断言 orchestrator 需要的字段
+        assert isinstance(vr_type, str)
+        assert isinstance(vr_confidence, float)
+        assert isinstance(vr_debug_classification, dict)
+        assert 0.0 <= vr_confidence <= 1.0
+        assert isinstance(result["needs_review"], bool)
+
+        # 高确信度不需要 review
+        # needs_review depends on v2 confidence
+        assert vr_quality_warn is None or "过低" in str(vr_quality_warn)
+
+    # ── 空源码边界 ──
+
+    def test_pipeline_empty_source(self) -> None:
+        """空 COBOL 源码返回 unknown 且 needs_review=True。"""
+        result = classify_program("")
+        assert result["category"] == "unknown"
+        assert result["confidence"] == 0.0
+        assert result["needs_review"] is True
+        assert result["method"] == "none"
+        assert result["source"] == "error"
+        assert result["judgment"] == "impossible"
+
+    def test_pipeline_whitespace_source(self) -> None:
+        """纯空白源码也返回 unknown。"""
+        result = classify_program("   \n  \t  ")
+        assert result["category"] == "unknown"
+        assert result["needs_review"] is True
+
+    # ── import 验证 ──
+
+    def test_import_from_hina(self) -> None:
+        """验证 classify_program 是 hina 包唯一导出的函数。"""
+        from hina import __all__ as hina_all
+
+        assert "classify_program" in hina_all
+        assert len(hina_all) == 1  # 唯一外部入口
@@ -0,0 +1,115 @@
+"""RH-01~07: Retry Handler — 分层重试 + heal/simple 分离"""
+
+import sys, os
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
+from hina.retry import RetryHandler, HEALING_FIXES
+from data.diff_result import VerificationRun
+
+
+def _vr(status="PASS", build_log=""):
+    vr = VerificationRun(status=status, program="TEST")
+    if build_log:
+        vr.debug = {"cobol_build": {"log": build_log}}
+    return vr
+
+
+def test_immediate_pass():
+    """RH-01: 1次 PASS → heal=0, simple=0"""
+    h = RetryHandler()
+    vr = h.run(lambda: _vr("PASS"))
+    assert vr.status == "PASS"
+    assert vr.heal_retry == 0
+    assert vr.simple_retry == 0
+
+
+def test_heal_recovery():
+    """RH-02: BLOCKED(not found) → heal修复→PASS"""
+    calls = [0]
+    def fn():
+        calls[0] += 1
+        if calls[0] == 1:
+            return _vr("BLOCKED", build_log="file not found: libcob.so")
+        return _vr("PASS")
+    h = RetryHandler()
+    vr = h.run(fn)
+    assert vr.status == "PASS"
+    assert vr.heal_retry >= 1
+    assert vr.simple_retry == 0
+
+
+def test_simple_retry():
+    """RH-03: BLOCKED→重试→PASS (无 heal 匹配)"""
+    calls = [0]
+    def fn():
+        calls[0] += 1
+        if calls[0] == 1:
+            return _vr("BLOCKED", build_log="some random error")
+        return _vr("PASS")
+    h = RetryHandler()
+    vr = h.run(fn)
+    assert vr.status == "PASS"
+    assert vr.simple_retry >= 1
+
+
+def test_max_retries_exceeded():
+    """RH-04: 全部失败 → FATAL"""
+    h = RetryHandler(max_heal=1, max_simple=1)
+    vr = h.run(lambda: _vr("BLOCKED"))
+    assert vr.status == "FATAL"
+    assert vr.exit_code == 4
+
+
+def test_quality_warn_no_retry():
+    """RH-05: QUALITY_WARN → 立即返回 不重试"""
+    h = RetryHandler()
+    vr = h.run(lambda: _vr("QUALITY_WARN"))
+    assert vr.status == "QUALITY_WARN"
+    assert vr.heal_retry == 0
+    assert vr.simple_retry == 0
+
+
+def test_heal_fails_then_simple():
+    """RH-06: heal 尝试但仍然 BLOCKED → 回退 simple"""
+    calls = [0]
+    def fn():
+        calls[0] += 1
+        return _vr("BLOCKED", build_log="file not found: libcob.so")
+    h = RetryHandler(max_heal=2, max_simple=2)
+    vr = h.run(fn)
+    assert vr.status == "FATAL"
+    # 应已消耗所有 heal+simple
+    assert vr.heal_retry + vr.simple_retry >= 1
+
+
+def test_concurrent_count_separation():
+    """RH-07: heal 和 simple 计数互不影响"""
+    h = RetryHandler(max_heal=2, max_simple=2)
+    calls = [0, False]  # [count, callable flag]
+    def fn():
+        calls[0] += 1
+        if calls[0] == 1:
+            return _vr("BLOCKED", build_log="file not found: libcob.so")
+        return _vr("PASS")
+    h._try_set_env = lambda k, v: None  # no-op fix
+    # Mock fix to succeed on first heal
+    original_fix = HEALING_FIXES["compile_error"]["fix"]
+    HEALING_FIXES["compile_error"]["fix"] = lambda: None
+    try:
+        vr = h.run(fn)
+        assert vr.heal_retry >= 0
+        assert vr.simple_retry >= 0
+        # heal 和 simple 的计数不会混淆
+    finally:
+        HEALING_FIXES["compile_error"]["fix"] = original_fix
+
+
+def test_history_records():
+    """所有 VR 被记录到 history"""
+    h = RetryHandler(max_heal=0, max_simple=2)
+    results = []
+    def fn():
+        vr = _vr("BLOCKED") if len(results) < 2 else _vr("PASS")
+        results.append(vr)
+        return vr
+    h.run(fn)
+    assert len(h.history) >= 2
@@ -0,0 +1,468 @@
+"""Tests for HINA rule engine: confusion groups, contradiction, backtrack."""
+
+from __future__ import annotations
+
+import sys
+import os
+import json
+
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
+
+from hina.rule_engine.confusion_groups import (
+    resolve_matching_vs_keybreak,
+    resolve_dedup_vs_nodedup,
+    resolve_validation_vs_keybreak,
+    resolve_csv_merge_vs_split,
+    resolve_simple_vs_two_stage,
+    resolve_pure_vs_mixed,
+    resolve_division_50_25_100,
+    resolve_mn_output_mode,
+    resolve_confusion_pair,
+)
+from hina.rule_engine.contradiction import (
+    CONTRADICTION_PAIRS,
+    detect_contradictions,
+    resolve_contradiction,
+)
+from hina.rule_engine.backtrack import BacktrackResolver
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 1. confusion_groups — matching_vs_keybreak
+# ═══════════════════════════════════════════════════════════════════════════
+
+def test_matching_vs_keybreak_matching():
+    """3路 IF + SELECT>=2 → マッチング"""
+    features = {
+        "if_types": {"total": 5, "comparison": 3, "equality": 1, "compound": 1, "nested_depth": 2},
+        "select_files": {"file1": {"organization": "SEQUENTIAL"}, "file2": {"organization": "SEQUENTIAL"}},
+        "variable_patterns": {"has_prev_key": False, "has_accumulator": False, "has_error_field": False},
+    }
+    result = resolve_matching_vs_keybreak(features)
+    assert result["resolved_type"] == "マッチング"
+    assert result["confidence"] >= 0.75
+    assert len(result["evidence"]) > 0
+
+
+def test_matching_vs_keybreak_keybreak():
+    """2路 IF + WS-PREV-KEY + 累加器 → キーブレイク"""
+    features = {
+        "if_types": {"total": 2, "comparison": 0, "equality": 2, "compound": 0, "nested_depth": 1},
+        "select_files": {"file1": {"organization": "SEQUENTIAL"}},
+        "variable_patterns": {"has_prev_key": True, "has_accumulator": True, "has_error_field": False},
+    }
+    result = resolve_matching_vs_keybreak(features)
+    assert result["resolved_type"] == "キーブレイク"
+    assert result["confidence"] >= 0.70
+    assert len(result["evidence"]) > 0
+
+
+def test_matching_vs_keybreak_unknown():
+    """特征不足 → unknown"""
+    features = {
+        "if_types": {"total": 0, "comparison": 0, "equality": 0, "compound": 0, "nested_depth": 0},
+        "select_files": {},
+        "variable_patterns": {"has_prev_key": False, "has_accumulator": False, "has_error_field": False},
+    }
+    result = resolve_matching_vs_keybreak(features)
+    assert result["resolved_type"] == "unknown"
+    assert result["confidence"] == 0.0
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 2. confusion_groups — dedup_vs_nodedup
+# ═══════════════════════════════════════════════════════════════════════════
+
+def test_dedup_vs_nodedup_dedup():
+    """WS-PREV-KEY 存在 → 含重复"""
+    features = {"variable_patterns": {"has_prev_key": True, "has_accumulator": False, "has_error_field": False}}
+    result = resolve_dedup_vs_nodedup(features)
+    assert result["resolved_type"] == "項目チェック(重複含む)"
+    assert result["confidence"] >= 0.85
+
+
+def test_dedup_vs_nodedup_nodedup():
+    """WS-PREV-KEY 不存在 → 不含重复"""
+    features = {"variable_patterns": {"has_prev_key": False, "has_accumulator": False, "has_error_field": False}}
+    result = resolve_dedup_vs_nodedup(features)
+    assert result["resolved_type"] == "項目チェック(重複含まず)"
+    assert result["confidence"] >= 0.70
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 3. confusion_groups — validation_vs_keybreak
+# ═══════════════════════════════════════════════════════════════════════════
+
+def test_validation_vs_keybreak_validation():
+    """WS-ERR* 错误字段存在 → 校验"""
+    features = {"variable_patterns": {"has_error_flag": True, "has_counter": False, "has_prev_key": False}}
+    result = resolve_validation_vs_keybreak(features)
+    assert result["resolved_type"] == "編集処理(校验)"
+    assert result["confidence"] >= 0.70
+
+
+def test_validation_vs_keybreak_keybreak():
+    """WS-*CNT 计数器存在 → キーブレイク"""
+    features = {"variable_patterns": {"has_error_field": False, "has_counter": True, "has_prev_key": False}}
+    result = resolve_validation_vs_keybreak(features)
+    assert result["resolved_type"] == "キーブレイク"
+    assert result["confidence"] >= 0.75
+
+
+def test_validation_vs_keybreak_unknown():
+    """既无错误字段也无计数器 → unknown"""
+    features = {"variable_patterns": {"has_error_field": False, "has_counter": False, "has_prev_key": False}}
+    result = resolve_validation_vs_keybreak(features)
+    assert result["resolved_type"] == "unknown"
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 4. confusion_groups — csv_merge_vs_split
+# ═══════════════════════════════════════════════════════════════════════════
+
+def test_csv_merge_vs_split_merge():
+    """STRING 存在 → CSV合并"""
+    features = {"has_string": True, "has_inspect": False}
+    result = resolve_csv_merge_vs_split(features)
+    assert result["resolved_type"] == "CSV合并"
+    assert result["confidence"] >= 0.70
+
+
+def test_csv_merge_vs_split_split():
+    """INSPECT REPLACING 存在 → CSV拆分"""
+    features = {"has_string": False, "has_inspect": True}
+    result = resolve_csv_merge_vs_split(features)
+    assert result["resolved_type"] == "CSV拆分"
+    assert result["confidence"] >= 0.70
+
+
+def test_csv_merge_vs_split_both():
+    """两个都存在 → STRING 优先 (CSV合并)"""
+    features = {"has_string": True, "has_inspect": True}
+    result = resolve_csv_merge_vs_split(features)
+    assert result["resolved_type"] == "CSV合并"
+
+
+def test_csv_merge_vs_split_unknown():
+    """两者都不存在 → unknown"""
+    features = {"has_string": False, "has_inspect": False}
+    result = resolve_csv_merge_vs_split(features)
+    assert result["resolved_type"] == "unknown"
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 5. confusion_groups — simple_vs_two_stage
+# ═══════════════════════════════════════════════════════════════════════════
+
+def test_simple_vs_two_stage_two_stage():
+    """OPEN→CLOSE→再OPEN → 二级匹配"""
+    features = {"open_pattern": "open-close-open"}
+    result = resolve_simple_vs_two_stage(features)
+    assert result["resolved_type"] == "二段階マッチング"
+    assert result["confidence"] >= 0.85
+
+
+def test_simple_vs_two_stage_simple():
+    """顺序 OPEN → 简单匹配"""
+    features = {"open_pattern": "sequential"}
+    result = resolve_simple_vs_two_stage(features)
+    assert result["resolved_type"] == "単純マッチング"
+    assert result["confidence"] >= 0.75
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 6. confusion_groups — pure_vs_mixed
+# ═══════════════════════════════════════════════════════════════════════════
+
+def test_pure_vs_mixed_mixed():
+    """has_switch + has_counter + IF≥3 → 混合匹配"""
+    features = {"variable_patterns": {"has_switch": True, "has_counter": True}, "if_types": {"total": 3}}
+    result = resolve_pure_vs_mixed(features)
+    assert result["resolved_type"] == "混合マッチング"
+    assert result["confidence"] >= 0.70
+
+
+def test_pure_vs_mixed_pure():
+    """无混合特征 → unknown（无法静态确定）"""
+    features = {"variable_patterns": {"has_switch": False, "has_counter": False}, "if_types": {"total": 1}}
+    result = resolve_pure_vs_mixed(features)
+    assert result["resolved_type"] == "unknown"
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 7. confusion_groups — division_50_25_100
+# ═══════════════════════════════════════════════════════════════════════════
+
+def test_division_50():
+    """DIVIDE 被除数 = 50"""
+    features = {"divide_constants": [50]}
+    result = resolve_division_50_25_100(features)
+    assert result["resolved_type"] == "DIVIDE_50"
+    assert result["confidence"] >= 0.90
+
+
+def test_division_100():
+    """DIVIDE 被除数 = 100"""
+    features = {"divide_constants": [100]}
+    result = resolve_division_50_25_100(features)
+    assert result["resolved_type"] == "DIVIDE_100"
+    assert result["confidence"] >= 0.90
+
+
+def test_division_unknown():
+    """无匹配常量 → unknown"""
+    features = {"divide_constants": [10, 20]}
+    result = resolve_division_50_25_100(features)
+    assert result["resolved_type"] == "unknown"
+    assert result["confidence"] == 0.0
+
+
+def test_division_empty():
+    """空列表 → unknown"""
+    features = {"divide_constants": []}
+    result = resolve_division_50_25_100(features)
+    assert result["resolved_type"] == "unknown"
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 8. confusion_groups — mn_output_mode
+# ═══════════════════════════════════════════════════════════════════════════
+
+def test_mn_output_mode_known():
+    """SELECT≥2 + 分支≥3 → M:N"""
+    features = {"select_files": {"a": {}, "b": {}, "c": {}}, "total_branches": 3}
+    result = resolve_mn_output_mode(features)
+    assert result["resolved_type"] == "M:N"
+    assert result["confidence"] >= 0.60
+
+
+def test_mn_output_mode_unknown():
+    """无提示且文件 < 3 → unknown (需数据验证)"""
+    features = {"has_mn_output_hint": False, "select_files": {"a": {}, "b": {}}}
+    result = resolve_mn_output_mode(features)
+    assert result["resolved_type"] == "unknown"
+    assert result["confidence"] == 0.0
+
+
+def test_mn_output_mode_many_files():
+    """文件数 >=3 无提示 → M:N"""
+    features = {"has_mn_output_hint": False, "select_files": {"a": {}, "b": {}, "c": {}}}
+    result = resolve_mn_output_mode(features)
+    assert result["resolved_type"] == "M:N"
+    assert result["confidence"] >= 0.55
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 9. resolve_confusion_pair — dispatcher
+# ═══════════════════════════════════════════════════════════════════════════
+
+def test_resolve_confusion_pair_dispatch():
+    """resolve_confusion_pair 正确调度到具体函数"""
+    features = {
+        "variable_patterns": {"has_prev_key": True, "has_accumulator": False, "has_error_field": False},
+    }
+    result = resolve_confusion_pair(features, "dedup_vs_nodedup")
+    assert result["resolved_type"] == "項目チェック(重複含む)"
+
+    result = resolve_confusion_pair(features, "nonexistent_pair")
+    assert result["resolved_type"] == "unknown"
+    assert "未知混淆对名称" in result["evidence"][0]
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 10. contradiction — detect_contradictions
+# ═══════════════════════════════════════════════════════════════════════════
+
+def test_detect_contradictions_empty():
+    """无 resolved_types → 空矛盾列表"""
+    features = {"resolved_types": {}}
+    assert detect_contradictions(features) == []
+
+
+def test_detect_contradictions_no_contradiction():
+    """只有一个类型 → 无矛盾"""
+    features = {
+        "resolved_types": {
+            "pair_1": "マッチング",
+        }
+    }
+    assert detect_contradictions(features) == []
+
+
+def test_detect_contradictions_found():
+    """マッチング 和 キーブレイク 同时存在 → 检测到矛盾"""
+    features = {
+        "resolved_types": {
+            "pair_1": "マッチング",
+            "pair_2": "キーブレイク",
+        }
+    }
+    contradictions = detect_contradictions(features)
+    assert len(contradictions) >= 1
+    match = [c for c in contradictions if c["type_a"] == "マッチング" and c["type_b"] == "キーブレイク"]
+    assert len(match) >= 1
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 11. contradiction — resolve_contradiction
+# ═══════════════════════════════════════════════════════════════════════════
+
+def test_resolve_contradiction_priority():
+    """マッチング(prio=10) 胜出 over キーブレイク(prio=9)"""
+    contradiction = {"name": "matching_vs_keybreak", "type_a": "マッチング", "type_b": "キーブレイク"}
+    result = resolve_contradiction({}, contradiction)
+    assert result == "マッチング"
+
+
+def test_resolve_contradiction_csv():
+    """CSV合并(prio=6) == CSV拆分(prio=6) → 使用重判定"""
+    contradiction = {"name": "csv_merge_vs_split", "type_a": "CSV合并", "type_b": "CSV拆分"}
+    features = {"has_string": True, "has_inspect": False}
+    result = resolve_contradiction(features, contradiction)
+    assert result == "CSV合并"
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 12. contradiction — CONTRACTION_PAIRS 常量
+# ═══════════════════════════════════════════════════════════════════════════
+
+def test_contradiction_pairs_defined():
+    """CONTRADICTION_PAIRS 包含所有 8 个混淆对"""
+    assert len(CONTRADICTION_PAIRS) == 8
+    names = {p["name"] for p in CONTRADICTION_PAIRS}
+    expected = {
+        "matching_vs_keybreak", "dedup_vs_nodedup", "validation_vs_keybreak",
+        "csv_merge_vs_split", "simple_vs_two_stage", "pure_vs_mixed",
+        "division_50_25_100", "mn_output_mode",
+    }
+    assert names == expected
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 13. backtrack — BacktrackResolver
+# ═══════════════════════════════════════════════════════════════════════════
+
+def test_backtrack_no_contradiction():
+    """无矛盾 → 一轮解决，backtrack_resolved=True"""
+
+    def extractor(src: str) -> dict:
+        return {"resolved_types": {"pair_1": "マッチング"}, "if_types": {}}
+
+    resolver = BacktrackResolver(extractor)
+    result = resolver.resolve("some source", {"resolved_types": {"pair_1": "マッチング"}})
+    assert result["backtrack_resolved"] is True
+    assert result["backtrack_rounds"] == 0
+
+
+def test_backtrack_with_contradiction():
+    """有矛盾 → 解决，标记 round"""
+
+    def extractor(src: str) -> dict:
+        return {"resolved_types": {"pair_1": "マッチング"}, "if_types": {}}
+
+    features = {
+        "resolved_types": {
+            "pair_1": "マッチング",
+            "pair_2": "キーブレイク",
+        }
+    }
+    resolver = BacktrackResolver(extractor)
+    result = resolver.resolve("some source", features)
+
+    # 核心断言: 矛盾被解决 (resolved_* keys 出现)
+    resolved_keys = [k for k in result if k.startswith("resolved_")]
+    assert len(resolved_keys) >= 1
+    assert result["backtrack_rounds"] >= 1
+
+
+def test_backtrack_max_rounds_degraded():
+    """持续矛盾 → 耗尽 max_rounds 后 degraded"""
+
+    round_count = 0
+
+    def extractor(src: str) -> dict:
+        nonlocal round_count
+        round_count += 1
+        # 每次都返回包含矛盾的特征
+        return {
+            "resolved_types": {
+                "pair_1": "マッチング",
+                "pair_2": "キーブレイク",
+            }
+        }
+
+    features = {
+        "resolved_types": {
+            "pair_1": "マッチング",
+            "pair_2": "キーブレイク",
+        }
+    }
+    resolver = BacktrackResolver(extractor)
+    resolver.max_rounds = 2
+    result = resolver.resolve("some source", features)
+
+    assert result["backtrack_degraded"] is True
+    # 应已进行多轮尝试
+    assert result["backtrack_rounds"] >= 1
+
+
+def test_backtrack_extract_error():
+    """提取器抛异常 → 标记 extract_error"""
+
+    def extractor(src: str) -> dict:
+        raise ValueError("extraction failed")
+
+    features = {
+        "resolved_types": {
+            "pair_1": "マッチング",
+            "pair_2": "キーブレイク",
+        }
+    }
+    resolver = BacktrackResolver(extractor)
+    result = resolver.resolve("some source", features)
+
+    assert result.get("backtrack_extract_error") is True
+
+
+def test_backtrack_no_contradiction():
+    """无矛盾 → 不超时，直接返回"""
+
+    def fast_extractor(src: str) -> dict:
+        return {"resolved_types": {}}
+
+    resolver = BacktrackResolver(fast_extractor)
+    result = resolver.resolve("source", {"resolved_types": {}})
+
+    assert isinstance(result, dict)
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 14. Integration — full round-trip via resolve_confusion_pair
+# ═══════════════════════════════════════════════════════════════════════════
+
+def test_integration_matching_roundtrip():
+    """完整流程: 通过 resolve_confusion_pair → resolve_matching_vs_keybreak"""
+    features = {
+        "if_types": {"total": 5, "comparison": 3, "equality": 1, "compound": 1, "nested_depth": 2},
+        "select_files": {"f1": {}, "f2": {}},
+        "variable_patterns": {"has_prev_key": False, "has_accumulator": False, "has_error_field": False},
+    }
+    result = resolve_confusion_pair(features, "matching_vs_keybreak")
+    assert result["resolved_type"] in ("マッチング", "キーブレイク", "unknown")
+    assert "confidence" in result
+    assert "evidence" in result
+
+
+def test_integration_contradiction_resolve_cycle():
+    """矛盾检测 → 解决完整闭环"""
+    features = {
+        "resolved_types": {
+            "from_keyword": "マッチング",
+            "from_llm": "キーブレイク",
+        }
+    }
+    contradictions = detect_contradictions(features)
+    assert len(contradictions) >= 1
+
+    winner = resolve_contradiction(features, contradictions[0])
+    assert winner in ("マッチング", "キーブレイク")