feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark

P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged to hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-19 23:51:55 +08:00
parent 63b5284715
commit bc1d56d1a4
129 changed files with 19378 additions and 261 deletions
@@ -0,0 +1,148 @@
+"""HA-01~10: HINA Agent — LLM classification, fallback, and response parsing."""
+
+import sys, os, json
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
+from hina.hina_agent import (
+    classify_with_llm, _parse_llm_response, _validate_result, _fallback_classification,
+)
+
+
+class _MockLLMPass:
+    """Stub LLM client whose ``call`` always returns a well-formed JSON classification."""
+
+    def call(self, msgs, retries=1):
+        """Return a fixed JSON string; ``msgs`` and ``retries`` are accepted but unused."""
+        strategy = {
+            "max_nesting_depth": 3,
+            "coverage_target": "branch",
+            "file_isolation": False,
+            "supplement_strategy": "incremental",
+        }
+        # Key order is kept as-is so the serialized string stays unchanged.
+        payload = {
+            "category": "condition_heavy",
+            "subtype": "nested_if",
+            "confidence": 0.85,
+            "features": {},
+            "required_tests": 10,
+            "strategy_params": strategy,
+        }
+        return json.dumps(payload)
+
+
+class _MockLLMEmpty:
+    """Stub LLM client whose ``call`` always returns an empty string."""
+
+    def call(self, msgs, retries=1):
+        empty_reply = ""
+        return empty_reply
+
+
+class _MockLLMBadJSON:
+    """Stub LLM client whose ``call`` always returns text that is not JSON."""
+
+    def call(self, msgs, retries=1):
+        garbage_reply = "not valid json at all"
+        return garbage_reply
+
+
+class _MockLLMTimeout:
+    """Stub LLM client whose ``call`` always raises, standing in for a timeout.
+
+    The error is a plain ``Exception`` carrying the text
+    "httpx.TimeoutException"; no httpx type is actually involved.
+    """
+
+    def call(self, msgs, retries=1):
+        failure = Exception("httpx.TimeoutException")
+        raise failure
+
+
+# ── HA-01: normal classify_with_llm ──
+
+def test_classify_with_llm_normal():
+    """HA-01: a valid structure dict yields a dict result carrying the expected category."""
+    features = dict(
+        paragraph_count=5,
+        decision_count=3,
+        if_count=2,
+        evaluate_count=0,
+        file_count=1,
+        open_directions=["INPUT"],
+        has_search_all=False,
+        has_call=False,
+        has_break=False,
+        total_branches=4,
+    )
+    outcome = classify_with_llm(features, _MockLLMPass())
+    assert isinstance(outcome, dict)
+    assert "category" in outcome
+    # The stub client reports "condition_heavy", so that value is expected here.
+    assert outcome["category"] == "condition_heavy"
+
+
+# ── HA-02~04: LLM error handling ──
+
+def test_classify_with_llm_bad_json():
+    """HA-03: the LLM returns invalid JSON → a fallback result is expected.
+
+    Only checks that a dict comes back containing "category" or "confidence".
+    """
+    features = dict(paragraph_count=1, decision_count=0, if_count=0)
+    outcome = classify_with_llm(features, _MockLLMBadJSON())
+    assert isinstance(outcome, dict)
+    assert any(key in outcome for key in ("category", "confidence"))
+
+
+def test_classify_with_llm_empty():
+    """HA-03 (same case): the LLM returns an empty string → a fallback result is expected.
+
+    Only checks that a dict comes back.
+    """
+    features = dict(paragraph_count=1, decision_count=0, if_count=0)
+    outcome = classify_with_llm(features, _MockLLMEmpty())
+    assert isinstance(outcome, dict)
+
+
+def test_classify_with_llm_timeout():
+    """HA-04: the LLM client raises (simulated timeout) → fallback, without crashing.
+
+    Only checks that a dict comes back instead of the exception propagating.
+    """
+    features = dict(paragraph_count=1, decision_count=0, if_count=0)
+    outcome = classify_with_llm(features, _MockLLMTimeout())
+    assert isinstance(outcome, dict)
+
+
+# ── HA-05~07: _parse_llm_response ──
+
+def test_parse_llm_json():
+    """HA-05: a valid JSON string is parsed and its fields are preserved."""
+    raw = '{"category": "DB操作", "confidence": 0.95}'
+    parsed = _parse_llm_response(raw)
+    assert parsed["category"] == "DB操作"
+    assert parsed["confidence"] == 0.95
+
+
+def test_parse_llm_invalid_json():
+    """HA-06: non-JSON text must not crash the parser; None or a dict is accepted."""
+    parsed = _parse_llm_response("暂无")
+    assert isinstance(parsed, (dict, type(None)))
+
+
+def test_parse_llm_markdown_wrapped():
+    """HA-07: JSON wrapped in a ```json markdown fence is still parsed."""
+    fenced = '```json\n{"category": "SORT", "confidence": 0.9}\n```'
+    parsed = _parse_llm_response(fenced)
+    assert parsed is not None
+    assert parsed.get("category") == "SORT"
+
+
+def test_parse_llm_empty_string():
+    """An empty string yields the default dict: category "unknown", confidence 0.0."""
+    parsed = _parse_llm_response("")
+    # Checked in the same order as before: category first, then confidence.
+    for key, expected in (("category", "unknown"), ("confidence", 0.0)):
+        assert parsed[key] == expected
+
+
+# ── HA-08~10: _fallback_classification ──
+
+def test_fallback_no_decision():
+    """HA-08: with no decision points the fallback category is simple_sequential."""
+    features = dict(decision_points=[], file_count=0)
+    verdict = _fallback_classification(features)
+    assert verdict["category"] == "simple_sequential"
+
+
+def test_fallback_call():
+    """HA-09: has_call set → the fallback category is call_based."""
+    features = dict(
+        decision_points=[{"kind": "IF"}],
+        file_count=0,
+        has_call=True,
+        has_search_all=False,
+        has_break=False,
+    )
+    verdict = _fallback_classification(features)
+    assert verdict["category"] == "call_based"
+
+
+def test_fallback_search():
+    """HA-10: has_search_all set → the fallback category is search_intensive."""
+    features = dict(
+        decision_points=[{"kind": "IF"}],
+        file_count=0,
+        has_call=False,
+        has_search_all=True,
+        has_break=False,
+    )
+    verdict = _fallback_classification(features)
+    assert verdict["category"] == "search_intensive"
+
+
+# ── _validate_result ──
+
+def test_validate_valid():
+    """A well-formed result passes validation; only checks that a dict is returned."""
+    candidate = {"category": "condition_heavy", "confidence": 0.8, "features": {}}
+    checked = _validate_result(candidate)
+    assert isinstance(checked, dict)
+
+
+def test_validate_missing_category():
+    """A result without "category" is validated to category "unknown"."""
+    candidate = {"confidence": 0.8}
+    checked = _validate_result(candidate)
+    assert checked["category"] == "unknown"