feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark

P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged into hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-19 23:51:55 +08:00
parent 63b5284715
commit bc1d56d1a4
129 changed files with 19378 additions and 261 deletions
@@ -0,0 +1,151 @@
+"""AG-01~12: tests for the agents module (LLMClient, Agent1Parser, Agent2Data, Agent3Diagnostic)."""
+
+import sys, os, json, tempfile
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
+from agents.llm import LLMClient
+from agents.agent1_parser import Agent1Parser
+from agents.agent2_data import Agent2Data
+from agents.agent3_diagnostic import Agent3Diagnostic
+from data.diff_result import FieldResult
+
+
+def _llm_client(cache_dir=None):
+    """Build an ``LLMClient`` for tests, backed by an on-disk cache directory.
+
+    Args:
+        cache_dir: Directory passed to ``LLMClient`` as ``cache_dir``. When
+            ``None``, a fresh temporary directory is created for this client.
+
+    Returns:
+        An ``LLMClient`` constructed with ``model="test"``.
+    """
+    if cache_dir is None:
+        # Local imports keep this helper self-contained.
+        import atexit
+        import shutil
+
+        cache_dir = tempfile.mkdtemp()
+        # mkdtemp() never removes what it creates; clean up at interpreter
+        # exit so repeated test runs do not accumulate stray directories.
+        atexit.register(shutil.rmtree, cache_dir, ignore_errors=True)
+    return LLMClient(model="test", cache_dir=cache_dir)
+
+
+def _mock_response(content="resp"):
+    """Return a stand-in HTTP response carrying ``content`` as the reply text.
+
+    The mock's ``json()`` yields a chat-completion shaped payload whose first
+    choice holds ``content``; ``raise_for_status()`` returns ``None``.
+    """
+    payload = {"choices": [{"message": {"content": content}}]}
+    response = MagicMock()
+    response.configure_mock(**{
+        "json.return_value": payload,
+        "raise_for_status.return_value": None,
+    })
+    return response
+
+
+# ── AG-01~05: LLMClient ──
+
+def test_llm_call_returns_string():
+    """AG-01: ``call`` returns the reply text as a string."""
+    llm = _llm_client()
+    fake_reply = _mock_response("hello")
+    with patch("httpx.post", return_value=fake_reply):
+        answer = llm.call([{"role": "user", "content": "hi"}])
+    assert answer == "hello"
+
+
+def test_llm_cache_hit():
+    """AG-02: a repeated, identical request is answered from the cache."""
+    def _ping():
+        # Build a fresh message list per call: the two requests must be equal
+        # in content but need not share an object.
+        return [{"role": "user", "content": "ping"}]
+
+    with tempfile.TemporaryDirectory() as cache_dir:
+        llm = _llm_client(cache_dir)
+        # Prime the client: the first call is answered by the patched httpx.post.
+        with patch("httpx.post", return_value=_mock_response("resp1")):
+            llm.call(_ping())
+        # Same request again: the earlier reply must come back and httpx.post
+        # must not be invoked.
+        with patch("httpx.post") as second_post:
+            cached = llm.call(_ping())
+        assert cached == "resp1"
+        second_post.assert_not_called()
+
+
+def test_llm_timeout():
+    """AG-03: a simulated timeout from ``httpx.post`` surfaces as an exception.
+
+    The timeout is modelled with a plain ``Exception("timeout")`` and the call
+    is made with ``retries=0``.
+    """
+    import pytest
+
+    llm = _llm_client()
+    boom = Exception("timeout")
+    with patch("httpx.post", side_effect=boom), pytest.raises(Exception):
+        llm.call([{"role": "user", "content": "hi"}], retries=0)
+
+
+def test_llm_retry_success():
+    """AG-04: the first attempt fails, the retry succeeds."""
+    pending_failures = [Exception("first fail")]
+
+    def _flaky_post(*args, **kwargs):
+        # Raise once, then answer normally on every later attempt.
+        if pending_failures:
+            raise pending_failures.pop()
+        return _mock_response("ok")
+
+    with tempfile.TemporaryDirectory() as cache_dir:
+        llm = _llm_client(cache_dir)
+        with patch("httpx.post", side_effect=_flaky_post):
+            outcome = llm.call([{"role": "user", "content": "retry"}], retries=1)
+            assert outcome == "ok"
+
+
+def test_llm_retry_exhausted():
+    """AG-05: every attempt fails, so the call raises once retries run out.
+
+    Uses ``retries=1`` so that a retry actually happens; with ``retries=0``
+    this case would be indistinguishable from the AG-03 timeout test.
+    """
+    import pytest
+
+    client = _llm_client()
+    with patch("httpx.post", side_effect=Exception("fail")) as mock_post:
+        with pytest.raises(Exception):
+            client.call([{"role": "user", "content": "x"}], retries=1)
+    # AG-04 shows that retries=1 permits a second attempt after a failure, so
+    # an exhausted call must have tried at least twice before giving up.
+    assert mock_post.call_count >= 2
+
+
+# ── AG-06~08: Agent1Parser ──
+
+def test_agent1_parse_valid():
+    """AG-06: a valid COPYBOOK field returned by the LLM appears in the tree."""
+    field = {"name": "WS-A", "level": 5, "pic": "9(4)", "length": 4, "offset": 0}
+    fake_llm = MagicMock()
+    fake_llm.call.return_value = json.dumps({"fields": [field]})
+    parser = Agent1Parser(fake_llm)
+    flattened = parser.parse("text").flatten()
+    assert "WS-A" in flattened
+
+
+def test_agent1_parse_bad_json():
+    """AG-07: a non-JSON LLM reply yields a tree named ``parse_error``."""
+    fake_llm = MagicMock(**{"call.return_value": "not json"})
+    result = Agent1Parser(fake_llm).parse("x")
+    assert result.copybook_name == "parse_error"
+
+
+def test_agent1_parse_empty():
+    """AG-08: an LLM reply whose JSON lacks ``fields`` yields no fields."""
+    llm = MagicMock()
+    llm.call.return_value = json.dumps({})
+    tree = Agent1Parser(llm).parse("x")
+    # The previous check (len(...) >= 0) could never fail. With no "fields"
+    # key in the reply the tree is expected to be empty.
+    # NOTE(review): confirm against Agent1Parser's handling of a missing key.
+    assert len(tree.fields) == 0
+
+
+# ── AG-09~11: Agent2Data ──
+
+def test_agent2_design_normal():
+    """AG-09: a well-formed LLM reply produces a suite object."""
+    from data.field_tree import FieldTree, Field
+
+    reply = {"test_cases": [{"id": "TC-1", "fields": {"A": 1}}]}
+    fake_llm = MagicMock()
+    fake_llm.call.return_value = json.dumps(reply)
+    tree = FieldTree(fields=[Field(name="A", level=5, pic="9(4)")])
+    result = Agent2Data(fake_llm).design(tree)
+    assert result is not None
+
+
+def test_agent2_design_fallback():
+    """AG-10: a non-JSON LLM reply makes ``design`` return the fallback case."""
+    from data.field_tree import FieldTree
+
+    fake_llm = MagicMock(**{"call.return_value": "not-json"})
+    empty_tree = FieldTree(fields=[])
+    result = Agent2Data(fake_llm).design(empty_tree)
+    # Expected path per the original author's note (implementation not shown
+    # here): json.loads raises JSONDecodeError, the handler catches it and a
+    # TC-FALLBACK case is returned.
+    cases = result.test_cases
+    assert len(cases) >= 1
+    assert cases[0].id == "TC-FALLBACK"
+
+
+def test_agent2_design_spark():
+    """AG-11: ``spark_mode=True`` yields a suite whose ``has_spark`` is True."""
+    from data.field_tree import FieldTree
+
+    fake_llm = MagicMock()
+    fake_llm.call.return_value = json.dumps({"test_cases": []})
+    designer = Agent2Data(fake_llm)
+    result = designer.design(FieldTree(fields=[]), spark_mode=True)
+    assert result.has_spark is True
+
+
+# ── AG-12: Agent3Diagnostic ──
+
+def test_agent3_analyze():
+    """AG-12: analysing a MISMATCH field yields non-empty diagnostic text."""
+    fake_llm = MagicMock()
+    fake_llm.call.return_value = "rounding error"
+    mismatch = FieldResult(
+        field_name="BR-AMT",
+        status="MISMATCH",
+        cobol_value="1500000",
+        java_value="1499999.99",
+    )
+    diagnosis = Agent3Diagnostic(fake_llm).analyze(mismatch)
+    assert isinstance(diagnosis, str)
+    assert len(diagnosis) > 0