feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark
P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged to hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,148 @@
|
||||
"""HA-01~10: HINA Agent — LLM 分类 + 回退 + 解析"""
|
||||
|
||||
import sys, os, json
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
|
||||
from hina.hina_agent import (
|
||||
classify_with_llm, _parse_llm_response, _validate_result, _fallback_classification,
|
||||
)
|
||||
|
||||
|
||||
class _MockLLMPass:
    """Stand-in LLM client whose ``call`` returns a well-formed JSON reply."""

    def call(self, msgs, retries=1):
        """Return a fixed classification payload serialized as a JSON string.

        Both ``msgs`` and ``retries`` are accepted but ignored; the reply is
        always the same ``condition_heavy`` / ``nested_if`` classification.
        """
        strategy = {
            "max_nesting_depth": 3,
            "coverage_target": "branch",
            "file_isolation": False,
            "supplement_strategy": "incremental",
        }
        # Key order is kept stable so the serialized text never changes.
        reply = {
            "category": "condition_heavy",
            "subtype": "nested_if",
            "confidence": 0.85,
            "features": {},
            "required_tests": 10,
            "strategy_params": strategy,
        }
        return json.dumps(reply)
|
||||
|
||||
|
||||
class _MockLLMEmpty:
    """Stand-in LLM client whose ``call`` returns an empty reply."""

    def call(self, msgs, retries=1):
        """Return an empty string regardless of ``msgs`` and ``retries``."""
        empty_reply = ""
        return empty_reply
|
||||
|
||||
|
||||
class _MockLLMBadJSON:
    """Stand-in LLM client whose ``call`` returns text that is not JSON."""

    def call(self, msgs, retries=1):
        """Return a fixed non-JSON string regardless of the arguments."""
        garbage_reply = "not valid json at all"
        return garbage_reply
|
||||
|
||||
|
||||
class _MockLLMTimeout:
    """Stand-in LLM client whose ``call`` always raises, imitating a timeout."""

    def call(self, msgs, retries=1):
        """Raise a plain ``Exception`` whose message names an httpx timeout."""
        simulated_failure = Exception("httpx.TimeoutException")
        raise simulated_failure
|
||||
|
||||
|
||||
# ── HA-01: normal classify_with_llm ──
|
||||
|
||||
def test_classify_with_llm_normal():
    """HA-01: a valid structure yields a dict that carries the LLM's category."""
    program_structure = {
        "paragraph_count": 5,
        "decision_count": 3,
        "if_count": 2,
        "evaluate_count": 0,
        "file_count": 1,
        "open_directions": ["INPUT"],
        "has_search_all": False,
        "has_call": False,
        "has_break": False,
        "total_branches": 4,
    }
    llm = _MockLLMPass()

    outcome = classify_with_llm(program_structure, llm)

    assert isinstance(outcome, dict)
    assert "category" in outcome
    assert outcome["category"] == "condition_heavy"
|
||||
|
||||
|
||||
# ── HA-02~04: LLM error handling ──
|
||||
|
||||
def test_classify_with_llm_bad_json():
    """HA-03: the LLM returns invalid JSON; a dict result is still expected."""
    minimal_structure = {
        "paragraph_count": 1,
        "decision_count": 0,
        "if_count": 0,
    }

    outcome = classify_with_llm(minimal_structure, _MockLLMBadJSON())

    assert isinstance(outcome, dict)
    # Either key is accepted as evidence that a classification came back.
    assert any(key in outcome for key in ("category", "confidence"))
|
||||
|
||||
|
||||
def test_classify_with_llm_empty():
    """HA-03 (same case): the LLM returns an empty string; a dict is expected."""
    minimal_structure = {
        "paragraph_count": 1,
        "decision_count": 0,
        "if_count": 0,
    }

    outcome = classify_with_llm(minimal_structure, _MockLLMEmpty())

    assert isinstance(outcome, dict)
|
||||
|
||||
|
||||
def test_classify_with_llm_timeout():
    """HA-04: the LLM call raises (timeout); no crash and a dict is expected."""
    minimal_structure = {
        "paragraph_count": 1,
        "decision_count": 0,
        "if_count": 0,
    }

    outcome = classify_with_llm(minimal_structure, _MockLLMTimeout())

    assert isinstance(outcome, dict)
|
||||
|
||||
|
||||
# ── HA-05~07: _parse_llm_response ──
|
||||
|
||||
def test_parse_llm_json():
    """HA-05: a well-formed JSON string is parsed into its fields."""
    raw_reply = '{"category": "DB操作", "confidence": 0.95}'

    parsed = _parse_llm_response(raw_reply)

    assert parsed["category"] == "DB操作"
    assert parsed["confidence"] == 0.95
|
||||
|
||||
|
||||
def test_parse_llm_invalid_json():
    """HA-06: non-JSON input must not raise; the result is None or a dict."""
    parsed = _parse_llm_response("暂无")

    assert isinstance(parsed, (dict, type(None)))
|
||||
|
||||
|
||||
def test_parse_llm_markdown_wrapped():
    """HA-07: JSON wrapped in a ```json markdown fence is still parsed."""
    fenced_reply = "\n".join(
        [
            "```json",
            '{"category": "SORT", "confidence": 0.9}',
            "```",
        ]
    )

    parsed = _parse_llm_response(fenced_reply)

    assert parsed is not None
    assert parsed.get("category") == "SORT"
|
||||
|
||||
|
||||
def test_parse_llm_empty_string():
    """An empty string parses to the default dict: unknown category, 0.0 confidence."""
    parsed = _parse_llm_response("")

    assert parsed["category"] == "unknown"
    assert parsed["confidence"] == 0.0
|
||||
|
||||
|
||||
# ── HA-08~10: _fallback_classification ──
|
||||
|
||||
def test_fallback_no_decision():
    """HA-08: with no decision points the fallback picks simple_sequential."""
    no_decision_structure = {
        "decision_points": [],
        "file_count": 0,
    }

    verdict = _fallback_classification(no_decision_structure)

    assert verdict["category"] == "simple_sequential"
|
||||
|
||||
|
||||
def test_fallback_call():
    """HA-09: a structure flagged has_call falls back to call_based."""
    call_structure = {
        "decision_points": [{"kind": "IF"}],
        "file_count": 0,
        "has_call": True,
        "has_search_all": False,
        "has_break": False,
    }

    verdict = _fallback_classification(call_structure)

    assert verdict["category"] == "call_based"
|
||||
|
||||
|
||||
def test_fallback_search():
    """HA-10: a structure flagged has_search_all falls back to search_intensive."""
    search_structure = {
        "decision_points": [{"kind": "IF"}],
        "file_count": 0,
        "has_call": False,
        "has_search_all": True,
        "has_break": False,
    }

    verdict = _fallback_classification(search_structure)

    assert verdict["category"] == "search_intensive"
|
||||
|
||||
|
||||
# ── _validate_result ──
|
||||
|
||||
def test_validate_valid():
    """A well-formed result passes validation and comes back as a dict."""
    candidate = {
        "category": "condition_heavy",
        "confidence": 0.8,
        "features": {},
    }

    validated = _validate_result(candidate)

    assert isinstance(validated, dict)
|
||||
|
||||
|
||||
def test_validate_missing_category():
    """A result without a category is validated to category "unknown"."""
    candidate = {"confidence": 0.8}

    validated = _validate_result(candidate)

    assert validated["category"] == "unknown"
|
||||
Reference in New Issue
Block a user