Files
cobol-java-v3/tests/hina/test_agent.py
hangshuo652 bc1d56d1a4 feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark
P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged to hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-19 23:51:55 +08:00

149 lines
4.6 KiB
Python

"""HA-01~10: HINA Agent — LLM 分类 + 回退 + 解析"""
import sys, os, json
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
from hina.hina_agent import (
classify_with_llm, _parse_llm_response, _validate_result, _fallback_classification,
)
class _MockLLMPass:
    """Stub LLM client whose ``call`` returns a well-formed JSON classification."""

    def call(self, msgs, retries=1):
        """Return a fixed JSON document as a string.

        Args:
            msgs: Accepted for signature compatibility; not read.
            retries: Accepted for signature compatibility; not read.

        Returns:
            The ``json.dumps`` serialization of a canned classification
            result (category ``condition_heavy``, confidence ``0.85``).
        """
        return json.dumps({
            "category": "condition_heavy",
            "subtype": "nested_if",
            "confidence": 0.85,
            "features": {},
            "required_tests": 10,
            "strategy_params": {
                "max_nesting_depth": 3,
                "coverage_target": "branch",
                "file_isolation": False,
                "supplement_strategy": "incremental",
            },
        })
class _MockLLMEmpty:
    """Stub LLM client whose ``call`` returns an empty string."""

    def call(self, msgs, retries=1):
        """Return ``""``; ``msgs`` and ``retries`` are accepted but not read."""
        return ""
class _MockLLMBadJSON:
    """Stub LLM client whose ``call`` returns text that is not JSON."""

    def call(self, msgs, retries=1):
        """Return a fixed non-JSON string; ``msgs`` and ``retries`` are not read."""
        return "not valid json at all"
class _MockLLMTimeout:
    """Stub LLM client whose ``call`` always raises, standing in for a timeout."""

    def call(self, msgs, retries=1):
        """Raise ``Exception("httpx.TimeoutException")`` unconditionally.

        A plain ``Exception`` is raised on purpose: the caller under test has
        to survive it without relying on a narrower exception type.

        Raises:
            Exception: Always.
        """
        raise Exception("httpx.TimeoutException")
# ── HA-01: normal classify_with_llm ──
def test_classify_with_llm_normal():
    """HA-01: a valid structure and a well-formed LLM reply yield a dict with ``category``."""
    structure = {
        "paragraph_count": 5, "decision_count": 3, "if_count": 2,
        "evaluate_count": 0, "file_count": 1, "open_directions": ["INPUT"],
        "has_search_all": False, "has_call": False, "has_break": False,
        "total_branches": 4,
    }
    result = classify_with_llm(structure, _MockLLMPass())
    assert isinstance(result, dict)
    assert "category" in result
    # The stub replies with "condition_heavy"; that category is expected back.
    assert result["category"] == "condition_heavy"
# ── HA-02~04: LLM error handling ──
def test_classify_with_llm_bad_json():
    """HA-03: the LLM returns invalid JSON; a fallback result is expected.

    Only the shape of the result is checked: it must be a dict carrying
    either a ``category`` or a ``confidence`` key.
    """
    structure = {"paragraph_count": 1, "decision_count": 0, "if_count": 0}
    result = classify_with_llm(structure, _MockLLMBadJSON())
    assert isinstance(result, dict)
    assert "category" in result or "confidence" in result
def test_classify_with_llm_empty():
    """HA-03 (same case): the LLM returns an empty string; a dict must still come back."""
    structure = {"paragraph_count": 1, "decision_count": 0, "if_count": 0}
    result = classify_with_llm(structure, _MockLLMEmpty())
    assert isinstance(result, dict)
def test_classify_with_llm_timeout():
    """HA-04: the LLM call raises (simulated timeout); no crash, a dict is returned."""
    structure = {"paragraph_count": 1, "decision_count": 0, "if_count": 0}
    # The stub raises from call(); the exception must not escape classify_with_llm.
    result = classify_with_llm(structure, _MockLLMTimeout())
    assert isinstance(result, dict)
# ── HA-05~07: _parse_llm_response ──
def test_parse_llm_json():
    """HA-05: a valid JSON string is parsed and its fields are preserved."""
    r = _parse_llm_response('{"category": "DB操作", "confidence": 0.95}')
    # The non-ASCII category value must come back unchanged.
    assert r["category"] == "DB操作"
    assert r["confidence"] == 0.95
def test_parse_llm_invalid_json():
    """HA-06: non-JSON input must not raise; the result is ``None`` or a dict."""
    r = _parse_llm_response("暂无")
    assert r is None or isinstance(r, dict)
def test_parse_llm_markdown_wrapped():
    """HA-07: JSON wrapped in a ```json markdown fence is still parsed."""
    raw = '```json\n{"category": "SORT", "confidence": 0.9}\n```'
    r = _parse_llm_response(raw)
    assert r is not None
    assert r.get("category") == "SORT"
def test_parse_llm_empty_string():
    """An empty string is expected to produce the default dict.

    The default is pinned as category ``"unknown"`` with confidence ``0.0``.
    """
    r = _parse_llm_response("")
    assert r["category"] == "unknown"
    assert r["confidence"] == 0.0
# ── HA-08~10: _fallback_classification ──
def test_fallback_no_decision():
    """HA-08: no decision points (empty ``decision_points``) gives ``simple_sequential``."""
    structure = {"decision_points": [], "file_count": 0}
    r = _fallback_classification(structure)
    assert r["category"] == "simple_sequential"
def test_fallback_call():
    """HA-09: ``has_call`` set, with one IF decision point, gives ``call_based``."""
    structure = {
        "decision_points": [{"kind": "IF"}],
        "file_count": 0, "has_call": True, "has_search_all": False, "has_break": False,
    }
    r = _fallback_classification(structure)
    assert r["category"] == "call_based"
def test_fallback_search():
    """HA-10: ``has_search_all`` set, with one IF decision point, gives ``search_intensive``."""
    structure = {
        "decision_points": [{"kind": "IF"}],
        "file_count": 0, "has_call": False, "has_search_all": True, "has_break": False,
    }
    r = _fallback_classification(structure)
    assert r["category"] == "search_intensive"
# ── _validate_result ──
def test_validate_valid():
    """A result with category, confidence and features passes validation as a dict."""
    r = _validate_result({"category": "condition_heavy", "confidence": 0.8, "features": {}})
    assert isinstance(r, dict)
def test_validate_missing_category():
    """A result without ``category`` is expected to default it to ``"unknown"``."""
    r = _validate_result({"confidence": 0.8})
    assert r["category"] == "unknown"