feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark

P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged to hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
hangshuo652
2026-06-19 23:51:55 +08:00
parent 63b5284715
commit bc1d56d1a4
129 changed files with 19378 additions and 261 deletions
+314
View File
@@ -0,0 +1,314 @@
"""Tests for hina/pipeline/pipeline.py — classify_program 完整管道。
覆盖路径:
- 路径 A: keyword confidence >= 90% -> 直接输出
- 路径 B: keyword 50-89% -> 规则引擎 + 矛盾回溯
- 路径 C: keyword < 50% -> LLM 辅助
- 无矛盾场景
- orchestrator 集成契约
- 空源码边界
"""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
from hina import classify_program
from hina.pipeline.pipeline import _get_best_keyword_match
# ── _get_best_keyword_match 单元测试 ────────────────────────────────────────────
class TestGetBestKeywordMatch:
def test_empty_matches(self) -> None:
assert _get_best_keyword_match([]) is None
def test_single_match(self) -> None:
result = _get_best_keyword_match([("DB操作", 0.95, "EXEC SQL")])
assert result is not None
assert result["category"] == "DB操作"
assert result["confidence"] == 0.95
assert result["keyword"] == "EXEC SQL"
def test_multiple_matches_picks_highest(self) -> None:
matches = [
("子程序调用", 0.90, "CALL"),
("DB操作", 0.95, "EXEC SQL"),
("SORT", 0.95, "SORT ON KEY"),
]
result = _get_best_keyword_match(matches)
assert result is not None
assert result["confidence"] == 0.95
# 置信度相同时取第一个最高值
assert "all_matches" in result
assert len(result["all_matches"]) == 3
# ── classify_program 管道测试 (模拟依赖) ──────────────────────────────────────
def _make_mock_structure(**overrides) -> dict:
"""生成用于 mock 的标准 structure dict。"""
base = {
"total_paragraphs": 5,
"file_count": 2,
"decision_points": [{"id": 1, "kind": "IF", "label": "A > B", "branches": 2}],
"if_types": {"total": 1, "comparison": 1, "equality": 0, "compound": 0, "nested_depth": 0},
"branch_tree_obj": MagicMock(),
"has_call": False,
"has_divide": False,
"has_string": False,
"has_inspect": False,
"open_pattern": "sequential",
"select_files": {"FILE1": ["REC1"], "FILE2": ["REC2"]},
"variable_patterns": {
"has_prev_key": False,
"has_accumulator": False,
"has_error_flag": False,
"has_switch": False,
"has_index": False,
"has_save_area": False,
"has_counter": False,
"has_work": False,
},
"divide_constants": [],
"open_directions": {},
}
base.update(overrides)
return base
class TestClassifyProgramPipeline:
# ── 路径 A: keyword >= 90% ──
@patch("hina.pipeline.pipeline.detect_keyword")
@patch("hina.pipeline.pipeline.extract_structure")
def test_pipeline_keyword_high_confidence(
self, mock_extract: MagicMock, mock_detect: MagicMock
) -> None:
"""路径 A: keyword confidence >= 90%, 直接输出关键词结果。"""
mock_detect.return_value = [("DB操作", 0.95, "EXEC SQL")]
mock_extract.return_value = _make_mock_structure()
result = classify_program("SOME COBOL SOURCE")
assert result["category"] == "DB操作"
assert result["confidence"] >= 0.0
assert result["method"] == "keyword"
assert result["source"] == "l1"
assert result["judgment"] in ("auto", "review")
assert len(result["matches"]) == 1
assert result["matches"][0][0] == "DB操作"
@patch("hina.pipeline.pipeline.detect_keyword")
@patch("hina.pipeline.pipeline.extract_structure")
def test_pipeline_keyword_high_confidence_sysin(
self, mock_extract: MagicMock, mock_detect: MagicMock
) -> None:
"""路径 A 变体: SYSIN 关键字 (置信度 0.90) 也走直接输出。"""
mock_detect.return_value = [("SYSIN", 0.90, "SYSIN")]
mock_extract.return_value = _make_mock_structure()
result = classify_program("SOME COBOL SOURCE")
assert result["category"] == "SYSIN"
assert result["confidence"] >= 0.0
assert result["method"] == "keyword"
# ── 路径 B: keyword 50-89% ──
@patch("hina.pipeline.pipeline.detect_keyword")
@patch("hina.pipeline.pipeline.extract_structure")
def test_pipeline_rule_engine(
self, mock_extract: MagicMock, mock_detect: MagicMock
) -> None:
"""路径 B: keyword 50-89%, 触发规则引擎 + 确信度计算。"""
mock_detect.return_value = [("编码转换", 0.85, "ALPHABETIC")]
mock_extract.return_value = _make_mock_structure(
variable_patterns={
"has_prev_key": True,
"has_accumulator": True,
"has_error_flag": False,
"has_switch": False,
"has_index": False,
"has_save_area": False,
"has_counter": False,
"has_work": False,
},
file_count=2,
select_files={"FILE1": ["REC1"], "FILE2": ["REC2"]},
)
result = classify_program("SOME COBOL SOURCE")
assert result["method"] in ("rule_engine", "rule_engine_fallback")
# 确信度应由 v2 计算给出合理的值
assert result["confidence"] >= 0.0
assert "category" in result
assert "resolved_types" in result
assert "contradictions" in result
assert "v2_confidence" in result
assert result["v2_confidence"]["base"] >= 0.0
@patch("hina.pipeline.pipeline.detect_keyword")
@patch("hina.pipeline.pipeline.extract_structure")
def test_pipeline_rule_engine_with_contradiction(
self, mock_extract: MagicMock, mock_detect: MagicMock
) -> None:
"""路径 B 变体: 规则引擎检测到矛盾并解决。"""
mock_detect.return_value = [("编码转换", 0.85, "ALPHABETIC")]
# 构建同时匹配マッチング和キーブレイク特征的结构, 产生矛盾
mock_extract.return_value = _make_mock_structure(
file_count=3,
select_files={"F1": ["R1"], "F2": ["R2"], "F3": ["R3"]},
if_types={"total": 3, "comparison": 3, "equality": 3, "compound": 0, "nested_depth": 2},
variable_patterns={
"has_prev_key": True,
"has_accumulator": True,
"has_error_flag": False,
"has_switch": False,
"has_index": False,
"has_save_area": False,
"has_counter": True,
"has_work": False,
},
)
result = classify_program("SOME COBOL SOURCE")
assert "contradiction_resolution" in result
assert result["contradiction_resolution"]["total_count"] >= 0
# 即使有矛盾, 结果应该是完整的
assert "category" in result
assert result["confidence"] >= 0.0
# ── 路径 C: keyword < 50% ──
@patch("hina.pipeline.pipeline.detect_keyword")
@patch("hina.pipeline.pipeline.extract_structure")
def test_pipeline_llm_fallback(
self, mock_extract: MagicMock, mock_detect: MagicMock
) -> None:
"""路径 C: keyword < 50%, LLM 辅助分类。"""
mock_detect.return_value = [] # 无关键字匹配 -> confidence = 0
mock_extract.return_value = _make_mock_structure()
mock_llm = MagicMock()
mock_llm.call.return_value = (
'{"category": "simple_sequential", "subtype": "no_branch", '
'"confidence": 0.88, "features": {}, "required_tests": 1, '
'"strategy_params": {}}'
)
result = classify_program("SOME COBOL SOURCE", llm=mock_llm)
assert result["method"] == "llm"
assert "category" in result
# LLM 路径应调用 LLM
assert mock_llm.call.called
@patch("hina.pipeline.pipeline.detect_keyword")
@patch("hina.pipeline.pipeline.extract_structure")
def test_pipeline_llm_unavailable_fallback_to_rule_engine(
self, mock_extract: MagicMock, mock_detect: MagicMock
) -> None:
"""路径 C 兜底: LLM 不可用时退化为规则引擎。"""
mock_detect.return_value = []
mock_extract.return_value = _make_mock_structure()
result = classify_program("SOME COBOL SOURCE", llm=None)
# 没有 LLM, 使用规则引擎兜底
assert result["method"] == "rule_engine_fallback"
assert "category" in result
assert result["confidence"] >= 0.0
# ── 无矛盾场景 ──
@patch("hina.pipeline.pipeline.detect_keyword")
@patch("hina.pipeline.pipeline.extract_structure")
def test_pipeline_no_contradiction(
self, mock_extract: MagicMock, mock_detect: MagicMock
) -> None:
"""路径 B 变体: 规则引擎处理后无矛盾。"""
mock_detect.return_value = [("SYSIN", 0.90, "SYSIN")]
mock_extract.return_value = _make_mock_structure(
# 简单的结构, 不会触发复杂混淆组
file_count=1,
select_files={"F1": ["R1"]},
if_types={"total": 0, "comparison": 0, "equality": 0, "compound": 0, "nested_depth": 0},
variable_patterns={
"has_prev_key": False, "has_accumulator": False,
"has_error_flag": False, "has_switch": False,
"has_index": False, "has_save_area": False,
"has_counter": False, "has_work": False,
},
)
result = classify_program("SOME COBOL SOURCE")
assert "contradictions" in result
assert len(result["contradictions"]) == 0
# ── orchestrator 集成契约 ──
@patch("hina.pipeline.pipeline.detect_keyword")
@patch("hina.pipeline.pipeline.extract_structure")
def test_pipeline_with_orchestrator_integration(
self, mock_extract: MagicMock, mock_detect: MagicMock
) -> None:
"""验证 classify_program 输出满足 orchestrator 的集成契约。"""
mock_detect.return_value = [("DB操作", 0.95, "EXEC SQL")]
mock_extract.return_value = _make_mock_structure()
result = classify_program("SOME COBOL SOURCE")
# 模拟 orchestrator 的用法:
vr_type = result["category"]
vr_confidence = result["confidence"]
vr_debug_classification = result
vr_quality_warn = None
if result["needs_review"]:
vr_quality_warn = f"类型判定确信度过低({result['confidence']:.0%})"
# 断言 orchestrator 需要的字段
assert isinstance(vr_type, str)
assert isinstance(vr_confidence, float)
assert isinstance(vr_debug_classification, dict)
assert 0.0 <= vr_confidence <= 1.0
assert isinstance(result["needs_review"], bool)
# 高确信度不需要 review
# needs_review depends on v2 confidence
assert vr_quality_warn is None or "过低" in str(vr_quality_warn)
# ── 空源码边界 ──
def test_pipeline_empty_source(self) -> None:
"""空 COBOL 源码返回 unknown 且 needs_review=True。"""
result = classify_program("")
assert result["category"] == "unknown"
assert result["confidence"] == 0.0
assert result["needs_review"] is True
assert result["method"] == "none"
assert result["source"] == "error"
assert result["judgment"] == "impossible"
def test_pipeline_whitespace_source(self) -> None:
"""纯空白源码也返回 unknown。"""
result = classify_program(" \n \t ")
assert result["category"] == "unknown"
assert result["needs_review"] is True
# ── import 验证 ──
def test_import_from_hina(self) -> None:
"""验证 classify_program 是 hina 包唯一导出的函数。"""
from hina import __all__ as hina_all
assert "classify_program" in hina_all
assert len(hina_all) == 1 # 唯一外部入口