feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark

P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged to hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
hangshuo652
2026-06-19 23:51:55 +08:00
parent 63b5284715
commit bc1d56d1a4
129 changed files with 19378 additions and 261 deletions
+148
View File
@@ -0,0 +1,148 @@
"""HA-01~10: HINA Agent — LLM 分类 + 回退 + 解析"""
import sys, os, json
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
from hina.hina_agent import (
classify_with_llm, _parse_llm_response, _validate_result, _fallback_classification,
)
class _MockLLMPass:
    """Stub LLM client whose ``call`` always answers with a well-formed JSON payload."""

    def call(self, msgs, retries=1):
        """Return a fixed ``condition_heavy`` classification serialized as JSON.

        ``msgs`` and ``retries`` are accepted for signature compatibility and ignored.
        """
        strategy = {
            "max_nesting_depth": 3,
            "coverage_target": "branch",
            "file_isolation": False,
            "supplement_strategy": "incremental",
        }
        payload = {
            "category": "condition_heavy",
            "subtype": "nested_if",
            "confidence": 0.85,
            "features": {},
            "required_tests": 10,
            "strategy_params": strategy,
        }
        return json.dumps(payload)
class _MockLLMEmpty:
    """Stub LLM client that replies with an empty string."""

    def call(self, msgs, retries=1):
        """Ignore the arguments and return ``""``."""
        empty_reply = ""
        return empty_reply
class _MockLLMBadJSON:
    """Stub LLM client that replies with text that is not JSON."""

    def call(self, msgs, retries=1):
        """Ignore the arguments and return a fixed non-JSON sentence."""
        garbage_reply = "not valid json at all"
        return garbage_reply
class _MockLLMTimeout:
    """Stub LLM client whose ``call`` always fails, standing in for a timeout."""

    def call(self, msgs, retries=1):
        """Raise a plain ``Exception`` carrying the message "httpx.TimeoutException"."""
        failure = Exception("httpx.TimeoutException")
        raise failure
# ── HA-01: normal classify_with_llm ──
def test_classify_with_llm_normal():
    """HA-01: a valid structure plus a well-behaved LLM yields a dict carrying the LLM's category."""
    features = dict(
        paragraph_count=5,
        decision_count=3,
        if_count=2,
        evaluate_count=0,
        file_count=1,
        open_directions=["INPUT"],
        has_search_all=False,
        has_call=False,
        has_break=False,
        total_branches=4,
    )
    outcome = classify_with_llm(features, _MockLLMPass())
    assert isinstance(outcome, dict)
    assert "category" in outcome
    # The mock always answers "condition_heavy".
    assert outcome["category"] == "condition_heavy"
# ── HA-02~04: LLM error handling ──
def test_classify_with_llm_bad_json():
    """HA-03: an unparseable LLM reply must still produce a dict result."""
    minimal = {"paragraph_count": 1, "decision_count": 0, "if_count": 0}
    outcome = classify_with_llm(minimal, _MockLLMBadJSON())
    assert isinstance(outcome, dict)
    # Either key is accepted as evidence that some classification was produced.
    assert not ("category" not in outcome and "confidence" not in outcome)
def test_classify_with_llm_empty():
    """HA-03 (variant): an empty LLM reply must still produce a dict result."""
    minimal = {"paragraph_count": 1, "decision_count": 0, "if_count": 0}
    llm = _MockLLMEmpty()
    outcome = classify_with_llm(minimal, llm)
    assert isinstance(outcome, dict)
def test_classify_with_llm_timeout():
    """HA-04: an LLM call that raises must not crash; a dict is still returned."""
    minimal = {"paragraph_count": 1, "decision_count": 0, "if_count": 0}
    llm = _MockLLMTimeout()
    outcome = classify_with_llm(minimal, llm)
    assert isinstance(outcome, dict)
# ── HA-05~07: _parse_llm_response ──
def test_parse_llm_json():
    """HA-05: well-formed JSON is parsed and its fields are preserved."""
    raw_reply = '{"category": "DB操作", "confidence": 0.95}'
    parsed = _parse_llm_response(raw_reply)
    expected = {"category": "DB操作", "confidence": 0.95}
    for field, value in expected.items():
        assert parsed[field] == value
def test_parse_llm_invalid_json():
    """HA-06: non-JSON text must not raise; the parser may return None or a dict."""
    parsed = _parse_llm_response("暂无")
    assert isinstance(parsed, (dict, type(None)))
def test_parse_llm_markdown_wrapped():
    """HA-07: JSON wrapped in a ```json markdown fence is still parsed."""
    fenced_reply = (
        "```json\n"
        '{"category": "SORT", "confidence": 0.9}'
        "\n```"
    )
    parsed = _parse_llm_response(fenced_reply)
    assert parsed is not None
    assert parsed.get("category") == "SORT"
def test_parse_llm_empty_string():
    """An empty reply is expected to come back as the default dict (unknown / 0.0)."""
    parsed = _parse_llm_response("")
    defaults = (("category", "unknown"), ("confidence", 0.0))
    for field, value in defaults:
        assert parsed[field] == value
# ── HA-08~10: _fallback_classification ──
def test_fallback_no_decision():
    """HA-08: a structure with no decision points falls back to simple_sequential."""
    no_decisions = {"decision_points": [], "file_count": 0}
    classified = _fallback_classification(no_decisions)
    assert classified["category"] == "simple_sequential"
def test_fallback_call():
    """HA-09: a structure flagged ``has_call`` falls back to call_based."""
    with_call = dict(
        decision_points=[{"kind": "IF"}],
        file_count=0,
        has_call=True,
        has_search_all=False,
        has_break=False,
    )
    classified = _fallback_classification(with_call)
    assert classified["category"] == "call_based"
def test_fallback_search():
    """HA-10: a structure flagged ``has_search_all`` falls back to search_intensive."""
    with_search = dict(
        decision_points=[{"kind": "IF"}],
        file_count=0,
        has_call=False,
        has_search_all=True,
        has_break=False,
    )
    classified = _fallback_classification(with_search)
    assert classified["category"] == "search_intensive"
# ── _validate_result ──
def test_validate_valid():
    """A well-formed result passes validation and is returned as a dict."""
    candidate = {"category": "condition_heavy", "confidence": 0.8, "features": {}}
    validated = _validate_result(candidate)
    assert isinstance(validated, dict)
def test_validate_missing_category():
    """A result without ``category`` is validated to category "unknown"."""
    candidate = {"confidence": 0.8}
    validated = _validate_result(candidate)
    assert validated["category"] == "unknown"
+205
View File
@@ -0,0 +1,205 @@
"""Deep classifier tests: keyword detection, confidence boundaries, edge cases"""
import sys, os
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
from hina.classifier import detect_keyword, compute_confidence
# ── 1. detect_keyword with SQL + SORT + CALL all present ──
def test_detect_keyword_multiple_matches():
    """A source containing SQL, SORT and CALL keywords yields one match per category.

    Each match is indexed as (category, confidence, keyword).
    """
    source = (
        "\n"
        "IDENTIFICATION DIVISION.\n"
        "PROGRAM-ID. TESTPGM.\n"
        "DATA DIVISION.\n"
        "WORKING-STORAGE SECTION.\n"
        "01 WS-A PIC X(100).\n"
        "PROCEDURE DIVISION.\n"
        "EXEC SQL\n"
        "SELECT * FROM TABLE\n"
        "END-EXEC.\n"
        "SORT ON KEY WS-KEY.\n"
        "CALL 'SUBPGM'.\n"
        "STOP RUN.\n"
    )
    matches = detect_keyword(source)
    by_category = {match[0]: match for match in matches}

    # category -> (expected confidence, expected matched keyword)
    expected = {
        "DB操作": (0.95, "EXEC SQL"),
        "SORT": (0.95, "SORT ON KEY"),
        "子程序调用": (0.90, "CALL"),
    }
    for category in expected:
        assert category in by_category
    for category, (confidence, keyword) in expected.items():
        assert by_category[category][1] == confidence
        assert by_category[category][2] == keyword
# ── 2. compute_confidence with hybrid (keyword + LLM) result ──
def test_compute_confidence_hybrid():
    """A keyword hit below 0.90 combined with an LLM result is reported as method "hybrid".

    The LLM's category and confidence win, while the keyword matches stay attached.
    """
    # "WRITE AFTER" is expected to match with confidence 0.80, i.e. below the 0.90 cut-off.
    source = "WRITE AFTER ADVANCING 1 LINE."
    llm_answer = {"category": "output_heavy", "confidence": 0.75}

    outcome = compute_confidence(source, llm_result=llm_answer)

    assert outcome["method"] == "hybrid"
    assert outcome["source"] == "llm"
    assert outcome["category"] == "output_heavy"
    assert outcome["confidence"] == 0.75
    # Keyword evidence must still be reported alongside the LLM verdict.
    attached = outcome["matches"]
    assert len(attached) > 0
    assert [m for m in attached if "WRITE AFTER" in str(m)]
def test_compute_confidence_keyword_high_confidence_overrides_llm():
    """A keyword hit at or above 0.90 wins over the LLM result (method "keyword")."""
    # "EXEC SQL" is expected to match "DB操作" with confidence 0.95.
    source = "EXEC SQL SELECT * FROM TABLE"
    llm_answer = {"category": "something_else", "confidence": 0.50}

    outcome = compute_confidence(source, llm_result=llm_answer)

    expected = {
        "method": "keyword",
        "source": "l1",
        "category": "DB操作",
        "confidence": 0.95,
    }
    for field, value in expected.items():
        assert outcome[field] == value
# ── 3. compute_confidence boundaries: 0.0, 0.69, 0.70, 0.71, 1.0 ──
def test_confidence_boundary_zero():
    """With no keyword match and no LLM result: unknown category, confidence 0.0, method "none"."""
    source = " MOVE 1 TO A.\n ADD 1 TO B.\n STOP RUN."
    outcome = compute_confidence(source, llm_result=None)
    expected = {
        "category": "unknown",
        "confidence": 0.0,
        "method": "none",
        "matches": [],
    }
    for field, value in expected.items():
        assert outcome[field] == value
def test_confidence_boundary_069():
    """An LLM confidence of 0.69 (just under 0.70) is passed through unchanged as "hybrid"."""
    llm_answer = {"category": "custom_category", "confidence": 0.69}
    outcome = compute_confidence(" MOVE 1 TO A.", llm_result=llm_answer)
    observed = (outcome["category"], outcome["confidence"], outcome["method"])
    assert observed[0] == "custom_category"
    assert observed[1] == 0.69
    assert observed[2] == "hybrid"
def test_confidence_boundary_070():
    """An LLM confidence of exactly 0.70 is passed through unchanged as "hybrid"."""
    llm_answer = {"category": "custom_category", "confidence": 0.70}
    outcome = compute_confidence(" MOVE 1 TO A.", llm_result=llm_answer)
    observed = (outcome["category"], outcome["confidence"], outcome["method"])
    assert observed[0] == "custom_category"
    assert observed[1] == 0.70
    assert observed[2] == "hybrid"
def test_confidence_boundary_071():
    """An LLM confidence of 0.71 (just over 0.70) is passed through unchanged as "hybrid"."""
    llm_answer = {"category": "custom_category", "confidence": 0.71}
    outcome = compute_confidence(" MOVE 1 TO A.", llm_result=llm_answer)
    observed = (outcome["category"], outcome["confidence"], outcome["method"])
    assert observed[0] == "custom_category"
    assert observed[1] == 0.71
    assert observed[2] == "hybrid"
def test_confidence_boundary_max():
    """An LLM confidence of 1.0 is passed through unchanged as "hybrid"."""
    llm_answer = {"category": "perfect", "confidence": 1.0}
    outcome = compute_confidence(" MOVE 1 TO A.", llm_result=llm_answer)
    observed = (outcome["category"], outcome["confidence"], outcome["method"])
    assert observed[0] == "perfect"
    assert observed[1] == 1.0
    assert observed[2] == "hybrid"
# ── 4. Keyword source text with mixed case, extra whitespace, inline comments ──
def test_detect_keyword_mixed_case_whitespace_comments():
    """Mixed-case keywords followed by inline ``*>`` comments are still detected.

    The reported keyword is expected in its upper-case form.
    """
    source = (
        "\n"
        "IDENTIFICATION DIVISION.\n"
        "ExEc Sql\n"
        "SELECT * FROM TABLE\n"
        "END-EXEC. *> inline comment\n"
        "Call 'SUBPGM' *> some comment\n"
        "Sort On Key WS-KEY.\n"
    )
    matches = detect_keyword(source)

    found_categories = {match[0] for match in matches}
    for category in ("DB操作", "子程序调用", "SORT"):
        assert category in found_categories

    found_keywords = {match[2] for match in matches}
    for keyword in ("EXEC SQL", "CALL", "SORT ON KEY"):
        assert keyword in found_keywords
# ── 5. No keyword match and no LLM result → unknown ──
def test_detect_keyword_no_match():
    """A source without any known keyword produces no matches."""
    plain_source = " MOVE 1 TO A.\n ADD 1 TO B.\n STOP RUN."
    matches = detect_keyword(plain_source)
    assert len(matches) == 0
def test_compute_confidence_no_match_no_llm():
    """No keyword match and no LLM: every field of the result takes its "nothing found" value."""
    plain_source = " MOVE 1 TO A.\n ADD 1 TO B.\n STOP RUN."
    outcome = compute_confidence(plain_source, llm_result=None)
    expected = {
        "category": "unknown",
        "confidence": 0.0,
        "method": "none",
        "source": "unknown",
        "matches": [],
    }
    for field, value in expected.items():
        assert outcome[field] == value
# ── Additional: verify L1_RULES via detect_keyword ──
def test_detect_keyword_all_rules():
    """Every listed keyword, embedded in a one-line source, triggers its expected category."""
    # (keyword placed in the source, category that must appear among the matches)
    keyword_to_category = (
        ("EXEC SQL", "DB操作"),
        ("CALL", "子程序调用"),
        ("IS INITIAL", "IS INITIAL"),
        ("SYSIN", "SYSIN"),
        ("ALPHABETIC", "编码转换"),
        ("DFHCOMMAREA", "online"),
        ("MAP", "online"),
        ("SORT ON KEY", "SORT"),
        ("MERGE ON KEY", "MERGE"),
        ("WRITE AFTER", "编辑输出"),
        ("WRITE BEFORE", "编辑输出"),
        ("ORGANIZATION IS", "文件编成"),
        ("ALTERNATE RECORD KEY", "替代索引"),
    )
    for keyword, expected_category in keyword_to_category:
        categories = {match[0] for match in detect_keyword(f" {keyword} DUMMY.")}
        assert expected_category in categories, (
            f"Keyword '{keyword}' should trigger category '{expected_category}', got {categories}"
        )
+354
View File
@@ -0,0 +1,354 @@
"""测试: 确信度 4 因子计算 + 质量门禁评分 + 覆盖率比较"""
import pytest
from hina.confidence import compute_confidence_v2
from hina.gate import compute_quality_score, check as gate_check
from coverage.compare_coverage import compare_coverage
# ── compute_confidence_v2 判定阈值测试 ──
def test_auto_judgment():
    """A confidence of at least 0.90 is judged "auto" and needs no review."""
    keyword_hit = {"base_confidence": 1.0, "match_count": 3}
    structure = {"structure_match_score": 5}

    verdict = compute_confidence_v2(keyword_hit, structure)

    # All four factors are 1.0 here, so the product is 1.0.
    assert verdict["confidence"] == 1.0
    assert verdict["judgment"] == "auto"
    assert verdict["needs_review"] is False
def test_review_judgment():
    """A confidence in [0.70, 0.90) is judged "review" and flagged for review.

    The structure factor is discrete, so the band is reached through the
    consistency factor instead: one resolved contradiction gives
    0.95 (base) x 1.0 (context) x 0.90 (consistency) x 1.0 (structure) = 0.855.
    """
    keyword_hit = {"base_confidence": 0.95, "match_count": 3}
    structure = {"structure_match_score": 5}
    resolved_once = [{"type": "type_mismatch", "resolved": True}]

    verdict = compute_confidence_v2(
        keyword_hit,
        structure,
        contradictions=resolved_once,
    )

    assert 0.70 <= verdict["confidence"] < 0.90
    assert verdict["judgment"] == "review"
    assert verdict["needs_review"] is True
def test_manual_judgment():
    """A confidence in [0.50, 0.70) is judged "manual" and flagged for review.

    Expected product: 0.95 x 0.90 x 0.90 x 0.7 = 0.53865.
    """
    keyword_hit = {"base_confidence": 0.95, "match_count": 1}
    structure = {"structure_match_score": 4}
    resolved_once = [{"type": "type_mismatch", "resolved": True}]

    verdict = compute_confidence_v2(
        keyword_hit,
        structure,
        contradictions=resolved_once,
    )

    assert 0.50 <= verdict["confidence"] < 0.70
    assert verdict["judgment"] == "manual"
    assert verdict["needs_review"] is True
def test_impossible_judgment():
    """A confidence below 0.50 is judged "impossible" and flagged for review.

    Expected product: 0.7 x 0.50 x 1.0 x 0.3 = 0.105.
    """
    keyword_hit = {"base_confidence": 0.7, "match_count": 0}
    structure = {"structure_match_score": 0}

    verdict = compute_confidence_v2(keyword_hit, structure)

    assert verdict["confidence"] < 0.50
    assert verdict["judgment"] == "impossible"
    assert verdict["needs_review"] is True
# ── 因子边界测试 ──
def test_context_factor_match_counts():
    """The keyword match count drives the context factor.

    Base confidence and the other factors are held at 1.0, so the overall
    confidence equals the context factor in every case.
    """
    # (match_count, expected context factor)
    cases = (
        (5, 1.0),   # three or more matches
        (2, 0.95),
        (1, 0.90),
        (0, 0.50),
    )
    for match_count, factor in cases:
        verdict = compute_confidence_v2(
            {"base_confidence": 1.0, "match_count": match_count},
            {"structure_match_score": 5},
        )
        assert verdict["context_factor"] == factor
        assert verdict["confidence"] == factor
def test_consistency_factor_contradictions():
    """The contradiction list drives the consistency factor."""
    # (contradictions passed in, expected consistency factor)
    cases = (
        # no contradictions
        ([], 1.0),
        # a single resolved contradiction
        ([{"type": "t1", "resolved": True}], 0.90),
        # a single unresolved contradiction
        ([{"type": "t1", "resolved": False}], 0.80),
        # three contradictions: two unresolved plus one resolved.
        # NOTE(review): the original comment described this case as ">= 3 unresolved",
        # but only two entries are unresolved — confirm whether the rule counts
        # all contradictions or only unresolved ones.
        (
            [
                {"type": "t1", "resolved": False},
                {"type": "t2", "resolved": False},
                {"type": "t3", "resolved": True},
            ],
            0.50,
        ),
    )
    for contradictions, factor in cases:
        verdict = compute_confidence_v2(
            {"base_confidence": 1.0, "match_count": 3},
            {"structure_match_score": 5},
            contradictions=contradictions,
        )
        assert verdict["consistency_factor"] == factor
def test_structure_factor_scores():
    """The structure match score (out of 5) drives the structure factor."""
    # (structure_match_score, expected structure factor)
    cases = (
        (5, 1.0),   # full match
        (3, 0.7),   # band documented as 3-4 of 5
        (1, 0.5),   # band documented as 1-2 of 5
        (0, 0.3),   # no match / not determinable
    )
    for score, factor in cases:
        verdict = compute_confidence_v2(
            {"base_confidence": 1.0, "match_count": 3},
            {"structure_match_score": score},
        )
        assert verdict["structure_factor"] == factor
def test_base_confidence_default():
    """When ``base_confidence`` is absent from the keyword result, the base is 0.7."""
    keyword_hit = {"match_count": 3}
    structure = {"structure_match_score": 5}
    verdict = compute_confidence_v2(keyword_hit, structure)
    assert verdict["base"] == 0.7
# ── compute_quality_score 双模式测试 ──
def test_quality_score_no_gcov():
    """Without gcov: branch_rate*0.5 + paragraph_rate*0.5 + confidence*0.4, capped at 1.0.

    Here 0.40 + 0.45 + 0.20 = 1.05, so the expected score is the cap, 1.0.
    """
    static_rates = {"branch_rate": 0.80, "paragraph_rate": 0.90}
    capped = compute_quality_score(static_rates, gcov_coverage=None, confidence=0.5)
    assert capped == 1.0
def test_quality_score_no_gcov_sub_max():
    """Without gcov, a raw score below 1.0 is returned without being capped.

    Expected: 0.60*0.5 + 0.70*0.5 + 0.8*0.4 = 0.30 + 0.35 + 0.32 = 0.97.
    """
    static_cov = {
        "branch_rate": 0.60,
        "paragraph_rate": 0.70,
    }
    score = compute_quality_score(static_cov, gcov_coverage=None, confidence=0.8)
    # The weighted sum is not exactly representable in binary floating point,
    # so compare with a tolerance rather than with ``==``.
    assert score == pytest.approx(0.97)
def test_quality_score_with_gcov():
    """With gcov: static_cov*0.3 + gcov_cov*0.4 + confidence*0.3.

    static_cov = 0.80*0.5 + 0.90*0.5 = 0.85
    score      = 0.85*0.3 + 0.75*0.4 + 0.5*0.3 = 0.255 + 0.30 + 0.15 = 0.705
    """
    static_cov = {
        "branch_rate": 0.80,
        "paragraph_rate": 0.90,
    }
    gcov_cov = {"gcov_cov": 0.75}
    score = compute_quality_score(static_cov, gcov_cov, confidence=0.5)
    # Several inexact float products are summed here; an exact ``==`` would
    # depend on evaluation order, so compare with a tolerance.
    assert score == pytest.approx(0.705)
def test_quality_score_with_gcov_zero_confidence():
    """With gcov and zero confidence, only the two coverage terms contribute.

    static_cov = 1.0, so the score is 1.0*0.3 + 0.5*0.4 + 0.0*0.3 = 0.50.
    """
    full_static = {"branch_rate": 1.0, "paragraph_rate": 1.0}
    half_dynamic = {"gcov_cov": 0.5}
    total = compute_quality_score(full_static, half_dynamic, confidence=0.0)
    assert total == 0.50
# ── compare_coverage 基本功能测试 ──
def test_compare_coverage_basic():
    """compare_coverage echoes its inputs and reports the static-vs-dynamic gap.

    gap = (0.90*0.5 + 0.85*0.5) - 0.75 = 0.875 - 0.75 = 0.125
    """
    static_side = dict(
        branch_rate=0.90,
        paragraph_rate=0.85,
        total_branches=20,
        covered_branches=18,
    )
    dynamic_side = dict(
        gcov_cov=0.75,
        covered_branches=15,
        total_branches=20,
        misleading_branches=["BR001", "BR003"],
    )

    report = compare_coverage("TESTPROG", static_side, dynamic_side)

    assert report["program"] == "TESTPROG"
    assert report["static"]["branch_rate"] == 0.90
    assert report["static"]["paragraph_rate"] == 0.85
    assert report["dynamic"]["gcov_cov"] == 0.75
    assert report["gap"] == 0.125
    assert report["misleading_branches"] == ["BR001", "BR003"]
def test_compare_coverage_no_gap():
    """When static and dynamic coverage agree, the gap is 0.

    gap = (0.80*0.5 + 0.80*0.5) - 0.80 = 0.0
    """
    static_side = dict(
        branch_rate=0.80,
        paragraph_rate=0.80,
        total_branches=10,
        covered_branches=8,
    )
    dynamic_side = dict(
        gcov_cov=0.80,
        covered_branches=8,
        total_branches=10,
        misleading_branches=[],
    )

    report = compare_coverage("NOGAP", static_side, dynamic_side)

    assert report["gap"] == 0.0
    assert report["misleading_branches"] == []
def test_compare_coverage_no_misleading():
    """With no misleading branches, the list comes back empty and the gap is still computed.

    gap = (0.95*0.5 + 1.0*0.5) - 0.90 = 0.975 - 0.90 = 0.075
    """
    static = {
        "branch_rate": 0.95,
        "paragraph_rate": 1.0,
    }
    dynamic = {
        "gcov_cov": 0.90,
        "misleading_branches": [],
    }
    result = compare_coverage("CLEAN", static, dynamic)
    # 0.975 - 0.90 evaluates to 0.07499999999999996 in IEEE-754 doubles, so an
    # exact ``== 0.075`` only holds if the implementation happens to round.
    assert result["gap"] == pytest.approx(0.075)
    assert result["misleading_branches"] == []
# ── gate.check 基本功能测试 ──
def test_gate_check_passed():
    """Full branch and paragraph coverage with at least one test passes the gate cleanly."""
    full_coverage = {"branch_rate": 1.0, "paragraph_rate": 1.0}
    verdict = gate_check(
        complete_tests=[{"id": 1}],
        hina_result={},
        coverage=full_coverage,
    )
    assert verdict["passed"] is True
    assert len(verdict["issues"]) == 0
def test_gate_check_failed_branch():
    """Insufficient branch coverage fails the gate and reports "decision_gaps"."""
    partial_coverage = {
        "branch_rate": 0.50,
        "paragraph_rate": 1.0,
        "uncovered_decision_ids": [1, 2],
    }
    verdict = gate_check(
        complete_tests=[{"id": 1}],
        hina_result={},
        coverage=partial_coverage,
    )
    assert verdict["passed"] is False
    assert "decision_gaps" in verdict["issues"]
def test_gate_check_no_data():
    """An empty test list fails the gate and reports "no_data", even with full coverage."""
    full_coverage = {"branch_rate": 1.0, "paragraph_rate": 1.0}
    verdict = gate_check(
        complete_tests=[],
        hina_result={},
        coverage=full_coverage,
    )
    assert verdict["passed"] is False
    assert "no_data" in verdict["issues"]
+35
View File
@@ -0,0 +1,35 @@
"""GC-01~03: gcov_collector — COBOL 覆盖率采集"""
import sys, os, tempfile
from pathlib import Path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
from hina.gcov_collector import collect_gcov
def test_gcov_not_installed():
    """GC-01: collecting in an empty work directory reports unavailability or a reason.

    NOTE(review): the test plan describes GC-01 as "cobc not on PATH", but this
    test does not alter PATH; it only uses a fresh directory that contains no
    source file and no .gcda/.gcno data.
    """
    with tempfile.TemporaryDirectory() as scratch:
        work_dir = Path(scratch)
        missing_program = work_dir / "program.cbl"
        report = collect_gcov(missing_program, work_dir)
        assert isinstance(report, dict)
        # Accept either an explicit available=False or a result that explains itself.
        unavailable = not report.get("available", True)
        assert unavailable or "reason" in report
def test_gcov_no_data():
    """GC-02: a source file without .gcda/.gcno data yields available=False plus a reason."""
    with tempfile.TemporaryDirectory() as scratch:
        work_dir = Path(scratch)
        program = work_dir / "test.cbl"
        program.write_text("PROGRAM-ID. TEST.")
        report = collect_gcov(program, work_dir)
        assert report.get("available") is False
        assert "reason" in report
def test_gcov_result_structure():
    """The result always has "available", plus either "reason" or "line_rate"."""
    with tempfile.TemporaryDirectory() as scratch:
        work_dir = Path(scratch)
        report = collect_gcov(work_dir / "nope.cbl", work_dir)
        assert "available" in report
        assert any(key in report for key in ("reason", "line_rate"))
+314
View File
@@ -0,0 +1,314 @@
"""Tests for hina/pipeline/pipeline.py — classify_program 完整管道。
覆盖路径:
- 路径 A: keyword confidence >= 90% -> 直接输出
- 路径 B: keyword 50-89% -> 规则引擎 + 矛盾回溯
- 路径 C: keyword < 50% -> LLM 辅助
- 无矛盾场景
- orchestrator 集成契约
- 空源码边界
"""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
from hina import classify_program
from hina.pipeline.pipeline import _get_best_keyword_match
# ── _get_best_keyword_match 单元测试 ────────────────────────────────────────────
class TestGetBestKeywordMatch:
    """Unit tests for ``_get_best_keyword_match`` over (category, confidence, keyword) tuples."""

    def test_empty_matches(self) -> None:
        """No matches at all yields ``None``."""
        best = _get_best_keyword_match([])
        assert best is None

    def test_single_match(self) -> None:
        """A single match is returned as a dict exposing its three fields."""
        only_match = ("DB操作", 0.95, "EXEC SQL")
        best = _get_best_keyword_match([only_match])
        assert best is not None
        for field, value in zip(("category", "confidence", "keyword"), only_match):
            assert best[field] == value

    def test_multiple_matches_picks_highest(self) -> None:
        """With several matches the highest confidence wins and all matches are kept."""
        candidates = [
            ("子程序调用", 0.90, "CALL"),
            ("DB操作", 0.95, "EXEC SQL"),
            ("SORT", 0.95, "SORT ON KEY"),
        ]
        best = _get_best_keyword_match(candidates)
        assert best is not None
        assert best["confidence"] == 0.95
        # On a confidence tie the first of the top-ranked matches is the intended
        # pick; only the presence of the full match list is asserted here.
        assert "all_matches" in best
        assert len(best["all_matches"]) == 3
# ── classify_program 管道测试 (模拟依赖) ──────────────────────────────────────
def _make_mock_structure(**overrides) -> dict:
    """Return a default structure dict for mocking, with ``overrides`` applied on top.

    Overrides replace top-level keys wholesale (no deep merge); unknown keys are added.
    """
    pattern_flags = (
        "has_prev_key",
        "has_accumulator",
        "has_error_flag",
        "has_switch",
        "has_index",
        "has_save_area",
        "has_counter",
        "has_work",
    )
    defaults = {
        "total_paragraphs": 5,
        "file_count": 2,
        "decision_points": [{"id": 1, "kind": "IF", "label": "A > B", "branches": 2}],
        "if_types": {"total": 1, "comparison": 1, "equality": 0, "compound": 0, "nested_depth": 0},
        "branch_tree_obj": MagicMock(),
        "has_call": False,
        "has_divide": False,
        "has_string": False,
        "has_inspect": False,
        "open_pattern": "sequential",
        "select_files": {"FILE1": ["REC1"], "FILE2": ["REC2"]},
        # Every variable-pattern flag is off by default.
        "variable_patterns": dict.fromkeys(pattern_flags, False),
        "divide_constants": [],
        "open_directions": {},
    }
    return {**defaults, **overrides}
class TestClassifyProgramPipeline:
    """Pipeline tests for ``classify_program``.

    ``detect_keyword`` and ``extract_structure`` are patched where the pipeline
    module looks them up, so each test controls the keyword matches and the
    extracted structure directly.
    """

    @staticmethod
    def _variable_patterns(*enabled: str) -> dict:
        """Build a ``variable_patterns`` dict in which only the named flags are True."""
        flag_names = (
            "has_prev_key",
            "has_accumulator",
            "has_error_flag",
            "has_switch",
            "has_index",
            "has_save_area",
            "has_counter",
            "has_work",
        )
        return {name: name in enabled for name in flag_names}

    # ── Path A: keyword confidence >= 90% ──

    @patch("hina.pipeline.pipeline.detect_keyword")
    @patch("hina.pipeline.pipeline.extract_structure")
    def test_pipeline_keyword_high_confidence(
        self, mock_extract: MagicMock, mock_detect: MagicMock
    ) -> None:
        """Path A: a 0.95 keyword hit is reported directly as the keyword result."""
        mock_extract.return_value = _make_mock_structure()
        mock_detect.return_value = [("DB操作", 0.95, "EXEC SQL")]

        outcome = classify_program("SOME COBOL SOURCE")

        assert outcome["category"] == "DB操作"
        assert outcome["confidence"] >= 0.0
        assert outcome["method"] == "keyword"
        assert outcome["source"] == "l1"
        assert outcome["judgment"] in ("auto", "review")
        assert len(outcome["matches"]) == 1
        assert outcome["matches"][0][0] == "DB操作"

    @patch("hina.pipeline.pipeline.detect_keyword")
    @patch("hina.pipeline.pipeline.extract_structure")
    def test_pipeline_keyword_high_confidence_sysin(
        self, mock_extract: MagicMock, mock_detect: MagicMock
    ) -> None:
        """Path A variant: the SYSIN keyword at exactly 0.90 is also reported directly."""
        mock_extract.return_value = _make_mock_structure()
        mock_detect.return_value = [("SYSIN", 0.90, "SYSIN")]

        outcome = classify_program("SOME COBOL SOURCE")

        assert outcome["category"] == "SYSIN"
        assert outcome["confidence"] >= 0.0
        assert outcome["method"] == "keyword"

    # ── Path B: keyword confidence 50-89% ──

    @patch("hina.pipeline.pipeline.detect_keyword")
    @patch("hina.pipeline.pipeline.extract_structure")
    def test_pipeline_rule_engine(
        self, mock_extract: MagicMock, mock_detect: MagicMock
    ) -> None:
        """Path B: a 0.85 keyword hit goes through the rule engine and v2 confidence."""
        mock_detect.return_value = [("编码转换", 0.85, "ALPHABETIC")]
        mock_extract.return_value = _make_mock_structure(
            variable_patterns=self._variable_patterns("has_prev_key", "has_accumulator"),
            file_count=2,
            select_files={"FILE1": ["REC1"], "FILE2": ["REC2"]},
        )

        outcome = classify_program("SOME COBOL SOURCE")

        assert outcome["method"] in ("rule_engine", "rule_engine_fallback")
        # The confidence comes from the v2 calculation; only its sign is checked.
        assert outcome["confidence"] >= 0.0
        for field in ("category", "resolved_types", "contradictions", "v2_confidence"):
            assert field in outcome
        assert outcome["v2_confidence"]["base"] >= 0.0

    @patch("hina.pipeline.pipeline.detect_keyword")
    @patch("hina.pipeline.pipeline.extract_structure")
    def test_pipeline_rule_engine_with_contradiction(
        self, mock_extract: MagicMock, mock_detect: MagicMock
    ) -> None:
        """Path B variant: a structure meant to trigger a contradiction still yields a full result."""
        mock_detect.return_value = [("编码转换", 0.85, "ALPHABETIC")]
        # Combine features of both the matching and the key-break program types
        # so that the rule engine is given conflicting evidence.
        mock_extract.return_value = _make_mock_structure(
            file_count=3,
            select_files={"F1": ["R1"], "F2": ["R2"], "F3": ["R3"]},
            if_types={"total": 3, "comparison": 3, "equality": 3, "compound": 0, "nested_depth": 2},
            variable_patterns=self._variable_patterns(
                "has_prev_key", "has_accumulator", "has_counter"
            ),
        )

        outcome = classify_program("SOME COBOL SOURCE")

        assert "contradiction_resolution" in outcome
        assert outcome["contradiction_resolution"]["total_count"] >= 0
        # Contradictions must not leave the result incomplete.
        assert "category" in outcome
        assert outcome["confidence"] >= 0.0

    # ── Path C: keyword confidence < 50% ──

    @patch("hina.pipeline.pipeline.detect_keyword")
    @patch("hina.pipeline.pipeline.extract_structure")
    def test_pipeline_llm_fallback(
        self, mock_extract: MagicMock, mock_detect: MagicMock
    ) -> None:
        """Path C: with no keyword match the supplied LLM is consulted."""
        mock_detect.return_value = []  # no keyword match -> confidence 0
        mock_extract.return_value = _make_mock_structure()
        llm_reply = (
            '{"category": "simple_sequential", "subtype": "no_branch", '
            '"confidence": 0.88, "features": {}, "required_tests": 1, '
            '"strategy_params": {}}'
        )
        fake_llm = MagicMock()
        fake_llm.call.return_value = llm_reply

        outcome = classify_program("SOME COBOL SOURCE", llm=fake_llm)

        assert outcome["method"] == "llm"
        assert "category" in outcome
        # The LLM path must actually invoke the LLM.
        assert fake_llm.call.called

    @patch("hina.pipeline.pipeline.detect_keyword")
    @patch("hina.pipeline.pipeline.extract_structure")
    def test_pipeline_llm_unavailable_fallback_to_rule_engine(
        self, mock_extract: MagicMock, mock_detect: MagicMock
    ) -> None:
        """Path C safety net: without an LLM the rule engine is used as the fallback."""
        mock_detect.return_value = []
        mock_extract.return_value = _make_mock_structure()

        outcome = classify_program("SOME COBOL SOURCE", llm=None)

        assert outcome["method"] == "rule_engine_fallback"
        assert "category" in outcome
        assert outcome["confidence"] >= 0.0

    # ── No-contradiction scenario ──

    @patch("hina.pipeline.pipeline.detect_keyword")
    @patch("hina.pipeline.pipeline.extract_structure")
    def test_pipeline_no_contradiction(
        self, mock_extract: MagicMock, mock_detect: MagicMock
    ) -> None:
        """A simple structure produces an empty ``contradictions`` list.

        NOTE(review): this was labelled a path-B variant, but the mocked keyword
        confidence is 0.90, which the SYSIN test in this class treats as path A —
        confirm which path is intended.
        """
        mock_detect.return_value = [("SYSIN", 0.90, "SYSIN")]
        # Deliberately simple structure that should not trigger any confusion group.
        mock_extract.return_value = _make_mock_structure(
            file_count=1,
            select_files={"F1": ["R1"]},
            if_types={"total": 0, "comparison": 0, "equality": 0, "compound": 0, "nested_depth": 0},
            variable_patterns=self._variable_patterns(),
        )

        outcome = classify_program("SOME COBOL SOURCE")

        assert "contradictions" in outcome
        assert len(outcome["contradictions"]) == 0

    # ── Orchestrator integration contract ──

    @patch("hina.pipeline.pipeline.detect_keyword")
    @patch("hina.pipeline.pipeline.extract_structure")
    def test_pipeline_with_orchestrator_integration(
        self, mock_extract: MagicMock, mock_detect: MagicMock
    ) -> None:
        """The result exposes the fields, with the types, that the orchestrator consumes."""
        mock_detect.return_value = [("DB操作", 0.95, "EXEC SQL")]
        mock_extract.return_value = _make_mock_structure()

        outcome = classify_program("SOME COBOL SOURCE")

        # Mirror how the orchestrator reads the result.
        vr_type = outcome["category"]
        vr_confidence = outcome["confidence"]
        vr_debug_classification = outcome
        vr_quality_warn = (
            f"类型判定确信度过低({outcome['confidence']:.0%})"
            if outcome["needs_review"]
            else None
        )

        assert isinstance(vr_type, str)
        assert isinstance(vr_confidence, float)
        assert isinstance(vr_debug_classification, dict)
        assert 0.0 <= vr_confidence <= 1.0
        assert isinstance(outcome["needs_review"], bool)
        # needs_review depends on the v2 confidence, so both outcomes are accepted.
        assert vr_quality_warn is None or "过低" in str(vr_quality_warn)

    # ── Empty-source boundary ──

    def test_pipeline_empty_source(self) -> None:
        """An empty source is classified as unknown with needs_review=True."""
        outcome = classify_program("")
        expected = {
            "category": "unknown",
            "confidence": 0.0,
            "method": "none",
            "source": "error",
            "judgment": "impossible",
        }
        assert outcome["category"] == expected["category"]
        assert outcome["confidence"] == expected["confidence"]
        assert outcome["needs_review"] is True
        for field in ("method", "source", "judgment"):
            assert outcome[field] == expected[field]

    def test_pipeline_whitespace_source(self) -> None:
        """A whitespace-only source is also classified as unknown."""
        outcome = classify_program(" \n \t ")
        assert outcome["category"] == "unknown"
        assert outcome["needs_review"] is True

    # ── Import contract ──

    def test_import_from_hina(self) -> None:
        """``classify_program`` is the only name exported by the ``hina`` package."""
        from hina import __all__ as hina_all

        assert "classify_program" in hina_all
        assert len(hina_all) == 1  # single public entry point
+115
View File
@@ -0,0 +1,115 @@
"""RH-01~07: Retry Handler — 分层重试 + heal/simple 分离"""
import sys, os
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
from hina.retry import RetryHandler, HEALING_FIXES
from data.diff_result import VerificationRun
def _vr(status="PASS", build_log=""):
    """Build a ``VerificationRun`` for program "TEST" with the given status.

    A non-empty ``build_log`` is attached as ``debug["cobol_build"]["log"]``;
    otherwise ``debug`` is left as the constructor set it.
    """
    run = VerificationRun(status=status, program="TEST")
    if not build_log:
        return run
    run.debug = {"cobol_build": {"log": build_log}}
    return run
def test_immediate_pass():
    """RH-01: a first-attempt PASS uses neither heal nor simple retries."""
    def always_pass():
        return _vr("PASS")

    final = RetryHandler().run(always_pass)
    assert final.status == "PASS"
    assert (final.heal_retry, final.simple_retry) == (0, 0)
def test_heal_recovery():
    """RH-02: BLOCKED with a "not found" build log is healed, then PASSes."""
    attempts = 0

    def fn():
        nonlocal attempts
        attempts += 1
        if attempts > 1:
            return _vr("PASS")
        # Only the first attempt fails, with a log that mentions a missing file.
        return _vr("BLOCKED", build_log="file not found: libcob.so")

    final = RetryHandler().run(fn)
    assert final.status == "PASS"
    assert final.heal_retry >= 1
    assert final.simple_retry == 0
def test_simple_retry():
    """RH-03: BLOCKED with an unrecognized build log is retried plainly, then PASSes."""
    attempts = 0

    def fn():
        nonlocal attempts
        attempts += 1
        if attempts > 1:
            return _vr("PASS")
        # The log text is not expected to match any healing rule.
        return _vr("BLOCKED", build_log="some random error")

    final = RetryHandler().run(fn)
    assert final.status == "PASS"
    assert final.simple_retry >= 1
def test_max_retries_exceeded():
    """RH-04: when every attempt is BLOCKED the run is expected to end FATAL."""
    handler = RetryHandler(max_heal=1, max_simple=1)

    def always_blocked():
        return _vr("BLOCKED")

    outcome = handler.run(always_blocked)
    assert outcome.status == "FATAL"
    assert outcome.exit_code == 4
def test_quality_warn_no_retry():
    """RH-05: QUALITY_WARN is expected to be returned as-is with no retries."""
    handler = RetryHandler()

    def quality_warn():
        return _vr("QUALITY_WARN")

    outcome = handler.run(quality_warn)
    assert outcome.status == "QUALITY_WARN"
    assert (outcome.heal_retry, outcome.simple_retry) == (0, 0)
def test_heal_fails_then_simple():
    """RH-06: every attempt stays BLOCKED with a "not found" build log.

    The run is expected to end FATAL with at least one retry counted overall.
    """
    attempts = 0

    def always_not_found():
        nonlocal attempts
        attempts += 1  # counted but never asserted on
        return _vr("BLOCKED", build_log="file not found: libcob.so")

    handler = RetryHandler(max_heal=2, max_simple=2)
    outcome = handler.run(always_not_found)
    assert outcome.status == "FATAL"
    # Only the combined heal + simple total is checked here.
    assert outcome.heal_retry + outcome.simple_retry >= 1
def test_concurrent_count_separation():
    """RH-07: heal and simple retry counters are meant to be independent.

    NOTE(review): the assertions below only check that both counters are
    >= 0, so they do not actually distinguish the two counters — consider
    tightening once the expected values are confirmed.
    """
    handler = RetryHandler(max_heal=2, max_simple=2)
    attempts = 0

    def blocked_then_pass():
        nonlocal attempts
        attempts += 1
        if attempts == 1:
            return _vr("BLOCKED", build_log="file not found: libcob.so")
        return _vr("PASS")

    # Replace the handler's env setter with a no-op.
    handler._try_set_env = lambda k, v: None
    # Temporarily swap the registered compile_error fix for a no-op;
    # the saved callable is put back in the finally clause.
    saved_fix = HEALING_FIXES["compile_error"]["fix"]
    HEALING_FIXES["compile_error"]["fix"] = lambda: None
    try:
        outcome = handler.run(blocked_then_pass)
        assert outcome.heal_retry >= 0
        assert outcome.simple_retry >= 0
    finally:
        HEALING_FIXES["compile_error"]["fix"] = saved_fix
def test_history_records():
    """After two BLOCKED runs and a PASS, handler.history holds >= 2 entries."""
    handler = RetryHandler(max_heal=0, max_simple=2)
    produced = []

    def blocked_twice_then_pass():
        # The first two calls report BLOCKED; later calls report PASS.
        status = "PASS" if len(produced) >= 2 else "BLOCKED"
        run = _vr(status)
        produced.append(run)
        return run

    handler.run(blocked_twice_then_pass)
    assert len(handler.history) >= 2
+468
View File
@@ -0,0 +1,468 @@
"""Tests for HINA rule engine: confusion groups, contradiction, backtrack."""
from __future__ import annotations
import sys
import os
import json
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
from hina.rule_engine.confusion_groups import (
resolve_matching_vs_keybreak,
resolve_dedup_vs_nodedup,
resolve_validation_vs_keybreak,
resolve_csv_merge_vs_split,
resolve_simple_vs_two_stage,
resolve_pure_vs_mixed,
resolve_division_50_25_100,
resolve_mn_output_mode,
resolve_confusion_pair,
)
from hina.rule_engine.contradiction import (
CONTRADICTION_PAIRS,
detect_contradictions,
resolve_contradiction,
)
from hina.rule_engine.backtrack import BacktrackResolver
# ═══════════════════════════════════════════════════════════════════════════
# 1. confusion_groups — matching_vs_keybreak
# ═══════════════════════════════════════════════════════════════════════════
def test_matching_vs_keybreak_matching():
    """Three comparison IFs plus two SELECT files are expected to give マッチング."""
    if_profile = {"total": 5, "comparison": 3, "equality": 1, "compound": 1, "nested_depth": 2}
    two_sequential_files = {
        name: {"organization": "SEQUENTIAL"} for name in ("file1", "file2")
    }
    no_patterns = {"has_prev_key": False, "has_accumulator": False, "has_error_field": False}
    outcome = resolve_matching_vs_keybreak({
        "if_types": if_profile,
        "select_files": two_sequential_files,
        "variable_patterns": no_patterns,
    })
    assert outcome["resolved_type"] == "マッチング"
    assert outcome["confidence"] >= 0.75
    assert len(outcome["evidence"]) > 0
def test_matching_vs_keybreak_keybreak():
    """Two equality IFs, a previous-key variable and an accumulator → キーブレイク."""
    if_profile = {"total": 2, "comparison": 0, "equality": 2, "compound": 0, "nested_depth": 1}
    keybreak_patterns = {"has_prev_key": True, "has_accumulator": True, "has_error_field": False}
    outcome = resolve_matching_vs_keybreak({
        "if_types": if_profile,
        "select_files": {"file1": {"organization": "SEQUENTIAL"}},
        "variable_patterns": keybreak_patterns,
    })
    assert outcome["resolved_type"] == "キーブレイク"
    assert outcome["confidence"] >= 0.70
    assert len(outcome["evidence"]) > 0
def test_matching_vs_keybreak_unknown():
    """With no IFs, no files and no patterns the result is expected to be unknown."""
    empty_if_profile = {"total": 0, "comparison": 0, "equality": 0, "compound": 0, "nested_depth": 0}
    no_patterns = {"has_prev_key": False, "has_accumulator": False, "has_error_field": False}
    outcome = resolve_matching_vs_keybreak({
        "if_types": empty_if_profile,
        "select_files": {},
        "variable_patterns": no_patterns,
    })
    assert outcome["resolved_type"] == "unknown"
    assert outcome["confidence"] == 0.0
# ═══════════════════════════════════════════════════════════════════════════
# 2. confusion_groups — dedup_vs_nodedup
# ═══════════════════════════════════════════════════════════════════════════
def test_dedup_vs_nodedup_dedup():
    """has_prev_key=True is expected to resolve to the "duplicates included" type."""
    patterns = {"has_prev_key": True, "has_accumulator": False, "has_error_field": False}
    outcome = resolve_dedup_vs_nodedup({"variable_patterns": patterns})
    assert outcome["resolved_type"] == "項目チェック(重複含む)"
    assert outcome["confidence"] >= 0.85
def test_dedup_vs_nodedup_nodedup():
    """has_prev_key=False is expected to resolve to the "duplicates excluded" type."""
    patterns = {"has_prev_key": False, "has_accumulator": False, "has_error_field": False}
    outcome = resolve_dedup_vs_nodedup({"variable_patterns": patterns})
    assert outcome["resolved_type"] == "項目チェック(重複含まず)"
    assert outcome["confidence"] >= 0.70
# ═══════════════════════════════════════════════════════════════════════════
# 3. confusion_groups — validation_vs_keybreak
# ═══════════════════════════════════════════════════════════════════════════
def test_validation_vs_keybreak_validation():
    """An error-flag variable pattern is expected to resolve to the validation type."""
    # NOTE(review): this test sets "has_error_flag" while the sibling tests
    # use "has_error_field" — confirm which key the resolver actually reads.
    patterns = {"has_error_flag": True, "has_counter": False, "has_prev_key": False}
    outcome = resolve_validation_vs_keybreak({"variable_patterns": patterns})
    assert outcome["resolved_type"] == "編集処理(校验)"
    assert outcome["confidence"] >= 0.70
def test_validation_vs_keybreak_keybreak():
    """has_counter=True without an error field is expected to give キーブレイク."""
    patterns = {"has_error_field": False, "has_counter": True, "has_prev_key": False}
    outcome = resolve_validation_vs_keybreak({"variable_patterns": patterns})
    assert outcome["resolved_type"] == "キーブレイク"
    assert outcome["confidence"] >= 0.75
def test_validation_vs_keybreak_unknown():
    """Neither an error field nor a counter → the result is expected to be unknown."""
    patterns = {"has_error_field": False, "has_counter": False, "has_prev_key": False}
    outcome = resolve_validation_vs_keybreak({"variable_patterns": patterns})
    assert outcome["resolved_type"] == "unknown"
# ═══════════════════════════════════════════════════════════════════════════
# 4. confusion_groups — csv_merge_vs_split
# ═══════════════════════════════════════════════════════════════════════════
def test_csv_merge_vs_split_merge():
    """has_string=True alone is expected to resolve to the CSV merge type."""
    outcome = resolve_csv_merge_vs_split(dict(has_string=True, has_inspect=False))
    assert outcome["resolved_type"] == "CSV合并"
    assert outcome["confidence"] >= 0.70
def test_csv_merge_vs_split_split():
    """has_inspect=True alone is expected to resolve to the CSV split type."""
    outcome = resolve_csv_merge_vs_split(dict(has_string=False, has_inspect=True))
    assert outcome["resolved_type"] == "CSV拆分"
    assert outcome["confidence"] >= 0.70
def test_csv_merge_vs_split_both():
    """With both flags set, the merge type (has_string) is expected to win."""
    outcome = resolve_csv_merge_vs_split(dict(has_string=True, has_inspect=True))
    assert outcome["resolved_type"] == "CSV合并"
def test_csv_merge_vs_split_unknown():
    """With neither flag set the result is expected to be unknown."""
    outcome = resolve_csv_merge_vs_split(dict(has_string=False, has_inspect=False))
    assert outcome["resolved_type"] == "unknown"
# ═══════════════════════════════════════════════════════════════════════════
# 5. confusion_groups — simple_vs_two_stage
# ═══════════════════════════════════════════════════════════════════════════
def test_simple_vs_two_stage_two_stage():
    """open_pattern "open-close-open" is expected to give two-stage matching."""
    outcome = resolve_simple_vs_two_stage(dict(open_pattern="open-close-open"))
    assert outcome["resolved_type"] == "二段階マッチング"
    assert outcome["confidence"] >= 0.85
def test_simple_vs_two_stage_simple():
    """open_pattern "sequential" is expected to give simple matching."""
    outcome = resolve_simple_vs_two_stage(dict(open_pattern="sequential"))
    assert outcome["resolved_type"] == "単純マッチング"
    assert outcome["confidence"] >= 0.75
# ═══════════════════════════════════════════════════════════════════════════
# 6. confusion_groups — pure_vs_mixed
# ═══════════════════════════════════════════════════════════════════════════
def test_pure_vs_mixed_mixed():
    """has_switch + has_counter with three IFs is expected to give mixed matching."""
    patterns = {"has_switch": True, "has_counter": True}
    outcome = resolve_pure_vs_mixed({
        "variable_patterns": patterns,
        "if_types": {"total": 3},
    })
    assert outcome["resolved_type"] == "混合マッチング"
    assert outcome["confidence"] >= 0.70
def test_pure_vs_mixed_pure():
    """Without any mixed-matching features the result is expected to be unknown."""
    patterns = {"has_switch": False, "has_counter": False}
    outcome = resolve_pure_vs_mixed({
        "variable_patterns": patterns,
        "if_types": {"total": 1},
    })
    assert outcome["resolved_type"] == "unknown"
# ═══════════════════════════════════════════════════════════════════════════
# 7. confusion_groups — division_50_25_100
# ═══════════════════════════════════════════════════════════════════════════
def test_division_50():
    """A divide constant of 50 is expected to resolve to DIVIDE_50."""
    outcome = resolve_division_50_25_100(dict(divide_constants=[50]))
    assert outcome["resolved_type"] == "DIVIDE_50"
    assert outcome["confidence"] >= 0.90
def test_division_100():
    """A divide constant of 100 is expected to resolve to DIVIDE_100."""
    outcome = resolve_division_50_25_100(dict(divide_constants=[100]))
    assert outcome["resolved_type"] == "DIVIDE_100"
    assert outcome["confidence"] >= 0.90
def test_division_unknown():
    """Constants 10 and 20 match nothing, so unknown with confidence 0.0 is expected."""
    outcome = resolve_division_50_25_100(dict(divide_constants=[10, 20]))
    assert outcome["resolved_type"] == "unknown"
    assert outcome["confidence"] == 0.0
def test_division_empty():
    """An empty constant list is expected to resolve to unknown."""
    outcome = resolve_division_50_25_100(dict(divide_constants=[]))
    assert outcome["resolved_type"] == "unknown"
# ═══════════════════════════════════════════════════════════════════════════
# 8. confusion_groups — mn_output_mode
# ═══════════════════════════════════════════════════════════════════════════
def test_mn_output_mode_known():
    """Three SELECT files with total_branches=3 are expected to resolve to M:N."""
    three_files = {name: {} for name in ("a", "b", "c")}
    outcome = resolve_mn_output_mode({"select_files": three_files, "total_branches": 3})
    assert outcome["resolved_type"] == "M:N"
    assert outcome["confidence"] >= 0.60
def test_mn_output_mode_unknown():
    """No hint and only two files → unknown with confidence 0.0 is expected."""
    two_files = {name: {} for name in ("a", "b")}
    outcome = resolve_mn_output_mode({"has_mn_output_hint": False, "select_files": two_files})
    assert outcome["resolved_type"] == "unknown"
    assert outcome["confidence"] == 0.0
def test_mn_output_mode_many_files():
    """Three files without a hint are still expected to resolve to M:N."""
    three_files = {name: {} for name in ("a", "b", "c")}
    outcome = resolve_mn_output_mode({"has_mn_output_hint": False, "select_files": three_files})
    assert outcome["resolved_type"] == "M:N"
    assert outcome["confidence"] >= 0.55
# ═══════════════════════════════════════════════════════════════════════════
# 9. resolve_confusion_pair — dispatcher
# ═══════════════════════════════════════════════════════════════════════════
def test_resolve_confusion_pair_dispatch():
    """resolve_confusion_pair handles a known pair name and an unknown one."""
    patterns = {"has_prev_key": True, "has_accumulator": False, "has_error_field": False}
    features = {"variable_patterns": patterns}

    known = resolve_confusion_pair(features, "dedup_vs_nodedup")
    assert known["resolved_type"] == "項目チェック(重複含む)"

    unrecognised = resolve_confusion_pair(features, "nonexistent_pair")
    assert unrecognised["resolved_type"] == "unknown"
    # The first evidence entry is expected to contain this message fragment.
    assert "未知混淆对名称" in unrecognised["evidence"][0]
# ═══════════════════════════════════════════════════════════════════════════
# 10. contradiction — detect_contradictions
# ═══════════════════════════════════════════════════════════════════════════
def test_detect_contradictions_empty():
    """An empty resolved_types mapping is expected to yield no contradictions."""
    found = detect_contradictions({"resolved_types": {}})
    assert found == []
def test_detect_contradictions_no_contradiction():
    """A single resolved type is expected to yield no contradictions."""
    single_type = {"pair_1": "マッチング"}
    found = detect_contradictions({"resolved_types": single_type})
    assert found == []
def test_detect_contradictions_found():
    """マッチング together with キーブレイク is expected to be reported as a contradiction."""
    conflicting = {"pair_1": "マッチング", "pair_2": "キーブレイク"}
    found = detect_contradictions({"resolved_types": conflicting})
    assert len(found) >= 1

    # Collect the entries that name exactly this (type_a, type_b) ordering.
    hits = []
    for entry in found:
        if entry["type_a"] == "マッチング" and entry["type_b"] == "キーブレイク":
            hits.append(entry)
    assert len(hits) >= 1
# ═══════════════════════════════════════════════════════════════════════════
# 11. contradiction — resolve_contradiction
# ═══════════════════════════════════════════════════════════════════════════
def test_resolve_contradiction_priority():
    """マッチング is expected to win over キーブレイク.

    The original note cites priorities 10 vs 9; those values are not visible
    from this test.
    """
    conflict = dict(name="matching_vs_keybreak", type_a="マッチング", type_b="キーブレイク")
    winner = resolve_contradiction({}, conflict)
    assert winner == "マッチング"
def test_resolve_contradiction_csv():
    """For the CSV merge/split pair the features are expected to decide the winner.

    The original note says both types share priority 6, so the pair is
    re-judged from features; that priority is not visible from this test.
    """
    conflict = dict(name="csv_merge_vs_split", type_a="CSV合并", type_b="CSV拆分")
    string_only = dict(has_string=True, has_inspect=False)
    winner = resolve_contradiction(string_only, conflict)
    assert winner == "CSV合并"
# ═══════════════════════════════════════════════════════════════════════════
# 12. contradiction — CONTRADICTION_PAIRS constant
# ═══════════════════════════════════════════════════════════════════════════
def test_contradiction_pairs_defined():
    """CONTRADICTION_PAIRS is expected to hold exactly the eight confusion pairs."""
    expected_names = {
        "matching_vs_keybreak",
        "dedup_vs_nodedup",
        "validation_vs_keybreak",
        "csv_merge_vs_split",
        "simple_vs_two_stage",
        "pure_vs_mixed",
        "division_50_25_100",
        "mn_output_mode",
    }
    assert len(CONTRADICTION_PAIRS) == 8
    actual_names = {pair["name"] for pair in CONTRADICTION_PAIRS}
    assert actual_names == expected_names
# ═══════════════════════════════════════════════════════════════════════════
# 13. backtrack — BacktrackResolver
# ═══════════════════════════════════════════════════════════════════════════
def test_backtrack_no_contradiction():
    """Without a contradiction: backtrack_resolved is True and rounds is 0."""
    def single_type_extractor(src: str) -> dict:
        return {"resolved_types": {"pair_1": "マッチング"}, "if_types": {}}

    initial_features = {"resolved_types": {"pair_1": "マッチング"}}
    outcome = BacktrackResolver(single_type_extractor).resolve("some source", initial_features)
    assert outcome["backtrack_resolved"] is True
    assert outcome["backtrack_rounds"] == 0
def test_backtrack_with_contradiction():
    """A contradictory input is expected to be resolved in at least one round."""
    def single_type_extractor(src: str) -> dict:
        return {"resolved_types": {"pair_1": "マッチング"}, "if_types": {}}

    conflicting = {"pair_1": "マッチング", "pair_2": "キーブレイク"}
    resolver = BacktrackResolver(single_type_extractor)
    outcome = resolver.resolve("some source", {"resolved_types": conflicting})

    # Core check: at least one key with the "resolved_" prefix is present.
    # NOTE(review): "resolved_types" itself carries that prefix, so if the
    # result keeps the input key this check passes trivially — confirm.
    prefixed = []
    for key in outcome:
        if key.startswith("resolved_"):
            prefixed.append(key)
    assert len(prefixed) >= 1
    assert outcome["backtrack_rounds"] >= 1
def test_backtrack_max_rounds_degraded():
    """A contradiction that never clears is expected to end in a degraded result."""
    extractor_calls = 0

    def conflicting_types() -> dict:
        # Fresh dict on every call so no state is shared between rounds.
        return {"pair_1": "マッチング", "pair_2": "キーブレイク"}

    def extractor(src: str) -> dict:
        nonlocal extractor_calls
        extractor_calls += 1
        # Every extraction still contains the contradictory pair.
        return {"resolved_types": conflicting_types()}

    resolver = BacktrackResolver(extractor)
    resolver.max_rounds = 2
    outcome = resolver.resolve("some source", {"resolved_types": conflicting_types()})
    assert outcome["backtrack_degraded"] is True
    # At least one backtrack round should have been attempted.
    assert outcome["backtrack_rounds"] >= 1
def test_backtrack_extract_error():
    """A raising extractor is expected to set backtrack_extract_error on the result."""
    def failing_extractor(src: str) -> dict:
        raise ValueError("extraction failed")

    conflicting = {"pair_1": "マッチング", "pair_2": "キーブレイク"}
    resolver = BacktrackResolver(failing_extractor)
    outcome = resolver.resolve("some source", {"resolved_types": conflicting})
    assert outcome.get("backtrack_extract_error") is True
def test_backtrack_empty_resolved_types():
    """With empty resolved_types the resolver is expected to return a dict.

    Renamed from test_backtrack_no_contradiction: that name is already used
    by an earlier test in this module, and a second definition with the same
    name replaces the first so only one of the two would be collected.
    """
    def fast_extractor(src: str) -> dict:
        return {"resolved_types": {}}

    resolver = BacktrackResolver(fast_extractor)
    result = resolver.resolve("source", {"resolved_types": {}})
    assert isinstance(result, dict)
# ═══════════════════════════════════════════════════════════════════════════
# 14. Integration — full round-trip via resolve_confusion_pair
# ═══════════════════════════════════════════════════════════════════════════
def test_integration_matching_roundtrip():
    """Round trip through resolve_confusion_pair for the matching_vs_keybreak pair."""
    if_profile = {"total": 5, "comparison": 3, "equality": 1, "compound": 1, "nested_depth": 2}
    no_patterns = {"has_prev_key": False, "has_accumulator": False, "has_error_field": False}
    features = {
        "if_types": if_profile,
        "select_files": {name: {} for name in ("f1", "f2")},
        "variable_patterns": no_patterns,
    }
    outcome = resolve_confusion_pair(features, "matching_vs_keybreak")
    allowed_types = ("マッチング", "キーブレイク", "unknown")
    assert outcome["resolved_type"] in allowed_types
    assert "confidence" in outcome
    assert "evidence" in outcome
def test_integration_contradiction_resolve_cycle():
    """Detect a contradiction, then resolve its first entry to one of the two types."""
    conflicting = {"from_keyword": "マッチング", "from_llm": "キーブレイク"}
    features = {"resolved_types": conflicting}

    found = detect_contradictions(features)
    assert len(found) >= 1

    first_conflict = found[0]
    winner = resolve_contradiction(features, first_conflict)
    assert winner in ("マッチング", "キーブレイク")