"""测试: 确信度 4 因子计算 + 质量门禁评分 + 覆盖率比较""" import pytest from hina.confidence import compute_confidence_v2 from hina.gate import compute_quality_score, check as gate_check from coverage.compare_coverage import compare_coverage # ── compute_confidence_v2 判定阈值测试 ── def test_auto_judgment(): """确信度 >= 0.90 → auto""" keyword_result = { "base_confidence": 1.0, "match_count": 3, } structure_features = {"structure_match_score": 5} result = compute_confidence_v2(keyword_result, structure_features) # 1.0 × 1.0 × 1.0 × 1.0 = 1.0 assert result["confidence"] == 1.0 assert result["judgment"] == "auto" assert result["needs_review"] is False def test_review_judgment(): """确信度 0.70-0.89 → review""" # Need 0.70 <= confidence < 0.90 # base=1.0, context=0.95, consistency=1.0, structure=0.7 → 0.665 → still manual # base=1.0, context=1.0, consistency=0.9, structure=0.85... hmm structure is discrete # Let's try: base=0.95, context=1.0, consistency=1.0, structure=0.7 → 0.665 (manual) # base=0.95, context=0.95(match=2), consistency=1.0, structure=0.7 → 0.63175 (manual) # base=0.95, context=1.0, consistency=0.90, structure=1.0 → 0.855 (review!) keyword_result = { "base_confidence": 0.95, "match_count": 3, } structure_features = {"structure_match_score": 5} contradictions = [ {"type": "type_mismatch", "resolved": True}, ] result = compute_confidence_v2( keyword_result, structure_features, contradictions=contradictions, ) # 0.95 × 1.0 × 0.90 × 1.0 = 0.855 assert 0.70 <= result["confidence"] < 0.90 assert result["judgment"] == "review" assert result["needs_review"] is True def test_manual_judgment(): """确信度 0.50-0.69 → manual""" keyword_result = { "base_confidence": 0.95, "match_count": 1, } structure_features = {"structure_match_score": 4} contradictions = [ {"type": "type_mismatch", "resolved": True}, ] result = compute_confidence_v2( keyword_result, structure_features, contradictions=contradictions, ) # 0.95 × 0.90 × 0.90 × 0.7 = 0.53865 assert 0.50 <= result["confidence"] < 0.70 assert result["judgment"] == "manual" assert result["needs_review"] is True def test_impossible_judgment(): """确信度 < 0.50 → impossible""" keyword_result = { "base_confidence": 0.7, "match_count": 0, } structure_features = {"structure_match_score": 0} result = compute_confidence_v2(keyword_result, structure_features) # 0.7 × 0.50 × 1.0 × 0.3 = 0.105 assert result["confidence"] < 0.50 assert result["judgment"] == "impossible" assert result["needs_review"] is True # ── 因子边界测试 ── def test_context_factor_match_counts(): """关键字匹配数对上下文因子的影响""" # match_count >= 3 → context_factor = 1.0 r = compute_confidence_v2( {"base_confidence": 1.0, "match_count": 5}, {"structure_match_score": 5}, ) assert r["context_factor"] == 1.0 assert r["confidence"] == 1.0 # match_count == 2 → context_factor = 0.95 r = compute_confidence_v2( {"base_confidence": 1.0, "match_count": 2}, {"structure_match_score": 5}, ) assert r["context_factor"] == 0.95 assert r["confidence"] == 0.95 # match_count == 1 → context_factor = 0.90 r = compute_confidence_v2( {"base_confidence": 1.0, "match_count": 1}, {"structure_match_score": 5}, ) assert r["context_factor"] == 0.90 assert r["confidence"] == 0.90 # match_count == 0 → context_factor = 0.50 r = compute_confidence_v2( {"base_confidence": 1.0, "match_count": 0}, {"structure_match_score": 5}, ) assert r["context_factor"] == 0.50 assert r["confidence"] == 0.50 def test_consistency_factor_contradictions(): """矛盾数量对一致性因子的影响""" # 无矛盾 → 1.0 r = compute_confidence_v2( {"base_confidence": 1.0, "match_count": 3}, {"structure_match_score": 5}, contradictions=[], ) assert r["consistency_factor"] == 1.0 # 已解决 → 0.90 r = compute_confidence_v2( {"base_confidence": 1.0, "match_count": 3}, {"structure_match_score": 5}, contradictions=[{"type": "t1", "resolved": True}], ) assert r["consistency_factor"] == 0.90 # 未解决 < 3 → 0.80 r = compute_confidence_v2( {"base_confidence": 1.0, "match_count": 3}, {"structure_match_score": 5}, contradictions=[{"type": "t1", "resolved": False}], ) assert r["consistency_factor"] == 0.80 # ≥3 未解决 → 0.50 r = compute_confidence_v2( {"base_confidence": 1.0, "match_count": 3}, {"structure_match_score": 5}, contradictions=[ {"type": "t1", "resolved": False}, {"type": "t2", "resolved": False}, {"type": "t3", "resolved": True}, ], ) assert r["consistency_factor"] == 0.50 def test_structure_factor_scores(): """结构匹配度对结构一致性因子的影响""" # 5/5 → 1.0 r = compute_confidence_v2( {"base_confidence": 1.0, "match_count": 3}, {"structure_match_score": 5}, ) assert r["structure_factor"] == 1.0 # 3-4/5 → 0.7 r = compute_confidence_v2( {"base_confidence": 1.0, "match_count": 3}, {"structure_match_score": 3}, ) assert r["structure_factor"] == 0.7 # 1-2/5 → 0.5 r = compute_confidence_v2( {"base_confidence": 1.0, "match_count": 3}, {"structure_match_score": 1}, ) assert r["structure_factor"] == 0.5 # 无法/0 → 0.3 r = compute_confidence_v2( {"base_confidence": 1.0, "match_count": 3}, {"structure_match_score": 0}, ) assert r["structure_factor"] == 0.3 def test_base_confidence_default(): """keyword_result 未提供 base_confidence 时使用默认值 0.7""" r = compute_confidence_v2( {"match_count": 3}, {"structure_match_score": 5}, ) assert r["base"] == 0.7 # ── compute_quality_score 双模式测试 ── def test_quality_score_no_gcov(): """gcov 未启用模式: branch_rate×0.5 + paragraph_rate×0.5 + confidence×0.4""" static_cov = { "branch_rate": 0.80, "paragraph_rate": 0.90, } score = compute_quality_score(static_cov, gcov_coverage=None, confidence=0.5) # 0.80×0.5 + 0.90×0.5 + 0.5×0.4 = 0.40 + 0.45 + 0.20 = 1.05 → min(1.0, 1.05) = 1.0 assert score == 1.0 def test_quality_score_no_gcov_sub_max(): """gcov 未启用模式,确保不超过 1.0 被 clamp""" static_cov = { "branch_rate": 0.60, "paragraph_rate": 0.70, } score = compute_quality_score(static_cov, gcov_coverage=None, confidence=0.8) # 0.60×0.5 + 0.70×0.5 + 0.8×0.4 = 0.30 + 0.35 + 0.32 = 0.97 assert score == 0.97 def test_quality_score_with_gcov(): """gcov 启用模式: static_cov×0.3 + gcov_cov×0.4 + confidence×0.3""" static_cov = { "branch_rate": 0.80, "paragraph_rate": 0.90, } gcov_cov = {"gcov_cov": 0.75} score = compute_quality_score(static_cov, gcov_cov, confidence=0.5) # static_cov = 0.80×0.5 + 0.90×0.5 = 0.85 # score = 0.85×0.3 + 0.75×0.4 + 0.5×0.3 = 0.255 + 0.30 + 0.15 = 0.705 assert score == 0.705 def test_quality_score_with_gcov_zero_confidence(): """gcov 启用模式,置信度为 0""" static_cov = { "branch_rate": 1.0, "paragraph_rate": 1.0, } gcov_cov = {"gcov_cov": 0.5} score = compute_quality_score(static_cov, gcov_cov, confidence=0.0) # static_cov = 1.0 # score = 1.0×0.3 + 0.5×0.4 + 0.0×0.3 = 0.30 + 0.20 + 0.0 = 0.50 assert score == 0.50 # ── compare_coverage 基本功能测试 ── def test_compare_coverage_basic(): """compare_coverage 基本功能""" static = { "branch_rate": 0.90, "paragraph_rate": 0.85, "total_branches": 20, "covered_branches": 18, } dynamic = { "gcov_cov": 0.75, "covered_branches": 15, "total_branches": 20, "misleading_branches": ["BR001", "BR003"], } result = compare_coverage("TESTPROG", static, dynamic) assert result["program"] == "TESTPROG" assert result["static"]["branch_rate"] == 0.90 assert result["static"]["paragraph_rate"] == 0.85 assert result["dynamic"]["gcov_cov"] == 0.75 # gap = (0.90×0.5 + 0.85×0.5) - 0.75 = 0.875 - 0.75 = 0.125 assert result["gap"] == 0.125 assert result["misleading_branches"] == ["BR001", "BR003"] def test_compare_coverage_no_gap(): """静态与动态完全一致时 gap 为 0""" static = { "branch_rate": 0.80, "paragraph_rate": 0.80, "total_branches": 10, "covered_branches": 8, } dynamic = { "gcov_cov": 0.80, "covered_branches": 8, "total_branches": 10, "misleading_branches": [], } result = compare_coverage("NOGAP", static, dynamic) # gap = (0.80×0.5 + 0.80×0.5) - 0.80 = 0.80 - 0.80 = 0.0 assert result["gap"] == 0.0 assert result["misleading_branches"] == [] def test_compare_coverage_no_misleading(): """没有误导分支时的返回""" static = { "branch_rate": 0.95, "paragraph_rate": 1.0, } dynamic = { "gcov_cov": 0.90, "misleading_branches": [], } result = compare_coverage("CLEAN", static, dynamic) # gap = (0.95×0.5 + 1.0×0.5) - 0.90 = 0.975 - 0.90 = 0.075 assert result["gap"] == 0.075 assert result["misleading_branches"] == [] # ── gate.check 基本功能测试 ── def test_gate_check_passed(): """质量门禁完全通过""" result = gate_check( complete_tests=[{"id": 1}], hina_result={}, coverage={"branch_rate": 1.0, "paragraph_rate": 1.0}, ) assert result["passed"] is True assert len(result["issues"]) == 0 def test_gate_check_failed_branch(): """分支覆盖率不足""" result = gate_check( complete_tests=[{"id": 1}], hina_result={}, coverage={ "branch_rate": 0.50, "paragraph_rate": 1.0, "uncovered_decision_ids": [1, 2], }, ) assert result["passed"] is False assert "decision_gaps" in result["issues"] def test_gate_check_no_data(): """无测试数据""" result = gate_check( complete_tests=[], hina_result={}, coverage={"branch_rate": 1.0, "paragraph_rate": 1.0}, ) assert result["passed"] is False assert "no_data" in result["issues"]