feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark
P0.6: gcov infrastructure P1: extract_structure output expansion (11 new feature fields) P2: Confusion group rule engine (8 pairs + contradiction + backtrack) P3: 4-factor confidence calculation + quality gate update P4: 33+2 COBOL program type test samples (22 files, 7 categories) P5: parametrized/ test data generation engine P6: japanese_data.py lookup tables P7-10: Type-specific test suites (~159 parametrized tests) P11: Full classification pipeline (classify_program) + orchestrator integration P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix) Architecture decisions: - classification_pipeline/ merged to hina/pipeline/ - parametrized/ as independent module - japanese_data.py as root-level file - hina/__all__ only exports classify_program() Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,112 @@
|
||||
"""
|
||||
Four-factor confidence calculation.

Formula: confidence = base × context_factor × consistency_factor × structure_factor

Judgment bands:
    confidence >= 0.90          auto       — accepted automatically
    0.70 <= confidence < 0.90   review     — needs human review
    0.50 <= confidence < 0.70   manual     — needs manual intervention
    confidence < 0.50           impossible — cannot be determined
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
|
||||
def compute_confidence_v2(
    keyword_result: dict[str, Any],
    structure_features: dict[str, Any],
    contradictions: list[dict[str, Any]] | None = None,
    resolution: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Compute the 4-factor classification confidence and its judgment band.

    The result is ``base * context_factor * consistency_factor *
    structure_factor``, clamped to ``[0.0, 1.0]`` and rounded to 4 decimals.

    Args:
        keyword_result: Keyword-level classification result. Reads
            ``"base_confidence"`` (default 0.7) and ``"match_count"``
            (default 0), e.g.
            ``{"category": "DB操作", "base_confidence": 0.95, "match_count": 3}``.
        structure_features: Structure analysis result. Reads
            ``"structure_match_score"`` (default 0), e.g.
            ``{"structure_match_score": 5, "total_paragraphs": 10}``.
        contradictions: Detected contradictions; each entry is a dict whose
            ``"resolved"`` flag (default ``False``) is inspected. ``None`` is
            treated as an empty list.
        resolution: Contradiction-resolution summary, e.g.
            ``{"resolved_count": 0, "total_count": 0}``. Accepted for
            interface compatibility; this function does not read it.

    Returns:
        A dict with the keys:

        * ``"confidence"`` (float): combined confidence in ``[0.0, 1.0]``.
        * ``"base"`` (float): base confidence that was used.
        * ``"context_factor"`` (float): 1.0 / 0.95 / 0.90 / 0.50 for
          >=3 / 2 / 1 / other keyword matches.
        * ``"consistency_factor"`` (float): 1.0 with no contradictions,
          0.90 when all are resolved, 0.50 when at least one is unresolved
          and there are 3 or more contradictions in total, otherwise 0.80.
        * ``"structure_factor"`` (float): 1.0 / 0.7 / 0.5 / 0.3 for a
          structure score of >=5 / >=3 / >=1 / lower.
        * ``"judgment"`` (str): ``"auto"`` (>= 0.90), ``"review"``
          (>= 0.70), ``"manual"`` (>= 0.50) or ``"impossible"``.
        * ``"needs_review"`` (bool): ``False`` only for ``"auto"``.
    """
    # -- 1. Base confidence --
    # A key that is present but None gets the same default as a missing key,
    # instead of failing later in the multiplication.
    base = keyword_result.get("base_confidence")
    if base is None:
        base = 0.7

    # -- 2. Context factor (number of keyword matches) --
    match_count = keyword_result.get("match_count") or 0
    if match_count >= 3:
        context_factor = 1.0
    elif match_count == 2:
        context_factor = 0.95
    elif match_count == 1:
        context_factor = 0.90
    else:
        context_factor = 0.50

    # -- 3. Consistency factor (contradiction detection) --
    contradictions = contradictions or []
    total_contradictions = len(contradictions)
    unresolved_count = sum(
        1 for item in contradictions if not item.get("resolved", False)
    )

    if total_contradictions == 0:
        consistency_factor = 1.0
    elif unresolved_count == 0:
        # Contradictions were found, but every one has been resolved.
        consistency_factor = 0.90
    elif total_contradictions >= 3:
        # NOTE: this threshold is on the total number of contradictions
        # (resolved ones included), given that at least one is unresolved.
        consistency_factor = 0.50
    else:
        # At least one unresolved contradiction, fewer than 3 in total.
        consistency_factor = 0.80

    # -- 4. Structure factor --
    structure_score = structure_features.get("structure_match_score") or 0
    # ">= 5" rather than "== 5": a score above the top tier must not be
    # demoted to the 0.7 tier.
    if structure_score >= 5:
        structure_factor = 1.0
    elif structure_score >= 3:
        structure_factor = 0.7
    elif structure_score >= 1:
        structure_factor = 0.5
    else:
        structure_factor = 0.3

    # -- Combined confidence --
    raw = base * context_factor * consistency_factor * structure_factor
    # Clamp so an out-of-range base cannot break the documented 0.0-1.0 range.
    confidence = round(min(max(raw, 0.0), 1.0), 4)

    # -- Judgment band --
    if confidence >= 0.90:
        judgment = "auto"
    elif confidence >= 0.70:
        judgment = "review"
    elif confidence >= 0.50:
        judgment = "manual"
    else:
        judgment = "impossible"
    needs_review = judgment != "auto"

    return {
        "confidence": confidence,
        "base": base,
        "context_factor": context_factor,
        "consistency_factor": consistency_factor,
        "structure_factor": structure_factor,
        "judgment": judgment,
        "needs_review": needs_review,
    }
|
||||
Reference in New Issue
Block a user