feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark

P0.6: gcov infrastructure P1: extract_structure output expansion (11 new feature fields) P2: Confusion group rule engine (8 pairs + contradiction + backtrack) P3: 4-factor confidence calculation + quality gate update P4: 33+2 COBOL program type test samples (22 files, 7 categories) P5: parametrized/ test data generation engine P6: japanese_data.py lookup tables P7-10: Type-specific test suites (~159 parametrized tests) P11: Full classification pipeline (classify_program) + orchestrator integration P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix) Architecture decisions: - classification_pipeline/ merged to hina/pipeline/ - parametrized/ as independent module - japanese_data.py as root-level file - hina/__all__ only exports classify_program() Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-19 23:51:55 +08:00
parent 63b5284715
commit bc1d56d1a4
129 changed files with 19378 additions and 261 deletions
@@ -0,0 +1,112 @@
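+"""
+Four-factor confidence calculation.
+
+Formula: confidence = base × context_factor × consistency_factor × structure_factor
+
+Judgment thresholds:
+  confidence >= 0.90          auto       — accepted automatically
+  0.70 <= confidence < 0.90   review     — needs human review
+  0.50 <= confidence < 0.70   manual     — needs manual handling
+  confidence < 0.50           impossible — cannot be determined
+"""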
+
+from __future__ import annotations
+
+from typing import Any
+
+
+def compute_confidence_v2(
+    keyword_result: dict[str, Any],
+    structure_features: dict[str, Any],
+    contradictions: list[dict[str, Any]] | None = None,
+    resolution: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    """4 因子确信度计算。
+
+    Args:
+        keyword_result: L1 关键字判定结果，
+            例如 {"category": "DB操作", "base_confidence": 0.95, "match_count": 3}
+        structure_features: 结构特征分析结果，
+            例如 {"structure_match_score": 5, "total_paragraphs": 10}
+        contradictions: 矛盾列表，每条包含 {"type": str, "resolved": bool, ...}
+        resolution: 矛盾解决方案，
+            例如 {"resolved_count": 0, "total_count": 0}
+
+    Returns:
+        dict: {
+            "confidence": float,        # 综合确信度 (0.0 ~ 1.0)
+            "base": float,              # 基础确信度
+            "context_factor": float,    # 上下文因子
+            "consistency_factor": float,# 一致性因子
+            "structure_factor": float,  # 结构一致性因子
+            "judgment": str,            # 判定结果 (auto/review/manual/impossible)
+            "needs_review": bool,       # 是否需要人工审核
+        }
+    """
+    # ── 1. 基础确信度 ──
+    base = keyword_result.get("base_confidence", 0.7)
+
+    # ── 2. 上下文因子（关键字匹配数）──
+    match_count = keyword_result.get("match_count", 0)
+    if match_count >= 3:
+        context_factor = 1.0
+    elif match_count == 2:
+        context_factor = 0.95
+    elif match_count == 1:
+        context_factor = 0.90
+    else:
+        context_factor = 0.50
+
+    # ── 3. 一致性因子（矛盾检测）──
+    contradictions = contradictions or []
+    unresolved_count = sum(1 for c in contradictions if not c.get("resolved", False))
+    total_contradictions = len(contradictions)
+
+    if total_contradictions == 0:
+        consistency_factor = 1.0
+    elif unresolved_count == 0:
+        # 有矛盾但全部已解决
+        consistency_factor = 0.90
+    elif total_contradictions >= 3:
+        consistency_factor = 0.50
+    else:
+        # 有未解决的矛盾，但少于 3 个
+        consistency_factor = 0.80
+
+    # ── 4. 结构一致性因子 ──
+    structure_score = structure_features.get("structure_match_score", 0)
+    if structure_score == 5:
+        structure_factor = 1.0
+    elif structure_score >= 3:
+        structure_factor = 0.7
+    elif structure_score >= 1:
+        structure_factor = 0.5
+    else:
+        structure_factor = 0.3
+
+    # ── 计算综合确信度 ──
+    confidence = round(base * context_factor * consistency_factor * structure_factor, 4)
+
+    # ── 判定 ──
+    if confidence >= 0.90:
+        judgment = "auto"
+        needs_review = False
+    elif confidence >= 0.70:
+        judgment = "review"
+        needs_review = True
+    elif confidence >= 0.50:
+        judgment = "manual"
+        needs_review = True
+    else:
+        judgment = "impossible"
+        needs_review = True
+
+    return {
+        "confidence": confidence,
+        "base": base,
+        "context_factor": context_factor,
+        "consistency_factor": consistency_factor,
+        "structure_factor": structure_factor,
+        "judgment": judgment,
+        "needs_review": needs_review,
+    }