feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark

P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged to hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-19 23:51:55 +08:00
parent 63b5284715
commit bc1d56d1a4
129 changed files with 19378 additions and 261 deletions
@@ -5,6 +5,10 @@ Phase 1 可用: 决策点覆盖、段落覆盖
 Phase 2 启用: HINA 必须项、字段覆盖
 """

+from __future__ import annotations
+
+from typing import Any
+

 def check(
    complete_tests: list,
@@ -60,3 +64,43 @@ def _compute_score(coverage: dict, hina_result: dict) -> float:
    boundary_quality = 1.0

    return round(coverage_quality * 0.6 + boundary_quality * 0.4, 2)
+
+
+def compute_quality_score(
+    static_coverage: dict[str, Any],
+    gcov_coverage: dict[str, Any] | None = None,
+    confidence: float = 0.5,
+) -> float:
+    """Compute a dual-mode quality score.
+
+    Mode 1 — gcov disabled (gcov_coverage is None):
+        score = branch_rate * 0.5 + paragraph_rate * 0.5 + confidence * 0.4
+        confidence acts as a bonus term (at most +0.4 when confidence <= 1.0)
+
+    Mode 2 — gcov enabled:
+        score = static_cov * 0.3 + gcov_cov * 0.4 + confidence * 0.3
+        where static_cov = branch_rate * 0.5 + paragraph_rate * 0.5
+
+    Args:
+        static_coverage: static coverage data; the keys "branch_rate" and
+            "paragraph_rate" are read, each defaulting to 0.0 when absent
+        gcov_coverage: gcov dynamic coverage data, or None when gcov is off;
+            the key "gcov_cov" is read, defaulting to 0.0 when absent
+        confidence: confidence level (documented range 0.0 ~ 1.0)
+
+    Returns:
+        float: score capped at 1.0 (no lower clamp), rounded to 4 decimals
+    """
+    branch_rate = static_coverage.get("branch_rate", 0.0)
+    paragraph_rate = static_coverage.get("paragraph_rate", 0.0)
+    static_cov = branch_rate * 0.5 + paragraph_rate * 0.5
+
+    if gcov_coverage is not None:
+        # Mode 2: gcov enabled
+        gcov_cov = gcov_coverage.get("gcov_cov", 0.0)
+        score = static_cov * 0.3 + gcov_cov * 0.4 + confidence * 0.3
+    else:
+        # Mode 1: gcov disabled — confidence is added as a bonus term
+        score = branch_rate * 0.5 + paragraph_rate * 0.5 + confidence * 0.4
+
+    return round(min(score, 1.0), 4)