feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark

P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged into hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-19 23:51:55 +08:00
parent 63b5284715
commit bc1d56d1a4
129 changed files with 19378 additions and 261 deletions
@@ -1,3 +1,11 @@
+"""管道运行结果模型 — 对比结果 + 全管道运行记录
+
+使用例:
+  fr = FieldResult(field_name="TX-AMOUNT", status="MISMATCH",
+                    cobol_value="1500000", java_value="1499999.99")
+  vr = VerificationRun(program="BILL-CALC", runner="native")
+"""
+
 from __future__ import annotations
 from dataclasses import dataclass, field
 from datetime import datetime
@@ -6,6 +14,21 @@ from typing import Optional

@dataclass
 class FieldResult:
+    """单个字段的 COBOL ↔ Java 对比结果。
+
+    ────────── 字段说明 ──────────
+    field_name         — 字段名
+    status             — 对比状态:
+                         PASS       = 完全一致
+                         TOLERATED  = 在容忍度范围内
+                         MISMATCH   = 不一致
+                         NOT_SET    = 某一侧的值缺失
+    cobol_value        — COBOL 侧原始值（字符串）
+    java_value         — Java 侧原始值（字符串）
+    tolerance_applied  — 本次使用的实际容忍度
+    rounding_detected  — 检测到的舍入类型
+    suggestion         — LLM 自动诊断建议文本
+    """
    field_name: str = ""
    status: str = "PASS"
    cobol_value: str = ""
@@ -17,6 +40,33 @@ class FieldResult:

@dataclass
 class VerificationRun:
+    """单次管道运行的完整记录 — 由 orchestrator.run_pipeline() 返回。
+
+    ────────── 字段说明 ──────────
+    program            — 程序名
+    timestamp          — 时间戳（自动: YYYYMMDD-HHMMSS）
+    status             — 整体状态: PASS / MISMATCH / BLOCKED / ERROR / FATAL
+    exit_code          — 0=通过  1=不匹配  2=阻塞  3=错误  4=致命
+    duration_s         — 总耗时秒
+    fields_matched     — 一致字段数
+    fields_mismatched  — 不一致字段数
+    coverage_target    — 覆盖率目标: "" / "boundary" / "all-paths"
+    field_results      — 字段对比结果列表
+    runner             — native / spark
+    branch_rate        — 分支覆盖率（静态分析）
+    paragraph_rate     — 段落覆盖率（静态分析）
+    decision_rate      — 决策点覆盖率
+    hina_type          — HINA 分类类型
+    hina_confidence    — HINA 确信度
+    quality_score      — 质量评分 (0~1)
+    quality_warn       — 质量警告
+    heal_retry         — 自愈重试次数
+    simple_retry       — 朴素重试次数
+    total_retry        — 总重试次数
+    llm_cost           — LLM 累计成本 USD
+    report_path        — 报告输出路径
+    debug              — 调试信息（不保证兼容性）
+    """
    program: str = ""
    timestamp: str = ""
    status: str = "PASS"
@@ -28,15 +78,15 @@ class VerificationRun:
    field_results: list[FieldResult] = field(default_factory=list)
    runner: str = "native"
    branch_rate: float = 0.0
-    paragraph_rate: float = 0.0          # 段落覆盖率
-    decision_rate: float = 0.0            # 决策点覆盖率
-    hina_type: str = ""                   # HINA 类型
-    hina_confidence: float = 0.0          # HINA 确信度
-    quality_score: float = 0.0            # 质量评分
-    quality_warn: str = ""                # 质量警告信息
-    heal_retry: int = 0                   # 自愈重试次数
-    simple_retry: int = 0                 # 朴素重试次数
-    total_retry: int = 0                  # 总重试次数
+    paragraph_rate: float = 0.0
+    decision_rate: float = 0.0
+    hina_type: str = ""
+    hina_confidence: float = 0.0
+    quality_score: float = 0.0
+    quality_warn: str = ""
+    heal_retry: int = 0
+    simple_retry: int = 0
+    total_retry: int = 0
    llm_cost: float = 0.0
    report_path: str = ""
    debug: dict = field(default_factory=dict)