bc1d56d1a4
P0.6: gcov infrastructure P1: extract_structure output expansion (11 new feature fields) P2: Confusion group rule engine (8 pairs + contradiction + backtrack) P3: 4-factor confidence calculation + quality gate update P4: 33+2 COBOL program type test samples (22 files, 7 categories) P5: parametrized/ test data generation engine P6: japanese_data.py lookup tables P7-10: Type-specific test suites (~159 parametrized tests) P11: Full classification pipeline (classify_program) + orchestrator integration P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix) Architecture decisions: - classification_pipeline/ merged to hina/pipeline/ - parametrized/ as independent module - japanese_data.py as root-level file - hina/__all__ only exports classify_program() Co-Authored-By: Claude <noreply@anthropic.com>
112 lines
3.9 KiB
Python
112 lines
3.9 KiB
Python
"""Pipeline run result models: field comparison results plus the full pipeline run record.

Example usage:
    fr = FieldResult(field_name="TX-AMOUNT", status="MISMATCH",
                     cobol_value="1500000", java_value="1499999.99")
    vr = VerificationRun(program="BILL-CALC", runner="native")
"""
|
||
|
||
from __future__ import annotations
|
||
from dataclasses import dataclass, field
|
||
from datetime import datetime
|
||
from typing import Optional
|
||
|
||
|
||
@dataclass
class FieldResult:
    """Comparison result for a single field between the COBOL and Java sides.

    Every attribute has a default, so an instance can be created empty and
    filled in afterwards. ``status`` is stored as a plain string; this class
    performs no validation of it.

    Attributes:
        field_name: Name of the compared field.
        status: Comparison status, one of:
            PASS      - values are identical
            TOLERATED - values differ but stay within the tolerance
            MISMATCH  - values differ
            NOT_SET   - the value is missing on one side
        cobol_value: Raw COBOL-side value (as a string).
        java_value: Raw Java-side value (as a string).
        tolerance_applied: Tolerance actually used for this comparison.
        rounding_detected: Detected rounding type.
        suggestion: Automatic diagnostic suggestion text produced by the LLM.
    """

    field_name: str = ""
    status: str = "PASS"
    cobol_value: str = ""
    java_value: str = ""
    tolerance_applied: float = 0.0
    rounding_detected: str = ""
    suggestion: str = ""
|
||
|
||
|
||
@dataclass
class VerificationRun:
    """Complete record of a single pipeline run.

    According to the original module documentation this is the object
    returned by ``orchestrator.run_pipeline()``.

    Attributes:
        program: Program name.
        timestamp: Run timestamp; filled in automatically as
            ``YYYYMMDD-HHMMSS`` when left empty.
        status: Overall status: PASS / MISMATCH / BLOCKED / ERROR / FATAL.
        exit_code: 0=pass, 1=mismatch, 2=blocked, 3=error, 4=fatal.
            It is stored as given and is not derived from ``status`` here.
        duration_s: Total elapsed time in seconds.
        fields_matched: Number of matching fields.
        fields_mismatched: Number of mismatching fields.
        coverage_target: Coverage target: "" / "boundary" / "all-paths".
        field_results: List of per-field comparison results.
        runner: Runner kind: native / spark.
        branch_rate: Branch coverage rate (static analysis).
        paragraph_rate: Paragraph coverage rate (static analysis).
        decision_rate: Decision-point coverage rate.
        hina_type: HINA classification type.
        hina_confidence: HINA confidence.
        quality_score: Quality score (0~1).
        quality_warn: Quality warning.
        heal_retry: Number of self-healing retries.
        simple_retry: Number of plain retries.
        total_retry: Total number of retries.
        llm_cost: Cumulative LLM cost in USD.
        report_path: Output path of the report.
        debug: Debug information (no compatibility guarantee).
    """

    program: str = ""
    timestamp: str = ""
    status: str = "PASS"
    exit_code: int = 0
    duration_s: float = 0.0
    fields_matched: int = 0
    fields_mismatched: int = 0
    coverage_target: str = "boundary"
    # Mutable defaults go through default_factory so instances never share them.
    field_results: list[FieldResult] = field(default_factory=list)
    runner: str = "native"
    branch_rate: float = 0.0
    paragraph_rate: float = 0.0
    decision_rate: float = 0.0
    hina_type: str = ""
    hina_confidence: float = 0.0
    quality_score: float = 0.0
    quality_warn: str = ""
    heal_retry: int = 0
    simple_retry: int = 0
    total_retry: int = 0
    llm_cost: float = 0.0
    report_path: str = ""
    debug: dict = field(default_factory=dict)

    def __post_init__(self):
        """Fill in ``timestamp`` with the current time when none was given."""
        # Only an empty/falsy timestamp is replaced; a caller-supplied value
        # is kept untouched. datetime.now() is called without a tz argument,
        # so the stamp is naive local time.
        if not self.timestamp:
            self.timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

    @property
    def total_fields(self) -> int:
        """Return the sum of ``fields_matched`` and ``fields_mismatched``."""
        return self.fields_matched + self.fields_mismatched

    def verdict(self) -> str:
        """Return the overall ``status`` of this run unchanged."""
        return self.status
|
||
|
||
|
||
# Import-time smoke checks: build one instance of each model and verify that
# keyword construction works and that the timestamp is auto-filled.
# These are plain `assert` statements, so they are skipped under `python -O`.
_fr = FieldResult(field_name="BR-AMT", status="MISMATCH")
assert _fr.status == "MISMATCH"

_vr = VerificationRun(program="BILL-CALC", runner="spark")
assert _vr.program == "BILL-CALC"
assert _vr.timestamp != ""
|