Files
cobol-java-v3/data/diff_result.py
T
hangshuo652 bc1d56d1a4 feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark
P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged to hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-19 23:51:55 +08:00

112 lines
3.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""管道运行结果模型 — 对比结果 + 全管道运行记录
Usage example:
    fr = FieldResult(field_name="TX-AMOUNT", status="MISMATCH",
                     cobol_value="1500000", java_value="1499999.99")
    vr = VerificationRun(program="BILL-CALC", runner="native")
"""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional
@dataclass
class FieldResult:
    """Outcome of comparing one field between the COBOL and Java sides.

    Attributes:
        field_name: Name of the compared field.
        status: Comparison status, one of:
            ``PASS``      - values are exactly identical
            ``TOLERATED`` - values differ but stay within the tolerance
            ``MISMATCH``  - values differ
            ``NOT_SET``   - the value is missing on one side
        cobol_value: Raw value from the COBOL side, kept as a string.
        java_value: Raw value from the Java side, kept as a string.
        tolerance_applied: Tolerance actually used for this comparison.
        rounding_detected: Kind of rounding that was detected, if any.
        suggestion: Diagnostic suggestion text produced automatically
            by the LLM.
    """

    # Identity and verdict of the comparison.
    field_name: str = ""
    status: str = "PASS"

    # Raw values from each side, stored as text.
    cobol_value: str = ""
    java_value: str = ""

    # Diagnostics attached to the comparison.
    tolerance_applied: float = 0.0
    rounding_detected: str = ""
    suggestion: str = ""
@dataclass
class VerificationRun:
    """Complete record of a single pipeline run.

    According to the original documentation this object is what
    ``orchestrator.run_pipeline()`` returns.

    Attributes:
        program: Program name.
        timestamp: Run timestamp; filled in automatically as
            ``YYYYMMDD-HHMMSS`` when left empty.
        status: Overall status: PASS / MISMATCH / BLOCKED / ERROR / FATAL.
        exit_code: 0=pass, 1=mismatch, 2=blocked, 3=error, 4=fatal.
        duration_s: Total elapsed time in seconds.
        fields_matched: Number of fields that matched.
        fields_mismatched: Number of fields that did not match.
        coverage_target: Coverage target: "" / "boundary" / "all-paths".
        field_results: List of per-field comparison results.
        runner: Runner used: native / spark.
        branch_rate: Branch coverage rate (static analysis).
        paragraph_rate: Paragraph coverage rate (static analysis).
        decision_rate: Decision-point coverage rate.
        hina_type: HINA classification type.
        hina_confidence: HINA confidence.
        quality_score: Quality score (0~1).
        quality_warn: Quality warning.
        heal_retry: Number of self-healing retries.
        simple_retry: Number of plain retries.
        total_retry: Total number of retries.
        llm_cost: Accumulated LLM cost in USD.
        report_path: Output path of the report.
        debug: Debug information (no compatibility guarantee).
    """

    # Run identity and overall outcome.
    program: str = ""
    timestamp: str = ""
    status: str = "PASS"
    exit_code: int = 0
    duration_s: float = 0.0

    # Field comparison tallies and details.
    fields_matched: int = 0
    fields_mismatched: int = 0
    coverage_target: str = "boundary"
    field_results: list[FieldResult] = field(default_factory=list)
    runner: str = "native"

    # Coverage rates.
    branch_rate: float = 0.0
    paragraph_rate: float = 0.0
    decision_rate: float = 0.0

    # HINA classification and quality gate.
    hina_type: str = ""
    hina_confidence: float = 0.0
    quality_score: float = 0.0
    quality_warn: str = ""

    # Retry counters and cost.
    heal_retry: int = 0
    simple_retry: int = 0
    total_retry: int = 0
    llm_cost: float = 0.0

    # Reporting and diagnostics.
    report_path: str = ""
    debug: dict = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Stamp the run with the current local time if no timestamp was given."""
        if self.timestamp:
            # Caller supplied a timestamp; keep it untouched.
            return
        self.timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

    @property
    def total_fields(self) -> int:
        """Total compared fields: matched plus mismatched."""
        matched, mismatched = self.fields_matched, self.fields_mismatched
        return matched + mismatched

    def verdict(self) -> str:
        """Return the overall ``status`` of this run."""
        return self.status
# Import-time smoke checks: build one instance of each model and confirm that
# keyword arguments are stored and that an omitted timestamp gets filled in.
# NOTE: these are plain asserts, so they are skipped when Python runs with -O.
_fr = FieldResult(status="MISMATCH", field_name="BR-AMT")
assert _fr.status == "MISMATCH"

_vr = VerificationRun(runner="spark", program="BILL-CALC")
assert _vr.program == "BILL-CALC"
assert _vr.timestamp != ""