feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark
P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-P10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged into hina/pipeline/
- parametrized/ as an independent module
- japanese_data.py as a root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1,3 +1,11 @@
|
||||
"""字段树模型 — COPYBOOK 解析后的字段结构
|
||||
|
||||
使用例:
|
||||
field = Field(name="TX-AMOUNT", level=5, pic="S9(7)V99", usage="COMP-3")
|
||||
tree = FieldTree(fields=[field], copybook_name="TXCPY")
|
||||
flat = tree.flatten() # → {"TX-AMOUNT": field}
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
@@ -5,6 +13,25 @@ from typing import Optional
|
||||
|
||||
@dataclass
|
||||
class Field:
|
||||
"""单个字段定义(对应 COBOL DATA DIVISION 中的一行 01/05/10/77/88 级条目)。
|
||||
|
||||
────────── 字段说明 ──────────
|
||||
name — 字段名(大写,如 WS-AMOUNT)
|
||||
level — 层级号(01~49 / 77 / 88)
|
||||
pic — PIC 字符串(如 "S9(7)V99", "X(10)", "9(4)")
|
||||
usage — 存储类型: DISPLAY / COMP / COMP-3 / COMP-5 / BINARY / PACKED-DECIMAL
|
||||
offset — 在记录中的偏移量(字节)
|
||||
length — 字段长度(字节)
|
||||
decimal — 小数位数(从 PIC 解析)
|
||||
signed — 是否带符号(PIC 以 S 开头)
|
||||
sign_separate — 符号是否独立存储(SIGN IS LEADING/TRAILING SEPARATE)
|
||||
occurs — OCCURS 出现次数(None 表示非表列)
|
||||
occurs_max— OCCURS DEPENDING ON 的最大值
|
||||
redefines — 重定义的父字段名(如 "WS-BLOCK" 表示 REDEFINES WS-BLOCK)
|
||||
redefines_variant — REDEFINES 变体标识
|
||||
conditions— 88-level 条件列表: [{"name": "WS-APPROVED", "value": "'A'"}, ...]
|
||||
children — 子字段列表(层级嵌套时使用)
|
||||
"""
|
||||
name: str
|
||||
level: int
|
||||
pic: str
|
||||
@@ -24,11 +51,22 @@ class Field:
|
||||
|
||||
@dataclass
|
||||
class FieldTree:
|
||||
"""COPYBOOK 解析结果 —— 保存顶层字段列表;子字段挂在各 Field.children 下,可用 flatten() 递归展开。
|
||||
|
||||
────────── 字段说明 ──────────
|
||||
fields — 顶层字段列表(01 级,不含子字段嵌入)
|
||||
copybook_name — 源 COPYBOOK 文件名
|
||||
sha256 — 源码的 SHA256 哈希
|
||||
"""
|
||||
fields: list[Field] = field(default_factory=list)
|
||||
copybook_name: str = ""
|
||||
sha256: str = ""
|
||||
|
||||
def flatten(self) -> dict[str, Field]:
|
||||
"""展平为 {字段名 → Field} 字典(递归展开 children)。
|
||||
|
||||
注意: 字段名重复时,同名字段会在结果字典中相互覆盖,只保留其中一个;get_by_name 直接基于本方法的结果查找,同样无法区分同名字段。
|
||||
"""
|
||||
result = {}
|
||||
def _walk(ff):
|
||||
for f in ff:
|
||||
@@ -38,6 +76,7 @@ class FieldTree:
|
||||
return result
|
||||
|
||||
def get_by_name(self, name: str) -> Optional[Field]:
|
||||
"""按字段名查找(递归搜索所有层级)。"""
|
||||
return self.flatten().get(name)
|
||||
|
||||
@classmethod
|
||||
@@ -45,6 +84,7 @@ class FieldTree:
|
||||
return cls(fields=fields, copybook_name=name)
|
||||
|
||||
|
||||
# ── 模块级断言(确保 dataclass 结构正确) ──
|
||||
_f = Field(name="BR-AMT", level=5, pic="S9(7)V99", usage="COMP-3", offset=0, length=5, decimal=2, signed=True)
|
||||
assert _f.name == "BR-AMT"
|
||||
assert _f.decimal == 2
|
||||
|
||||
Reference in New Issue
Block a user