bc1d56d1a4
P0.6: gcov infrastructure P1: extract_structure output expansion (11 new feature fields) P2: Confusion group rule engine (8 pairs + contradiction + backtrack) P3: 4-factor confidence calculation + quality gate update P4: 33+2 COBOL program type test samples (22 files, 7 categories) P5: parametrized/ test data generation engine P6: japanese_data.py lookup tables P7-10: Type-specific test suites (~159 parametrized tests) P11: Full classification pipeline (classify_program) + orchestrator integration P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix) Architecture decisions: - classification_pipeline/ merged to hina/pipeline/ - parametrized/ as independent module - japanese_data.py as root-level file - hina/__all__ only exports classify_program() Co-Authored-By: Claude <noreply@anthropic.com>
96 lines
3.5 KiB
Python
96 lines
3.5 KiB
Python
"""字段树模型 — COPYBOOK 解析后的字段结构
|
||
|
||
Example:
    field = Field(name="TX-AMOUNT", level=5, pic="S9(7)V99", usage="COMP-3")
    tree = FieldTree(fields=[field], copybook_name="TXCPY")
    flat = tree.flatten()  # -> {"TX-AMOUNT": field}
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
from dataclasses import dataclass, field
|
||
from typing import Optional
|
||
|
||
|
||
@dataclass
class Field:
    """A single field definition from a COBOL DATA DIVISION entry.

    One instance corresponds to one level entry (01/05/10/77/88 style).
    This class is a plain data holder: it performs no parsing and no
    validation, so every value is whatever the caller supplied
    (presumably the COPYBOOK parser -- confirm against the caller).

    Attributes:
        name: Field name (upper case by convention, e.g. ``WS-AMOUNT``).
        level: Level number (documented range: 01-49, 77, 88).
        pic: PIC string, e.g. ``"S9(7)V99"``, ``"X(10)"``, ``"9(4)"``.
        usage: Storage type: DISPLAY / COMP / COMP-3 / COMP-5 / BINARY /
            PACKED-DECIMAL. Defaults to ``"DISPLAY"``.
        offset: Byte offset of the field within its record.
        length: Field length in bytes.
        decimal: Number of decimal places (meant to be derived from PIC;
            not computed by this class).
        signed: Whether the field is signed (PIC starts with ``S``).
        sign_separate: Whether the sign is stored separately
            (SIGN IS LEADING/TRAILING SEPARATE).
        occurs: OCCURS repetition count; ``None`` means not a table.
        occurs_max: Maximum count for OCCURS DEPENDING ON.
        redefines: Name of the field being redefined, e.g. ``"WS-BLOCK"``
            for ``REDEFINES WS-BLOCK``.
        redefines_variant: Identifier of the REDEFINES variant.
        conditions: 88-level conditions, e.g.
            ``[{"name": "WS-APPROVED", "value": "'A'"}, ...]``.
        children: Child fields, used when levels are nested.
    """

    # Required, positional-friendly core of the definition.
    name: str
    level: int
    pic: str

    # Storage layout.
    usage: str = "DISPLAY"
    offset: int = 0
    length: int = 0
    decimal: int = 0
    signed: bool = False
    sign_separate: bool = False

    # Table (OCCURS) and overlay (REDEFINES) information.
    occurs: Optional[int] = None
    occurs_max: Optional[int] = None
    redefines: Optional[str] = None
    redefines_variant: Optional[str] = None

    # Mutable defaults go through default_factory so that instances never
    # share the same list object.
    conditions: list[dict] = field(default_factory=list)
    children: list["Field"] = field(default_factory=list)
|
||
|
||
|
||
@dataclass
class FieldTree:
    """Result of parsing a COPYBOOK: top-level fields plus source metadata.

    Attributes:
        fields: Top-level fields (documented upstream as the 01-level
            entries); nested fields are reached through ``Field.children``.
        copybook_name: Name of the source COPYBOOK file.
        sha256: SHA256 hash of the source text, as supplied by the caller
            (it is not computed by this class).
    """

    fields: list[Field] = field(default_factory=list)
    copybook_name: str = ""
    sha256: str = ""

    def flatten(self) -> dict[str, Field]:
        """Return a ``{field name: Field}`` mapping over every nesting level.

        Fields are visited in pre-order: a field, then its children, then
        its next sibling. Names are not required to be unique; when a name
        occurs more than once, the field visited last replaces the earlier
        one, so the mapping holds exactly one field per distinct name.

        Returns:
            A new dict on every call; mutating it does not affect the tree.
        """
        flat: dict[str, Field] = {}

        def _collect(nodes: list[Field]) -> None:
            for node in nodes:
                flat[node.name] = node
                _collect(node.children)

        _collect(self.fields)
        return flat

    def get_by_name(self, name: str) -> Optional[Field]:
        """Look up a field by name at any nesting level.

        This delegates to ``flatten()``, so duplicate names are resolved
        the same way: the field visited last in pre-order wins.

        Args:
            name: Exact field name to look up.

        Returns:
            The matching ``Field``, or ``None`` when no field has that name.
        """
        return self.flatten().get(name)

    @classmethod
    def from_list(cls, fields: list[Field], name: str = "") -> "FieldTree":
        """Build a tree from a list of top-level fields.

        Args:
            fields: Top-level fields; the list is stored as-is, not copied.
            name: COPYBOOK name stored in ``copybook_name``.

        Returns:
            A new ``FieldTree`` whose ``sha256`` keeps its default ``""``.
        """
        return cls(fields=fields, copybook_name=name)
|
||
|
||
|
||
# ── Module-level sanity checks (verify the dataclass wiring at import time) ──
# NOTE: these run on every import of the module; ``assert`` statements are
# skipped entirely when Python is started with -O.
_f = Field(
    name="BR-AMT",
    level=5,
    pic="S9(7)V99",
    usage="COMP-3",
    offset=0,
    length=5,
    decimal=2,
    signed=True,
)
assert _f.name == "BR-AMT"
assert _f.decimal == 2
assert _f.signed

# A one-field tree must expose that field through both lookup paths.
_ft = FieldTree(fields=[_f], copybook_name="BILLCPY")
assert "BR-AMT" in _ft.flatten()
assert _ft.get_by_name("BR-AMT") is _f
|