v1: executing-plans 模式生成,54 文件 1320 行 Python

This commit is contained in:
hangshuo652
2026-05-24 10:02:52 +08:00
commit 06b295f780
55 changed files with 1749 additions and 0 deletions
View File
+54
View File
@@ -0,0 +1,54 @@
from typing import Optional
def align_records(
    cobol_records: list[dict],
    java_records: list[dict],
    key_field: str = "CUST-ID"
) -> list[tuple]:
    """Align COBOL and Java records that share the same key value.

    Records are grouped by the normalized value of ``key_field``. Within one
    key, the n-th COBOL record is paired with the n-th Java record; surplus
    records on either side are reported on their own.

    Args:
        cobol_records: Records from the COBOL side (``None`` is treated as empty).
        java_records: Records from the Java side (``None`` is treated as empty).
        key_field: Name of the dict entry used as the alignment key.

    Returns:
        A list of ``(cobol, java, status)`` tuples where status is
        ``"MATCHED"``, ``"MISSING_IN_SPARK"`` (COBOL only) or
        ``"EXTRA_IN_SPARK"`` (Java only). Keys appear in first-seen order
        (COBOL keys first, then keys that only exist on the Java side), so
        the result is reproducible from run to run.
    """
    if not cobol_records and not java_records:
        return []

    def _group_by_key(records):
        # dicts keep insertion order, so each group list and the key order
        # both follow the order of the input records.
        grouped = {}
        for record in records or []:
            key = _normalize_key(record.get(key_field))
            grouped.setdefault(key, []).append(record)
        return grouped

    cobol_by_key = _group_by_key(cobol_records)
    java_by_key = _group_by_key(java_records)

    # Iterating a set of string keys yields a different order on every
    # interpreter run (hash randomisation); build an ordered key list instead.
    ordered_keys = list(cobol_by_key)
    ordered_keys.extend(k for k in java_by_key if k not in cobol_by_key)

    pairs = []
    for key in ordered_keys:
        cobol_items = cobol_by_key.get(key, [])
        java_items = java_by_key.get(key, [])
        for i in range(max(len(cobol_items), len(java_items))):
            c = cobol_items[i] if i < len(cobol_items) else None
            j = java_items[i] if i < len(java_items) else None
            if c is not None and j is not None:
                pairs.append((c, j, "MATCHED"))
            elif c is not None:
                pairs.append((c, None, "MISSING_IN_SPARK"))
            else:
                pairs.append((None, j, "EXTRA_IN_SPARK"))
    return pairs
def _normalize_key(key) -> str:
if key is None:
return "__NONE__"
return str(key)
+56
View File
@@ -0,0 +1,56 @@
import struct
from pathlib import Path
from data.field_tree import FieldTree
class CobolBinaryReader:
    """Read fixed-length COBOL records from a binary file into dicts.

    The record layout is taken from a ``FieldTree``. Each field object is
    accessed through its ``offset``, ``length``, ``usage``, ``signed`` and
    ``decimal`` attributes.
    """

    def read(self, binary_path: str, tree: FieldTree) -> list[dict]:
        """Parse every complete record stored in ``binary_path``.

        Returns an empty list when the file is empty or the layout has zero
        size. A trailing chunk shorter than one record is ignored.
        """
        data = Path(binary_path).read_bytes()
        record_size = self._compute_record_size(tree)
        if record_size == 0 or not data:
            return []
        # Only start a record where a full ``record_size`` slice still fits.
        last_start = len(data) - record_size
        return [
            self._parse_record(data[start:start + record_size], tree)
            for start in range(0, last_start + 1, record_size)
        ]

    def _compute_record_size(self, tree: FieldTree) -> int:
        """Return the largest ``offset + length`` over ``tree.fields`` (0 if none)."""
        furthest = max((f.offset + f.length for f in tree.fields), default=0)
        return max(0, furthest)

    def _parse_record(self, record: bytes, tree: FieldTree) -> dict:
        """Decode one record into ``{field name: value}``.

        Zero-length fields and fields that extend past the end of ``record``
        are skipped. COMP-3 fields become decimal text, COMP/COMP-5 fields
        become big-endian integers, everything else is ASCII text with
        surrounding whitespace removed.
        """
        result = {}
        for name, field in tree.flatten().items():
            end = field.offset + field.length
            if field.length == 0 or end > len(record):
                continue
            raw = record[field.offset:end]
            if field.usage == "COMP-3":
                result[name] = self._parse_comp3(raw, field.signed, field.decimal)
            elif field.usage in ("COMP", "COMP-5"):
                result[name] = int.from_bytes(raw, "big", signed=field.signed)
            else:
                result[name] = raw.decode("ascii", errors="replace").strip()
        return result

    def _parse_comp3(self, raw: bytes, signed: bool, decimal: int) -> str:
        """Decode a packed-decimal (COMP-3) field to decimal text.

        Every nibble except the last is a digit; the last nibble is the sign
        (0xD or 0xB means negative, honoured only when ``signed`` is true).

        Args:
            raw: Packed bytes of the field.
            signed: Whether a negative sign nibble should be applied.
            decimal: Number of implied fractional digits.

        Returns:
            ``"0"`` for empty input, the integer text when ``decimal`` is not
            positive, otherwise text with exactly ``decimal`` fractional digits.
        """
        if not raw:
            return "0"
        nibbles = [nibble for b in raw for nibble in (b >> 4, b & 0x0F)]
        sign = nibbles.pop()
        value = 0
        for digit in nibbles:
            value = value * 10 + digit
        if signed and sign in (0x0D, 0x0B):
            value = -value
        if decimal <= 0:
            return str(value)
        # Place the decimal point with integer/string arithmetic: going
        # through float silently corrupts values with more than ~15 digits.
        digits = str(abs(value)).rjust(decimal + 1, "0")
        text = f"{digits[:-decimal]}.{digits[-decimal:]}"
        return f"-{text}" if value < 0 else text
+105
View File
@@ -0,0 +1,105 @@
from data.diff_result import FieldResult
from decimal import Decimal, InvalidOperation, ROUND_DOWN
# Default absolute difference allowed between two numeric values before a
# numeric comparison is reported as a mismatch (default of compare_field).
DEFAULT_TOLERANCE = 0.01
def compare_field(
    name: str,
    cobol_val: str,
    java_val: str,
    field_type: str = "decimal",
    tolerance: float = DEFAULT_TOLERANCE
) -> FieldResult:
    """Compare one field's COBOL and Java values using a type-specific rule.

    Args:
        name: Field name recorded on the result.
        cobol_val: Value produced by the COBOL side.
        java_val: Value produced by the Java side.
        field_type: ``"decimal"``/``"numeric"``, ``"date"``,
            ``"string"``/``"alpha"``; any other value uses plain equality.
        tolerance: Absolute tolerance, used for numeric comparison only.

    Returns:
        The ``FieldResult`` built for this field, with its status filled in
        by the selected comparison.
    """
    outcome = FieldResult(
        field_name=name,
        cobol_value=cobol_val,
        java_value=java_val,
    )
    if field_type == "decimal" or field_type == "numeric":
        checked = _compare_numeric(outcome, cobol_val, java_val, tolerance)
    elif field_type == "date":
        checked = _compare_date(outcome, cobol_val, java_val)
    elif field_type == "string" or field_type == "alpha":
        checked = _compare_string(outcome, cobol_val, java_val)
    else:
        checked = _compare_generic(outcome, cobol_val, java_val)
    return checked
def _compare_numeric(fr: FieldResult, c: str, j: str, tol: float) -> FieldResult:
    """Compare two numeric values and record the verdict on ``fr``.

    Status outcomes:
        * ``"PASS"`` - both values are unparseable/missing, or they are equal.
        * ``"NOT_SET"`` - only the COBOL value could not be parsed.
        * ``"MISMATCH"`` - only the Java value could not be parsed, or the
          absolute difference exceeds ``tol``.
        * ``"TOLERATED"`` - values differ by at most ``tol``; the tolerance
          is stored in ``fr.tolerance_applied``.
    """
    c_val, j_val = _parse_number(c), _parse_number(j)

    if c_val is None:
        if j_val is None:
            fr.status = "PASS"
        else:
            fr.status = "NOT_SET"
            fr.suggestion = "cobol_parse_error"
        return fr

    if j_val is None:
        fr.status = "MISMATCH"
        fr.suggestion = "java_missing_init: null/None where COBOL has value"
        return fr

    if c_val == j_val:
        fr.status = "PASS"
        return fr

    delta = c_val - j_val
    # Decimal differences are converted to float so they can be compared
    # against the float tolerance.
    gap = abs(float(delta)) if isinstance(c_val, Decimal) else abs(delta)
    within_tolerance = gap <= tol
    if within_tolerance:
        fr.status = "TOLERATED"
        fr.tolerance_applied = tol
    else:
        fr.status = "MISMATCH"
    return fr
def _compare_date(fr: FieldResult, c: str, j: str) -> FieldResult:
    """Mark ``fr`` as PASS when both dates normalize to the same text."""
    same_date = _normalize_date(c) == _normalize_date(j)
    if same_date:
        fr.status = "PASS"
    else:
        fr.status = "MISMATCH"
    return fr
def _compare_string(fr: FieldResult, c: str, j: str) -> FieldResult:
c_clean = c.strip() if c else ""
j_clean = j.strip() if j else ""
fr.status = "PASS" if c_clean == j_clean else "MISMATCH"
return fr
def _compare_generic(fr: FieldResult, c: str, j: str) -> FieldResult:
fr.status = "PASS" if c == j else "MISMATCH"
return fr
def _parse_number(val: str):
if val is None or val == "None":
return None
s = str(val).strip()
if s in ("", "\x00", "\x00\x00\x00\x00\x00"):
return Decimal("0")
s = s.replace("\x00", "")
try:
return Decimal(s)
except InvalidOperation:
return None
def _normalize_date(val: str, default: str = "1970-01-01") -> str:
if not val:
return default
s = val.strip()
if len(s) == 8 and s.isdigit():
return f"{s[0:4]}-{s[4:6]}-{s[6:8]}"
if len(s) == 10 and s[4] == '-':
return s
return s
+89
View File
@@ -0,0 +1,89 @@
from dataclasses import dataclass
# Partial EBCDIC byte -> ASCII character lookup: upper-case letters, digits,
# space and a few punctuation marks. Letters sit in three separate runs
# (A-I, J-R, S-Z), so each run is zipped against its own byte range.
EBCDIC_TO_ASCII = {
    **dict(zip(range(0xC1, 0xCA), "ABCDEFGHI")),
    **dict(zip(range(0xD1, 0xDA), "JKLMNOPQR")),
    **dict(zip(range(0xE2, 0xEA), "STUVWXYZ")),
    **dict(zip(range(0xF0, 0xFA), "0123456789")),
    0x40: ' ',
    0x4B: '.',
    0x6B: ',',
    0x5A: '!',
}
@dataclass
class CobolIRField:
    """COBOL-side half of an intermediate-representation (IR) record.

    Instances are created by ``Normalizer.to_ir_record``, which forwards its
    arguments to these fields unchanged.
    """
    # Raw field content as hex text - presumably a hex dump of the source
    # bytes; confirm against the producer.
    raw_hex: str
    # Value after decoding, as text.
    decoded_value: str
    # Source encoding label (Normalizer.normalize_encoding checks for "EBCDIC").
    encoding: str
    # Field type label; the set of allowed values is not defined in this module.
    field_type: str
    # Field length - NOTE(review): unit (bytes vs. digits) is not visible here.
    length: int
    # Presumably the number of implied decimal places - TODO confirm.
    scale: int
    # Whether the field carries a sign.
    signed: bool
@dataclass
class JavaIRField:
    """Java-side half of an intermediate-representation (IR) record."""
    # Value as received from the Java side, before any decoding.
    raw_value: str
    # Value after decoding, as text.
    decoded_value: str
    # Field type label; Normalizer.to_null_ir uses "null" for placeholders.
    field_type: str
    # Whether the field may be null (set to True for null placeholders).
    nullable: bool
@dataclass
class IRRecord:
    """Pairs the COBOL and Java representations of one named field.

    Either side may be ``None`` when that side has no value for the field.
    """
    # Name of the field both sides refer to.
    field_name: str
    # COBOL-side representation, if any.
    cobol: CobolIRField | None = None
    # Java-side representation, if any.
    java: JavaIRField | None = None
class Normalizer:
    """Convert raw field data into normalized text and IR records."""

    def normalize_encoding(self, raw: bytes, encoding: str) -> str:
        """Decode ``raw`` to text.

        ``encoding == "EBCDIC"`` selects EBCDIC decoding; anything else is
        decoded as ASCII with undecodable bytes replaced.
        """
        if encoding == "EBCDIC":
            return self._ebcdic_to_ascii(raw)
        return raw.decode("ascii", errors="replace")

    def normalize_comp3(self, raw: bytes) -> str:
        """Decode a packed-decimal (COMP-3) value to integer text.

        Every nibble except the last is a digit; a final nibble of 0xD or
        0xB marks the value as negative. Empty input yields ``"0"``.
        """
        if not raw:
            return "0"
        nibbles = [nibble for b in raw for nibble in (b >> 4, b & 0x0F)]
        sign = nibbles.pop()
        value = 0
        for digit in nibbles:
            value = value * 10 + digit
        if sign in (0x0D, 0x0B):
            value = -value
        return str(value)

    def normalize_date(self, date_str: str) -> str:
        """Reformat an 8-digit ``YYYYMMDD`` string as ``YYYY-MM-DD``.

        Any other input is returned with surrounding whitespace removed.
        """
        s = date_str.strip()
        if len(s) == 8 and s.isdigit():
            return f"{s[0:4]}-{s[4:6]}-{s[6:8]}"
        return s

    def to_ir_record(self, field_name, raw_hex, decoded_value,
                     encoding, field_type, length=0, scale=0, signed=False) -> IRRecord:
        """Build an ``IRRecord`` whose COBOL side holds the given values.

        The Java side of the returned record is left unset.
        """
        return IRRecord(
            field_name=field_name,
            cobol=CobolIRField(
                raw_hex=raw_hex, decoded_value=decoded_value,
                encoding=encoding, field_type=field_type,
                length=length, scale=scale, signed=signed))

    def to_null_ir(self, field_name, side="java") -> IRRecord:
        """Build a placeholder ``IRRecord`` for a field with no value.

        Args:
            field_name: Name of the field.
            side: ``"java"`` (default) puts the null placeholder on the Java
                side; any other value puts it on the COBOL side.

        Returns:
            A record whose selected side is an empty field of type
            ``"null"`` and whose other side is ``None``.
        """
        if side == "java":
            return IRRecord(
                field_name=field_name,
                cobol=None,
                java=JavaIRField(raw_value="", decoded_value="",
                                 field_type="null", nullable=True))
        # Previously this branch duplicated the Java placeholder, so the
        # ``side`` argument had no effect.
        return IRRecord(
            field_name=field_name,
            cobol=CobolIRField(raw_hex="", decoded_value="", encoding="",
                               field_type="null", length=0, scale=0,
                               signed=False),
            java=None)

    def _ebcdic_to_ascii(self, raw: bytes) -> str:
        """Decode EBCDIC bytes to printable ASCII text.

        Uses the standard-library ``cp037`` codec (EBCDIC US/Canada), so
        lower-case letters and punctuation are decoded correctly instead of
        being misread as ASCII code points. Any byte that does not map to a
        printable ASCII character becomes ``'?'``.
        """
        decoded = raw.decode("cp037")
        return ''.join(ch if ' ' <= ch <= '~' else '?' for ch in decoded)
+46
View File
@@ -0,0 +1,46 @@
from dataclasses import dataclass
from decimal import Decimal, InvalidOperation
@dataclass
class RoundingResult:
    """Outcome of a rounding-mode heuristic for one pair of values."""
    # Classification label; detect_rounding produces "EXACT", "UNKNOWN",
    # "TRUNCATE", "ROUNDING" or "SIGNIFICANT".
    mode: str = "EXACT"
    # Heuristic confidence in the classification (detect_rounding uses 0.0-1.0).
    confidence: float = 1.0
    # Human-readable explanation of the classification.
    suggestion: str = ""
def detect_rounding(cobol_value: str, java_value: str) -> RoundingResult:
    """Classify the difference between a COBOL and a Java numeric value.

    The classification is a heuristic based on the absolute difference:

    * either value unparseable -> ``UNKNOWN`` (confidence 0.0)
    * values equal             -> ``EXACT`` (confidence 1.0)
    * difference below 2.0     -> ``TRUNCATE`` (confidence 0.6)
    * difference below 100.0   -> ``ROUNDING`` (confidence 0.4)
    * anything larger          -> ``SIGNIFICANT`` (confidence 0.9)

    Args:
        cobol_value: Numeric text from the COBOL side.
        java_value: Numeric text from the Java side.

    Returns:
        A ``RoundingResult`` carrying the mode, confidence and an
        explanatory suggestion.
    """
    c = _to_decimal(cobol_value)
    j = _to_decimal(java_value)
    if c is None or j is None:
        return RoundingResult(mode="UNKNOWN", confidence=0.0, suggestion="cannot parse values")
    if c == j:
        return RoundingResult(mode="EXACT", confidence=1.0, suggestion="values are identical")

    # Subtract in Decimal first so the difference itself is exact; only the
    # final magnitude is converted to float for the threshold checks.
    diff = abs(float(c - j))
    if diff < 2.0:
        mode = "TRUNCATE"
        confidence = 0.6
        suggestion = f"Likely TRUNCATE rounding: COBOL truncates, Java rounds or retains precision. Diff: {diff}"
    elif diff < 100.0:
        mode = "ROUNDING"
        confidence = 0.4
        suggestion = f"Possible rounding difference. Diff: {diff}"
    else:
        mode = "SIGNIFICANT"
        confidence = 0.9
        suggestion = f"Values differ significantly (diff={diff}) — not a rounding issue"
    return RoundingResult(mode=mode, confidence=confidence, suggestion=suggestion)
def _to_decimal(val: str):
try:
return Decimal(str(val).strip())
except (InvalidOperation, ValueError):
return None