v1: executing-plans 模式生成,54 文件 1320 行 Python
This commit is contained in:
@@ -0,0 +1,54 @@
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def align_records(
    cobol_records: list[dict],
    java_records: list[dict],
    key_field: str = "CUST-ID"
) -> list[tuple]:
    """Align COBOL and Java records by a shared key field.

    Records are grouped by the normalized value of ``key_field``. Within a
    key, records are paired positionally (first with first, and so on).

    Args:
        cobol_records: Records from the COBOL side; ``None`` or empty is
            treated as "no records".
        java_records: Records from the Java side; ``None`` or empty is
            treated as "no records".
        key_field: Name of the dict entry used as the alignment key.

    Returns:
        A list of ``(cobol, java, status)`` tuples where status is
        ``"MATCHED"`` (both present), ``"MISSING_IN_SPARK"`` (only the COBOL
        record exists) or ``"EXTRA_IN_SPARK"`` (only the Java record exists).
        Keys appear in first-seen order: COBOL keys first, then keys that
        only occur on the Java side.
    """
    if not cobol_records and not java_records:
        return []

    cobol_by_key = _group_by_key(cobol_records, key_field)
    java_by_key = _group_by_key(java_records, key_field)

    pairs = []
    # dict.fromkeys de-duplicates while keeping insertion order, so the
    # result is deterministic (iterating a set of str keys is not stable
    # across interpreter runs because of hash randomization).
    for key in dict.fromkeys([*cobol_by_key, *java_by_key]):
        cobol_items = cobol_by_key.get(key, [])
        java_items = java_by_key.get(key, [])

        for i in range(max(len(cobol_items), len(java_items))):
            c = cobol_items[i] if i < len(cobol_items) else None
            j = java_items[i] if i < len(java_items) else None

            if c is not None and j is not None:
                pairs.append((c, j, "MATCHED"))
            elif c is not None:
                pairs.append((c, None, "MISSING_IN_SPARK"))
            else:
                pairs.append((None, j, "EXTRA_IN_SPARK"))

    return pairs


def _group_by_key(records, key_field: str) -> dict:
    """Group records into ``{normalized key: [records...]}`` in input order."""
    grouped: dict = {}
    for record in records or ():
        key = _normalize_key(record.get(key_field))
        grouped.setdefault(key, []).append(record)
    return grouped


def _normalize_key(key) -> str:
    """Return the key as a string; a missing key maps to ``"__NONE__"``."""
    if key is None:
        return "__NONE__"
    return str(key)
|
||||
@@ -0,0 +1,56 @@
|
||||
import struct
|
||||
from pathlib import Path
|
||||
from data.field_tree import FieldTree
|
||||
|
||||
|
||||
class CobolBinaryReader:
    """Read fixed-length records from a binary file into plain dicts.

    The record layout comes from a ``FieldTree``; this class only relies on
    ``tree.fields`` (objects with ``offset`` and ``length``) and
    ``tree.flatten()`` (a mapping of field name to a field object exposing
    ``offset``, ``length``, ``usage``, ``signed`` and ``decimal``).
    """

    def read(self, binary_path: str, tree: FieldTree) -> list[dict]:
        """Parse every complete record in ``binary_path``.

        Args:
            binary_path: Path of the binary file to read.
            tree: Field layout describing one record.

        Returns:
            One dict per complete record. An empty list is returned when the
            file is empty or the computed record size is zero. A trailing
            slice shorter than one record is ignored.
        """
        data = Path(binary_path).read_bytes()
        record_size = self._compute_record_size(tree)
        if record_size == 0 or len(data) == 0:
            return []
        records = []
        for offset in range(0, len(data), record_size):
            record = data[offset:offset + record_size]
            # The last slice may be shorter than a full record; skip it.
            if len(record) >= record_size:
                records.append(self._parse_record(record, tree))
        return records

    def _compute_record_size(self, tree: FieldTree) -> int:
        """Return the largest ``offset + length`` over ``tree.fields`` (0 if none)."""
        return max([0, *(f.offset + f.length for f in tree.fields)])

    def _parse_record(self, record: bytes, tree: FieldTree) -> dict:
        """Decode one record into ``{field name: value}``.

        Zero-length fields and fields that would run past the end of
        ``record`` are skipped. ``COMP-3`` fields are decoded as packed
        decimal strings, ``COMP``/``COMP-5`` as big-endian integers, and
        everything else as stripped ASCII text (undecodable bytes replaced).
        """
        result = {}
        for name, field in tree.flatten().items():
            end = field.offset + field.length
            if field.length == 0 or end > len(record):
                continue
            raw = record[field.offset:end]
            if field.usage == "COMP-3":
                result[name] = self._parse_comp3(raw, field.signed, field.decimal)
            elif field.usage in ("COMP", "COMP-5"):
                result[name] = int.from_bytes(raw, "big", signed=field.signed)
            else:
                result[name] = raw.decode("ascii", errors="replace").strip()
        return result

    def _parse_comp3(self, raw: bytes, signed: bool, decimal: int) -> str:
        """Decode packed-decimal bytes into a decimal string.

        Every nibble except the last is a digit; the last nibble is the sign.
        The value is negated only when ``signed`` is true and the sign nibble
        is 0x0D or 0x0B.

        Args:
            raw: Packed bytes of the field.
            signed: Whether the sign nibble should be honoured.
            decimal: Number of implied fractional digits (expected >= 0).

        Returns:
            ``"0"`` for empty input, the integer as text when ``decimal`` is
            0, otherwise the value with exactly ``decimal`` fractional digits.
        """
        if not raw:
            return "0"
        nibbles = []
        for b in raw:
            nibbles.append((b >> 4) & 0x0F)
            nibbles.append(b & 0x0F)
        sign = nibbles.pop()
        value = 0
        for n in nibbles:
            value = value * 10 + n
        if signed and sign in (0x0D, 0x0B):
            value = -value
        if not decimal:
            return str(value)
        # Place the decimal point with string slicing rather than float
        # division: floats lose digits beyond ~15-16 significant figures,
        # while packed-decimal fields can carry more than that.
        prefix = "-" if value < 0 else ""
        digits = str(abs(value)).rjust(decimal + 1, "0")
        return f"{prefix}{digits[:-decimal]}.{digits[-decimal:]}"
|
||||
@@ -0,0 +1,105 @@
|
||||
from data.diff_result import FieldResult
|
||||
from decimal import Decimal, InvalidOperation, ROUND_DOWN
|
||||
|
||||
# Default tolerance for compare_field; numeric comparisons whose absolute
# difference is at most this value are reported as "TOLERATED".
DEFAULT_TOLERANCE = 0.01
|
||||
|
||||
|
||||
def compare_field(
    name: str,
    cobol_val: str,
    java_val: str,
    field_type: str = "decimal",
    tolerance: float = DEFAULT_TOLERANCE
) -> FieldResult:
    """Compare one field's COBOL and Java values according to its type.

    Args:
        name: Field name recorded on the result.
        cobol_val: Value from the COBOL side.
        java_val: Value from the Java side.
        field_type: ``"decimal"``/``"numeric"`` use the numeric comparison,
            ``"date"`` the date comparison, ``"string"``/``"alpha"`` the
            whitespace-insensitive string comparison; any other value falls
            back to plain equality.
        tolerance: Passed to the numeric comparison only.

    Returns:
        The ``FieldResult`` created for this field, as filled in by the
        selected comparison helper.
    """
    outcome = FieldResult(
        field_name=name,
        cobol_value=cobol_val,
        java_value=java_val,
    )

    # Only the numeric comparison takes a tolerance, so it is handled apart
    # from the three helpers that share the (result, cobol, java) signature.
    if field_type == "decimal" or field_type == "numeric":
        return _compare_numeric(outcome, cobol_val, java_val, tolerance)

    if field_type == "date":
        comparator = _compare_date
    elif field_type == "string" or field_type == "alpha":
        comparator = _compare_string
    else:
        comparator = _compare_generic
    return comparator(outcome, cobol_val, java_val)
|
||||
|
||||
|
||||
def _compare_numeric(fr: FieldResult, c: str, j: str, tol: float) -> FieldResult:
    """Compare two values numerically and record the verdict on ``fr``.

    Both inputs are parsed with ``_parse_number``. Outcomes:

    * neither parses: ``"PASS"``
    * only the COBOL value fails to parse: ``"NOT_SET"`` with suggestion
      ``"cobol_parse_error"``
    * only the Java value fails to parse: ``"MISMATCH"`` with a
      missing-initialisation suggestion
    * equal: ``"PASS"``
    * absolute difference <= ``tol``: ``"TOLERATED"`` and
      ``fr.tolerance_applied`` is set to ``tol``
    * otherwise: ``"MISMATCH"``

    Returns:
        The same ``fr`` object that was passed in.
    """
    cobol_num = _parse_number(c)
    java_num = _parse_number(j)

    if cobol_num is None:
        if java_num is None:
            fr.status = "PASS"
        else:
            fr.status = "NOT_SET"
            fr.suggestion = "cobol_parse_error"
        return fr

    if java_num is None:
        fr.status = "MISMATCH"
        fr.suggestion = "java_missing_init: null/None where COBOL has value"
        return fr

    if cobol_num == java_num:
        fr.status = "PASS"
        return fr

    delta = abs(cobol_num - java_num)
    if isinstance(cobol_num, Decimal):
        # Compare as float so the difference and the float tolerance are
        # rounded the same way at the boundary.
        delta = abs(float(cobol_num - java_num))

    if delta <= tol:
        fr.status = "TOLERATED"
        fr.tolerance_applied = tol
        return fr

    fr.status = "MISMATCH"
    return fr
|
||||
|
||||
|
||||
def _compare_date(fr: FieldResult, c: str, j: str) -> FieldResult:
    """Compare two date values after normalising both with ``_normalize_date``.

    Sets ``fr.status`` to ``"PASS"`` when the normalised forms are equal,
    otherwise ``"MISMATCH"``, and returns ``fr``.
    """
    if _normalize_date(c) == _normalize_date(j):
        fr.status = "PASS"
    else:
        fr.status = "MISMATCH"
    return fr
|
||||
|
||||
|
||||
def _compare_string(fr: FieldResult, c: str, j: str) -> FieldResult:
    """Compare two strings, ignoring leading and trailing whitespace.

    Falsy inputs (``None``, ``""``) are treated as the empty string. Sets
    ``fr.status`` to ``"PASS"`` or ``"MISMATCH"`` and returns ``fr``.
    """
    left = (c or "").strip()
    right = (j or "").strip()
    if left == right:
        fr.status = "PASS"
    else:
        fr.status = "MISMATCH"
    return fr
|
||||
|
||||
|
||||
def _compare_generic(fr: FieldResult, c: str, j: str) -> FieldResult:
    """Compare two values by plain equality, with no normalisation.

    Sets ``fr.status`` to ``"PASS"`` when ``c == j``, otherwise
    ``"MISMATCH"``, and returns ``fr``.
    """
    if c == j:
        fr.status = "PASS"
    else:
        fr.status = "MISMATCH"
    return fr
|
||||
|
||||
|
||||
def _parse_number(val: str):
    """Parse a raw field value into a ``Decimal``.

    NUL bytes are removed and surrounding whitespace is stripped before
    parsing, so a value made up only of NULs and/or whitespace (of any
    length) counts as blank.

    Args:
        val: Raw value; non-string values are converted with ``str``.

    Returns:
        ``None`` when ``val`` is ``None`` or the literal string ``"None"``,
        ``Decimal("0")`` for a blank value, the parsed ``Decimal`` otherwise,
        or ``None`` when the text is not a valid number.
    """
    if val is None or val == "None":
        return None
    # Strip NULs first: str.strip() does not treat "\x00" as whitespace, so
    # NUL padding of any length must be removed before the blank check.
    cleaned = str(val).replace("\x00", "").strip()
    if not cleaned:
        return Decimal("0")
    try:
        return Decimal(cleaned)
    except InvalidOperation:
        return None
|
||||
|
||||
|
||||
def _normalize_date(val: str, default: str = "1970-01-01") -> str:
    """Normalise a date string to ``YYYY-MM-DD`` where possible.

    Args:
        val: Raw date text; may be ``None`` or empty.
        default: Value returned when ``val`` is missing or blank.

    Returns:
        ``default`` for ``None``, empty or whitespace-only input; an
        8-digit ``YYYYMMDD`` value reformatted as ``YYYY-MM-DD``; otherwise
        the stripped input unchanged (no validation is performed).
    """
    s = val.strip() if val else ""
    # Blank-padded values are treated as "no date", the same as an empty one.
    if not s:
        return default
    if len(s) == 8 and s.isdigit():
        return f"{s[0:4]}-{s[4:6]}-{s[6:8]}"
    return s
|
||||
@@ -0,0 +1,89 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
# Partial byte -> character table used when translating EBCDIC data.
# It covers upper-case letters, digits and a few punctuation marks only;
# the letter and digit ranges are contiguous runs, so they are generated.
EBCDIC_TO_ASCII = {
    **{0xC1 + i: ch for i, ch in enumerate("ABCDEFGHI")},
    **{0xD1 + i: ch for i, ch in enumerate("JKLMNOPQR")},
    **{0xE2 + i: ch for i, ch in enumerate("STUVWXYZ")},
    **{0xF0 + i: ch for i, ch in enumerate("0123456789")},
    0x40: ' ',
    0x4B: '.',
    0x6B: ',',
    0x5A: '!',
}
|
||||
|
||||
|
||||
@dataclass
class CobolIRField:
    """COBOL-side intermediate representation of a single field.

    Plain data holder: all fields are required and nothing is validated or
    converted here.
    """
    # Presumably the field's raw bytes rendered as a hex string - confirm
    # against the code that fills it in.
    raw_hex: str
    # Decoded (text) form of the field value.
    decoded_value: str
    # Encoding label of the source data (e.g. "EBCDIC" is special-cased by
    # Normalizer.normalize_encoding).
    encoding: str
    # Type label of the field; the accepted values are not defined here.
    field_type: str
    # Field length; unit (bytes vs. digits) is not established here - TODO confirm.
    length: int
    # Presumably the number of implied decimal places - TODO confirm.
    scale: int
    # Whether the field is signed.
    signed: bool
|
||||
|
||||
|
||||
@dataclass
class JavaIRField:
    """Java-side intermediate representation of a single field.

    Plain data holder: all fields are required and nothing is validated or
    converted here.
    """
    # Value as received from the Java side, before decoding.
    raw_value: str
    # Decoded (text) form of the field value.
    decoded_value: str
    # Type label of the field; Normalizer.to_null_ir uses "null" for
    # placeholder entries.
    field_type: str
    # Whether the field may be null.
    nullable: bool
|
||||
|
||||
|
||||
@dataclass
class IRRecord:
    """Pairs the COBOL-side and Java-side representations of one named field.

    Either side may be absent; both default to ``None``.
    """
    # Name of the field this record describes.
    field_name: str
    # COBOL-side data, or None when that side is not populated.
    cobol: CobolIRField | None = None
    # Java-side data, or None when that side is not populated.
    java: JavaIRField | None = None
|
||||
|
||||
|
||||
class Normalizer:
    """Helpers that turn raw field data into comparable text and IR records."""

    def normalize_encoding(self, raw: bytes, encoding: str) -> str:
        """Decode ``raw`` bytes to text.

        ``"EBCDIC"`` is translated through ``EBCDIC_TO_ASCII``; any other
        encoding label is decoded as ASCII with undecodable bytes replaced.
        """
        if encoding == "EBCDIC":
            return self._ebcdic_to_ascii(raw)
        return raw.decode("ascii", errors="replace")

    def normalize_comp3(self, raw: bytes) -> str:
        """Decode packed-decimal bytes into an integer string.

        Every nibble except the last is a digit; the last nibble is the
        sign, with 0x0D and 0x0B meaning negative. No decimal scaling is
        applied. Empty input yields ``"0"``.
        """
        if not raw:
            return "0"
        nibbles = []
        for b in raw:
            nibbles.append((b >> 4) & 0x0F)
            nibbles.append(b & 0x0F)
        sign = nibbles.pop()
        value = 0
        for n in nibbles:
            value = value * 10 + n
        if sign in (0x0D, 0x0B):
            value = -value
        return str(value)

    def normalize_date(self, date_str: str) -> str:
        """Reformat an 8-digit ``YYYYMMDD`` string as ``YYYY-MM-DD``.

        Any other input is returned stripped of surrounding whitespace,
        without validation.
        """
        s = date_str.strip()
        if len(s) == 8 and s.isdigit():
            return f"{s[0:4]}-{s[4:6]}-{s[6:8]}"
        return s

    def to_ir_record(self, field_name, raw_hex, decoded_value,
                     encoding, field_type, length=0, scale=0, signed=False) -> IRRecord:
        """Build an ``IRRecord`` whose COBOL side is populated from the arguments.

        The Java side is left at its default (``None``).
        """
        return IRRecord(
            field_name=field_name,
            cobol=CobolIRField(
                raw_hex=raw_hex, decoded_value=decoded_value,
                encoding=encoding, field_type=field_type,
                length=length, scale=scale, signed=signed))

    def to_null_ir(self, field_name, side="java") -> IRRecord:
        """Build an ``IRRecord`` carrying an empty "null" placeholder on one side.

        Args:
            field_name: Name of the field the record describes.
            side: ``"java"`` (default) places the placeholder on the Java
                side; any other value places it on the COBOL side.

        Returns:
            An ``IRRecord`` with the chosen side set to an empty field of
            type ``"null"`` and the other side set to ``None``.
        """
        if side == "java":
            return IRRecord(
                field_name=field_name,
                cobol=None,
                java=JavaIRField(raw_value="", decoded_value="",
                                 field_type="null", nullable=True))
        # Previously this branch duplicated the Java one, so ``side`` had no
        # effect; a non-java side now yields a COBOL placeholder instead.
        return IRRecord(
            field_name=field_name,
            cobol=CobolIRField(raw_hex="", decoded_value="", encoding="",
                               field_type="null", length=0, scale=0,
                               signed=False),
            java=None)

    def _ebcdic_to_ascii(self, raw: bytes) -> str:
        """Translate bytes through ``EBCDIC_TO_ASCII``.

        Bytes missing from the table are kept as-is when they fall in the
        printable ASCII range (32-126) and replaced by ``'?'`` otherwise.
        """
        return ''.join(
            EBCDIC_TO_ASCII.get(b, chr(b) if 32 <= b < 127 else '?')
            for b in raw
        )
|
||||
@@ -0,0 +1,46 @@
|
||||
from dataclasses import dataclass
|
||||
from decimal import Decimal, InvalidOperation
|
||||
|
||||
|
||||
@dataclass
class RoundingResult:
    """Verdict produced by ``detect_rounding``."""
    # Classification label; detect_rounding emits "EXACT", "UNKNOWN",
    # "TRUNCATE", "ROUNDING" or "SIGNIFICANT".
    mode: str = "EXACT"
    # Heuristic confidence attached to the classification.
    confidence: float = 1.0
    # Human-readable explanation of the verdict.
    suggestion: str = ""


def detect_rounding(cobol_value: str, java_value: str) -> RoundingResult:
    """Classify the difference between a COBOL value and a Java value.

    The classification is a heuristic on the absolute difference:

    * either value unparseable (including NaN): ``"UNKNOWN"``, confidence 0.0
    * equal: ``"EXACT"``, confidence 1.0
    * difference < 2.0: ``"TRUNCATE"``, confidence 0.6
    * difference < 100.0: ``"ROUNDING"``, confidence 0.4
    * otherwise: ``"SIGNIFICANT"``, confidence 0.9

    Args:
        cobol_value: Value from the COBOL side, as text.
        java_value: Value from the Java side, as text.

    Returns:
        A ``RoundingResult`` describing the classification.
    """
    c = _to_decimal(cobol_value)
    j = _to_decimal(java_value)

    if c is None or j is None:
        return RoundingResult(mode="UNKNOWN", confidence=0.0, suggestion="cannot parse values")

    if c == j:
        return RoundingResult(mode="EXACT", confidence=1.0, suggestion="values are identical")

    diff = abs(float(c - j))

    if diff < 2.0:
        mode = "TRUNCATE"
        confidence = 0.6
        suggestion = f"Likely TRUNCATE rounding: COBOL truncates, Java rounds or retains precision. Diff: {diff}"
    elif diff < 100.0:
        mode = "ROUNDING"
        confidence = 0.4
        suggestion = f"Possible rounding difference. Diff: {diff}"
    else:
        mode = "SIGNIFICANT"
        confidence = 0.9
        suggestion = f"Values differ significantly (diff={diff}) — not a rounding issue"

    return RoundingResult(mode=mode, confidence=confidence, suggestion=suggestion)


def _to_decimal(val: str):
    """Parse ``val`` into a ``Decimal``; return ``None`` when it is not a number.

    NaN parses as a valid ``Decimal`` but cannot be compared meaningfully,
    so it is reported as unparseable as well.
    """
    try:
        parsed = Decimal(str(val).strip())
    except (InvalidOperation, ValueError):
        return None
    return None if parsed.is_nan() else parsed
|
||||
Reference in New Issue
Block a user