55 lines
1.5 KiB
Python
55 lines
1.5 KiB
Python
from itertools import zip_longest
from typing import Optional
|
|
|
|
|
|
def _group_by_key(records, key_field: str) -> dict:
    """Group *records* by their normalized *key_field* value.

    Keys appear in the returned dict in the order they are first seen, and
    each key's list keeps the records in input order. ``None`` is treated as
    an empty collection.
    """
    grouped: dict = {}
    for record in records or []:
        key = _normalize_key(record.get(key_field))
        grouped.setdefault(key, []).append(record)
    return grouped


def align_records(
    cobol_records: list[dict],
    java_records: list[dict],
    key_field: str = "CUST-ID",
) -> list[tuple]:
    """Align COBOL and Java records by a shared key field.

    Records are grouped by the normalized value of ``key_field`` (see
    ``_normalize_key``). Within each key, records from the two sides are
    paired positionally: the first COBOL record with the first Java record,
    the second with the second, and so on.

    Args:
        cobol_records: Records from the COBOL side. ``None`` or an empty
            list means "no records".
        java_records: Records from the Java side. ``None`` or an empty
            list means "no records".
        key_field: Name of the dict entry used as the alignment key.

    Returns:
        A list of ``(cobol, java, status)`` tuples where ``status`` is:

        * ``"MATCHED"`` - both sides have a record at this position,
        * ``"MISSING_IN_SPARK"`` - only the COBOL record exists
          (``java`` is ``None``),
        * ``"EXTRA_IN_SPARK"`` - only the Java record exists
          (``cobol`` is ``None``).

        Keys are emitted in first-seen order: every key present in
        ``cobol_records`` in input order, followed by keys that occur only
        in ``java_records``. The order is therefore reproducible between
        runs.
    """
    if not cobol_records and not java_records:
        return []

    cobol_by_key = _group_by_key(cobol_records, key_field)
    java_by_key = _group_by_key(java_records, key_field)

    # dict.fromkeys de-duplicates while keeping insertion order; iterating a
    # set of str keys here would make the output order vary from run to run
    # because of string hash randomization.
    ordered_keys = dict.fromkeys([*cobol_by_key, *java_by_key])

    pairs = []
    for key in ordered_keys:
        cobol_items = cobol_by_key.get(key, [])
        java_items = java_by_key.get(key, [])

        # zip_longest pads the shorter side with None, which marks the
        # record as absent on that side.
        for cobol_rec, java_rec in zip_longest(cobol_items, java_items):
            if cobol_rec is not None and java_rec is not None:
                status = "MATCHED"
            elif cobol_rec is not None:
                status = "MISSING_IN_SPARK"
            else:
                status = "EXTRA_IN_SPARK"
            pairs.append((cobol_rec, java_rec, status))

    return pairs
|
|
|
|
|
|
def _normalize_key(key) -> str:
|
|
if key is None:
|
|
return "__NONE__"
|
|
return str(key)
|