36 lines
1.6 KiB
Python
36 lines
1.6 KiB
Python
import struct, json
|
|
from pathlib import Path
|
|
from data.test_case import TestCase, SparkConfig
|
|
|
|
|
|
class DataWriter:
    """Write test-case field data to disk in three formats.

    * ``write_cobol_binary`` - a flat, delimiter-free binary stream.
    * ``write_spark_json``   - a single JSON-lines part file in a directory.
    * ``write_native_json``  - a JSON-lines file, one test case per line.

    Every writer creates missing parent directories before writing.
    """

    # Fixed byte width of every string field in the binary format.
    STRING_FIELD_WIDTH = 10

    def write_cobol_binary(self, test_cases: list[TestCase], output: Path):
        """Write all fields of all test cases as one flat binary stream.

        Fields are emitted in ``tc.fields`` iteration order, test case after
        test case, with no field or record delimiters:

        * ``int`` (``bool`` included, as it subclasses ``int``) -> 8-byte
          big-endian signed integer (``">q"``).
        * ``float`` -> 8-byte big-endian double (``">d"``).
        * ``str`` -> exactly ``STRING_FIELD_WIDTH`` bytes of ASCII,
          right-padded with spaces or truncated; characters that cannot be
          encoded as ASCII are written as ``?``.

        Values of any other type are skipped and produce no bytes.

        Args:
            test_cases: Objects exposing a ``fields`` mapping.
            output: Destination file; overwritten if it already exists.

        Raises:
            struct.error: If an integer does not fit in 64 signed bits.
        """
        # Match the JSON writers: a nested output path must not fail just
        # because its directory has not been created yet.
        output.parent.mkdir(parents=True, exist_ok=True)
        width = self.STRING_FIELD_WIDTH
        with open(output, "wb") as f:
            for tc in test_cases:
                # Only the values are serialized; field names are not written.
                for value in tc.fields.values():
                    if isinstance(value, int):
                        f.write(struct.pack(">q", value))
                    elif isinstance(value, float):
                        f.write(struct.pack(">d", value))
                    elif isinstance(value, str):
                        encoded = value.encode("ascii", errors="replace")
                        # Pad first, then cut, so the field is always
                        # exactly ``width`` bytes long.
                        f.write(encoded.ljust(width, b" ")[:width])

    def write_spark_json(self, test_cases: list[TestCase], spark_config: SparkConfig,
                         output_dir: Path):
        """Write ``spark_config.num_records`` JSON records to one part file.

        Each record is a shallow copy of the first test case's ``fields``
        (or an empty object when ``test_cases`` is empty). When
        ``spark_config.key_field`` is set and present in those fields, its
        value is suffixed with ``-NNNN`` (the zero-padded record index) so
        that each record gets a distinct key.

        The records are written to ``output_dir / "part-00000.json"``, one
        JSON document per line, with no trailing newline.

        Args:
            test_cases: Only the first element is used as the template.
            spark_config: Supplies ``num_records`` and ``key_field``.
            output_dir: Directory for the part file; created if missing.
        """
        output_dir.mkdir(parents=True, exist_ok=True)
        base = test_cases[0].fields if test_cases else {}

        # Both the key name and its presence in the template are the same
        # for every record, so resolve them once.
        key_field = spark_config.key_field
        rewrite_key = bool(key_field) and key_field in base

        records = []
        for i in range(spark_config.num_records):
            record = dict(base)
            if rewrite_key:
                record[key_field] = f"{record[key_field]}-{i:04d}"
            records.append(record)

        # json.dumps escapes non-ASCII by default, so pinning UTF-8 keeps the
        # bytes independent of the platform's default encoding.
        (output_dir / "part-00000.json").write_text(
            "\n".join(json.dumps(r) for r in records), encoding="utf-8"
        )

    def write_native_json(self, test_cases: list[TestCase], output: Path):
        """Write each test case's ``fields`` as one JSON line.

        Every line, including the last, is terminated by a newline. An empty
        ``test_cases`` list yields an empty file.

        Args:
            test_cases: Objects exposing a JSON-serializable ``fields`` mapping.
            output: Destination file; parent directories are created if
                missing and an existing file is overwritten.
        """
        output.parent.mkdir(parents=True, exist_ok=True)
        with open(output, "w", encoding="utf-8") as f:
            for tc in test_cases:
                f.write(json.dumps(tc.fields) + "\n")
|