v1: executing-plans 模式生成，54 文件 1320 行 Python

2026-05-24 10:02:52 +08:00
commit 06b295f780
55 changed files with 1749 additions and 0 deletions
@@ -0,0 +1,35 @@
+import struct, json
+from pathlib import Path
+from data.test_case import TestCase, SparkConfig
+
+
+class DataWriter:
+    """Write test-case field data to disk as fixed-width binary or JSON Lines."""
+
+    def write_cobol_binary(self, test_cases: list[TestCase], output: Path):
+        """Write every field of every test case to ``output`` as raw bytes.
+
+        Fields are written back to back, in the iteration order of
+        ``tc.fields``, with no separators or record headers:
+
+        * ``int``   -> 8 bytes, big-endian signed 64-bit (``">q"``)
+        * ``float`` -> 8 bytes, big-endian double (``">d"``)
+        * ``str``   -> exactly 10 bytes: ASCII-encoded, space-padded on the
+          right or truncated; non-ASCII characters become ``?``
+
+        Args:
+            test_cases: Objects exposing a ``fields`` mapping.
+            output: Destination file; missing parent directories are created.
+
+        Raises:
+            struct.error: If an ``int`` does not fit in a signed 64-bit value.
+        """
+        text_width = 10
+        # Create the parent directory like the JSON writers do, so all three
+        # methods behave the same on a fresh output tree.
+        Path(output).parent.mkdir(parents=True, exist_ok=True)
+        with open(output, "wb") as f:
+            for tc in test_cases:
+                for value in tc.fields.values():
+                    # bool is a subclass of int, so booleans take this branch.
+                    if isinstance(value, int):
+                        f.write(struct.pack(">q", value))
+                    elif isinstance(value, float):
+                        f.write(struct.pack(">d", value))
+                    elif isinstance(value, str):
+                        encoded = value.encode("ascii", errors="replace")
+                        f.write(encoded.ljust(text_width, b" ")[:text_width])
+                    # NOTE(review): values of any other type (None, list, ...)
+                    # are skipped without writing any bytes, which shifts the
+                    # offset of every later field -- confirm this is intended.
+
+    def write_spark_json(self, test_cases: list[TestCase], spark_config: SparkConfig,
+                         output_dir: Path):
+        """Write ``spark_config.num_records`` JSON lines to ``part-00000.json``.
+
+        Only the first test case is used: its ``fields`` mapping is the
+        template for every record (an empty mapping when ``test_cases`` is
+        empty). When ``spark_config.key_field`` is set and present in the
+        template, each record's value for that key gets a ``-NNNN`` suffix
+        holding the zero-padded record index.
+
+        Args:
+            test_cases: Objects exposing a ``fields`` mapping.
+            spark_config: Supplies ``num_records`` and ``key_field``.
+            output_dir: Directory to write into; created if missing.
+        """
+        output_dir.mkdir(parents=True, exist_ok=True)
+        base = test_cases[0].fields if test_cases else {}
+        key_field = spark_config.key_field
+        # Loop-invariant: every record starts as a copy of ``base``.
+        suffix_key = bool(key_field) and key_field in base
+        # Stream records rather than building the whole list, and end every
+        # line with "\n" so the file matches write_native_json's layout.
+        with open(output_dir / "part-00000.json", "w", encoding="utf-8") as f:
+            for i in range(spark_config.num_records):
+                record = dict(base)
+                if suffix_key:
+                    record[key_field] = f"{base[key_field]}-{i:04d}"
+                f.write(json.dumps(record) + "\n")
+
+    def write_native_json(self, test_cases: list[TestCase], output: Path):
+        """Write each test case's ``fields`` mapping as one JSON line.
+
+        Args:
+            test_cases: Objects exposing a JSON-serializable ``fields`` mapping.
+            output: Destination file; missing parent directories are created.
+        """
+        output.parent.mkdir(parents=True, exist_ok=True)
+        # Explicit encoding keeps the output independent of the platform
+        # default; json.dumps emits ASCII-only text by default.
+        with open(output, "w", encoding="utf-8") as f:
+            for tc in test_cases:
+                f.write(json.dumps(tc.fields) + "\n")