Files
cobol-java-v3/orchestrator.py
T

136 lines
5.4 KiB
Python

import shutil, time
from pathlib import Path
from data.field_tree import FieldTree
from data.test_case import TestSuite, SparkConfig
from data.diff_result import VerificationRun, FieldResult
from runners.runner import Runner
from runners.native_java_runner import NativeJavaRunner
from runners.spark_java_runner import SparkJavaRunner
from runners.cobol_runner import CobolRunner
from runners.data_writer import DataWriter
from agents.agent1_parser import Agent1Parser
from agents.agent2_data import Agent2Data
from agents.agent3_diagnostic import Agent3Diagnostic
from agents.llm import LLMClient
from comparator.aligner import align_records
from comparator.field_compare import compare_field
from comparator.cobol_binary_reader import CobolBinaryReader
from report.generator import ReportGenerator
from storage.bundle import TestDataBundle
from config import Config
def _compare_aligned(aligned, tree, tolerance, key_field):
    """Convert aligned ``(cobol, java, state)`` triples into field results.

    Records whose state is not ``"MATCHED"`` contribute one placeholder
    result; matched records contribute one ``compare_field`` result per
    COBOL field other than ``key_field``.
    """
    results = []
    for cobol_rec, java_rec, state in aligned:
        if state != "MATCHED":
            status = "NOT_SET" if state == "MISSING_IN_SPARK" else "EXTRA"
            results.append(FieldResult(field_name="unknown", status=status))
            continue
        for name in cobol_rec:
            if name == key_field:
                continue
            cobol_val = str(cobol_rec.get(name, ""))
            java_val = str(java_rec.get(name, ""))
            meta = tree.get_by_name(name)
            # Fields without metadata, and COMP-3 fields, are compared as
            # decimals; every other known field is compared as a string.
            kind = "string" if meta and meta.usage != "COMP-3" else "decimal"
            results.append(compare_field(name, cobol_val, java_val, kind, tolerance))
    return results


def _attach_suggestions(vr, llm, results):
    """Best-effort: ask the diagnostic agent for a hint on each failing field.

    A failure of the agent never fails the run; it is recorded under
    ``vr.debug["diagnostic_errors"]`` instead of being silently dropped.
    """
    diag = Agent3Diagnostic(llm)
    for fr in results:
        if fr.status not in ("MISMATCH", "NOT_SET", "NPE"):
            continue
        try:
            fr.suggestion = diag.analyze(fr) or ""
        except Exception as exc:  # not bare: let KeyboardInterrupt/SystemExit through
            vr.debug.setdefault("diagnostic_errors", []).append(
                {"field": getattr(fr, "field_name", "unknown"), "error": str(exc)[:200]}
            )


def _write_reports(vr):
    """Write the JSON, HTML and machine-JSON reports and set ``vr.report_path``."""
    report_dir = Path(f"reports/{vr.program}") / vr.timestamp
    report_dir.mkdir(parents=True, exist_ok=True)
    gen = ReportGenerator()
    gen.generate_json(vr, report_dir / "result.json")
    gen.generate_html(vr, report_dir / "report.html")
    gen.generate_machine_json(vr, report_dir / "machine.json")
    vr.report_path = str(report_dir)


def run_pipeline(cfg: Config, cpath: str, cbl: str, java: str, map_path: str) -> VerificationRun:
    """Run the COBOL-vs-Java verification pipeline end to end.

    Stages, in order: parse the text at ``cpath`` into a field tree, design
    test cases, write the input files, compile and run the COBOL program,
    compile and run the Java program, align and compare both outputs field by
    field, request diagnostic suggestions for failing fields, write reports.

    Args:
        cfg: Pipeline configuration (runner mode, LLM settings, dialect,
            tolerance, cost ceiling, record count).
        cpath: Path of the text file that is read and handed to
            ``Agent1Parser.parse``.
        cbl: COBOL source handed to ``CobolRunner.compile``.
        java: Java source handed to the runner's ``compile``; its file stem
            names the program in the result and in the report directory.
        map_path: Accepted for interface compatibility; not used by this
            function.

    Returns:
        The populated ``VerificationRun``. Status / exit code pairs set here:
        ``PASS``/0, ``MISMATCH``/1, ``BLOCKED``/2 (empty input, no fields,
        failed COBOL or Java build, ``java`` not on PATH), ``BLOCKED``/3
        (LLM cost above ``cfg.max_llm_cost``), ``ERROR``/3 (failed COBOL run
        or any unexpected exception). Early exits do not write reports.
    """
    key_field = "CUST-ID"  # record key used for alignment; never compared as a field
    t0 = time.time()
    vr = VerificationRun(program=Path(java).stem, runner=cfg.runner_mode)
    spark_mode = cfg.runner_mode == "spark"
    try:
        # --- 1. Parse the input text into a field tree -----------------------
        text = Path(cpath).read_text()
        if not text.strip():
            return _done(vr, t0, "BLOCKED", 2)
        llm = LLMClient(model=cfg.llm_model, timeout=cfg.llm_timeout, cache_dir=cfg.llm_cache_dir)
        tree = Agent1Parser(llm).parse(text)
        # NOTE(review): flat increment per agent call, presumably an
        # estimated cost rather than a measured one -- confirm.
        vr.llm_cost += 0.002
        vr.debug["field_tree"] = [
            {
                "name": f.name,
                "level": f.level,
                "pic": f.pic,
                "usage": f.usage,
                "offset": f.offset,
                "length": f.length,
                "redefines": f.redefines,
            }
            for f in tree.flatten().values()
        ]
        if not tree.fields:
            return _done(vr, t0, "BLOCKED", 2)
        if vr.llm_cost > cfg.max_llm_cost:
            return _done(vr, t0, "BLOCKED", 3)

        # --- 2. Design test cases and write the input files ------------------
        suite = Agent2Data(llm).design(tree, cfg.coverage_default, spark_mode)
        vr.llm_cost += 0.002
        vr.debug["test_cases"] = [
            {"id": tc.id, "fields": tc.fields, "targets": tc.coverage_targets}
            for tc in suite.test_cases
        ]
        vr.debug["spark_config"] = (
            {"records": suite.spark_config.num_records} if suite.has_spark else None
        )
        bundle = TestDataBundle(base_path=Path("test-data-bundle"))
        bundle.ensure_dirs()
        dw = DataWriter()
        dw.write_cobol_binary(suite.test_cases, bundle.cobol_input())
        if spark_mode:
            sc = suite.spark_config or SparkConfig(num_records=cfg.num_records)
            dw.write_spark_json(suite.test_cases, sc, bundle.spark_input_dir())
        else:
            dw.write_native_json(suite.test_cases, bundle.native_input())

        # --- 3. Build and run the COBOL side ---------------------------------
        cob = CobolRunner()
        build = cob.compile(cbl, cfg.dialect)
        vr.debug["cobol_build"] = {"ok": build.success, "log": build.log[-300:]}
        if not build.success:
            return _done(vr, t0, "BLOCKED", 2)
        cobol_out = Path("cobol_out.bin")
        cobol_run = cob.run(build.artifact_path, str(bundle.cobol_input()), str(cobol_out))
        if not cobol_run.success:
            return _done(vr, t0, "ERROR", 3)

        # --- 4. Build and run the Java side ----------------------------------
        if not shutil.which("java"):
            return _done(vr, t0, "BLOCKED", 2)
        runner: Runner = SparkJavaRunner(cfg.spark_master) if spark_mode else NativeJavaRunner()
        jb = runner.compile(java)
        vr.debug["java_build"] = {"ok": jb.success, "log": jb.log[-300:]}
        if not jb.success:
            return _done(vr, t0, "BLOCKED", 2)
        inp = str(bundle.spark_input_dir() if spark_mode else bundle.native_input())
        # NOTE(review): unlike the COBOL run, the Java run result is not
        # checked for success before its records are used -- confirm intent.
        jr = runner.run(jb.artifact_path, inp, "java_out")

        # --- 5. Compare outputs ----------------------------------------------
        cr = CobolBinaryReader().read(str(cobol_out), tree)
        if len(cr) == 0 and len(jr.records) == 0:
            # Nothing produced on either side counts as a pass.
            return _done(vr, t0, "PASS", 0)
        aligned = align_records(cr, jr.records, key_field=key_field)
        frs = _compare_aligned(aligned, tree, cfg.tolerance, key_field)

        # NOTE(review): "EXTRA" and "NPE" results are not counted as
        # mismatches here although "NPE" is diagnosed below -- confirm.
        mismatches = sum(1 for f in frs if f.status in ("MISMATCH", "NOT_SET"))
        vr.fields_matched = len(frs) - mismatches
        vr.fields_mismatched = mismatches
        vr.field_results = frs
        vr.status = "PASS" if mismatches == 0 else "MISMATCH"
        vr.exit_code = 0 if mismatches == 0 else 1

        # --- 6. Diagnose and report ------------------------------------------
        _attach_suggestions(vr, llm, frs)
        _write_reports(vr)
    except Exception as e:
        vr.status = "ERROR"
        vr.exit_code = 3
        # Existing contract: the truncated message is surfaced via report_path.
        vr.report_path = str(e)[:200]
        # Also keep the exception type alongside the other debug data.
        vr.debug["error"] = f"{type(e).__name__}: {e}"[:200]
    vr.duration_s = time.time() - t0
    return vr
def _done(vr, t0, s, ec):
    """Finalize a run for an early exit and hand it back.

    Stores status ``s`` and exit code ``ec`` on ``vr``, stamps
    ``vr.duration_s`` with the seconds elapsed since ``t0`` (a
    ``time.time()`` value), and returns the same ``vr`` object.
    """
    vr.status, vr.exit_code = s, ec
    elapsed = time.time() - t0
    vr.duration_s = elapsed
    return vr