bc1d56d1a4
P0.6: gcov infrastructure P1: extract_structure output expansion (11 new feature fields) P2: Confusion group rule engine (8 pairs + contradiction + backtrack) P3: 4-factor confidence calculation + quality gate update P4: 33+2 COBOL program type test samples (22 files, 7 categories) P5: parametrized/ test data generation engine P6: japanese_data.py lookup tables P7-10: Type-specific test suites (~159 parametrized tests) P11: Full classification pipeline (classify_program) + orchestrator integration P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix) Architecture decisions: - classification_pipeline/ merged to hina/pipeline/ - parametrized/ as independent module - japanese_data.py as root-level file - hina/__all__ only exports classify_program() Co-Authored-By: Claude <noreply@anthropic.com>
74 lines
2.7 KiB
Python
74 lines
2.7 KiB
Python
"""Task-queue worker.

Polls the ``tasks/`` directory for ``*.json`` task files and runs
``run_pipeline()`` for each task whose status is ``"queued"``.
"""
import json
import shutil
import sys
import time
from pathlib import Path

# Make the parent of this file's directory importable; presumably this is
# what lets the lazily imported ``config`` / ``orchestrator`` modules resolve
# (NOTE(review): confirm against the project layout).
sys.path.insert(0, str(Path(__file__).parent.parent))

# Directory scanned for task files. The path is relative, so it is resolved
# against the process's current working directory.
TASKS_DIR = Path("tasks")
|
|
|
|
# Task keys that are passed positionally to run_pipeline(); all must exist.
_REQUIRED_TASK_FIELDS = ("copybook", "cobol_src", "java_src", "mapping")


def _write_task(tf, data):
    """Overwrite task file *tf* with *data* serialized as JSON (UTF-8)."""
    tf.write_text(json.dumps(data), encoding="utf-8")


def _record_failure(tf, data, message):
    """Persist an ``"error"`` status for the task; never raises.

    *message* is truncated to 500 characters. If *data* itself cannot be
    serialized (for example because a pipeline result object was stored in
    it before the failure), a minimal record is written instead so the task
    does not stay stuck in the ``"running"`` state.
    """
    data["status"] = "error"
    data["result"] = message[:500]
    try:
        payload = json.dumps(data)
    except (TypeError, ValueError, RecursionError):
        payload = json.dumps({
            "id": data.get("id", tf.stem),
            "status": "error",
            "result": data["result"],
        })
    try:
        tf.write_text(payload, encoding="utf-8")
    except OSError as write_err:
        # Best effort only: the worker must keep polling even when the
        # status cannot be recorded, but the failure is no longer invisible.
        print(f"Worker: could not record error for {tf.name}: {write_err}",
              file=sys.stderr)


def _process_task(tf):
    """Handle one task file: validate it, run the pipeline, store the outcome.

    Files that are empty or do not contain a JSON object are rewritten as
    error records. Tasks whose status is not ``"queued"`` are left untouched.
    Any exception raised while processing is converted into an ``"error"``
    status on the task file.
    """
    data = {}
    try:
        raw = tf.read_text(encoding="utf-8")
        if not raw.strip():
            data = {"id": tf.stem, "status": "error", "result": "empty file"}
            _write_task(tf, data)
            return

        data = json.loads(raw)
        if not isinstance(data, dict):
            data = {"id": tf.stem, "status": "error",
                    "result": "invalid JSON type"}
            _write_task(tf, data)
            return

        if data.get("status") != "queued":
            return

        # Claim the task on disk before doing any work.
        data["status"] = "running"
        _write_task(tf, data)

        # Imported lazily so that this module can be loaded, and malformed
        # task files rejected, without importing the pipeline modules.
        from config import Config
        from orchestrator import run_pipeline

        cfg = Config()
        cfg.runner_mode = data.get("runner", "native")
        if cfg.runner_mode == "spark" and not shutil.which("spark-submit"):
            data["status"] = "blocked"
            data["result"] = "spark-submit not installed"
            _write_task(tf, data)
            return

        missing = [key for key in _REQUIRED_TASK_FIELDS if key not in data]
        if missing:
            # Name the absent fields instead of surfacing a bare KeyError repr.
            _record_failure(
                tf, data,
                f"missing required task field(s): {', '.join(missing)}")
            return

        vr = run_pipeline(cfg, data["copybook"], data["cobol_src"],
                          data["java_src"], data["mapping"])

        data["status"] = "done"
        data["fields"] = [
            {
                "name": fr.field_name,
                "status": fr.status,
                "cobol": fr.cobol_value,
                "java": fr.java_value,
                "suggestion": fr.suggestion,
            }
            for fr in vr.field_results
        ]
        data["debug"] = vr.debug
        data["result"] = {
            "program": vr.program,
            "status": vr.status,
            "matched": vr.fields_matched,
            "mismatched": vr.fields_mismatched,
            "duration": vr.duration_s,
            "runner": vr.runner,
        }
        if vr.report_path and "BLOCKED" in vr.status:
            data["build_log"] = vr.report_path
        _write_task(tf, data)

    except Exception as e:
        if not data:
            # Nothing could be parsed from the file; identify the record by
            # file name, as the empty-file / invalid-type records do.
            data["id"] = tf.stem
        _record_failure(tf, data, str(e))


def main(tasks_dir=None, poll_interval=2.0, max_cycles=None):
    """Poll the task directory and process every ``*.json`` task file.

    Args:
        tasks_dir: Directory to watch. ``None`` (the default) uses the
            module-level ``TASKS_DIR``.
        poll_interval: Seconds to sleep between two scans of the directory.
        max_cycles: Number of scans to perform before returning. ``None``
            (the default) polls forever, matching the original behavior.

    Task files are processed in sorted path order. Failures of individual
    tasks are written back to their files and never stop the loop.
    """
    watch_dir = TASKS_DIR if tasks_dir is None else Path(tasks_dir)
    print(f"Worker started. Watching {watch_dir}/ ...")

    completed = 0
    while True:
        for tf in sorted(watch_dir.glob("*.json")):
            _process_task(tf)
        completed += 1
        if max_cycles is not None and completed >= max_cycles:
            return
        time.sleep(poll_interval)
|
|
|
|
|
|
# Start the polling loop only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
|