# jcl-cobol-git/jcl-runner/parser.py

"""
JCL Parser - parses JCL scripts into structured JobStep objects.
Phase 1: supports JOB, EXEC PGM=, DD, SYSOUT, SYSIN inline data,
         COND=(code,op), //* comments.
Phase 2+: PROC, GDG, COND with step names, EVEN/ONLY.
"""

import re
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class DDEntry:
    """One DD statement attached to a job step.

    Every keyword field stays at its default unless the parser found the
    corresponding parameter on the statement.
    """

    dd_name: str  # name field of the DD statement (e.g. SYSIN, SYSPRINT)
    dsn: Optional[str] = None  # data set name parameter value, if present
    disp: Optional[str] = None  # first DISP subparameter only (the status)
    sysout: Optional[str] = None  # SYSOUT class: "*" or a word
    # In-stream data lines captured after a `//SYSIN DD *` statement.
    inline_data: list[str] = field(default_factory=list)
    unit: Optional[str] = None  # not populated by parse_jcl in this file
    space: Optional[str] = None  # not populated by parse_jcl in this file


@dataclass
class CondParam:
    """A single COND=(code,operator) test taken from an EXEC statement."""

    code: int  # numeric code written first inside COND=(...)
    operator: str  # EQ, NE, GT, GE, LT, LE
    step_name: Optional[str] = None  # None means "any previous step"


@dataclass
class JobStep:
    """One EXEC statement together with the DD statements that follow it."""

    step_name: str  # name field of the EXEC statement
    program: str  # word following EXEC (an optional "PGM=" prefix is dropped)
    dd_entries: list[DDEntry] = field(default_factory=list)  # in source order
    cond: Optional[CondParam] = None  # parsed COND parameter, if any
    parm: Optional[str] = None  # PARM value from the EXEC statement, if parsed


@dataclass
class Job:
    """A parsed job: the JOB statement's name plus its steps."""

    job_name: str  # name field of the JOB statement
    steps: list[JobStep] = field(default_factory=list)  # in source order


# COND operator mapping: operator mnemonic -> predicate taking (rc, code)
# and returning the result of `rc <op> code`.
# NOTE(review): IBM JCL evaluates COND=(code,op) as `code <op> rc` (the step
# is bypassed when that test is true). For GT/GE/LT/LE this table compares in
# the opposite direction if callers pass the return code first -- confirm the
# argument order used by the caller before relying on these semantics.
COND_OPS = {
    "EQ": lambda rc, code: rc == code,
    "NE": lambda rc, code: rc != code,
    "GT": lambda rc, code: rc > code,
    "GE": lambda rc, code: rc >= code,
    "LT": lambda rc, code: rc < code,
    "LE": lambda rc, code: rc <= code,
}


def parse_jcl(filepath: str) -> Optional[Job]:
    """Parse a JCL file into a Job object.

    Recognised statements are JOB, EXEC and DD; ``//*`` comment lines and
    blank lines are skipped. In-stream data following ``//SYSIN DD *`` is
    stored (whitespace-stripped, one entry per line) in that DD's
    ``inline_data``.

    Args:
        filepath: Path of the UTF-8 encoded JCL file to read.

    Returns:
        The Job built from the last JOB statement in the file, or ``None``
        when the file contains no JOB statement. EXEC statements that appear
        before any JOB statement are not attached to a job.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        raw_lines = f.readlines()

    # Continuation handling: lines ending with ',' continue on next line
    lines = _merge_continuations(raw_lines)

    job: Optional[Job] = None
    current_step: Optional[JobStep] = None
    # DD that is currently collecting in-stream data, if any.
    instream_dd: Optional[DDEntry] = None

    for line in lines:
        stripped = line.strip()

        # Skip comments and blank lines (also inside in-stream data).
        if not stripped or stripped.startswith("//*"):
            continue

        if instream_dd is not None:
            if stripped == "/*":
                instream_dd = None
                continue
            if not stripped.startswith("//"):
                # Append as we go so data is kept even when the closing
                # "/*" is missing at end of file.
                instream_dd.inline_data.append(stripped)
                continue
            # A JCL statement implicitly ends `DD *` data; close the data
            # and fall through so the statement itself is still parsed.
            instream_dd = None

        # Must start with //
        if not stripped.startswith("//"):
            continue

        content = stripped[2:].strip()

        # JOB statement: //jobname JOB ...
        # Anchored on the operation field so that "JOB" inside a data set
        # name or program name (e.g. DSN=MY.JOB.DATA) is not mistaken for it.
        job_match = re.match(r"(\S+)\s+JOB(?=\s|$)", content, re.IGNORECASE)
        if job_match:
            job = Job(job_name=job_match.group(1))
            # DD statements before the first EXEC of the new job must not be
            # attached to a step of the previous one.
            current_step = None
            continue

        # EXEC statement
        step = _parse_exec_statement(content)
        if step is not None:
            current_step = step
            if job is not None:
                job.steps.append(step)
            continue

        # DD statement (only meaningful once a step exists)
        if current_step is None:
            continue
        parsed_dd = _parse_dd_statement(content)
        if parsed_dd is not None:
            dd, has_instream_data = parsed_dd
            current_step.dd_entries.append(dd)
            if has_instream_data:
                instream_dd = dd

    return job


def _parse_exec_statement(content: str) -> Optional[JobStep]:
    """Build a JobStep from an EXEC statement; return None for other text."""
    match = re.match(r"(\w+)\s+EXEC\s+(?:PGM=)?(\w+)", content, re.IGNORECASE)
    if match is None:
        return None

    # COND=(code,op): only the first two subparameters are used.
    cond = None
    cond_match = re.search(
        r"COND=\s*\(\s*(\d+)\s*,\s*(\w+)", content, re.IGNORECASE
    )
    if cond_match:
        cond = CondParam(
            code=int(cond_match.group(1)),
            operator=cond_match.group(2).upper(),
        )

    # PARM='quoted text' or a simple unquoted token (PARM=TEST).
    # Parenthesised lists are not supported and leave parm as None.
    parm = None
    parm_match = re.search(
        r"PARM=(?:\s*'([^']*)'|([^\s,()']+))", content, re.IGNORECASE
    )
    if parm_match:
        quoted, bare = parm_match.groups()
        parm = quoted if quoted is not None else bare

    return JobStep(
        step_name=match.group(1),
        program=match.group(2),
        cond=cond,
        parm=parm,
    )


def _parse_dd_statement(content: str) -> Optional[tuple[DDEntry, bool]]:
    """Build a DDEntry from a DD statement; return None for other text.

    The second tuple item is True when the statement is ``SYSIN DD *``,
    i.e. in-stream data lines follow it.
    """
    # Require whitespace (or end of statement) after DD so that operations
    # merely starting with "DD" are not accepted.
    match = re.match(r"(\w+)\s+DD(?:\s+(.*))?$", content, re.IGNORECASE)
    if match is None:
        return None

    dd_name = match.group(1)
    dd_params = match.group(2) or ""
    dd = DDEntry(dd_name=dd_name)

    # DSN= (DSNAME= is the long form of the same keyword)
    dsn_match = re.search(
        r"DSN(?:AME)?=\s*([^\s,]+)", dd_params, re.IGNORECASE
    )
    if dsn_match:
        dd.dsn = dsn_match.group(1)

    # DISP: keep only the first subparameter. "(" is excluded from the value
    # so DISP=(,CATLG) leaves disp unset instead of storing "(".
    disp_match = re.search(
        r"DISP=\s*\(?\s*([^,\s()]+)", dd_params, re.IGNORECASE
    )
    if disp_match:
        dd.disp = disp_match.group(1)

    sysout_match = re.search(r"SYSOUT=\s*(\*|\w+)", dd_params, re.IGNORECASE)
    if sysout_match:
        dd.sysout = sysout_match.group(1)

    # In-stream data is signalled by "*" as the first positional parameter,
    # not by any "*" in the operands (e.g. a DSN=*.STEP1.OUT back-reference).
    has_instream_data = (
        dd_name.upper() == "SYSIN"
        and re.match(r"\*(?=[\s,]|$)", dd_params) is not None
    )
    return dd, has_instream_data


def _merge_continuations(lines: list[str]) -> list[str]:
    """Merge JCL continuation lines (lines ending with ',')."""
    merged = []
    buffer = ""
    for line in lines:
        stripped = line.rstrip("\n\r")
        if buffer:
            buffer += stripped
        else:
            buffer = stripped
        # Check if line ends with continuation
        if stripped.rstrip().endswith(",") and not stripped.strip().startswith("//*"):
            continue
        merged.append(buffer)
        buffer = ""
    if buffer:
        merged.append(buffer)
    return merged