diff --git a/cobol_testgen/__init__.py b/cobol_testgen/__init__.py index 6362913..0b43ec6 100644 --- a/cobol_testgen/__init__.py +++ b/cobol_testgen/__init__.py @@ -958,9 +958,12 @@ def generate_data(cobol_source: str, structure: dict = None, if copybook_dirs: src_resolved = resolve_copybooks(cobol_source, '.', extra_search_paths=copybook_dirs) + src_resolved = resolve_sql_includes(src_resolved, '.') preprocessed = preprocess(src_resolved) else: - preprocessed = preprocess(cobol_source) + # Also try SQL include resolution without copybook + src_sql = resolve_sql_includes(cobol_source, '.') + preprocessed = preprocess(src_sql) data_div = extract_data_division(preprocessed) data_fields = parse_data_division(data_div) if data_div else [] @@ -1004,17 +1007,29 @@ def generate_data(cobol_source: str, structure: dict = None, ufn = fn.upper() if ' OF ' in ufn: fn = fn.split(' OF ')[0].strip() + if fn in _fdict_names: + return fn + # Check subscript: WS-PLAN-CODE(WS-PLAN-IDX) -> WS-PLAN-CODE m = re.match(r'^(\w[\w-]*)\s*\(', fn) - if m and m.group(1) in _fdict_names: - return m.group(1) + if m: + base = m.group(1) + if base in _fdict_names: + return base + # Check if any field in fdict starts with base + "(" + if any(f.startswith(base + "(") for f in _fdict_names): + return base return fn + def _is_arith_expr(fn): + return any(op in fn for op in [' + ', ' - ', ' * ', ' / ']) + filtered_paths = [] for cons_list, asgn, term in path_infos: clean = [] for c in cons_list: if len(c) >= 4: fn = _resolve_field(str(c[0])) - if fn in _fdict_names or fn.startswith("_"): + if fn in _fdict_names or fn.startswith("_") or _is_arith_expr(str(c[0])) or \ + any(f.startswith(fn + "(") for f in _fdict_names): c = list(c); c[0] = fn clean.append(tuple(c)) else: diff --git a/cobol_testgen/cond.py b/cobol_testgen/cond.py index 1b7ea87..dd6a83e 100644 --- a/cobol_testgen/cond.py +++ b/cobol_testgen/cond.py @@ -94,20 +94,21 @@ def parse_single_condition(text, fields=None): text = text.split(' OF ')[0].strip() # COBOL class condition: WS-KEY-DGT-N NUMERIC - if re.match(r'^[A-Z][A-Z0-9-]*(?:\([^)]*\))?\s+(NUMERIC|ALPHABETIC|ALPHABETIC-UPPER|POSITIVE|NEGATIVE|ZERO)\s*$', text, re.IGNORECASE): - m = re.match(r'^([A-Z][A-Z0-9-]*(?:\([^)]*\))?)\s+(NUMERIC|ALPHABETIC|ALPHABETIC-UPPER|POSITIVE|NEGATIVE|ZERO)\s*$', text, re.IGNORECASE) + if re.match(r'^[A-Z][A-Z0-9_-]*(?:\([^)]*\))?\s+(NUMERIC|ALPHABETIC|ALPHABETIC-UPPER|POSITIVE|NEGATIVE|ZERO)\s*$', text, re.IGNORECASE): + m = re.match(r'^([A-Z][A-Z0-9_-]*(?:\([^)]*\))?)\s+(NUMERIC|ALPHABETIC|ALPHABETIC-UPPER|POSITIVE|NEGATIVE|ZERO)\s*$', text, re.IGNORECASE) return (m.group(1), '=', m.group(2).upper()) # Bare field reference (no operator, no NOT): WS-EOF → WS-EOF = 'Y' - if re.match(r'^[A-Z][A-Z0-9-]*(?:\([^)]*\))?\s*$', text, re.IGNORECASE): + if re.match(r'^[A-Z][A-Z0-9_-]*(?:\([^)]*\))?\s*$', text, re.IGNORECASE): return (text, '=', 'Y') # Bare NOT field reference (no operator): NOT WS-EOF → WS-EOF <> 'Y' if text.upper().startswith('NOT ') and not re.search(r'(>=|<=|<>|>|<|=)', text): fn = text[4:].strip() - if re.match(r'^[A-Z][A-Z0-9-]*(?:\([^)]*\))?$', fn, re.IGNORECASE): + if re.match(r'^[A-Z][A-Z0-9_-]*(?:\([^)]*\))?$', fn, re.IGNORECASE): return (fn, '<>', 'Y') + # NOT at start of condition: NOT WS-X > 50 → WS-X <= 50 # Strip leading NOT, parse the inner condition, invert the operator if text.upper().startswith('NOT '): @@ -135,14 +136,18 @@ def parse_single_condition(text, fields=None): normalized = re.sub(pat, repl, text, flags=re.IGNORECASE) break - # Standard regex: FIELD OP VALUE - m = re.match( - r"^(\w[\w-]*(?:\s*\([^)]*\))?)\s*(>=|<=|<>|>|<|=)\s*(.*)$", - normalized - ) - if m: - field = re.sub(r'\s*([(),])\s*', r'\1', m.group(1)) - return (field, m.group(2), m.group(3).strip().strip("'").strip('"')) + # FUNCTION call as left value: FUNCTION MOD(X, 2) NOT = 0 → _FUNC_MOD <> 0 + if text.upper().startswith('FUNCTION '): + # After not_map normalization, NOT = has been converted to <> + func_match = re.match( + r'^FUNCTION\s+(\w+)\(([^)]*)\)\s*(>=|<=|<>|>|<|=)\s*(.*)$', + normalized, re.IGNORECASE + ) + if func_match: + func_name = func_match.group(1).upper() + op = func_match.group(3) + val = func_match.group(4).strip().strip("'").strip('"') + return ('_FUNC_' + func_name, op, val) # Arithmetic expression regex (lazy match allows spaces in field expr) m = re.match( @@ -156,8 +161,21 @@ def parse_single_condition(text, fields=None): field = field[:-4].strip() return (field, m.group(2), m.group(3).strip().strip("'").strip('"')) - # Bare field: WS-EOF (no operator) → treat as WS-EOF = 'Y' - if re.match(r'^[A-Z][A-Z0-9-]*(?:\([^)]*\))?$', text, re.IGNORECASE): + # Standard regex: FIELD OP VALUE + m = re.match( + r"^(\w[\w-]*(?:\s*\([^)]*\))?)\s*(>=|<=|<>|>|<|=)\s*(.*)$", + normalized + ) + if m: + field = re.sub(r'\s*([(),])\s*', r'\1', m.group(1)) + # Strip subscript/substring for matching: CDR-ID(1:3) -> CDR-ID + bare_m = re.match(r'^\w[\w-]*', field) + if bare_m: + field = bare_m.group(0) + return (field, m.group(2), m.group(3).strip().strip("'").strip('"')) + + # Bare field: WS-EOF (no operator) -> WS-EOF = 'Y' + if re.match(r'^[A-Z][A-Z0-9_-]*(?:\([^)]*\))?\s*$', text, re.IGNORECASE): return (text, '=', 'Y') return None