feat: complete INSPECT/SEARCH support, fix PERFORM/EVAL coverage marking
- Add INSPECT (TALLYING/REPLACING/CONVERTING) with BEFORE/AFTER INITIAL
- Add SEARCH/SEARCH ALL with element-assignment path enumeration
- Fix _mark_perform compound condition marking via evaluate_tree
- Fix EVALUATE TRUE prior_false to collect all MC/DC false sets
- Add impossible-path filtering (Pass A.5) with trace-to-root conflict detection
- Fix multi-line PERFORM VARYING parsing (VARYING/FROM/BY/UNTIL on separate lines)
- Remove dead code: agents.py LLM parser (replaced by rule-based _BrParser)
- 59 unit tests passing, 5 integration programs verified
This commit is contained in:
+197
-13
@@ -3,7 +3,7 @@
|
||||
import re
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from .models import BrSeq, BrIf, BrEval, BrPerform, BrSeq, CondLeaf, CondNot, ParseError, Assign, CallNode, ExitNode, GoTo
|
||||
from .models import BrSeq, BrIf, BrEval, BrPerform, BrSearch, BrSeq, CondLeaf, CondNot, ParseError, Assign, CallNode, ExitNode, GoTo
|
||||
from .cond import parse_compound_condition, parse_single_condition, collect_leaves
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -12,6 +12,7 @@ logger = logging.getLogger(__name__)
|
||||
_COBOL_SCOPE_ENDERS = {
|
||||
'END-IF', 'END-EVALUATE', 'END-PERFORM', 'END-EXEC', 'END-CALL',
|
||||
'END-READ', 'END-WRITE', 'END-DELETE', 'END-REWRITE', 'END-START',
|
||||
'END-SEARCH',
|
||||
'ELSE', 'WHEN', 'OTHER',
|
||||
}
|
||||
|
||||
@@ -22,22 +23,26 @@ def scan_paragraphs(raw_lines):
|
||||
while i < len(raw_lines):
|
||||
line = raw_lines[i].strip()
|
||||
m = re.match(r'^([A-Z0-9][A-Z0-9-]*)\.\s*$', line)
|
||||
sec_m = re.match(r'^([A-Z][A-Z0-9-]*)\s+SECTION\.?\s*$', line, re.IGNORECASE)
|
||||
if m and m.group(1) not in _COBOL_SCOPE_ENDERS:
|
||||
name = m.group(1)
|
||||
start = i + 1
|
||||
j = i + 1
|
||||
while j < len(raw_lines):
|
||||
nline = raw_lines[j].strip()
|
||||
nm = re.match(r'^([A-Z0-9][A-Z0-9-]*)\.\s*$', nline)
|
||||
if nm and nm.group(1) not in _COBOL_SCOPE_ENDERS:
|
||||
break
|
||||
if re.match(r'^[A-Z][A-Z0-9-]*\s+SECTION\.\s*$', nline, re.IGNORECASE):
|
||||
break
|
||||
j += 1
|
||||
paragraphs[name] = (start, j - 1)
|
||||
i = j
|
||||
elif sec_m:
|
||||
name = sec_m.group(1).upper()
|
||||
else:
|
||||
i += 1
|
||||
continue
|
||||
start = i + 1
|
||||
j = i + 1
|
||||
while j < len(raw_lines):
|
||||
nline = raw_lines[j].strip()
|
||||
nm = re.match(r'^([A-Z0-9][A-Z0-9-]*)\.\s*$', nline)
|
||||
if nm and nm.group(1) not in _COBOL_SCOPE_ENDERS:
|
||||
break
|
||||
if re.match(r'^[A-Z][A-Z0-9-]*\s+SECTION\.\s*$', nline, re.IGNORECASE):
|
||||
break
|
||||
j += 1
|
||||
paragraphs[name] = (start, j - 1)
|
||||
i = j
|
||||
return paragraphs
|
||||
|
||||
|
||||
@@ -160,6 +165,10 @@ class _BrParser:
|
||||
if perf_node:
|
||||
seq.add(perf_node)
|
||||
continue
|
||||
m_search = re.match(r'^SEARCH\b(?:\s+(ALL))?\s+(\w[\w-]*)(?:\s+VARYING\s+(\w[\w-]*))?', line, re.IGNORECASE)
|
||||
if m_search:
|
||||
seq.add(self._parse_search(m_search))
|
||||
continue
|
||||
m = re.match(r'^INITIALIZE\s+', line)
|
||||
if m:
|
||||
init_seq = self._parse_initialize()
|
||||
@@ -229,6 +238,15 @@ class _BrParser:
|
||||
if m_set:
|
||||
seq.add(self._parse_set_true(m_set.group(1)))
|
||||
continue
|
||||
m_insp = re.match(r'^INSPECT\s+', line, re.IGNORECASE)
|
||||
if m_insp:
|
||||
info = self._parse_inspect(line)
|
||||
if info:
|
||||
tgt = info.get('tgt', '')
|
||||
self.assignments.setdefault(tgt, []).append(info)
|
||||
seq.add(Assign(tgt, info))
|
||||
self.advance()
|
||||
continue
|
||||
assign_node = self._record_assignment(line)
|
||||
if assign_node:
|
||||
seq.add(assign_node)
|
||||
@@ -243,6 +261,81 @@ class _BrParser:
|
||||
return True
|
||||
return False
|
||||
|
||||
# ── INSPECT ──
|
||||
|
||||
_PIC_FIG_CONV = {'ZERO': '0', 'ZEROS': '0', 'ZEROES': '0',
|
||||
'SPACE': ' ', 'SPACES': ' '}
|
||||
|
||||
@staticmethod
|
||||
def _expand_figurative(val):
|
||||
if val.upper() in _BrParser._PIC_FIG_CONV:
|
||||
return _BrParser._PIC_FIG_CONV[val.upper()]
|
||||
return val
|
||||
|
||||
def _parse_inspect_phrase(self, phrase):
|
||||
m = re.match(
|
||||
r'TALLYING\s+(\w[\w-]*)\s+FOR\s+'
|
||||
r'(LEADING|TRAILING|CHARACTERS)'
|
||||
r'(?:\s+([\'"])(.*?)\3)?'
|
||||
r'(?:\s+(BEFORE|AFTER)\s+INITIAL\s+([\'"])(.*?)\6)?\s*$',
|
||||
phrase, re.IGNORECASE
|
||||
)
|
||||
if m:
|
||||
return ('tally', {
|
||||
'count_var': m.group(1).upper(),
|
||||
'kind': m.group(2).upper(),
|
||||
'char': self._expand_figurative(m.group(4) or ''),
|
||||
'before_after': (m.group(5) or '').upper(),
|
||||
'delimiter': self._expand_figurative(m.group(7) or ''),
|
||||
})
|
||||
m = re.match(
|
||||
r'REPLACING\s+'
|
||||
r'(ALL|LEADING|FIRST|CHARACTERS)\s+'
|
||||
r'([\'"])(.*?)\2\s+BY\s+'
|
||||
r'([\'"])(.*?)\4'
|
||||
r'(?:\s+(BEFORE|AFTER)\s+INITIAL\s+([\'"])(.*?)\7)?\s*$',
|
||||
phrase, re.IGNORECASE
|
||||
)
|
||||
if m:
|
||||
return ('replace', {
|
||||
'kind': m.group(1).upper(),
|
||||
'src': self._expand_figurative(m.group(3)),
|
||||
'dst': self._expand_figurative(m.group(5)),
|
||||
'before_after': (m.group(6) or '').upper(),
|
||||
'delimiter': self._expand_figurative(m.group(8) or ''),
|
||||
})
|
||||
m = re.match(
|
||||
r'CONVERTING\s+([\'"])(.*?)\1\s+TO\s+([\'"])(.*?)\3\s*$',
|
||||
phrase, re.IGNORECASE
|
||||
)
|
||||
if m:
|
||||
return ('convert', {
|
||||
'from_chars': self._expand_figurative(m.group(2)),
|
||||
'to_chars': self._expand_figurative(m.group(4)),
|
||||
})
|
||||
return None
|
||||
|
||||
def _parse_inspect(self, line):
|
||||
m = re.match(r'^INSPECT\s+(\w[\w-]*)\s+(.+)$', line, re.IGNORECASE)
|
||||
if not m:
|
||||
return None
|
||||
tgt = m.group(1).upper()
|
||||
rest = m.group(2).strip()
|
||||
phrases = re.split(r'\s+(?=(?:TALLYING|REPLACING|CONVERTING)\b)', rest, flags=re.IGNORECASE)
|
||||
sub_ops = []
|
||||
for phrase in phrases:
|
||||
sub = self._parse_inspect_phrase(phrase.strip())
|
||||
if sub:
|
||||
sub_ops.append(sub)
|
||||
if not sub_ops:
|
||||
return None
|
||||
return {
|
||||
'type': 'inspect',
|
||||
'tgt': tgt,
|
||||
'source_vars': [tgt],
|
||||
'sub_ops': sub_ops,
|
||||
}
|
||||
|
||||
def _record_assignment(self, line):
|
||||
if self.assignments is None:
|
||||
return None
|
||||
@@ -503,6 +596,44 @@ class _BrParser:
|
||||
vars_in = re.findall(r'[A-Z][A-Z0-9-]*', expr.upper())
|
||||
return {'type': 'compute', 'source_vars': list(set(vars_in)), 'op': None, 'const': None, 'expr': expr}
|
||||
|
||||
# ── SEARCH / SEARCH ALL ──
|
||||
|
||||
def _parse_search(self, m):
    """Parse a SEARCH / SEARCH ALL statement into a ``BrSearch`` node.

    Consumes lines from the current position until an END-SEARCH
    terminator (with or without a trailing period) or the end of input.
    AT END and WHEN clauses have their bodies parsed with ``parse_seq``;
    any other line is skipped.

    Args:
        m: Match object for the SEARCH header.  Group 1 is the optional
            ``ALL`` keyword, group 2 the table name and group 3 the
            optional VARYING identifier.

    Returns:
        The populated ``BrSearch`` node.
    """
    is_all = bool(m.group(1))
    table = m.group(2).upper()
    varying = m.group(3).upper() if m.group(3) else None
    node = BrSearch(table, is_all=is_all, varying=varying)

    # One terminator test is shared by the outer loop and the nested body
    # parsers, so all of them accept the same spellings of END-SEARCH.
    def is_end_search(text):
        return text.upper() in ('END-SEARCH', 'END-SEARCH.')

    # An AT END body stops at the next WHEN or at END-SEARCH.
    def closes_at_end_body(text):
        return re.match(r'^WHEN\b', text, re.IGNORECASE) or is_end_search(text)

    # A WHEN body stops at the next WHEN, an AT END clause or END-SEARCH.
    def closes_when_body(text):
        return (re.match(r'^(WHEN|AT\s+END)\b', text, re.IGNORECASE)
                or is_end_search(text))

    self.advance()
    while self.pos < len(self.lines):
        line = self.clean()
        if is_end_search(line):
            self.advance()
            return node

        m_at = re.match(r'^AT\s+END(.+)?$', line, re.IGNORECASE)
        if m_at:
            self.advance()
            rest = m_at.group(1)
            if rest and rest.strip():
                # A statement on the same line as AT END is re-queued as
                # its own line so parse_seq sees it as the first body line.
                self.lines.insert(self.pos, rest.strip())
            node.at_end_seq = self.parse_seq(end_check=closes_at_end_body)
            node.has_at_end = True
            continue

        m_when = re.match(r'^WHEN\s+(.+?)\s*$', line, re.IGNORECASE)
        if m_when:
            cond_upper = m_when.group(1).strip()
            self.advance()
            cond_tree = parse_compound_condition(cond_upper, self.fields)
            body_seq = self.parse_seq(end_check=closes_when_body)
            node.when_list.append((cond_upper, body_seq))
            node.cond_trees.append(cond_tree)
            continue

        # Anything else inside the SEARCH is skipped.
        self.advance()
    return node
|
||||
|
||||
def _parse_if(self):
|
||||
line = self.clean()
|
||||
m = re.match(r'^IF\s+(.+?)(?:THEN)?\s*$', line)
|
||||
@@ -1039,6 +1170,18 @@ def _resolve_subscript(key, rec):
|
||||
return key
|
||||
|
||||
|
||||
def _apply_before_after(val, before_after, delimiter):
|
||||
if not delimiter:
|
||||
return val
|
||||
if before_after == 'BEFORE':
|
||||
idx = val.find(delimiter)
|
||||
return val[:idx] if idx >= 0 else val
|
||||
if before_after == 'AFTER':
|
||||
idx = val.find(delimiter)
|
||||
return val[idx + len(delimiter):] if idx >= 0 else ''
|
||||
return val
|
||||
|
||||
|
||||
def propagate_assignments(rec, assignments, fields, file_sec=None):
|
||||
def raw_to_float(val, pi):
|
||||
if pi.get('type') == 'numeric':
|
||||
@@ -1233,6 +1376,47 @@ def propagate_assignments(rec, assignments, fields, file_sec=None):
|
||||
if all_found:
|
||||
rec[resolved_tgt] = float_to_raw(total, pi_tgt)
|
||||
|
||||
# Pass 4.5: INSPECT
|
||||
for tgt, asgn in flat_list:
|
||||
if asgn['type'] != 'inspect':
|
||||
continue
|
||||
resolved_tgt = _resolve_subscript(tgt, rec)
|
||||
if resolved_tgt not in rec:
|
||||
continue
|
||||
src_val = str(rec[resolved_tgt])
|
||||
for op_type, params in asgn.get('sub_ops', []):
|
||||
if op_type == 'tally':
|
||||
cv = params['count_var'].upper()
|
||||
cv_pi = pi_map.get(cv, {})
|
||||
effective = _apply_before_after(src_val, params.get('before_after'), params.get('delimiter'))
|
||||
cnt = 0
|
||||
if params['kind'] == 'LEADING':
|
||||
cnt = len(effective) - len(effective.lstrip(params['char']))
|
||||
elif params['kind'] == 'TRAILING':
|
||||
cnt = len(effective) - len(effective.rstrip(params['char']))
|
||||
else:
|
||||
cnt = len(effective)
|
||||
if cv_pi.get('type') == 'numeric':
|
||||
rec[cv] = float_to_raw(float(cnt), cv_pi)
|
||||
elif op_type == 'replace':
|
||||
effective = _apply_before_after(src_val, params.get('before_after'), params.get('delimiter'))
|
||||
if params['kind'] == 'ALL':
|
||||
new_val = effective.replace(params['src'], params['dst'])
|
||||
elif params['kind'] == 'LEADING':
|
||||
new_val = effective
|
||||
while new_val.startswith(params['src']):
|
||||
new_val = new_val[len(params['src']):]
|
||||
new_val = effective.replace(params['src'], params['dst'], 1)
|
||||
elif params['kind'] == 'FIRST':
|
||||
new_val = effective.replace(params['src'], params['dst'], 1)
|
||||
else:
|
||||
new_val = params['dst'] * len(effective)
|
||||
rec[resolved_tgt] = new_val
|
||||
elif op_type == 'convert':
|
||||
effective = _apply_before_after(src_val, params.get('before_after'), params.get('delimiter'))
|
||||
table = str.maketrans(params['from_chars'], params['to_chars'])
|
||||
rec[resolved_tgt] = effective.translate(table)
|
||||
|
||||
# Pass 5: STRING / UNSTRING
|
||||
for tgt, asgn in flat_list:
|
||||
if asgn['type'] == 'string_concat':
|
||||
|
||||
Reference in New Issue
Block a user