feat: complete INSPECT/SEARCH support, fix PERFORM/EVAL coverage marking

- Add INSPECT (TALLYING/REPLACING/CONVERTING) with BEFORE/AFTER INITIAL
- Add SEARCH/SEARCH ALL with element-assignment path enumeration
- Fix _mark_perform compound condition marking via evaluate_tree
- Fix EVALUATE TRUE prior_false to collect all MC/DC false sets
- Add impossible path filtering (Pass A.5) with trace-to-root conflict detection
- Fix multi-line PERFORM VARYING parsing (VARYING/FROM/BY/UNTIL on separate lines)
- Remove dead code: agents.py LLM parser (replaced by rule-based _BrParser)
- 59 unit tests passing, 5 integration programs verified
2026-06-10 22:56:22 +08:00
parent 0730045e27
commit 7ac887c776
9 changed files with 509 additions and 1005 deletions
@@ -3,7 +3,7 @@
 import re
 import logging
 from datetime import datetime
-from .models import BrSeq, BrIf, BrEval, BrPerform, BrSeq, CondLeaf, CondNot, ParseError, Assign, CallNode, ExitNode, GoTo
+from .models import BrSeq, BrIf, BrEval, BrPerform, BrSearch, BrSeq, CondLeaf, CondNot, ParseError, Assign, CallNode, ExitNode, GoTo
 from .cond import parse_compound_condition, parse_single_condition, collect_leaves

 logger = logging.getLogger(__name__)
@@ -12,6 +12,7 @@ logger = logging.getLogger(__name__)
 _COBOL_SCOPE_ENDERS = {
    'END-IF', 'END-EVALUATE', 'END-PERFORM', 'END-EXEC', 'END-CALL',
    'END-READ', 'END-WRITE', 'END-DELETE', 'END-REWRITE', 'END-START',
+    'END-SEARCH',
    'ELSE', 'WHEN', 'OTHER',
 }

@@ -22,22 +23,26 @@ def scan_paragraphs(raw_lines):
    while i < len(raw_lines):
        line = raw_lines[i].strip()
        m = re.match(r'^([A-Z0-9][A-Z0-9-]*)\.\s*$', line)
+        sec_m = re.match(r'^([A-Z][A-Z0-9-]*)\s+SECTION\.?\s*$', line, re.IGNORECASE)
        if m and m.group(1) not in _COBOL_SCOPE_ENDERS:
            name = m.group(1)
-            start = i + 1
-            j = i + 1
-            while j < len(raw_lines):
-                nline = raw_lines[j].strip()
-                nm = re.match(r'^([A-Z0-9][A-Z0-9-]*)\.\s*$', nline)
-                if nm and nm.group(1) not in _COBOL_SCOPE_ENDERS:
-                    break
-                if re.match(r'^[A-Z][A-Z0-9-]*\s+SECTION\.\s*$', nline, re.IGNORECASE):
-                    break
-                j += 1
-            paragraphs[name] = (start, j - 1)
-            i = j
+        elif sec_m:
+            name = sec_m.group(1).upper()
        else:
            i += 1
+            continue
+        start = i + 1
+        j = i + 1
+        while j < len(raw_lines):
+            nline = raw_lines[j].strip()
+            nm = re.match(r'^([A-Z0-9][A-Z0-9-]*)\.\s*$', nline)
+            if nm and nm.group(1) not in _COBOL_SCOPE_ENDERS:
+                break
+            if re.match(r'^[A-Z][A-Z0-9-]*\s+SECTION\.\s*$', nline, re.IGNORECASE):
+                break
+            j += 1
+        paragraphs[name] = (start, j - 1)
+        i = j
    return paragraphs


@@ -160,6 +165,10 @@ class _BrParser:
                if perf_node:
                    seq.add(perf_node)
                continue
+            m_search = re.match(r'^SEARCH\b(?:\s+(ALL))?\s+(\w[\w-]*)(?:\s+VARYING\s+(\w[\w-]*))?', line, re.IGNORECASE)
+            if m_search:
+                seq.add(self._parse_search(m_search))
+                continue
            m = re.match(r'^INITIALIZE\s+', line)
            if m:
                init_seq = self._parse_initialize()
@@ -229,6 +238,15 @@ class _BrParser:
            if m_set:
                seq.add(self._parse_set_true(m_set.group(1)))
                continue
+            m_insp = re.match(r'^INSPECT\s+', line, re.IGNORECASE)
+            if m_insp:
+                info = self._parse_inspect(line)
+                if info:
+                    tgt = info.get('tgt', '')
+                    self.assignments.setdefault(tgt, []).append(info)
+                    seq.add(Assign(tgt, info))
+                self.advance()
+                continue
            assign_node = self._record_assignment(line)
            if assign_node:
                seq.add(assign_node)
@@ -243,6 +261,81 @@ class _BrParser:
                return True
        return False

+    # ── INSPECT ──
+
+    # Figurative-constant words mapped to the single character each denotes.
+    _PIC_FIG_CONV = {
+        'ZERO': '0',
+        'ZEROS': '0',
+        'ZEROES': '0',
+        'SPACE': ' ',
+        'SPACES': ' ',
+    }
+
+    @staticmethod
+    def _expand_figurative(val):
+        """Return the character for a figurative constant, else ``val`` itself.
+
+        The lookup is case-insensitive. Text that is not a key of
+        ``_PIC_FIG_CONV`` is returned exactly as it was passed in.
+        """
+        return _BrParser._PIC_FIG_CONV.get(val.upper(), val)
+
+    def _parse_inspect_phrase(self, phrase):
+        """Parse one TALLYING / REPLACING / CONVERTING phrase of an INSPECT.
+
+        Operands of TALLYING and REPLACING may be either a quoted literal or
+        a bare figurative constant (ZERO/ZEROS/ZEROES/SPACE/SPACES); both are
+        passed through ``_expand_figurative``. ``REPLACING CHARACTERS BY x``
+        is accepted without a source operand (``src`` is then ``''``); every
+        other REPLACING kind still requires one. CONVERTING operands must be
+        quoted literals.
+
+        Returns:
+            ``('tally', params)``, ``('replace', params)`` or
+            ``('convert', params)``; ``None`` when the phrase matches none
+            of the supported forms.
+        """
+        def operand(tag):
+            # Either a quoted literal (same quote char on both sides) or an
+            # unquoted figurative constant. Longer words are listed first so
+            # that e.g. ZEROES is not cut short at ZERO.
+            return (
+                r'(?:(?P<%(t)s_q>[\'"])(?P<%(t)s_lit>.*?)(?P=%(t)s_q)'
+                r'|(?P<%(t)s_fig>ZEROES|ZEROS|ZERO|SPACES|SPACE)\b)'
+            ) % {'t': tag}
+
+        def value(match, tag):
+            # Quoted text wins; otherwise the figurative word; '' if absent.
+            text = match.group(tag + '_lit')
+            if text is None:
+                text = match.group(tag + '_fig') or ''
+            return self._expand_figurative(text)
+
+        # Optional trailing "BEFORE|AFTER INITIAL <operand>" shared by the
+        # TALLYING and REPLACING forms.
+        scope = (
+            r'(?:\s+(?P<ba>BEFORE|AFTER)\s+INITIAL\s+'
+            + operand('delim') + r')?\s*$'
+        )
+
+        m = re.match(
+            r'TALLYING\s+(?P<count>\w[\w-]*)\s+FOR\s+'
+            r'(?P<kind>LEADING|TRAILING|CHARACTERS)'
+            r'(?:\s+' + operand('char') + r')?' + scope,
+            phrase, re.IGNORECASE
+        )
+        if m:
+            return ('tally', {
+                'count_var': m.group('count').upper(),
+                'kind': m.group('kind').upper(),
+                'char': value(m, 'char'),
+                'before_after': (m.group('ba') or '').upper(),
+                'delimiter': value(m, 'delim'),
+            })
+
+        m = re.match(
+            r'REPLACING\s+(?P<kind>ALL|LEADING|FIRST|CHARACTERS)'
+            r'(?:\s+' + operand('src') + r')?'
+            r'\s+BY\s+' + operand('dst') + scope,
+            phrase, re.IGNORECASE
+        )
+        if m:
+            kind = m.group('kind').upper()
+            has_src = (m.group('src_lit') is not None
+                       or m.group('src_fig') is not None)
+            # Only CHARACTERS may omit the source operand.
+            if not has_src and kind != 'CHARACTERS':
+                return None
+            return ('replace', {
+                'kind': kind,
+                'src': value(m, 'src'),
+                'dst': value(m, 'dst'),
+                'before_after': (m.group('ba') or '').upper(),
+                'delimiter': value(m, 'delim'),
+            })
+
+        m = re.match(
+            r'CONVERTING\s+([\'"])(.*?)\1\s+TO\s+([\'"])(.*?)\3\s*$',
+            phrase, re.IGNORECASE
+        )
+        if m:
+            return ('convert', {
+                'from_chars': self._expand_figurative(m.group(2)),
+                'to_chars': self._expand_figurative(m.group(4)),
+            })
+        return None
+
+    def _parse_inspect(self, line):
+        """Turn an ``INSPECT target phrase...`` line into an assignment dict.
+
+        The text after the target is split in front of each TALLYING,
+        REPLACING or CONVERTING keyword and every piece is handed to
+        ``_parse_inspect_phrase``; pieces that do not parse are dropped.
+
+        Returns:
+            A dict with keys ``type``, ``tgt``, ``source_vars`` and
+            ``sub_ops``, or ``None`` when the line is not an INSPECT with at
+            least one recognised phrase.
+        """
+        head = re.match(r'^INSPECT\s+(\w[\w-]*)\s+(.+)$', line, re.IGNORECASE)
+        if head is None:
+            return None
+        target = head.group(1).upper()
+        pieces = re.split(
+            r'\s+(?=(?:TALLYING|REPLACING|CONVERTING)\b)',
+            head.group(2).strip(),
+            flags=re.IGNORECASE,
+        )
+        parsed = [self._parse_inspect_phrase(piece.strip()) for piece in pieces]
+        sub_ops = [op for op in parsed if op]
+        if not sub_ops:
+            return None
+        return {
+            'type': 'inspect',
+            'tgt': target,
+            'source_vars': [target],
+            'sub_ops': sub_ops,
+        }
+
    def _record_assignment(self, line):
        if self.assignments is None:
            return None
@@ -503,6 +596,44 @@ class _BrParser:
        vars_in = re.findall(r'[A-Z][A-Z0-9-]*', expr.upper())
        return {'type': 'compute', 'source_vars': list(set(vars_in)), 'op': None, 'const': None, 'expr': expr}

+    # ── SEARCH / SEARCH ALL ──
+
+    def _parse_search(self, m):
+        """Parse a SEARCH / SEARCH ALL statement into a ``BrSearch`` node.
+
+        Args:
+            m: match of the SEARCH header; group 1 is the optional ``ALL``,
+               group 2 the table name, group 3 the optional VARYING name.
+
+        Returns:
+            The ``BrSearch`` node. Its ``at_end_seq`` / ``has_at_end`` are
+            set when an AT END clause is present, and each WHEN clause adds
+            a ``(condition_text, body_seq)`` pair to ``when_list`` plus the
+            parsed condition to ``cond_trees``. If the lines run out before
+            END-SEARCH, the node built so far is returned.
+        """
+        # One case-insensitive definition per terminator, used by both the
+        # main loop and the body end-checks. Without this a body would not
+        # stop on "END-SEARCH." or on lower-case WHEN / AT END even though
+        # the main loop recognises them.
+        end_search_re = re.compile(r'^END-SEARCH\.?$', re.IGNORECASE)
+        when_re = re.compile(r'^WHEN\b', re.IGNORECASE)
+        at_end_re = re.compile(r'^AT\s+END\b', re.IGNORECASE)
+
+        def ends_at_end_body(l):
+            return bool(when_re.match(l) or end_search_re.match(l))
+
+        def ends_when_body(l):
+            return bool(when_re.match(l) or at_end_re.match(l)
+                        or end_search_re.match(l))
+
+        is_all = bool(m.group(1))
+        table = m.group(2).upper()
+        varying = m.group(3).upper() if m.group(3) else None
+        node = BrSearch(table, is_all=is_all, varying=varying)
+        self.advance()
+        while self.pos < len(self.lines):
+            line = self.clean()
+            if end_search_re.match(line):
+                self.advance()
+                return node
+            m_at = re.match(r'^AT\s+END(.+)?$', line, re.IGNORECASE)
+            if m_at:
+                self.advance()
+                rest = m_at.group(1)
+                if rest and rest.strip():
+                    # A statement on the same line as AT END is re-queued as
+                    # its own line so parse_seq picks it up as the body.
+                    self.lines.insert(self.pos, rest.strip())
+                node.at_end_seq = self.parse_seq(end_check=ends_at_end_body)
+                node.has_at_end = True
+                continue
+            m_when = re.match(r'^WHEN\s+(.+?)\s*$', line, re.IGNORECASE)
+            if m_when:
+                cond_text = m_when.group(1).strip()
+                self.advance()
+                cond_tree = parse_compound_condition(cond_text, self.fields)
+                body_seq = self.parse_seq(end_check=ends_when_body)
+                node.when_list.append((cond_text, body_seq))
+                node.cond_trees.append(cond_tree)
+                continue
+            # Anything else inside the SEARCH is skipped.
+            self.advance()
+        return node
+
    def _parse_if(self):
        line = self.clean()
        m = re.match(r'^IF\s+(.+?)(?:THEN)?\s*$', line)
@@ -1039,6 +1170,18 @@ def _resolve_subscript(key, rec):
    return key


+def _apply_before_after(val, before_after, delimiter):
+    """Return the part of ``val`` selected by a BEFORE/AFTER INITIAL clause.
+
+    With an empty delimiter, or a ``before_after`` other than ``'BEFORE'``
+    or ``'AFTER'``, ``val`` is returned unchanged. ``'BEFORE'`` yields the
+    text ahead of the first delimiter (all of ``val`` when it is absent);
+    ``'AFTER'`` yields the text following it (``''`` when it is absent).
+    """
+    if not delimiter or before_after not in ('BEFORE', 'AFTER'):
+        return val
+    # partition gives (val, '', '') when the delimiter is missing, which
+    # lines up with both not-found results described above.
+    head, _sep, tail = val.partition(delimiter)
+    return head if before_after == 'BEFORE' else tail
+
+
 def propagate_assignments(rec, assignments, fields, file_sec=None):
    def raw_to_float(val, pi):
        if pi.get('type') == 'numeric':
@@ -1233,6 +1376,47 @@ def propagate_assignments(rec, assignments, fields, file_sec=None):
                    if all_found:
                        rec[resolved_tgt] = float_to_raw(total, pi_tgt)

+        # Pass 4.5: INSPECT
+        for tgt, asgn in flat_list:
+            if asgn['type'] != 'inspect':
+                continue
+            resolved_tgt = _resolve_subscript(tgt, rec)
+            if resolved_tgt not in rec:
+                continue
+            src_val = str(rec[resolved_tgt])
+            for op_type, params in asgn.get('sub_ops', []):
+                if op_type == 'tally':
+                    cv = params['count_var'].upper()
+                    cv_pi = pi_map.get(cv, {})
+                    effective = _apply_before_after(src_val, params.get('before_after'), params.get('delimiter'))
+                    cnt = 0
+                    if params['kind'] == 'LEADING':
+                        cnt = len(effective) - len(effective.lstrip(params['char']))
+                    elif params['kind'] == 'TRAILING':
+                        cnt = len(effective) - len(effective.rstrip(params['char']))
+                    else:
+                        cnt = len(effective)
+                    if cv_pi.get('type') == 'numeric':
+                        rec[cv] = float_to_raw(float(cnt), cv_pi)
+                elif op_type == 'replace':
+                    effective = _apply_before_after(src_val, params.get('before_after'), params.get('delimiter'))
+                    if params['kind'] == 'ALL':
+                        new_val = effective.replace(params['src'], params['dst'])
+                    elif params['kind'] == 'LEADING':
+                        new_val = effective
+                        while new_val.startswith(params['src']):
+                            new_val = new_val[len(params['src']):]
+                        new_val = effective.replace(params['src'], params['dst'], 1)
+                    elif params['kind'] == 'FIRST':
+                        new_val = effective.replace(params['src'], params['dst'], 1)
+                    else:
+                        new_val = params['dst'] * len(effective)
+                    rec[resolved_tgt] = new_val
+                elif op_type == 'convert':
+                    effective = _apply_before_after(src_val, params.get('before_after'), params.get('delimiter'))
+                    table = str.maketrans(params['from_chars'], params['to_chars'])
+                    rec[resolved_tgt] = effective.translate(table)
+
        # Pass 5: STRING / UNSTRING
        for tgt, asgn in flat_list:
            if asgn['type'] == 'string_concat':