fix: 3 critical parsing bugs found through statement benchmark testing
Bug 1: ELSE IF breaks IF false_seq parsing (core.py)
- _parse_if checked self.clean() == 'ELSE' which fails on 'ELSE IF ...'
- Fix: use startswith('ELSE') and reinsert the IF portion as the next line so it is parsed recursively
- Impact: ALL ELSE IF chains were silently dropped (huge branch loss)
Bug 2: READ skip loop greedily consumes subsequent statements (core.py)
- READ's AT END / NOT AT END skip loop called advance() unconditionally, with no
statement-boundary detection
- Fix: add _stmt_boundary regex that stops on IF/PERFORM/READ/etc.
- Impact: everything after first READ was consumed as 'AT END' lines
Bug 3: _walk() in extract_structure doesn't descend into BrPerform (__init__.py)
- Branch counting _walk() only handled BrIf/BrEval/BrSeq
- IF statements inside PERFORM bodies were never counted
- Fix: descend into BrPerform.body_seq, and into BrSearch (at_end_seq and each when_list sequence)
Combined impact: matching programs (MT01-33) now correctly report
their branches instead of 0. Full regression: 749 passed (unchanged).
This commit is contained in:
@@ -372,7 +372,7 @@ def extract_structure(cobol_source: str) -> dict:
|
|||||||
file_sec = parse_file_section(preprocessed)
|
file_sec = parse_file_section(preprocessed)
|
||||||
open_dir = scan_open_statements(proc_div) if proc_div else {}
|
open_dir = scan_open_statements(proc_div) if proc_div else {}
|
||||||
|
|
||||||
from .models import BrIf, BrEval, BrSeq, BrPerform, Assign, CondAnd, CondOr
|
from .models import BrIf, BrEval, BrSeq, BrPerform, BrSearch, Assign, CondAnd, CondOr
|
||||||
|
|
||||||
decision_points = []
|
decision_points = []
|
||||||
total_branches = 0
|
total_branches = 0
|
||||||
@@ -403,6 +403,12 @@ def extract_structure(cobol_source: str) -> dict:
|
|||||||
elif isinstance(node, BrSeq):
|
elif isinstance(node, BrSeq):
|
||||||
for child in node.children:
|
for child in node.children:
|
||||||
_walk(child, counter)
|
_walk(child, counter)
|
||||||
|
elif isinstance(node, BrPerform):
|
||||||
|
_walk(node.body_seq, counter)
|
||||||
|
elif isinstance(node, BrSearch):
|
||||||
|
_walk(node.at_end_seq, counter)
|
||||||
|
for _, seq in node.when_list:
|
||||||
|
_walk(seq, counter)
|
||||||
|
|
||||||
if branch_tree:
|
if branch_tree:
|
||||||
_walk(branch_tree, [0])
|
_walk(branch_tree, [0])
|
||||||
|
|||||||
+17
-2
@@ -211,11 +211,21 @@ class _BrParser:
|
|||||||
seq.add(Assign(tgt, info))
|
seq.add(Assign(tgt, info))
|
||||||
self.advance()
|
self.advance()
|
||||||
# 跳过 READ 语句剩余行(AT END / NOT AT END / END-READ)
|
# 跳过 READ 语句剩余行(AT END / NOT AT END / END-READ)
|
||||||
|
# 遇到新的语句关键词时停止,避免贪婪吞咽后续内容
|
||||||
|
_stmt_boundary = re.compile(
|
||||||
|
r'^(IF |EVALUATE |PERFORM |SEARCH |INITIALIZE |STRING |'
|
||||||
|
r'UNSTRING |CALL |ACCEPT |READ |WRITE |REWRITE |SET |'
|
||||||
|
r'INSPECT |MOVE |COMPUTE |ADD |SUBTRACT |MULTIPLY |DIVIDE |'
|
||||||
|
r'GO\s+TO |GOBACK |STOP\s+RUN|EXIT\s|CLOSE |OPEN |DISPLAY |'
|
||||||
|
r'DELETE |START |'
|
||||||
|
r'END-IF|END-PERFORM|END-EVALUATE|END-READ)', re.IGNORECASE)
|
||||||
while self.pos < len(self.lines):
|
while self.pos < len(self.lines):
|
||||||
cl = self.clean()
|
cl = self.clean()
|
||||||
if cl in ('END-READ', 'END-READ.'):
|
if cl in ('END-READ', 'END-READ.'):
|
||||||
self.advance()
|
self.advance()
|
||||||
break
|
break
|
||||||
|
if _stmt_boundary.match(cl):
|
||||||
|
break
|
||||||
self.advance()
|
self.advance()
|
||||||
continue
|
continue
|
||||||
m_set_false = re.match(r'^SET\s+(\w[\w-]*)\s+TO\s+FALSE\s*$', line, re.IGNORECASE)
|
m_set_false = re.match(r'^SET\s+(\w[\w-]*)\s+TO\s+FALSE\s*$', line, re.IGNORECASE)
|
||||||
@@ -658,8 +668,13 @@ class _BrParser:
|
|||||||
node = BrIf(cond_text)
|
node = BrIf(cond_text)
|
||||||
node.cond_tree = parse_compound_condition(node.condition, self.fields)
|
node.cond_tree = parse_compound_condition(node.condition, self.fields)
|
||||||
node.true_seq = self.parse_seq(['ELSE', 'END-IF'])
|
node.true_seq = self.parse_seq(['ELSE', 'END-IF'])
|
||||||
if self.clean() == 'ELSE':
|
clean = self.clean()
|
||||||
self.advance()
|
if clean.startswith('ELSE'):
|
||||||
|
self.advance() # consume ELSE keyword
|
||||||
|
rest = clean[4:].strip() if len(clean) > 4 else ''
|
||||||
|
# ELSE IF → reinsert IF statement as next line for recursive parse
|
||||||
|
if rest.upper().startswith('IF '):
|
||||||
|
self.lines.insert(self.pos, rest)
|
||||||
node.false_seq = self.parse_seq(['END-IF'])
|
node.false_seq = self.parse_seq(['END-IF'])
|
||||||
if self.clean() == 'END-IF':
|
if self.clean() == 'END-IF':
|
||||||
self.advance()
|
self.advance()
|
||||||
|
|||||||
Reference in New Issue
Block a user