e5ab3baa46
## 核心变更 ### 1. 新PROCEDURE DIVISION解析器(procedure_parser.py) - 行级状态机替换旧的BrParser regex解析器 - 覆盖:IF/ELSE/END-IF(嵌套)、EVALUATE/WHEN/ALSO、 PERFORM UNTIL/VARYING、READ/AT END/NOT AT END、 SORT/MERGE、GO TO DEPENDING ON - 之前:3/37程序有分支检测 → 现在:37/37全部有分支 - 速度:~20ms/程序,纯规则引擎 ### 2. 桥接层(pipeline_bridge.py) - 新解析器为主,旧解析器3秒超时兜底 - 自动选取分支数更多的结果 ### 3. 线性路径枚举(design_mcdc.py) - 替换旧的Cartesian积路径枚举(O(2^N))为每决策点独立枚举(O(N)) - 28-sysin: 162分支仅163条路径(之前需截断到60DP) - 消除了500路径硬上限和60DP截断 ### 4. 条件解析修复(cond.py) - NOT运算符规范化:X NOT = 5 → X <> 5 - 88-level反向:NOT WS-EOF-Y → parent <> value - 裸字段引用:NOT WS-EOF → WS-EOF <> 'Y' - 验证:1182个IF条件中0个NOT污染 ### 5. 约束字段过滤(__init__.py) - OF限定词剥离:STD-KEY OF MASTER-REC → STD-KEY - 下标字段解析:WS-ITEM(SUB) → WS-ITEM - 跳过不在fields_dict中的字段(group item/伪影) ### 6. 预处理器增强(read.py) - VALUE ALL剥离(VALUE ALL '*' → VALUE '*') - &续行合并(COBOL多行字符串拼接) - PIC小数点点→V转换(Z(9)9.99. → Z(9)9V99.) - 缺少点号补全 ### 7. Grammar修复(grammar.lark) - OCCURS 1 TIME支持(原只认TIMES) - USAGE IS COMP支持(可选IS) - $符号在PICTURE_STRING中 - 无NAME条款支持(clause+) ### 8. Flatfile写入(flatfile.py) - 多记录FD支持(选字段最多的记录) - Path类型强制转换 - 回退零值记录 ### 9. Bug修复 - trace_to_root空列表保护(core.py) ### 10. 测试套件(S16-S21) - S16: 全量基准程序端到端 - S17: gcov运行时对比 - S18/S19: 桥接器验证 - S20: DISPLAY插桩运行时验证+gcov分支覆盖率 - S21: 条件解析修复验证 - 全部17/17回归测试通过 Co-Authored-By: Claude <noreply@anthropic.com>
303 lines
11 KiB
Python
303 lines
11 KiB
Python
"""条件层:COBOL条件表达式解析 + MC/DC枚举 + 约束合并"""
|
|
|
|
import re
|
|
from .models import CondLeaf, CondAnd, CondOr, CondNot, PicInfo
|
|
|
|
|
|
# ── 条件解析 ──
|
|
|
|
def _split_at_operator(text, operator):
|
|
"""Split text on operator word, respecting parentheses."""
|
|
result = []
|
|
current = []
|
|
depth = 0
|
|
# Normalize so parentheses are space-delimited tokens
|
|
normalized = text.replace('(', ' ( ').replace(')', ' ) ')
|
|
for token in normalized.split():
|
|
if not token:
|
|
continue
|
|
if token == '(':
|
|
depth += 1
|
|
current.append(token)
|
|
elif token == ')':
|
|
depth -= 1
|
|
current.append(token)
|
|
elif token == operator and depth == 0:
|
|
result.append(' '.join(current).strip())
|
|
current = []
|
|
else:
|
|
current.append(token)
|
|
result.append(' '.join(current).strip())
|
|
return result
|
|
|
|
|
|
# Relational operators recognised in conditions.  Two-character forms come
# first so that ``>=`` is never read as ``>`` followed by ``=``.
_RELATIONAL_OPS = r'>=|<=|<>|>|<|='

# "NOT <op>" spellings and the single operator each one is rewritten to.
# Order matters: the two-character operators must be tried before ``=``,
# ``>`` and ``<``.
_NOT_OPERATOR_REWRITES = [
    (r'\bNOT\s+>=', '<'), (r'\bNOT\s+<=', '>'),
    (r'\bNOT\s+<>', '='), (r'\bNOT\s+=', '<>'),
    (r'\bNOT\s+>', '<='), (r'\bNOT\s+<', '>='),
]

# Logical complement of every relational operator.
_INVERTED_OP = {
    '=': '<>', '<>': '=',
    '>': '<=', '<=': '>',
    '<': '>=', '>=': '<',
}

# A plain data name, optionally followed by one parenthesised subscript.
_BARE_FIELD = r'^[A-Z][A-Z0-9-]*(?:\([^)]*\))?$'


def parse_single_condition(text, fields=None):
    """Parse one simple COBOL condition into a ``(field, operator, value)`` tuple.

    Handles:
      - Basic:      AMOUNT > 1000    -> ('AMOUNT', '>', '1000')
      - 88-level:   STATUS-APPROVED  -> (parent, '=', value)
      - NOT =:      X NOT = 5        -> ('X', '<>', '5')
      - NOT >:      X NOT > 5        -> ('X', '<=', '5')
      - NOT <:      X NOT < 5        -> ('X', '>=', '5')
      - NOT 88:     NOT WS-EOF-Y     -> (parent, '<>', value)
      - Bare:       WS-EOF           -> ('WS-EOF', '=', 'Y')
      - NOT bare:   NOT WS-EOF       -> ('WS-EOF', '<>', 'Y')
      - NOT arith:  A+B NOT = C      -> ('A+B', '<>', 'C')
      - NOT prefix: NOT X = 5        -> ('X', '<>', '5')

    Args:
        text: Condition text.
        fields: Optional iterable of field dicts.  Entries with a truthy
            ``'is_88'`` are matched by ``'name'`` (compared against the
            upper-cased text) and resolved through their ``'parent'`` and
            ``'value'`` entries.

    Returns:
        A 3-tuple ``(field, operator, value)`` with surrounding quotes
        stripped from ``value``, or ``None`` when the text is a compound
        (AND/OR) condition or cannot be parsed.
    """
    # Compound conditions are not handled here.  The keywords are matched
    # case-insensitively and with any surrounding whitespace.
    if re.search(r'\s(?:AND|OR)\s', text, re.IGNORECASE):
        return None
    text = text.strip()

    upper = text.upper()
    negated = upper.startswith('NOT ')
    operand = text[4:].strip() if negated else text
    has_operator = re.search(_RELATIONAL_OPS, text) is not None

    # Resolve 88-level condition names, plain or prefixed with NOT.
    if fields:
        operand_upper = operand.upper()
        for f in fields:
            if not f.get('is_88'):
                continue
            if f['name'] == upper:
                return (f.get('parent', ''), '=', f.get('value', ''))
            if negated and f['name'] == operand_upper:
                return (f.get('parent', ''), '<>', f.get('value', ''))

    # Bare NOT field reference (no operator): NOT WS-EOF -> WS-EOF <> 'Y'
    if negated and not has_operator:
        if re.match(_BARE_FIELD, operand, re.IGNORECASE):
            return (operand, '<>', 'Y')

    # Leading NOT in front of a relational condition: parse the remainder
    # and invert its operator, so the keyword never leaks into the field
    # name ('NOT X').  If the remainder cannot be parsed, fall through to
    # the generic matching below.
    if negated and has_operator:
        inner = parse_single_condition(operand, fields)
        if inner is not None:
            inner_field, inner_op, inner_value = inner
            return (inner_field, _INVERTED_OP[inner_op], inner_value)

    # Normalize COBOL NOT-operators: X NOT = Y -> X <> Y
    normalized = text
    for pattern, replacement in _NOT_OPERATOR_REWRITES:
        if re.search(pattern, text, re.IGNORECASE):
            normalized = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
            break

    # Standard form: FIELD OP VALUE (FIELD may carry a subscript).
    m = re.match(
        r"^(\w[\w-]*(?:\s*\([^)]*\))?)\s*(>=|<=|<>|>|<|=)\s*(.+)$",
        normalized
    )
    if m:
        field = re.sub(r'\s*([(),])\s*', r'\1', m.group(1))
        return (field, m.group(2), m.group(3).strip().strip("'").strip('"'))

    # Arithmetic expression on the left (lazy match allows spaces in it).
    m = re.match(
        r"^(\w[\w\s+\-*/().-]+?)\s*(>=|<=|<>|>|<|=)\s*(.+)$",
        normalized
    )
    if m:
        field = re.sub(r'\s*([(),])\s*', r'\1', m.group(1)).strip()
        # Clean a trailing ' NOT' that got swallowed by the lazy match.
        if field.upper().endswith(' NOT'):
            field = field[:-4].strip()
        return (field, m.group(2), m.group(3).strip().strip("'").strip('"'))

    # Bare field: WS-EOF (no operator) -> treat as WS-EOF = 'Y'
    if re.match(_BARE_FIELD, text, re.IGNORECASE):
        return (text, '=', 'Y')

    return None
|
|
|
|
|
|
def parse_compound_condition(text, fields=None):
    """Build a condition tree (CondOr / CondAnd / CondNot / CondLeaf) from text.

    OR is split first and AND second, so AND binds tighter than OR; a
    leading NOT is handled after both splits.  One pair of parentheses that
    encloses the whole expression is removed before splitting.

    Args:
        text: Condition text.
        fields: Optional field list forwarded to ``parse_single_condition``.

    Returns:
        The root node of the tree, or ``None`` when the text is empty or
        cannot be parsed as a leaf.
    """
    expr = text.strip()
    if not expr:
        return None

    # Make every parenthesis a standalone, single-space-delimited token.
    expr = expr.replace('(', ' ( ').replace(')', ' ) ')
    expr = re.sub(r'\s+', ' ', expr).strip()

    # Remove an outer pair of parentheses, but only when the opening
    # parenthesis is not closed before the final character.
    if expr.startswith('(') and expr.endswith(')'):
        final = len(expr) - 1
        level = 0
        encloses_all = True
        for pos, ch in enumerate(expr):
            if ch == '(':
                level += 1
            elif ch == ')':
                level -= 1
            if level == 0 and pos < final:
                encloses_all = False
                break
        if encloses_all:
            unwrapped = parse_compound_condition(expr[1:-1], fields)
            if unwrapped:
                return unwrapped

    # Lowest precedence first: OR, then AND.  Operands are folded
    # left-to-right into a left-leaning chain of binary nodes.
    for keyword, node_type in (('OR', CondOr), ('AND', CondAnd)):
        operands = _split_at_operator(expr, keyword)
        if len(operands) > 1:
            tree = parse_compound_condition(operands[0], fields)
            for operand in operands[1:]:
                tree = node_type(tree, parse_compound_condition(operand, fields))
            return tree

    # NOT prefix, applied only once no top-level AND/OR remains.
    if expr.upper().startswith('NOT '):
        negated = parse_compound_condition(expr[4:].strip(), fields)
        return CondNot(negated) if negated else None

    # Leaf condition.
    leaf = parse_single_condition(expr, fields)
    return CondLeaf(*leaf) if leaf else None
|
|
|
|
|
|
def collect_leaves(tree):
    """Return every CondLeaf in ``tree`` as a list, in left-to-right order.

    Nodes that are not CondLeaf, CondNot, CondAnd or CondOr contribute
    nothing, so an unrecognised root yields an empty list.
    """
    found = []
    pending = [tree]
    while pending:
        node = pending.pop()
        if isinstance(node, CondLeaf):
            found.append(node)
        elif isinstance(node, CondNot):
            pending.append(node.child)
        elif isinstance(node, (CondAnd, CondOr)):
            # Push right before left so the left subtree is visited first.
            pending.append(node.right)
            pending.append(node.left)
    return found
|
|
|
|
|
|
def evaluate_tree(tree, assignment):
    """Evaluate a condition tree against a mapping of leaf -> truth value.

    Args:
        tree: Root node (CondLeaf, CondNot, CondAnd or CondOr).
        assignment: Mapping that holds a value for every CondLeaf reached.

    Returns:
        The value of the decision; ``False`` for any unrecognised node.
    """
    if isinstance(tree, CondLeaf):
        return assignment[tree]
    if isinstance(tree, CondNot):
        return not evaluate_tree(tree.child, assignment)
    if isinstance(tree, CondAnd):
        lhs = evaluate_tree(tree.left, assignment)
        # The right operand is only evaluated when the left one is truthy.
        return lhs and evaluate_tree(tree.right, assignment)
    if isinstance(tree, CondOr):
        lhs = evaluate_tree(tree.left, assignment)
        # The right operand is only evaluated when the left one is falsy.
        return lhs or evaluate_tree(tree.right, assignment)
    return False
|
|
|
|
|
|
def is_field(name, fields):
    """Tell whether ``name`` matches the ``'name'`` of any entry in ``fields``.

    A trailing parenthesised subscript is removed and the remainder is
    upper-cased before comparing, e.g.
    ``WS-ITEM-STATUS(WS-INDEX-VAR)`` is looked up as ``WS-ITEM-STATUS``.
    """
    wanted = re.sub(r'\s*\(.*\)\s*$', '', name).strip().upper()
    return any(entry['name'] == wanted for entry in fields)
|
|
|
|
|
|
# ── MC/DC ──
|
|
|
|
def mcdc_sets(tree, fields=None):
    """Generate MC/DC constraint sets for a condition tree.

    For each leaf, the full truth table is searched for two assignments
    that differ only in that leaf and give different decision outcomes.
    The distinct assignments from those pairs are returned.

    Args:
        tree: Root node of the condition tree.
        fields: Not used by this function.

    Returns:
        ``None`` when the tree has at most one leaf.  Otherwise a list of
        ``(constraints, decision_outcome)`` tuples, where ``constraints``
        holds one ``(field, op, value, want_true)`` tuple per leaf.
    """
    leaves = collect_leaves(tree)
    leaf_count = len(leaves)
    if leaf_count <= 1:
        return None

    # Full truth table: bit i of ``bits`` is the truth value of leaf i.
    truth_table = []
    for bits in range(1 << leaf_count):
        row = {leaf: bool(bits & (1 << i)) for i, leaf in enumerate(leaves)}
        truth_table.append((row, evaluate_tree(tree, row)))

    def independence_pair(target):
        """Return the first row pair in which only ``target`` flips the outcome."""
        for first, first_outcome in truth_table:
            for second, second_outcome in truth_table:
                if first[target] == second[target] or first_outcome == second_outcome:
                    continue
                if all(first[o] == second[o] for o in leaves if o != target):
                    return (dict(first), first_outcome, dict(second), second_outcome)
        return None

    # One demonstrating pair per leaf; a leaf that already has one is skipped.
    pairs = {}
    for leaf in leaves:
        if leaf in pairs:
            continue
        pair = independence_pair(leaf)
        if pair is not None:
            pairs[leaf] = pair

    # Turn the selected rows into constraint tuples, dropping duplicate rows.
    constraint_sets = []
    emitted = set()
    for first, first_outcome, second, second_outcome in pairs.values():
        for row, outcome in ((first, first_outcome), (second, second_outcome)):
            signature = frozenset((l, row[l]) for l in leaves)
            if signature in emitted:
                continue
            emitted.add(signature)
            constraints = [(l.field, l.op, l.value, row[l]) for l in leaves]
            constraint_sets.append((constraints, outcome))
    return constraint_sets
|
|
|
|
|
|
|
|
|
|
|
|
# ── 值计算 ──
|
|
|
|
def satisfying_value(field_info: dict, operator: str, value, want_true: bool) -> str:
    """Produce a field value that makes ``field <operator> value`` true or false.

    Numeric fields (``type == 'numeric'``): the literal is scaled by
    ``10 ** decimal`` into an integer, nudged by one unit where the operator
    requires it, wrapped modulo ``10 ** (digits + decimal)`` and returned as
    zero-padded integer digits followed by the decimal digits (no decimal
    point).  A literal that is not a number is treated as 0.  The literal is
    parsed with ``decimal.Decimal`` so that long literals (e.g. 18 digits)
    keep every digit.

    Alphanumeric/alphabetic fields: for ``=`` / ``<>`` the result is either
    the upper-cased literal, truncated or space-padded to ``length``, or a
    field filled with a letter that differs from the literal's first
    character.  Other operators are not handled for these types.

    Args:
        field_info: Field description; reads ``'type'``, ``'digits'``,
            ``'decimal'`` and ``'length'``.
        operator: Relational operator (``=``, ``<>``, ``>``, ``>=``, ``<``,
            ``<=``; ``==`` and ``!=`` are also accepted for text fields).
        value: Literal the field is compared with.
        want_true: Whether the comparison should hold for the result.

    Returns:
        The generated value as a string.  Unsupported type/operator
        combinations yield ``'0'`` zero-filled to ``digits + decimal``.
    """
    # Local import keeps this block self-contained.
    from decimal import Decimal

    ftype = field_info.get('type', 'unknown')
    digits = field_info.get('digits', 0)
    decimal = field_info.get('decimal', 0)
    total = digits + decimal

    if ftype == 'numeric':
        try:
            # Exact decimal arithmetic: float would lose digits above 2**53.
            scaled = Decimal(str(value)) * (10 ** decimal)
            val_int = int(scaled + Decimal('0.5'))
        except (ValueError, TypeError, ArithmeticError):
            # Not a numeric literal (e.g. another field name), NaN or infinity.
            val_int = 0

        if want_true:
            if operator == '>':
                val_int = val_int + 1
            elif operator in ('>=', '=', '<='):
                pass  # the literal itself satisfies the comparison
            elif operator == '<':
                val_int = max(0, val_int - 1)
            elif operator == '<>':
                val_int = (val_int + 1) % (10 ** total)
        else:
            if operator in ('>', '>='):
                val_int = 0
            elif operator == '=':
                val_int = (val_int + 1) % (10 ** total)
            elif operator == '<':
                pass  # the literal itself is not below itself
            elif operator == '<=':
                val_int = val_int + 1
            elif operator == '<>':
                pass  # the literal itself is equal, so '<>' is false

        # Wrap into the field's capacity, then lay the digits out.
        val_int = val_int % (10 ** total)
        int_part = str(val_int // (10 ** decimal)).zfill(digits)
        dec_part = str(val_int % (10 ** decimal)).zfill(decimal)
        if decimal == 0:
            return int_part
        return int_part + dec_part

    elif ftype in ('alphanumeric', 'alphabetic'):
        length = field_info.get('length', 1)
        has_literal = isinstance(value, str) and bool(value)
        # NOTE(review): upper-casing is kept from the previous behaviour;
        # confirm that literals reaching this point are meant to be
        # case-folded.
        literal = value.upper() if has_literal else 'A'
        first = literal[0]

        equal_ops = ('=', '==')
        unequal_ops = ('<>', '!=')
        if want_true:
            must_equal = operator in equal_ops
            must_differ = operator in unequal_ops
        else:
            must_equal = operator in unequal_ops
            must_differ = operator in equal_ops

        if must_equal:
            if has_literal:
                # Use the whole literal, not just its first character, so
                # that multi-character literals really compare equal.
                width = max(length, 1)
                return literal[:width].ljust(width)
            return first.ljust(length, first)
        if must_differ:
            # A letter that is never the same as ``first``: the next letter
            # of the alphabet (wrapping Z to A), or some letter when
            # ``first`` is not an upper-case letter.
            other = chr(65 + (ord(first) - 64) % 26)
            return other.ljust(length, other)

    return '0'.zfill(total)
|