Files
cobol-java-v3/cobol_testgen/cond.py
T
NB-076 874b16f48c fix: code review issues #1-#9
1. cond.py: 删除重复裸字段代码块 (dedup bare field)
2. coverage.py: 移除_mark_perform无条件fallback (虚假覆盖)
3. pipeline_bridge.py: except:pass加日志记录异常
4. __init__.py: generate_data文档更新 (copybook_dirs等)
5. cond.py: is_field贪婪→非贪婪.*→.*?
6. coverage.py: 移除残留无条件Enter+Skip标记

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-25 10:20:18 +08:00

409 lines
15 KiB
Python

"""条件层:COBOL条件表达式解析 + MC/DC枚举 + 约束合并"""
import re
from .models import CondLeaf, CondAnd, CondOr, CondNot, PicInfo
# ── 条件解析 ──
def _split_at_operator(text, operator):
"""Split text on operator word, respecting parentheses."""
result = []
current = []
depth = 0
# Normalize so parentheses are space-delimited tokens
normalized = text.replace('(', ' ( ').replace(')', ' ) ')
for token in normalized.split():
if not token:
continue
if token == '(':
depth += 1
current.append(token)
elif token == ')':
depth -= 1
current.append(token)
elif token == operator and depth == 0:
result.append(' '.join(current).strip())
current = []
else:
current.append(token)
result.append(' '.join(current).strip())
return result
def parse_single_condition(text, fields=None):
    """Parse one simple COBOL condition into a ``(field, operator, value)`` tuple.

    The forms below are tried in this order; the first one that matches wins:

    - AT END:      AT END               -> ('_FILE_STATUS', '=', '10')
    - SQLCODE:     SQLCODE = 100        -> ('SQLCODE', '=', '100')
                   SQLCODE NOT = 100    -> ('SQLCODE', '<>', '100')
                   SQLCODE GREATER THAN 0 / LESS THAN 0 -> '>' / '<' with '0'
    - SQLSTATE:    SQLSTATE <> '02000'  -> ('SQLSTATE', '<>', '02000')
    - 88-level:    STATUS-APPROVED      -> (parent, '=', value)
    - NOT 88:      NOT WS-EOF-Y         -> (parent, '<>', value)
    - Class test:  WS-KEY NUMERIC       -> ('WS-KEY', '=', 'NUMERIC')
    - Bare:        WS-EOF               -> ('WS-EOF', '=', 'Y')
    - NOT bare:    NOT WS-EOF           -> ('WS-EOF', '<>', 'Y')
    - NOT prefix:  NOT WS-X > 50        -> ('WS-X', '<=', '50')
    - NOT op:      X NOT = 5            -> ('X', '<>', '5')
                   X NOT > 5            -> ('X', '<=', '5')
                   X NOT < 5            -> ('X', '>=', '5')
    - FUNCTION:    FUNCTION MOD(X, 2) NOT = 0 -> ('_FUNC_MOD', '<>', '0')
    - Basic:       AMOUNT > 1000        -> ('AMOUNT', '>', '1000')

    For the operator forms the left operand is reduced to its leading
    identifier: subscripts / reference modification are dropped
    (``CDR-ID(1:3)`` -> ``CDR-ID``) and so is the tail of an arithmetic
    expression (``A+B NOT = C`` -> ``('A', '<>', 'C')``).  ``OF`` qualifiers
    are removed (``STD-KEY OF MASTER-REC = 5`` -> ``('STD-KEY', '=', '5')``).
    Surrounding quotes are stripped from the value.

    Args:
        text: Condition text.  Text containing ``' AND '`` or ``' OR '`` is
            treated as compound and rejected.
        fields: Optional iterable of field dicts.  Entries whose ``is_88``
            is truthy are matched on ``name`` and resolved through their
            ``parent`` / ``value`` keys.

    Returns:
        A 3-tuple ``(field, operator, value)``, or ``None`` when the text is
        empty, compound, or matches none of the supported forms.
    """
    if ' AND ' in text or ' OR ' in text:
        return None
    text = text.strip()
    if not text:
        return None
    # Glue a subscript / argument list to its name: "WS-TAB (I)" -> "WS-TAB(I)"
    text = re.sub(r'(\w)\s*\(', r'\1(', text)
    field_name = text.split()[0]

    # AT END: synthetic condition from READ blocks
    if text.upper() == 'AT END':
        return ('_FILE_STATUS', '=', '10')

    # SQLCODE special handling
    if field_name.upper() == 'SQLCODE':
        text_upper = text.upper()
        if 'GREATER THAN 0' in text_upper or 'GREATER THAN ZERO' in text_upper:
            return ('SQLCODE', '>', '0')
        if 'LESS THAN 0' in text_upper:
            return ('SQLCODE', '<', '0')
        # The negated form must be tested first: "NOT = 100" contains "= 100".
        if re.search(r'\bNOT\s*=\s*100\b', text_upper):
            return ('SQLCODE', '<>', '100')
        # Plain equality only; ">= 100", "<= 100" and "= 1000" are left to
        # the generic operator parsing further down.
        if re.search(r'(?<![<>])=\s*100\b', text_upper):
            return ('SQLCODE', '=', '100')

    # SQLSTATE special handling
    if field_name.upper() == 'SQLSTATE':
        normalized_sql = re.sub(r'\bNOT\s*=', '<>', text, flags=re.IGNORECASE)
        m = re.match(r"SQLSTATE\s*(>=|<=|<>|>|<|=)\s*['\"]?(.+?)['\"]?\s*$", normalized_sql, re.IGNORECASE)
        if m:
            return ('SQLSTATE', m.group(1), m.group(2).strip().strip("'\""))

    # Resolve 88-level condition names (plain, or negated with a NOT prefix)
    if fields:
        upper_text = text.upper()
        negated = upper_text.startswith('NOT ')
        negated_name = text[4:].strip().upper() if negated else None
        for f in fields:
            if not f.get('is_88'):
                continue
            if f['name'] == upper_text:
                return (f.get('parent', ''), '=', f.get('value', ''))
            # NOT 88-level -> invert operator
            if negated and f['name'] == negated_name:
                return (f.get('parent', ''), '<>', f.get('value', ''))

    # Remove OF qualifiers but keep whatever follows them, so that
    # "STD-KEY OF MASTER-REC = 5" becomes "STD-KEY = 5" rather than "STD-KEY".
    text = re.sub(
        r'\s+OF\s+[A-Z0-9][A-Z0-9_-]*(?:\([^)]*\))?', '', text,
        flags=re.IGNORECASE,
    ).strip()

    # COBOL class condition: WS-KEY-DGT-N NUMERIC
    class_m = re.match(r'^([A-Z][A-Z0-9_-]*(?:\([^)]*\))?)\s+(NUMERIC|ALPHABETIC|ALPHABETIC-UPPER|POSITIVE|NEGATIVE|ZERO)\s*$', text, re.IGNORECASE)
    if class_m:
        return (class_m.group(1), '=', class_m.group(2).upper())

    # Bare field reference (no operator, no NOT): WS-EOF -> WS-EOF = 'Y'
    bare_field = re.match(r'^([A-Z][A-Z0-9_-]*)(?:\([^)]*\))?\s*$', text, re.IGNORECASE)
    if bare_field:
        return (bare_field.group(1), '=', 'Y')

    if text.upper().startswith('NOT '):
        inner = text[4:].strip()
        # Bare NOT field reference (no operator): NOT WS-EOF -> WS-EOF <> 'Y'
        if not re.search(r'(>=|<=|<>|>|<|=)', text):
            if re.match(r'^[A-Z][A-Z0-9_-]*(?:\([^)]*\))?$', inner, re.IGNORECASE):
                return (inner, '<>', 'Y')
        # NOT at start of condition: NOT WS-X > 50 -> WS-X <= 50
        m_inner = re.match(r"^(\w[\w-]*(?:\s*\([^)]*\))?)\s*(>=|<=|<>|>|<|=)\s*(.*)$", inner)
        if m_inner:
            inv_op_map = {'=': '<>', '<>': '=', '>': '<=', '<': '>=', '>=': '<', '<=': '>'}
            f = re.sub(r'\s*([(),])\s*', r'\1', m_inner.group(1))
            bare = re.match(r"^(\w[\w-]*)", f)
            if bare:
                f = bare.group(1)
            op = m_inner.group(2)
            val = m_inner.group(3).strip().strip("'").strip('"')
            return (f, inv_op_map.get(op, op), val)

    # Normalize COBOL NOT-operators: X NOT = Y -> X <> Y
    # (two-character operators first so "NOT >=" is not read as "NOT >")
    normalized = text
    not_map = [
        (r'\bNOT\s+>=', '<'), (r'\bNOT\s+<=', '>'),
        (r'\bNOT\s+<>', '='), (r'\bNOT\s+=', '<>'),
        (r'\bNOT\s+>', '<='), (r'\bNOT\s+<', '>='),
    ]
    for pat, repl in not_map:
        if re.search(pat, text, re.IGNORECASE):
            normalized = re.sub(pat, repl, text, flags=re.IGNORECASE)
            break

    # FUNCTION call as left value: FUNCTION MOD(X, 2) NOT = 0 -> _FUNC_MOD <> 0
    if text.upper().startswith('FUNCTION '):
        # After not_map normalization, NOT = has been converted to <>
        func_match = re.match(
            r'^FUNCTION\s+(\w+)\(([^)]*)\)\s*(>=|<=|<>|>|<|=)\s*(.*)$',
            normalized, re.IGNORECASE
        )
        if func_match:
            func_name = func_match.group(1).upper()
            op = func_match.group(3)
            val = func_match.group(4).strip().strip("'").strip('"')
            return ('_FUNC_' + func_name, op, val)

    # Arithmetic expression regex (lazy match allows spaces in field expr)
    m = re.match(
        r"^(\w[\w\s+\-*/().-]+?)\s*(>=|<=|<>|>|<|=)\s*(.+)$",
        normalized
    )
    if m:
        field = re.sub(r'\s*([(),])\s*', r'\1', m.group(1)).strip()
        # Clean trailing ' NOT' that got swallowed by lazy match
        if field.upper().endswith(' NOT'):
            field = field[:-4].strip()
        # Keep only the leading identifier: WS-KEY-DUP-CNT(WS-J) -> WS-KEY-DUP-CNT
        bare_m = re.match(r'^(\w[\w-]*)', field)
        if bare_m:
            field = bare_m.group(1)
        return (field, m.group(2), m.group(3).strip().strip("'").strip('"'))

    # Standard regex: FIELD OP VALUE (also accepts an empty value and a
    # one-character field written without spaces, e.g. "X=5")
    m = re.match(
        r"^(\w[\w-]*(?:\s*\([^)]*\))?)\s*(>=|<=|<>|>|<|=)\s*(.*)$",
        normalized
    )
    if m:
        field = re.sub(r'\s*([(),])\s*', r'\1', m.group(1))
        # Strip subscript/substring for matching: CDR-ID(1:3) -> CDR-ID
        bare_m = re.match(r'^\w[\w-]*', field)
        if bare_m:
            field = bare_m.group(0)
        return (field, m.group(2), m.group(3).strip().strip("'").strip('"'))

    return None
def parse_compound_condition(text, fields=None):
    """Build a condition tree (CondOr / CondAnd / CondNot / CondLeaf) from text.

    The text is split on top-level ``OR`` first and on top-level ``AND``
    second, so ``AND`` groups end up nested inside ``OR`` nodes.  One pair of
    parentheses enclosing the whole expression is peeled off before
    splitting, and a leading ``NOT`` is handled only once no top-level
    ``OR`` / ``AND`` remains.  Returns ``None`` for empty text or when the
    remaining text cannot be parsed as a single condition.
    """
    text = text.strip()
    if not text:
        return None

    # Make every parenthesis its own token and collapse runs of whitespace.
    text = re.sub(r'\s+', ' ', text.replace('(', ' ( ').replace(')', ' ) ')).strip()

    # Peel one pair of enclosing parentheses, but only when the opening
    # parenthesis is closed by the very last character.
    if text.startswith('(') and text.endswith(')'):
        level = 0
        final_pos = len(text) - 1
        for pos, ch in enumerate(text):
            if ch == '(':
                level += 1
            elif ch == ')':
                level -= 1
                if level == 0 and pos < final_pos:
                    break
        else:
            unwrapped = parse_compound_condition(text[1:-1], fields)
            if unwrapped:
                return unwrapped

    # OR binds loosest, then AND; operands are folded left to right.
    for keyword, node_cls in (('OR', CondOr), ('AND', CondAnd)):
        operands = _split_at_operator(text, keyword)
        if len(operands) > 1:
            tree = parse_compound_condition(operands[0], fields)
            for operand in operands[1:]:
                tree = node_cls(tree, parse_compound_condition(operand, fields))
            return tree

    # NOT prefix (highest precedence, after AND/OR splitting)
    if text.upper().startswith('NOT '):
        negated = parse_compound_condition(text[4:].strip(), fields)
        return CondNot(negated) if negated else None

    # Leaf condition
    leaf_parts = parse_single_condition(text, fields)
    return CondLeaf(*leaf_parts) if leaf_parts else None
def collect_leaves(tree):
    """Return every CondLeaf in ``tree``, ordered left to right.

    Nodes that are not CondLeaf, CondNot, CondAnd or CondOr (``None``
    included) contribute nothing.
    """
    found = []
    pending = [tree]
    while pending:
        node = pending.pop()
        if isinstance(node, CondLeaf):
            found.append(node)
        elif isinstance(node, CondNot):
            pending.append(node.child)
        elif isinstance(node, (CondAnd, CondOr)):
            # Push right first so the left subtree is popped and visited first.
            pending.append(node.right)
            pending.append(node.left)
    return found
def evaluate_tree(tree, assignment):
    """Evaluate ``tree`` using ``assignment``, a mapping from leaf to truth value.

    Leaves are looked up directly in ``assignment``; CondNot / CondAnd /
    CondOr apply Python's ``not`` / ``and`` / ``or`` (the right operand is
    only evaluated when needed).  Any other node evaluates to ``False``.
    """
    if isinstance(tree, CondLeaf):
        return assignment[tree]
    if isinstance(tree, CondNot):
        return not evaluate_tree(tree.child, assignment)
    if isinstance(tree, CondAnd):
        lhs = evaluate_tree(tree.left, assignment)
        return lhs and evaluate_tree(tree.right, assignment)
    if isinstance(tree, CondOr):
        lhs = evaluate_tree(tree.left, assignment)
        return lhs or evaluate_tree(tree.right, assignment)
    return False
def is_field(name, fields):
    """Return True when ``name`` matches the ``name`` of an entry in ``fields``.

    A trailing parenthesised subscript is removed and the result is
    upper-cased before comparison, e.g.
    ``WS-ITEM-STATUS(WS-INDEX-VAR)`` -> ``WS-ITEM-STATUS``.
    """
    wanted = re.sub(r'\s*\(.*?\)\s*$', '', name).strip().upper()
    return any(entry['name'] == wanted for entry in fields)
# ── MC/DC ──
def mcdc_sets(tree, fields=None):
    """Generate MC/DC constraint sets for a condition tree.

    Every truth assignment of the tree's leaves is evaluated.  For each leaf
    the first pair of assignments is kept that differs in that leaf only and
    produces different decision outcomes.  The assignments of all kept
    pairs, without duplicates, become the result.

    Returns:
        ``None`` when the tree has at most one leaf; otherwise a list of
        ``(constraints, decision)`` where ``constraints`` holds one
        ``(field, op, value, want_true)`` tuple per leaf.

    ``fields`` is accepted but not used by this function.
    """
    leaves = collect_leaves(tree)
    if len(leaves) <= 1:
        return None

    # Full truth table: one (assignment, decision) row per bit pattern.
    truth_table = []
    for mask in range(1 << len(leaves)):
        row = {leaf: bool(mask & (1 << pos)) for pos, leaf in enumerate(leaves)}
        truth_table.append((row, evaluate_tree(tree, row)))

    # For each leaf, the first pair of rows showing its independent effect.
    independence = {}
    for leaf in leaves:
        if leaf in independence:
            continue
        others = [o for o in leaves if o != leaf]
        witness = next(
            (
                (dict(lo), lo_out, dict(hi), hi_out)
                for lo, lo_out in truth_table
                for hi, hi_out in truth_table
                if lo[leaf] != hi[leaf] and lo_out != hi_out
                and all(lo[o] == hi[o] for o in others)
            ),
            None,
        )
        if witness is not None:
            independence[leaf] = witness

    # Convert the kept rows to constraint tuples, skipping repeated rows.
    cases = []
    seen = set()
    for first, first_out, second, second_out in independence.values():
        for row, outcome in ((first, first_out), (second, second_out)):
            signature = frozenset((leaf, row[leaf]) for leaf in leaves)
            if signature in seen:
                continue
            seen.add(signature)
            cases.append((
                [(leaf.field, leaf.op, leaf.value, row[leaf]) for leaf in leaves],
                outcome,
            ))
    return cases
# ── 值计算 ──
def satisfying_value(field_info: dict, operator: str, value, want_true: bool) -> str:
    """Return a field value that makes ``FIELD operator value`` true or false.

    Args:
        field_info: Field description.  Keys read: ``type`` (``'numeric'``,
            ``'alphanumeric'`` or ``'alphabetic'``), ``digits`` and
            ``decimal`` (numeric), ``length`` (alphanumeric / alphabetic).
        operator: Comparison operator.  ``>``, ``>=``, ``<``, ``<=``, ``=``
            and ``<>`` are handled for numeric fields; ``=``/``==``,
            ``<>``/``!=``, ``>`` and ``<`` for character fields.
        value: Right-hand side of the comparison.
        want_true: Whether the generated value should satisfy the condition.

    Returns:
        Numeric fields: a digit string of ``digits`` integer digits followed
        by ``decimal`` fraction digits (no decimal point), wrapped modulo the
        field capacity.  Character fields: a string of ``length`` characters.
        Any field type / operator combination not handled above yields
        ``'0'`` zero-filled to ``digits + decimal``.
    """
    ftype = field_info.get('type', 'unknown')
    digits = field_info.get('digits', 0)
    decimal = field_info.get('decimal', 0)
    total = digits + decimal

    if ftype == 'numeric':
        scale = 10 ** decimal
        capacity = 10 ** total
        try:
            # Scale to an integer count of the smallest unit, rounding half up.
            val_int = int(float(str(value)) * scale + 0.5)
        except (ValueError, TypeError, OverflowError):
            # Non-numeric text, NaN, or an infinite value ("inf" parses as a
            # float but cannot be converted to int): fall back to zero.
            val_int = 0

        if want_true:
            # '>=', '=' and '<=' are satisfied by the comparison value itself.
            if operator == '>':
                val_int += 1
            elif operator == '<':
                val_int = max(0, val_int - 1)
            elif operator == '<>':
                val_int = (val_int + 1) % capacity
        else:
            # '<' and '<>' are falsified by the comparison value itself.
            if operator in ('>', '>='):
                val_int = 0
            elif operator == '=':
                val_int = (val_int + 1) % capacity
            elif operator == '<=':
                val_int += 1

        val_int %= capacity
        int_part = str(val_int // scale).zfill(digits)
        if decimal == 0:
            return int_part
        return int_part + str(val_int % scale).zfill(decimal)

    elif ftype in ('alphanumeric', 'alphabetic'):
        length = field_info.get('length', 1)
        has_literal = isinstance(value, str) and bool(value)
        base_chr = value[0].upper() if has_literal else 'A'

        def equal_text():
            # The literal itself, truncated / space-padded to the field
            # length, so the result really compares equal to ``value``.
            # Without a usable literal, fall back to the filler character.
            if has_literal:
                return value[:length].ljust(length)
            return base_chr.ljust(length, base_chr)

        def different_text():
            # A field filled with a letter that differs from the literal's
            # first character, so the strings cannot compare equal.
            other = chr(65 + (ord(base_chr) - 64) % 26)
            return other.ljust(length, other)

        if want_true:
            if operator in ('=', '=='):
                return equal_text()
            elif operator in ('<>', '!='):
                return different_text()
            elif operator == '>':
                chars = list(str(value)[:length].ljust(length))
                last = chars[-1]
                # Bump the last character unless it is already a range end.
                if last not in '9Zz':
                    chars[-1] = chr(ord(last) + 1)
                return ''.join(chars)
            elif operator == '<':
                chars = list(str(value)[:length].ljust(length))
                last = chars[-1]
                if last == ' ':
                    pass  # a trailing space is left unchanged
                elif last in '0Aa':
                    chars[-1] = ' '
                else:
                    chars[-1] = chr(ord(last) - 1)
                return ''.join(chars)
        else:
            if operator in ('=', '=='):
                return different_text()
            elif operator in ('<>', '!='):
                return equal_text()
            elif operator in ('>', '<'):
                # The comparison value itself is neither greater nor less.
                return str(value)[:length].ljust(length)

    return '0'.zfill(total)