fix: 生产级 COBOL 程序解析 — COPY + OCCURS TO + FD 修复
对抗性测试发现的生产程序解析缺陷和修复:

缺陷1: COPY 语句从未被预处理(18 个月 bug)
- resolve_copybooks() 在 main() CLI 中调用但在 extract_structure() 路径中从未被调用
- 修复: preprocess() 函数头部调用 resolve_copybooks()
- 不可解析的 COPY 行被移除(避免 Lark 在 FD 块内遇到无法识别的指令)

缺陷2: Lark 语法的 fd 规则要求 data_item+ (至少一个记录)
- 生产程序 FD 可以通过 COPY 引入记录定义
- COPY 被移除后 FD 内无 data_item 导致 Lark 崩溃
- 修复: fd 改为 data_item* (零或多个)

缺陷3: OCCURS 1 TO 100 TIMES(变量范围表)
- 语法只支持 OCCURS INT TIMES,不支持 OCCURS 1 TO 100 TIMES
- 修复: occurs_clause 增加 'TO' INT 可选部分

效果: 4 个生产程序中 2 个成功解析(CRDVAL, GENDATA)
- 剩余 2 个(CRDCALC, CRDRPT)因固定格式续行限制未修复

全回归: 767 passed(0 new failures)
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
start: data_div_content
|
start: data_div_content
|
||||||
data_div_content: (file_section | working_storage | linkage)*
|
data_div_content: (file_section | working_storage | linkage)*
|
||||||
file_section: "FILE" "SECTION" DOT (fd | sd)+
|
file_section: "FILE" "SECTION" DOT (fd | sd)+
|
||||||
fd: "FD" NAME FD_SUFFIX data_item+
|
fd: "FD" NAME FD_SUFFIX data_item*
|
||||||
sd: "SD" NAME FD_SUFFIX data_item*
|
sd: "SD" NAME FD_SUFFIX data_item*
|
||||||
FD_SUFFIX: /(?:"[^"]*"|'[^']*'|[^.])*\./
|
FD_SUFFIX: /(?:"[^"]*"|'[^']*'|[^.])*\./
|
||||||
working_storage: "WORKING-STORAGE" "SECTION" DOT data_item*
|
working_storage: "WORKING-STORAGE" "SECTION" DOT data_item*
|
||||||
@@ -22,13 +22,13 @@ value_literal: INT | SIGNED_NUMBER | STRING | SQSTRING
|
|||||||
| "LOW-VALUE" | "LOW-VALUES"
|
| "LOW-VALUE" | "LOW-VALUES"
|
||||||
SQSTRING: /'[^']*'/
|
SQSTRING: /'[^']*'/
|
||||||
redefines_clause: "REDEFINES" NAME
|
redefines_clause: "REDEFINES" NAME
|
||||||
occurs_clause: "OCCURS" INT "TIMES"? ("DEPENDING" "ON" NAME)? key_clause? indexed_clause?
|
occurs_clause: "OCCURS" INT ("TO" INT)? "TIMES"? ("DEPENDING" "ON" NAME)? key_clause? indexed_clause?
|
||||||
key_clause: ("ASCENDING" | "DESCENDING") "KEY" "IS"? NAME (","? NAME)*
|
key_clause: ("ASCENDING" | "DESCENDING") "KEY" "IS"? NAME (","? NAME)*
|
||||||
indexed_clause: "INDEXED" "BY" NAME (","? NAME)*
|
indexed_clause: "INDEXED" "BY" NAME (","? NAME)*
|
||||||
usage_clause: USAGE_VAL
|
usage_clause: USAGE_VAL
|
||||||
USAGE_VAL: "COMP" | "COMP-3" | "COMP-5" | "BINARY" | "PACKED-DECIMAL" | "DISPLAY"
|
USAGE_VAL: "COMP" | "COMP-3" | "COMP-5" | "BINARY" | "PACKED-DECIMAL" | "DISPLAY"
|
||||||
LEVEL: /0[1-9]|[1-4][0-9]|49|77|88/
|
LEVEL: /0[1-9]|[1-4][0-9]|49|77|88/
|
||||||
NAME: /[A-Z][A-Z0-9-]*/
|
NAME: /[A-Z][A-Z0-9-]*/i
|
||||||
PICTURE_STRING: /[0-9A-Z()+,\-*\/V]+/i
|
PICTURE_STRING: /[0-9A-Z()+,\-*\/V]+/i
|
||||||
INT: /[0-9]+/
|
INT: /[0-9]+/
|
||||||
DOT: /\./
|
DOT: /\./
|
||||||
|
|||||||
@@ -27,6 +27,10 @@ def _is_fixed_format(source: str) -> bool:
|
|||||||
|
|
||||||
|
|
||||||
def preprocess(source: str) -> str:
|
def preprocess(source: str) -> str:
|
||||||
|
# COPY 预处理:展开或移除 COPY 语句
|
||||||
|
# Lark 语法不支持 COPY(这是预处理指令),必须在解析前处理
|
||||||
|
source = resolve_copybooks(source, '.')
|
||||||
|
|
||||||
fixed = _is_fixed_format(source)
|
fixed = _is_fixed_format(source)
|
||||||
lines = []
|
lines = []
|
||||||
for raw_line in source.splitlines():
|
for raw_line in source.splitlines():
|
||||||
@@ -110,10 +114,12 @@ def resolve_copybooks(source: str, source_dir: str) -> str:
|
|||||||
re.escape(old.strip()), new.strip(),
|
re.escape(old.strip()), new.strip(),
|
||||||
cb, flags=re.IGNORECASE
|
cb, flags=re.IGNORECASE
|
||||||
)
|
)
|
||||||
result.append(f' * COPY {name}')
|
# 展开 COPYBOOK 内容,不添加注释行(避免 Lark 在 FD 块内看到注释)
|
||||||
result.append(cb)
|
result.append(cb)
|
||||||
else:
|
else:
|
||||||
result.append(line)
|
# COPY 未找到时完全跳过(预处理指令,Lark 不应处理)
|
||||||
|
# 该行可能在 FD/SD 块内,保留会破坏 Lark 解析
|
||||||
|
pass
|
||||||
else:
|
else:
|
||||||
result.append(line)
|
result.append(line)
|
||||||
return '\n'.join(result)
|
return '\n'.join(result)
|
||||||
|
|||||||
@@ -92,11 +92,12 @@ def test_resolve_copybooks_found():
|
|||||||
|
|
||||||
|
|
||||||
def test_resolve_copybooks_not_found():
|
def test_resolve_copybooks_not_found():
|
||||||
"""COPY 文件不存在时返回含 NOT FOUND 或 NOTEXIST 的文本"""
|
"""COPY 不可解析时移除该行(预处理器指令,Lark 不应处理)"""
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
src = " COPY NOTEXIST.\n"
|
src = " COPY NOTEXIST.\n"
|
||||||
result = resolve_copybooks(src, tmp)
|
result = resolve_copybooks(src, tmp)
|
||||||
assert "NOT FOUND" in result or "NOTEXIST" in result.upper()
|
# COPY 被移除(无残留)
|
||||||
|
assert "NOTEXIST" not in result.upper()
|
||||||
|
|
||||||
|
|
||||||
def test_resolve_copybooks_no_copy():
|
def test_resolve_copybooks_no_copy():
|
||||||
|
|||||||
Reference in New Issue
Block a user