Files
cobol-java-v3/tests/cobol_testgen/test_read.py
T
NB-076 4be2aae66d fix: 生产级 COBOL 程序解析 — COPY + OCCURS TO + FD 修复
对抗性测试发现的生产程序解析缺陷和修复:

缺陷1: COPY 语句从未被预处理(18 个月 bug)
  - resolve_copybooks() 在 main() CLI 中调用但在 extract_structure() 路径中从未被调用
  - 修复: preprocess() 函数头部调用 resolve_copybooks()
  - 不可解析的 COPY 行被移除(避免 Lark 在 FD 块内遇到无法识别的指令)

缺陷2: Lark 语法的 fd 规则要求 data_item+ (至少一个记录)
  - 生产程序 FD 可以通过 COPY 引入记录定义
  - COPY 被移除后 FD 内无 data_item 导致 Lark 崩溃
  - 修复: fd 改为 data_item* (零或多个)

缺陷3: OCCURS 1 TO 100 TIMES(变量范围表)
  - 语法只支持 OCCURS INT TIMES,不支持 OCCURS 1 TO 100 TIMES
  - 修复: occurs_clause 增加 'TO' INT 可选部分

效果: 4 个生产程序中 2 个成功解析(CRDVAL, GENDATA)
  - 剩余 2 个(CRDCALC, CRDRPT)因固定格式续行限制未修复

全回归: 767 passed(0 new failures)
2026-06-21 16:13:58 +08:00

212 lines
6.6 KiB
Python

"""RD-01~13: cobol_testgen read 模块 — 预处理 / DATA DIVISION / PIC / COPY"""
import sys, os, tempfile
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
from cobol_testgen.read import (
preprocess, _is_fixed_format, extract_data_division, extract_procedure_division,
resolve_copybooks, parse_pic, parse_data_division,
parse_file_control, scan_open_statements,
)
from cobol_testgen.models import PicInfo, FieldDef
# ── RD-01~02: preprocess ──
def test_is_fixed_format_yes():
    """Sequence-numbered lines with `*` in column 7 are detected as fixed format."""
    fixed_source = (
        "000100* COMMENT\n"
        "000200 MOVE A TO B.\n"
    )
    verdict = _is_fixed_format(fixed_source)
    assert verdict is True
def test_is_fixed_format_free():
    """A `>>SOURCE FORMAT IS FREE` directive makes the source non-fixed."""
    free_source = (
        ">>SOURCE FORMAT IS FREE\n"
        "MOVE A TO B."
    )
    verdict = _is_fixed_format(free_source)
    assert verdict is False
def test_preprocess_fixed_removes_comment():
    """RD-01: fixed format drops `*` comment lines and keeps statements."""
    fixed_source = (
        "000100* THIS IS COMMENT\n"
        "000200 MOVE 1 TO A.\n"
    )
    cleaned = preprocess(fixed_source)
    # The comment text must be gone while the MOVE statement survives.
    assert "* THIS IS COMMENT" not in cleaned
    assert "MOVE 1 TO A" in cleaned
def test_preprocess_free_strips_inline_comment():
    """RD-02: free format strips `*>` inline comments."""
    free_source = (
        ">>SOURCE FORMAT IS FREE\n"
        "MOVE 1 TO A. *> this is comment"
    )
    cleaned = preprocess(free_source)
    assert "*>" not in cleaned
def test_preprocess_empty():
    """Preprocessing an empty string yields an empty string."""
    cleaned = preprocess("")
    assert cleaned == ""
def test_preprocess_free_uppercase():
    """Free-format source written in lower case comes out upper-cased."""
    lowercase_source = (
        ">>SOURCE FORMAT IS FREE\n"
        "move 1 to a."
    )
    cleaned = preprocess(lowercase_source)
    assert "MOVE 1 TO A" in cleaned
# ── extract_data_division / extract_procedure_division ──
def test_extract_data_division():
    """RD-05: the DATA DIVISION text is extracted without the PROCEDURE DIVISION."""
    program = (
        "IDENTIFICATION DIVISION.\n"
        "DATA DIVISION.\n"
        "WORKING-STORAGE SECTION.\n"
        "01 WS-A PIC 9.\n"
        "PROCEDURE DIVISION.\n"
        "STOP RUN."
    )
    data_text = extract_data_division(program)
    # The extracted text must contain the section and stop before the procedure part.
    assert "WORKING-STORAGE" in data_text
    assert "PROCEDURE DIVISION" not in data_text
def test_extract_data_division_not_found():
    """A source without a DATA DIVISION yields an empty string."""
    data_text = extract_data_division("PROCEDURE DIVISION.")
    assert data_text == ""
def test_extract_procedure_division():
    """The PROCEDURE DIVISION text is extracted, header included."""
    program = (
        "DATA DIVISION.\n"
        "PROCEDURE DIVISION.\n"
        "STOP RUN."
    )
    procedure_text = extract_procedure_division(program)
    assert "PROCEDURE DIVISION" in procedure_text
def test_extract_procedure_division_not_found():
    """A source without a PROCEDURE DIVISION yields an empty string."""
    procedure_text = extract_procedure_division("DATA DIVISION.")
    assert procedure_text == ""
# ── resolve_copybooks ──
def test_resolve_copybooks_found():
    """RD-03: a COPY statement is expanded when the copybook file exists."""
    with tempfile.TemporaryDirectory() as copy_dir:
        # Materialise the copybook that the COPY statement refers to.
        with open(os.path.join(copy_dir, "MYCPY.cpy"), "w") as handle:
            handle.write("01 WS-FIELD PIC 9.\n")
        expanded = resolve_copybooks(" COPY MYCPY.\n", copy_dir)
        # The copybook content must show up in the resolved source.
        assert "WS-FIELD" in expanded
def test_resolve_copybooks_not_found():
    """An unresolvable COPY line is removed (preprocessor directive, not for Lark)."""
    with tempfile.TemporaryDirectory() as copy_dir:
        unresolved_source = " COPY NOTEXIST.\n"
        leftover = resolve_copybooks(unresolved_source, copy_dir)
        # The COPY line must vanish without leaving any residue behind.
        assert "NOTEXIST" not in leftover.upper()
def test_resolve_copybooks_no_copy():
    """A source without any COPY statement keeps its statement text.

    A fresh temporary directory is passed as the second argument, as the
    other copybook tests do, instead of a hard-coded "/tmp". This keeps the
    test portable to platforms without that path and independent of
    whatever files happen to live there.
    """
    with tempfile.TemporaryDirectory() as copy_dir:
        result = resolve_copybooks(" MOVE 1 TO A.\n", copy_dir)
    # Only containment is pinned here, not byte-for-byte equality.
    assert "MOVE 1 TO A" in result
# ── RD-06~08: parse_pic ──
def test_parse_pic_simple():
    """RD-06: PIC 9(4) parses as numeric with 4 digits and no decimals."""
    pic = parse_pic("9(4)")
    observed = (pic.type, pic.digits, pic.decimal)
    assert observed == ("numeric", 4, 0)
def test_parse_pic_signed_decimal():
    """RD-07: PIC S9(7)V99 parses as signed with digits == 7 and decimal == 2."""
    # NOTE(review): the previous docstring claimed digits=9, yet the assertion
    # pins 7 — presumably only the integer digits are counted; confirm.
    pic = parse_pic("S9(7)V99")
    assert pic.signed is True
    assert (pic.digits, pic.decimal) == (7, 2)
def test_parse_pic_alpha():
    """PIC X(10) parses as alphanumeric with length 10."""
    pic = parse_pic("X(10)")
    assert (pic.type, pic.length) == ("alphanumeric", 10)
def test_parse_pic_alphabetic():
    """PIC A(5) parses as alphabetic with length 5."""
    pic = parse_pic("A(5)")
    assert (pic.type, pic.length) == ("alphabetic", 5)
def test_parse_pic_numeric_edited():
    """PIC Z(7).99 parses as a numeric-edited picture."""
    pic_type = parse_pic("Z(7).99").type
    assert pic_type == "numeric-edited"
def test_parse_pic_empty():
    """An empty picture string parses to type "unknown"."""
    pic_type = parse_pic("").type
    assert pic_type == "unknown"
# ── parse_data_division ──
def test_parse_data_division_basic():
    """RD-09: a simple DATA DIVISION (with SECTION header) yields its elementary items."""
    data_text = (
        "WORKING-STORAGE SECTION.\n"
        " 01 WS-GROUP.\n"
        " 05 WS-ITEM PIC 9(4).\n"
        " 05 WS-AMOUNT PIC S9(7)V99 COMP-3.\n"
    )
    parsed = parse_data_division(data_text)
    field_names = [entry.name for entry in parsed]
    # Both subordinate items must be present among the parsed fields.
    for expected in ("WS-ITEM", "WS-AMOUNT"):
        assert expected in field_names
def test_parse_data_division_88():
    """RD-10: level-88 condition names are recognised."""
    data_text = (
        "WORKING-STORAGE SECTION.\n"
        " 01 WS-STATUS PIC X.\n"
        " 88 WS-APPROVED VALUE 'A'.\n"
        " 88 WS-REJECTED VALUE 'R'.\n"
    )
    parsed = parse_data_division(data_text)
    condition_names = []
    for entry in parsed:
        if entry.is_88:
            condition_names.append(entry)
    # Two 88-level entries are declared above; at least those must be flagged.
    assert len(condition_names) >= 2
def test_parse_data_division_redefines():
    """RD-11: a REDEFINES clause is recognised and names its target."""
    data_text = (
        "WORKING-STORAGE SECTION.\n"
        " 01 WS-BLOCK PIC X(10).\n"
        " 01 WS-BLOCK-REDEF REDEFINES WS-BLOCK.\n"
        " 05 WS-AMOUNT PIC 9(10).\n"
    )
    parsed = parse_data_division(data_text)
    redefining = []
    for entry in parsed:
        if entry.redefines:
            redefining.append(entry)
    assert len(redefining) >= 1
    # The first redefining entry must point at the redefined item.
    first_redefining = redefining[0]
    assert first_redefining.redefines == "WS-BLOCK"
def test_parse_data_division_occurs():
    """RD-12: an OCCURS clause is recognised together with its count."""
    data_text = (
        "WORKING-STORAGE SECTION.\n"
        " 01 WS-TABLE.\n"
        " 05 WS-ENTRY PIC 9(5) OCCURS 10 TIMES.\n"
    )
    parsed = parse_data_division(data_text)
    tables = []
    for entry in parsed:
        if entry.occurs_count > 0:
            tables.append(entry)
    assert len(tables) >= 1
    # The first repeating entry must carry the declared repetition count.
    first_table = tables[0]
    assert first_table.occurs_count == 10
# ── parse_file_control ──
def test_parse_file_control():
    """FILE-CONTROL parsing reports every SELECTed file name."""
    program = (
        "FILE-CONTROL.\n"
        " SELECT INFILE ASSIGN TO 'INPUT.DAT'.\n"
        " SELECT OUTFILE ASSIGN TO 'OUTPUT.DAT'.\n"
        "DATA DIVISION."
    )
    file_control = parse_file_control(program)
    # Both SELECT entries must be present in the parsed result.
    for file_name in ("INFILE", "OUTFILE"):
        assert file_name in file_control
def test_parse_file_control_not_found():
    """A source without FILE-CONTROL yields an empty dict."""
    file_control = parse_file_control("DATA DIVISION.")
    assert file_control == {}
# ── scan_open_statements ──
def test_scan_open_statements():
    """Scanning finds the OPEN statements of a PROCEDURE DIVISION."""
    program = (
        "PROCEDURE DIVISION.\n"
        " OPEN INPUT INFILE.\n"
        " OPEN OUTPUT OUTFILE."
    )
    found = scan_open_statements(program)
    # Two OPEN statements are present above; at least those must be reported.
    assert len(found) >= 2