feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark
P0.6: gcov infrastructure P1: extract_structure output expansion (11 new feature fields) P2: Confusion group rule engine (8 pairs + contradiction + backtrack) P3: 4-factor confidence calculation + quality gate update P4: 33+2 COBOL program type test samples (22 files, 7 categories) P5: parametrized/ test data generation engine P6: japanese_data.py lookup tables P7-10: Type-specific test suites (~159 parametrized tests) P11: Full classification pipeline (classify_program) + orchestrator integration P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix) Architecture decisions: - classification_pipeline/ merged to hina/pipeline/ - parametrized/ as independent module - japanese_data.py as root-level file - hina/__all__ only exports classify_program() Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,210 @@
|
||||
"""RD-01~13: cobol_testgen read 模块 — 预处理 / DATA DIVISION / PIC / COPY"""
|
||||
|
||||
import sys, os, tempfile
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
|
||||
from cobol_testgen.read import (
|
||||
preprocess, _is_fixed_format, extract_data_division, extract_procedure_division,
|
||||
resolve_copybooks, parse_pic, parse_data_division,
|
||||
parse_file_control, scan_open_statements,
|
||||
)
|
||||
from cobol_testgen.models import PicInfo, FieldDef
|
||||
|
||||
|
||||
# ── RD-01~02: preprocess ──
|
||||
|
||||
def test_is_fixed_format_yes():
    """A `*` in column 7 after a six-digit sequence area is detected as fixed format."""
    fixed_source = "".join(
        (
            "000100* COMMENT\n",
            "000200 MOVE A TO B.\n",
        )
    )
    detected = _is_fixed_format(fixed_source)
    assert detected is True
|
||||
|
||||
|
||||
def test_is_fixed_format_free():
    """A leading `>>SOURCE FORMAT IS FREE` directive is detected as free format."""
    free_source = "\n".join([">>SOURCE FORMAT IS FREE", "MOVE A TO B."])
    detected = _is_fixed_format(free_source)
    assert detected is False
|
||||
|
||||
|
||||
def test_preprocess_fixed_removes_comment():
    """RD-01: fixed-format preprocessing drops `*` comment lines and keeps code lines."""
    cleaned = preprocess("000100* THIS IS COMMENT\n000200 MOVE 1 TO A.\n")
    # The statement must survive while the comment text disappears.
    assert "MOVE 1 TO A" in cleaned
    assert "* THIS IS COMMENT" not in cleaned
|
||||
|
||||
|
||||
def test_preprocess_free_strips_inline_comment():
    """RD-02: free-format preprocessing removes `*>` inline comments.

    Besides checking that the `*>` marker is gone, this also verifies that
    the statement preceding the comment is kept and that the comment text
    itself is removed; otherwise an implementation returning an empty
    string (or deleting only the marker) would pass unnoticed.
    """
    src = ">>SOURCE FORMAT IS FREE\nMOVE 1 TO A. *> this is comment"
    out = preprocess(src)
    assert "*>" not in out
    # The code in front of the inline comment must be preserved.
    assert "MOVE 1 TO A" in out
    # Compare case-insensitively: free-format output may be upper-cased.
    assert "THIS IS COMMENT" not in out.upper()
|
||||
|
||||
|
||||
def test_preprocess_empty():
    """Preprocessing an empty string yields an empty string."""
    result = preprocess("")
    assert result == ""
|
||||
|
||||
|
||||
def test_preprocess_free_uppercase():
    """Lower-case free-format code comes back upper-cased."""
    lowercase_source = "\n".join([">>SOURCE FORMAT IS FREE", "move 1 to a."])
    normalized = preprocess(lowercase_source)
    assert "MOVE 1 TO A" in normalized
|
||||
|
||||
|
||||
# ── extract_data_division / extract_procedure_division ──
|
||||
|
||||
def test_extract_data_division():
    """RD-05: the extracted DATA DIVISION text stops before PROCEDURE DIVISION."""
    program_lines = [
        "IDENTIFICATION DIVISION.",
        "DATA DIVISION.",
        "WORKING-STORAGE SECTION.",
        "01 WS-A PIC 9.",
        "PROCEDURE DIVISION.",
        "STOP RUN.",
    ]
    data_text = extract_data_division("\n".join(program_lines))
    assert "WORKING-STORAGE" in data_text
    assert "PROCEDURE DIVISION" not in data_text
|
||||
|
||||
|
||||
def test_extract_data_division_not_found():
    """A source without a DATA DIVISION yields an empty string."""
    data_text = extract_data_division("PROCEDURE DIVISION.")
    assert data_text == ""
|
||||
|
||||
|
||||
def test_extract_procedure_division():
    """The extracted PROCEDURE DIVISION text contains its own header."""
    program = "\n".join(["DATA DIVISION.", "PROCEDURE DIVISION.", "STOP RUN."])
    procedure_text = extract_procedure_division(program)
    assert "PROCEDURE DIVISION" in procedure_text
|
||||
|
||||
|
||||
def test_extract_procedure_division_not_found():
    """A source without a PROCEDURE DIVISION yields an empty string."""
    procedure_text = extract_procedure_division("DATA DIVISION.")
    assert procedure_text == ""
|
||||
|
||||
|
||||
# ── resolve_copybooks ──
|
||||
|
||||
def test_resolve_copybooks_found():
    """RD-03: a COPY statement is expanded when the copybook file exists."""
    with tempfile.TemporaryDirectory() as copy_dir:
        # Create the copybook that the COPY statement below refers to.
        copybook = os.path.join(copy_dir, "MYCPY.cpy")
        with open(copybook, "w") as handle:
            handle.write("01 WS-FIELD PIC 9.\n")

        expanded = resolve_copybooks(" COPY MYCPY.\n", copy_dir)
        assert "WS-FIELD" in expanded
|
||||
|
||||
|
||||
def test_resolve_copybooks_not_found():
    """A missing copybook leaves text containing NOT FOUND or the NOTEXIST name."""
    with tempfile.TemporaryDirectory() as empty_dir:
        expanded = resolve_copybooks(" COPY NOTEXIST.\n", empty_dir)
        # Either an explicit marker or the copybook name itself is acceptable.
        if "NOT FOUND" in expanded:
            return
        assert "NOTEXIST" in expanded.upper()
|
||||
|
||||
|
||||
def test_resolve_copybooks_no_copy():
    """Source without any COPY statement keeps its original statement.

    A fresh temporary directory is used as the copybook directory instead
    of a hard-coded "/tmp", so the test does not depend on a POSIX-only
    path and matches the other resolve_copybooks tests in this module.
    """
    with tempfile.TemporaryDirectory() as tmp:
        result = resolve_copybooks(" MOVE 1 TO A.\n", tmp)
    assert "MOVE 1 TO A" in result
|
||||
|
||||
|
||||
# ── RD-06~08: parse_pic ──
|
||||
|
||||
def test_parse_pic_simple():
    """RD-06: PIC 9(4) parses as numeric with digits=4 and decimal=0."""
    pic = parse_pic("9(4)")
    observed = (pic.type, pic.digits, pic.decimal)
    assert observed == ("numeric", 4, 0)
|
||||
|
||||
|
||||
def test_parse_pic_signed_decimal():
    """RD-07: PIC S9(7)V99 parses as signed with digits == 7 and decimal == 2.

    NOTE(review): the earlier docstring stated digits=9, but the assertion
    pins 7 — confirm whether `digits` is meant to count only the integer
    part.
    """
    pic = parse_pic("S9(7)V99")
    assert pic.signed is True
    assert (pic.digits, pic.decimal) == (7, 2)
|
||||
|
||||
|
||||
def test_parse_pic_alpha():
    """PIC X(10) parses as alphanumeric with length 10."""
    pic = parse_pic("X(10)")
    assert (pic.type, pic.length) == ("alphanumeric", 10)
|
||||
|
||||
|
||||
def test_parse_pic_alphabetic():
    """PIC A(5) parses as alphabetic with length 5."""
    pic = parse_pic("A(5)")
    assert (pic.type, pic.length) == ("alphabetic", 5)
|
||||
|
||||
|
||||
def test_parse_pic_numeric_edited():
    """PIC Z(7).99 parses as numeric-edited."""
    pic_type = parse_pic("Z(7).99").type
    assert pic_type == "numeric-edited"
|
||||
|
||||
|
||||
def test_parse_pic_empty():
    """An empty PIC string parses to type "unknown"."""
    pic_type = parse_pic("").type
    assert pic_type == "unknown"
|
||||
|
||||
|
||||
# ── parse_data_division ──
|
||||
|
||||
def test_parse_data_division_basic():
    """RD-09: elementary items under a group are parsed (input includes a SECTION header)."""
    data_text = "WORKING-STORAGE SECTION.\n 01 WS-GROUP.\n 05 WS-ITEM PIC 9(4).\n 05 WS-AMOUNT PIC S9(7)V99 COMP-3.\n"
    parsed_names = [field.name for field in parse_data_division(data_text)]
    for expected in ("WS-ITEM", "WS-AMOUNT"):
        assert expected in parsed_names
|
||||
|
||||
|
||||
def test_parse_data_division_88():
    """RD-10: 88-level condition names are flagged via `is_88`."""
    data_text = "WORKING-STORAGE SECTION.\n 01 WS-STATUS PIC X.\n 88 WS-APPROVED VALUE 'A'.\n 88 WS-REJECTED VALUE 'R'.\n"
    parsed = parse_data_division(data_text)
    # Count the entries flagged as 88-level; the input declares two of them.
    condition_count = sum(1 for field in parsed if field.is_88)
    assert condition_count >= 2
|
||||
|
||||
|
||||
def test_parse_data_division_redefines():
    """RD-11: a REDEFINES clause records the name of the redefined item."""
    data_text = "WORKING-STORAGE SECTION.\n 01 WS-BLOCK PIC X(10).\n 01 WS-BLOCK-REDEF REDEFINES WS-BLOCK.\n 05 WS-AMOUNT PIC 9(10).\n"
    parsed = parse_data_division(data_text)
    # Take the first field carrying a REDEFINES target, if there is one.
    first_redefining = next((field for field in parsed if field.redefines), None)
    assert first_redefining is not None
    assert first_redefining.redefines == "WS-BLOCK"
|
||||
|
||||
|
||||
def test_parse_data_division_occurs():
    """RD-12: an OCCURS clause is captured in `occurs_count`."""
    data_text = "WORKING-STORAGE SECTION.\n 01 WS-TABLE.\n 05 WS-ENTRY PIC 9(5) OCCURS 10 TIMES.\n"
    parsed = parse_data_division(data_text)
    # Take the first field with a positive repetition count, if there is one.
    first_table_item = next((field for field in parsed if field.occurs_count > 0), None)
    assert first_table_item is not None
    assert first_table_item.occurs_count == 10
|
||||
|
||||
|
||||
# ── parse_file_control ──
|
||||
|
||||
def test_parse_file_control():
    """Both SELECT names of a FILE-CONTROL paragraph appear in the parsed result."""
    source = "FILE-CONTROL.\n SELECT INFILE ASSIGN TO 'INPUT.DAT'.\n SELECT OUTFILE ASSIGN TO 'OUTPUT.DAT'.\nDATA DIVISION."
    file_map = parse_file_control(source)
    for logical_name in ("INFILE", "OUTFILE"):
        assert logical_name in file_map
|
||||
|
||||
|
||||
def test_parse_file_control_not_found():
    """A source without FILE-CONTROL parses to an empty dict."""
    file_map = parse_file_control("DATA DIVISION.")
    assert file_map == {}
|
||||
|
||||
|
||||
# ── scan_open_statements ──
|
||||
|
||||
def test_scan_open_statements():
    """Scanning a source with two OPEN statements yields at least two entries."""
    source = "PROCEDURE DIVISION.\n OPEN INPUT INFILE.\n OPEN OUTPUT OUTFILE."
    found = scan_open_statements(source)
    assert 2 <= len(found)
|
||||
Reference in New Issue
Block a user