Files
cobol-java-v3/tests/cobol_testgen/test_core.py
hangshuo652 bc1d56d1a4 feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark
P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged to hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-19 23:51:55 +08:00

184 lines
5.6 KiB
Python

"""CE-01~09: cobol_testgen core 模块 — PROCEDURE DIVISION 解析 + 数据流"""
import sys, os
# Prepend the directory two levels above this file to sys.path.
# NOTE(review): presumably this is what makes the `cobol_testgen` package
# importable when the tests are run in place — confirm against the repo layout.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
from cobol_testgen.core import (
scan_paragraphs, build_branch_tree, _basename, _init_child_names,
trace_to_root,
)
from cobol_testgen.models import BrSeq, BrIf, BrEval
# ── CE-01~02: scan_paragraphs ──
def test_scan_paragraphs_normal():
    """CE-01: a listing with three paragraph headers yields exactly those three."""
    source = [
        " MAIN-PROC.",
        " MOVE 1 TO A.",
        " SUB-ROUTINE.",
        " MOVE 2 TO B.",
        " CLEANUP.",
        " MOVE 0 TO C.",
    ]
    found = scan_paragraphs(source)
    assert len(found) == 3
    # Each header name must be present in the scan result.
    for label in ("MAIN-PROC", "SUB-ROUTINE", "CLEANUP"):
        assert label in found
def test_scan_paragraphs_scope_enders():
    """Scope terminators and branch keywords are not reported as paragraph names."""
    keywords = ("END-IF", "ELSE", "WHEN", "OTHER", "END-PERFORM")
    for ender in keywords:
        # A single line that looks like a paragraph header but is a keyword.
        found = scan_paragraphs([f" {ender}."])
        assert ender not in found
def test_scan_paragraphs_section():
    """A SECTION header is recognised and reported under its section name."""
    source = [" MAIN SECTION.", " MOVE 1 TO A.", " END SECTION."]
    found = scan_paragraphs(source)
    assert "MAIN" in found
def test_scan_paragraphs_empty():
    """An empty line list produces a result equal to an empty dict."""
    result = scan_paragraphs([])
    assert result == {}
def test_scan_paragraphs_only_code():
    """Plain statements without any paragraph header produce an empty result."""
    statements = [" MOVE 1 TO A.", " DISPLAY A."]
    result = scan_paragraphs(statements)
    assert result == {}
# ── CE-03~06: build_branch_tree ──
def test_build_branch_tree_if():
    """CE-03: an IF statement shows up as a BrIf node carrying a condition."""
    proc_text = (
        " MAIN-PROC.\n"
        " IF A > 100\n"
        " MOVE 1 TO B\n"
        " ELSE\n"
        " MOVE 2 TO B\n"
        " END-IF."
    )
    tree, _ = build_branch_tree(proc_text)
    assert tree is not None
    assert len(tree.children) > 0
    # Only the root's direct children are searched for the first BrIf.
    brif = next((node for node in tree.children if isinstance(node, BrIf)), None)
    assert brif is not None, "BrIf node should exist"
    assert brif.condition is not None
def test_build_branch_tree_empty():
    """An empty PROCEDURE DIVISION text still returns a BrSeq as the tree."""
    result = build_branch_tree("")
    # The call returns a pair; only the tree half is checked here.
    tree, _ = result
    assert isinstance(tree, BrSeq)
def test_build_branch_tree_no_branches():
    """Two plain MOVE statements give a BrSeq with at least two children."""
    proc_text = (
        " MAIN-PROC.\n"
        " MOVE 1 TO A.\n"
        " MOVE 2 TO B."
    )
    tree, _ = build_branch_tree(proc_text)
    assert isinstance(tree, BrSeq)
    child_count = len(tree.children)
    assert child_count >= 2
def test_build_branch_tree_evaluate():
    """CE-04: an EVALUATE with WHEN OTHER becomes a BrEval with has_other set."""
    proc_text = (
        " MAIN-PROC.\n"
        " EVALUATE X\n"
        " WHEN 1\n"
        " MOVE 1 TO A\n"
        " WHEN 2\n"
        " MOVE 2 TO A\n"
        " WHEN OTHER\n"
        " MOVE 0 TO A\n"
        " END-EVALUATE."
    )
    tree, _ = build_branch_tree(proc_text)
    # Only the root's direct children are searched for the first BrEval.
    breval = next((node for node in tree.children if isinstance(node, BrEval)), None)
    assert breval is not None, "BrEval node should exist"
    assert breval.has_other
def test_build_branch_tree_nested_if():
    """CE-03 extension: a nested IF still parses into a non-empty BrSeq."""
    proc_text = (
        " MAIN-PROC.\n"
        " IF A > 0\n"
        " IF B < 5\n"
        " MOVE 1 TO C\n"
        " END-IF\n"
        " END-IF."
    )
    tree, _ = build_branch_tree(proc_text)
    assert isinstance(tree, BrSeq)
    child_count = len(tree.children)
    assert child_count > 0
# ── _basename ──
def test_basename_simple():
    """A name without a subscript is returned unchanged."""
    name = "WS-AMOUNT"
    assert _basename(name) == "WS-AMOUNT"
def test_basename_subscript():
    """A literal subscript is stripped from the name."""
    stripped = _basename("WS-TABLE(1)")
    assert stripped == "WS-TABLE"
def test_basename_nested_subscript():
    """A subscript that is itself a data name, WS-TABLE(WS-INDEX), is stripped."""
    stripped = _basename("WS-TABLE(WS-INDEX)")
    assert stripped == "WS-TABLE"
# ── _init_child_names ──
def test_init_child_names_basic():
    """The level-10 items listed after group WS-GROUP are reported as its children."""
    numeric_pic = {"type": "numeric"}
    fields = [
        {"name": "WS-GROUP", "level": 5},
        {"name": "WS-ITEM1", "level": 10, "pic_info": dict(numeric_pic)},
        {"name": "WS-ITEM2", "level": 10, "pic_info": dict(numeric_pic)},
    ]
    children = _init_child_names("WS-GROUP", fields)
    for expected in ("WS-ITEM1", "WS-ITEM2"):
        assert expected in children
# ── trace_to_root ──
def test_trace_to_root_direct():
    """A single direct assignment traces back to its source variable."""
    assignments = {
        "WS-RESULT": [{"source_vars": ["WS-INPUT"]}],
    }
    traced = trace_to_root("WS-RESULT", assignments, [])
    root, chain = traced
    assert root == "WS-INPUT"
    assert len(chain) >= 1
def test_trace_to_root_no_source():
    """With no source variables recorded, the variable is its own root."""
    assignments = {
        "WS-RESULT": [{"source_vars": []}],
    }
    # The chain half of the returned pair is not checked in this case.
    root, _ = trace_to_root("WS-RESULT", assignments, [])
    assert root == "WS-RESULT"
def test_trace_to_root_chain():
    """Two-hop trace WS-RESULT -> WS-TEMP -> WS-INPUT ends at WS-INPUT, chain length 2."""
    assignments = {}
    assignments["WS-RESULT"] = [{"source_vars": ["WS-TEMP"]}]
    assignments["WS-TEMP"] = [{"source_vars": ["WS-INPUT"]}]
    traced = trace_to_root("WS-RESULT", assignments, [])
    root, chain = traced
    assert root == "WS-INPUT"
    assert len(chain) == 2
def test_trace_to_root_cycle():
    """A circular reference WS-A <-> WS-B must return instead of looping forever."""
    assignments = {}
    assignments["WS-A"] = [{"source_vars": ["WS-B"]}]
    assignments["WS-B"] = [{"source_vars": ["WS-A"]}]
    traced = trace_to_root("WS-A", assignments, [])
    root, chain = traced
    # Which root is reported is left open; only a well-formed result is required.
    assert root is not None
    assert isinstance(chain, list)