bc1d56d1a4
P0.6: gcov infrastructure P1: extract_structure output expansion (11 new feature fields) P2: Confusion group rule engine (8 pairs + contradiction + backtrack) P3: 4-factor confidence calculation + quality gate update P4: 33+2 COBOL program type test samples (22 files, 7 categories) P5: parametrized/ test data generation engine P6: japanese_data.py lookup tables P7-10: Type-specific test suites (~159 parametrized tests) P11: Full classification pipeline (classify_program) + orchestrator integration P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix) Architecture decisions: - classification_pipeline/ merged to hina/pipeline/ - parametrized/ as independent module - japanese_data.py as root-level file - hina/__all__ only exports classify_program() Co-Authored-By: Claude <noreply@anthropic.com>
184 lines
5.6 KiB
Python
184 lines
5.6 KiB
Python
"""CE-01~09: cobol_testgen core 模块 — PROCEDURE DIVISION 解析 + 数据流"""
|
|
|
|
import sys, os
|
|
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
|
|
from cobol_testgen.core import (
|
|
scan_paragraphs, build_branch_tree, _basename, _init_child_names,
|
|
trace_to_root,
|
|
)
|
|
from cobol_testgen.models import BrSeq, BrIf, BrEval
|
|
|
|
|
|
# ── CE-01~02: scan_paragraphs ──
|
|
|
|
def test_scan_paragraphs_normal():
    """CE-01: three paragraph headers in the input yield exactly three entries."""
    source = [
        " MAIN-PROC.",
        " MOVE 1 TO A.",
        " SUB-ROUTINE.",
        " MOVE 2 TO B.",
        " CLEANUP.",
        " MOVE 0 TO C.",
    ]
    found = scan_paragraphs(source)

    # Exactly the three headers, nothing derived from the MOVE statements.
    assert len(found) == 3
    for label in ("MAIN-PROC", "SUB-ROUTINE", "CLEANUP"):
        assert label in found
|
|
|
|
|
|
def test_scan_paragraphs_scope_enders():
    """Scope terminators and clause keywords are never reported as paragraph names."""
    keywords = ("END-IF", "ELSE", "WHEN", "OTHER", "END-PERFORM")
    for keyword in keywords:
        # Each keyword is scanned alone, written like a paragraph header.
        result = scan_paragraphs([f" {keyword}."])
        assert keyword not in result
|
|
|
|
|
|
def test_scan_paragraphs_section():
    """A SECTION header is recognised and reported under its name."""
    source = [" MAIN SECTION.", " MOVE 1 TO A.", " END SECTION."]
    found = scan_paragraphs(source)
    assert "MAIN" in found
|
|
|
|
|
|
def test_scan_paragraphs_empty():
    """An empty line list produces an empty paragraph mapping."""
    result = scan_paragraphs([])
    assert result == {}
|
|
|
|
|
|
def test_scan_paragraphs_only_code():
    """Plain statements without any paragraph header produce an empty mapping."""
    statements = [
        " MOVE 1 TO A.",
        " DISPLAY A.",
    ]
    result = scan_paragraphs(statements)
    assert result == {}
|
|
|
|
|
|
# ── CE-03~06: build_branch_tree ──
|
|
|
|
def test_build_branch_tree_if():
    """CE-03: an IF statement shows up as a BrIf node among the tree's children."""
    cobol = (
        " MAIN-PROC.\n"
        " IF A > 100\n"
        " MOVE 1 TO B\n"
        " ELSE\n"
        " MOVE 2 TO B\n"
        " END-IF."
    )
    tree, _assignments = build_branch_tree(cobol)
    assert tree is not None
    assert len(tree.children) > 0

    # First direct child that is a BrIf, or None when there is none.
    brif = next(
        (child for child in tree.children if isinstance(child, BrIf)),
        None,
    )
    assert brif is not None, "BrIf node should exist"
    assert brif.condition is not None
|
|
|
|
|
|
def test_build_branch_tree_empty():
    """An empty PROCEDURE DIVISION text still returns a BrSeq root."""
    result = build_branch_tree("")
    root, _ = result
    assert isinstance(root, BrSeq)
|
|
|
|
|
|
def test_build_branch_tree_no_branches():
    """Plain MOVE statements give a BrSeq root with at least two children."""
    cobol = (
        " MAIN-PROC.\n"
        " MOVE 1 TO A.\n"
        " MOVE 2 TO B."
    )
    root, _ = build_branch_tree(cobol)
    assert isinstance(root, BrSeq)
    child_count = len(root.children)
    assert child_count >= 2
|
|
|
|
|
|
def test_build_branch_tree_evaluate():
    """CE-04: an EVALUATE statement shows up as a BrEval node with has_other set."""
    cobol = (
        " MAIN-PROC.\n"
        " EVALUATE X\n"
        " WHEN 1\n"
        " MOVE 1 TO A\n"
        " WHEN 2\n"
        " MOVE 2 TO A\n"
        " WHEN OTHER\n"
        " MOVE 0 TO A\n"
        " END-EVALUATE."
    )
    tree, _ = build_branch_tree(cobol)

    # First direct child that is a BrEval, or None when there is none.
    breval = next(
        (child for child in tree.children if isinstance(child, BrEval)),
        None,
    )
    assert breval is not None, "BrEval node should exist"
    # The source contains a WHEN OTHER arm.
    assert breval.has_other
|
|
|
|
|
|
def test_build_branch_tree_nested_if():
    """CE-03 extension: a nested IF still yields a non-empty BrSeq root."""
    cobol = (
        " MAIN-PROC.\n"
        " IF A > 0\n"
        " IF B < 5\n"
        " MOVE 1 TO C\n"
        " END-IF\n"
        " END-IF."
    )
    root, _ = build_branch_tree(cobol)
    assert isinstance(root, BrSeq)
    assert len(root.children) > 0
|
|
|
|
|
|
# ── _basename ──
|
|
|
|
def test_basename_simple():
    """A name without a subscript is returned unchanged."""
    name = "WS-AMOUNT"
    result = _basename(name)
    assert result == "WS-AMOUNT"
|
|
|
|
|
|
def test_basename_subscript():
    """A literal subscript is removed from the name."""
    result = _basename("WS-TABLE(1)")
    assert result == "WS-TABLE"
|
|
|
|
|
|
def test_basename_nested_subscript():
    """A subscript that is itself a variable name, WS-TABLE(WS-INDEX), is removed."""
    result = _basename("WS-TABLE(WS-INDEX)")
    assert result == "WS-TABLE"
|
|
|
|
|
|
# ── _init_child_names ──
|
|
|
|
def test_init_child_names_basic():
    """A group field collects the names of the fields listed beneath it."""
    group = {"name": "WS-GROUP", "level": 5}
    # Two level-10 numeric items following the level-5 group entry.
    items = [
        {"name": item_name, "level": 10, "pic_info": {"type": "numeric"}}
        for item_name in ("WS-ITEM1", "WS-ITEM2")
    ]
    children = _init_child_names("WS-GROUP", [group, *items])
    for expected in ("WS-ITEM1", "WS-ITEM2"):
        assert expected in children
|
|
|
|
|
|
# ── trace_to_root ──
|
|
|
|
def test_trace_to_root_direct():
    """A single direct assignment traces back to its source variable."""
    assignments = {
        "WS-RESULT": [{"source_vars": ["WS-INPUT"]}],
    }
    traced = trace_to_root("WS-RESULT", assignments, [])
    root, chain = traced
    assert root == "WS-INPUT"
    assert len(chain) >= 1
|
|
|
|
|
|
def test_trace_to_root_no_source():
    """With no source variables recorded, the traced root is the variable itself."""
    target = "WS-RESULT"
    assignments = {target: [{"source_vars": []}]}
    root, _chain = trace_to_root(target, assignments, [])
    assert root == "WS-RESULT"
|
|
|
|
|
|
def test_trace_to_root_chain():
    """Multi-level trace: WS-RESULT -> WS-TEMP -> WS-INPUT gives a chain of two."""
    assignments = {}
    assignments["WS-RESULT"] = [{"source_vars": ["WS-TEMP"]}]
    assignments["WS-TEMP"] = [{"source_vars": ["WS-INPUT"]}]

    root, chain = trace_to_root("WS-RESULT", assignments, [])
    assert root == "WS-INPUT"
    assert len(chain) == 2
|
|
|
|
|
|
def test_trace_to_root_cycle():
    """A circular reference (WS-A <-> WS-B) still returns a root and a list chain."""
    assignments = {
        "WS-A": [{"source_vars": ["WS-B"]}],
        "WS-B": [{"source_vars": ["WS-A"]}],
    }
    # Reaching the assertions at all means the trace terminated.
    traced = trace_to_root("WS-A", assignments, [])
    root, chain = traced
    assert root is not None
    assert isinstance(chain, list)
|