feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark
P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged to hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,183 @@
|
||||
"""CE-01~09: cobol_testgen core module — PROCEDURE DIVISION parsing + data flow"""
|
||||
|
||||
import sys, os
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
|
||||
from cobol_testgen.core import (
|
||||
scan_paragraphs, build_branch_tree, _basename, _init_child_names,
|
||||
trace_to_root,
|
||||
)
|
||||
from cobol_testgen.models import BrSeq, BrIf, BrEval
|
||||
|
||||
|
||||
# ── CE-01~02: scan_paragraphs ──
|
||||
|
||||
def test_scan_paragraphs_normal():
    """CE-01: scanning a source with three paragraph labels.

    Checks that the result has exactly three entries and that each of the
    three label names is present in it.
    """
    source = [
        " MAIN-PROC.",
        " MOVE 1 TO A.",
        " SUB-ROUTINE.",
        " MOVE 2 TO B.",
        " CLEANUP.",
        " MOVE 0 TO C.",
    ]
    found = scan_paragraphs(source)
    assert len(found) == 3
    for label in ("MAIN-PROC", "SUB-ROUTINE", "CLEANUP"):
        assert label in found
|
||||
|
||||
|
||||
def test_scan_paragraphs_scope_enders():
    """Scope terminators and branch keywords are not reported as paragraphs.

    Each keyword is passed to ``scan_paragraphs`` as a single
    period-terminated line; the keyword must not appear in the result.
    """
    for ender in ("END-IF", "ELSE", "WHEN", "OTHER", "END-PERFORM"):
        paras = scan_paragraphs([f" {ender}."])
        # Name the offending keyword so a failure inside the loop is
        # attributable without re-running under a debugger.
        assert ender not in paras, f"{ender} must not be treated as a paragraph name"
|
||||
|
||||
|
||||
def test_scan_paragraphs_section():
    """A ``SECTION`` header is recognised: ``MAIN`` appears in the result."""
    source = [
        " MAIN SECTION.",
        " MOVE 1 TO A.",
        " END SECTION.",
    ]
    found = scan_paragraphs(source)
    assert "MAIN" in found
|
||||
|
||||
|
||||
def test_scan_paragraphs_empty():
    """An empty list of lines yields an empty result (``{}``)."""
    result = scan_paragraphs([])
    assert result == {}
|
||||
|
||||
|
||||
def test_scan_paragraphs_only_code():
    """Plain statements with no paragraph label yield an empty result."""
    statements_only = [" MOVE 1 TO A.", " DISPLAY A."]
    result = scan_paragraphs(statements_only)
    assert result == {}
|
||||
|
||||
|
||||
# ── CE-03~06: build_branch_tree ──
|
||||
|
||||
def test_build_branch_tree_if():
    """CE-03: an IF statement produces a ``BrIf`` node.

    Builds the branch tree for a paragraph containing one IF/ELSE/END-IF
    and checks that a ``BrIf`` instance sits among the tree's direct
    children and that its ``condition`` is set.
    """
    proc_text = " MAIN-PROC.\n IF A > 100\n MOVE 1 TO B\n ELSE\n MOVE 2 TO B\n END-IF."
    # The second element of the returned pair is not needed here; discard it
    # the same way the sibling build_branch_tree tests do.
    tree, _ = build_branch_tree(proc_text)
    assert tree is not None
    assert len(tree.children) > 0
    # First direct child that is a BrIf, or None when there is none.
    brif = next((c for c in tree.children if isinstance(c, BrIf)), None)
    assert brif is not None, "BrIf node should exist"
    assert brif.condition is not None
|
||||
|
||||
|
||||
def test_build_branch_tree_empty():
    """An empty PROCEDURE DIVISION text still yields a ``BrSeq`` tree."""
    root_seq, _ = build_branch_tree("")
    is_sequence = isinstance(root_seq, BrSeq)
    assert is_sequence
|
||||
|
||||
|
||||
def test_build_branch_tree_no_branches():
    """Plain MOVE statements: the tree is a ``BrSeq`` with at least two children."""
    source_text = " MAIN-PROC.\n MOVE 1 TO A.\n MOVE 2 TO B."
    root_seq, _ = build_branch_tree(source_text)
    assert isinstance(root_seq, BrSeq)
    child_count = len(root_seq.children)
    assert child_count >= 2
|
||||
|
||||
|
||||
def test_build_branch_tree_evaluate():
    """CE-04: an EVALUATE statement produces a ``BrEval`` node.

    Looks for the first ``BrEval`` among the tree's direct children and
    checks that its ``has_other`` flag is truthy (the source contains a
    ``WHEN OTHER`` arm).
    """
    source_text = " MAIN-PROC.\n EVALUATE X\n WHEN 1\n MOVE 1 TO A\n WHEN 2\n MOVE 2 TO A\n WHEN OTHER\n MOVE 0 TO A\n END-EVALUATE."
    root_seq, _ = build_branch_tree(source_text)

    breval = None
    for node in root_seq.children:
        if isinstance(node, BrEval):
            breval = node
            break

    assert breval is not None, "BrEval node should exist"
    assert breval.has_other
|
||||
|
||||
|
||||
def test_build_branch_tree_nested_if():
    """CE-03 extension: nested IF.

    Only checks that the result is a non-empty ``BrSeq``; the nesting
    itself is not inspected.
    """
    source_text = " MAIN-PROC.\n IF A > 0\n IF B < 5\n MOVE 1 TO C\n END-IF\n END-IF."
    root_seq, _ = build_branch_tree(source_text)
    assert isinstance(root_seq, BrSeq)
    child_count = len(root_seq.children)
    assert child_count > 0
|
||||
|
||||
|
||||
# ── _basename ──
|
||||
|
||||
def test_basename_simple():
    """A name without a subscript is returned unchanged."""
    stripped = _basename("WS-AMOUNT")
    assert stripped == "WS-AMOUNT"
|
||||
|
||||
|
||||
def test_basename_subscript():
    """A numeric subscript is stripped from the name."""
    stripped = _basename("WS-TABLE(1)")
    assert stripped == "WS-TABLE"
|
||||
|
||||
|
||||
def test_basename_nested_subscript():
    """A subscript that is itself a data name, WS-TABLE(WS-INDEX), is stripped."""
    stripped = _basename("WS-TABLE(WS-INDEX)")
    assert stripped == "WS-TABLE"
|
||||
|
||||
|
||||
# ── _init_child_names ──
|
||||
|
||||
def test_init_child_names_basic():
    """A group field collects its child fields.

    ``WS-GROUP`` (level 5) is followed by two level-10 items; both item
    names must appear in the result of ``_init_child_names``.
    """
    group = {"name": "WS-GROUP", "level": 5}
    first_item = {"name": "WS-ITEM1", "level": 10, "pic_info": {"type": "numeric"}}
    second_item = {"name": "WS-ITEM2", "level": 10, "pic_info": {"type": "numeric"}}
    members = _init_child_names("WS-GROUP", [group, first_item, second_item])
    for expected in ("WS-ITEM1", "WS-ITEM2"):
        assert expected in members
|
||||
|
||||
|
||||
# ── trace_to_root ──
|
||||
|
||||
def test_trace_to_root_direct():
    """Direct assignment: WS-RESULT traces back to WS-INPUT.

    Also checks that the returned chain has at least one entry.
    """
    assignment_map = {"WS-RESULT": [{"source_vars": ["WS-INPUT"]}]}
    origin, path = trace_to_root("WS-RESULT", assignment_map, [])
    assert origin == "WS-INPUT"
    assert len(path) >= 1
|
||||
|
||||
|
||||
def test_trace_to_root_no_source():
    """With no source variables, the root is the variable itself."""
    assignment_map = {"WS-RESULT": [{"source_vars": []}]}
    origin, _path = trace_to_root("WS-RESULT", assignment_map, [])
    assert origin == "WS-RESULT"
|
||||
|
||||
|
||||
def test_trace_to_root_chain():
    """Multi-level trace: WS-RESULT -> WS-TEMP -> WS-INPUT.

    The root must be ``WS-INPUT`` and the chain must have exactly two entries.
    """
    assignment_map = {}
    assignment_map["WS-RESULT"] = [{"source_vars": ["WS-TEMP"]}]
    assignment_map["WS-TEMP"] = [{"source_vars": ["WS-INPUT"]}]
    origin, path = trace_to_root("WS-RESULT", assignment_map, [])
    assert origin == "WS-INPUT"
    assert len(path) == 2
|
||||
|
||||
|
||||
def test_trace_to_root_cycle():
    """Circular reference (WS-A <-> WS-B): the call must return.

    Only checks that a non-``None`` root and a list chain come back.
    """
    assignment_map = {}
    assignment_map["WS-A"] = [{"source_vars": ["WS-B"]}]
    assignment_map["WS-B"] = [{"source_vars": ["WS-A"]}]
    origin, path = trace_to_root("WS-A", assignment_map, [])
    assert origin is not None
    assert isinstance(path, list)
|
||||
Reference in New Issue
Block a user