feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark
P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged to hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,345 @@
|
||||
"""Deep Field / FieldTree data-model scenarios — REDEFINES, OCCURS, 88-levels, nesting, from_list, performance, edge cases."""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
|
||||
from data.field_tree import Field, FieldTree
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 1. REDEFINES chain
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_redefines_chain():
    """Three fields linked A -> B -> C through ``redefines`` each keep their own target."""
    # Build from the tail so every field names the one created just before it.
    tail = Field(name="C", level=10, pic="9(4)")
    middle = Field(name="B", level=10, pic="9(4)", redefines="C")
    head = Field(name="A", level=10, pic="9(4)", redefines="B")

    assert head.redefines == "B"
    assert middle.redefines == "C"
    # The last link was built without a ``redefines`` argument.
    assert tail.redefines is None
|
||||
|
||||
|
||||
def test_redefines_chain_with_tree():
    """The ``redefines`` links of an A -> B -> C chain are readable from flatten()."""
    tail = Field(name="C", level=10, pic="9(4)")
    middle = Field(name="B", level=10, pic="9(4)", redefines="C")
    head = Field(name="A", level=10, pic="9(4)", redefines="B")

    # Redefining fields are listed before the field they redefine.
    by_name = FieldTree(fields=[head, middle, tail]).flatten()

    assert by_name["A"].redefines == "B"
    assert by_name["B"].redefines == "C"
    assert by_name["C"].redefines is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 2. OCCURS 10 TIMES — subscripted fields in flatten
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_occurs_ten_times():
    """Ten fields named A(1)..A(10), each built with occurs=10, all appear in flatten().

    The subscripted names are supplied by the test itself; this checks that
    flatten() keys each field by its name and keeps ``occurs`` intact.
    """
    subscripts = range(1, 11)
    entries = [
        Field(name=f"A({i})", level=10, pic="9(4)", occurs=10) for i in subscripts
    ]
    flat = FieldTree(fields=entries).flatten()

    assert len(flat) == 10
    for i in subscripts:
        key = f"A({i})"
        assert key in flat, f"Missing subscripted field {key}"
        entry = flat[key]
        assert entry.name == key
        assert entry.occurs == 10
|
||||
|
||||
|
||||
def test_occurs_ten_times_with_group_children():
    """A group with occurs=10 and ten subscripted children: flatten() exposes the group and every child."""
    subscripts = range(1, 11)
    members = [
        Field(name=f"ITEM-SUB({n})", level=15, pic="9(2)") for n in subscripts
    ]
    container = Field(
        name="GRP", level=5, pic="X(20)", occurs=10, children=members
    )

    flat = FieldTree(fields=[container]).flatten()

    assert "GRP" in flat
    for n in subscripts:
        assert f"ITEM-SUB({n})" in flat
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 3. 88-level / conditions list
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_88_level_conditions():
    """A level-88 field built with one condition dict exposes exactly that entry."""
    flag = Field(
        name="WS-FLAG-88",
        level=88,
        pic="X(1)",
        conditions=[{"value": "Y", "meaning": "YES"}],
    )

    assert flag.level == 88
    assert len(flag.conditions) == 1
    first = flag.conditions[0]
    assert first["value"] == "Y"
|
||||
|
||||
|
||||
def test_88_level_multiple_conditions():
    """A level-88 field keeps every condition entry it was given, in order."""
    yes_entry = {"value": "Y", "meaning": "YES"}
    no_entry = {"value": "N", "meaning": "NO"}
    flag = Field(
        name="WS-FLAG-88",
        level=88,
        pic="X(1)",
        conditions=[yes_entry, no_entry],
    )

    assert len(flag.conditions) == 2
    # The second entry is the "NO" condition.
    assert flag.conditions[1]["meaning"] == "NO"
|
||||
|
||||
|
||||
def test_non_88_default_empty_conditions():
    """A field created without ``conditions`` reports an empty list."""
    plain = Field(name="WS-FLAG", level=10, pic="X(1)")

    assert plain.conditions == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 4. get_by_name — deeply nested tree (3 levels)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_get_by_name_depth_3():
    """get_by_name returns the very same object at each of three nesting levels."""
    # Assemble bottom-up: LEAF inside CHILD inside PARENT.
    bottom = Field(name="LEAF", level=15, pic="9(4)")
    middle = Field(name="CHILD", level=10, pic="X(10)", children=[bottom])
    top = Field(name="PARENT", level=5, pic="X(20)", children=[middle])

    tree = FieldTree(fields=[top])

    # Identity (not just equality) is expected at every depth.
    assert tree.get_by_name("PARENT") is top
    assert tree.get_by_name("CHILD") is middle
    assert tree.get_by_name("LEAF") is bottom
|
||||
|
||||
|
||||
def test_get_by_name_depth_3_multiple_siblings():
    """get_by_name resolves each of two sibling leaves nested under OUTER > INNER."""
    first_leaf = Field(name="LEAF-C", level=15, pic="9(4)")
    second_leaf = Field(name="LEAF-D", level=15, pic="X(2)")
    inner_group = Field(
        name="INNER", level=10, pic="X(10)", children=[first_leaf, second_leaf]
    )
    outer_group = Field(
        name="OUTER", level=5, pic="X(20)", children=[inner_group]
    )

    tree = FieldTree(fields=[outer_group])

    assert tree.get_by_name("LEAF-C") is first_leaf
    assert tree.get_by_name("LEAF-D") is second_leaf
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 5. FieldTree.from_list class method
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_from_list_default_name():
    """from_list without a name yields the given fields and an empty copybook_name."""
    only_field = Field(name="A", level=5, pic="9(4)")
    fields = [only_field]

    built = FieldTree.from_list(fields)

    assert built.fields == fields
    assert built.copybook_name == ""
|
||||
|
||||
|
||||
def test_from_list_with_name():
    """from_list stores the ``name`` keyword as the tree's copybook_name."""
    built = FieldTree.from_list(
        [Field(name="A", level=5, pic="9(4)")],
        name="MYCPY",
    )

    assert built.copybook_name == "MYCPY"
|
||||
|
||||
|
||||
def test_from_list_multiple_fields():
    """A tree built by from_list from three fields flattens to exactly those three names."""
    first = Field(name="A", level=5, pic="9(4)")
    second = Field(name="B", level=10, pic="X(3)")
    third = Field(name="C", level=10, pic="9(2)")
    sources = [first, second, third]

    flat = FieldTree.from_list(sources, name="CPY").flatten()

    assert len(flat) == 3
    for source in sources:
        assert source.name in flat
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 6. Performance — 1000+ fields flatten under 1 second
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_flatten_1000_fields_performance():
    """flatten() over 1000 sibling fields returns all of them in under one second."""
    tree = FieldTree(
        fields=[
            Field(name=f"FLD-{n}", level=10, pic="9(4)") for n in range(1000)
        ]
    )

    # Time only the flatten() call, not the construction above.
    started = time.perf_counter()
    flat = tree.flatten()
    elapsed = time.perf_counter() - started

    assert len(flat) == 1000
    assert elapsed < 1.0, f"flatten() took {elapsed:.3f}s, expected < 1s"
|
||||
|
||||
|
||||
def test_flatten_1000_fields_nested_performance():
    """flatten() over 50 groups of 20 children plus one top field finishes in under one second."""
    root = Field(name="TOP", level=1, pic="X(8000)")
    # Each group owns 20 leaf fields whose names carry the group index.
    groups = [
        Field(
            name=f"GRP-{g}",
            level=5,
            pic="X(100)",
            children=[
                Field(name=f"G{g}-F{i}", level=15, pic="9(4)") for i in range(20)
            ],
        )
        for g in range(50)
    ]
    tree = FieldTree(fields=[root, *groups])

    # Time only the flatten() call, not the construction above.
    started = time.perf_counter()
    flat = tree.flatten()
    elapsed = time.perf_counter() - started

    # 1 top + 50 groups + 50*20 children = 1051 fields
    assert len(flat) == 1051
    assert elapsed < 1.0, f"nested flatten() took {elapsed:.3f}s, expected < 1s"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 7. COMP-3 with signed, decimal — full property verification
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_comp3_signed_decimal():
    """A signed COMP-3 field with two decimals reports every attribute it was given, and defaults for the rest."""
    amount = Field(
        name="BR-AMT",
        level=5,
        pic="S9(7)V99",
        usage="COMP-3",
        offset=0,
        length=5,
        decimal=2,
        signed=True,
    )

    # Attributes passed explicitly to the constructor.
    assert amount.name == "BR-AMT"
    assert amount.level == 5
    assert amount.pic == "S9(7)V99"
    assert amount.usage == "COMP-3"
    assert amount.offset == 0
    assert amount.length == 5
    assert amount.decimal == 2
    assert amount.signed is True

    # Attributes left unset.
    assert amount.sign_separate is False
    assert amount.occurs is None
    assert amount.occurs_max is None
    assert amount.redefines is None
    assert amount.redefines_variant is None
    assert amount.conditions == []
    assert amount.children == []
|
||||
|
||||
|
||||
def test_comp3_signed_with_varying_offset():
    """A signed COMP-3 field at offset 12 keeps its offset and decimal count through flatten()."""
    amount = Field(
        name="WS-AMT",
        level=10,
        pic="S9(5)V99",
        usage="COMP-3",
        offset=12,
        length=4,
        decimal=2,
        signed=True,
    )
    # ROOT is a sibling entry in the tree, not a parent of WS-AMT.
    tree = FieldTree(fields=[Field(name="ROOT", level=1, pic="X(50)"), amount])

    flat = tree.flatten()

    assert flat["WS-AMT"].offset == 12
    assert flat["WS-AMT"].decimal == 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 8. sign_separate=True, occurs=5, occurs_max=10
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_sign_separate_occurs():
    """A DISPLAY field built with sign_separate=True, occurs=5, occurs_max=10 reports those values."""
    spec = dict(
        name="WS-SIGNED-ARR",
        level=10,
        pic="S9(4)",
        usage="DISPLAY",
        signed=True,
        sign_separate=True,
        occurs=5,
        occurs_max=10,
    )
    array_field = Field(**spec)

    assert array_field.name == "WS-SIGNED-ARR"
    assert array_field.signed is True
    assert array_field.sign_separate is True
    assert array_field.occurs == 5
    assert array_field.occurs_max == 10
|
||||
|
||||
|
||||
def test_sign_separate_occurs_in_tree():
    """sign_separate, occurs and occurs_max are unchanged when the field is read back from flatten()."""
    array_field = Field(
        name="ARR", level=10, pic="S9(4)", usage="DISPLAY",
        signed=True, sign_separate=True,
        occurs=5, occurs_max=10,
    )

    flat = FieldTree(fields=[array_field]).flatten()

    assert flat["ARR"].sign_separate is True
    assert flat["ARR"].occurs == 5
    assert flat["ARR"].occurs_max == 10
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 9. redefines_variant
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_redefines_variant_string():
    """A field built with a string redefines_variant exposes both it and its redefines target."""
    variant_field = Field(
        name="X",
        level=10,
        pic="9(4)",
        redefines="Y",
        redefines_variant="ALT-1",
    )

    assert variant_field.redefines == "Y"
    assert variant_field.redefines_variant == "ALT-1"
|
||||
|
||||
|
||||
def test_redefines_variant_none():
    """redefines_variant is None when the constructor is not given one."""
    plain = Field(name="A", level=10, pic="9(4)")

    assert plain.redefines_variant is None
|
||||
|
||||
|
||||
def test_redefines_variant_multiple():
    """Two fields redefining the same target keep distinct redefines_variant values through flatten()."""
    variant_one = Field(
        name="DATA-V1", level=10, pic="9(4)", redefines="DATA", redefines_variant="V1"
    )
    variant_two = Field(
        name="DATA-V2", level=10, pic="9(4)", redefines="DATA", redefines_variant="V2"
    )

    flat = FieldTree(fields=[variant_one, variant_two]).flatten()

    assert flat["DATA-V1"].redefines_variant == "V1"
    assert flat["DATA-V2"].redefines_variant == "V2"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 10. Empty FieldTree — edge cases
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_empty_field_tree():
    """A FieldTree built with no arguments flattens to {} and finds nothing by name."""
    empty = FieldTree()

    assert empty.flatten() == {}
    # Lookup of an absent name yields None.
    assert empty.get_by_name("ANYTHING") is None
|
||||
|
||||
|
||||
def test_empty_field_tree_with_copybook_name():
    """A tree with an empty field list but a copybook name stays empty and keeps the name."""
    named_empty = FieldTree(fields=[], copybook_name="EMPTYCPY")

    assert named_empty.flatten() == {}
    assert named_empty.get_by_name("X") is None
    assert named_empty.copybook_name == "EMPTYCPY"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 11. Additional: mixed nesting with redefines + occurs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_nested_redefines_and_occurs():
    """flatten() returns the original objects for a tree mixing an occurs child and a redefining sibling."""
    repeated = Field(name="INNER", level=15, pic="9(4)", occurs=3)
    overlay = Field(
        name="REDEF",
        level=10,
        pic="9(8)",
        redefines="ORIG",
        redefines_variant="HIGH",
    )
    base = Field(name="ORIG", level=10, pic="9(8)", children=[repeated])
    # PARENT holds the redefined field first, then the field that redefines it.
    holder = Field(name="PARENT", level=5, pic="X(20)", children=[base, overlay])

    flat = FieldTree(fields=[holder]).flatten()

    # Every level is returned by identity.
    assert flat["PARENT"] is holder
    assert flat["ORIG"] is base
    assert flat["REDEF"] is overlay
    assert flat["INNER"] is repeated

    assert flat["INNER"].occurs == 3
    assert flat["REDEF"].redefines_variant == "HIGH"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 12. Additional: from_list round-trip consistency
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_from_list_round_trip():
    """from_list followed by flatten() hands back the same 100 Field objects and keeps the name."""
    sources = [
        Field(name=f"F{n:03d}", level=10, pic="9(4)") for n in range(100)
    ]

    tree = FieldTree.from_list(sources, name="RTCPY")
    flat = tree.flatten()

    assert len(flat) == 100
    for source in sources:
        # Identity check: flatten() must not copy the fields.
        assert flat[source.name] is source
    assert tree.copybook_name == "RTCPY"
|
||||
Reference in New Issue
Block a user