Files
cobol-java-v3/tests/parametrized/test_parametrized.py
hangshuo652 bc1d56d1a4 feat: Phase 2 complete — 13 Phases of COBOL type classification and test benchmark
P0.6: gcov infrastructure
P1: extract_structure output expansion (11 new feature fields)
P2: Confusion group rule engine (8 pairs + contradiction + backtrack)
P3: 4-factor confidence calculation + quality gate update
P4: 33+2 COBOL program type test samples (22 files, 7 categories)
P5: parametrized/ test data generation engine
P6: japanese_data.py lookup tables
P7-10: Type-specific test suites (~159 parametrized tests)
P11: Full classification pipeline (classify_program) + orchestrator integration
P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix)

Architecture decisions:
- classification_pipeline/ merged to hina/pipeline/
- parametrized/ as independent module
- japanese_data.py as root-level file
- hina/__all__ only exports classify_program()

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-19 23:51:55 +08:00

279 lines
8.7 KiB
Python

"""parametrized 模块的测试。
验证每个公开函数的正常路径和关键边界条件。
"""
import os
import tempfile
import pytest
from parametrized import (
generate_matching_data,
generate_keybreak_data,
generate_division_data,
generate_zero_byte_file,
generate_boundary_values,
generate_minimal_records,
generate_sorted_records,
generate_duplicate_keys,
)
# ── generate_matching_data ──
class TestMatchingData:
    """Tests for ``generate_matching_data``.

    Covers the record counts returned for each matching type, the KEY overlap
    at the two extreme ``key_match_ratio`` values, and argument validation.
    """

    @staticmethod
    def _key_set(records):
        """Return the set of ``KEY`` values found in *records*."""
        return {record["KEY"] for record in records}

    def test_matching_data_basic(self):
        """A 1:1 request for 5 + 5 records returns 5 main and 5 sub records."""
        main_records, sub_records = generate_matching_data("1:1", 5, 5)
        assert len(main_records) == 5
        assert len(sub_records) == 5

    def test_matching_data_imbalance(self):
        """A 1:N request keeps the asked-for sizes (1 main, 100 sub)."""
        main_records, sub_records = generate_matching_data("1:N", 1, 100)
        assert len(main_records) == 1
        assert len(sub_records) == 100

    def test_matching_n_to_one(self):
        """An N:1 request keeps the asked-for sizes (100 main, 1 sub)."""
        main_records, sub_records = generate_matching_data("N:1", 100, 1)
        assert len(main_records) == 100
        assert len(sub_records) == 1

    def test_matching_zero_records(self):
        """Requesting zero records on both sides yields two empty results."""
        main_records, sub_records = generate_matching_data("1:1", 0, 0)
        assert len(main_records) == 0
        assert len(sub_records) == 0

    def test_matching_all_unmatched(self):
        """With ``key_match_ratio=0.0`` the two sides share no KEY at all."""
        main_records, sub_records = generate_matching_data(
            "1:1", 5, 5, key_match_ratio=0.0
        )
        assert len(main_records) == 5
        assert len(sub_records) == 5
        # Confirm that no KEY appears on both sides.
        shared_keys = self._key_set(main_records) & self._key_set(sub_records)
        assert not shared_keys

    def test_matching_all_matched(self):
        """With ``key_match_ratio=1.0`` both sides carry the same KEY set."""
        main_records, sub_records = generate_matching_data(
            "1:1", 5, 5, key_match_ratio=1.0
        )
        assert len(main_records) == 5
        assert len(sub_records) == 5
        assert self._key_set(main_records) == self._key_set(sub_records)

    def test_matching_invalid_type(self):
        """An unknown matching type raises ValueError naming ``matching_type``."""
        with pytest.raises(ValueError, match="matching_type"):
            generate_matching_data("INVALID", 5, 5)

    def test_matching_invalid_ratio(self):
        """A negative ratio raises ValueError naming ``key_match_ratio``."""
        with pytest.raises(ValueError, match="key_match_ratio"):
            generate_matching_data("1:1", 5, 5, key_match_ratio=-0.5)

    def test_matching_negative_count(self):
        """A negative main record count raises ValueError."""
        # The pattern is the runtime message text expected from the generator.
        with pytest.raises(ValueError, match="记录数"):
            generate_matching_data("1:1", -1, 5)
# ── generate_keybreak_data ──
class TestKeybreakData:
    """Tests for ``generate_keybreak_data``.

    Exercises the default output, each ``sum_type`` variant, and rejection of
    invalid arguments.
    """

    def test_keybreak_data_basic(self):
        """3 groups of 2 give at least 6 rows spread over 3 distinct KEYs."""
        rows = generate_keybreak_data(3, 2)
        assert len(rows) >= 6
        # Check that the KEY grouping is correct: exactly 3 groups.
        distinct_keys = {row["KEY"] for row in rows}
        assert len(distinct_keys) == 3

    def test_keybreak_data_single_group(self):
        """A single group of 5 yields 5 rows that all carry ``KEY-A``."""
        rows = generate_keybreak_data(1, 5)
        assert len(rows) == 5
        assert not any(row["KEY"] != "KEY-A" for row in rows)

    def test_keybreak_data_accumulate(self):
        """``accumulate`` numbers FIELD per group: 101, 102 then 201, 202."""
        rows = generate_keybreak_data(2, 2, sum_type="accumulate")
        assert len(rows) == 4
        # GROUP 1: FIELD values 101, 102
        assert rows[0]["GROUP"] == 1
        assert (rows[0]["FIELD"], rows[1]["FIELD"]) == (101, 102)
        # GROUP 2: FIELD values 201, 202
        assert rows[2]["GROUP"] == 2
        assert (rows[2]["FIELD"], rows[3]["FIELD"]) == (201, 202)

    def test_keybreak_data_aggregate(self):
        """``aggregate`` repeats one FIELD value per group: 100, then 200."""
        rows = generate_keybreak_data(2, 2, sum_type="aggregate")
        # Values are identical within each group.
        assert (rows[0]["FIELD"], rows[1]["FIELD"]) == (100, 100)
        assert (rows[2]["FIELD"], rows[3]["FIELD"]) == (200, 200)

    def test_keybreak_data_mark(self):
        """``mark`` labels the rows of two 1-row groups MARK-A and MARK-B."""
        rows = generate_keybreak_data(2, 1, sum_type="mark")
        first_row, second_row = rows[0], rows[1]
        assert first_row["FIELD"] == "MARK-A"
        assert second_row["FIELD"] == "MARK-B"

    def test_keybreak_invalid_group_count(self):
        """A group count of 0 raises ValueError naming ``group_count``."""
        with pytest.raises(ValueError, match="group_count"):
            generate_keybreak_data(0, 2)

    def test_keybreak_invalid_sum_type(self):
        """An unknown ``sum_type`` raises ValueError naming ``sum_type``."""
        with pytest.raises(ValueError, match="sum_type"):
            generate_keybreak_data(3, 2, sum_type="unknown")
# ── generate_division_data ──
class TestDivisionData:
    """Tests for ``generate_division_data``.

    Each case checks how many parts come back for a division type and that
    the part sizes add up to the requested record count.
    """

    def test_division_fifty(self):
        """Division type 50 over 50 records gives 2 parts totalling 50."""
        parts = generate_division_data(50, 50)
        assert len(parts) == 2
        first_size = len(parts[0])
        second_size = len(parts[1])
        assert first_size + second_size == 50

    def test_division_one_hundred(self):
        """Division type 100 over 50 records gives one part of 50."""
        parts = generate_division_data(100, 50)
        assert len(parts) == 1
        only_part = parts[0]
        assert len(only_part) == 50

    def test_division_twenty_five(self):
        """Division type 25 over 100 records gives 4 parts totalling 100."""
        parts = generate_division_data(25, 100)
        assert len(parts) == 4
        assert sum(map(len, parts)) == 100

    def test_division_single_record(self):
        """Division type 100 over a single record gives one part of size 1."""
        parts = generate_division_data(100, 1)
        assert len(parts) == 1
        only_part = parts[0]
        assert len(only_part) == 1

    def test_division_invalid_type(self):
        """Division type 99 raises ValueError naming ``division_type``."""
        with pytest.raises(ValueError, match="division_type"):
            generate_division_data(99, 50)

    def test_division_negative_count(self):
        """A record count of 0 raises ValueError naming ``record_count``.

        Despite the method name, the value exercised here is zero, not a
        negative number.
        """
        with pytest.raises(ValueError, match="record_count"):
            generate_division_data(50, 0)
# ── generate_zero_byte_file ──
class TestZeroByteFile:
    """Tests for ``generate_zero_byte_file``.

    Each test works inside a ``tempfile.TemporaryDirectory`` so the scratch
    directory (and any nested directories created in it) is removed when the
    test finishes, including when an assertion fails.
    """

    def test_zero_byte(self):
        """A file generated directly inside an existing directory is empty."""
        with tempfile.TemporaryDirectory() as tmpdir:
            p = os.path.join(tmpdir, "empty.bin")
            generate_zero_byte_file(p)
            assert os.path.getsize(p) == 0

    def test_zero_byte_nested_dir(self):
        """A file generated under not-yet-existing subdirectories is empty.

        ``sub/nested`` does not exist in the fresh temporary directory, so the
        test expects the generator to produce the file at that path anyway.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            p = os.path.join(tmpdir, "sub", "nested", "empty.dat")
            generate_zero_byte_file(p)
            assert os.path.getsize(p) == 0
# ── generate_boundary_values ──
class TestBoundaryValues:
    """Tests for ``generate_boundary_values``.

    Each case passes a PICTURE-style type string and compares selected keys
    of the returned mapping against exact expected values.
    """

    def test_boundary_signed_numeric(self):
        """``S9(7)V99``: signed bounds with two decimals, plus overflow/zero."""
        bounds = generate_boundary_values("S9(7)V99")
        expected_by_key = {
            "max": 9999999.99,
            "min": -9999999.99,
            "overflow": 100000000.0,
            "zero": 0.0,
        }
        for key, expected in expected_by_key.items():
            assert bounds[key] == expected

    def test_boundary_unsigned_integer(self):
        """``9(4)``: unsigned range 0..9999, overflow value 100000."""
        bounds = generate_boundary_values("9(4)")
        expected_by_key = {
            "max": 9999,
            "min": 0,
            "overflow": 100000,
            "zero": 0,
        }
        for key, expected in expected_by_key.items():
            assert bounds[key] == expected

    def test_boundary_string(self):
        """``X(10)``: max is ten ``X`` characters, overflow is eleven."""
        bounds = generate_boundary_values("X(10)")
        longest_valid = "X" * 10
        one_too_long = "X" * 11
        assert bounds["max"] == longest_valid
        assert bounds["overflow"] == one_too_long

    def test_boundary_signed_integer(self):
        """``S9(3)``: signed integer range -999..999 with zero equal to 0."""
        bounds = generate_boundary_values("S9(3)")
        expected_by_key = {
            "max": 999,
            "min": -999,
            "zero": 0,
        }
        for key, expected in expected_by_key.items():
            assert bounds[key] == expected
# ── generate_minimal_records ──
class TestMinimalRecords:
    """Tests for ``generate_minimal_records``.

    Verifies the single record produced for an empty field list, for typed
    fields, and for a field that supplies an explicit default.
    """

    def test_minimal_empty_fields(self):
        """An empty field list yields exactly one empty record."""
        assert generate_minimal_records([]) == [{}]

    def test_minimal_with_fields(self):
        """A numeric field becomes 0 and a 20-long string field is 20 ``A``s."""
        id_field = {"name": "ID", "type": "numeric"}
        name_field = {"name": "NAME", "type": "string", "length": 20}
        records = generate_minimal_records([id_field, name_field])
        assert len(records) == 1
        only_record = records[0]
        assert only_record["ID"] == 0
        assert len(only_record["NAME"]) == 20
        assert only_record["NAME"] == "A" * 20

    def test_minimal_with_defaults(self):
        """A field's ``default`` entry is used as the value in the record."""
        status_field = {"name": "STATUS", "default": "OK"}
        records = generate_minimal_records([status_field])
        assert records[0]["STATUS"] == "OK"
# ── generate_sorted_records ──
class TestSortedRecords:
    """Tests for ``generate_sorted_records``.

    Checks the generated key sequence, the SEQ of a single record, the custom
    key field name, and rejection of a zero record count.
    """

    def test_sorted_basic(self):
        """Five records run from ``KEY-0000`` to ``KEY-0004``."""
        records = generate_sorted_records(5)
        assert len(records) == 5
        first_record, fifth_record = records[0], records[4]
        assert first_record["KEY"] == "KEY-0000"
        assert fifth_record["KEY"] == "KEY-0004"

    def test_sorted_single(self):
        """A single generated record has SEQ equal to 1."""
        records = generate_sorted_records(1)
        assert len(records) == 1
        only_record = records[0]
        assert only_record["SEQ"] == 1

    def test_sorted_invalid_count(self):
        """A record count of 0 raises ValueError naming ``record_count``."""
        with pytest.raises(ValueError, match="record_count"):
            generate_sorted_records(0)

    def test_sorted_custom_key(self):
        """``key_field`` renames the key column; values keep the KEY-nnnn form."""
        records = generate_sorted_records(3, key_field="MYKEY")
        first_record = records[0]
        assert "MYKEY" in first_record
        assert first_record["MYKEY"] == "KEY-0000"
# ── generate_duplicate_keys ──
class TestDuplicateKeys:
    """Tests for ``generate_duplicate_keys``.

    Verifies that the output holds each input record plus a same-KEY
    duplicate, and that empty input stays empty.
    """

    def test_duplicate_empty(self):
        """An empty input list produces an empty output list."""
        assert generate_duplicate_keys([]) == []

    def test_duplicate_basic(self):
        """One record becomes two with the same KEY; the copy's DATA gets ``_DUP``."""
        source = [{"KEY": "K001", "DATA": "a", "SEQ": 1}]
        output = generate_duplicate_keys(source)
        assert len(output) == 2
        original, duplicate = output[0], output[1]
        assert original["KEY"] == "K001"
        assert duplicate["KEY"] == "K001"
        assert duplicate["DATA"] == "a_DUP"

    def test_duplicate_multiple(self):
        """Two records become four; duplicates follow the originals in order."""
        first = {"KEY": "K001", "DATA": "a", "SEQ": 1}
        second = {"KEY": "K002", "DATA": "b", "SEQ": 2}
        output = generate_duplicate_keys([first, second])
        assert len(output) == 4
        # Index 2 duplicates the first input, index 3 the second.
        assert output[2]["KEY"] == "K001"
        assert output[3]["KEY"] == "K002"