bc1d56d1a4
P0.6: gcov infrastructure P1: extract_structure output expansion (11 new feature fields) P2: Confusion group rule engine (8 pairs + contradiction + backtrack) P3: 4-factor confidence calculation + quality gate update P4: 33+2 COBOL program type test samples (22 files, 7 categories) P5: parametrized/ test data generation engine P6: japanese_data.py lookup tables P7-10: Type-specific test suites (~159 parametrized tests) P11: Full classification pipeline (classify_program) + orchestrator integration P12: Documentation (module-interfaces, test-plan v3.0, coverage-matrix) Architecture decisions: - classification_pipeline/ merged to hina/pipeline/ - parametrized/ as independent module - japanese_data.py as root-level file - hina/__all__ only exports classify_program() Co-Authored-By: Claude <noreply@anthropic.com>
279 lines
8.7 KiB
Python
279 lines
8.7 KiB
Python
"""parametrized 模块的测试。
|
|
|
|
验证每个公开函数的正常路径和关键边界条件。
|
|
"""
|
|
|
|
import os
|
|
import tempfile
|
|
|
|
import pytest
|
|
|
|
from parametrized import (
|
|
generate_matching_data,
|
|
generate_keybreak_data,
|
|
generate_division_data,
|
|
generate_zero_byte_file,
|
|
generate_boundary_values,
|
|
generate_minimal_records,
|
|
generate_sorted_records,
|
|
generate_duplicate_keys,
|
|
)
|
|
|
|
|
|
# ── generate_matching_data ──
|
|
|
|
class TestMatchingData:
    """Tests for generate_matching_data: record counts, KEY overlap, validation."""

    def test_matching_data_basic(self):
        """A "1:1" request for 5 and 5 records yields 5 main and 5 sub records."""
        main_records, sub_records = generate_matching_data("1:1", 5, 5)
        assert 5 == len(main_records)
        assert 5 == len(sub_records)

    def test_matching_data_imbalance(self):
        """A "1:N" request for 1 and 100 records yields exactly those sizes."""
        main_records, sub_records = generate_matching_data("1:N", 1, 100)
        assert 1 == len(main_records)
        assert 100 == len(sub_records)

    def test_matching_n_to_one(self):
        """An "N:1" request for 100 and 1 records yields exactly those sizes."""
        main_records, sub_records = generate_matching_data("N:1", 100, 1)
        assert 100 == len(main_records)
        assert 1 == len(sub_records)

    def test_matching_zero_records(self):
        """Requesting zero records on both sides yields two empty results."""
        main_records, sub_records = generate_matching_data("1:1", 0, 0)
        assert 0 == len(main_records)
        assert 0 == len(sub_records)

    def test_matching_all_unmatched(self):
        """With key_match_ratio=0.0 the main and sub KEY sets share no value."""
        main_records, sub_records = generate_matching_data(
            "1:1", 5, 5, key_match_ratio=0.0
        )
        assert 5 == len(main_records)
        assert 5 == len(sub_records)
        # Confirm that no KEY appears on both sides.
        keys_in_main = {record["KEY"] for record in main_records}
        keys_in_sub = {record["KEY"] for record in sub_records}
        assert not (keys_in_main & keys_in_sub)

    def test_matching_all_matched(self):
        """With key_match_ratio=1.0 the main and sub KEY sets are identical."""
        main_records, sub_records = generate_matching_data(
            "1:1", 5, 5, key_match_ratio=1.0
        )
        assert 5 == len(main_records)
        assert 5 == len(sub_records)
        keys_in_main = {record["KEY"] for record in main_records}
        keys_in_sub = {record["KEY"] for record in sub_records}
        assert keys_in_sub == keys_in_main

    def test_matching_invalid_type(self):
        """An unknown matching type raises ValueError mentioning matching_type."""
        expected_error = pytest.raises(ValueError, match="matching_type")
        with expected_error:
            generate_matching_data("INVALID", 5, 5)

    def test_matching_invalid_ratio(self):
        """A key_match_ratio of -0.5 raises ValueError mentioning key_match_ratio."""
        expected_error = pytest.raises(ValueError, match="key_match_ratio")
        with expected_error:
            generate_matching_data("1:1", 5, 5, key_match_ratio=-0.5)

    def test_matching_negative_count(self):
        """A main record count of -1 raises ValueError matching the count message."""
        expected_error = pytest.raises(ValueError, match="记录数")
        with expected_error:
            generate_matching_data("1:1", -1, 5)
|
|
|
|
|
|
# ── generate_keybreak_data ──
|
|
|
|
class TestKeybreakData:
    """Tests for generate_keybreak_data: grouping, sum_type variants, validation."""

    def test_keybreak_data_basic(self):
        """3 groups of 2 give at least 6 rows spread over 3 distinct KEY values."""
        rows = generate_keybreak_data(3, 2)
        assert len(rows) >= 6
        # The KEY values must partition the rows into exactly three groups.
        distinct_keys = {row["KEY"] for row in rows}
        assert 3 == len(distinct_keys)

    def test_keybreak_data_single_group(self):
        """A single group of 5 gives 5 rows that all carry KEY "KEY-A"."""
        rows = generate_keybreak_data(1, 5)
        assert 5 == len(rows)
        assert all("KEY-A" == row["KEY"] for row in rows)

    def test_keybreak_data_accumulate(self):
        """sum_type="accumulate" gives FIELD 101, 102 in group 1 and 201, 202 in group 2."""
        rows = generate_keybreak_data(2, 2, sum_type="accumulate")
        assert 4 == len(rows)
        # GROUP 1: FIELD values 101, 102
        assert 1 == rows[0]["GROUP"]
        assert 101 == rows[0]["FIELD"]
        assert 102 == rows[1]["FIELD"]
        # GROUP 2: FIELD values 201, 202
        assert 2 == rows[2]["GROUP"]
        assert 201 == rows[2]["FIELD"]
        assert 202 == rows[3]["FIELD"]

    def test_keybreak_data_aggregate(self):
        """sum_type="aggregate" gives one shared FIELD value per group (100, then 200)."""
        rows = generate_keybreak_data(2, 2, sum_type="aggregate")
        # Within a group every row carries the same value.
        for position, expected_field in enumerate((100, 100, 200, 200)):
            assert rows[position]["FIELD"] == expected_field

    def test_keybreak_data_mark(self):
        """sum_type="mark" gives FIELD "MARK-A" and "MARK-B" for two one-row groups."""
        rows = generate_keybreak_data(2, 1, sum_type="mark")
        assert "MARK-A" == rows[0]["FIELD"]
        assert "MARK-B" == rows[1]["FIELD"]

    def test_keybreak_invalid_group_count(self):
        """A group count of 0 raises ValueError mentioning group_count."""
        expected_error = pytest.raises(ValueError, match="group_count")
        with expected_error:
            generate_keybreak_data(0, 2)

    def test_keybreak_invalid_sum_type(self):
        """An unrecognised sum_type raises ValueError mentioning sum_type."""
        expected_error = pytest.raises(ValueError, match="sum_type")
        with expected_error:
            generate_keybreak_data(3, 2, sum_type="unknown")
|
|
|
|
|
|
# ── generate_division_data ──
|
|
|
|
class TestDivisionData:
    """Tests for generate_division_data: number of parts, part sizes, validation."""

    def test_division_fifty(self):
        """Division type 50 over 50 records gives 2 parts holding 50 records in total."""
        parts = generate_division_data(50, 50)
        assert 2 == len(parts)
        first_part, second_part = parts[0], parts[1]
        assert 50 == len(first_part) + len(second_part)

    def test_division_one_hundred(self):
        """Division type 100 over 50 records gives a single part with all 50 records."""
        parts = generate_division_data(100, 50)
        assert 1 == len(parts)
        assert 50 == len(parts[0])

    def test_division_twenty_five(self):
        """Division type 25 over 100 records gives 4 parts holding 100 records in total."""
        parts = generate_division_data(25, 100)
        assert 4 == len(parts)
        record_total = 0
        for part in parts:
            record_total += len(part)
        assert 100 == record_total

    def test_division_single_record(self):
        """Division type 100 over 1 record gives a single part with that one record."""
        parts = generate_division_data(100, 1)
        assert 1 == len(parts)
        assert 1 == len(parts[0])

    def test_division_invalid_type(self):
        """Division type 99 raises ValueError mentioning division_type."""
        expected_error = pytest.raises(ValueError, match="division_type")
        with expected_error:
            generate_division_data(99, 50)

    def test_division_negative_count(self):
        """A record count of 0 raises ValueError mentioning record_count.

        Despite the test name, the value passed here is 0, not a negative number.
        """
        expected_error = pytest.raises(ValueError, match="record_count")
        with expected_error:
            generate_division_data(50, 0)
|
|
|
|
|
|
# ── generate_zero_byte_file ──
|
|
|
|
class TestZeroByteFile:
    """Tests for generate_zero_byte_file.

    Each test works inside a ``tempfile.TemporaryDirectory`` so the scratch
    directory, and everything created beneath it, is removed even when an
    assertion fails. The earlier ``mkdtemp`` + ``os.remove`` approach left
    the directory behind on every run and the file behind on failure.
    """

    def test_zero_byte(self):
        """The file created at the requested path has a size of 0 bytes."""
        with tempfile.TemporaryDirectory() as tmpdir:
            p = os.path.join(tmpdir, "empty.bin")
            generate_zero_byte_file(p)
            assert os.path.getsize(p) == 0

    def test_zero_byte_nested_dir(self):
        """A target below subdirectories the test never creates also has size 0."""
        with tempfile.TemporaryDirectory() as tmpdir:
            # "sub/nested" does not exist in the fresh temp dir at call time.
            p = os.path.join(tmpdir, "sub", "nested", "empty.dat")
            generate_zero_byte_file(p)
            assert os.path.getsize(p) == 0
|
|
|
|
|
|
# ── generate_boundary_values ──
|
|
|
|
class TestBoundaryValues:
    """Tests for generate_boundary_values across several PIC-style type strings."""

    def test_boundary_signed_numeric(self):
        """"S9(7)V99" gives max/min of +/-9999999.99, overflow 100000000.0, zero 0.0."""
        bounds = generate_boundary_values("S9(7)V99")
        assert 9999999.99 == bounds["max"]
        assert -9999999.99 == bounds["min"]
        assert 100000000.0 == bounds["overflow"]
        assert 0.0 == bounds["zero"]

    def test_boundary_unsigned_integer(self):
        """"9(4)" gives max 9999, min 0, overflow 100000, zero 0."""
        bounds = generate_boundary_values("9(4)")
        assert 9999 == bounds["max"]
        assert 0 == bounds["min"]
        assert 100000 == bounds["overflow"]
        assert 0 == bounds["zero"]

    def test_boundary_string(self):
        """"X(10)" gives a max of ten "X" characters and an overflow of eleven."""
        bounds = generate_boundary_values("X(10)")
        ten_chars = "X" * 10
        eleven_chars = "X" * 11
        assert ten_chars == bounds["max"]
        assert eleven_chars == bounds["overflow"]

    def test_boundary_signed_integer(self):
        """"S9(3)" gives max 999, min -999, zero 0."""
        bounds = generate_boundary_values("S9(3)")
        assert 999 == bounds["max"]
        assert -999 == bounds["min"]
        assert 0 == bounds["zero"]
|
|
|
|
|
|
# ── generate_minimal_records ──
|
|
|
|
class TestMinimalRecords:
    """Tests for generate_minimal_records with empty, typed, and defaulted fields."""

    def test_minimal_empty_fields(self):
        """An empty field list gives a single empty record."""
        result = generate_minimal_records([])
        assert [{}] == result

    def test_minimal_with_fields(self):
        """A numeric field becomes 0 and a 20-long string field becomes twenty "A"s."""
        id_field = {"name": "ID", "type": "numeric"}
        name_field = {"name": "NAME", "type": "string", "length": 20}
        result = generate_minimal_records([id_field, name_field])
        assert 1 == len(result)
        only_record = result[0]
        assert 0 == only_record["ID"]
        assert 20 == len(only_record["NAME"])
        assert "A" * 20 == only_record["NAME"]

    def test_minimal_with_defaults(self):
        """A field that declares a default takes that default value in the record."""
        status_field = {"name": "STATUS", "default": "OK"}
        result = generate_minimal_records([status_field])
        assert "OK" == result[0]["STATUS"]
|
|
|
|
|
|
# ── generate_sorted_records ──
|
|
|
|
class TestSortedRecords:
    """Tests for generate_sorted_records: key values, SEQ, validation, custom key."""

    def test_sorted_basic(self):
        """Five records run from KEY "KEY-0000" to KEY "KEY-0004"."""
        rows = generate_sorted_records(5)
        assert 5 == len(rows)
        assert "KEY-0000" == rows[0]["KEY"]
        assert "KEY-0004" == rows[4]["KEY"]

    def test_sorted_single(self):
        """A single record is returned with SEQ equal to 1."""
        rows = generate_sorted_records(1)
        assert 1 == len(rows)
        assert 1 == rows[0]["SEQ"]

    def test_sorted_invalid_count(self):
        """A record count of 0 raises ValueError mentioning record_count."""
        expected_error = pytest.raises(ValueError, match="record_count")
        with expected_error:
            generate_sorted_records(0)

    def test_sorted_custom_key(self):
        """key_field="MYKEY" stores the key value under "MYKEY" in each record."""
        rows = generate_sorted_records(3, key_field="MYKEY")
        first_row = rows[0]
        assert "MYKEY" in first_row
        assert "KEY-0000" == first_row["MYKEY"]
|
|
|
|
|
|
# ── generate_duplicate_keys ──
|
|
|
|
class TestDuplicateKeys:
    """Tests for generate_duplicate_keys on empty, single, and multiple records."""

    def test_duplicate_empty(self):
        """An empty input list gives an empty output list."""
        duplicated = generate_duplicate_keys([])
        assert [] == duplicated

    def test_duplicate_basic(self):
        """One record gives two rows with the same KEY; the copy's DATA is "a_DUP"."""
        source_rows = [{"KEY": "K001", "DATA": "a", "SEQ": 1}]
        duplicated = generate_duplicate_keys(source_rows)
        assert 2 == len(duplicated)
        assert "K001" == duplicated[0]["KEY"]
        assert "K001" == duplicated[1]["KEY"]
        assert "a_DUP" == duplicated[1]["DATA"]

    def test_duplicate_multiple(self):
        """Two records give four rows, with the copies placed after the originals."""
        first_row = {"KEY": "K001", "DATA": "a", "SEQ": 1}
        second_row = {"KEY": "K002", "DATA": "b", "SEQ": 2}
        duplicated = generate_duplicate_keys([first_row, second_row])
        assert 4 == len(duplicated)
        assert "K001" == duplicated[2]["KEY"]  # copy of the first record
        assert "K002" == duplicated[3]["KEY"]  # copy of the second record
|