"""Phase 8: SORT / MERGE 系测试 — 基于 parametrized 生成数据。 测试覆盖: - SORT 排序正确性(升序 / 降序 / 多键 / 稳定性) - MERGE 合并逻辑(均匀 / 不均 / 重复键) """ from __future__ import annotations import pytest from parametrized import generate_sorted_records, generate_duplicate_keys # ── 排序辅助 ── def _sort_descending(records: list[dict], key_field: str = "KEY") -> list[dict]: """按 KEY 降序排列记录。""" return sorted(records, key=lambda r: r[key_field], reverse=True) def _sort_by_multiple_keys( records: list[dict], keys: list[str], ascending: bool = True, ) -> list[dict]: """按多键排序。""" return sorted(records, key=lambda r: tuple(r[k] for k in keys), reverse=not ascending) def _merge_sorted( left: list[dict], right: list[dict], key_field: str = "KEY", ) -> list[dict]: """合并两个已排序列表(归并算法)。""" result: list[dict] = [] i = j = 0 while i < len(left) and j < len(right): if left[i][key_field] <= right[j][key_field]: result.append(left[i]) i += 1 else: result.append(right[j]) j += 1 result.extend(left[i:]) result.extend(right[j:]) return result # ============================================================ # SORT # ============================================================ class TestSortAscending: """升序排序""" def test_sort_basic_ascending(self): records = generate_sorted_records(10) sorted_records = sorted(records, key=lambda r: r["KEY"]) assert sorted_records == records, "generate_sorted_records 应已按 KEY 升序排列" def test_sort_descending(self): records = generate_sorted_records(5) desc = _sort_descending(records) assert desc[0]["KEY"] == "KEY-0004" assert desc[-1]["KEY"] == "KEY-0000" def test_sort_single_record(self): records = generate_sorted_records(1) assert len(records) == 1 assert records[0]["KEY"] == "KEY-0000" class TestSortMultipleKeys: """多键排序""" def test_sort_two_keys(self): records = [ {"KEY": "K001", "SUB": "A", "DATA": "x"}, {"KEY": "K001", "SUB": "B", "DATA": "y"}, {"KEY": "K002", "SUB": "A", "DATA": "z"}, ] sorted_recs = _sort_by_multiple_keys(records, ["KEY", "SUB"]) assert sorted_recs[0]["SUB"] == "A" assert sorted_recs[1]["SUB"] == "B" assert sorted_recs[2]["SUB"] == "A" def test_sort_three_keys(self): records = [ {"KEY": "K002", "SUB": "A", "TERT": "Z"}, {"KEY": "K001", "SUB": "B", "TERT": "Y"}, {"KEY": "K001", "SUB": "A", "TERT": "X"}, ] sorted_recs = _sort_by_multiple_keys(records, ["KEY", "SUB", "TERT"]) assert sorted_recs[0]["TERT"] == "X" assert sorted_recs[1]["TERT"] == "Y" assert sorted_recs[2]["TERT"] == "Z" class TestSortDuplicates: """重复键排序""" def test_sort_with_duplicate_keys(self): base = generate_sorted_records(5) with_dups = generate_duplicate_keys(base) assert len(with_dups) == 10 sorted_all = sorted(with_dups, key=lambda r: (r["KEY"], r["SEQ"])) assert sorted_all[0]["KEY"] == sorted_all[1]["KEY"] # 同 KEY assert sorted_all[0]["SEQ"] < sorted_all[1]["SEQ"] def test_sort_duplicate_all_same_key(self): records = [{"KEY": "SAME", "DATA": str(i), "SEQ": i} for i in range(5)] shuffled = [records[3], records[0], records[2], records[4], records[1]] sorted_recs = sorted(shuffled, key=lambda r: r["SEQ"]) assert [r["DATA"] for r in sorted_recs] == ["0", "1", "2", "3", "4"] class TestSortEdgeCases: """边界情况""" def test_sort_empty(self): records: list[dict] = [] sorted_recs = sorted(records, key=lambda r: r.get("KEY", "")) assert sorted_recs == [] def test_sort_invalid_count(self): with pytest.raises(ValueError, match="record_count"): generate_sorted_records(0) def test_sort_custom_key_field(self): records = generate_sorted_records(3, key_field="MYKEY") assert all("MYKEY" in r for r in records) assert [r["MYKEY"] for r in records] == ["KEY-0000", "KEY-0001", "KEY-0002"] # ============================================================ # MERGE # ============================================================ class TestMergeBasic: """基本合并""" def test_merge_two_equal_files(self): left = generate_sorted_records(5) right = generate_sorted_records(5) merged = _merge_sorted(left, right) assert len(merged) == 10 keys = [r["KEY"] for r in merged] assert keys == sorted(keys) def test_merge_one_empty(self): left = generate_sorted_records(3) right: list[dict] = [] merged = _merge_sorted(left, right) assert len(merged) == 3 assert merged == left def test_merge_both_empty(self): merged = _merge_sorted([], []) assert merged == [] class TestMergeUneven: """不均等合并""" def test_merge_left_larger(self): left = generate_sorted_records(10) right = generate_sorted_records(3) merged = _merge_sorted(left, right) assert len(merged) == 13 keys = [r["KEY"] for r in merged] assert keys == sorted(keys) def test_merge_right_larger(self): left = generate_sorted_records(2) right = generate_sorted_records(8) merged = _merge_sorted(left, right) assert len(merged) == 10 keys = [r["KEY"] for r in merged] assert keys == sorted(keys) class TestMergeDuplicates: """重复键合并""" def test_merge_with_duplicate_keys(self): left = [{"KEY": "K001", "DATA": "L1"}, {"KEY": "K002", "DATA": "L2"}] right = [{"KEY": "K001", "DATA": "R1"}, {"KEY": "K003", "DATA": "R3"}] merged = _merge_sorted(left, right) assert len(merged) == 4 assert merged[0]["KEY"] == "K001" assert merged[1]["KEY"] == "K001" def test_merge_stability(self): """稳定性: 同 KEY 时左文件先出现""" left = [{"KEY": "K001", "DATA": "LEFT"}, {"KEY": "K003", "DATA": "LEFT"}] right = [{"KEY": "K001", "DATA": "RIGHT"}] merged = _merge_sorted(left, right) assert merged[0]["DATA"] == "LEFT" assert merged[1]["DATA"] == "RIGHT"