fix: HINA 全类型缺陷修复 — SORT/CSV/ALT 3 个真实缺陷
对抗性全类型测试发现的缺陷及修复:

缺陷1: SORT/MERGE 的 L1 关键词匹配过于严格(漏检)
- 旧: 'SORT ON KEY' / 'MERGE ON KEY'(精确字符串匹配)
- COBOL 中的真实写法: SORT WORK-FILE ON ASCENDING KEY ...
- 新: 正则 SORT(?:\s+\S+)?\s+ON\s+(?:(?:ASCENDING|DESCENDING)\s+)?KEY

缺陷2: CSV 假阳性(非 CSV 的 STRING/INSPECT 也会触发)
- 旧: has_string=True 即判定为 CSV 合并
- 新: 要求 has_csv_merge(STRING + 逗号分隔)
- 单纯的字符串拼接不再触发 CSV 分类

缺陷3: ALTERNATE RECORD KEY 被 ORGANIZATION IS 覆盖
- 旧: 文件编成规则排在替代索引之前(确信度相同时先匹配者胜出)
- 新: 替代索引规则前移(更具体的分类优先)

回归测试: 767 passed(0 new failures)
This commit is contained in:
@@ -20,7 +20,7 @@ def test_detect_keyword_multiple_matches():
|
||||
EXEC SQL
|
||||
SELECT * FROM TABLE
|
||||
END-EXEC.
|
||||
-           SORT ON KEY WS-KEY.
|
||||
+           SORT SORT-FILE ON KEY WS-KEY.
|
||||
CALL 'SUBPGM'.
|
||||
STOP RUN.
|
||||
"""
|
||||
@@ -36,7 +36,7 @@ def test_detect_keyword_multiple_matches():
|
||||
assert cat_map["DB操作"][0] == 0.95
|
||||
assert cat_map["DB操作"][1] == "EXEC SQL"
|
||||
assert cat_map["SORT"][0] == 0.95
|
||||
-    assert cat_map["SORT"][1] == "SORT ON KEY"
|
||||
+    assert cat_map["SORT"][1].startswith("re:SORT")  # regex pattern
|
||||
assert cat_map["子程序调用"][0] == 0.90
|
||||
assert cat_map["子程序调用"][1] == "CALL"
|
||||
|
||||
@@ -154,7 +154,7 @@ def test_detect_keyword_mixed_case_whitespace_comments():
|
||||
matched_keywords = {r[2] for r in results}
|
||||
assert "EXEC SQL" in matched_keywords
|
||||
assert "CALL" in matched_keywords
|
||||
-    assert "SORT ON KEY" in matched_keywords
|
||||
+    assert any(r[0] == "SORT" for r in results)  # SORT detected via regex
|
||||
|
||||
|
||||
# ── 5. No keyword match and no LLM result → unknown ──
|
||||
@@ -190,8 +190,8 @@ def test_detect_keyword_all_rules():
|
||||
("ALPHABETIC", "编码转换"),
|
||||
("DFHCOMMAREA", "online"),
|
||||
("MAP", "online"),
|
||||
-        ("SORT ON KEY", "SORT"),
|
||||
-        ("MERGE ON KEY", "MERGE"),
|
||||
+        ("SORT SORT-FILE ON KEY", "SORT"),
|
||||
+        ("MERGE MERGE-FILE ON KEY", "MERGE"),
|
||||
("WRITE AFTER", "编辑输出"),
|
||||
("WRITE BEFORE", "编辑输出"),
|
||||
("ORGANIZATION IS", "文件编成"),
|
||||
|
||||
Reference in New Issue
Block a user