cdba324b5a
对抗性全类型测试发现的缺陷和修复:

缺陷1: SORT/MERGE L1 关键词太严格(漏检)
- 旧: 'SORT ON KEY' / 'MERGE ON KEY'(精确字符串)
- COBOL 中的真实写法: SORT WORK-FILE ON ASCENDING KEY ...
- 新: 正则 SORT(?:\s+\S+)?\s+ON\s+(?:ASCENDING|DESCENDING)?KEY

缺陷2: CSV 假阳性(STRING/INSPECT 非CSV也触发)
- 旧: has_string=True -> CSV合并
- 新: 要求 has_csv_merge(STRING+逗号分隔)
- 单纯字符串拼接不再触发 CSV 分类

缺陷3: ALTERNATE RECORD KEY 被 ORGANIZATION IS 覆盖
- 旧: 文件编成先于替代索引(同确信度先者胜)
- 新: 替代索引放前面(更具体的分类优先)

回归: 767 passed(0 new failures)
206 lines
7.4 KiB
Python
206 lines
7.4 KiB
Python
"""Deep classifier tests: keyword detection, confidence boundaries, edge cases"""
|
|
|
|
import sys, os
|
|
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
|
|
|
|
from hina.classifier import detect_keyword, compute_confidence
|
|
|
|
|
|
# ── 1. detect_keyword with SQL + SORT + CALL all present ──
|
|
|
|
def test_detect_keyword_multiple_matches():
    """Check that one program with SQL, SORT and CALL statements yields a hit for each.

    Every expected category must appear in the detect_keyword() output, and
    each hit must carry the expected confidence and matched keyword.
    """
    source = """
IDENTIFICATION DIVISION.
PROGRAM-ID. TESTPGM.
DATA DIVISION.
WORKING-STORAGE SECTION.
01 WS-A PIC X(100).
PROCEDURE DIVISION.
EXEC SQL
SELECT * FROM TABLE
END-EXEC.
SORT SORT-FILE ON KEY WS-KEY.
CALL 'SUBPGM'.
STOP RUN.
"""
    hits = detect_keyword(source)

    # Each hit is indexed as (category, confidence, matched keyword/pattern).
    found = {hit[0] for hit in hits}
    for expected in ("DB操作", "SORT", "子程序调用"):
        assert expected in found

    # Map every category to its (confidence, keyword) pair for detailed checks.
    details = {hit[0]: (hit[1], hit[2]) for hit in hits}

    sql_confidence, sql_keyword = details["DB操作"]
    assert sql_confidence == 0.95
    assert sql_keyword == "EXEC SQL"

    sort_confidence, sort_keyword = details["SORT"]
    assert sort_confidence == 0.95
    # The SORT hit is expected to report a "re:SORT"-prefixed pattern
    # rather than a plain keyword.
    assert sort_keyword.startswith("re:SORT")

    call_confidence, call_keyword = details["子程序调用"]
    assert call_confidence == 0.90
    assert call_keyword == "CALL"
|
|
|
|
|
|
# ── 2. compute_confidence with hybrid (keyword + LLM) result ──
|
|
|
|
def test_compute_confidence_hybrid():
    """A sub-threshold keyword hit combined with an LLM answer gives a hybrid result.

    The LLM category and confidence are expected to be returned, while the
    keyword matches remain attached to the result.
    """
    # The test expects "WRITE AFTER" to be a keyword hit scored below the
    # 0.90 keyword-only threshold (0.80, category "编辑输出").
    llm_answer = {"category": "output_heavy", "confidence": 0.75}
    outcome = compute_confidence("WRITE AFTER ADVANCING 1 LINE.", llm_result=llm_answer)

    expected_fields = {
        "method": "hybrid",
        "source": "llm",
        "category": "output_heavy",
        "confidence": 0.75,
    }
    for field, value in expected_fields.items():
        assert outcome[field] == value

    # The keyword evidence must survive even though the LLM category won.
    assert len(outcome["matches"]) > 0
    assert any("WRITE AFTER" in str(match) for match in outcome["matches"])
|
|
|
|
|
|
def test_compute_confidence_keyword_high_confidence_overrides_llm():
    """A keyword hit at or above 0.90 wins over a supplied LLM result."""
    # "EXEC SQL" is expected to score 0.95 for "DB操作", so the LLM answer
    # below should be ignored.
    llm_answer = {"category": "something_else", "confidence": 0.50}
    outcome = compute_confidence("EXEC SQL SELECT * FROM TABLE", llm_result=llm_answer)

    expected_fields = {
        "method": "keyword",
        "source": "l1",
        "category": "DB操作",
        "confidence": 0.95,
    }
    for field, value in expected_fields.items():
        assert outcome[field] == value
|
|
|
|
|
|
# ── 3. compute_confidence boundaries: 0.0, 0.69, 0.70, 0.71, 1.0 ──
|
|
|
|
def test_confidence_boundary_zero():
    """With no keyword hit and no LLM result the outcome is unknown at 0.0."""
    plain_cobol = " MOVE 1 TO A.\n ADD 1 TO B.\n STOP RUN."
    outcome = compute_confidence(plain_cobol, llm_result=None)

    expected_fields = {
        "category": "unknown",
        "confidence": 0.0,
        "method": "none",
        "matches": [],
    }
    for field, value in expected_fields.items():
        assert outcome[field] == value
|
|
|
|
|
|
def test_confidence_boundary_069():
    """An LLM confidence of 0.69 (just under 0.70) is passed through unchanged."""
    llm_answer = {"category": "custom_category", "confidence": 0.69}
    outcome = compute_confidence(" MOVE 1 TO A.", llm_result=llm_answer)

    expected_fields = {
        "category": "custom_category",
        "confidence": 0.69,
        "method": "hybrid",
    }
    for field, value in expected_fields.items():
        assert outcome[field] == value
|
|
|
|
|
|
def test_confidence_boundary_070():
    """An LLM confidence of exactly 0.70 is passed through unchanged."""
    llm_answer = {"category": "custom_category", "confidence": 0.70}
    outcome = compute_confidence(" MOVE 1 TO A.", llm_result=llm_answer)

    expected_fields = {
        "category": "custom_category",
        "confidence": 0.70,
        "method": "hybrid",
    }
    for field, value in expected_fields.items():
        assert outcome[field] == value
|
|
|
|
|
|
def test_confidence_boundary_071():
    """An LLM confidence of 0.71 (just over 0.70) is passed through unchanged."""
    llm_answer = {"category": "custom_category", "confidence": 0.71}
    outcome = compute_confidence(" MOVE 1 TO A.", llm_result=llm_answer)

    expected_fields = {
        "category": "custom_category",
        "confidence": 0.71,
        "method": "hybrid",
    }
    for field, value in expected_fields.items():
        assert outcome[field] == value
|
|
|
|
|
|
def test_confidence_boundary_max():
    """An LLM confidence of 1.0 (the maximum) is passed through unchanged."""
    llm_answer = {"category": "perfect", "confidence": 1.0}
    outcome = compute_confidence(" MOVE 1 TO A.", llm_result=llm_answer)

    expected_fields = {
        "category": "perfect",
        "confidence": 1.0,
        "method": "hybrid",
    }
    for field, value in expected_fields.items():
        assert outcome[field] == value
|
|
|
|
|
|
# ── 4. Keyword source text with mixed case, extra whitespace, inline comments ──
|
|
|
|
def test_detect_keyword_mixed_case_whitespace_comments():
    """Keywords written in mixed case next to inline ``*>`` comments are still detected."""
    source = """
IDENTIFICATION DIVISION.
ExEc Sql
SELECT * FROM TABLE
END-EXEC. *> inline comment
Call 'SUBPGM' *> some comment
Sort On Key WS-KEY.
"""
    hits = detect_keyword(source)

    # All three categories must be found despite the mixed-case spelling.
    found_categories = {hit[0] for hit in hits}
    for expected in ("DB操作", "子程序调用", "SORT"):
        assert expected in found_categories

    # Matched keywords are expected in upper case
    # (presumably detect_keyword normalizes the source's case; confirm in classifier).
    reported_keywords = {hit[2] for hit in hits}
    for expected in ("EXEC SQL", "CALL"):
        assert expected in reported_keywords

    # SORT is detected via a regex rule, so only its category is checked here.
    assert any(hit[0] == "SORT" for hit in hits)
|
|
|
|
|
|
# ── 5. No keyword match and no LLM result → unknown ──
|
|
|
|
def test_detect_keyword_no_match():
    """A source without any known keyword produces no hits at all."""
    plain_cobol = " MOVE 1 TO A.\n ADD 1 TO B.\n STOP RUN."
    hits = detect_keyword(plain_cobol)
    assert len(hits) == 0
|
|
|
|
|
|
def test_compute_confidence_no_match_no_llm():
    """Without a keyword hit or an LLM result, every result field reports 'nothing found'."""
    plain_cobol = " MOVE 1 TO A.\n ADD 1 TO B.\n STOP RUN."
    outcome = compute_confidence(plain_cobol, llm_result=None)

    expected_fields = {
        "category": "unknown",
        "confidence": 0.0,
        "method": "none",
        "source": "unknown",
        "matches": [],
    }
    for field, value in expected_fields.items():
        assert outcome[field] == value
|
|
|
|
|
|
# ── Additional: verify L1_RULES via detect_keyword ──
|
|
|
|
def test_detect_keyword_all_rules():
    """Check that a representative keyword for each rule triggers its category.

    Each (keyword, expected category) pair is embedded in a one-line source
    and run through detect_keyword(). All mismatches are collected and
    reported together, so one broken rule does not hide failures in the
    rules listed after it.
    """
    test_cases = [
        ("EXEC SQL", "DB操作"),
        ("CALL", "子程序调用"),
        ("IS INITIAL", "IS INITIAL"),
        ("SYSIN", "SYSIN"),
        ("ALPHABETIC", "编码转换"),
        ("DFHCOMMAREA", "online"),
        ("MAP", "online"),
        ("SORT SORT-FILE ON KEY", "SORT"),
        ("MERGE MERGE-FILE ON KEY", "MERGE"),
        ("WRITE AFTER", "编辑输出"),
        ("WRITE BEFORE", "编辑输出"),
        ("ORGANIZATION IS", "文件编成"),
        ("ALTERNATE RECORD KEY", "替代索引"),
    ]

    failures = []
    for keyword, expected_category in test_cases:
        source = f" {keyword} DUMMY."
        categories = {r[0] for r in detect_keyword(source)}
        if expected_category not in categories:
            failures.append(
                f"Keyword '{keyword}' should trigger category "
                f"'{expected_category}', got {categories}"
            )

    # Assert once at the end so the report lists every failing rule.
    assert not failures, "\n".join(failures)
|