Files
cobol-java-v3/tests/hina/test_classifier_deep.py
T
NB-076 cdba324b5a fix: HINA 全类型缺陷修复 — SORT/CSV/ALT 3 个真实缺陷
对抗性全类型测试发现的缺陷和修复:

缺陷1: SORT/MERGE L1 关键词太严格(漏检)
  - 旧: 'SORT ON KEY' / 'MERGE ON KEY'(精确字符串)
  - COBOL 中的真实写法: SORT WORK-FILE ON ASCENDING KEY ...
  - 新: 正则 SORT(?:\s+\S+)?\s+ON\s+(?:ASCENDING|DESCENDING)?KEY

缺陷2: CSV 假阳性(STRING/INSPECT 非CSV也触发)
  - 旧: has_string=True -> CSV合并
  - 新: 要求 has_csv_merge(STRING+逗号分隔)
  - 单纯字符串拼接不再触发 CSV 分类

缺陷3: ALTERNATE RECORD KEY 被 ORGANIZATION IS 覆盖
  - 旧: 文件编成先于替代索引(同确信度先者胜)
  - 新: 替代索引放前面(更具体的分类优先)

回归: 767 passed(0 new failures)
2026-06-21 15:51:30 +08:00

206 lines
7.4 KiB
Python

"""Deep classifier tests: keyword detection, confidence boundaries, edge cases"""
import sys, os
# Prepend the directory two levels above this file to sys.path, presumably so
# that the `hina` package imported on the next line resolves when the tests
# are run directly from this directory.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
from hina.classifier import detect_keyword, compute_confidence
# ── 1. detect_keyword with SQL + SORT + CALL all present ──
def test_detect_keyword_multiple_matches():
    """Detect SQL, SORT and CALL together in a single COBOL program.

    All three categories must be reported, and each one must carry the
    expected confidence value and matched-keyword label.
    """
    cobol_program = (
        "\n"
        "IDENTIFICATION DIVISION.\n"
        "PROGRAM-ID. TESTPGM.\n"
        "DATA DIVISION.\n"
        "WORKING-STORAGE SECTION.\n"
        "01 WS-A PIC X(100).\n"
        "PROCEDURE DIVISION.\n"
        "EXEC SQL\n"
        "SELECT * FROM TABLE\n"
        "END-EXEC.\n"
        "SORT SORT-FILE ON KEY WS-KEY.\n"
        "CALL 'SUBPGM'.\n"
        "STOP RUN.\n"
    )
    hits = detect_keyword(cobol_program)

    # Presence check: element 0 of every hit is treated as the category name.
    detected = set()
    for hit in hits:
        detected.add(hit[0])
    assert "DB操作" in detected
    assert "SORT" in detected
    assert "子程序调用" in detected

    # Detail check: element 1 is compared as the confidence and element 2 as
    # the label of whatever rule matched.
    details = {}
    for hit in hits:
        details[hit[0]] = (hit[1], hit[2])

    sql_confidence, sql_label = details["DB操作"]
    assert sql_confidence == 0.95
    assert sql_label == "EXEC SQL"

    sort_confidence, sort_label = details["SORT"]
    assert sort_confidence == 0.95
    # The SORT label is only required to begin with "re:SORT".
    assert sort_label.startswith("re:SORT")

    call_confidence, call_label = details["子程序调用"]
    assert call_confidence == 0.90
    assert call_label == "CALL"
# ── 2. compute_confidence with hybrid (keyword + LLM) result ──
def test_compute_confidence_hybrid():
    """A "WRITE AFTER" source plus an LLM verdict is reported as hybrid.

    The result must take its category and confidence from the LLM verdict
    while still listing the keyword match.  (The original author notes that
    "WRITE AFTER" scores 0.80, below a 0.90 keyword threshold; that threshold
    is not visible from this test.)
    """
    llm_verdict = {"category": "output_heavy", "confidence": 0.75}
    outcome = compute_confidence("WRITE AFTER ADVANCING 1 LINE.", llm_result=llm_verdict)

    expected_fields = {
        "method": "hybrid",
        "source": "llm",
        "category": "output_heavy",
        "confidence": 0.75,
    }
    for field, wanted in expected_fields.items():
        assert outcome[field] == wanted

    # The keyword hit must remain attached to the result.
    attached = outcome["matches"]
    assert len(attached) > 0
    assert any("WRITE AFTER" in str(entry) for entry in attached)
def test_compute_confidence_keyword_high_confidence_overrides_llm():
    """An "EXEC SQL" source wins over a conflicting LLM verdict.

    The result must report the keyword method, the "l1" source, the
    "DB操作" category and a confidence of 0.95, ignoring the LLM's answer.
    """
    outcome = compute_confidence(
        "EXEC SQL SELECT * FROM TABLE",
        llm_result={"category": "something_else", "confidence": 0.50},
    )

    expected_fields = {
        "method": "keyword",
        "source": "l1",
        "category": "DB操作",
        "confidence": 0.95,
    }
    for field, wanted in expected_fields.items():
        assert outcome[field] == wanted
# ── 3. compute_confidence boundaries: 0.0, 0.69, 0.70, 0.71, 1.0 ──
def test_confidence_boundary_zero():
    """Without keyword hits or an LLM verdict the result is "unknown" at 0.0."""
    plain_cobol = " MOVE 1 TO A.\n ADD 1 TO B.\n STOP RUN."
    outcome = compute_confidence(plain_cobol, llm_result=None)

    expected_fields = {
        "category": "unknown",
        "confidence": 0.0,
        "method": "none",
        "matches": [],
    }
    for field, wanted in expected_fields.items():
        assert outcome[field] == wanted
def test_confidence_boundary_069():
    """An LLM verdict at 0.69 is returned unchanged and labelled hybrid.

    0.69 sits just under the 0.70 boundary named by the test suite.
    """
    outcome = compute_confidence(
        " MOVE 1 TO A.",
        llm_result={"category": "custom_category", "confidence": 0.69},
    )

    expected_fields = {
        "category": "custom_category",
        "confidence": 0.69,
        "method": "hybrid",
    }
    for field, wanted in expected_fields.items():
        assert outcome[field] == wanted
def test_confidence_boundary_070():
    """An LLM verdict at exactly 0.70 is returned unchanged and labelled hybrid."""
    outcome = compute_confidence(
        " MOVE 1 TO A.",
        llm_result={"category": "custom_category", "confidence": 0.70},
    )

    expected_fields = {
        "category": "custom_category",
        "confidence": 0.70,
        "method": "hybrid",
    }
    for field, wanted in expected_fields.items():
        assert outcome[field] == wanted
def test_confidence_boundary_071():
    """An LLM verdict at 0.71 is returned unchanged and labelled hybrid.

    0.71 sits just over the 0.70 boundary named by the test suite.
    """
    outcome = compute_confidence(
        " MOVE 1 TO A.",
        llm_result={"category": "custom_category", "confidence": 0.71},
    )

    expected_fields = {
        "category": "custom_category",
        "confidence": 0.71,
        "method": "hybrid",
    }
    for field, wanted in expected_fields.items():
        assert outcome[field] == wanted
def test_confidence_boundary_max():
    """An LLM verdict at the maximum confidence 1.0 is returned unchanged as hybrid."""
    outcome = compute_confidence(
        " MOVE 1 TO A.",
        llm_result={"category": "perfect", "confidence": 1.0},
    )

    expected_fields = {
        "category": "perfect",
        "confidence": 1.0,
        "method": "hybrid",
    }
    for field, wanted in expected_fields.items():
        assert outcome[field] == wanted
# ── 4. Keyword source text with mixed case, extra whitespace, inline comments ──
def test_detect_keyword_mixed_case_whitespace_comments():
    """Keywords written in mixed case, next to inline ``*>`` comments, are still found."""
    cobol_program = (
        "\n"
        "IDENTIFICATION DIVISION.\n"
        "ExEc Sql\n"
        "SELECT * FROM TABLE\n"
        "END-EXEC. *> inline comment\n"
        "Call 'SUBPGM' *> some comment\n"
        "Sort On Key WS-KEY.\n"
    )
    hits = detect_keyword(cobol_program)

    # All three categories must be reported despite the mixed-case spelling.
    detected = set()
    for hit in hits:
        detected.add(hit[0])
    assert "DB操作" in detected
    assert "子程序调用" in detected
    assert "SORT" in detected

    # The reported labels are expected in upper case even though the source
    # text is not.
    labels = set()
    for hit in hits:
        labels.add(hit[2])
    assert "EXEC SQL" in labels
    assert "CALL" in labels

    # SORT is checked by category only; its label is not pinned here.
    sort_hits = [hit for hit in hits if hit[0] == "SORT"]
    assert sort_hits
# ── 5. No keyword match and no LLM result → unknown ──
def test_detect_keyword_no_match():
    """A source containing none of the known keywords produces no hits."""
    plain_cobol = " MOVE 1 TO A.\n ADD 1 TO B.\n STOP RUN."
    hits = detect_keyword(plain_cobol)
    hit_count = len(hits)
    assert hit_count == 0
def test_compute_confidence_no_match_no_llm():
    """With neither a keyword hit nor an LLM verdict, every field reports "nothing found".

    Checks category, confidence, method, source and the (empty) match list.
    """
    plain_cobol = " MOVE 1 TO A.\n ADD 1 TO B.\n STOP RUN."
    outcome = compute_confidence(plain_cobol, llm_result=None)

    expected_fields = {
        "category": "unknown",
        "confidence": 0.0,
        "method": "none",
        "source": "unknown",
        "matches": [],
    }
    for field, wanted in expected_fields.items():
        assert outcome[field] == wanted
# ── Additional: verify L1_RULES via detect_keyword ──
def test_detect_keyword_all_rules():
    """Each rule category is detectable from a representative keyword.

    Every (keyword, expected category) pair is embedded in a one-line source
    and run through ``detect_keyword``.  All mismatches are collected and
    reported together, so one broken rule does not hide failures in the
    rules listed after it.
    """
    test_cases = [
        ("EXEC SQL", "DB操作"),
        ("CALL", "子程序调用"),
        ("IS INITIAL", "IS INITIAL"),
        ("SYSIN", "SYSIN"),
        ("ALPHABETIC", "编码转换"),
        ("DFHCOMMAREA", "online"),
        ("MAP", "online"),
        ("SORT SORT-FILE ON KEY", "SORT"),
        ("MERGE MERGE-FILE ON KEY", "MERGE"),
        ("WRITE AFTER", "编辑输出"),
        ("WRITE BEFORE", "编辑输出"),
        ("ORGANIZATION IS", "文件编成"),
        ("ALTERNATE RECORD KEY", "替代索引"),
    ]

    failures = []
    for keyword, expected_category in test_cases:
        source = f" {keyword} DUMMY."
        categories = {r[0] for r in detect_keyword(source)}
        if expected_category not in categories:
            failures.append(
                f"Keyword '{keyword}' should trigger category '{expected_category}', got {categories}"
            )

    # Single assertion at the end: the message lists every failing rule.
    assert not failures, "\n".join(failures)