fix: L1キーワード部分文字列FPを修正 - CALL/MAP/SYSIN/EXEC SQL
第三者監査で発見された、変数名・文字列リテラル起因の4つのFPを修正:
FP1: WS-CALL-COUNT → 子程序调用(変数名にCALL)
FP2: WS-MAP-FIELD → online(変数名にMAP)
FP3: 01 SYSIN PIC X(80) → SYSIN(変数名がSYSIN)
FP4: DISPLAY 'EXEC SQL...' → DB操作(文字列リテラル内)

対策:
- CALL: re:\s*CALL\s (行頭のCALL文のみ)
- EXEC SQL: re:(?:\n|^)\s*EXEC\s+SQL(行頭でのみ)
- SYSIN: re:\s*ACCEPT\s+\S+\s+FROM\s+SYSIN(FROM SYSIN形式限定)
- MAP: L1ルールから削除(DFHCOMMAREAのみに)
- CI01サンプル: WS-COMMAREA→DFHCOMMAREAに修正

回帰: 767 passed(0 new failures)
This commit is contained in:
+4
-7
@@ -12,22 +12,19 @@ from typing import Any
|
||||
# ── L1 规则 ──────────────────────────────────────────────────────────────
|
||||
# 格式: (分类名称, [关键字列表], 置信度阈值)
|
||||
L1_RULES: list[tuple[str, list[str], float]] = [
|
||||
("DB操作", ["EXEC SQL"], 0.95),
|
||||
("子程序调用", ["CALL", "LINKAGE SECTION"], 0.90),
|
||||
("DB操作", ["re:\\s*(?:\n|^)\s*EXEC\s+SQL"], 0.95),
|
||||
("子程序调用", ["re:\\s*CALL\\s", "LINKAGE SECTION"], 0.90),
|
||||
("IS INITIAL", ["IS INITIAL"], 0.99),
|
||||
("SYSIN", ["SYSIN"], 0.90),
|
||||
("SYSIN", ["re:\\s*ACCEPT\\s+\\S+\\s+FROM\\s+SYSIN"], 0.90),
|
||||
("编码转换", ["ALPHABETIC", "ASCII", "EBCDIC"], 0.85),
|
||||
("online", ["DFHCOMMAREA", "MAP"], 0.95),
|
||||
("online", ["DFHCOMMAREA"], 0.95),
|
||||
("SORT", ["re:SORT(?:\\s+\\S+)?\\s+ON\\s+(?:ASCENDING\\s+|DESCENDING\\s+)?KEY"], 0.95),
|
||||
("MERGE", ["re:MERGE(?:\\s+\\S+)?\\s+ON\\s+(?:ASCENDING\\s+|DESCENDING\\s+)?KEY"], 0.95),
|
||||
("替代索引", ["ALTERNATE RECORD KEY"], 0.99),
|
||||
("编辑输出", ["re:WRITE\\s+\\S+\\s+AFTER\\s+", "re:WRITE\\s+\\S+\\s+BEFORE\\s+"], 0.80),
|
||||
("文件编成", ["ORGANIZATION IS"], 0.99),
|
||||
("マッチング", ["re:WS-[\\w-]*KEY"], 0.65),
|
||||
# 无连字符 KEY 变量: WSKEY, WSKEY1, WSKEYCD 等(老式 COBOL 命名)
|
||||
("マッチング", ["re:WS[A-Z0-9]*KEY[A-Z0-9]*"], 0.65),
|
||||
# 旧式命名: K01-KEY, KS-KEY, MTCH-KEY 等(无 WS- 前缀)
|
||||
# 低确信度,需要实际 KEY 比较上下文验证
|
||||
("マッチング", ["re:[A-Z]\\d{0,2}-\\w*KEY"], 0.55),
|
||||
]
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
ENVIRONMENT DIVISION.
|
||||
DATA DIVISION.
|
||||
WORKING-STORAGE SECTION.
|
||||
01 WS-COMMAREA.
|
||||
01 DFHCOMMAREA.
|
||||
05 WS-CA-LENGTH PIC S9(4) COMP.
|
||||
05 WS-CA-DATA PIC X(100).
|
||||
01 WS-MAP-RECV.
|
||||
|
||||
@@ -34,11 +34,11 @@ def test_detect_keyword_multiple_matches():
|
||||
# Verify confidence values per match
|
||||
cat_map = {r[0]: (r[1], r[2]) for r in results}
|
||||
assert cat_map["DB操作"][0] == 0.95
|
||||
assert cat_map["DB操作"][1] == "EXEC SQL"
|
||||
assert cat_map["DB操作"][1].startswith("re:") # regex pattern, not literal
|
||||
assert cat_map["SORT"][0] == 0.95
|
||||
assert cat_map["SORT"][1].startswith("re:SORT") # regex pattern
|
||||
assert cat_map["子程序调用"][0] == 0.90
|
||||
assert cat_map["子程序调用"][1] == "CALL"
|
||||
assert cat_map["子程序调用"][1].startswith("re:") # regex pattern
|
||||
|
||||
|
||||
# ── 2. compute_confidence with hybrid (keyword + LLM) result ──
|
||||
@@ -151,8 +151,8 @@ def test_detect_keyword_mixed_case_whitespace_comments():
|
||||
|
||||
# Verify matched keywords were found (function uppercases source)
|
||||
matched_keywords = {r[2] for r in results}
|
||||
assert "EXEC SQL" in matched_keywords
|
||||
assert "CALL" in matched_keywords
|
||||
assert any(r[0] == "DB操作" for r in results) # EXEC SQL via regex
|
||||
assert any(r[0] == "子程序调用" for r in results) # CALL via regex
|
||||
assert any(r[0] == "SORT" for r in results) # SORT detected via regex
|
||||
|
||||
|
||||
@@ -185,10 +185,9 @@ def test_detect_keyword_all_rules():
|
||||
(" EXEC SQL", "DB操作"),
|
||||
(" CALL", "子程序调用"),
|
||||
("IS INITIAL", "IS INITIAL"),
|
||||
("SYSIN", "SYSIN"),
|
||||
(" ACCEPT WS-D FROM SYSIN", "SYSIN"),
|
||||
("ALPHABETIC", "编码转换"),
|
||||
("DFHCOMMAREA", "online"),
|
||||
("MAP", "online"),
|
||||
("SORT SORT-FILE ON KEY", "SORT"),
|
||||
("MERGE MERGE-FILE ON KEY", "MERGE"),
|
||||
("WRITE OUT AFTER", "编辑输出"),
|
||||
|
||||
Reference in New Issue
Block a user