Files
aurak/clean_translations.py
T
Developer 0a9588abb7 feat: implement QuestionBank CRUD with pagination and template query
- Add pagination support to findAll (page, limit query params)
- Add findByTemplateId method to service
- Add GET /by-template/:templateId endpoint to controller
- Service already includes CRUD for QuestionBank and QuestionBankItem
2026-04-23 17:19:11 +08:00

88 lines
3.6 KiB
Python

import sys
import re
# Keys every block must define. A key that is absent from a block is added
# with its own name as a placeholder value.
_REQUIRED_KEYS = (
    "kbSettingsSaved", "failedToSaveSettings", "actionFailed", "userAddedToOrganization",
    "featureUpdated", "roleTenantAdmin", "roleRegularUser", "creatingRegularUser",
    "editUserRole", "targetRole", "editCategory", "totalTenants", "systemUsers",
    "systemHealth", "operational", "orgManagement", "globalTenantControl",
    "newTenant", "domainOptional", "saveChanges", "modelConfiguration",
    "defaultLLMModel", "selectLLM", "selectEmbedding", "rerankModel", "none",
    "indexingChunkingConfig", "chatHyperparameters", "temperature", "precise",
    "creative", "maxResponseTokens", "retrievalSearchSettings", "topK",
    "similarityThreshold", "enableHybridSearch", "hybridSearchDesc", "hybridWeight",
    "pureText", "pureVector", "enableQueryExpansion", "queryExpansionDesc",
    "enableHyDE", "hydeDesc", "enableReranking", "rerankingDesc", "broad",
    "strict", "maxInput", "dimensions", "defaultBadge", "dims", "ctx",
    "baseApi", "configured", "groupUpdated", "groupDeleted", "groupCreated",
    "navCatalog", "allDocuments", "categories", "uncategorizedFiles", "category",
    "statusReadyDesc", "statusIndexingDesc", "selectCategory", "noneUncategorized",
    "previous", "next", "createCategory", "categoryDesc", "categoryName",
    "createCategoryBtn", "newGroup", "noKnowledgeGroups", "createGroupDesc",
    "noDescriptionProvided", "browseManageFiles", "filterGroupFiles",
)

# Start of an object-valued entry ("<whitespace>name: {"). The capture group
# makes split() keep every header in its result.
_BLOCK_HEADER_RE = re.compile(r'(\s+\w+: \{)')
# An indented "key:" at the start of a line.
_KEY_RE = re.compile(r'^\s+([a-zA-Z0-9_-]+):')
# An indented line holding only a closing brace, with an optional comma.
_BLOCK_CLOSE_RE = re.compile(r'^\s+},?$')


def _clean_block(block_content, required_keys):
    """Return *block_content* without duplicate keys and with required keys added.

    Matching is line based: a line counts as an entry when it starts with
    indentation followed by ``key:``. The first line seen for a key is kept
    and later lines with the same key are dropped. Every other line is kept
    as is. Because only the start of each line is inspected, a continuation
    line of a multi-line value that happens to look like ``key:`` is treated
    as an entry too.
    """
    seen = set()
    kept = []
    for line in block_content.split('\n'):
        match = _KEY_RE.search(line)
        if match:
            key = match.group(1)
            if key in seen:
                continue  # Later duplicate: the first occurrence wins.
            seen.add(key)
        kept.append(line)

    # Placeholder entries use the key name itself as the value.
    additions = [f' {key}: "{key}",' for key in required_keys if key not in seen]

    # The closing brace is expected on the last line, or on the one before it
    # when the block ends with a trailing line (typically an empty one).
    if kept and _BLOCK_CLOSE_RE.search(kept[-1]):
        close_at = len(kept) - 1
    elif len(kept) >= 2 and _BLOCK_CLOSE_RE.search(kept[-2]):
        close_at = len(kept) - 2
    else:
        # No closing brace found in the last two lines: append the new
        # entries followed by a synthetic closer.
        return '\n'.join(kept + additions + [' },'])

    # Insert right before the closing brace so that whatever follows it
    # (for example the trailing newline) stays where it was.
    return '\n'.join(kept[:close_at] + additions + kept[close_at:])


def clean_translations(file_path: str) -> None:
    """Deduplicate keys and add missing required keys in a translations file.

    The file is split at every ``name: {`` header. The text before the first
    header is written back unchanged. Each following block is cleaned
    independently: repeated keys are removed (first occurrence wins) and any
    key from the required list that the block lacks is added just before the
    block's closing brace, using the key name as a placeholder value.

    A block whose closing brace is not on one of its last two lines gets the
    missing keys plus a synthetic closing line appended at its end.

    Args:
        file_path: Path of the UTF-8 encoded file to rewrite in place.

    Raises:
        OSError: If the file cannot be read or written.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # With one capture group, split() yields: header, then alternating
    # (block header, block content) pairs.
    header, *rest = _BLOCK_HEADER_RE.split(content)
    cleaned = [
        block_header + _clean_block(block_content, _REQUIRED_KEYS)
        for block_header, block_content in zip(rest[0::2], rest[1::2])
    ]

    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(header + ''.join(cleaned))
if __name__ == "__main__":
    # Script entry point: the first command-line argument is the path of the
    # translations file to rewrite in place.
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <translations-file>")
    clean_translations(sys.argv[1])