Files
aurak/extract_cjk.py
Developer 0a9588abb7 feat: implement QuestionBank CRUD with pagination and template query
- Add pagination support to findAll (page, limit query params)
- Add findByTemplateId method to service
- Add GET /by-template/:templateId endpoint to controller
- Service already includes CRUD for QuestionBank and QuestionBankItem
2026-04-23 17:19:11 +08:00

32 lines
1.3 KiB
Python

import os
import re
import json
# Source trees to scan and the filters applied while walking them.
directories = ['d:/workspace/AuraK/web', 'd:/workspace/AuraK/server/src']
exclude_dirs = ['node_modules', '.git', 'dist', '.next']
extensions = ['.ts', '.tsx', '.js', '.jsx']
# Matches runs of CJK Unified Ideographs (U+4E00-U+9FFF), Hiragana
# (U+3040-U+309F) and Katakana (U+30A0-U+30FF).
cjk_pattern = re.compile(r'[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff]+')


def _scan_file(file_path):
    """Return the CJK-bearing lines of one UTF-8 text file.

    Args:
        file_path: Path of the file to read.

    Returns:
        A list of ``{"line": <1-based line number>, "text": <stripped line>}``
        dicts, one per line matched by ``cjk_pattern``, in file order.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    hits = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line_number, line in enumerate(f, start=1):
            if cjk_pattern.search(line):
                hits.append({"line": line_number, "text": line.strip()})
    return hits


def extract_cjk_lines(roots, excluded=None, suffixes=None):
    """Walk directory trees and collect every source line containing CJK text.

    Args:
        roots: Iterable of directory paths to walk.
        excluded: Directory names that are not descended into. Defaults to
            the module-level ``exclude_dirs``.
        suffixes: File-name suffixes to scan. Defaults to the module-level
            ``extensions``.

    Returns:
        A dict mapping each file path that has at least one match to the
        list of hit dicts produced for it (see ``_scan_file``).

    Files that cannot be read or decoded are reported on stdout and skipped.
    """
    skipped = set(exclude_dirs if excluded is None else excluded)
    wanted = tuple(extensions if suffixes is None else suffixes)
    found = {}
    for top in roots:
        # os.walk ignores listing errors by default, so a missing root simply
        # yields nothing.
        for root, subdirs, files in os.walk(top):
            # Prune in place so os.walk never descends into excluded folders.
            subdirs[:] = [name for name in subdirs if name not in skipped]
            for file in files:
                if not file.endswith(wanted):
                    continue
                file_path = os.path.join(root, file)
                try:
                    # Hits are staged per file: a file that fails part-way
                    # through decoding contributes nothing to the result.
                    hits = _scan_file(file_path)
                except (OSError, UnicodeDecodeError) as e:
                    print(f"Error reading {file_path}: {e}")
                    continue
                if hits:
                    # extend (not assign) so a file reached through two
                    # overlapping roots accumulates entries.
                    found.setdefault(file_path, []).extend(hits)
    return found


def main():
    """Scan the configured directories and write the hits to cjk_extract.json."""
    cjk_lines = extract_cjk_lines(directories)
    with open('cjk_extract.json', 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps the CJK text readable in the output file.
        json.dump(cjk_lines, f, ensure_ascii=False, indent=2)
    print("Extracted to cjk_extract.json")


if __name__ == "__main__":
    main()