Update 收集.py

This commit is contained in:
frxz751113
2024-08-26 11:14:57 +08:00
committed by GitHub
parent e6f918c9cf
commit 491a84b6a5
+13 -6
View File
@@ -66,14 +66,21 @@ merge_txt_files(urls)
# 打开文本文件并读取内容 # 打开文本文件并读取内容
def process_file(file_path): def process_file(file_path, encodings=['utf-8', 'gbk', 'latin1']):
with open(file_path, 'r', encoding='utf-8') as file: for encoding in encodings:
try:
with open(file_path, 'r', encoding=encoding) as file:
lines = file.readlines() lines = file.readlines()
# 如果没有异常发生,说明找到了正确的编码
break
except UnicodeDecodeError:
# 如果出现解码错误,尝试下一种编码
continue
# 处理每一行,去除每一行中第一个$及其后的所有内容 # 处理每一行,去除每一行中第一个$及其后的所有内容
processed_lines = [line.split('$', 1)[0] for line in lines] processed_lines = [line.split('$', 1)[0].rstrip('\n') + '\n' for line in lines]
# 将处理后的内容写回到文件 # 将处理后的内容写回到文件,使用找到的正确编码
with open(file_path, 'w', encoding='utf-8') as file: with open(file_path, 'w', encoding=encoding) as file:
file.write(''.join(processed_lines)) file.writelines(processed_lines)
# 调用函数并传入文件路径 # 调用函数并传入文件路径
file_path = '汇总.txt' # 替换为你的文件路径 file_path = '汇总.txt' # 替换为你的文件路径
process_file(file_path) process_file(file_path)