# process_file() as introduced by the patch "Update 收集.py" (commit 491a84b)
# for py/收集.py, restored to conventional multi-line formatting.


def process_file(file_path, encodings=('utf-8', 'gbk', 'latin1')):
    """Truncate every line of a text file at its first '$', rewriting it in place.

    The file is decoded with the first entry of ``encodings`` that reads it
    without a ``UnicodeDecodeError``; the same encoding is used to write the
    result back. Each output line is the text before the first '$' (or the
    whole line when it has none), terminated by exactly one newline.

    Args:
        file_path: Path of the text file to rewrite.
        encodings: Candidate encodings, tried in order. With the default
            candidates the final ``latin1`` fallback accepts any byte
            sequence, so decoding always succeeds.

    Raises:
        ValueError: If ``encodings`` is empty or none of the candidates can
            decode the file. The file is left untouched in that case.
        OSError: If the file cannot be opened for reading or writing.
    """
    decoded_lines = None
    detected_encoding = None
    last_error = None

    for candidate in encodings:
        try:
            with open(file_path, 'r', encoding=candidate) as file:
                decoded_lines = file.readlines()
        except UnicodeDecodeError as error:
            # Wrong guess: remember why and move on to the next candidate.
            last_error = error
        else:
            detected_encoding = candidate
            break

    if decoded_lines is None:
        # Fail before opening for writing so the original content survives.
        raise ValueError(
            f"could not decode {file_path!r} with any of the encodings: "
            f"{list(encodings)!r}"
        ) from last_error

    # Keep only the text before the first '$'. The newline is stripped and
    # re-added because cutting at '$' would otherwise drop the line ending
    # and glue the line onto the following one.
    processed_lines = [
        line.split('$', 1)[0].rstrip('\n') + '\n' for line in decoded_lines
    ]

    # Write back with the encoding that successfully decoded the file.
    with open(file_path, 'w', encoding=detected_encoding) as file:
        file.writelines(processed_lines)


if __name__ == '__main__':
    # Guarded so that importing this code does not rewrite a file.
    file_path = '汇总.txt'  # replace with your file path
    process_file(file_path)