From 491a84b6a5d40b29de07772a99a947aae3640bfb Mon Sep 17 00:00:00 2001
From: frxz751113 <156018267+frxz751113@users.noreply.github.com>
Date: Mon, 26 Aug 2024 11:14:57 +0800
Subject: [PATCH] =?UTF-8?q?Update=20=E6=94=B6=E9=9B=86.py?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 py/收集.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/py/收集.py b/py/收集.py
index 9eba892..0044bf9 100644
--- a/py/收集.py
+++ b/py/收集.py
@@ -66,14 +66,21 @@ merge_txt_files(urls)
 
 
 # 打开文本文件并读取内容
-def process_file(file_path):
-    with open(file_path, 'r', encoding='utf-8') as file:
-        lines = file.readlines()
+def process_file(file_path, encodings=('utf-8', 'gbk', 'latin1')):
+    """Strip each line from its first '$' in place; raises ValueError if no encoding decodes."""
+    for encoding in encodings:
+        try:
+            with open(file_path, 'r', encoding=encoding) as file:
+                lines = file.readlines()
+            break  # decoded cleanly: keep this encoding for the write-back below
+        except UnicodeDecodeError:
+            continue  # wrong guess: fall through to the next candidate encoding
+    else:
+        raise ValueError('cannot decode %r with any of %r' % (file_path, list(encodings)))
     # 处理每一行，去除每一行中第一个$及其后的所有内容
-    processed_lines = [line.split('$', 1)[0] for line in lines]
-    # 将处理后的内容写回到文件
-    with open(file_path, 'w', encoding='utf-8') as file:
-        file.write(''.join(processed_lines))
+    processed_lines = [line.split('$', 1)[0].rstrip('\n') + '\n' for line in lines]
+    with open(file_path, 'w', encoding=encoding) as file:
+        file.writelines(processed_lines)
 # 调用函数并传入文件路径
 file_path = '汇总.txt'  # 替换为你的文件路径
 process_file(file_path)