From 79e5c75de6f10bca16a52d20516f06851b6c4330 Mon Sep 17 00:00:00 2001 From: frxz751113 <156018267+frxz751113@users.noreply.github.com> Date: Wed, 25 Sep 2024 14:20:59 +0800 Subject: [PATCH] =?UTF-8?q?Update=20=E7=BD=91=E7=BB=9C=E6=94=B6=E9=9B=86.p?= =?UTF-8?q?y?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- py/网络收集.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/py/网络收集.py b/py/网络收集.py index 7532328..3bad929 100644 --- a/py/网络收集.py +++ b/py/网络收集.py @@ -437,8 +437,12 @@ def merge_and_filter(): total_lines = len(lines) - # 读取上次保存的失败 IP 文件 + # 检查并创建失败 IP 文件如果不存在 failed_ips_file_path = "failed_ips.txt" + if not os.path.exists(failed_ips_file_path): + open(failed_ips_file_path, 'w', encoding='utf-8').close() + + # 读取上次保存的失败 IP 文件 failed_ips = set() if os.path.exists(failed_ips_file_path): with open(failed_ips_file_path, 'r', encoding='utf-8') as failed_file: @@ -496,6 +500,7 @@ def merge_and_filter(): + def remove_duplicates(input_file, output_file): # 用于存储已经遇到的URL和包含genre的行 seen_urls = set()