Update 网络收集.py
This commit is contained in:
+6
-1
@@ -437,8 +437,12 @@ def merge_and_filter():
|
|||||||
|
|
||||||
total_lines = len(lines)
|
total_lines = len(lines)
|
||||||
|
|
||||||
# 读取上次保存的失败 IP 文件
|
# 检查并创建失败 IP 文件如果不存在
|
||||||
failed_ips_file_path = "failed_ips.txt"
|
failed_ips_file_path = "failed_ips.txt"
|
||||||
|
if not os.path.exists(failed_ips_file_path):
|
||||||
|
open(failed_ips_file_path, 'w', encoding='utf-8').close()
|
||||||
|
|
||||||
|
# 读取上次保存的失败 IP 文件
|
||||||
failed_ips = set()
|
failed_ips = set()
|
||||||
if os.path.exists(failed_ips_file_path):
|
if os.path.exists(failed_ips_file_path):
|
||||||
with open(failed_ips_file_path, 'r', encoding='utf-8') as failed_file:
|
with open(failed_ips_file_path, 'r', encoding='utf-8') as failed_file:
|
||||||
@@ -496,6 +500,7 @@ def merge_and_filter():
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def remove_duplicates(input_file, output_file):
|
def remove_duplicates(input_file, output_file):
|
||||||
# 用于存储已经遇到的URL和包含genre的行
|
# 用于存储已经遇到的URL和包含genre的行
|
||||||
seen_urls = set()
|
seen_urls = set()
|
||||||
|
|||||||
Reference in New Issue
Block a user