diff --git a/py/网络收集.py b/py/网络收集.py index 7532328..3bad929 100644 --- a/py/网络收集.py +++ b/py/网络收集.py @@ -437,8 +437,12 @@ def merge_and_filter(): total_lines = len(lines) - # 读取上次保存的失败 IP 文件 + # 检查并创建失败 IP 文件如果不存在 failed_ips_file_path = "failed_ips.txt" + if not os.path.exists(failed_ips_file_path): + open(failed_ips_file_path, 'w', encoding='utf-8').close() + + # 读取上次保存的失败 IP 文件 failed_ips = set() if os.path.exists(failed_ips_file_path): with open(failed_ips_file_path, 'r', encoding='utf-8') as failed_file: @@ -496,6 +500,7 @@ def merge_and_filter(): + def remove_duplicates(input_file, output_file): # 用于存储已经遇到的URL和包含genre的行 seen_urls = set()