"""Utilities for cleaning up line-based IP / domain list files.

Two independent steps are provided:

* ``extract_unique_lines`` keeps only the lines that occur exactly once
  across two input files.
* ``extract_ips_and_domains`` pulls every IPv4-looking address and domain
  name out of a file and writes them de-duplicated, one per line.

When run as a script, both steps are applied in place to ``无效IP.txt``.
"""
import re
from collections import Counter

# Dotted-quad matcher. It does not validate that each octet is <= 255.
_IP_PATTERN = re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}\b')
# One or more dot-terminated labels followed by an alphabetic TLD (>= 2 chars).
_DOMAIN_PATTERN = re.compile(r'\b(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}\b')


def _read_stripped_lines(path):
    """Return the non-blank lines of *path*, stripped of surrounding whitespace."""
    with open(path, 'r', encoding='utf-8') as handle:
        stripped = (line.strip() for line in handle)
        return [line for line in stripped if line]


def _write_lines(path, lines):
    """Write *lines* to *path*, one per line, UTF-8 encoded."""
    with open(path, 'w', encoding='utf-8') as handle:
        handle.writelines(line + '\n' for line in lines)


def extract_unique_lines(file1_path, file2_path, output_path):
    """Write the lines that occur exactly once across both input files.

    Lines are compared after stripping surrounding whitespace; blank lines
    are ignored. A line repeated inside one file, or present in both files,
    counts as a duplicate and is dropped. Output keeps first-seen order
    (file 1 first, then file 2).

    Both inputs are read completely before the output is opened, so
    ``output_path`` may safely be the same file as one of the inputs.

    Args:
        file1_path: Path of the first input file (UTF-8 text).
        file2_path: Path of the second input file (UTF-8 text).
        output_path: Path of the file to (over)write with the result.

    Returns:
        The list of lines that were written.

    Raises:
        OSError: If an input cannot be read or the output cannot be written.
    """
    # Counter keeps insertion order, so the result below is deterministic.
    occurrences = Counter(
        _read_stripped_lines(file1_path) + _read_stripped_lines(file2_path)
    )
    unique_lines = [line for line, seen in occurrences.items() if seen == 1]
    _write_lines(output_path, unique_lines)
    return unique_lines


def extract_ips_and_domains(input_file_path, output_file_path):
    """Extract IPv4-looking addresses and domain names from a text file.

    The whole input is scanned with the two module-level patterns. Matches
    are de-duplicated and written one per line: addresses first, then
    domains, each group in order of first appearance.

    The input is read completely before the output is opened, so both paths
    may refer to the same file.

    Args:
        input_file_path: Path of the text file to scan (UTF-8).
        output_file_path: Path of the file to (over)write with the matches.

    Returns:
        The list of addresses and domains that were written.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    with open(input_file_path, 'r', encoding='utf-8') as input_file:
        content = input_file.read()

    matches = _IP_PATTERN.findall(content) + _DOMAIN_PATTERN.findall(content)
    # dict.fromkeys removes repeats while preserving first-seen order.
    ips_and_domains = list(dict.fromkeys(matches))

    _write_lines(output_file_path, ips_and_domains)
    return ips_and_domains


if __name__ == "__main__":
    # Example usage: reduce the invalid-IP list against the collected list,
    # then normalise the result to bare addresses / domains, both in place.
    file1_path = '无效IP.txt'
    file2_path = '网络收集.txt'
    output_path = '无效IP.txt'
    extract_unique_lines(file1_path, file2_path, output_path)

    input_file_path = '无效IP.txt'
    output_file_path = '无效IP.txt'
    extract_ips_and_domains(input_file_path, output_file_path)