From 8b2a97b0f668dafbdfd4c84daaec38888c82d3a0 Mon Sep 17 00:00:00 2001
From: frxz751113 <156018267+frxz751113@users.noreply.github.com>
Date: Wed, 25 Sep 2024 00:10:07 +0800
Subject: [PATCH] =?UTF-8?q?Update=20and=20rename=20=E6=96=87=E4=BB=B6?=
 =?UTF-8?q?=E6=A0=A1=E5=AF=B9.py=20to=20IP=E6=AF=94=E5=AF=B9.py?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 py/IP比对.py   | 47 +++++++++++++++++++++++++++++++++
 py/文件校对.py | 70 --------------------------------------------------
 2 files changed, 47 insertions(+), 70 deletions(-)
 create mode 100644 py/IP比对.py
 delete mode 100644 py/文件校对.py

diff --git a/py/IP比对.py b/py/IP比对.py
new file mode 100644
index 0000000..9f7eaac
--- /dev/null
+++ b/py/IP比对.py
@@ -0,0 +1,47 @@
+import re
+
+def compare_and_write_uniques(file1_path, file2_path, output_path):
+    """Write the IPs/domains found in file1 but not in file2 to output_path.
+
+    Both inputs are read as UTF-8 text and scanned with the two regexes
+    below. Each distinct match from file1 that was not matched in file2 is
+    written on its own line; the set iteration order is unspecified.
+
+    Args:
+        file1_path: Text file whose entries are filtered.
+        file2_path: Text file whose entries are excluded from the output.
+        output_path: Destination file; may equal file1_path, because file1
+            is fully read and closed before the output is opened.
+    """
+    # Patterns for dotted-quad IP-like tokens and for domain names.
+    ip_pattern = r'\b(?:\d{1,3}\.){3}\d{1,3}\b'
+    domain_pattern = r'\b(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}\b'
+
+    # Every IP address and domain that appears in the second file.
+    file2_ips_and_domains = set()
+    with open(file2_path, 'r', encoding='utf-8') as file2:
+        content2 = file2.read()
+        file2_ips_and_domains.update(re.findall(ip_pattern, content2))
+        file2_ips_and_domains.update(re.findall(domain_pattern, content2))
+
+    # Entries of the first file that are absent from the second file.
+    unique_ips_and_domains = set()
+    with open(file1_path, 'r', encoding='utf-8') as file1:
+        content1 = file1.read()
+        for ip in re.findall(ip_pattern, content1):
+            if ip not in file2_ips_and_domains:
+                unique_ips_and_domains.add(ip)
+        for domain in re.findall(domain_pattern, content1):
+            if domain not in file2_ips_and_domains:
+                unique_ips_and_domains.add(domain)
+
+    # Write each unique entry on its own line.
+    with open(output_path, 'w', encoding='utf-8') as output_file:
+        for item in unique_ips_and_domains:
+            output_file.write(item + '\n')
+
+# 示例用法
+file1_path = '无效IP.txt'
+file2_path = '网络收集.txt'
+output_path = '无效IP.txt'
+compare_and_write_uniques(file1_path, file2_path, output_path)
diff --git a/py/文件校对.py b/py/文件校对.py
deleted file mode 100644
index abece3c..0000000
--- a/py/文件校对.py
+++ /dev/null
@@ -1,70 +0,0 @@
-def extract_unique_lines(file1_path, file2_path, output_path):
-    # 用于存储两个文件中所有行的集合
-    all_lines_set = set()
-    # 用于存储两个文件中重复行的集合
-    duplicate_lines_set = set()
-
-    # 读取第一个文件的每一行，添加到集合中
-    with open(file1_path, 'r', encoding='utf-8') as file1:
-        for line in file1:
-            line = line.strip()
-            all_lines_set.add(line)
-            if line in duplicate_lines_set:
-                continue
-            duplicate_lines_set.add(line)
-
-    # 读取第二个文件的每一行，检查是否在第一个文件中出现过，处理后添加到相应集合
-    with open(file2_path, 'r', encoding='utf-8') as file2:
-        for line in file2:
-            line = line.strip()
-            if line in all_lines_set:
-                duplicate_lines_set.add(line)
-            else:
-                all_lines_set.add(line)
-
-    # 找到不重复的行
-    unique_lines = all_lines_set - duplicate_lines_set
-
-    # 将不重复的行写入新文件
-    with open(output_path, 'w', encoding='utf-8') as output_file:
-        for line in unique_lines:
-            output_file.write(line + '\n')
-
-# 示例用法
-file1_path = '无效IP.txt'
-file2_path = '网络收集.txt'
-output_path = '无效IP.txt'
-extract_unique_lines(file1_path, file2_path, output_path)
-
-import re
-
-def extract_ips_and_domains(input_file_path, output_file_path):
-    # 正则表达式模式，用于匹配IP地址和域名
-    ip_pattern = r'\b(?:\d{1,3}\.){3}\d{1,3}\b'
-    domain_pattern = r'\b(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}\b'
-
-    # 集合用于存储提取到的IP地址和域名，确保唯一性
-    ips_and_domains = set()
-
-    # 打开输入文件并读取内容
-    with open(input_file_path, 'r', encoding='utf-8') as input_file:
-        content = input_file.read()
-
-        # 查找IP地址并添加到集合
-        ips = re.findall(ip_pattern, content)
-        ips_and_domains.update(ips)
-
-        # 查找域名并添加到集合
-        domains = re.findall(domain_pattern, content)
-        ips_and_domains.update(domains)
-
-    # 打开输出文件并写入提取到的IP地址和域名
-    with open(output_file_path, 'w', encoding='utf-8') as output_file:
-        for item in ips_and_domains:
-            output_file.write(item + '\n')
-
-# 示例用法
-input_file_path = '无效IP.txt'
-output_file_path = '无效IP.txt'
-extract_ips_and_domains(input_file_path, output_file_path)
-