From 0ee148bc793748bb99bf9396179424670989673a Mon Sep 17 00:00:00 2001 From: frxz751113 <156018267+frxz751113@users.noreply.github.com> Date: Tue, 6 May 2025 08:09:40 +0800 Subject: [PATCH] =?UTF-8?q?Update=20=E6=B5=8B=E7=BB=98=E7=AB=99=E9=87=87?= =?UTF-8?q?=E9=9B=86.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- py/测绘站采集.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/py/测绘站采集.py b/py/测绘站采集.py index f75d372..245023d 100644 --- a/py/测绘站采集.py +++ b/py/测绘站采集.py @@ -131,9 +131,9 @@ def main(): print(f"搜索失败: {str(e)}") continue - # 解析搜索结果 + # 解析搜索结果,修改正则表达式以匹配IP和域名 soup = BeautifulSoup(html, 'html.parser') - pattern = re.compile(r"http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+") + pattern = re.compile(r"http://(?:\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|\w[\w.-]*\w):\d+") found_urls = set(pattern.findall(html)) print(f"找到{len(found_urls)}个有效地址") @@ -194,7 +194,7 @@ import sys detected_ips = {} def get_ip_key(url): - """从URL中提取IP地址,并构造一个唯一的键""" + """从URL中提取IP地址或域名,并构造一个唯一的键""" start = url.find('://') + 3 end = url.find('/', start) if end == -1: @@ -228,7 +228,7 @@ for filename in os.listdir(folder_path): url = url.strip() ip_key = get_ip_key(url) - # 检查IP是否已经被检测过 + # 检查IP或域名是否已经被检测过 if ip_key in detected_ips: # 如果之前检测成功,则写入该行 if detected_ips[ip_key]['status'] == 'ok':