Update 测绘站采集.py
This commit is contained in:
+4
-4
@@ -131,9 +131,9 @@ def main():
|
|||||||
print(f"搜索失败: {str(e)}")
|
print(f"搜索失败: {str(e)}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 解析搜索结果
|
# 解析搜索结果,修改正则表达式以匹配IP和域名
|
||||||
soup = BeautifulSoup(html, 'html.parser')
|
soup = BeautifulSoup(html, 'html.parser')
|
||||||
pattern = re.compile(r"http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+")
|
pattern = re.compile(r"http://(?:\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|\w[\w.-]*\w):\d+")
|
||||||
found_urls = set(pattern.findall(html))
|
found_urls = set(pattern.findall(html))
|
||||||
print(f"找到{len(found_urls)}个有效地址")
|
print(f"找到{len(found_urls)}个有效地址")
|
||||||
|
|
||||||
@@ -194,7 +194,7 @@ import sys
|
|||||||
detected_ips = {}
|
detected_ips = {}
|
||||||
|
|
||||||
def get_ip_key(url):
|
def get_ip_key(url):
|
||||||
"""从URL中提取IP地址,并构造一个唯一的键"""
|
"""从URL中提取IP地址或域名,并构造一个唯一的键"""
|
||||||
start = url.find('://') + 3
|
start = url.find('://') + 3
|
||||||
end = url.find('/', start)
|
end = url.find('/', start)
|
||||||
if end == -1:
|
if end == -1:
|
||||||
@@ -228,7 +228,7 @@ for filename in os.listdir(folder_path):
|
|||||||
url = url.strip()
|
url = url.strip()
|
||||||
ip_key = get_ip_key(url)
|
ip_key = get_ip_key(url)
|
||||||
|
|
||||||
# 检查IP是否已经被检测过
|
# 检查IP或域名是否已经被检测过
|
||||||
if ip_key in detected_ips:
|
if ip_key in detected_ips:
|
||||||
# 如果之前检测成功,则写入该行
|
# 如果之前检测成功,则写入该行
|
||||||
if detected_ips[ip_key]['status'] == 'ok':
|
if detected_ips[ip_key]['status'] == 'ok':
|
||||||
|
|||||||
Reference in New Issue
Block a user