From 4079c8c70254c0ae533061565988d87158e1d746 Mon Sep 17 00:00:00 2001 From: frxz751113 <156018267+frxz751113@users.noreply.github.com> Date: Tue, 3 Sep 2024 17:46:49 +0800 Subject: [PATCH] =?UTF-8?q?Update=20=E6=B5=8B=E8=AF=95.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- py/测试.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/py/测试.py b/py/测试.py index c3ab35e..5aad459 100644 --- a/py/测试.py +++ b/py/测试.py @@ -84,7 +84,9 @@ for url in urls: urls_all = re.findall(pattern, page_content) # 去重得到唯一的URL列表 unique_urls = set(urls_all) - + # 排除包含特定子字符串的域名 + unique_urls = {u for u in unique_urls if "fofa.info" not in u} + valid_urls = [] # 多线程获取可用url with ThreadPoolExecutor(max_workers=100) as executor: