From 36158e4621493c50267448f6dc6228f8bed2a36d Mon Sep 17 00:00:00 2001 From: frxz751113 <156018267+frxz751113@users.noreply.github.com> Date: Tue, 3 Sep 2024 10:16:09 +0800 Subject: [PATCH] =?UTF-8?q?Update=20=E9=85=92=E5=BA=97=E6=BA=90.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- py/酒店源.py | 79 +++++++++++++++++++--------------------------------- 1 file changed, 29 insertions(+), 50 deletions(-) diff --git a/py/酒店源.py b/py/酒店源.py index a440982..ef6216b 100644 --- a/py/酒店源.py +++ b/py/酒店源.py @@ -28,72 +28,51 @@ from translate import Translator # 导入Translator类,用于文本翻译 ###################################################################################################################### ###################################################################################################################### ###########################################################ZHGX采集#################################################### -###################################################################################################################### -###################################################################################################################### -import requests - +# 创建一个会话对象 +session = requests.Session() +def is_url_accessible(url): + try: + # 使用会话对象发送GET请求,设置超时时间为3秒 + response = session.get(url, timeout=3) + if 200 <= response.status_code <= 401: + return url + except requests.exceptions.RequestException: + pass + return None +# 异步检查URL是否可访问 +def check_urls(url_list, fixed_string): + valid_urls = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: + futures = {executor.submit(is_url_accessible, url + fixed_string): url for url in url_list} + for future in concurrent.futures.as_completed(futures): + result = future.result() + if result: + valid_urls.append(result) + return valid_urls urls = [ "https://fofa.info/result?qbase64=IlpIR1hUViIgJiYgcmVnaW9uPSJndWFuZ2Rvbmci", #广东 "https://fofa.info/result?qbase64=IlpIR1hUViIgJiYgcmVnaW9uPSJoZW5hbiI%3D", #河南 "https://fofa.info/result?qbase64=IlpIR1hUViIgJiYgcmVnaW9uPSJoZW5hbiIgJiYgcG9ydD0iODA5MCI=", #河南8090 "https://fofa.info/result?qbase64=IlpIR1hUViIgJiYgcmVnaW9uPSJoZWJlaSI%3D", #河北 ] - - - -# 定义一个函数来检查URL是否可访问 -def is_url_accessible(url): - try: - # 发送GET请求,设置超时时间为3秒 - response = requests.get(url, timeout=3) - # 如果响应状态码在200到401之间(包括200和401),则认为URL可访问 - if 200 <= response.status_code <= 401: - return url - except requests.exceptions.RequestException: - # 如果请求过程中出现异常,不做任何处理,直接跳过 - pass - return None - -# 创建一个空列表用于存储结果 -results = [] - -# 固定的字符串,你希望添加到每个URL的末尾 +# 固定的字符串 fixed_string = "/ZHGXTV/Public/json/live_interface.txt" +# 获取所有有效URL +results = [] for url in urls: - # 发送GET请求获取URL的内容 - response = requests.get(url) - # 获取响应的文本内容 + response = session.get(url) page_content = response.text - - # 查找所有符合指定格式的网址 - pattern = r"http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+" # 设置匹配的格式,如http://8.8.8.8:8888 - # 使用正则表达式在页面内容中查找所有符合格式的URL - urls_all = re.findall(pattern, page_content) - # 去重得到唯一的URL列表 + urls_all = re.findall(r"http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+", page_content) unique_urls = set(urls_all) - - valid_urls = [] - # 多线程获取可用url - with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor: - futures = [] - for original_url in unique_urls: - # 在原始URL后面添加固定字符串 - new_url = original_url + fixed_string - # 提交任务,检查每个新构造的URL是否可访问 - futures.append(executor.submit(is_url_accessible, new_url)) - for future in concurrent.futures.as_completed(futures): - result = future.result() - if result: - # 如果URL可访问,将其添加到有效URL列表中 - valid_urls.append(result) - - # 将找到的有效URL添加到结果列表中 + valid_urls = check_urls(unique_urls, fixed_string) results.extend(valid_urls) # 打印所有有效的URL for url in results: print(url) +# 关闭会话对象 +session.close() # 遍历网址列表,获取JSON文件并解析 for url in valid_urls: try: