Update 酒店源.py

This commit is contained in:
frxz751113
2024-09-03 10:16:09 +08:00
committed by GitHub
parent 6ba2bde533
commit 36158e4621
+29 -50
View File
@@ -28,72 +28,51 @@ from translate import Translator # 导入Translator类,用于文本翻译
###################################################################################################################### ######################################################################################################################
###################################################################################################################### ######################################################################################################################
###########################################################ZHGX采集#################################################### ###########################################################ZHGX采集####################################################
###################################################################################################################### # 创建一个会话对象
###################################################################################################################### session = requests.Session()
import requests def is_url_accessible(url):
try:
# 使用会话对象发送GET请求,设置超时时间为3秒
response = session.get(url, timeout=3)
if 200 <= response.status_code <= 401:
return url
except requests.exceptions.RequestException:
pass
return None
# Check candidate URLs concurrently and keep the ones that respond.
def check_urls(url_list, fixed_string, max_workers=10):
    """Probe ``url + fixed_string`` for every URL and return the reachable ones.

    Args:
        url_list: Iterable of base URL strings (a list or a set both work).
        fixed_string: Suffix appended to each base URL before it is probed.
        max_workers: Size of the thread pool used for probing. Defaults to
            10, the value that was previously hard-coded.

    Returns:
        A list of the full probed URLs (base URL plus suffix) for which
        ``is_url_accessible`` returned a truthy value. Results are collected
        in completion order, not in input order.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(is_url_accessible, url + fixed_string)
            for url in url_list
        ]
        # as_completed yields each future as soon as it finishes, so a slow
        # host does not hold up results from hosts that already answered.
        outcomes = (
            future.result()
            for future in concurrent.futures.as_completed(futures)
        )
        # is_url_accessible signals "not reachable" with None; drop those.
        return [outcome for outcome in outcomes if outcome]
urls = [ urls = [
"https://fofa.info/result?qbase64=IlpIR1hUViIgJiYgcmVnaW9uPSJndWFuZ2Rvbmci", #广东 "https://fofa.info/result?qbase64=IlpIR1hUViIgJiYgcmVnaW9uPSJndWFuZ2Rvbmci", #广东
"https://fofa.info/result?qbase64=IlpIR1hUViIgJiYgcmVnaW9uPSJoZW5hbiI%3D", #河南 "https://fofa.info/result?qbase64=IlpIR1hUViIgJiYgcmVnaW9uPSJoZW5hbiI%3D", #河南
"https://fofa.info/result?qbase64=IlpIR1hUViIgJiYgcmVnaW9uPSJoZW5hbiIgJiYgcG9ydD0iODA5MCI=", #河南8090 "https://fofa.info/result?qbase64=IlpIR1hUViIgJiYgcmVnaW9uPSJoZW5hbiIgJiYgcG9ydD0iODA5MCI=", #河南8090
"https://fofa.info/result?qbase64=IlpIR1hUViIgJiYgcmVnaW9uPSJoZWJlaSI%3D", #河北 "https://fofa.info/result?qbase64=IlpIR1hUViIgJiYgcmVnaW9uPSJoZWJlaSI%3D", #河北
] ]
# 固定的字符串
# Probe a single URL and report whether it answered.
def is_url_accessible(url):
    """Return ``url`` if a GET request to it succeeds, otherwise ``None``.

    The request uses a 3-second timeout. Any response whose status code is
    between 200 and 401 (both inclusive) counts as accessible. A
    ``requests.exceptions.RequestException`` raised while sending the request
    is treated as "not accessible" rather than propagated.
    """
    try:
        status = requests.get(url, timeout=3).status_code
    except requests.exceptions.RequestException:
        # Unreachable or misbehaving hosts are skipped on purpose.
        return None
    if status < 200 or status > 401:
        return None
    return url
# 创建一个空列表用于存储结果
results = []
# 固定的字符串,你希望添加到每个URL的末尾
fixed_string = "/ZHGXTV/Public/json/live_interface.txt" fixed_string = "/ZHGXTV/Public/json/live_interface.txt"
# 获取所有有效URL
results = []
for url in urls: for url in urls:
# 发送GET请求获取URL的内容 response = session.get(url)
response = requests.get(url)
# 获取响应的文本内容
page_content = response.text page_content = response.text
urls_all = re.findall(r"http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+", page_content)
# 查找所有符合指定格式的网址
pattern = r"http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+" # 设置匹配的格式,如http://8.8.8.8:8888
# 使用正则表达式在页面内容中查找所有符合格式的URL
urls_all = re.findall(pattern, page_content)
# 去重得到唯一的URL列表
unique_urls = set(urls_all) unique_urls = set(urls_all)
valid_urls = check_urls(unique_urls, fixed_string)
valid_urls = []
# 多线程获取可用url
with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
futures = []
for original_url in unique_urls:
# 在原始URL后面添加固定字符串
new_url = original_url + fixed_string
# 提交任务,检查每个新构造的URL是否可访问
futures.append(executor.submit(is_url_accessible, new_url))
for future in concurrent.futures.as_completed(futures):
result = future.result()
if result:
# 如果URL可访问,将其添加到有效URL列表中
valid_urls.append(result)
# 将找到的有效URL添加到结果列表中
results.extend(valid_urls) results.extend(valid_urls)
# 打印所有有效的URL # 打印所有有效的URL
for url in results: for url in results:
print(url) print(url)
# 关闭会话对象
session.close()
# 遍历网址列表,获取JSON文件并解析 # 遍历网址列表,获取JSON文件并解析
for url in valid_urls: for url in valid_urls:
try: try: