Update 酒店源.py

This commit is contained in:
frxz751113
2024-08-30 02:44:53 +08:00
committed by GitHub
parent 9df7f73c7f
commit 3379d8db4f
+12 -21
View File
@@ -30,17 +30,18 @@ from translate import Translator # 导入Translator类,用于文本翻译
###########################################################ZHGX采集#################################################### ###########################################################ZHGX采集####################################################
###################################################################################################################### ######################################################################################################################
###################################################################################################################### ######################################################################################################################
import requests
urls = [ urls = [
#"https://fofa.info/result?qbase64=Ym9keT0i5pm65oWn5YWJ6L%2BFIg%3D%3D",#body="智慧光迅" "https://fofa.info/result?qbase64=IlpIR1hUViImJiBwb3J0PSIyMjIyMiI%3D",
"https://fofa.info/result?qbase64=IlpIR1hUViImJiBwb3J0PSIyMjIyMiI%3D",#河南22222 "https://fofa.info/result?qbase64=IlpIR1hUViIgJiYgcmVnaW9uPSJoZWJlaSI%3D",
"https://fofa.info/result?qbase64=IlpIR1hUViIgJiYgcmVnaW9uPSJoZWJlaSI%3D",#河北#
] ]
#定义网址替换规则
def modify_urls(url): def modify_urls(url):
modified_urls = [] modified_urls = []
ip_start_index = url.find("//") + 2 ip_start_index = url.find("//") + 2
ip_end_index = url.find(":", ip_start_index) ip_end_index = url.find(":", ip_start_index)
base_url = url[:ip_start_index] # http:// or https:// base_url = url[:ip_start_index]
ip_address = url[ip_start_index:ip_end_index] ip_address = url[ip_start_index:ip_end_index]
port = url[ip_end_index:] port = url[ip_end_index:]
ip_end = "/ZHGXTV/Public/json/live_interface.txt" ip_end = "/ZHGXTV/Public/json/live_interface.txt"
@@ -49,31 +50,21 @@ def modify_urls(url):
modified_url = f"{base_url}{modified_ip}{port}{ip_end}" modified_url = f"{base_url}{modified_ip}{port}{ip_end}"
modified_urls.append(modified_url) modified_urls.append(modified_url)
return modified_urls return modified_urls
#定义超时时间以及是否返回正确的状态码
def is_url_accessible(url):
    """Probe ``url`` with a GET request and report whether it answered.

    Args:
        url: The address to request.

    Returns:
        ``url`` itself when the response status code lies in the inclusive
        range 200..401, otherwise ``None``. ``None`` is also returned when
        the request raises ``requests.exceptions.RequestException``.
    """
    try:
        # Give up on hosts that do not answer within 10 seconds.
        status = requests.get(url, timeout=10).status_code
    except requests.exceptions.RequestException:
        # Best-effort probe: a failed request simply means "not accessible".
        return None
    # The accepted range is wider than a plain 200 check and reaches up to 401.
    return url if 200 <= status <= 401 else None
results = [] results = []
for url in urls: for url in urls:
# 创建一个Chrome WebDriver实例 response = requests.get(url)
chrome_options = Options() page_content = response.text
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome(options=chrome_options)
# 使用WebDriver访问网页
driver.get(url) # 将网址替换为你要访问的网页地址
time.sleep(10)
# 获取网页内容
page_content = driver.page_source
# 关闭WebDriver
driver.quit()
# 查找所有符合指定格式的网址 # 查找所有符合指定格式的网址
pattern = r"http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+" # 设置匹配的格式,如http://8.8.8.8:8888 pattern = r"http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+" # 设置匹配的格式,如http://8.8.8.8:8888
urls_all = re.findall(pattern, page_content) urls_all = re.findall(pattern, page_content)