Update 酒店源.py

This commit is contained in:
frxz751113
2024-08-29 20:53:21 +08:00
committed by GitHub
parent eec0ec4e4a
commit 1ef1726041
+25 -64
View File
@@ -1,42 +1,18 @@
#本程序只适用于酒店源的检测,请勿移植他用 import requests
import time import re
import concurrent.futures import concurrent.futures
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
from concurrent.futures import ThreadPoolExecutor
import requests
import re
import os
import threading
from queue import Queue
import queue
from datetime import datetime
import replace
import fileinput
from tqdm import tqdm
from pypinyin import lazy_pinyin
from opencc import OpenCC
import base64
import cv2
from bs4 import BeautifulSoup
from urllib.parse import urlparse
from translate import Translator # 导入Translator类,用于文本翻译
# 扫源测绘空间地址
# 搜索关键词:"iptv/live/zh_cn.js" && country="CN" && region="Hunan" && city="changsha"
# 搜索关键词:"ZHGXTV" && country="CN" && region="Hunan" && city="changsha"
#"isShowLoginJs"智能KUTV管理
######################################################################################################################
######################################################################################################################
###########################################################ZHGX采集####################################################
######################################################################################################################
######################################################################################################################
# FOFA result pages to scrape. Each qbase64 value is the URL-encoded base64
# form of the FOFA query shown in the trailing comment.
urls = [
    "https://fofa.info/result?qbase64=IlpIR1hUViIgJiYgcmVnaW9uPSJndWFuZ2Rvbmci",  # "ZHGXTV" && region="guangdong"
    "https://fofa.info/result?qbase64=IlpIR1hUViIgJiYgcmVnaW9uPSJIdW5hbiI%3D",  # "ZHGXTV" && region="Hunan"
    "https://fofa.info/result?qbase64=Ym9keT0i5pm65oWn5YWJ6L%2BFIg%3D%3D",  # body="智慧光迅"
    # NOTE(review): this entry was labelled "Henan", but the query decodes to
    # server="nginx" with no region filter -- confirm the intended query.
    "https://fofa.info/result?qbase64=c2VydmVyPSJuZ2lueCI%3D",  # server="nginx"
    "https://fofa.info/result?qbase64=IlpIR1hUViIgJiYgcmVnaW9uPSJoZWJlaSI%3D",  # "ZHGXTV" && region="hebei"
]
# 定义网址替换规则 # 定义网址替换规则
def modify_urls(url): def modify_urls(url):
modified_urls = [] modified_urls = []
@@ -51,16 +27,19 @@ def modify_urls(url):
modified_url = f"{base_url}{modified_ip}{port}{ip_end}" modified_url = f"{base_url}{modified_ip}{port}{ip_end}"
modified_urls.append(modified_url) modified_urls.append(modified_url)
return modified_urls return modified_urls
# 定义超时时间以及是否返回正确的状态码 # 定义超时时间以及是否返回正确的状态码
def is_url_accessible(url):
    """Return ``url`` if a GET request to it succeeds, otherwise ``None``.

    The request uses a 1-second timeout. A response counts as a success when
    its status code is between 200 and 401 inclusive.
    NOTE(review): the upper bound of 401 presumably keeps hosts that answer
    but demand authentication -- confirm this is the intended cut-off.

    Args:
        url: The URL to probe.

    Returns:
        ``url`` unchanged when the probe succeeds; ``None`` when the request
        raises a ``requests`` exception or the status code is out of range.
    """
    # Only the network call can raise, so keep the try body minimal.
    try:
        response = requests.get(url, timeout=1)
    except requests.exceptions.RequestException:
        return None

    if 200 <= response.status_code <= 401:
        return url
    return None
results = [] results = []
for url in urls: for url in urls:
# 创建一个 Chrome WebDriver 实例 # 创建一个 Chrome WebDriver 实例
@@ -70,48 +49,32 @@ for url in urls:
chrome_options.add_argument('--disable-dev-shm-usage') chrome_options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome(options=chrome_options) driver = webdriver.Chrome(options=chrome_options)
# 使用 WebDriver 访问网页 # 使用 WebDriver 访问网页
driver.get(url) # 将网址替换为你要访问的网页地址 driver.get(url)
time.sleep(10) time.sleep(10)
# 获取网页内容 # 获取网页内容
page_content = driver.page_source page_content = driver.page_source
# 关闭 WebDriver # 关闭 WebDriver
driver.quit() driver.quit()
# 查找所有符合指定格式的网址
pattern = r"http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+"
# 查找所有符合指定格式的网址,使用正则表达式匹配页面内容中的URL urls_all = re.findall(pattern, page_content)
pattern = r"http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+" # 设置匹配的格式,如http://8.8.8.8:8888
urls_all = re.findall(pattern, page_content) # 使用findall方法查找所有匹配的URL
# 使用set去除重复的URL,得到唯一的URL列表
urls = set(urls_all) urls = set(urls_all)
# 初始化一个空列表,用于存储处理后的URL
x_urls = [] x_urls = []
# 遍历去重后的URL列表
for url in urls: for url in urls:
# 去除URL前后的空白字符
url = url.strip() url = url.strip()
# 找出URL中"http://"后面直到":"之间的部分(即IP地址部分)
ip_start_index = url.find("//") + 2 ip_start_index = url.find("//") + 2
ip_end_index = url.find(":", ip_start_index) ip_end_index = url.find(":", ip_start_index)
# 找出IP地址中各八位字节的位置
ip_dot_start = url.find(".") + 1 ip_dot_start = url.find(".") + 1
ip_dot_second = url.find(".", ip_dot_start) + 1 ip_dot_second = url.find(".", ip_dot_start) + 1
ip_dot_three = url.find(".", ip_dot_second) + 1 ip_dot_three = url.find(".", ip_dot_second) + 1
# 提取协议头部(http://或https://)
base_url = url[:ip_start_index] base_url = url[:ip_start_index]
# 提取IP地址
ip_address = url[ip_start_index:ip_dot_three] ip_address = url[ip_start_index:ip_dot_three]
# 提取端口号
port = url[ip_end_index:] port = url[ip_end_index:]
# 将IP地址的最后一部分修改为"1",生成新的IP地址
ip_end = "1" ip_end = "1"
modified_ip = f"{ip_address[:-1]}{ip_end}" modified_ip = f"{ip_address}{ip_end}"
# 构造新的URL
x_url = f"{base_url}{modified_ip}{port}" x_url = f"{base_url}{modified_ip}{port}"
# 将新的URL添加到列表中
x_urls.append(x_url) x_urls.append(x_url)
# 再次使用set去除重复的URL,得到最终的唯一URL列表
urls = set(x_urls) urls = set(x_urls)
# 初始化一个空列表,用于存储验证后的可访问URL
valid_urls = [] valid_urls = []
# 多线程获取可用 url # 多线程获取可用 url
with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor: with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
@@ -127,7 +90,7 @@ with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
valid_urls.append(result) valid_urls.append(result)
for url in valid_urls: for url in valid_urls:
print(url) print(url)
# 遍历网址列表,获取JSON文件并解析 # 遍历网址列表获取 JSON 文件并解析
for url in valid_urls: for url in valid_urls:
try: try:
json_url = f"{url}" json_url = f"{url}"
@@ -149,14 +112,12 @@ for url in valid_urls:
urld = (f"{urls[0]}//{urls[2]}/{urls[3]}") urld = (f"{urls[0]}//{urls[2]}/{urls[3]}")
else: else:
urld = (f"{urls[0]}//{urls[2]}") urld = (f"{urls[0]}//{urls[2]}")
print(f"{name},{urld}")
except:
pass
except:
pass
# 写入到文件中
with open('iptv.txt', 'a', encoding='utf-8') as outfile: with open('iptv.txt', 'a', encoding='utf-8') as outfile:
outfile.write(new_line + '\n') outfile.write(f"{name},{urld}\n")
except:
pass
except:
pass
print("频道列表文件 iptv.txt 获取完成!") print("频道列表文件 iptv.txt 获取完成!")