Update IPTV搜索引擎采集.py
This commit is contained in:
+28
-32
@@ -199,53 +199,49 @@ for keyword in keywords:
|
|||||||
org == "China Mobile communications corporation"
|
org == "China Mobile communications corporation"
|
||||||
isp_en = "cmcc"
|
isp_en = "cmcc"
|
||||||
|
|
||||||
current_time = datetime.now()
|
|
||||||
|
|
||||||
|
current_time = datetime.datetime.now()
|
||||||
timeout_cnt = 0
|
timeout_cnt = 0
|
||||||
result_urls = set()
|
while True:
|
||||||
while len(result_urls) == 0 and timeout_cnt <= 5:
|
|
||||||
try:
|
try:
|
||||||
search_url = 'https://fofa.info/result?qbase64='
|
search_url = 'https://fofa.info/result?qbase64='
|
||||||
search_txt = f'\"udpxy\" && country=\"CN\" && region=\"{province}\" && org=\"{org}\"' # && org=\"{org}\"
|
search_txt = f'\"udpxy\" && country=\"CN\" && region=\"{province}\" && org=\"{org}\"'
|
||||||
# 将字符串编码为字节流
|
|
||||||
bytes_string = search_txt.encode('utf-8')
|
bytes_string = search_txt.encode('utf-8')
|
||||||
# 使用 base64 进行编码
|
|
||||||
search_txt = base64.b64encode(bytes_string).decode('utf-8')
|
search_txt = base64.b64encode(bytes_string).decode('utf-8')
|
||||||
search_url += search_txt
|
search_url += search_txt
|
||||||
print(f"{current_time} 查询运营商 : {province}{isp} ,查询网址 : {search_url}")
|
print(f"{current_time} 查询运营商 : {province}{isp},查询网址 : {search_url}")
|
||||||
response = requests.get(search_url, timeout=5)
|
response = requests.get(search_url, timeout=5)
|
||||||
# 处理响应
|
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
# 检查请求是否成功
|
|
||||||
html_content = response.text
|
html_content = response.text
|
||||||
# 使用BeautifulSoup解析网页内容
|
|
||||||
html_soup = BeautifulSoup(html_content, "html.parser")
|
html_soup = BeautifulSoup(html_content, "html.parser")
|
||||||
# print(f"{current_time} html_content:{html_content}")
|
|
||||||
# 查找所有符合指定格式的网址
|
|
||||||
# 设置匹配的格式,如http://8.8.8.8:8888
|
|
||||||
pattern = r"http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+"
|
pattern = r"http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+"
|
||||||
urls_all = re.findall(pattern, html_content)
|
urls_all = re.findall(pattern, html_content)
|
||||||
# 去重得到唯一的URL列表
|
|
||||||
result_urls = set(urls_all)
|
result_urls = set(urls_all)
|
||||||
print(f"{current_time} result_urls:{result_urls}")
|
print(f"{current_time} result_urls:{result_urls}")
|
||||||
valid_ips = []
|
if result_urls:
|
||||||
# 遍历所有视频链接
|
valid_ips = []
|
||||||
for url in result_urls:
|
for url in result_urls:
|
||||||
video_url = url + "/rtp/" + mcast
|
video_url = url + "/rtp/" + mcast
|
||||||
# 用OpenCV读取视频
|
cap = cv2.VideoCapture(video_url)
|
||||||
cap = cv2.VideoCapture(video_url)
|
if not cap.isOpened():
|
||||||
# 检查视频是否成功打开
|
print(f"{current_time} {video_url} 无效")
|
||||||
if not cap.isOpened():
|
else:
|
||||||
print(f"{current_time} {video_url} 无效")
|
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||||
else:
|
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||||
# 读取视频的宽度和高度
|
print(f"{current_time} {video_url} 的分辨率为 {width}x{height}")
|
||||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
if width > 0 and height > 0:
|
||||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
valid_ips.append(url)
|
||||||
print(f"{current_time} {video_url} 的分辨率为 {width}x{height}")
|
|
||||||
# 检查分辨率是否大于0
|
|
||||||
if width > 0 and height > 0:
|
|
||||||
valid_ips.append(url)
|
|
||||||
# 关闭视频流
|
|
||||||
cap.release()
|
cap.release()
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
except Exception as e:
|
||||||
|
print(f"{current_time} 出现错误:{e}")
|
||||||
|
if timeout_cnt > 5:
|
||||||
|
break
|
||||||
|
timeout_cnt += 1
|
||||||
|
|
||||||
|
|
||||||
if valid_ips:
|
if valid_ips:
|
||||||
#生成节目列表 省份运营商.txt
|
#生成节目列表 省份运营商.txt
|
||||||
|
|||||||
Reference in New Issue
Block a user