Update 测绘站采集.py
This commit is contained in:
+132
-1
@@ -28,7 +28,138 @@ import cv2
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urlparse
|
||||
from translate import Translator # 导入Translator类,用于文本翻译
|
||||
# 帮我去掉使用代理,因为无法获取代理地址。 (Request note: remove proxy usage, because no proxy address can be obtained.)
|
||||
# -*- coding: utf-8 -*-
|
||||
import time
|
||||
import random
|
||||
import requests
|
||||
import re
|
||||
import os
|
||||
import cv2
|
||||
from datetime import datetime
|
||||
from bs4 import BeautifulSoup
|
||||
import base64
|
||||
from fake_useragent import UserAgent # 需要先安装:pip install fake-useragent
|
||||
|
||||
# Make sure the output directory for generated playlists exists.
os.makedirs('playlist', exist_ok=True)

# Tunable request settings.
DELAY_RANGE = (3, 6)  # bounds, in seconds, of the random pause before each request
MAX_RETRIES = 3  # maximum number of attempts per request
REQUEST_TIMEOUT = 10  # per-request timeout, in seconds
|
||||
|
||||
def get_random_header():
    """Build the HTTP headers for one request, using a random User-Agent.

    Returns:
        A dict with ``User-Agent`` (taken from ``UserAgent().random``),
        a fixed ``Accept-Language`` and a fixed ``Referer``.
    """
    # A new UserAgent object is created on every call, as in the original.
    user_agent = UserAgent().random

    headers = {'User-Agent': user_agent}
    headers['Accept-Language'] = 'zh-CN,zh;q=0.9,en;q=0.8'
    headers['Referer'] = 'https://fofa.info/'
    return headers
|
||||
|
||||
def safe_request(url):
    """Fetch ``url`` with HTTP GET, retrying on failure, and return the body text.

    Every attempt is preceded by a random pause drawn from ``DELAY_RANGE``.
    A 429 response is treated as rate limiting: the function waits 30 seconds
    and tries again, as long as attempts remain.

    Args:
        url: Address to request.

    Returns:
        The response body as text.

    Raises:
        Exception: Whatever the final attempt raised (connection error,
            timeout, non-2xx status from ``raise_for_status``, ...).
        RuntimeError: If the attempts were used up without a usable response
            (for example the last attempt was answered with 429). Previously
            this case fell off the end of the loop and returned ``None``.
    """
    for attempt in range(MAX_RETRIES):
        is_last_attempt = attempt == MAX_RETRIES - 1
        try:
            # Random pause so the requests do not arrive at a fixed rhythm.
            time.sleep(random.uniform(*DELAY_RANGE))

            response = requests.get(
                url,
                headers=get_random_header(),
                timeout=REQUEST_TIMEOUT,
            )

            if response.status_code == 429:
                if is_last_attempt:
                    # No retry left: waiting would only delay the failure.
                    break
                wait_time = 30  # back-off, in seconds, after being rate limited
                print(f"遇到反爬机制,等待{wait_time}秒后重试")
                time.sleep(wait_time)
                continue

            response.raise_for_status()
            return response.text

        except Exception as e:
            # Retry boundary: any failure of this attempt is reported and,
            # unless it was the last attempt, retried.
            print(f"请求失败(第{attempt+1}次重试): {str(e)}")
            if is_last_attempt:
                raise

    # Reached only when no attempt produced a response to return.
    raise RuntimeError(f"重试{MAX_RETRIES}次后仍被限流(HTTP 429): {url}")
|
||||
|
||||
def validate_video(url, mcast):
    """Check whether ``{url}/rtp/{mcast}`` can be opened as a video stream.

    Args:
        url: Base address of the relay, e.g. ``http://1.2.3.4:8080``.
        mcast: Multicast address (``ip:port``) appended after ``/rtp/``.

    Returns:
        True when the stream opens and reports a positive frame width and
        height; False otherwise, including when any exception occurs.
    """
    video_url = f"{url}/rtp/{mcast}"
    print(f"正在验证: {video_url}")

    cap = None
    try:
        # OpenCV's FFmpeg timeouts are open-only properties: they must be
        # passed to the constructor, because the stream is already opened by
        # the time set() could be called. The constants are looked up with
        # getattr so that OpenCV builds lacking them still work (without a
        # timeout) instead of raising AttributeError on every call.
        open_timeout = getattr(cv2, 'CAP_PROP_OPEN_TIMEOUT_MSEC', None)
        read_timeout = getattr(cv2, 'CAP_PROP_READ_TIMEOUT_MSEC', None)
        if open_timeout is not None and read_timeout is not None:
            cap = cv2.VideoCapture(
                video_url,
                cv2.CAP_FFMPEG,
                [open_timeout, 5000, read_timeout, 5000],  # 5 second timeouts
            )
        else:
            cap = cv2.VideoCapture(video_url, cv2.CAP_FFMPEG)

        if not cap.isOpened():
            return False

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        return width > 0 and height > 0

    except Exception as e:
        # Best-effort probe: any failure just means "not a valid stream".
        print(f"视频验证异常: {str(e)}")
        return False

    finally:
        # Release the capture on every path, not only on success.
        if cap is not None:
            cap.release()
|
||||
|
||||
def main():
    """Build playlists for every ``rtp/<province>_<isp>.txt`` channel file.

    For each file the function:
      1. reads the multicast address from the first line of the file,
      2. searches fofa.info for udpxy hosts in that province,
      3. keeps the hosts on which the multicast stream validates,
      4. writes ``playlist/<province><isp>.txt`` containing one copy of the
         source file per valid host, with ``rtp://`` replaced by
         ``<host>/rtp/``.

    Files whose name is not exactly ``<province>_<isp>`` are skipped, as are
    files that cannot be read or parsed and provinces whose search fails.
    """
    # splitext only strips the final extension, so names containing extra
    # dots are kept intact.
    files = [os.path.splitext(f)[0] for f in os.listdir('rtp') if f.endswith('.txt')]
    print(f"待处理频道列表: {files}")

    # Loop-invariant: matches http://<ipv4>:<port>.
    pattern = re.compile(r"http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+")

    for filename in files:
        province_isp = filename.split('_')
        if len(province_isp) != 2:
            continue

        province, isp = province_isp
        print(f"\n正在处理: {province}{isp}")

        # Read the source file once (always as UTF-8) and take the multicast
        # address from its first line.
        source_path = f'rtp/{filename}.txt'
        try:
            with open(source_path, 'r', encoding='utf-8') as f:
                original_content = f.read()
            first_line = original_content.partition('\n')[0]
            mcast = first_line.split('rtp://')[1].split()[0].strip()
        except (OSError, UnicodeDecodeError, IndexError) as e:
            # IndexError: the first line has no "rtp://" or nothing after it.
            print(f"文件读取失败: {str(e)}")
            continue

        # Build the search request; the query is passed base64-encoded.
        search_txt = f'"udpxy" && country="CN" && region="{province}"'
        encoded_query = base64.b64encode(search_txt.encode()).decode()
        search_url = f'https://fofa.info/result?qbase64={encoded_query}'

        # Run the search.
        try:
            html = safe_request(search_url)
        except Exception as e:
            # safe_request re-raises whatever its last attempt raised.
            print(f"搜索失败: {str(e)}")
            continue

        # Extract candidate host addresses from the result page.
        found_urls = set(pattern.findall(html))
        print(f"找到{len(found_urls)}个有效地址")

        # Validate in sorted order so the playlist is deterministic.
        valid_urls = [url for url in sorted(found_urls) if validate_video(url, mcast)]
        print(f"验证通过{len(valid_urls)}个有效地址")

        # Write the playlist.
        if valid_urls:
            output_file = f'playlist/{province}{isp}.txt'
            # Same encoding as the source file, independent of the platform
            # default.
            with open(output_file, 'w', encoding='utf-8') as dst:
                for url in valid_urls:
                    modified = original_content.replace('rtp://', f'{url}/rtp/')
                    dst.write(modified + '\n')
            print(f"已生成播放列表: {output_file}")
|
||||
|
||||
# Run the collection workflow only when this file is executed as a script.
if __name__ == '__main__':
    main()
|
||||
print('对playlist文件夹里面的所有txt文件进行去重处理')
|
||||
def remove_duplicates_keep_order(folder_path):
|
||||
for filename in os.listdir(folder_path):
|
||||
|
||||
Reference in New Issue
Block a user