# Reconstructed from git patch 33fb747 ("Update 测绘站采集.py", 2025-05-03),
# hunk "@@ -28,7 +28,138 @@" of py/测绘站采集.py. The hunk removes one stray
# natural-language line (a request to stop using proxies) and adds the
# collector below, which talks to the search site directly.
import cv2
from bs4 import BeautifulSoup
from urllib.parse import urlparse
from translate import Translator  # Translator class, used for text translation
import time
import random
import requests
import re
import os
from datetime import datetime
import base64
from urllib.parse import quote
from fake_useragent import UserAgent  # install first: pip install fake-useragent

# Create the output directory.
os.makedirs('playlist', exist_ok=True)

# Tunables.
DELAY_RANGE = (3, 6)     # random delay before each request, in seconds
MAX_RETRIES = 3          # maximum number of attempts per URL
REQUEST_TIMEOUT = 10     # HTTP request timeout, in seconds
RATE_LIMIT_WAIT = 30     # pause after an HTTP 429 response, in seconds
VIDEO_TIMEOUT_MS = 5000  # stream open/read timeout, in milliseconds

# Matches "http://<ipv4>:<port>" occurrences in the search result page.
_URL_PATTERN = re.compile(r"http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+")

# Created on first use and then shared, so the User-Agent data is not
# reloaded for every single request.
_user_agent = None


def get_random_header():
    """Return HTTP request headers with a randomly chosen User-Agent."""
    global _user_agent
    if _user_agent is None:
        _user_agent = UserAgent()
    return {
        'User-Agent': _user_agent.random,
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'Referer': 'https://fofa.info/'
    }


def safe_request(url):
    """Fetch *url* with retries and return the response body as text.

    Each attempt is preceded by a random delay taken from DELAY_RANGE. An
    HTTP 429 response triggers a longer pause before the next attempt.

    Raises:
        requests.RequestException: if the final attempt fails, or if every
            attempt ended in HTTP 429. The function never returns None.
    """
    for attempt in range(MAX_RETRIES):
        is_last_attempt = attempt == MAX_RETRIES - 1

        # Random delay so the requests do not arrive at a fixed rate.
        time.sleep(random.uniform(*DELAY_RANGE))

        try:
            response = requests.get(
                url,
                headers=get_random_header(),
                timeout=REQUEST_TIMEOUT
            )

            if response.status_code == 429:
                # Waiting is only useful when another attempt follows.
                if not is_last_attempt:
                    print(f"遇到反爬机制,等待{RATE_LIMIT_WAIT}秒后重试")
                    time.sleep(RATE_LIMIT_WAIT)
                continue

            response.raise_for_status()
            return response.text

        except requests.RequestException as e:
            print(f"请求失败(第{attempt+1}次重试): {str(e)}")
            if is_last_attempt:
                raise

    # Only reachable when the last attempt was answered with HTTP 429.
    # Raise instead of falling through, so callers never receive None.
    raise requests.RequestException(
        f"重试{MAX_RETRIES}次后仍被限流(HTTP 429): {url}"
    )


def _open_capture(video_url):
    """Open *video_url* with the FFmpeg backend, with timeouts if supported."""
    open_prop = getattr(cv2, 'CAP_PROP_OPEN_TIMEOUT_MSEC', None)
    read_prop = getattr(cv2, 'CAP_PROP_READ_TIMEOUT_MSEC', None)
    if open_prop is not None and read_prop is not None:
        # The stream is opened inside the constructor, so the timeouts have
        # to be passed as open parameters; setting them afterwards is too late.
        # NOTE(review): assumes any OpenCV build that exposes these constants
        # also accepts the params argument - confirm against the pinned version.
        params = [open_prop, VIDEO_TIMEOUT_MS, read_prop, VIDEO_TIMEOUT_MS]
        return cv2.VideoCapture(video_url, cv2.CAP_FFMPEG, params)
    return cv2.VideoCapture(video_url, cv2.CAP_FFMPEG)


def validate_video(url, mcast):
    """Return True if the stream at ``{url}/rtp/{mcast}`` can be opened.

    A stream counts as valid when it opens and reports a frame width and
    height greater than zero. Any error while probing is printed and
    treated as "not valid".
    """
    video_url = f"{url}/rtp/{mcast}"
    print(f"正在验证: {video_url}")

    cap = None
    try:
        cap = _open_capture(video_url)
        if not cap.isOpened():
            return False
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        return width > 0 and height > 0
    except Exception as e:
        # Deliberately broad: probing is best-effort, and one bad address
        # must not abort the whole run.
        print(f"视频验证异常: {str(e)}")
        return False
    finally:
        # Release the handle on every path, including failures.
        if cap is not None:
            cap.release()


def _first_mcast(content):
    """Return the address following 'rtp://' on the first line of *content*.

    Raises IndexError when the first line holds no 'rtp://' address.
    """
    first_line = content.split('\n', 1)[0]
    return first_line.split('rtp://')[1].split()[0]


def _build_search_url(province):
    """Return the search URL for udpxy hosts in *province*."""
    search_txt = f'"udpxy" && country="CN" && region="{province}"'
    encoded_query = base64.b64encode(search_txt.encode()).decode()
    # Base64 output may contain '+', '/' and '=', which must be
    # percent-encoded before being placed in a query string.
    return f'https://fofa.info/result?qbase64={quote(encoded_query, safe="")}'


def main():
    """Build one playlist per ``rtp/<province>_<isp>.txt`` template file.

    For each template: take the multicast address from its first line,
    search for candidate ``http://ip:port`` hosts in that province, keep the
    hosts that serve the stream, and write ``playlist/<province><isp>.txt``
    containing one copy of the template per valid host with ``rtp://``
    replaced by ``<host>/rtp/``.
    """
    # Collect the template files to process (names without extension).
    files = [os.path.splitext(f)[0] for f in os.listdir('rtp') if f.endswith('.txt')]
    print(f"待处理频道列表: {files}")

    for filename in files:
        province_isp = filename.split('_')
        if len(province_isp) != 2:
            continue

        province, isp = province_isp
        print(f"\n正在处理: {province}{isp}")

        # Read the template once; it supplies both the multicast address
        # and the playlist body.
        try:
            with open(f'rtp/{filename}.txt', 'r', encoding='utf-8') as f:
                original_content = f.read()
            mcast = _first_mcast(original_content)
        except (OSError, UnicodeError, IndexError) as e:
            print(f"文件读取失败: {str(e)}")
            continue

        # Run the search.
        try:
            html = safe_request(_build_search_url(province))
        except Exception as e:
            # Broad on purpose: a failure for one province (network error,
            # header generation error, ...) must not stop the others.
            print(f"搜索失败: {str(e)}")
            continue

        # Extract candidate hosts; sorted so the output order is stable.
        found_urls = sorted(set(_URL_PATTERN.findall(html)))
        print(f"找到{len(found_urls)}个有效地址")

        # Keep only the hosts that actually serve the stream.
        valid_urls = [url for url in found_urls if validate_video(url, mcast)]
        print(f"验证通过{len(valid_urls)}个有效地址")

        # Write the playlist.
        if valid_urls:
            output_file = f'playlist/{province}{isp}.txt'
            with open(output_file, 'w', encoding='utf-8') as dst:
                for url in valid_urls:
                    modified = original_content.replace('rtp://', f'{url}/rtp/')
                    dst.write(modified + '\n')
            print(f"已生成播放列表: {output_file}")


if __name__ == '__main__':
    main()
print('对playlist文件夹里面的所有txt文件进行去重处理') def remove_duplicates_keep_order(folder_path): for filename in os.listdir(folder_path):