From 366e9f17bf7e8b49f863c976cd1f0898686d6999 Mon Sep 17 00:00:00 2001 From: frxz751113 <156018267+frxz751113@users.noreply.github.com> Date: Fri, 1 Nov 2024 15:53:46 +0800 Subject: [PATCH] =?UTF-8?q?Update=20=E6=B5=8B=E7=BB=98=E7=AB=99=E9=87=87?= =?UTF-8?q?=E9=9B=86.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- py/测绘站采集.py | 120 +++++++++++++++++++++++++++-------------------- 1 file changed, 68 insertions(+), 52 deletions(-) diff --git a/py/测绘站采集.py b/py/测绘站采集.py index 0f1f651..d08022a 100644 --- a/py/测绘站采集.py +++ b/py/测绘站采集.py @@ -31,6 +31,35 @@ from bs4 import BeautifulSoup from urllib.parse import urlparse from translate import Translator # 导入Translator类,用于文本翻译 +###################################################################################################################### +# 获取rtp目录下的文件名,组播IP采集 +files = os.listdir('rtp') +files_name = [] +# 去除后缀名并保存至provinces_isps +for file in files: + name, extension = os.path.splitext(file) + files_name.append(name) +#忽略不符合要求的文件名 +provinces_isps = [name for name in files_name if name.count('_') == 1] +print(f"本次查询:{provinces_isps}的组播节目") +keywords = [] +for province_isp in provinces_isps: + # 读取文件并删除空白行 + try: + with open(f'rtp/{province_isp}.txt', 'r', encoding='utf-8') as file: + lines = file.readlines() + lines = [line.strip() for line in lines if line.strip()] + # 获取第二行中以包含 "rtp://" 的值作为 mcast + if lines: + first_line = lines[1] + if "rtp://" in first_line: + mcast = first_line.split("rtp://")[1].split(" ")[0] + keywords.append(province_isp + "_" + mcast) + except FileNotFoundError: + # 如果文件不存在,则捕获 FileNotFoundError 异常并打印提示信息 + print(f"文件 '{province_isp}.txt' 不存在. 跳过此文件.") +requested_urls = set() # 用于记录已经请求过的地址 +parse_count = {} # 用于记录每个 URL 的解析次数 ###################################################################################################################### # 获取rtp目录下的文件名,组播IP采集 files = os.listdir('rtp') @@ -62,8 +91,8 @@ requested_urls = set() # 用于记录已经请求过的地址 parse_count = {} # 用于记录每个 URL 的解析次数 for keyword in keywords: province, isp, mcast = keyword.split("_") - #将省份转成英文小写 - # 根据不同的 isp 设置不同的 org 值 + # 将省份转成英文小写 + # 根据不同的isp设置不同的org值 if province == "北京" and isp == "联通": isp_en = "cucc" org = "China Unicom Beijing Province NeTwork" @@ -76,18 +105,18 @@ for keyword in keywords: elif isp == "移动": org = "China Mobile communications corporation" isp_en = "cmcc" - + current_time = datetime.now() timeout_cnt = 0 - result_urls = set() + result_urls = set() should_continue_while = True - while should_continue_while and len(result_urls) == 0 and timeout_cnt <= 5: + while should_continue_while and len(result_urls) == 0 and timeout_cnt <= 2: try: search_url = 'https://fofa.info/result?qbase64=' - search_txt = f'\"udpxy\" && country=\"CN\" && region=\"{province}\"' # && org=\"{org}\" + search_txt = f'"udpxy" && country="CN" && region="{province}"' # && org="{org}" # 将字符串编码为字节流 bytes_string = search_txt.encode('utf-8') - # 使用 base64 进行编码 + # 使用base64进行编码 search_txt = base64.b64encode(bytes_string).decode('utf-8') search_url += search_txt if search_url not in requested_urls: # 仅当地址未被请求过时才进行请求 @@ -119,54 +148,41 @@ for keyword in keywords: except (requests.Timeout, requests.RequestException) as e: timeout_cnt += 1 print(f"{current_time} [{province}]搜索请求发生超时,异常次数:{timeout_cnt}") - if timeout_cnt <= 5: - # 退出循环迭代 + if timeout_cnt > 2: + print(f"{current_time} 搜索IPTV频道源[{province}{isp}],超时次数过多:{timeout_cnt} 次,停止处理") break - else: - print(f"{current_time} 搜索IPTV频道源[],超时次数过多:{timeout_cnt} 次,停止处理") + valid_ips = [] + # 遍历所有视频链接 + for url in result_urls: + video_url = url + "/rtp/" + mcast + # 用OpenCV读取视频 + cap = cv2.VideoCapture(video_url) + # 检查视频是否成功打开 + if not cap.isOpened(): + print(f"{current_time} {video_url} 无效") + else: + # 读取视频的宽度和高度 + width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + print(f"{current_time} {video_url} 的分辨率为 {width}x{height}") + # 检查分辨率是否大于0 + if width > 0 and height > 0: + valid_ips.append(url) + # 关闭视频流 + cap.release() - valid_ips = [] - # 遍历所有视频链接 - for url in result_urls: - video_url = url + "/rtp/" + mcast - # 用OpenCV读取视频 - cap = cv2.VideoCapture(video_url) - # 检查视频是否成功打开 - if not cap.isOpened(): - print(f"{current_time} {video_url} 无效") - else: - # 读取视频的宽度和高度 - width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - print(f"{current_time} {video_url} 的分辨率为 {width}x{height}") - # 检查分辨率是否大于0 - if width > 0 and height > 0: - valid_ips.append(url) - # 关闭视频流 - cap.release() - - if valid_ips: - #生成节目列表 省份运营商.txt - rtp_filename = f'rtp/{province}_{isp}.txt' - with open(rtp_filename, 'r', encoding='utf-8') as file: - data = file.read() - txt_filename = f'playlist/{province}{isp}.txt' - with open(txt_filename, 'a') as new_file: #以追加形式写入 - for url in valid_ips: - new_data = data.replace("rtp://", f"{url}/rtp/") - new_file.write(new_data) - print(f'已生成播放列表,保存至{txt_filename}') - except (requests.Timeout, requests.RequestException) as e: - timeout_cnt += 1 - print(f"{current_time} [{province}]搜索请求发生超时,异常次数:{timeout_cnt}") - if timeout_cnt <= 5: - # 退出循环迭代 - break - else: - print(f"{current_time} 搜索IPTV频道源[],超时次数过多:{timeout_cnt} 次,停止处理") -print('节目表制作完成! 文件输出在playlist文件夹!') - + if valid_ips: + # 生成节目列表 省份运营商.txt + rtp_filename = f'rtp/{province}_{isp}.txt' + txt_filename = f'playlist/{province}{isp}.txt' + with open(rtp_filename, 'r', encoding='utf-8') as file: + data = file.read() + with open(txt_filename, 'a') as new_file: # 以追加形式写入 + for url in valid_ips: + new_data = data.replace("rtp://", f"{url}/rtp/") + new_file.write(new_data) + print(f'已生成播放列表,保存至{txt_filename}') print('对playlist文件夹里面的所有txt文件进行去重处理') def remove_duplicates_keep_order(folder_path):