From 7825d10cb487b8deba09603ea7b6a2f2e3f181d2 Mon Sep 17 00:00:00 2001 From: frxz751113 <156018267+frxz751113@users.noreply.github.com> Date: Mon, 26 Aug 2024 21:07:26 +0800 Subject: [PATCH] =?UTF-8?q?Update=20=E6=94=B6=E9=9B=86.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- py/收集.py | 71 ++---------------------------------------------------- 1 file changed, 2 insertions(+), 69 deletions(-) diff --git a/py/收集.py b/py/收集.py index b00fef2..7b900b9 100644 --- a/py/收集.py +++ b/py/收集.py @@ -262,7 +262,7 @@ import re import os # 定义一个包含所有要排除的关键词的列表 excluded_keywords = [ - 'epg', 'mitv', 'udp', 'rtp', 'tsfile', 'hls', '[', 'P2p', 'p2p', 'p3p', 'P2P', 'P3p', 'P3P', '腔', '曲', '春节' + 'epg', 'mitv', 'udp', 'rtp', '[', 'P2p', 'p2p', 'p3p', 'P2P', 'P3p', 'P3P', '腔', '曲', '春节' ] # 定义一个包含所有要提取的关键词的列表 @@ -339,7 +339,7 @@ with open(output_file_path, 'w', encoding='utf-8') as output_file: start_time = time.time() frame_count = 0 # 尝试捕获10秒内的帧 - while frame_count < 30 and (time.time() - start_time) < 5: + while frame_count < 30 and (time.time() - start_time) < 2: ret, frame = cap.read() if not ret: break @@ -356,73 +356,6 @@ with open(output_file_path, 'w', encoding='utf-8') as output_file: for ip_key, result in detected_ips.items(): print(f"IP Key: {ip_key}, Status: {result['status']}") -######################################################################### -# 函数:获取视频分辨率 -def get_video_resolution(video_path, timeout=0.8): - cap = cv2.VideoCapture(video_path) - if not cap.isOpened(): - return None - width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - cap.release() - return (width, height) -# 函数:处理每一行 -def process_line(line, output_file, order_list, valid_count, invalid_count, total_lines): - parts = line.strip().split(',') - if '#genre#' in line: - # 如果行包含 '#genre#',直接写入新文件 - with threading.Lock(): - output_file.write(line) - print(f"已写入genre行:{line.strip()}") - elif len(parts) == 2: - channel_name, channel_url = parts - resolution = get_video_resolution(channel_url, timeout=8) - if resolution and resolution[1] >= 720: # 检查分辨率是否大于等于720p - with threading.Lock(): - output_file.write(f"{channel_name}[{resolution[1]}p],{channel_url}\n") - order_list.append((channel_name, resolution[1], channel_url)) - valid_count[0] += 1 - print(f"Channel '{channel_name}' accepted with resolution {resolution[1]}p at URL {channel_url}.") - else: - invalid_count[0] += 1 - with threading.Lock(): - print(f"有效: {valid_count[0]}, 无效: {invalid_count[0]}, 总数: {total_lines}, 进度: {(valid_count[0] + invalid_count[0]) / total_lines * 100:.2f}%") -# 函数:多线程工作 -def worker(task_queue, output_file, order_list, valid_count, invalid_count, total_lines): - while True: - try: - line = task_queue.get(timeout=1) - process_line(line, output_file, order_list, valid_count, invalid_count, total_lines) - except Queue.Empty: - break - finally: - task_queue.task_done() -# 主函数 -def main(source_file_path, output_file_path): - order_list = [] - valid_count = [0] - invalid_count = [0] - task_queue = Queue() - # 读取源文件 - with open(source_file_path, 'r', encoding='utf-8') as source_file: - lines = source_file.readlines() - with open(output_file_path + '.txt', 'w', encoding='utf-8') as output_file: - # 创建线程池 - with ThreadPoolExecutor(max_workers=64) as executor: - # 创建并启动工作线程 - for _ in range(64): - executor.submit(worker, task_queue, output_file, order_list, valid_count, invalid_count, len(lines)) - # 将所有行放入队列 - for line in lines: - task_queue.put(line) - # 等待队列中的所有任务完成 - task_queue.join() - print(f"任务完成,有效频道数:{valid_count[0]}, 无效频道数:{invalid_count[0]}, 总频道数:{len(lines)}") -if __name__ == "__main__": - source_file_path = '网络收集.txt' # 替换为你的源文件路径 - output_file_path = '网络收集' # 替换为你的输出文件路径,不要后缀名 - main(source_file_path, output_file_path) -