Create 源验证.py

2024-08-15 15:12:28 +08:00
parent bc92b7a812
commit 1131312ec0
1 changed files with 147 additions and 0 deletions
@@ -0,0 +1,147 @@
+import time
+import concurrent.futures
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from concurrent.futures import ThreadPoolExecutor
+import requests
+import re
+import os
+import threading
+from queue import Queue
+import queue
+from datetime import datetime
+import replace
+import fileinput
+from tqdm import tqdm
+from pypinyin import lazy_pinyin
+from opencc import OpenCC
+import base64
+import cv2
+from bs4 import BeautifulSoup
+from urllib.parse import urlparse
+from translate import Translator
+
+url = "https://raw.githubusercontent.com/frxz751113/collect-tv-txt/main/merged_output.txt"          #源采集地址
+r = requests.get(url)
+open('源.txt','wb').write(r.content)   
+#################################################### 对整理好的频道列表测试HTTP连接
+def test_connectivity(url, max_attempts=1): #定义测试HTTP连接的次数
+    # 尝试连接指定次数    
+   for _ in range(max_attempts):  
+    try:
+        response = requests.head(url, timeout=3)  # 发送HEAD请求,仅支持V4,修改此行数字可定义链接超时##////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+        #response = requests.get(url, timeout=1)  # 发送get请求,支持V6,修改此行数字可定义链接超时##############################//////////////////////////////////////////////////////////////////////////////////////
+        return response.status_code == 200  # 返回True如果状态码为200
+    except requests.RequestException:  # 捕获requests引发的异常
+        pass  # 发生异常时忽略
+   #return False  # 如果所有尝试都失败,返回False
+   pass   
+# 使用队列来收集结果的函数
+def process_line(line, result_queue):
+    parts = line.strip().split(",")  # 去除行首尾空白并按逗号分割
+    if len(parts) == 2 and parts[1]:  # 确保有URL,并且URL不为空
+        channel_name, channel_url = parts  # 分别赋值频道名称和URL
+        if test_connectivity(channel_url):  # 测试URL是否有效
+            result_queue.put((channel_name, channel_url, "有效"))  # 将结果放入队列
+        else:
+            result_queue.put((channel_name, channel_url, "无效"))  # 将结果放入队列
+    else:
+        # 格式不正确的行不放入队列
+        pass
+# 主函数
+def main(source_file_path, output_file_path):
+    with open(source_file_path, "r", encoding="utf-8") as source_file:  # 打开源文件
+        lines = source_file.readlines()  # 读取所有行s     
+    result_queue = queue.Queue()  # 创建队列
+    threads = []  # 初始化线程列表
+    for line in tqdm(lines, desc="检测进行中"):  # 显示进度条
+        thread = threading.Thread(target=process_line, args=(line, result_queue))  # 创建线程
+        thread.start()  # 启动线程
+        threads.append(thread)  # 将线程加入线程列表
+    for thread in threads:  # 等待所有线程完成
+        thread.join()
+    # 初始化计数器
+    valid_count = 0
+    invalid_count = 0
+    with open(output_file_path, "w", encoding="utf-8") as output_file:  # 打开输出文件
+        for _ in range(result_queue.qsize()):  # 使用队列的大小来循环
+            item = result_queue.get()  # 获取队列中的项目
+            # 只有在队列中存在有效的项目时才写入文件
+            if item[0] and item[1]:  # 确保channel_name和channel_url都不为None
+                output_file.write(f"{item[0]},{item[1]},{item[2]}\n")  # 写入文件
+                if item[2] == "有效":  # 统计有效源数量
+                    valid_count += 1
+                else:  # 统计无效源数量
+                    invalid_count += 1
+    print(f"任务完成, 有效源数量: {valid_count}, 无效源数量: {invalid_count}")  # 打印结果
+if __name__ == "__main__":
+    try:
+        source_file_path = "源.txt"  # 输入源文件路径
+        output_file_path = "源.txt"  # 设置输出文件路径
+        main(source_file_path, output_file_path)  # 调用main函数
+    except Exception as e:
+        print(f"程序发生错误: {e}")  # 打印错误信息
+# 初始化酒店源字典
+detected_ips = {}
+# 存储文件路径
+file_path = "源.txt"
+output_file_path = "优选.txt"
+def get_ip_key(url):
+    """从URL中提取IP地址,并构造一个唯一的键"""
+    # 找到'//'到第三个'.'之间的字符串
+    start = url.find('://') + 3  # '://'.length 是 3
+    end = start
+    dot_count = 0
+    while dot_count < 3:
+        end = url.find('.', end)
+        if end == -1:  # 如果没有找到第三个'.',就结束
+            break
+        dot_count += 1
+    return url[start:end] if dot_count == 3 else None
+# 打开输入文件和输出文件
+with open(file_path, 'r', encoding='utf-8') as file:
+    lines = file.readlines()
+# 获取总行数用于进度条
+total_lines = len(lines)
+# 写入通过检测的行到新文件
+with open(output_file_path, 'w', encoding='utf-8') as output_file:
+    # 使用tqdm显示进度条
+    for i, line in tqdm(enumerate(lines), total=total_lines, desc="Processing", unit='line'):
+        # 检查是否包含 'genre'
+        if 'genre' in line:
+            output_file.write(line)
+            continue
+        # 分割频道名称和URL,并去除空白字符
+        parts = line.split(',', 1)
+        if len(parts) == 2:
+            channel_name, url = parts
+            channel_name = channel_name.strip()
+            url = url.strip()
+            # 构造IP键
+            ip_key = get_ip_key(url)
+            if ip_key and ip_key in detected_ips:
+                # 如果IP键已存在,根据之前的结果决定是否写入新文件
+                if detected_ips[ip_key]['status'] == 'ok':
+                    output_file.write(line)
+            elif ip_key:  # 新IP键,进行检测
+                # 进行检测
+                cap = cv2.VideoCapture(url)
+                start_time = time.time()
+                frame_count = 0
+                # 尝试捕获5秒内的帧
+                while frame_count < 66 and (time.time() - start_time) < 3:#//////////////////////////////////////////////////////////////////////////////////////###########
+                    ret, frame = cap.read()
+                    if not ret:
+                        break
+                    frame_count += 1
+                # 释放资源
+                cap.release()
+                # 根据捕获的帧数判断状态并记录结果#////////////////////////////////////////////////////////////////////////////////////////////////////////////////###########
+                if frame_count >= 66:  #5秒内超过100帧则写入#/////////////////////////////////////////////////////////////////////////////////////////////////////###########
+                    detected_ips[ip_key] = {'status': 'ok'}
+                    output_file.write(line)  # 写入检测通过的行
+                else:
+                    detected_ips[ip_key] = {'status': 'fail'}
+# 打印酒店源
+for ip_key, result in detected_ips.items():
+    print(f"IP Key: {ip_key}, Status: {result['status']}")