Update IPTV搜索引擎采集.py

2024-09-19 21:03:35 +08:00
parent 80834d6e32
commit ee73445618
1 changed file with 28 additions and 32 deletions
@@ -199,53 +199,49 @@ for keyword in keywords:
        org == "China Mobile communications corporation"
        isp_en = "cmcc"
-    current_time = datetime.now()
+
    current_time = datetime.datetime.now()
    timeout_cnt = 0
-    result_urls = set() 
+    while True:
    while len(result_urls) == 0 and timeout_cnt <= 5:
        try:
            search_url = 'https://fofa.info/result?qbase64='
-            search_txt = f'\"udpxy\" && country=\"CN\" && region=\"{province}\" && org=\"{org}\"'  # && org=\"{org}\"
+            search_txt = f'\"udpxy\" && country=\"CN\" && region=\"{province}\" && org=\"{org}\"'
                # 将字符串编码为字节流
            bytes_string = search_txt.encode('utf-8')
                # 使用 base64 进行编码
            search_txt = base64.b64encode(bytes_string).decode('utf-8')
            search_url += search_txt
-            print(f"{current_time} 查询运营商 : {province}{isp} ,查询网址 : {search_url}")
+            print(f"{current_time} 查询运营商 : {province}{isp},查询网址 : {search_url}")
            response = requests.get(search_url, timeout=5)
            # 处理响应
            response.raise_for_status()
            # 检查请求是否成功
            html_content = response.text
            # 使用BeautifulSoup解析网页内容
            html_soup = BeautifulSoup(html_content, "html.parser")
            # print(f"{current_time} html_content:{html_content}")
            # 查找所有符合指定格式的网址
            # 设置匹配的格式,如http://8.8.8.8:8888
            pattern = r"http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+"
            urls_all = re.findall(pattern, html_content)
            # 去重得到唯一的URL列表
            result_urls = set(urls_all)
            print(f"{current_time} result_urls:{result_urls}")
-            valid_ips = []
+            if result_urls:
-            # 遍历所有视频链接
+                valid_ips = []
-            for url in result_urls:
+                for url in result_urls:
-                video_url = url + "/rtp/" + mcast
+                    video_url = url + "/rtp/" + mcast
-                # 用OpenCV读取视频
+                    cap = cv2.VideoCapture(video_url)
-                cap = cv2.VideoCapture(video_url)
+                    if not cap.isOpened():
-                # 检查视频是否成功打开
+                        print(f"{current_time} {video_url} 无效")
-                if not cap.isOpened():
+                    else:
-                    print(f"{current_time} {video_url} 无效")
+                        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-                else:
+                        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-                    # 读取视频的宽度和高度
+                        print(f"{current_time} {video_url} 的分辨率为 {width}x{height}")
-                    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+                        if width > 0 and height > 0:
-                    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+                            valid_ips.append(url)
                    print(f"{current_time} {video_url} 的分辨率为 {width}x{height}")
                    # 检查分辨率是否大于0
                    if width > 0 and height > 0:
                        valid_ips.append(url)
                    # 关闭视频流
                    cap.release()
                break
            else:
                break
        except Exception as e:
            print(f"{current_time} 出现错误：{e}")
            if timeout_cnt > 5:
                break
            timeout_cnt += 1
            if valid_ips:
                #生成节目列表 省份运营商.txt