Update iptv-cv2-check.py

2024-08-11 18:59:41 +08:00
parent 5c7eee42f9
commit 9f182b2b2f
1 changed files with 193 additions and 8 deletions
@@ -22,13 +22,199 @@ from bs4 import BeautifulSoup
 from urllib.parse import urlparse
 from translate import Translator  # 导入Translator类，用于文本翻译
 #定义智慧桌面采集地址
 urls = [
    "https://fofa.info/result?qbase64=ImlwdHYvbGl2ZS96aF9jbi5qcyIgJiYgY291bnRyeT0iQ04iICYmIHJlZ2lvbj0i5Zub5bedIg%3D%3D",  #四川
 ]
 def modify_urls(url):
    modified_urls = []
    ip_start_index = url.find("//") + 2
    ip_end_index = url.find(":", ip_start_index)
    base_url = url[:ip_start_index]  # http:// or https://
    ip_address = url[ip_start_index:ip_end_index]
    port = url[ip_end_index:]
    ip_end = "/iptv/live/1000.json?key=txiptv"
    for i in range(1, 256):
        modified_ip = f"{ip_address[:-1]}{i}"
        modified_url = f"{base_url}{modified_ip}{port}{ip_end}"
        modified_urls.append(modified_url)
    return modified_urls
 def is_url_accessible(url):
    try:
        response = requests.get(url, timeout=3)          #//////////////////
        if response.status_code == 200:
            return url
    except requests.exceptions.RequestException:
        pass
    return None
 results = []
 for url in urls:
    # 创建一个Chrome WebDriver实例
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    driver = webdriver.Chrome(options=chrome_options)
    # 使用WebDriver访问网页
    driver.get(url)  # 将网址替换为你要访问的网页地址
    time.sleep(10)
    # 获取网页内容
    page_content = driver.page_source
    # 关闭WebDriver
    driver.quit()
    # 查找所有符合指定格式的网址
    pattern = r"http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+"  # 设置匹配的格式，如http://8.8.8.8:8888
    urls_all = re.findall(pattern, page_content)
    # urls = list(set(urls_all))  # 去重得到唯一的URL列表
    urls = set(urls_all)  # 去重得到唯一的URL列表
    x_urls = []
    for url in urls:  # 对urls进行处理，ip第四位修改为1，并去重
        url = url.strip()
        ip_start_index = url.find("//") + 2
        ip_end_index = url.find(":", ip_start_index)
        ip_dot_start = url.find(".") + 1
        ip_dot_second = url.find(".", ip_dot_start) + 1
        ip_dot_three = url.find(".", ip_dot_second) + 1
        base_url = url[:ip_start_index]  # http:// or https://
        ip_address = url[ip_start_index:ip_dot_three]
        port = url[ip_end_index:]
        ip_end = "1"
        modified_ip = f"{ip_address}{ip_end}"
        x_url = f"{base_url}{modified_ip}{port}"
        x_urls.append(x_url)
    urls = set(x_urls)  # 去重得到唯一的URL列表
    valid_urls = []
    #   多线程获取可用url
    with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
        futures = []
        for url in urls:
            url = url.strip()
            modified_urls = modify_urls(url)
            for modified_url in modified_urls:
                futures.append(executor.submit(is_url_accessible, modified_url))
        for future in concurrent.futures.as_completed(futures):
            result = future.result()
            if result:
                valid_urls.append(result)
    for url in valid_urls:
        print(url)
    # 遍历网址列表，获取JSON文件并解析
    for url in valid_urls:
        try:
            # 发送GET请求获取JSON文件，设置超时时间为0.5秒
            ip_start_index = url.find("//") + 2
            ip_dot_start = url.find(".") + 1
            ip_index_second = url.find("/", ip_dot_start)
            base_url = url[:ip_start_index]  # http:// or https://
            ip_address = url[ip_start_index:ip_index_second]
            url_x = f"{base_url}{ip_address}"
            json_url = f"{url}"
            response = requests.get(json_url, timeout=3)                        #///////////////
            json_data = response.json()
            try:
                # 解析JSON文件，获取name和url字段
                for item in json_data['data']:
                    if isinstance(item, dict):
                        name = item.get('name')
                        urlx = item.get('url')
                        if 'udp' in urlx or 'rtp' in urlx:
                            continue  # 跳过包含'udp'或'rtp'的行
                        if 'http' in urlx:
                            urld = f"{urlx}"
                        else:
                            urld = f"{url_x}{urlx}"
                        if name and urld:
                            name = name.replace("高清电影", "影迷电影")      
                            name = name.replace("（高清）", "")
                            name = name.replace("CHC动", "动")
                            name = name.replace("CHC家", "家")
                            name = name.replace("CHC影", "影")
                            name = name.replace("-", "")
                            name = name.replace(" ", "")
                            name = name.replace("PLUS", "+")
                            name = name.replace("＋", "+")
                            name = name.replace("CCTV1综合", "CCTV1")
                            name = name.replace("CCTV2财经", "CCTV2")
                            name = name.replace("CCTV3综艺", "CCTV3")
                            name = name.replace("CCTV4国际", "CCTV4")
                            name = name.replace("CCTV4中文国际", "CCTV4")
                            name = name.replace("CCTV4欧洲", "CCTV4")
                            name = name.replace("CCTV5体育", "CCTV5")
                            name = name.replace("CCTV5+体育", "CCTV5+")
                            name = name.replace("CCTV6电影", "CCTV6")
                            name = name.replace("CCTV7军事", "CCTV7")
                            name = name.replace("CCTV7军农", "CCTV7")
                            name = name.replace("CCTV7农业", "CCTV7")
                            name = name.replace("CCTV7国防军事", "CCTV7")
                            name = name.replace("CCTV8电视剧", "CCTV8")
                            name = name.replace("CCTV8纪录", "CCTV9")
                            name = name.replace("CCTV9记录", "CCTV9")
                            name = name.replace("CCTV9纪录", "CCTV9")
                            name = name.replace("CCTV10科教", "CCTV10")
                            name = name.replace("CCTV11戏曲", "CCTV11")
                            name = name.replace("CCTV12社会与法", "CCTV12")
                            name = name.replace("CCTV13新闻", "CCTV13")
                            name = name.replace("CCTV新闻", "CCTV13")
                            name = name.replace("CCTV14少儿", "CCTV14")
                            name = name.replace("央视14少儿", "CCTV14")
                            name = name.replace("CCTV少儿超", "CCTV14")
                            name = name.replace("CCTV15音乐", "CCTV15")
                            name = name.replace("CCTV音乐", "CCTV15")
                            name = name.replace("CCTV16奥林匹克", "CCTV16")
                            name = name.replace("CCTV17农业农村", "CCTV17")
                            name = name.replace("CCTV17军农", "CCTV17")
                            name = name.replace("CCTV17农业", "CCTV17")
                            name = name.replace("CCTV5+体育赛视", "CCTV5+")
                            name = name.replace("CCTV5+赛视", "CCTV5+")
                            name = name.replace("CCTV5+体育赛事", "CCTV5+")
                            name = name.replace("CCTV5+赛事", "CCTV5+")
                            name = name.replace("CCTV5+体育", "CCTV5+")
                            name = name.replace("CCTV5赛事", "CCTV5+")
                            results.append(f"{name},{urld}")
            except:
                continue
        except:
            continue
 channels = []
 for result in results:
    line = result.strip()
    if result:
        channel_name, channel_url = result.split(',')
        channels.append((channel_name, channel_url))
 with open("iptv.txt", 'w', encoding='utf-8') as file:           #打开文本以追加的形式写入行到ZHGX文件
    for result in results:
        file.write(result + "\n")
        print(result)
 print("频道列表文件iptv.txt追加写入成功！")
 ###################################################去除列表中的组播地址以及CCTV和卫视
 def filter_lines(input_file, output_file):
    with open(input_file, 'r', encoding='utf-8') as file:
        lines = file.readlines()
    filtered_lines = []
    for line in lines:
        #if ('hls' in line and 'm3u' in line) or ('tsfile' in line and 'm3u' in line):  #行中包含m3u的同时还要包含hls或者tsfile
        if ',' in line:  #仅提取
          if 'udp' not in line and 'rtp' not in line and 'AAAA' not in line and 'AAA' not in line:   #  排除组播地址及央卫
            filtered_lines.append(line)
    with open(output_file, 'w', encoding='utf-8') as output_file:
        output_file.writelines(filtered_lines)
 filter_lines("iptv.txt", "iptv.txt")
 print(f"文件已过滤完成")
 #定义智慧桌面采集地址
 urls = [
    #"https://fofa.info/result?qbase64=ImlwdHYvbGl2ZS96aF9jbi5qcyIgJiYgcG9ydD0iODA4Ig%3D%3D",  #808
    "https://fofa.info/result?qbase64=ImlwdHYvbGl2ZS96aF9jbi5qcyIgJiYgY291bnRyeT0iQ04iICYmIHJlZ2lvbj0i5Zub5bedIg%3D%3D",  #四川
    "https://fofa.info/result?qbase64=ImlwdHYvbGl2ZS96aF9jbi5qcyIgJiYgY291bnRyeT0iQ04iICYmIHJlZ2lvbj0iaGViZWki",#河北
    "https://fofa.info/result?qbase64=ImlwdHYvbGl2ZS96aF9jbi5qcyIgJiYgY291bnRyeT0iQ04iICYmIGNpdHk9Inl1bGluIg%3D%3D",    #玉林
    "https://fofa.info/result?qbase64=ImlwdHYvbGl2ZS96aF9jbi5qcyIgJiYgY291bnRyeT0iQ04iICYmIGNpdHk9Imd1aWdhbmci",  #贵港
@@ -261,11 +447,11 @@ for result in results:
    if result:
        channel_name, channel_url = result.split(',')
        channels.append((channel_name, channel_url))
-with open("iptv.txt", 'w', encoding='utf-8') as file:           #打开文本以追加的形式写入行到ZHGX文件
+with open("iptv1.txt", 'w', encoding='utf-8') as file:           #打开文本以追加的形式写入行到ZHGX文件
    for result in results:
        file.write(result + "\n")
        print(result)
-print("频道列表文件iptv.txt追加写入成功！")
+print("频道列表文件iptv1.txt追加写入成功！")
 ###################################################去除列表中的组播地址以及CCTV和卫视
@@ -276,13 +462,12 @@ def filter_lines(input_file, output_file):
    for line in lines:
        #if ('hls' in line and 'm3u' in line) or ('tsfile' in line and 'm3u' in line):  #行中包含m3u的同时还要包含hls或者tsfile
        if '凤凰' in line or '东森' in line or '天映' in line or '酒店' in line or '龙祥' in line or '美亚' in line or '好莱坞' in line or '珠江' in line or 'TVB' in line or '翡翠' in line \
-        or '明珠' in line or '广场舞' in line or '本港' in line or '动作' in line or '家庭' in line or '影迷' in line or '剧场' in line or '电影' in line or '影院' in line or '安徽卫视' in line or '辽宁卫视' in line \
+        or '明珠' in line or '广场舞' in line or '本港' in line or '动作' in line or '家庭' in line or '影迷' in line or '剧场' in line or '电影' in line or '影院' in line:  #仅提取港澳频道
        or 'CCTV1,' in line or 'CCTV3' in line or 'CCTV6' in line or 'CCTV8' in line or 'CCTV15' in line or '湖北卫视' in line or '湖南卫视' in line or '江苏卫视' in line or '北京卫视' in line:  #仅提取港澳频道
          if 'udp' not in line and 'rtp' not in line and 'AAAA' not in line and 'AAA' not in line:   #  排除组播地址及央卫
            filtered_lines.append(line)
-    with open(output_file, 'w', encoding='utf-8') as output_file:
+    with open(output_file, 'a', encoding='utf-8') as output_file:
        output_file.writelines(filtered_lines)
-filter_lines("iptv.txt", "iptv.txt")
+filter_lines("iptv1.txt", "iptv.txt")
 print(f"文件已过滤完成")
@@ -667,7 +852,7 @@ with open('酒店源.txt', 'w', encoding="utf-8") as file:
 #任务结束，删除不必要的过程文件
-files_to_remove = ['去重.txt', "2.txt", "iptv.txt", "e.txt", "a.txt", "b.txt", "检测结果.txt"]
+files_to_remove = ['去重.txt', "2.txt", "iptv.txt", "iptv1.txt", "e.txt", "a.txt", "b.txt", "检测结果.txt"]
 for file in files_to_remove:
    if os.path.exists(file):
        os.remove(file)