diff --git a/py/酒店源.py b/py/酒店源.py index a91db93..92283b9 100644 --- a/py/酒店源.py +++ b/py/酒店源.py @@ -292,69 +292,67 @@ urls = [ "https://fofa.info/result?qbase64=ImlwdHYvbGl2ZS96aF9jbi5qcyIgJiYgY291bnRyeT0iQ04iICYmIGNpdHk9Imd1aWdhbmci", #贵港 "https://fofa.info/result?qbase64=ImlwdHYvbGl2ZS96aF9jbi5qcyIgJiYgY291bnRyeT0iQ04iICYmIHBvcnQ9IjgxODEii",#8181 ] -def modify_urls(url): - modified_urls = [] - ip_start_index = url.find("//") + 2 - ip_end_index = url.find(":", ip_start_index) - base_url = url[:ip_start_index] - ip_address = url[ip_start_index:ip_end_index] - port = url[ip_end_index:] - ip_end = "/iptv/live/1000.json?key=txiptv" - for i in range(1, 256): - modified_ip = f"{ip_address[:-1]}{i}" - modified_url = f"{base_url}{modified_ip}{port}{ip_end}" - modified_urls.append(modified_url) - return modified_urls - def is_url_accessible(url): try: + # 发送 GET 请求,设置超时时间为 3 秒 response = requests.get(url, timeout=3) + # 如果响应状态码在 200 到 401 之间(包括 200 和 401),则认为 URL 可访问 if 200 <= response.status_code <= 401: return url except requests.exceptions.RequestException: + # 如果请求过程中出现异常,不做任何处理,直接跳过 pass return None +def get_content(url): + try: + response = requests.get(url, timeout=3) + response.raise_for_status() + # 确保内容类型为HTML或文本 + if 'text/html' in response.headers.get('Content-Type', ''): + return response.text + except requests.RequestException as e: + print(f"请求错误: {e}") + return None + +# 创建一个空列表用于存储结果 results = [] for url in urls: + # 发送 GET 请求获取 URL 的内容 response = requests.get(url) + # 获取响应的文本内容 + page_content = response.text + +def is_valid_url(url): + try: + response = requests.get(url, timeout=5) + if response.status_code == 200: + return True + else: + return False + except requests.RequestException: + return False + +for url in urls: + # 发送 GET 请求获取 URL 的内容 + response = requests.get(url) + # 获取响应的文本内容 page_content = response.text # 查找所有符合指定格式的网址 - pattern = r"http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+" # 设置匹配的格式,如http://8.8.8.8:8888 + # 匹配纯域名,可能带有http://或https://前缀,但不包含端口 + # 匹配纯数字IP地址,后面跟着端口号 + pattern = r"(https?://[\w-]+(?:\.[\w-]+)*(?::\d+)?|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?::\d+)?)" + # 使用正则表达式在页面内容中查找所有符合格式的 URL urls_all = re.findall(pattern, page_content) - # urls = list(set(urls_all)) # 去重得到唯一的URL列表 - urls = set(urls_all) # 去重得到唯一的URL列表 - x_urls = [] - for url in urls: # 对urls进行处理,ip第四位修改为1,并去重 - url = url.strip() - ip_start_index = url.find("//") + 2 - ip_end_index = url.find(":", ip_start_index) - ip_dot_start = url.find(".") + 1 - ip_dot_second = url.find(".", ip_dot_start) + 1 - ip_dot_three = url.find(".", ip_dot_second) + 1 - base_url = url[:ip_start_index] # http:// or https:// - ip_address = url[ip_start_index:ip_dot_three] - port = url[ip_end_index:] - ip_end = "1" - modified_ip = f"{ip_address}{ip_end}" - x_url = f"{base_url}{modified_ip}{port}" - x_urls.append(x_url) - urls = set(x_urls) # 去重得到唯一的URL列表 - valid_urls = [] - # 多线程获取可用url - with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor: - futures = [] - for url in urls: - url = url.strip() - modified_urls = modify_urls(url) - for modified_url in modified_urls: - futures.append(executor.submit(is_url_accessible, modified_url)) - for future in concurrent.futures.as_completed(futures): - result = future.result() - if result: - valid_urls.append(result) - for url in valid_urls: - print(url) + # 去重得到唯一的URL列表 + unique_urls = set(urls_all) + # 生成新的 URL 列表 + new_urls = [url + "/iptv/live/1000.json?key=txiptv" for url in unique_urls] + # 检测新 URL 列表的有效性 + valid_urls = [new_url for new_url in new_urls if is_valid_url(new_url)] + # 打印有效的 URL 列表 + print(valid_urls) + # 遍历网址列表,获取JSON文件并解析 for url in valid_urls: