From 82bfc629c1338429fefda1e4bbe13298a2fc3b99 Mon Sep 17 00:00:00 2001 From: frxz751113 <156018267+frxz751113@users.noreply.github.com> Date: Sun, 18 Aug 2024 01:37:19 +0800 Subject: [PATCH] =?UTF-8?q?Update=20iptv=E6=B5=81=E7=95=85=E5=BA=A6?= =?UTF-8?q?=E6=A3=80=E6=B5=8B.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- py/iptv流畅度检测.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/py/iptv流畅度检测.py b/py/iptv流畅度检测.py index f161b7d..aca0458 100644 --- a/py/iptv流畅度检测.py +++ b/py/iptv流畅度检测.py @@ -661,36 +661,6 @@ with open('综合源.txt', 'a', encoding="utf-8") as file: for line in unique_lines: file.write(line + '\n') # 确保每行后面有换行符 -##############################################################对生成的文件进行网址及文本去重复,避免同一个频道出现在不同的类中 -def remove_duplicates(input_file, output_file): - # 用于存储已经遇到的URL和包含genre的行 - seen_urls = set() - seen_lines_with_genre = set() - # 用于存储最终输出的行 - output_lines = [] - # 打开输入文件并读取所有行 - with open(input_file, 'r', encoding='utf-8') as f: - lines = f.readlines() - print("去重前的行数:", len(lines)) - # 遍历每一行 - for line in lines: - # 使用正则表达式查找URL和包含genre的行,默认最后一行 - urls = re.findall(r'://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', line) - genre_line = re.search(r'\bgenre\b', line, re.IGNORECASE) is not None - # 如果找到URL并且该URL尚未被记录 - if urls and urls[0] not in seen_urls: - seen_urls.add(urls[0]) - output_lines.append(line) - # 如果找到包含genre的行,无论是否已被记录,都写入新文件 - if genre_line: - output_lines.append(line) - # 将结果写入输出文件 - with open(output_file, 'w', encoding='utf-8') as f: - f.writelines(output_lines) - print("去重后的行数:", len(output_lines)) -# 使用方法 -remove_duplicates('综合源.txt', '综合源.txt') - ################################################################################################任务结束,删除不必要的过程文件