Update 收集.py

2024-08-26 21:24:17 +08:00
parent 43a60ac8bf
commit e7eb5c6408
1 changed files with 2 additions and 4 deletions
@@ -389,7 +389,6 @@ def parse_file(input_file_path, output_file_name):
    # 用于生成分类名的字母和数字计数器
    alphabet_counter = 0  # 字母计数器，从0开始
    number_counter = 1     # 数字计数器，从1开始
    # 读取原始文件内容
    with open(input_file_path, 'r', encoding='utf-8') as file:
        for line in file:
@@ -408,10 +407,9 @@ def parse_file(input_file_path, output_file_name):
                if ip_or_domain not in ip_or_domain_to_lines:
                    ip_or_domain_to_lines[ip_or_domain] = []
                ip_or_domain_to_lines[ip_or_domain].append(line)
-
+    # 过滤掉小于1000字节的IP或域名段
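    # NOTE: the size compared is the sum of len(line) over a segment's lines, i.e. a character count (not bytes) if the lines are str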
    filtered_ip_or_domain_to_lines = {ip_or_domain: lines for ip_or_domain, lines in ip_or_domain_to_lines.items()
-                                      if sum(len(line) for line in lines) >= 2000}
+                                      if sum(len(line) for line in lines) >= 1000}
    # 如果没有满足条件的IP或域名段，则不生成文件
    if not filtered_ip_or_domain_to_lines: