diff --git a/py/网络收集.py b/py/网络收集.py index c3d8903..baad814 100644 --- a/py/网络收集.py +++ b/py/网络收集.py @@ -103,8 +103,8 @@ def read_and_process_file(input_filename, output_filename, encodings=['utf-8', ' with open(output_filename, 'w', encoding='utf-8') as outfile: for line in lines: - if '#$' in line: - processed_line = line.split('#$')[0].rstrip('\n') + if '$' in line: + processed_line = line.split('$')[0].rstrip('\n') outfile.write(processed_line + '\n') else: outfile.write(line) @@ -349,7 +349,7 @@ def parse_file(input_file_path, output_file_name): ip_or_domain_to_lines[ip_or_domain].append(line) ############################################################################### 过滤掉小于1500字节的IP或域名段 filtered_ip_or_domain_to_lines = {ip_or_domain: lines for ip_or_domain, lines in ip_or_domain_to_lines.items() - if sum(len(line) for line in lines) >= 1000} + if sum(len(line) for line in lines) >= 800} # 如果没有满足条件的IP或域名段,则不生成文件 if not filtered_ip_or_domain_to_lines: print("没有满足条件的IP或域名段,不生成文件。") @@ -557,7 +557,7 @@ def parse_file(input_file_path, output_file_name): ip_or_domain_to_lines[ip_or_domain].append(line) # 过滤掉小于1000字节的IP或域名段 filtered_ip_or_domain_to_lines = {ip_or_domain: lines for ip_or_domain, lines in ip_or_domain_to_lines.items() - if sum(len(line) for line in lines) >= 800} # 过滤掉小于1000字节的IP或域名段 + if sum(len(line) for line in lines) >= 500} # 过滤掉小于1000字节的IP或域名段 # 如果没有满足条件的IP或域名段,则不生成文件 if not filtered_ip_or_domain_to_lines: print("没有满足条件的IP或域名段,不生成文件。")