Update 网络收集.py
This commit is contained in:
+4
-4
@@ -103,8 +103,8 @@ def read_and_process_file(input_filename, output_filename, encodings=['utf-8', '
|
|||||||
|
|
||||||
with open(output_filename, 'w', encoding='utf-8') as outfile:
|
with open(output_filename, 'w', encoding='utf-8') as outfile:
|
||||||
for line in lines:
|
for line in lines:
|
||||||
if '#$' in line:
|
if '$' in line:
|
||||||
processed_line = line.split('#$')[0].rstrip('\n')
|
processed_line = line.split('$')[0].rstrip('\n')
|
||||||
outfile.write(processed_line + '\n')
|
outfile.write(processed_line + '\n')
|
||||||
else:
|
else:
|
||||||
outfile.write(line)
|
outfile.write(line)
|
||||||
@@ -349,7 +349,7 @@ def parse_file(input_file_path, output_file_name):
|
|||||||
ip_or_domain_to_lines[ip_or_domain].append(line)
|
ip_or_domain_to_lines[ip_or_domain].append(line)
|
||||||
############################################################################### 过滤掉小于1500字节的IP或域名段
|
############################################################################### 过滤掉小于1500字节的IP或域名段
|
||||||
filtered_ip_or_domain_to_lines = {ip_or_domain: lines for ip_or_domain, lines in ip_or_domain_to_lines.items()
|
filtered_ip_or_domain_to_lines = {ip_or_domain: lines for ip_or_domain, lines in ip_or_domain_to_lines.items()
|
||||||
if sum(len(line) for line in lines) >= 1000}
|
if sum(len(line) for line in lines) >= 800}
|
||||||
# 如果没有满足条件的IP或域名段,则不生成文件
|
# 如果没有满足条件的IP或域名段,则不生成文件
|
||||||
if not filtered_ip_or_domain_to_lines:
|
if not filtered_ip_or_domain_to_lines:
|
||||||
print("没有满足条件的IP或域名段,不生成文件。")
|
print("没有满足条件的IP或域名段,不生成文件。")
|
||||||
@@ -557,7 +557,7 @@ def parse_file(input_file_path, output_file_name):
|
|||||||
ip_or_domain_to_lines[ip_or_domain].append(line)
|
ip_or_domain_to_lines[ip_or_domain].append(line)
|
||||||
# 过滤掉小于1000字节的IP或域名段
|
# 过滤掉小于1000字节的IP或域名段
|
||||||
filtered_ip_or_domain_to_lines = {ip_or_domain: lines for ip_or_domain, lines in ip_or_domain_to_lines.items()
|
filtered_ip_or_domain_to_lines = {ip_or_domain: lines for ip_or_domain, lines in ip_or_domain_to_lines.items()
|
||||||
if sum(len(line) for line in lines) >= 800} # 过滤掉小于1000字节的IP或域名段
|
if sum(len(line) for line in lines) >= 500} # 过滤掉小于500字节的IP或域名段
|
||||||
# 如果没有满足条件的IP或域名段,则不生成文件
|
# 如果没有满足条件的IP或域名段,则不生成文件
|
||||||
if not filtered_ip_or_domain_to_lines:
|
if not filtered_ip_or_domain_to_lines:
|
||||||
print("没有满足条件的IP或域名段,不生成文件。")
|
print("没有满足条件的IP或域名段,不生成文件。")
|
||||||
|
|||||||
Reference in New Issue
Block a user