Update 网络收集.py

This commit is contained in:
frxz751113
2024-08-27 19:23:42 +08:00
committed by GitHub
parent 31770e4b65
commit 2a9a77cb25
+4 -4
View File
@@ -347,7 +347,7 @@ def parse_file(input_file_path, output_file_name):
ip_or_domain_to_lines[ip_or_domain].append(line) ip_or_domain_to_lines[ip_or_domain].append(line)
############################################################################### 过滤掉总字符数低于阈值的IP或域名段 ############################################################################### 过滤掉总字符数低于阈值的IP或域名段
filtered_ip_or_domain_to_lines = {ip_or_domain: lines for ip_or_domain, lines in ip_or_domain_to_lines.items() filtered_ip_or_domain_to_lines = {ip_or_domain: lines for ip_or_domain, lines in ip_or_domain_to_lines.items()
if sum(len(line) for line in lines) >= 200} if sum(len(line) for line in lines) >= 1000}
# 如果没有满足条件的IP或域名段,则不生成文件 # 如果没有满足条件的IP或域名段,则不生成文件
if not filtered_ip_or_domain_to_lines: if not filtered_ip_or_domain_to_lines:
print("没有满足条件的IP或域名段,不生成文件。") print("没有满足条件的IP或域名段,不生成文件。")
@@ -411,7 +411,7 @@ with open(output_file_path, 'w', encoding='utf-8') as output_file:
start_time = time.time() start_time = time.time()
frame_count = 0 frame_count = 0
# 尝试在限定时间内捕获最多10帧 # 尝试在限定时间内捕获最多10帧
while frame_count < 10 and (time.time() - start_time) < 5: while frame_count < 10 and (time.time() - start_time) < 3:
ret, frame = cap.read() ret, frame = cap.read()
if not ret: if not ret:
break break
@@ -439,7 +439,7 @@ from urllib.parse import urlparse
from tqdm import tqdm from tqdm import tqdm
# 测试HTTP连接并尝试下载数据 # 测试HTTP连接并尝试下载数据
def test_connectivity_and_download(url, initial_timeout=3, retry_timeout=5): def test_connectivity_and_download(url, initial_timeout=2, retry_timeout=3):
parsed_url = urlparse(url) parsed_url = urlparse(url)
if parsed_url.scheme not in ['http', 'https']: if parsed_url.scheme not in ['http', 'https']:
# 非HTTP(s)协议,尝试RTSP检测 # 非HTTP(s)协议,尝试RTSP检测
@@ -555,7 +555,7 @@ def parse_file(input_file_path, output_file_name):
ip_or_domain_to_lines[ip_or_domain].append(line) ip_or_domain_to_lines[ip_or_domain].append(line)
# 过滤掉总字符数低于阈值的IP或域名段 # 过滤掉总字符数低于阈值的IP或域名段
filtered_ip_or_domain_to_lines = {ip_or_domain: lines for ip_or_domain, lines in ip_or_domain_to_lines.items() filtered_ip_or_domain_to_lines = {ip_or_domain: lines for ip_or_domain, lines in ip_or_domain_to_lines.items()
if sum(len(line) for line in lines) >= 200} # 过滤掉总字符数低于阈值的IP或域名段 if sum(len(line) for line in lines) >= 800} # 过滤掉总字符数低于阈值的IP或域名段
# 如果没有满足条件的IP或域名段,则不生成文件 # 如果没有满足条件的IP或域名段,则不生成文件
if not filtered_ip_or_domain_to_lines: if not filtered_ip_or_domain_to_lines:
print("没有满足条件的IP或域名段,不生成文件。") print("没有满足条件的IP或域名段,不生成文件。")