Update 测绘站采集.py
This commit is contained in:
+64
-2
@@ -29,6 +29,7 @@ from bs4 import BeautifulSoup
|
||||
from urllib.parse import urlparse
|
||||
from translate import Translator # 导入Translator类,用于文本翻译
|
||||
# -*- coding: utf-8 -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
import time
|
||||
import random
|
||||
import requests
|
||||
@@ -47,6 +48,7 @@ os.makedirs('playlist', exist_ok=True)
|
||||
DELAY_RANGE = (3, 6) # 随机延迟时间范围(秒)
|
||||
MAX_RETRIES = 3 # 最大重试次数
|
||||
REQUEST_TIMEOUT = 10 # 请求超时时间(秒)
|
||||
PROXY_REFRESH_INTERVAL = 300 # 代理刷新间隔(秒)
|
||||
|
||||
def get_random_header():
|
||||
"""生成随机请求头"""
|
||||
@@ -56,16 +58,76 @@ def get_random_header():
|
||||
'Referer': 'https://fofa.info/'
|
||||
}
|
||||
|
||||
# 代理池相关变量
|
||||
proxies = []
|
||||
last_refresh_time = 0
|
||||
|
||||
def scrape_proxies(url):
    """Scrape proxy addresses from an HTML proxy-listing page.

    The page is searched for a ``<table id="proxylisttable">``. For every
    data row with at least eight cells, the first cell is read as the IP,
    the second as the port, and the seventh as an HTTPS flag: the scheme is
    ``https`` when that cell is exactly ``"yes"``, otherwise ``http``.

    Args:
        url: Address of the page to download.

    Returns:
        A list of strings of the form ``"<scheme>://<ip>:<port>"``. An empty
        list is returned when the download or parsing fails, when the table
        is not found, or when no row qualifies.
    """
    try:
        # Bound the download: without an explicit timeout, requests waits
        # indefinitely on a stalled server. Uses the same module-level
        # limit that safe_request applies to its own requests.
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Locate the proxy table.
        table = soup.find('table', {'id': 'proxylisttable'})
        if not table:
            return []

        # Collected under a distinct name so the module-level ``proxies``
        # pool is not shadowed inside this function.
        found = []
        for row in table.find_all('tr')[1:]:  # skip the header row
            cols = row.find_all('td')
            if len(cols) < 8:
                continue

            ip = cols[0].text.strip()
            port = cols[1].text.strip()
            protocol = 'https' if cols[6].text.strip() == 'yes' else 'http'

            found.append(f"{protocol}://{ip}:{port}")

        return found

    except Exception as e:
        # Best-effort: report the failure and let the caller decide what to
        # do with an empty result.
        print(f"抓取代理失败: {str(e)}")
        return []
|
||||
|
||||
def get_proxies():
    """Return the cached proxy list, re-scraping it when empty or stale.

    The module-level ``proxies`` list is reused as long as it is non-empty
    and no more than ``PROXY_REFRESH_INTERVAL`` seconds have passed since
    ``last_refresh_time``. Otherwise the list is re-scraped and both
    globals are updated.

    Returns:
        The current, non-empty list of proxy URLs.

    Raises:
        Exception: If a refresh yields no proxies.
    """
    global proxies, last_refresh_time

    now = time.time()
    stale = now - last_refresh_time > PROXY_REFRESH_INTERVAL

    # Fast path: the cached pool is still usable.
    if proxies and not stale:
        return proxies

    print("正在刷新代理列表...")
    proxies = scrape_proxies("https://free-proxy-list.net/")
    last_refresh_time = now

    if not proxies:
        raise Exception("无法获取代理")

    return proxies
|
||||
|
||||
def safe_request(url):
|
||||
"""带重试机制的请求函数"""
|
||||
"""带重试机制和代理的请求函数"""
|
||||
for attempt in range(MAX_RETRIES):
|
||||
try:
|
||||
# 随机延迟防止被封
|
||||
time.sleep(random.uniform(*DELAY_RANGE))
|
||||
|
||||
# 获取代理列表
|
||||
proxy_list = get_proxies()
|
||||
|
||||
# 随机选择一个代理
|
||||
proxy = random.choice(proxy_list)
|
||||
print(f"使用代理: {proxy}")
|
||||
|
||||
response = requests.get(
|
||||
url,
|
||||
headers=get_random_header(),
|
||||
proxies={"http": proxy, "https": proxy},
|
||||
timeout=REQUEST_TIMEOUT
|
||||
)
|
||||
|
||||
@@ -127,7 +189,7 @@ def main():
|
||||
continue
|
||||
|
||||
# 构造搜索请求
|
||||
search_txt = f'"udpxy" && country="CN" && region="{province}'
|
||||
search_txt = f'"udpxy" && country="CN" && region="{province}"'
|
||||
encoded_query = base64.b64encode(search_txt.encode()).decode()
|
||||
search_url = f'https://fofa.info/result?qbase64={encoded_query}'
|
||||
|
||||
|
||||
Reference in New Issue
Block a user