Update 测绘站采集.py

2025-05-03 00:50:50 +08:00
parent 97a6b4b9f6
commit 96c3a848e7
1 changed files with 71 additions and 9 deletions
@@ -29,6 +29,7 @@ from bs4 import BeautifulSoup
 from urllib.parse import urlparse
 from translate import Translator  # 导入Translator类,用于文本翻译
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
 import time
 import random
 import requests
@@ -47,6 +48,7 @@ os.makedirs('playlist', exist_ok=True)
 DELAY_RANGE = (3, 6)     # (min, max) bounds in seconds for the random pause before each request
 MAX_RETRIES = 3          # maximum number of attempts safe_request makes per URL
 REQUEST_TIMEOUT = 10     # per-request timeout in seconds
 PROXY_REFRESH_INTERVAL = 300  # seconds before the cached proxy list is scraped again
 def get_random_header():
    """生成随机请求头"""
@@ -56,16 +58,76 @@ def get_random_header():
        'Referer': 'https://fofa.info/'
    }
 # Proxy pool state (module-level, mutated by get_proxies via `global`)
 proxies = []  # cached proxy URLs; empty until the first successful scrape
 last_refresh_time = 0  # time.time() stamp of the last refresh attempt; 0 = never refreshed
 def scrape_proxies(url):
    """Scrape proxy URLs from the HTML table served at ``url``.

    The page is searched for ``<table id="proxylisttable">``. The first row
    is treated as the header and skipped; every other row must have at least
    8 cells, with the IP in cell 0, the port in cell 1 and an HTTPS flag
    (the literal text ``yes``) in cell 6.

    Args:
        url: Address of the proxy-list page to download.

    Returns:
        A list of strings of the form ``"<protocol>://<ip>:<port>"``. An
        empty list is returned when the table is missing or when any error
        occurs (the error is printed, never raised).
    """
    try:
        # Bound the download: without a timeout requests waits indefinitely,
        # so an unresponsive proxy site would stall the whole scraper.
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Locate the proxy table
        table = soup.find('table', {'id': 'proxylisttable'})
        if not table:
            return []
        # Parse the proxy rows. The result list is deliberately NOT named
        # `proxies`, so it cannot be confused with the module-level cache.
        found = []
        for row in table.find_all('tr')[1:]:  # skip the header row
            cols = row.find_all('td')
            if len(cols) < 8:
                continue
            ip = cols[0].text.strip()
            port = cols[1].text.strip()
            # NOTE(review): an "https://" proxy URL may make requests open a
            # TLS connection to the proxy itself on recent urllib3 versions;
            # confirm this is intended for proxies flagged as HTTPS-capable.
            protocol = 'https' if cols[6].text.strip() == 'yes' else 'http'
            found.append(f"{protocol}://{ip}:{port}")
        return found
    except Exception as e:
        # Deliberately best-effort: any network or parsing failure is
        # reported and turned into "no proxies" for the caller to handle.
        print(f"抓取代理失败: {e}")
        return []
 def get_proxies():
    """Return the cached proxy list, refreshing it when empty or expired.

    The cache is refreshed from https://free-proxy-list.net/ when it is
    empty or when more than ``PROXY_REFRESH_INTERVAL`` seconds have passed
    since the last refresh attempt. If a refresh yields nothing but an older
    list is still cached, the older list is kept and returned instead of
    being discarded.

    Returns:
        The module-level list of proxy URL strings (never empty).

    Raises:
        Exception: If no proxies could be scraped and none are cached.
    """
    global proxies, last_refresh_time
    now = time.time()
    expired = now - last_refresh_time > PROXY_REFRESH_INTERVAL
    if proxies and not expired:
        return proxies

    print("正在刷新代理列表...")
    fresh = scrape_proxies("https://free-proxy-list.net/")
    # Record the attempt even on failure so that a kept stale list is not
    # re-scraped on every single call.
    last_refresh_time = now
    if fresh:
        proxies = fresh
    elif not proxies:
        # Nothing scraped and nothing cached: there is no proxy to offer.
        raise Exception("无法获取代理")
    # Otherwise the refresh failed but the previous list is still usable.
    return proxies
 def safe_request(url):
-    """带重试机制的请求函数"""
+    """带重试机制和代理的请求函数"""
    for attempt in range(MAX_RETRIES):
        try:
            # 随机延迟防止被封
            time.sleep(random.uniform(*DELAY_RANGE))
            # 获取代理列表
            proxy_list = get_proxies()
            # 随机选择一个代理
            proxy = random.choice(proxy_list)
            print(f"使用代理: {proxy}")
            response = requests.get(
                url,
                headers=get_random_header(),
                proxies={"http": proxy, "https": proxy},
                timeout=REQUEST_TIMEOUT
            )
@@ -127,7 +189,7 @@ def main():
            continue
        # 构造搜索请求
-        search_txt = f'"udpxy" && country="CN" && region="{province}'
+        search_txt = f'"udpxy" && country="CN" && region="{province}"'
        encoded_query = base64.b64encode(search_txt.encode()).decode()
        search_url = f'https://fofa.info/result?qbase64={encoded_query}'