From 96c3a848e71175b3ba7a0fa20d8393c9e78c2fa3 Mon Sep 17 00:00:00 2001
From: frxz751113 <156018267+frxz751113@users.noreply.github.com>
Date: Sat, 3 May 2025 00:50:50 +0800
Subject: [PATCH] =?UTF-8?q?Update=20=E6=B5=8B=E7=BB=98=E7=AB=99=E9=87=87?=
 =?UTF-8?q?=E9=9B=86.py?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 py/测绘站采集.py | 80 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 71 insertions(+), 9 deletions(-)

diff --git a/py/测绘站采集.py b/py/测绘站采集.py
index 963a050..38c262f 100644
--- a/py/测绘站采集.py
+++ b/py/测绘站采集.py
@@ -29,6 +29,7 @@ from bs4 import BeautifulSoup
 from urllib.parse import urlparse
 from translate import Translator  # 导入Translator类,用于文本翻译
 # -*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
 import time
 import random
 import requests
@@ -47,6 +48,7 @@ os.makedirs('playlist', exist_ok=True)
 DELAY_RANGE = (3, 6)     # 随机延迟时间范围（秒）
 MAX_RETRIES = 3          # 最大重试次数
 REQUEST_TIMEOUT = 10     # 请求超时时间（秒）
+PROXY_REFRESH_INTERVAL = 300  # proxy list refresh interval (seconds)
 
 def get_random_header():
     """生成随机请求头"""
@@ -56,29 +58,89 @@ def get_random_header():
         'Referer': 'https://fofa.info/'
     }
 
+# Proxy-pool state; read and rebound by get_proxies() through ``global``
+proxies = []  # cached proxy URLs from the last successful scrape
+last_refresh_time = 0  # time.time() of the last refresh attempt (0 = never)
+
+def scrape_proxies(url):
+    """Scrape a proxy list from the HTML table on the page at ``url``.
+
+    Returns a list of ``"<protocol>://<ip>:<port>"`` strings. Any failure
+    (network error, HTTP error status, parsing error) is printed and
+    yields an empty list, as does a page without the expected table.
+    """
+    try:
+        # Without a timeout requests.get can block forever on a dead host.
+        response = requests.get(url, timeout=REQUEST_TIMEOUT)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, 'html.parser')
+        table = soup.find('table', {'id': 'proxylisttable'})
+        if not table:
+            return []
+
+        found = []  # local name, so the module-level ``proxies`` is not shadowed
+        for row in table.find_all('tr')[1:]:  # skip the header row
+            cols = row.find_all('td')
+            if len(cols) < 8:
+                continue  # not a full data row
+            ip = cols[0].text.strip()
+            port = cols[1].text.strip()
+            # 'yes' in column 6 selects https; anything else falls back to http
+            protocol = 'https' if cols[6].text.strip() == 'yes' else 'http'
+            found.append(f"{protocol}://{ip}:{port}")
+        return found
+    except Exception as e:
+        print(f"抓取代理失败: {str(e)}")
+        return []
+
+def get_proxies():
+    """Return the module-level proxy list, re-scraping it when needed.
+
+    Raises Exception when a refresh produces an empty list.
+    """
+    global proxies, last_refresh_time
+    now = time.time()
+    is_stale = now - last_refresh_time > PROXY_REFRESH_INTERVAL
+    if proxies and not is_stale:
+        return proxies  # cached list is non-empty and still fresh
+    print("正在刷新代理列表...")
+    proxies = scrape_proxies("https://free-proxy-list.net/")
+    last_refresh_time = now
+    if not proxies:
+        raise Exception("无法获取代理")
+    return proxies
+
 def safe_request(url):
-    """带重试机制的请求函数"""
+    """带重试机制和代理的请求函数"""
     for attempt in range(MAX_RETRIES):
         try:
             # 随机延迟防止被封
             time.sleep(random.uniform(*DELAY_RANGE))
-            
+          
+            # 获取代理列表
+            proxy_list = get_proxies()
+          
+            # 随机选择一个代理
+            proxy = random.choice(proxy_list)
+            print(f"使用代理: {proxy}")
+          
             response = requests.get(
                 url,
                 headers=get_random_header(),
+                proxies={"http": proxy, "https": proxy},
                 timeout=REQUEST_TIMEOUT
             )
-            
+          
             # 检查HTTP状态码
             if response.status_code == 429:
                 wait_time = 30  # 遇到反爬等待30秒
                 print(f"遇到反爬机制，等待{wait_time}秒后重试")
                 time.sleep(wait_time)
                 continue
-                
+              
             response.raise_for_status()
             return response.text
-            
+          
         except Exception as e:
             print(f"请求失败（第{attempt+1}次重试）: {str(e)}")
             if attempt == MAX_RETRIES - 1:
@@ -88,19 +150,19 @@ def validate_video(url, mcast):
     """验证视频流有效性"""
     video_url = f"{url}/rtp/{mcast}"
     print(f"正在验证: {video_url}")
-    
+  
     try:
         # 设置超时参数
         cap = cv2.VideoCapture(video_url, cv2.CAP_FFMPEG)
         cap.set(cv2.CAP_PROP_TIMEOUT, 5000)  # 5秒超时
-        
+      
         if cap.isOpened():
             width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
             height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
             cap.release()
             return width > 0 and height > 0
         return False
-        
+      
     except Exception as e:
         print(f"视频验证异常: {str(e)}")
         return False
@@ -127,7 +189,7 @@ def main():
             continue
 
         # 构造搜索请求
-        search_txt = f'"udpxy" && country="CN" && region="{province}'
+        search_txt = f'"udpxy" && country="CN" && region="{province}"'
         encoded_query = base64.b64encode(search_txt.encode()).decode()
         search_url = f'https://fofa.info/result?qbase64={encoded_query}'