# An http(s) URL containing ".m3u8". The scheme may be followed by any mix of
# slashes and backslashes so that JSON-escaped URLs ("https:\/\/...") embedded
# in page source are matched as well as plain ones.
_M3U8_URL_RE = re.compile(r'(https?:[\\/]+[^"\'\s]+\.m3u8[^"\'\s]*)')

# One or more backslashes immediately before a slash, i.e. an escaped slash at
# any nesting depth ("\/" from JSON, "\\/" from JSON embedded in JSON, ...).
_ESCAPED_SLASH_RE = re.compile(r'\\+/')


async def fetch_m3u8(
    session: "aiohttp.ClientSession",
    name: str,
    link: str,
) -> "tuple[str, str | None, str]":
    """Fetch one room page and sniff the first ``.m3u8`` stream URL in it.

    The page at *link* is downloaded through *session* and its text is
    searched with a regular expression for the first URL containing
    ``.m3u8``. Escaped slashes in the match are normalised to plain ``/``.

    The function is best-effort: a non-200 status, a page without a match,
    or any exception raised while fetching all yield ``None`` for the
    stream URL instead of propagating an error.

    Args:
        session: Client session used to issue the GET request. Only
            ``session.get(url, headers=..., timeout=...)`` used as an async
            context manager is required of it.
        name: Label for the room; returned unchanged.
        link: URL of the room page to download; returned unchanged.

    Returns:
        A ``(name, m3u8_url, link)`` tuple, where ``m3u8_url`` is ``None``
        when no stream URL could be extracted.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }
    try:
        async with session.get(link, headers=headers, timeout=15) as response:
            if response.status == 200:
                text = await response.text()
                match = _M3U8_URL_RE.search(text)
                if match:
                    # Collapse every run of backslashes before a slash, not
                    # just a single "\/", so double-escaped URLs come out
                    # clean too.
                    m3u8_url = _ESCAPED_SLASH_RE.sub('/', match.group(1))
                    return name, m3u8_url, link
    except Exception:
        # Deliberately best-effort: one failing room must not abort the
        # whole batch. Record the failure at DEBUG level instead of
        # discarding it silently.
        import logging

        logging.getLogger(__name__).debug(
            "fetch_m3u8 failed for %s", link, exc_info=True
        )
    return name, None, link