import asyncio
import logging
import re
from typing import Optional, Tuple

import aiohttp
from playwright.async_api import async_playwright

logger = logging.getLogger(__name__)

# Desktop Chrome User-Agent sent with every room-page request.
_REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}

# Total per-request budget in seconds, expressed as a ClientTimeout (the
# documented form) instead of a bare number.
_REQUEST_TIMEOUT = aiohttp.ClientTimeout(total=15)

# Matches a plain or JSON-escaped (``https:\/\/...``) URL containing ``.m3u8``,
# terminated by a quote or whitespace. Compiled once instead of on every call.
_M3U8_PATTERN = re.compile(r'(https?:[\\/]+[^"\'\s]+\.m3u8[^"\'\s]*)')


async def fetch_m3u8(
    session: aiohttp.ClientSession, name: str, link: str
) -> Tuple[str, Optional[str], str]:
    """Fetch one room page and sniff the first ``.m3u8`` stream URL from it.

    The page source is downloaded over plain HTTP through ``session`` and
    searched with a regular expression. The original author's note says this
    approach is meant to keep concurrency stable and avoid running out of
    memory on GitHub Actions.

    Args:
        session: Shared aiohttp client session used for the GET request.
        name: Label of the room; returned unchanged.
        link: URL of the room page to download.

    Returns:
        ``(name, m3u8_url, link)``. ``m3u8_url`` is ``None`` when the response
        status is not 200, when no ``.m3u8`` URL is found, or when the request
        fails for any reason.

    This coroutine is best-effort: failures are logged and never raised.
    """
    try:
        async with session.get(
            link, headers=_REQUEST_HEADERS, timeout=_REQUEST_TIMEOUT
        ) as response:
            if response.status != 200:
                return name, None, link
            # Replace undecodable bytes rather than losing the whole page to a
            # UnicodeDecodeError; the URL being searched for is plain ASCII.
            text = await response.text(errors="replace")
    except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
        logger.warning("Fetching %s (%s) failed: %r", name, link, exc)
        return name, None, link
    except Exception:
        # Deliberately broad: one broken page must not abort the other
        # fetches, but the failure is recorded instead of silently dropped.
        logger.exception("Unexpected error while fetching %s (%s)", name, link)
        return name, None, link

    match = _M3U8_PATTERN.search(text)
    if match is None:
        return name, None, link
    # URLs embedded in JSON have their slashes escaped as ``\/``.
    m3u8_url = match.group(1).replace('\\/', '/')
    return name, m3u8_url, link


# NOTE(review): main() extends past the end of the reviewed excerpt (it stops
# inside an open ``try:``). Its text is kept exactly as found, still collapsed
# onto one line, so everything after the first ``#`` is currently a comment.
# Re-format it together with the rest of the function.
async def main(): results = [] # === 阶段 1:Playwright 全站分页抓取 === async with async_playwright() as p: browser = await p.chromium.launch(headless=True) page = await browser.new_page(ignore_https_errors=True) # 【核心新增】:注入反爬虫绕过脚本,抹除自动化特征,欺骗防御蜘蛛的探测 await page.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})") page_num = 1 while True: print(f"正在加载并抓取第 {page_num} 页数据...") # 动态改变 page=&& 参数 url = f"https://sinparty.com/?page={page_num}" # 移除不可靠的 networkidle,使用默认导航机制 await page.goto(url) # 【核心新增】:强制屏蔽拦截遮罩,破坏防御弹窗,恢复“可操作、可活动、可点击”状态 await page.add_style_tag(content=''' .app-modal__overlay, .modal-auth__inner { display: none !important; z-index: -9999 !important; } body, html { pointer-events: auto !important; overflow: auto !important; user-select: auto !important; } ''') # 使用 JS 直接从 DOM 树中强制物理删除这两个拦截节点 await page.evaluate('''() => { document.querySelectorAll('.app-modal__overlay, .modal-auth__inner').forEach(el => el.remove()); }''') # 定位目标:跳过 skeleton 骨架屏,直接锁定在线主播节点 try: # 显式等待真实数据的 CSS 节点渲染到 DOM 中(最长容忍 20 秒) # 统一