Update Up

2026-02-26 23:24:02 +08:00
parent 5caf76bddb
commit 7bcd245e5d
12 changed files with 200 additions and 10 deletions
@@ -0,0 +1,110 @@
+import asyncio
+import aiohttp
+import re
+from playwright.async_api import async_playwright
+
+async def fetch_m3u8(session: aiohttp.ClientSession, name: str, link: str):
+    """
+    使用 aiohttp 高并发拉取单个直播间源码，并使用正则嗅探底层 .m3u8 流媒体链接
+    优化逻辑：保持底层并发稳定，避免 GitHub Actions 中 OOM
+    """
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+    }
+    try:
+        async with session.get(link, headers=headers, timeout=15) as response:
+            if response.status == 200:
+                text = await response.text()
+                match = re.search(r'(https?:[\\/]+[^"\'\s]+\.m3u8[^"\'\s]*)', text)
+                if match:
+                    m3u8_url = match.group(1).replace('\\/', '/')
+                    return name, m3u8_url, link
+    except Exception:
+        pass 
+    
+    return name, None, link
+
+async def main():
+    results = []
+    
+    # === 阶段 1：Playwright 全站分页抓取 ===
+    async with async_playwright() as p:
+        browser = await p.chromium.launch(headless=True)
+        page = await browser.new_page()
+
+        page_num = 1
+        while True:
+            print(f"正在加载并抓取第 {page_num} 页数据...")
+            # 动态改变 page=&& 参数
+            url = f"https://sinparty.com/zh?page={page_num}"
+            await page.goto(url, wait_until="networkidle")
+
+            # 定位目标：跳过 skeleton 骨架屏，直接锁定在线主播节点
+            # 兼容 a.cam-tile.cam-tile--online 作为独立跳转链接提取
+            elements = await page.locator("a.cam-tile.cam-tile--online").all()
+            
+            if not elements:
+                print(f"第 {page_num} 页未检测到有效在线主播数据，翻页结束。\n")
+                break
+
+            for element in elements:
+                # 抓取标题与名字：兼容 .cam-tile__title 或 .cam-tile__info
+                title_loc = element.locator(".cam-tile__title, .cam-tile__info")
+                if await title_loc.count() > 0:
+                    title = await title_loc.first.inner_text()
+                else:
+                    title = "未知用户"
+
+                # 抓取 href 跳转链接
+                href = await element.get_attribute("href")
+                
+                if href:
+                    if href.startswith("/"):
+                        href = f"https://sinparty.com{href}"
+                    
+                    results.append({
+                        "name": title.strip(),
+                        "link": href
+                    })
+
+            page_num += 1
+
+        await browser.close()
+
+    # === 阶段 2：AIOHTTP 高性能并发抓取 m3u8 流 ===
+    print(f"全站遍历完毕，共提取 {len(results)} 个直播间链接。开始高并发底层嗅探...")
+    
+    connector = aiohttp.TCPConnector(limit=100)
+    async with aiohttp.ClientSession(connector=connector) as session:
+        tasks = [fetch_m3u8(session, res["name"], res["link"]) for res in results]
+        m3u8_results = await asyncio.gather(*tasks)
+
+    # === 阶段 3：转换并格式化输出 M3U ===
+    m3u_lines = ["#EXTM3U"]
+    success_count = 0
+    
+    for name, m3u8_url, room_link in m3u8_results:
+        # M3U 标准：需要直接填入可播放的流媒体链接 (.m3u8)
+        # 如果底层抓不到 m3u8，则使用原始跳转链接作为 fallback
+        final_link = m3u8_url if m3u8_url else room_link
+        
+        # 按照要求输出 group-title="女生" 及名称
+        m3u_lines.append(f'#EXTINF:-1 group-title="女生",{name}')
+        m3u_lines.append(final_link)
+        
+        if m3u8_url:
+            success_count += 1
+            
+    m3u_content = "\n".join(m3u_lines)
+    
+    print("\n=== 转换格式 M3U 输出 ===")
+    print(m3u_content)
+    
+    with open("lib/party.m3u", "w", encoding="utf-8") as f:
+        f.write(m3u_content)
+        
+    print(f"\n并发处理完成！成功解析 {success_count} 个底层流，总计写入 {len(results)} 条数据。")
+
+if __name__ == "__main__":
+    asyncio.run(main())
+