Files
tvbox-1/py/sinparty.py
T
2026-02-26 23:24:02 +08:00

111 lines
4.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import asyncio
import aiohttp
import re
from playwright.async_api import async_playwright
# First http(s) URL in the page that contains ".m3u8". The separator after the
# scheme may be JSON-escaped ("https:\/\/..."), so slashes and backslashes are
# both accepted there. The URL ends at the first quote or whitespace.
_M3U8_URL_RE = re.compile(r'(https?:[\\/]+[^"\'\s]+\.m3u8[^"\'\s]*)')

# One or more backslashes directly in front of a slash (JSON / JS escaping,
# possibly applied twice).
_ESCAPED_SLASH_RE = re.compile(r'\\+/')

# Browser-like User-Agent sent with every room request.
_FETCH_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}


def _extract_m3u8_url(text):
    """Return the first .m3u8 URL found in *text*, or None if there is none."""
    match = _M3U8_URL_RE.search(text)
    if match is None:
        return None
    # Undo slash escaping ("\/" or "\\/" -> "/") and drop a trailing backslash
    # that gets captured when the URL is followed by an escaped quote (\").
    return _ESCAPED_SLASH_RE.sub('/', match.group(1)).rstrip('\\')


async def fetch_m3u8(session: aiohttp.ClientSession, name: str, link: str):
    """Download one room page and sniff the underlying .m3u8 stream URL.

    The page is fetched with a plain aiohttp request and scanned with a regex.

    Args:
        session: Shared aiohttp session used to issue the GET request.
        name: Display name of the room; passed through unchanged.
        link: URL of the room page to download.

    Returns:
        A ``(name, m3u8_url, link)`` tuple. ``m3u8_url`` is ``None`` when the
        response status is not 200, when no .m3u8 URL is present in the page,
        or when the request fails for any reason.
    """
    try:
        async with session.get(
            link,
            headers=_FETCH_HEADERS,
            # ClientTimeout is the timeout type aiohttp documents for requests.
            timeout=aiohttp.ClientTimeout(total=15),
        ) as response:
            if response.status == 200:
                m3u8_url = _extract_m3u8_url(await response.text())
                if m3u8_url:
                    return name, m3u8_url, link
    except Exception as exc:
        # Deliberately best-effort: a single failing room must not abort the
        # whole batch, but the failure is reported instead of being hidden.
        print(f"[警告] 直播间 {name} 嗅探失败: {exc!r}")
    return name, None, link
# Safety net for pagination: never request more listing pages than this, even
# if the site keeps returning tiles. NOTE(review): 500 is an arbitrary upper
# bound - adjust if the site can legitimately have more pages.
_MAX_PAGES = 500

# Upper bound on simultaneous fetch_m3u8 calls; equals the connector limit.
_MAX_CONCURRENT_FETCHES = 100


async def _collect_rooms():
    """Walk the paginated listing with Playwright and return the online rooms.

    Returns:
        A list of ``{"name": ..., "link": ...}`` dicts, one per distinct room
        link, in the order the rooms were first seen.
    """
    rooms = []
    seen_links = set()
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            for page_num in range(1, _MAX_PAGES + 1):
                print(f"正在加载并抓取第 {page_num} 页数据...")
                # Only the page= query parameter changes between requests.
                url = f"https://sinparty.com/zh?page={page_num}"
                await page.goto(url, wait_until="networkidle")
                # Select only the tiles marked online; each tile is itself the
                # link to the room.
                elements = await page.locator("a.cam-tile.cam-tile--online").all()

                new_on_page = 0
                for element in elements:
                    href = await element.get_attribute("href")
                    if not href:
                        continue
                    if href.startswith("/"):
                        href = f"https://sinparty.com{href}"
                    # The same room can show up on more than one page.
                    if href in seen_links:
                        continue
                    seen_links.add(href)

                    # The name comes from .cam-tile__title or .cam-tile__info,
                    # whichever is present first.
                    title_loc = element.locator(".cam-tile__title, .cam-tile__info")
                    if await title_loc.count() > 0:
                        title = await title_loc.first.inner_text()
                    else:
                        title = "未知用户"
                    # inner_text() may span several lines; a newline inside the
                    # name would split the #EXTINF line of the playlist.
                    name = " ".join(title.split()) or "未知用户"

                    rooms.append({
                        "name": name,
                        "link": href,
                    })
                    new_on_page += 1

                # Stop on an empty page, and also on a page that only repeats
                # rooms already collected (otherwise a site that serves its
                # last page for every out-of-range number loops forever).
                if new_on_page == 0:
                    print(f"{page_num} 页未检测到有效在线主播数据,翻页结束。\n")
                    break
            else:
                print(f"已达到最大翻页上限 {_MAX_PAGES} 页,停止翻页。\n")
        finally:
            # Close the browser even when navigation or scraping raises.
            await browser.close()
    return rooms


async def _resolve_streams(rooms):
    """Run fetch_m3u8 for every room and return the results in input order."""
    # Gate the calls so a request's timeout clock starts only once a slot is
    # free, instead of ticking while the request waits for a pooled connection.
    limiter = asyncio.Semaphore(_MAX_CONCURRENT_FETCHES)

    async def bounded_fetch(session, room):
        async with limiter:
            return await fetch_m3u8(session, room["name"], room["link"])

    connector = aiohttp.TCPConnector(limit=100)
    async with aiohttp.ClientSession(connector=connector) as session:
        return await asyncio.gather(
            *(bounded_fetch(session, room) for room in rooms)
        )


def _render_m3u(stream_results):
    """Build the playlist text; return ``(content, resolved_stream_count)``."""
    m3u_lines = ["#EXTM3U"]
    success_count = 0
    for name, m3u8_url, room_link in stream_results:
        # Use the sniffed .m3u8 URL as the entry; when none was found, fall
        # back to the room page link.
        final_link = m3u8_url if m3u8_url else room_link
        m3u_lines.append(f'#EXTINF:-1 group-title="女生",{name}')
        m3u_lines.append(final_link)
        if m3u8_url:
            success_count += 1
    return "\n".join(m3u_lines), success_count


async def main():
    """Scrape online rooms, resolve their streams and write lib/party.m3u.

    Phase 1 collects room names and links from the paginated listing with
    Playwright. Phase 2 calls fetch_m3u8 concurrently for every room. Phase 3
    formats the results as an M3U playlist, prints it and writes it to
    ``lib/party.m3u`` (relative to the current working directory).
    """
    import os  # local import: only needed to create the output directory

    # === Phase 1: paginated listing scrape with Playwright ===
    results = await _collect_rooms()

    # === Phase 2: concurrent .m3u8 sniffing with aiohttp ===
    print(f"全站遍历完毕,共提取 {len(results)} 个直播间链接。开始高并发底层嗅探...")
    m3u8_results = await _resolve_streams(results)

    # === Phase 3: format and write the M3U playlist ===
    m3u_content, success_count = _render_m3u(m3u8_results)
    print("\n=== 转换格式 M3U 输出 ===")
    print(m3u_content)

    # Create the target directory first so the write cannot fail with
    # FileNotFoundError after all the scraping work has been done.
    os.makedirs("lib", exist_ok=True)
    with open("lib/party.m3u", "w", encoding="utf-8") as f:
        f.write(m3u_content)
    print(f"\n并发处理完成!成功解析 {success_count} 个底层流,总计写入 {len(results)} 条数据。")
# Script entry point: run the async pipeline only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    entry_coroutine = main()
    asyncio.run(entry_coroutine)