Update Up

This commit is contained in:
cluntop
2026-02-26 23:24:02 +08:00
parent 5caf76bddb
commit 7bcd245e5d
12 changed files with 200 additions and 10 deletions
+110
View File
@@ -0,0 +1,110 @@
import asyncio
import re
from pathlib import Path

import aiohttp
from playwright.async_api import async_playwright
def _extract_m3u8_url(text: str):
    """Return the first ``.m3u8`` URL found in *text*, or ``None`` if absent.

    The pattern accepts URLs whose slashes are backslash-escaped (as they
    appear inside JSON embedded in a page); those escapes are removed from
    the returned URL.
    """
    match = re.search(r'(https?:[\\/]+[^"\'\s]+\.m3u8[^"\'\s]*)', text)
    if match is None:
        return None
    return match.group(1).replace('\\/', '/')


async def fetch_m3u8(session: aiohttp.ClientSession, name: str, link: str):
    """Download one room page and sniff the first ``.m3u8`` URL in its source.

    Args:
        session: Shared aiohttp client session used for the GET request.
        name: Display name of the room; passed through unchanged.
        link: URL of the room page to download.

    Returns:
        A ``(name, m3u8_url, link)`` tuple. ``m3u8_url`` is ``None`` when the
        response status is not 200, when no ``.m3u8`` URL is found in the
        page, or when the request fails.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }
    try:
        async with session.get(link, headers=headers, timeout=15) as response:
            if response.status != 200:
                return name, None, link
            text = await response.text()
    except Exception as exc:
        # Deliberately best-effort: a single failing room must not abort the
        # whole gather() in the caller. Report the failure instead of hiding
        # it so network problems are distinguishable from "no stream found".
        print(f"[fetch_m3u8] {name} ({link}) 请求失败: {exc!r}")
        return name, None, link
    return name, _extract_m3u8_url(text), link
async def _collect_rooms():
    """Walk the paginated listing with Playwright and collect online rooms.

    Pages are requested as ``?page=1``, ``?page=2``, ... until a page shows
    no online tiles or contributes no link that has not been seen already.

    Returns:
        A list of ``{"name": ..., "link": ...}`` dicts, one per distinct room
        link, in the order the links were first encountered.
    """
    rooms = []
    seen_links = set()
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            page_num = 1
            while True:
                print(f"正在加载并抓取第 {page_num} 页数据...")
                url = f"https://sinparty.com/zh?page={page_num}"
                await page.goto(url, wait_until="networkidle")
                # Only tiles carrying the "online" modifier class are
                # selected, so placeholder tiles without it are ignored.
                elements = await page.locator("a.cam-tile.cam-tile--online").all()
                if not elements:
                    print(f"第 {page_num} 页未检测到有效在线主播数据,翻页结束。\n")
                    break

                new_on_page = 0
                for element in elements:
                    href = await element.get_attribute("href")
                    if not href:
                        continue
                    if href.startswith("/"):
                        href = f"https://sinparty.com{href}"
                    if href in seen_links:
                        # Same room listed again (e.g. it moved between
                        # pages while we were paginating).
                        continue
                    seen_links.add(href)

                    title_loc = element.locator(".cam-tile__title, .cam-tile__info")
                    if await title_loc.count() > 0:
                        title = await title_loc.first.inner_text()
                    else:
                        title = ""
                    # Collapse all whitespace: a newline inside the name
                    # would split the #EXTINF entry and corrupt the playlist.
                    name = " ".join(title.split()) or "未知用户"
                    rooms.append({"name": name, "link": href})
                    new_on_page += 1

                if new_on_page == 0:
                    # Guards against looping forever if out-of-range pages
                    # keep returning an already-seen listing.
                    print(f"第 {page_num} 页没有新的直播间链接,翻页结束。\n")
                    break
                page_num += 1
        finally:
            # Release the browser even if navigation or scraping raised.
            await browser.close()
    return rooms


def _render_m3u(m3u8_results):
    """Format ``(name, m3u8_url, room_link)`` tuples as M3U playlist text.

    Entries without a stream URL fall back to the room link.

    Returns:
        A ``(m3u_content, success_count)`` tuple, where ``success_count`` is
        the number of entries that had a stream URL.
    """
    lines = ["#EXTM3U"]
    success_count = 0
    for name, m3u8_url, room_link in m3u8_results:
        lines.append(f'#EXTINF:-1 group-title="女生",{name}')
        lines.append(m3u8_url if m3u8_url else room_link)
        if m3u8_url:
            success_count += 1
    return "\n".join(lines), success_count


async def main():
    """Scrape online rooms, resolve their stream URLs and write the playlist.

    Phase 1 collects room links with Playwright, phase 2 fetches every room
    page concurrently via aiohttp to look for a ``.m3u8`` URL, and phase 3
    prints the resulting M3U text and writes it to ``lib/party.m3u``.
    """
    # Phase 1: paginated scrape of the listing.
    results = await _collect_rooms()

    # Phase 2: concurrent fetch of every room page.
    print(f"全站遍历完毕,共提取 {len(results)} 个直播间链接。开始高并发底层嗅探...")
    connector = aiohttp.TCPConnector(limit=100)
    async with aiohttp.ClientSession(connector=connector) as session:
        tasks = [fetch_m3u8(session, res["name"], res["link"]) for res in results]
        m3u8_results = await asyncio.gather(*tasks)

    # Phase 3: render and persist the playlist.
    m3u_content, success_count = _render_m3u(m3u8_results)
    print("\n=== 转换格式 M3U 输出 ===")
    print(m3u_content)
    out_path = Path("lib/party.m3u")
    # Create the target directory so the run does not fail at the very end.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(m3u_content, encoding="utf-8")
    print(f"\n并发处理完成!成功解析 {success_count} 个底层流,总计写入 {len(results)} 条数据。")


if __name__ == "__main__":
    asyncio.run(main())