From f250bb188cbd1a1c56049f963ed7744ae1063f54 Mon Sep 17 00:00:00 2001
From: cluntop <85211716+cluntop@users.noreply.github.com>
Date: Fri, 27 Feb 2026 01:23:47 +0800
Subject: [PATCH] Update Up
---
py/sinparty.py | 24 +++++++++++++-----------
1 file changed, 13 insertions(+), 11 deletions(-)
diff --git a/py/sinparty.py b/py/sinparty.py
index c3680251b..76b8ad3d9 100644
--- a/py/sinparty.py
+++ b/py/sinparty.py
@@ -36,33 +36,36 @@ async def main():
while True:
print(f"正在加载并抓取第 {page_num} 页数据...")
# 动态改变 page=&& 参数
- # 动态改变 page=&& 参数
url = f"https://sinparty.com/zh?page={page_num}"
# 移除不可靠的 networkidle,使用默认导航机制
await page.goto(url)
- # 定位目标:跳过 skeleton 骨架屏,直接锁定在线主播节点
+ # 定位目标:统一使用列表容器内的 item 节点数组
try:
- # 【核心修正】:显式等待真实数据的 CSS 节点渲染到 DOM 中(最长容忍 10 秒)
- await page.wait_for_selector("a.cam-tile.cam-tile--online", timeout=10000)
+ # 显式等待外层列表容器及内部 item 元素渲染到 DOM 中(最长容忍 10 秒)
+ await page.wait_for_selector(".content-gallery--live-listing .content-gallery__item", timeout=10000)
except Exception:
# 如果 10 秒后目标节点仍未出现,说明确实到达了没有数据的最后一页
print(f"第 {page_num} 页未检测到有效在线主播数据,翻页结束。\n")
break
- # 此时 DOM 中必定已有数据,安全执行并集提取
- elements = await page.locator("a.cam-tile.cam-tile--online").all()
+ # 二次数组截取:提取列表容器内的全部 item 元素
+ elements = await page.locator(".content-gallery--live-listing .content-gallery__item").all()
for element in elements:
- # 抓取标题与名字:兼容 .cam-tile__title 或 .cam-tile__info
- title_loc = element.locator(".cam-tile__title, .cam-tile__info")
+ # 抓取标题与名字:兼容 .cam-tile__title 或 .cam-tile__personal-info
+ title_loc = element.locator(".cam-tile__title, .cam-tile__personal-info")
if await title_loc.count() > 0:
title = await title_loc.first.inner_text()
else:
title = "未知用户"
- # 抓取 href 跳转链接
- href = await element.get_attribute("href")
+ # 抓取 href 跳转链接:定位内部 a 标签(选择器为重建内容,需核对)
+ href_loc = element.locator("a")
+ if await href_loc.count() > 0:
+ href = await href_loc.first.get_attribute("href")
+ else:
+ href = ""
if href:
if href.startswith("/"):
@@ -113,4 +116,3 @@ async def main():
if __name__ == "__main__":
asyncio.run(main())
-