Update Up

This commit is contained in:
cluntop
2026-02-27 01:23:47 +08:00
parent 57c489223d
commit f250bb188c
+13 -11
View File
@@ -36,33 +36,36 @@ async def main():
while True:
print(f"正在加载并抓取第 {page_num} 页数据...")
# 动态改变 page=&& 参数
# 动态改变 page=&& 参数
url = f"https://sinparty.com/zh?page={page_num}"
# 移除不可靠的 networkidle,使用默认导航机制
await page.goto(url)
# 定位目标:跳过 skeleton 骨架屏,直接锁定在线主播节点
# 定位目标:统一 <div class="content-gallery content-gallery--live-listing"> 数组
try:
# 【核心修正】:显式等待真实数据的 CSS 节点渲染到 DOM 中(最长容忍 10 秒)
await page.wait_for_selector("a.cam-tile.cam-tile--online", timeout=10000)
# 显式等待外层列表容器及内部 item 元素渲染到 DOM 中(最长容忍 10 秒)
await page.wait_for_selector(".content-gallery--live-listing .content-gallery__item", timeout=10000)
except Exception:
# 如果 10 秒后目标节点仍未出现,说明确实到达了没有数据的最后一页
print(f"{page_num} 页未检测到有效在线主播数据,翻页结束。\n")
break
# 此时 DOM 中必定已有数据,安全执行并集提取
elements = await page.locator("a.cam-tile.cam-tile--online").all()
# 每二次数组截胡 <div class="content-gallery__item">&&</div>
elements = await page.locator(".content-gallery--live-listing .content-gallery__item").all()
for element in elements:
# 抓取标题与名字:兼容 .cam-tile__title 或 .cam-tile__info
title_loc = element.locator(".cam-tile__title, .cam-tile__info")
# 抓取标题与名字:<span class="cam-tile__title">&&</span> 或 <span class="cam-tile__personal-info">
title_loc = element.locator(".cam-tile__title, .cam-tile__personal-info")
if await title_loc.count() > 0:
title = await title_loc.first.inner_text()
else:
title = "未知用户"
# 抓取 href 跳转链接
href = await element.get_attribute("href")
# 抓取 href 跳转链接<a href="&&" 对应 class="cam-tile"
href_loc = element.locator("a.cam-tile")
if await href_loc.count() > 0:
href = await href_loc.first.get_attribute("href")
else:
href = ""
if href:
if href.startswith("/"):
@@ -113,4 +116,3 @@ async def main():
# Script entry point: only when this file is executed directly (not imported),
# start an event loop with asyncio.run() and drive the async main() coroutine
# to completion.
if __name__ == "__main__":
    asyncio.run(main())