import html
import re
import sys
from urllib.parse import quote

import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

from base.spider import Spider

# Requests are sent with verify=False (see Spider.fetch), so silence the
# InsecureRequestWarning urllib3 would otherwise emit on every call.
requests.packages.urllib3.disable_warnings()


class Spider(Spider):
    """Scraper plugin for jable.tv built on the project's ``base.spider.Spider``.

    The class intentionally reuses the name ``Spider`` (shadowing the imported
    base class), exactly as the original file did.
    # NOTE(review): presumably the host application looks the plugin up by
    # this class name and calls the *Content hooks below - confirm.
    """

    _SITE_URL = "https://jable.tv"
    _REFERER = "https://jable.tv/"
    _USER_AGENT = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    )

    # Anchors whose href points at a category/tag page or at one of the two
    # fixed listings.  Group 1 is the path used as type_id, group 2 the anchor
    # body.  The pattern must be terminated by "</a>": with a trailing lazy
    # "(.*?)" and nothing after it, group 2 always matched the empty string
    # and no scraped category was ever accepted.
    _CATEGORY_LINK = re.compile(
        r'href=["\'](?:https://jable\.tv)?/((?:categories|tags)/[^"\'/]+|latest-updates|hot)/?["\'][^>]*>(.*?)</a>',
        re.I | re.S,
    )

    # NOTE(review): the card delimiter, the link pattern and the opening of
    # the title pattern were truncated in the reviewed copy of this file (only
    # the tail "]*>\s*]*>(.*?)" of the title regex survived).  The three values
    # below are a reconstruction - confirm them against the live markup.
    _CARD_DELIMITER = '<div class="video-img-box'
    _CARD_LINK = re.compile(r'href=["\']([^"\']+)["\']')
    _CARD_TITLE = re.compile(r'<h6[^>]*>\s*<a[^>]*>(.*?)</a>', re.S)

    _LAZY_PIC = re.compile(r'data-src=["\']([^"\']+)["\']')
    _PLAIN_PIC = re.compile(r'src=["\']([^"\']+)["\']')
    _M3U8_URL = re.compile(r"https?://[^\s'\" ]+\.m3u8")

    # Separator used to pack "url@@@title@@@picture" into a single vod_id so
    # that detailContent can recover the title and picture without re-parsing.
    _ID_SEP = "@@@"

    def getName(self):
        """Return the display name of this spider."""
        return "Jable"

    def init(self, extend=""):
        """Set up the site URL, default request headers and a retrying session.

        Args:
            extend: Unused; accepted for interface compatibility.
        """
        self.siteUrl = self._SITE_URL
        self.headers = {
            "User-Agent": self._USER_AGENT,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "Referer": self._REFERER,
        }
        self.sess = requests.Session()
        # Retry up to three times on the listed 5xx statuses.
        retries = Retry(total=3, status_forcelist=[500, 502, 503, 504])
        self.sess.mount('https://', HTTPAdapter(max_retries=retries))

    def fetch(self, url):
        """GET *url* with the session created in ``init``.

        Returns:
            The ``requests.Response``, or ``None`` if the request raised.
        """
        try:
            # NOTE: certificate verification is disabled on purpose here.
            return self.sess.get(url, headers=self.headers, timeout=15, verify=False)
        except Exception:
            # Best effort: callers treat None as "no data".  Unlike the former
            # bare except, KeyboardInterrupt/SystemExit still propagate.
            return None

    @staticmethod
    def _clean_text(fragment):
        """Strip tags, decode HTML entities and collapse whitespace."""
        without_tags = re.sub(r'<[^>]+>', '', fragment)
        return " ".join(html.unescape(without_tags).split())

    def homeContent(self, filter):
        """Return the category list as ``{'class': [{'type_id', 'type_name'}, ...]}``.

        Categories are scraped from the home page; if the page cannot be
        fetched or yields nothing, a static list is returned instead.

        Args:
            filter: Unused; accepted for interface compatibility.
        """
        r = self.fetch(self.siteUrl)
        cats = []
        if r is not None and r.ok:
            seen = set()
            for m in self._CATEGORY_LINK.finditer(r.text):
                tid = m.group(1).strip('/')
                name = self._clean_text(m.group(2))
                # Skip anchors without visible text and repeated links.
                if not tid or not name or tid in seen:
                    continue
                seen.add(tid)
                cats.append({"type_id": tid, "type_name": name})

        # Static fallback so the home screen is never empty.
        if not cats:
            cats = [
                {"type_id": "latest-updates", "type_name": "最近更新"},
                {"type_id": "hot", "type_name": "热门影片"},
                {"type_id": "categories/chinese-subtitle", "type_name": "中文字幕"},
                {"type_id": "categories/uncensored", "type_name": "無碼"},
                {"type_id": "categories/lesbian", "type_name": "女同"},
                {"type_id": "categories/creampie", "type_name": "中出"},
            ]
        return {'class': cats}

    def categoryContent(self, tid, pg, filter, extend):
        """Return the video list for category path *tid*, page *pg*.

        Page 1 uses the bare category URL; later pages append ``/{pg}/``.
        *filter* and *extend* are unused.
        """
        if str(pg) != '1':
            url = f"{self.siteUrl}/{tid}/{pg}/"
        else:
            url = f"{self.siteUrl}/{tid}/"
        return self.postList(url, int(pg))

    def searchContent(self, key, quick, pg=1):
        """Return the video list for search keyword *key*, page *pg*.

        The keyword is percent-encoded because it becomes a URL path segment;
        a raw '/', '?' or '#' would otherwise alter the request path.
        *quick* is unused.
        """
        term = quote(str(key), safe="")
        if str(pg) != '1':
            url = f"{self.siteUrl}/search/{term}/{pg}/"
        else:
            url = f"{self.siteUrl}/search/{term}/"
        return self.postList(url, int(pg))

    def postList(self, url, pg):
        """Fetch a listing page and turn each video card into a vod entry.

        Args:
            url: Absolute URL of the listing page.
            pg: Current page number as an int.

        Returns:
            A dict with ``list``, ``page``, ``pagecount``, ``limit`` and
            ``total``.  ``pagecount`` is ``pg + 1`` while the page yields any
            entry, so the caller keeps paging until a page comes back empty.
        """
        r = self.fetch(url)
        l = []
        if r is not None and r.ok:
            # Everything before the first delimiter is page chrome, not a card.
            for block in r.text.split(self._CARD_DELIMITER)[1:]:
                link_match = self._CARD_LINK.search(block)
                if not link_match:
                    continue
                u = link_match.group(1)

                title_match = self._CARD_TITLE.search(block)
                t = self._clean_text(title_match.group(1)) if title_match else "未知"

                # Prefer the lazy-load attribute over a plain src.
                pic_match = self._LAZY_PIC.search(block) or self._PLAIN_PIC.search(block)
                p = pic_match.group(1) if pic_match else ""

                if not u.startswith("http"):
                    u = f"{self.siteUrl}/{u.lstrip('/')}"

                l.append({
                    'vod_id': self._ID_SEP.join((u, t, p)),
                    'vod_name': t,
                    'vod_pic': p,
                    'vod_remarks': '1080P',
                    'style': {"type": "rect", "ratio": 1.33},
                })
        return {
            'list': l,
            'page': pg,
            'pagecount': pg + 1 if l else pg,
            'limit': 24,
            'total': 9999,
        }

    def detailContent(self, ids):
        """Return the detail record for the vod_id in ``ids[0]``.

        The id is expected in the ``url@@@title@@@picture`` form produced by
        ``postList``; a plain URL is also accepted.  The page is fetched and
        scanned for the first ``.m3u8`` URL; when none is found the page URL
        itself is used as the play URL.

        Returns:
            ``{'list': [vod]}``, or ``{'list': []}`` when *ids* is empty.
        """
        if not ids:
            return {'list': []}

        vid = ids[0]
        name, pic = "未知", ""
        if self._ID_SEP in vid:
            parts = vid.split(self._ID_SEP)
            vid = parts[0]
            name = parts[1] if len(parts) > 1 else name
            pic = parts[2] if len(parts) > 2 else pic

        m3u8_url = ""
        r = self.fetch(vid)
        if r is not None and r.ok:
            m_m3u8 = self._M3U8_URL.search(r.text)
            if m_m3u8:
                m3u8_url = m_m3u8.group(0)

        vod = {
            'vod_id': ids[0],
            'vod_name': name,
            'vod_pic': pic,
            'type_name': '视频',
            'vod_play_from': 'Jable',
            'vod_play_url': f"播放${m3u8_url}" if m3u8_url else f"播放${vid}",
        }
        return {'list': [vod]}

    def playerContent(self, flag, id, vipFlags):
        """Return the playback descriptor for play URL *id*.

        The URL is passed through unchanged with ``parse`` set to 0, together
        with the Referer and User-Agent headers to send when requesting it.
        *flag* and *vipFlags* are unused.
        """
        return {
            "parse": 0,
            "url": id,
            "header": {
                "Referer": self._REFERER,
                "User-Agent": self._USER_AGENT,
            },
        }