Files
tvbox-1/py/奇优动漫.py
GitHub Actions d0bd20ce39 purge history
2026-02-07 05:02:33 +00:00

379 lines
17 KiB
Python
Executable File

# coding=utf-8
#!/usr/bin/python
import sys
sys.path.append('..')
from base.spider import Spider
import json
import urllib.parse
import re
from lxml import etree
from urllib.parse import urljoin
class Spider(Spider):
def getName(self):
return "奇优影院"
def init(self, extend):
pass
def homeContent(self, filter):
result = {}
cateManual = {
"电影": "1",
"电视剧": "2",
"动漫": "3",
"综艺": "4",
"午夜": "6"
}
classes = [{'type_name': k, 'type_id': v} for k, v in cateManual.items()]
result['class'] = classes
filters = {
"1": [{"key": "by", "name": "排序", "value": [{"n": "按时间", "v": "time"}, {"n": "按人气", "v": "hit"}]}],
"2": [{"key": "by", "name": "排序", "value": [{"n": "按时间", "v": "time"}, {"n": "按人气", "v": "hit"}]}],
"3": [{"key": "by", "name": "排序", "value": [{"n": "按时间", "v": "time"}, {"n": "按人气", "v": "hit"}]}],
"4": [{"key": "by", "name": "排序", "value": [{"n": "按时间", "v": "time"}, {"n": "按人气", "v": "hit"}]}],
"6": [{"key": "by", "name": "排序", "value": [{"n": "按时间", "v": "time"}, {"n": "按人气", "v": "hit"}]}]
}
result['filters'] = filters
return result
def homeVideoContent(self):
try:
rsp = self.fetch("http://qiyoudy5.com/")
root = self.parse_html(rsp.content)
if not root:
return {'list': []}
videos = []
# 轮播图
for a in root.xpath("//div[contains(@class,'carousel')]//a[contains(@class,'stui-vodlist__thumb')]"):
try:
name = a.xpath(".//span[@class='pic-text text-center']/text()")[0].strip() if a.xpath(".//span[@class='pic-text text-center']/text()") else a.xpath("./@title")[0] if a.xpath("./@title") else "未知"
style = a.xpath("./@style")[0] if a.xpath("./@style") else ""
pic = re.search(r"background:\s*url\((.*?)\)", style).group(1) if re.search(r"background:\s*url\((.*?)\)", style) else ""
sid = a.xpath("./@href")[0] if a.xpath("./@href") else ""
videos.append({"vod_id": sid, "vod_name": name, "vod_pic": pic, "vod_remarks": "推荐"})
except:
continue
# 视频列表
for a in root.xpath("//ul[contains(@class,'stui-vodlist')]//a[contains(@class,'stui-vodlist__thumb')]"):
try:
name = a.xpath("./@title")[0] if a.xpath("./@title") else "未知"
pic = a.xpath("./@data-original")[0] if a.xpath("./@data-original") else ""
sid = a.xpath("./@href")[0] if a.xpath("./@href") else ""
remark = a.xpath(".//span[@class='pic-text text-right']/text()")[0] if a.xpath(".//span[@class='pic-text text-right']/text()") else ""
videos.append({"vod_id": sid, "vod_name": name, "vod_pic": pic, "vod_remarks": remark})
except:
continue
return {'list': videos}
except:
return {'list': []}
def categoryContent(self, tid, pg, filter, extend):
result = {}
try:
order = extend.get('by', 'time') if extend else 'time'
url = f'http://qiyoudy5.com/list/{tid}_{pg}.html?order={order}'
rsp = self.fetch(url)
root = self.parse_html(rsp.content)
if not root:
return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0}
videos = []
for a in root.xpath("//a[contains(@class,'stui-vodlist__thumb')]"):
try:
name = a.xpath("./@title")[0] if a.xpath("./@title") else "未知"
pic = a.xpath("./@data-original")[0] if a.xpath("./@data-original") else ""
sid = a.xpath("./@href")[0] if a.xpath("./@href") else ""
remark = a.xpath(".//span[@class='pic-text text-right']/text()")[0] if a.xpath(".//span[@class='pic-text text-right']/text()") else ""
videos.append({"vod_id": sid, "vod_name": name, "vod_pic": pic, "vod_remarks": remark})
except:
continue
current_page = int(root.xpath("//ul[contains(@class,'stui-page')]//a[@class='active']/text()")[0]) if root.xpath("//ul[contains(@class,'stui-page')]//a[@class='active']/text()") else pg
page_numbers = []
for link in root.xpath("//ul[contains(@class,'stui-page')]//a[contains(@href,'list')]/@href"):
match = re.search(r'list/\d+_(\d+)\.html', link)
if match:
page_numbers.append(int(match.group(1)))
total_page = max(page_numbers) if page_numbers else 1
return {
'list': videos,
'page': current_page,
'pagecount': total_page if total_page > 0 else 9999,
'limit': 90,
'total': 999999
}
except:
return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0}
def detailContent(self, array):
try:
tid = array[0]
url = f'http://qiyoudy5.com{tid}'
rsp = self.fetch(url)
root = self.parse_html(rsp.content)
if not root:
return {'list': []}
# 基本信息
detail_node = root.xpath("//div[contains(@class,'stui-content__detail')]") or root.xpath("//div[@class='stui-player__detail']")
pic = title = area = director = actor = year = desc = ""
if detail_node:
detail_node = detail_node[0]
pic = self.get_first(root.xpath("//meta[@property='og:image']/@content") or detail_node.xpath(".//img/@data-original"))
title = self.get_first(detail_node.xpath(".//h1//text()"))
if not title:
page_title = self.get_first(root.xpath("//title/text()"))
title = re.search(r"《(.*?)》", page_title).group(1) if page_title and re.search(r"《(.*?)》", page_title) else ""
area = self.get_first(root.xpath("//meta[@property='og:video:area']/@content"))
director = self.get_first(root.xpath("//meta[@property='og:video:director']/@content"))
actor = self.get_first(root.xpath("//meta[@property='og:video:actor']/@content"))
year_info = self.get_first(root.xpath("//p[@class='data']//text()[contains(.,'年份:')]"))
year = re.search(r"年份:(\d{4})", year_info).group(1) if year_info and re.search(r"年份:(\d{4})", year_info) else ""
desc = self.get_first(root.xpath("//meta[@property='og:description']/@content"))
# 播放列表
playFrom, playUrl = [], []
for tab in root.xpath("//ul[contains(@class,'nav-tabs')]/li"):
tab_name = self.get_first(tab.xpath(".//a/text()"))
tab_id = self.get_first(tab.xpath(".//a/@href")).replace("#", "") if tab.xpath(".//a/@href") else ""
if tab_name and tab_id:
play_list = root.xpath(f"//div[@id='{tab_id}']//ul[contains(@class,'stui-content__playlist')]//a")
if play_list:
playFrom.append(tab_name)
episodes = []
for episode in play_list:
ep_name = self.get_first(episode.xpath("./text()")) or "播放"
ep_url = self.get_first(episode.xpath("./@href"))
if ep_url:
episodes.append(f"{ep_name}${ep_url}")
if episodes:
playUrl.append("#".join(episodes))
vod = {
"vod_id": tid,
"vod_name": title,
"vod_pic": pic,
"vod_year": year,
"vod_area": area,
"vod_actor": actor,
"vod_director": director,
"vod_content": desc
}
if playFrom and playUrl:
vod['vod_play_from'] = "$$$".join(playFrom)
vod['vod_play_url'] = "$$$".join(playUrl)
return {'list': [vod]}
except:
return {'list': []}
def searchContent(self, key, quick, page='1'):
try:
url = "http://qiyoudy5.com/search.php"
# 修复:使用正确的参数名和变量
post_data = {
'searchword': key, # 改为变量key,而不是字符串'key'
}
headers = {
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
"Referer": "http://qiyoudy5.com/",
"Origin": "http://qiyoudy5.com"
}
# 修复:只发送一次POST请求,删除重复的请求
rsp = self.post(url, data=post_data, headers=headers)
root = self.parse_html(rsp.content)
if not root:
return {'list': []}
videos = []
# 多种选择器尝试获取搜索结果
selectors = [
"//ul[contains(@class,'stui-vodlist__media')]//li",
"//ul[contains(@class,'stui-vodlist')]//li",
"//a[contains(@class,'stui-vodlist__thumb')]"
]
result_items = []
for selector in selectors:
result_items = root.xpath(selector)
if result_items:
break
for item in result_items:
try:
if item.tag == 'a': # 直接是a标签
href = self.get_first(item.xpath("./@href"))
title = self.get_first(item.xpath("./@title"))
pic = self.get_first(item.xpath("./@data-original"))
remark = self.get_first(item.xpath(".//span[contains(@class,'pic-text')]/text()"))
else: # li标签
link = item.xpath(".//a[contains(@class,'stui-vodlist__thumb')]") or item.xpath(".//a")
if not link:
continue
link = link[0]
href = self.get_first(link.xpath("./@href"))
title = self.get_first(link.xpath("./@title"))
pic = self.get_first(link.xpath("./@data-original"))
if not pic:
style = self.get_first(link.xpath("./@style"))
if style and "background-image" in style:
pic_match = re.search(r"background-image:\s*url\(['\"]?(.*?)['\"]?\)", style)
if pic_match:
pic = pic_match.group(1)
remark = self.get_first(item.xpath(".//span[contains(@class,'pic-text')]/text()"))
if href and title:
videos.append({
"vod_id": href,
"vod_name": title.strip(),
"vod_pic": pic,
"vod_remarks": remark or ""
})
except Exception as e:
continue
# 备用解析方案
if not videos:
for a in root.xpath("//a[contains(@href,'/vod/')]"):
try:
href = self.get_first(a.xpath("./@href"))
title = self.get_first(a.xpath("./@title")) or self.get_first(a.xpath(".//text()"))
pic = self.get_first(a.xpath("./@data-original"))
remark = self.get_first(a.xpath(".//span[contains(@class,'pic-text')]/text()"))
if href and title:
videos.append({
"vod_id": href,
"vod_name": title.strip(),
"vod_pic": pic,
"vod_remarks": remark or ""
})
except:
continue
# 去重
seen = set()
unique_videos = []
for video in videos:
identifier = (video["vod_id"], video["vod_name"])
if identifier not in seen:
seen.add(identifier)
unique_videos.append(video)
return {'list': unique_videos}
except Exception as e:
return {'list': []}
def playerContent(self, flag, id, vipFlags):
try:
url = f"http://qiyoudy5.com{id}"
rsp = self.fetch(url)
_, html_content = self.parse_html(rsp.content, return_content=True)
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
"Referer": url,
}
# 多种方式查找播放地址
# 1. API链接
for pattern in [r"http://api\.yongfan99\.com:81/content\.php\?[^'\"]+", r"content\.php\?vid=[^&]+&type=[^'\"]+"]:
match = re.search(pattern, html_content)
if match:
api_url = match.group(0)
if not api_url.startswith('http'):
api_url = "http://api.yongfan99.com:81/" + api_url
try:
api_rsp = self.fetch(api_url, headers=headers)
m3u8_match = re.search(r'http[s]?://[^\s"\']+\.m3u8[^\s"\']*', api_rsp.text)
if m3u8_match:
return {"parse": 0, "playUrl": "", "url": m3u8_match.group(0), "header": headers}
except:
pass
# 2. iframe中的播放器
for pattern in [r'<iframe[^>]*src=[\'"]([^\'"]+)[\'"][^>]*>', r'src\s*=\s*[\'"]((?:http[^\'"]*)?/play/[^\'"]*)[\'"]']:
for iframe_src in re.findall(pattern, html_content):
if not iframe_src.startswith('http'):
iframe_src = urljoin(url, iframe_src)
try:
iframe_rsp = self.fetch(iframe_src, headers=headers)
m3u8_match = re.search(r'http[s]?://[^\s"\']+\.m3u8[^\s"\']*', iframe_rsp.text)
if m3u8_match:
return {"parse": 0, "playUrl": "", "url": m3u8_match.group(0), "header": headers}
except:
continue
# 3. 直接搜索m3u8链接
m3u8_match = re.search(r'http[s]?://[^\s"\']+\.m3u8[^\s"\']*', html_content)
if m3u8_match:
return {"parse": 0, "playUrl": "", "url": m3u8_match.group(0), "header": headers}
# 4. 返回原始URL进行外部解析
return {"parse": 1, "playUrl": "", "url": url, "header": headers}
except:
return {"parse": 1, "playUrl": "", "url": f"http://qiyoudy5.com{id}", "header": {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
"Referer": "http://qiyoudy5.com/",
}}
# 辅助函数
def parse_html(self, content, return_content=False):
encodings = ['utf-8', 'gbk', 'gb2312', 'iso-8859-1']
html_content = None
for encoding in encodings:
try:
html_content = content.decode(encoding)
break
except UnicodeDecodeError:
continue
if html_content is None:
html_content = content.decode('utf-8', errors='replace')
html_content = self.clean_html(html_content)
root = etree.HTML(html_content)
if return_content:
return root, html_content
return root
def get_first(self, array, default=""):
return array[0] if array else default
def clean_html(self, html_content):
html_content = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', html_content)
replacements = {'&nbsp;': ' ', '&amp;': '&', '&lt;': '<', '&gt;': '>', '&quot;': '"'}
for old, new in replacements.items():
html_content = html_content.replace(old, new)
return html_content
def isVideoFormat(self, url):
return any(fmt in url for fmt in ['.m3u8', '.mp4', '.avi', '.mkv', '.flv', '.webm'])
def manualVideoCheck(self):
return True
def localProxy(self, param):
return {}