#!/usr/bin/python
# coding=utf-8
# Listing metadata from the code viewer: 379 lines, 17 KiB, Python, executable file.
|
|
import sys
|
|
sys.path.append('..')
|
|
from base.spider import Spider
|
|
import json
|
|
import urllib.parse
|
|
import re
|
|
from lxml import etree
|
|
from urllib.parse import urljoin
|
|
|
|
class Spider(Spider):
|
|
|
|
def getName(self):
    """Return the display name of the site this spider scrapes."""
    site_name = "奇优影院"
    return site_name
|
|
|
|
def init(self, extend):
    """Accept the ``extend`` argument; no setup is performed with it."""
    return None
|
|
|
|
def homeContent(self, filter):
    """Build the static home payload: the category list and per-category filters.

    The ``filter`` argument is not consulted; both sections are always returned.

    Returns:
        dict: ``class`` is a list of ``{'type_name', 'type_id'}`` entries and
        ``filters`` maps each type id to a single sort-order filter.
    """
    # (display name, type id) in the order they are listed.
    category_pairs = (
        ("电影", "1"),
        ("电视剧", "2"),
        ("动漫", "3"),
        ("综艺", "4"),
        ("午夜", "6"),
    )

    def build_sort_filter():
        # Built fresh per category so no two categories share list objects.
        return [{
            "key": "by",
            "name": "排序",
            "value": [{"n": "按时间", "v": "time"}, {"n": "按人气", "v": "hit"}],
        }]

    payload = {}
    payload['class'] = [
        {'type_name': label, 'type_id': type_id}
        for label, type_id in category_pairs
    ]
    payload['filters'] = {
        type_id: build_sort_filter() for _, type_id in category_pairs
    }
    return payload
|
|
|
|
def homeVideoContent(self):
    """Fetch the site's front page and collect the videos shown on it.

    Carousel entries are collected first and tagged with the remark "推荐";
    they are followed by every thumbnail link found inside the
    ``stui-vodlist`` lists.

    Returns:
        dict: ``{'list': [...]}`` where each item has ``vod_id`` (the link's
        href), ``vod_name``, ``vod_pic`` and ``vod_remarks``. The list is
        empty when the page cannot be fetched or parsed.
    """
    try:
        rsp = self.fetch("http://qiyoudy5.com/")
        root = self.parse_html(rsp.content)
        # Compare with None explicitly: the truth value of an lxml element
        # reflects whether it has children, not whether parsing succeeded.
        if root is None:
            return {'list': []}

        videos = []

        # Carousel
        for a in root.xpath("//div[contains(@class,'carousel')]//a[contains(@class,'stui-vodlist__thumb')]"):
            try:
                caption = a.xpath(".//span[@class='pic-text text-center']/text()")
                if caption:
                    name = caption[0].strip()
                else:
                    name = self.get_first(a.xpath("./@title"), "未知")
                style = self.get_first(a.xpath("./@style"))
                pic_match = re.search(r"background:\s*url\((.*?)\)", style)
                # CSS allows url('...') / url("..."); drop the quotes so the
                # captured value is a usable image URL.
                pic = pic_match.group(1).strip().strip("'\"") if pic_match else ""
                sid = self.get_first(a.xpath("./@href"))
                videos.append({"vod_id": sid, "vod_name": name, "vod_pic": pic, "vod_remarks": "推荐"})
            except Exception:
                # Best effort: one malformed entry must not drop the rest.
                continue

        # Video lists
        for a in root.xpath("//ul[contains(@class,'stui-vodlist')]//a[contains(@class,'stui-vodlist__thumb')]"):
            try:
                videos.append({
                    "vod_id": self.get_first(a.xpath("./@href")),
                    "vod_name": self.get_first(a.xpath("./@title"), "未知"),
                    "vod_pic": self.get_first(a.xpath("./@data-original")),
                    "vod_remarks": self.get_first(a.xpath(".//span[@class='pic-text text-right']/text()")),
                })
            except Exception:
                continue

        return {'list': videos}
    except Exception:
        # Network or parse failure: report an empty home list instead of raising.
        return {'list': []}
|
|
|
|
def categoryContent(self, tid, pg, filter, extend):
    """Fetch one page of a category listing.

    Args:
        tid: Category id placed in the list URL.
        pg: Requested page number; also used as the reported page when the
            pager's active entry is missing or not numeric.
        filter: Not used by this method.
        extend: Optional mapping of filter selections; only ``'by'`` (sort
            order, default ``'time'``) is read.

    Returns:
        dict: ``list`` (video items), ``page``, ``pagecount`` (largest page
        number linked from the pager, 1 when none is found), ``limit`` and
        ``total`` (constant placeholder values). On any failure an empty
        result for page ``pg`` is returned.
    """
    empty_result = {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0}
    try:
        order = extend.get('by', 'time') if extend else 'time'
        url = f'http://qiyoudy5.com/list/{tid}_{pg}.html?order={order}'
        rsp = self.fetch(url)
        root = self.parse_html(rsp.content)

        # Compare with None explicitly: the truth value of an lxml element
        # reflects whether it has children, not whether parsing succeeded.
        if root is None:
            return empty_result

        videos = []
        for a in root.xpath("//a[contains(@class,'stui-vodlist__thumb')]"):
            try:
                videos.append({
                    "vod_id": self.get_first(a.xpath("./@href")),
                    "vod_name": self.get_first(a.xpath("./@title"), "未知"),
                    "vod_pic": self.get_first(a.xpath("./@data-original")),
                    "vod_remarks": self.get_first(a.xpath(".//span[@class='pic-text text-right']/text()")),
                })
            except Exception:
                # Best effort: one malformed entry must not drop the rest.
                continue

        current_page = pg
        active_label = root.xpath("//ul[contains(@class,'stui-page')]//a[@class='active']/text()")
        if active_label:
            try:
                current_page = int(active_label[0])
            except ValueError:
                # A non-numeric label keeps the requested page instead of
                # discarding the videos that were already parsed.
                current_page = pg

        page_numbers = []
        for link in root.xpath("//ul[contains(@class,'stui-page')]//a[contains(@href,'list')]/@href"):
            match = re.search(r'list/\d+_(\d+)\.html', link)
            if match:
                page_numbers.append(int(match.group(1)))
        total_page = max(page_numbers) if page_numbers else 1

        return {
            'list': videos,
            'page': current_page,
            'pagecount': total_page if total_page > 0 else 9999,
            'limit': 90,
            'total': 999999
        }
    except Exception:
        # Network or parse failure: report an empty page instead of raising.
        return empty_result
|
|
|
|
def detailContent(self, array):
    """Fetch a detail page and assemble its vod record with play lists.

    Args:
        array: Sequence whose first element is the detail path (used as
            ``vod_id``); it is appended to the site origin to form the URL.

    Returns:
        dict: ``{'list': [vod]}`` on success, ``{'list': []}`` on any failure.
        ``vod_play_from`` and ``vod_play_url`` are set only when at least one
        source has episodes. Sources are joined with ``$$$``, episodes with
        ``#``, and each episode is written as ``name$url``.
    """
    try:
        tid = array[0]
        url = f'http://qiyoudy5.com{tid}'
        rsp = self.fetch(url)
        root = self.parse_html(rsp.content)

        # Compare with None explicitly: the truth value of an lxml element
        # reflects whether it has children, not whether parsing succeeded.
        if root is None:
            return {'list': []}

        # Basic information
        detail_node = root.xpath("//div[contains(@class,'stui-content__detail')]") or root.xpath("//div[@class='stui-player__detail']")
        pic = title = area = director = actor = year = desc = ""

        if detail_node:
            detail_node = detail_node[0]
            pic = self.get_first(root.xpath("//meta[@property='og:image']/@content") or detail_node.xpath(".//img/@data-original"))
            title = self.get_first(detail_node.xpath(".//h1//text()"))
            if not title:
                # Fall back to the name quoted inside the page <title>.
                page_title = self.get_first(root.xpath("//title/text()"))
                title_match = re.search(r"《(.*?)》", page_title) if page_title else None
                title = title_match.group(1) if title_match else ""

            area = self.get_first(root.xpath("//meta[@property='og:video:area']/@content"))
            director = self.get_first(root.xpath("//meta[@property='og:video:director']/@content"))
            actor = self.get_first(root.xpath("//meta[@property='og:video:actor']/@content"))
            year_info = self.get_first(root.xpath("//p[@class='data']//text()[contains(.,'年份:')]"))
            year_match = re.search(r"年份:(\d{4})", year_info) if year_info else None
            year = year_match.group(1) if year_match else ""
            desc = self.get_first(root.xpath("//meta[@property='og:description']/@content"))

        # Play lists
        playFrom, playUrl = [], []
        for tab in root.xpath("//ul[contains(@class,'nav-tabs')]/li"):
            tab_name = self.get_first(tab.xpath(".//a/text()"))
            tab_id = self.get_first(tab.xpath(".//a/@href")).replace("#", "")
            if not (tab_name and tab_id):
                continue

            episodes = []
            for episode in root.xpath(f"//div[@id='{tab_id}']//ul[contains(@class,'stui-content__playlist')]//a"):
                ep_name = self.get_first(episode.xpath("./text()")) or "播放"
                ep_url = self.get_first(episode.xpath("./@href"))
                if ep_url:
                    episodes.append(f"{ep_name}${ep_url}")

            # Record the source name and its URLs together so the two
            # "$$$"-joined fields always line up entry for entry.
            if episodes:
                playFrom.append(tab_name)
                playUrl.append("#".join(episodes))

        vod = {
            "vod_id": tid,
            "vod_name": title,
            "vod_pic": pic,
            "vod_year": year,
            "vod_area": area,
            "vod_actor": actor,
            "vod_director": director,
            "vod_content": desc
        }

        if playFrom and playUrl:
            vod['vod_play_from'] = "$$$".join(playFrom)
            vod['vod_play_url'] = "$$$".join(playUrl)

        return {'list': [vod]}
    except Exception:
        # Network, parse or indexing failure: report no detail instead of raising.
        return {'list': []}
|
|
|
|
def searchContent(self, key, quick, page='1'):
    """Search the site for ``key`` and return the matching videos.

    Args:
        key: Search keyword, posted as the ``searchword`` form field.
        quick: Not used by this method.
        page: Not used by this method; a single POST request is made.

    Returns:
        dict: ``{'list': [...]}`` of items with ``vod_id``, ``vod_name``,
        ``vod_pic`` and ``vod_remarks``, de-duplicated on (id, name). The
        list is empty on any failure.
    """
    try:
        url = "http://qiyoudy5.com/search.php"
        post_data = {'searchword': key}
        headers = {
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            "Referer": "http://qiyoudy5.com/",
            "Origin": "http://qiyoudy5.com"
        }

        rsp = self.post(url, data=post_data, headers=headers)
        root = self.parse_html(rsp.content)

        # Compare with None explicitly: the truth value of an lxml element
        # reflects whether it has children, not whether parsing succeeded.
        if root is None:
            return {'list': []}

        videos = []

        def add_video(href, title, pic, remark):
            # Strip before testing so whitespace-only titles are skipped
            # rather than stored as empty names.
            title = (title or "").strip()
            if href and title:
                videos.append({
                    "vod_id": href,
                    "vod_name": title,
                    "vod_pic": pic,
                    "vod_remarks": remark or ""
                })

        # Try several selectors, most specific first; use the first that matches.
        selectors = [
            "//ul[contains(@class,'stui-vodlist__media')]//li",
            "//ul[contains(@class,'stui-vodlist')]//li",
            "//a[contains(@class,'stui-vodlist__thumb')]"
        ]
        result_items = []
        for selector in selectors:
            result_items = root.xpath(selector)
            if result_items:
                break

        for item in result_items:
            try:
                if item.tag == 'a':
                    # The matched node is the thumbnail link itself.
                    link = item
                    pic = self.get_first(link.xpath("./@data-original"))
                else:
                    # An <li>: locate the link inside it.
                    links = item.xpath(".//a[contains(@class,'stui-vodlist__thumb')]") or item.xpath(".//a")
                    if not links:
                        continue
                    link = links[0]
                    pic = self.get_first(link.xpath("./@data-original"))
                    if not pic:
                        # Some entries carry the poster as an inline background image.
                        style = self.get_first(link.xpath("./@style"))
                        if style and "background-image" in style:
                            pic_match = re.search(r"background-image:\s*url\(['\"]?(.*?)['\"]?\)", style)
                            if pic_match:
                                pic = pic_match.group(1)

                add_video(
                    self.get_first(link.xpath("./@href")),
                    self.get_first(link.xpath("./@title")),
                    pic,
                    self.get_first(item.xpath(".//span[contains(@class,'pic-text')]/text()")),
                )
            except Exception:
                # Best effort: one malformed entry must not drop the rest.
                continue

        # Fallback: scan every link that points at a /vod/ path.
        if not videos:
            for a in root.xpath("//a[contains(@href,'/vod/')]"):
                try:
                    add_video(
                        self.get_first(a.xpath("./@href")),
                        self.get_first(a.xpath("./@title")) or self.get_first(a.xpath(".//text()")),
                        self.get_first(a.xpath("./@data-original")),
                        self.get_first(a.xpath(".//span[contains(@class,'pic-text')]/text()")),
                    )
                except Exception:
                    continue

        # De-duplicate while keeping first-seen order.
        seen = set()
        unique_videos = []
        for video in videos:
            identifier = (video["vod_id"], video["vod_name"])
            if identifier not in seen:
                seen.add(identifier)
                unique_videos.append(video)

        return {'list': unique_videos}
    except Exception:
        # Network or parse failure: report no results instead of raising.
        return {'list': []}
|
|
|
|
def playerContent(self, flag, id, vipFlags):
    """Resolve a play-page path to a stream URL.

    Strategies are tried in order and the first ``.m3u8`` URL found wins:

    1. the ``content.php`` API link referenced by the page,
    2. pages embedded through an iframe or a ``/play/`` ``src`` attribute,
    3. a ``.m3u8`` link present in the play page itself.

    When none succeeds, the play-page URL is returned with ``parse`` set to
    1 (versus 0 for a direct stream URL).

    Args:
        flag: Not used by this method.
        id: Play-page path; appended to the site origin to form the URL.
        vipFlags: Not used by this method.

    Returns:
        dict: ``parse``, ``playUrl``, ``url`` and ``header``.
    """
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
    # Compiled once; the same pattern is applied to every fetched document.
    m3u8_pattern = re.compile(r'http[s]?://[^\s"\']+\.m3u8[^\s"\']*')

    try:
        url = f"http://qiyoudy5.com{id}"
        rsp = self.fetch(url)
        _, html_content = self.parse_html(rsp.content, return_content=True)

        headers = {
            "User-Agent": user_agent,
            "Referer": url,
        }

        def direct_result(stream_url):
            return {"parse": 0, "playUrl": "", "url": stream_url, "header": headers}

        # 1. API link
        for pattern in [r"http://api\.yongfan99\.com:81/content\.php\?[^'\"]+", r"content\.php\?vid=[^&]+&type=[^'\"]+"]:
            match = re.search(pattern, html_content)
            if not match:
                continue
            api_url = match.group(0)
            if not api_url.startswith('http'):
                api_url = "http://api.yongfan99.com:81/" + api_url
            try:
                api_rsp = self.fetch(api_url, headers=headers)
                m3u8_match = m3u8_pattern.search(api_rsp.text)
                if m3u8_match:
                    return direct_result(m3u8_match.group(0))
            except Exception:
                # Best effort: fall through to the next strategy.
                pass

        # 2. Player embedded through an iframe
        for pattern in [r'<iframe[^>]*src=[\'"]([^\'"]+)[\'"][^>]*>', r'src\s*=\s*[\'"]((?:http[^\'"]*)?/play/[^\'"]*)[\'"]']:
            for iframe_src in re.findall(pattern, html_content):
                if not iframe_src.startswith('http'):
                    iframe_src = urljoin(url, iframe_src)
                try:
                    iframe_rsp = self.fetch(iframe_src, headers=headers)
                    m3u8_match = m3u8_pattern.search(iframe_rsp.text)
                    if m3u8_match:
                        return direct_result(m3u8_match.group(0))
                except Exception:
                    continue

        # 3. m3u8 link directly in the play page
        m3u8_match = m3u8_pattern.search(html_content)
        if m3u8_match:
            return direct_result(m3u8_match.group(0))

        # 4. Hand the page URL back for external parsing
        return {"parse": 1, "playUrl": "", "url": url, "header": headers}

    except Exception:
        # The play page itself could not be fetched or parsed: still hand the
        # URL back for external parsing, with the site root as referer.
        return {"parse": 1, "playUrl": "", "url": f"http://qiyoudy5.com{id}", "header": {
            "User-Agent": user_agent,
            "Referer": "http://qiyoudy5.com/",
        }}
|
|
|
|
# Helper functions
|
|
def parse_html(self, content, return_content=False):
    """Decode a response body and parse it into an lxml HTML tree.

    Bytes are decoded strictly as utf-8, gbk, then gb2312; the first that
    succeeds is used. If all three fail, iso-8859-1 is used as the last
    resort. The text is passed through ``clean_html`` before parsing.

    Args:
        content: Response body as ``bytes``; an already-decoded ``str`` is
            accepted as-is.
        return_content: When true, also return the cleaned text.

    Returns:
        The value of ``etree.HTML`` for the cleaned text, or the tuple
        ``(root, html_content)`` when ``return_content`` is true.
    """
    if isinstance(content, str):
        # Already text: nothing to decode.
        html_content = content
    else:
        html_content = None
        for encoding in ('utf-8', 'gbk', 'gb2312'):
            try:
                html_content = content.decode(encoding)
                break
            except UnicodeDecodeError:
                continue
        if html_content is None:
            # iso-8859-1 maps every byte value, so this decode cannot fail
            # and no further fallback is needed.
            html_content = content.decode('iso-8859-1')

    html_content = self.clean_html(html_content)
    root = etree.HTML(html_content)

    if return_content:
        return root, html_content
    return root
|
|
|
|
def get_first(self, array, default=""):
    """Return the first element of ``array``, or ``default`` when it is empty or falsy."""
    if not array:
        return default
    return array[0]
|
|
|
|
def clean_html(self, html_content):
    """Remove control characters from HTML text and normalise no-break spaces.

    Characters U+0000-U+0008, U+000B, U+000C, U+000E-U+001F and U+007F are
    deleted; tab, line feed and carriage return are kept.

    Args:
        html_content: Decoded HTML text.

    Returns:
        str: The cleaned text.
    """
    html_content = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', html_content)
    # The previous replacement table mapped '&', '<', '>' and '"' to
    # themselves, so those passes over the document changed nothing and
    # have been dropped.
    # NOTE(review): the table's first entry rendered as a space mapped to a
    # space; it is treated here as a no-break space (U+00A0) and written as
    # an explicit escape - confirm against the upstream file.
    return html_content.replace('\u00a0', ' ')
|
|
|
|
def isVideoFormat(self, url):
    """Report whether ``url`` contains one of the known video file extensions."""
    for extension in ('.m3u8', '.mp4', '.avi', '.mkv', '.flv', '.webm'):
        if extension in url:
            return True
    return False
|
|
|
|
def manualVideoCheck(self):
    """Always return ``True``."""
    # NOTE(review): presumably tells the host app to rely on isVideoFormat
    # for detecting playable URLs - confirm against the base Spider contract.
    manual_check_enabled = True
    return manual_check_enabled
|
|
|
|
def localProxy(self, param):
    """Return an empty dict; ``param`` is not read."""
    # The stray trailing "|" after the dict literal left the return statement
    # syntactically incomplete; it has been removed.
    return {}