更新
This commit is contained in:
@@ -0,0 +1,197 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# by @嗷呜
|
||||
import re
|
||||
import sys
|
||||
from urllib.parse import quote, urlparse
|
||||
from Crypto.Hash import SHA256
|
||||
sys.path.append("..")
|
||||
import json
|
||||
import time
|
||||
from pyquery import PyQuery as pq
|
||||
from base.spider import Spider
|
||||
|
||||
class Spider(Spider):
|
||||
|
||||
def init(self, extend=""):
|
||||
pass
|
||||
|
||||
def getName(self):
|
||||
pass
|
||||
|
||||
def isVideoFormat(self, url):
|
||||
pass
|
||||
|
||||
def manualVideoCheck(self):
|
||||
pass
|
||||
|
||||
def action(self, action):
|
||||
pass
|
||||
|
||||
def destroy(self):
|
||||
pass
|
||||
|
||||
host='https://www.knvod.com'
|
||||
|
||||
headers = {
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
|
||||
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
||||
'Sec-Fetch-Dest': 'document',
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
|
||||
'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="134", "Google Chrome";v="134"',
|
||||
'sec-ch-ua-platform': '"macOS"',
|
||||
'Origin': host,
|
||||
'Referer': f"{host}/",
|
||||
'Cookie':'X-Robots-Tag=CDN-VERIFY'
|
||||
}
|
||||
|
||||
def homeContent(self, filter):
|
||||
data=self.getpq(self.fetch(self.host,headers=self.headers).text)
|
||||
result = {}
|
||||
classes = []
|
||||
for k in data('.head-more.box a').items():
|
||||
i=k.attr('href')
|
||||
if i and '/show' in i:
|
||||
classes.append({
|
||||
'type_name': k.text(),
|
||||
'type_id': re.findall(r'\d+', i)[0]
|
||||
})
|
||||
result['class'] = classes
|
||||
result['list']=self.getlist(data('.border-box.public-r .public-list-div'))
|
||||
return result
|
||||
|
||||
def homeVideoContent(self):
|
||||
pass
|
||||
|
||||
def categoryContent(self, tid, pg, filter, extend):
|
||||
data=self.getpq(self.fetch(f"{self.host}/show/{tid}--------{pg}---/",headers=self.headers).text)
|
||||
result = {}
|
||||
result['list'] = self.getlist(data('.border-box.public-r .public-list-div'))
|
||||
result['page'] = pg
|
||||
result['pagecount'] = 9999
|
||||
result['limit'] = 90
|
||||
result['total'] = 999999
|
||||
return result
|
||||
|
||||
def detailContent(self, ids):
|
||||
data = self.getpq(self.fetch(f"{self.host}/list/{ids[0]}/", headers=self.headers).text)
|
||||
v=data('.detail-info.lightSpeedIn .slide-info')
|
||||
vod = {
|
||||
'vod_year': v.eq(-1).text().split(':',1)[-1],
|
||||
'vod_remarks': v.eq(0),
|
||||
'vod_actor': v.eq(3).text().split(':',1)[-1],
|
||||
'vod_director': v.eq(2).text().split(':',1)[-1],
|
||||
'vod_content': data('.switch-box #height_limit').text()
|
||||
}
|
||||
np=data('.anthology.wow.fadeInUp')
|
||||
ndata=np('.anthology-tab .swiper-wrapper .swiper-slide')
|
||||
pdata=np('.anthology-list .anthology-list-box ul')
|
||||
play,names=[],[]
|
||||
for i in range(len(ndata)):
|
||||
n=ndata.eq(i)('a')
|
||||
n('span').remove()
|
||||
names.append(n.text())
|
||||
vs=[]
|
||||
for v in pdata.eq(i)('li').items():
|
||||
vs.append(f"{v.text()}${v('a').attr('href')}")
|
||||
play.append('#'.join(vs))
|
||||
vod["vod_play_from"] = "$$$".join(names)
|
||||
vod["vod_play_url"] = "$$$".join(play)
|
||||
result = {"list": [vod]}
|
||||
return result
|
||||
|
||||
def searchContent(self, key, quick, pg="1"):
|
||||
data = self.fetch(f"{self.host}/index.php/ajax/suggest?mid=1&wd={key}&limit=9999×tamp={int(time.time()*1000)}", headers=self.headers).json()
|
||||
videos=[]
|
||||
for i in data['list']:
|
||||
videos.append({
|
||||
'vod_id': i['id'],
|
||||
'vod_name': i['name'],
|
||||
'vod_pic': i['pic']
|
||||
})
|
||||
return {'list':videos,'page':pg}
|
||||
|
||||
def playerContent(self, flag, id, vipFlags):
|
||||
h={
|
||||
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.8 Mobile/15E148 Safari/604.1',
|
||||
'Origin': self.host
|
||||
}
|
||||
data = self.getpq(self.fetch(f"{self.host}{id}", headers=self.headers).text)
|
||||
try:
|
||||
jstr = data('.player-box .player-left script').eq(1).text()
|
||||
jsdata = json.loads(jstr.split('=',1)[-1])
|
||||
url = jsdata.get('url')
|
||||
if not re.search(r'\.m3u8|\.mp4',jsdata['url']):
|
||||
jxd=self.fetch(f"{self.host}/static/player/{jsdata['from']}.js", headers=self.headers).text
|
||||
jx=re.search(r'http.*?url=', jxd)
|
||||
if not jx:raise Exception('未找到jx')
|
||||
parsed_url = urlparse(jx.group())
|
||||
jxhost = parsed_url.scheme + "://" + parsed_url.netloc
|
||||
title=data('head title').eq(0).text().split('-')[0]
|
||||
next=f"{self.host.split('//')[-1]}{jsdata['link_next']}" if jsdata.get('link_next') else ''
|
||||
cd=self.fetch(f"{jx.group()}{jsdata['url']}&next=//{next}&title={quote(title)}", headers=self.headers).text
|
||||
match = re.search(r'var\s+config\s*=\s*(\{[\s\S]*?\})', cd)
|
||||
if not match:raise Exception('未找到config')
|
||||
cm=re.sub(r',\s*}(?=\s*$)', '}', match.group(1))
|
||||
config=json.loads(cm)
|
||||
config.update({'key':self.sha256(f"{self.gettime()}knvod")})
|
||||
config.pop('next',None)
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.8 Mobile/15E148 Safari/604.1',
|
||||
'Accept': 'application/json, text/javascript, */*; q=0.01',
|
||||
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
||||
'Cache-Control': 'no-cache',
|
||||
'DNT': '1',
|
||||
'Origin': jxhost,
|
||||
'Pragma': 'no-cache',
|
||||
'Sec-Fetch-Dest': 'empty',
|
||||
'Sec-Fetch-Mode': 'cors',
|
||||
'Sec-Fetch-Site': 'same-origin',
|
||||
'Sec-Fetch-Storage-Access': 'active',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
}
|
||||
h['Origin']=jxhost
|
||||
jd=self.post(f"{jxhost}/post.php", headers=headers, data=json.dumps(config))
|
||||
data=json.loads(jd.content.decode('utf-8-sig'))
|
||||
url=data.get('knvod')
|
||||
p = 0
|
||||
if not url:raise Exception('未找到播放地址')
|
||||
except Exception as e:
|
||||
print('错误信息:',e)
|
||||
p,url=1,f"{self.host}{id}"
|
||||
return {"parse": p, "url": url, "header": h}
|
||||
|
||||
def localProxy(self, param):
|
||||
pass
|
||||
|
||||
def getlist(self,data):
|
||||
videos=[]
|
||||
for i in data.items():
|
||||
id = i('a').attr('href')
|
||||
if id:
|
||||
id = re.search(r'\d+', id).group(0)
|
||||
img = i('img').attr('data-src')
|
||||
if img and 'url=' in img and 'http' not in img: img = f'{self.host}{img}'
|
||||
videos.append({
|
||||
'vod_id': id,
|
||||
'vod_name': i('a').attr('title'),
|
||||
'vod_pic': img,
|
||||
'vod_remarks': i('.public-prt').text() or i('.public-list-prb').text()
|
||||
})
|
||||
return videos
|
||||
|
||||
def getpq(self, data):
|
||||
try:
|
||||
return pq(data)
|
||||
except Exception as e:
|
||||
print(f"{str(e)}")
|
||||
return pq(data.encode('utf-8'))
|
||||
|
||||
def gettime(self):
|
||||
current_time = int(time.time())
|
||||
hourly_timestamp = current_time - (current_time % 3600)
|
||||
return hourly_timestamp
|
||||
|
||||
def sha256(self, text):
|
||||
sha = SHA256.new()
|
||||
sha.update(text.encode())
|
||||
return sha.hexdigest()
|
||||
@@ -0,0 +1,151 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# by @嗷呜
|
||||
import sys
|
||||
sys.path.append('..')
|
||||
from base.spider import Spider
|
||||
|
||||
|
||||
class Spider(Spider):
|
||||
|
||||
def init(self, extend=""):
|
||||
pass
|
||||
|
||||
def getName(self):
|
||||
pass
|
||||
|
||||
def isVideoFormat(self, url):
|
||||
pass
|
||||
|
||||
def manualVideoCheck(self):
|
||||
pass
|
||||
|
||||
def destroy(self):
|
||||
pass
|
||||
|
||||
ahost='https://api.cenguigui.cn'
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
|
||||
'sec-ch-ua-platform': '"macOS"',
|
||||
'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="134", "Google Chrome";v="134"',
|
||||
'DNT': '1',
|
||||
'sec-ch-ua-mobile': '?0',
|
||||
'Sec-Fetch-Site': 'cross-site',
|
||||
'Sec-Fetch-Mode': 'no-cors',
|
||||
'Sec-Fetch-Dest': 'video',
|
||||
'Sec-Fetch-Storage-Access': 'active',
|
||||
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
||||
}
|
||||
|
||||
def homeContent(self, filter):
|
||||
result = {'class': [{'type_id': '推荐榜', 'type_name': '🔥 推荐榜'},
|
||||
{'type_id': '新剧', 'type_name': '🎬 新剧'},
|
||||
{'type_id': '逆袭', 'type_name': '🎬 逆袭'},
|
||||
{'type_id': '霸总', 'type_name': '🎬 霸总'},
|
||||
{'type_id': '现代言情', 'type_name': '🎬 现代言情'},
|
||||
{'type_id': '打脸虐渣', 'type_name': '🎬 打脸虐渣'},
|
||||
{'type_id': '豪门恩怨', 'type_name': '🎬 豪门恩怨'},
|
||||
{'type_id': '神豪', 'type_name': '🎬 神豪'},
|
||||
{'type_id': '马甲', 'type_name': '🎬 马甲'},
|
||||
{'type_id': '都市日常', 'type_name': '🎬 都市日常'},
|
||||
{'type_id': '战神归来', 'type_name': '🎬 战神归来'},
|
||||
{'type_id': '小人物', 'type_name': '🎬 小人物'},
|
||||
{'type_id': '女性成长', 'type_name': '🎬 女性成长'},
|
||||
{'type_id': '大女主', 'type_name': '🎬 大女主'},
|
||||
{'type_id': '穿越', 'type_name': '🎬 穿越'},
|
||||
{'type_id': '都市修仙', 'type_name': '🎬 都市修仙'},
|
||||
{'type_id': '强者回归', 'type_name': '🎬 强者回归'},
|
||||
{'type_id': '亲情', 'type_name': '🎬 亲情'},
|
||||
{'type_id': '古装', 'type_name': '🎬 古装'},
|
||||
{'type_id': '重生', 'type_name': '🎬 重生'},
|
||||
{'type_id': '闪婚', 'type_name': '🎬 闪婚'},
|
||||
{'type_id': '赘婿逆袭', 'type_name': '🎬 赘婿逆袭'},
|
||||
{'type_id': '虐恋', 'type_name': '🎬 虐恋'},
|
||||
{'type_id': '追妻', 'type_name': '🎬 追妻'},
|
||||
{'type_id': '天下无敌', 'type_name': '🎬 天下无敌'},
|
||||
{'type_id': '家庭伦理', 'type_name': '🎬 家庭伦理'},
|
||||
{'type_id': '萌宝', 'type_name': '🎬 萌宝'},
|
||||
{'type_id': '古风权谋', 'type_name': '🎬 古风权谋'},
|
||||
{'type_id': '职场', 'type_name': '🎬 职场'},
|
||||
{'type_id': '奇幻脑洞', 'type_name': '🎬 奇幻脑洞'},
|
||||
{'type_id': '异能', 'type_name': '🎬 异能'},
|
||||
{'type_id': '无敌神医', 'type_name': '🎬 无敌神医'},
|
||||
{'type_id': '古风言情', 'type_name': '🎬 古风言情'},
|
||||
{'type_id': '传承觉醒', 'type_name': '🎬 传承觉醒'},
|
||||
{'type_id': '现言甜宠', 'type_name': '🎬 现言甜宠'},
|
||||
{'type_id': '奇幻爱情', 'type_name': '🎬 奇幻爱情'},
|
||||
{'type_id': '乡村', 'type_name': '🎬 乡村'},
|
||||
{'type_id': '历史古代', 'type_name': '🎬 历史古代'},
|
||||
{'type_id': '王妃', 'type_name': '🎬 王妃'},
|
||||
{'type_id': '高手下山', 'type_name': '🎬 高手下山'},
|
||||
{'type_id': '娱乐圈', 'type_name': '🎬 娱乐圈'},
|
||||
{'type_id': '强强联合', 'type_name': '🎬 强强联合'},
|
||||
{'type_id': '破镜重圆', 'type_name': '🎬 破镜重圆'},
|
||||
{'type_id': '暗恋成真', 'type_name': '🎬 暗恋成真'},
|
||||
{'type_id': '民国', 'type_name': '🎬 民国'},
|
||||
{'type_id': '欢喜冤家', 'type_name': '🎬 欢喜冤家'},
|
||||
{'type_id': '系统', 'type_name': '🎬 系统'},
|
||||
{'type_id': '真假千金', 'type_name': '🎬 真假千金'},
|
||||
{'type_id': '龙王', 'type_name': '🎬 龙王'},
|
||||
{'type_id': '校园', 'type_name': '🎬 校园'},
|
||||
{'type_id': '穿书', 'type_name': '🎬 穿书'},
|
||||
{'type_id': '女帝', 'type_name': '🎬 女帝'},
|
||||
{'type_id': '团宠', 'type_name': '🎬 团宠'},
|
||||
{'type_id': '年代爱情', 'type_name': '🎬 年代爱情'},
|
||||
{'type_id': '玄幻仙侠', 'type_name': '🎬 玄幻仙侠'},
|
||||
{'type_id': '青梅竹马', 'type_name': '🎬 青梅竹马'},
|
||||
{'type_id': '悬疑推理', 'type_name': '🎬 悬疑推理'},
|
||||
{'type_id': '皇后', 'type_name': '🎬 皇后'},
|
||||
{'type_id': '替身', 'type_name': '🎬 替身'},
|
||||
{'type_id': '大叔', 'type_name': '🎬 大叔'},
|
||||
{'type_id': '喜剧', 'type_name': '🎬 喜剧'},
|
||||
{'type_id': '剧情', 'type_name': '🎬 剧情'}]}
|
||||
return result
|
||||
|
||||
def homeVideoContent(self):
|
||||
pass
|
||||
|
||||
def categoryContent(self, tid, pg, filter, extend):
|
||||
params = {
|
||||
'classname': tid,
|
||||
'offset': str((int(pg) - 1)),
|
||||
}
|
||||
data = self.fetch(f'{self.ahost}/api/duanju/api.php', params=params, headers=self.headers).json()
|
||||
videos = []
|
||||
for k in data['data']:
|
||||
videos.append({
|
||||
'vod_id': k.get('book_id'),
|
||||
'vod_name': k.get('title'),
|
||||
'vod_pic': k.get('cover'),
|
||||
'vod_year': k.get('score'),
|
||||
'vod_remarks': f"{k.get('sub_title')}|{k.get('episode_cnt')}"
|
||||
})
|
||||
result = {}
|
||||
result['list'] = videos
|
||||
result['page'] = pg
|
||||
result['pagecount'] = 9999
|
||||
result['limit'] = 90
|
||||
result['total'] = 999999
|
||||
return result
|
||||
|
||||
def detailContent(self, ids):
|
||||
v=self.fetch(f'{self.ahost}/api/duanju/api.php', params={'book_id': ids[0]}, headers=self.headers).json()
|
||||
vod = {
|
||||
'type_name': v.get('category'),
|
||||
'vod_year': v.get('time'),
|
||||
'vod_remarks': v.get('duration'),
|
||||
'vod_content': v.get('desc'),
|
||||
'vod_play_from': '嗷呜爱看短剧',
|
||||
'vod_play_url': '#'.join([f"{i['title']}${i['video_id']}" for i in v['data']])
|
||||
}
|
||||
return {'list':[vod]}
|
||||
|
||||
def searchContent(self, key, quick, pg="1"):
|
||||
return self.categoryContent(key, pg, True, {})
|
||||
|
||||
def playerContent(self, flag, id, vipFlags):
|
||||
data=self.fetch(f'{self.ahost}/api/duanju/api.php', params={'video_id': id}, headers=self.headers).json()
|
||||
return {'parse': 0, 'url': data['data']['url'], 'header': self.headers}
|
||||
|
||||
def localProxy(self, param):
|
||||
pass
|
||||
@@ -0,0 +1,127 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# by @嗷呜
|
||||
import re
|
||||
import sys
|
||||
from pyquery import PyQuery as pq
|
||||
sys.path.append('..')
|
||||
from base.spider import Spider
|
||||
|
||||
class Spider(Spider):
|
||||
|
||||
def init(self, extend=""):
|
||||
pass
|
||||
|
||||
def getName(self):
|
||||
pass
|
||||
|
||||
def isVideoFormat(self, url):
|
||||
pass
|
||||
|
||||
def manualVideoCheck(self):
|
||||
pass
|
||||
|
||||
def destroy(self):
|
||||
pass
|
||||
|
||||
host='https://www.hongguodj.cc'
|
||||
|
||||
headers = {
|
||||
'Accept': '*/*',
|
||||
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
||||
'Cache-Control': 'no-cache',
|
||||
'Connection': 'keep-alive',
|
||||
'DNT': '1',
|
||||
'Origin': host,
|
||||
'Pragma': 'no-cache',
|
||||
'Sec-Fetch-Dest': 'empty',
|
||||
'Sec-Fetch-Mode': 'cors',
|
||||
'Sec-Fetch-Site': 'cross-site',
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
|
||||
'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="134", "Google Chrome";v="134"',
|
||||
'sec-ch-ua-mobile': '?0',
|
||||
'sec-ch-ua-platform': '"macOS"',
|
||||
}
|
||||
|
||||
def homeContent(self, filter):
|
||||
result = {}
|
||||
classes = []
|
||||
vlist = []
|
||||
data = pq(self.fetch(self.host, headers=self.headers).text)
|
||||
for i in list(data('.slip li').items())[1:]:
|
||||
classes.append({
|
||||
'type_name': i.text(),
|
||||
'type_id': re.findall(r'\d+', i('a').attr('href'))[0]
|
||||
})
|
||||
for i in data('.wrap .rows').items():
|
||||
vlist.extend(self.getlist(i('li')))
|
||||
result['class'] = classes
|
||||
result['list'] = vlist
|
||||
return result
|
||||
|
||||
def homeVideoContent(self):
|
||||
pass
|
||||
|
||||
def categoryContent(self, tid, pg, filter, extend):
|
||||
data=pq(self.fetch(f'{self.host}/type/{tid}-{pg}.html', headers=self.headers).text)
|
||||
result = {}
|
||||
result['list'] = self.getlist(data('.list ul li'))
|
||||
result['page'] = pg
|
||||
result['pagecount'] = 9999
|
||||
result['limit'] = 90
|
||||
result['total'] = 999999
|
||||
return result
|
||||
|
||||
def detailContent(self, ids):
|
||||
data=pq(self.fetch(f'{self.host}{ids[0]}', headers=self.headers).text)
|
||||
v=data('.info')
|
||||
p=v('p')
|
||||
vod = {
|
||||
'vod_name': v('h1').text(),
|
||||
'type_name': p.eq(2).text(),
|
||||
'vod_year': p.eq(3).text(),
|
||||
'vod_area': p.eq(4).text(),
|
||||
'vod_remarks': v('em').text(),
|
||||
'vod_actor': p.eq(0).text(),
|
||||
'vod_director': p.eq(1).text(),
|
||||
'vod_content': data('#desc .text').text(),
|
||||
'vod_play_from': '',
|
||||
'vod_play_url': ''
|
||||
}
|
||||
names = [i.text() for i in data('.title.slip a').items()]
|
||||
plist=[]
|
||||
for i in data('.play-list ul').items():
|
||||
plist.append('#'.join([f'{j("a").text()}${j("a").attr("href")}' for j in i('li').items()]))
|
||||
vod['vod_play_from'] = '$$$'.join(names)
|
||||
vod['vod_play_url'] = '$$$'.join(plist)
|
||||
return {'list': [vod]}
|
||||
|
||||
def searchContent(self, key, quick, pg="1"):
|
||||
data=pq(self.fetch(f'{self.host}/search/{key}----------{pg}---.html', headers=self.headers).text)
|
||||
return {'list': self.getlist(data('.show.rows li')),'page':pg}
|
||||
|
||||
def playerContent(self, flag, id, vipFlags):
|
||||
p=0
|
||||
uid=f'{self.host}{id}'
|
||||
data=pq(self.fetch(uid, headers=self.headers).text)
|
||||
url=data('.video.ratio').attr('data-play')
|
||||
if not url:
|
||||
url = uid
|
||||
p = 1
|
||||
return {'parse': p, 'url': url, 'header': self.headers}
|
||||
|
||||
def localProxy(self, param):
|
||||
pass
|
||||
|
||||
def getlist(self,data):
|
||||
vlist = []
|
||||
for j in data.items():
|
||||
vlist.append({
|
||||
'vod_id': j('a').attr('href'),
|
||||
'vod_name': j('img').attr('alt'),
|
||||
'vod_pic': self.host + j('img').attr('data-src'),
|
||||
'vod_year': j('.bg').text(),
|
||||
'vod_remarks': j('p').text()
|
||||
})
|
||||
return vlist
|
||||
|
||||
|
||||
@@ -0,0 +1,147 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# by @嗷呜
|
||||
import base64
|
||||
import re
|
||||
import sys
|
||||
from Crypto.Cipher import AES
|
||||
from Crypto.Util.Padding import unpad
|
||||
from pyquery import PyQuery as pq
|
||||
sys.path.append('..')
|
||||
from base.spider import Spider
|
||||
|
||||
|
||||
class Spider(Spider):
|
||||
|
||||
def init(self, extend=""):
|
||||
pass
|
||||
|
||||
def getName(self):
|
||||
pass
|
||||
|
||||
def isVideoFormat(self, url):
|
||||
pass
|
||||
|
||||
def manualVideoCheck(self):
|
||||
pass
|
||||
|
||||
def destroy(self):
|
||||
pass
|
||||
|
||||
host = 'https://www.jdys.art'
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
|
||||
'sec-ch-ua-platform': '"macOS"',
|
||||
'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="134", "Google Chrome";v="134"',
|
||||
'dnt': '1',
|
||||
'sec-ch-ua-mobile': '?0',
|
||||
'origin': host,
|
||||
'sec-fetch-site': 'cross-site',
|
||||
'sec-fetch-mode': 'cors',
|
||||
'sec-fetch-dest': 'empty',
|
||||
'referer': f'{host}/',
|
||||
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
||||
'priority': 'u=1, i',
|
||||
}
|
||||
|
||||
def homeContent(self, filter):
|
||||
data = self.getpq(self.fetch(self.host, headers=self.headers).text)
|
||||
result = {}
|
||||
classes = []
|
||||
for k in list(data('.navtop .navlist li').items())[:9]:
|
||||
classes.append({
|
||||
'type_name': k('a').text(),
|
||||
'type_id': k('a').attr('href'),
|
||||
})
|
||||
result['class'] = classes
|
||||
result['list'] = self.getlist(data('.mi_btcon .bt_img ul li'))
|
||||
return result
|
||||
|
||||
def homeVideoContent(self):
|
||||
pass
|
||||
|
||||
def categoryContent(self, tid, pg, filter, extend):
|
||||
data = self.getpq(self.fetch(f"{tid}{'' if pg == '1' else f'page/{pg}/'}", headers=self.headers).text)
|
||||
result = {}
|
||||
result['list'] = self.getlist(data('.mi_cont .bt_img ul li'))
|
||||
result['page'] = pg
|
||||
result['pagecount'] = 9999
|
||||
result['limit'] = 90
|
||||
result['total'] = 999999
|
||||
return result
|
||||
|
||||
def detailContent(self, ids):
|
||||
data = self.getpq(self.fetch(ids[0], headers=self.headers).text)
|
||||
data2 = data('.moviedteail_list li')
|
||||
vod = {
|
||||
'vod_name': data('.dytext h1').text(),
|
||||
'type_name': data2.eq(0).text(),
|
||||
'vod_year': data2.eq(2).text(),
|
||||
'vod_area': data2.eq(1).text(),
|
||||
'vod_remarks': data2.eq(4).text(),
|
||||
'vod_actor': data2.eq(7).text(),
|
||||
'vod_director': data2.eq(5).text(),
|
||||
'vod_content': data('.yp_context').text().strip()
|
||||
}
|
||||
vdata = data('.paly_list_btn a')
|
||||
play = []
|
||||
for i in vdata.items():
|
||||
a = i.text() + "$" + i.attr.href
|
||||
play.append(a)
|
||||
vod["vod_play_from"] = "在线播放"
|
||||
vod["vod_play_url"] = "#".join(play)
|
||||
result = {"list": [vod]}
|
||||
return result
|
||||
|
||||
def searchContent(self, key, quick, pg="1"):
|
||||
data = self.getpq(self.fetch(f"{self.host}/page/{pg}/?s={key}", headers=self.headers).text)
|
||||
return {'list': self.getlist(data('.mi_cont .bt_img ul li')), 'page': pg}
|
||||
|
||||
def playerContent(self, flag, id, vipFlags):
|
||||
data = self.getpq(self.fetch(id, headers=self.headers).text)
|
||||
try:
|
||||
sc = data('.videoplay script').eq(-1).text()
|
||||
strd = re.findall(r'var\s+[^=]*=\s*"([^"]*)";', sc)
|
||||
kdata = re.findall(r'parse\((.*?)\);', sc)
|
||||
jm = self.aes(strd[0], kdata[0].replace('"', ''), kdata[1].replace('"', ''))
|
||||
url = re.search(r'url: "(.*?)"', jm).group(1)
|
||||
p = 0
|
||||
except:
|
||||
p = 1
|
||||
url = id
|
||||
result = {}
|
||||
result["parse"] = p
|
||||
result["url"] = url
|
||||
result["header"] = self.headers
|
||||
return result
|
||||
|
||||
def localProxy(self, param):
|
||||
pass
|
||||
|
||||
def getpq(self, text):
|
||||
try:
|
||||
return pq(text)
|
||||
except Exception as e:
|
||||
print(f"{str(e)}")
|
||||
return pq(text.encode('utf-8'))
|
||||
|
||||
def getlist(self, data):
|
||||
videos = []
|
||||
for i in data.items():
|
||||
videos.append({
|
||||
'vod_id': i('a').attr('href'),
|
||||
'vod_name': i('a img').attr('alt'),
|
||||
'vod_pic': i('a img').attr('src'),
|
||||
'vod_remarks': i('.dycategory').text(),
|
||||
'vod_year': i('.dyplayinfo').text() or i('.rating').text(),
|
||||
})
|
||||
return videos
|
||||
|
||||
def aes(self, word, key, iv):
|
||||
key = key.encode('utf-8')
|
||||
iv = iv.encode('utf-8')
|
||||
encrypted_data = base64.b64decode(word)
|
||||
cipher = AES.new(key, AES.MODE_CBC, iv)
|
||||
decrypted_data = cipher.decrypt(encrypted_data)
|
||||
decrypted_data = unpad(decrypted_data, AES.block_size)
|
||||
return decrypted_data.decode('utf-8')
|
||||
@@ -0,0 +1,225 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# by @嗷呜
|
||||
import json
|
||||
import sys
|
||||
import threading
|
||||
import uuid
|
||||
import requests
|
||||
sys.path.append('..')
|
||||
from base.spider import Spider
|
||||
import time
|
||||
from Crypto.Hash import MD5, SHA1
|
||||
|
||||
class Spider(Spider):
|
||||
'''
|
||||
配置示例:
|
||||
{
|
||||
"key": "xxxx",
|
||||
"name": "xxxx",
|
||||
"type": 3,
|
||||
"api": ".所在路径/金牌.py",
|
||||
"searchable": 1,
|
||||
"quickSearch": 1,
|
||||
"filterable": 1,
|
||||
"changeable": 1,
|
||||
"ext": {
|
||||
"site": "https://www.jiabaide.cn,域名2,域名3"
|
||||
}
|
||||
},
|
||||
'''
|
||||
def init(self, extend=""):
|
||||
if extend:
|
||||
hosts=json.loads(extend)['site']
|
||||
self.host = self.host_late(hosts)
|
||||
pass
|
||||
|
||||
def getName(self):
|
||||
pass
|
||||
|
||||
def isVideoFormat(self, url):
|
||||
pass
|
||||
|
||||
def manualVideoCheck(self):
|
||||
pass
|
||||
|
||||
def destroy(self):
|
||||
pass
|
||||
|
||||
def homeContent(self, filter):
|
||||
cdata = self.fetch(f"{self.host}/api/mw-movie/anonymous/get/filer/type", headers=self.getheaders()).json()
|
||||
fdata = self.fetch(f"{self.host}/api/mw-movie/anonymous/v1/get/filer/list", headers=self.getheaders()).json()
|
||||
result = {}
|
||||
classes = []
|
||||
filters={}
|
||||
for k in cdata['data']:
|
||||
classes.append({
|
||||
'type_name': k['typeName'],
|
||||
'type_id': str(k['typeId']),
|
||||
})
|
||||
sort_values = [{"n": "最近更新", "v": "2"},{"n": "人气高低", "v": "3"}, {"n": "评分高低", "v": "4"}]
|
||||
for tid, d in fdata['data'].items():
|
||||
current_sort_values = sort_values.copy()
|
||||
if tid == '1':
|
||||
del current_sort_values[0]
|
||||
filters[tid] = [
|
||||
{"key": "type", "name": "类型",
|
||||
"value": [{"n": i["itemText"], "v": i["itemValue"]} for i in d["typeList"]]},
|
||||
|
||||
*([] if not d["plotList"] else [{"key": "v_class", "name": "剧情",
|
||||
"value": [{"n": i["itemText"], "v": i["itemText"]}
|
||||
for i in d["plotList"]]}]),
|
||||
|
||||
{"key": "area", "name": "地区",
|
||||
"value": [{"n": i["itemText"], "v": i["itemText"]} for i in d["districtList"]]},
|
||||
|
||||
{"key": "year", "name": "年份",
|
||||
"value": [{"n": i["itemText"], "v": i["itemText"]} for i in d["yearList"]]},
|
||||
|
||||
{"key": "lang", "name": "语言",
|
||||
"value": [{"n": i["itemText"], "v": i["itemText"]} for i in d["languageList"]]},
|
||||
|
||||
{"key": "sort", "name": "排序", "value": current_sort_values}
|
||||
]
|
||||
result['class'] = classes
|
||||
result['filters'] = filters
|
||||
return result
|
||||
|
||||
def homeVideoContent(self):
|
||||
data1 = self.fetch(f"{self.host}/api/mw-movie/anonymous/v1/home/all/list", headers=self.getheaders()).json()
|
||||
data2=self.fetch(f"{self.host}/api/mw-movie/anonymous/home/hotSearch",headers=self.getheaders()).json()
|
||||
data=[]
|
||||
for i in data1['data'].values():
|
||||
data.extend(i['list'])
|
||||
data.extend(data2['data'])
|
||||
vods=self.getvod(data)
|
||||
return {'list':vods}
|
||||
|
||||
def categoryContent(self, tid, pg, filter, extend):
|
||||
|
||||
params = {
|
||||
"area": extend.get('area', ''),
|
||||
"filterStatus": "1",
|
||||
"lang": extend.get('lang', ''),
|
||||
"pageNum": pg,
|
||||
"pageSize": "30",
|
||||
"sort": extend.get('sort', '1'),
|
||||
"sortBy": "1",
|
||||
"type": extend.get('type', ''),
|
||||
"type1": tid,
|
||||
"v_class": extend.get('v_class', ''),
|
||||
"year": extend.get('year', '')
|
||||
}
|
||||
data = self.fetch(f"{self.host}/api/mw-movie/anonymous/video/list?{self.js(params)}", headers=self.getheaders(params)).json()
|
||||
result = {}
|
||||
result['list'] = self.getvod(data['data']['list'])
|
||||
result['page'] = pg
|
||||
result['pagecount'] = 9999
|
||||
result['limit'] = 90
|
||||
result['total'] = 999999
|
||||
return result
|
||||
|
||||
def detailContent(self, ids):
|
||||
data=self.fetch(f"{self.host}/api/mw-movie/anonymous/video/detail?id={ids[0]}",headers=self.getheaders({'id':ids[0]})).json()
|
||||
vod=self.getvod([data['data']])[0]
|
||||
vod['vod_play_from']='嗷呜有金牌'
|
||||
vod['vod_play_url'] = '#'.join(
|
||||
f"{i['name'] if len(vod['episodelist']) > 1 else vod['vod_name']}${ids[0]}@@{i['nid']}" for i in
|
||||
vod['episodelist'])
|
||||
vod.pop('episodelist', None)
|
||||
return {'list':[vod]}
|
||||
|
||||
def searchContent(self, key, quick, pg="1"):
|
||||
params = {
|
||||
"keyword": key,
|
||||
"pageNum": pg,
|
||||
"pageSize": "8",
|
||||
"sourceCode": "1"
|
||||
}
|
||||
data=self.fetch(f"{self.host}/api/mw-movie/anonymous/video/searchByWord?{self.js(params)}",headers=self.getheaders(params)).json()
|
||||
vods=self.getvod(data['data']['result']['list'])
|
||||
return {'list':vods,'page':pg}
|
||||
|
||||
def playerContent(self, flag, id, vipFlags):
|
||||
self.header = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.61 Chrome/126.0.6478.61 Not/A)Brand/8 Safari/537.36',
|
||||
'sec-ch-ua-platform': '"Windows"',
|
||||
'DNT': '1',
|
||||
'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
|
||||
'sec-ch-ua-mobile': '?0',
|
||||
'Origin': self.host,
|
||||
'Referer': f'{self.host}/'
|
||||
}
|
||||
ids=id.split('@@')
|
||||
pdata = self.fetch(f"{self.host}/api/mw-movie/anonymous/v2/video/episode/url?clientType=1&id={ids[0]}&nid={ids[1]}",headers=self.getheaders({'clientType':'1','id': ids[0], 'nid': ids[1]})).json()
|
||||
vlist=[]
|
||||
for i in pdata['data']['list']:vlist.extend([i['resolutionName'],i['url']])
|
||||
return {'parse':0,'url':vlist,'header':self.header}
|
||||
|
||||
def localProxy(self, param):
|
||||
pass
|
||||
|
||||
def host_late(self, url_list):
|
||||
if isinstance(url_list, str):
|
||||
urls = [u.strip() for u in url_list.split(',')]
|
||||
else:
|
||||
urls = url_list
|
||||
if len(urls) <= 1:
|
||||
return urls[0] if urls else ''
|
||||
|
||||
results = {}
|
||||
threads = []
|
||||
|
||||
def test_host(url):
|
||||
try:
|
||||
start_time = time.time()
|
||||
response = requests.head(url, timeout=1.0, allow_redirects=False)
|
||||
delay = (time.time() - start_time) * 1000
|
||||
results[url] = delay
|
||||
except Exception as e:
|
||||
results[url] = float('inf')
|
||||
for url in urls:
|
||||
t = threading.Thread(target=test_host, args=(url,))
|
||||
threads.append(t)
|
||||
t.start()
|
||||
for t in threads:
|
||||
t.join()
|
||||
return min(results.items(), key=lambda x: x[1])[0]
|
||||
|
||||
def md5(self, sign_key):
|
||||
md5_hash = MD5.new()
|
||||
md5_hash.update(sign_key.encode('utf-8'))
|
||||
md5_result = md5_hash.hexdigest()
|
||||
return md5_result
|
||||
|
||||
def js(self, param):
|
||||
return '&'.join(f"{k}={v}" for k, v in param.items())
|
||||
|
||||
def getheaders(self, param=None):
|
||||
if param is None:param = {}
|
||||
t=str(int(time.time()*1000))
|
||||
param['key']='cb808529bae6b6be45ecfab29a4889bc'
|
||||
param['t']=t
|
||||
sha1_hash = SHA1.new()
|
||||
sha1_hash.update(self.md5(self.js(param)).encode('utf-8'))
|
||||
sign = sha1_hash.hexdigest()
|
||||
deviceid = str(uuid.uuid4())
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.61 Chrome/126.0.6478.61 Not/A)Brand/8 Safari/537.36',
|
||||
'Accept': 'application/json, text/plain, */*',
|
||||
'sign': sign,
|
||||
't': t,
|
||||
'deviceid':deviceid
|
||||
}
|
||||
return headers
|
||||
|
||||
def convert_field_name(self, field):
|
||||
field = field.lower()
|
||||
if field.startswith('vod') and len(field) > 3:
|
||||
field = field.replace('vod', 'vod_')
|
||||
if field.startswith('type') and len(field) > 4:
|
||||
field = field.replace('type', 'type_')
|
||||
return field
|
||||
|
||||
def getvod(self, array):
|
||||
return [{self.convert_field_name(k): v for k, v in item.items()} for item in array]
|
||||
|
||||
@@ -0,0 +1,144 @@
|
||||
# coding=utf-8
|
||||
# !/usr/bin/python
|
||||
import sys
|
||||
import requests
|
||||
import datetime
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import base64
|
||||
from base.spider import Spider
|
||||
import json
|
||||
|
||||
sys.path.append('..')
|
||||
xurl = "http://xjj2.716888.xyz"
|
||||
headerx = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36',
|
||||
'Cookie':'mk_encrypt_c21f969b5f03d33d43e04f8f136e7682=390e11f0d5ae13b2787e6a72db11527f'
|
||||
}
|
||||
|
||||
|
||||
class Spider(Spider):
|
||||
global xurl
|
||||
global headerx
|
||||
|
||||
def getName(self):
|
||||
return "首页"
|
||||
|
||||
def init(self, extend):
|
||||
pass
|
||||
|
||||
def isVideoFormat(self, url):
|
||||
pass
|
||||
|
||||
def manualVideoCheck(self):
|
||||
pass
|
||||
|
||||
def homeContent(self, filter):
|
||||
pass
|
||||
|
||||
def homeVideoContent(self):
|
||||
id = ['4k/4k.php', 'djxjj/dj1.php', 'zj/jipinyz/jipinyz.php', 'zj/xuejie/xuejie.php', 'zj/kawayi/kawayi.php',
|
||||
'zj/nennen/nennen.php', 'zj/heji1/heji1.php', 'zj/sihuawd/sihuawd.php', 'zj/wanmeisc/wanmeisc.php',
|
||||
'zj/manyao/manyao.php', 'zj/sihuadd/sihuadd.php', 'zj/qingchun/qingchun.php', 'zj/cos/cos.php',
|
||||
'zj/jingpinbz/jingpinbz.php', 'zj/jipinll/jipinll.php', 'zj/nideym/nideym.php', 'zj/tianmei/tianmei.php',
|
||||
'zj/yusi/yusi.php', 'zj/shuaige/shuaige.php', 'zj/rewu/rewu.php', 'zj/jingpinsc/jingpinsc.php']
|
||||
name = ['随机', 'DJ姐姐', '极品钰足', '学姐系列', '卡哇伊', '嫩嫩系列', '美女舞蹈', '丝滑舞蹈', '完美身材',
|
||||
'慢摇系列', '丝滑吊带', '清纯系列', 'COS系列', '精品变装', '极品罗丽', '你的裕梦', '甜妹系列',
|
||||
'御丝系列', '帅哥哥', '热舞系列', '精品收藏']
|
||||
pic = ['https://img0.baidu.com/it/u=2236794495,926227820&fm=253&fmt=auto&app=138&f=JPEG?w=1091&h=500',
|
||||
'https://pic.rmb.bdstatic.com/mvideo/e17d86ce4489a02870ace9a25a804c3e',
|
||||
'https://img1.baidu.com/it/u=4087009209,613234683&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=364',
|
||||
'https://img1.baidu.com/it/u=2347706654,3055017263&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=750',
|
||||
'https://img2.baidu.com/it/u=3715511725,1094436549&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=1083',
|
||||
'https://img2.baidu.com/it/u=2560410906,3760952489&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=750',
|
||||
'https://img0.baidu.com/it/u=4119328645,2294770712&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=750',
|
||||
'https://img1.baidu.com/it/u=3167365498,4156845177&fm=253&fmt=auto&app=120&f=JPEG?w=355&h=631',
|
||||
'https://img2.baidu.com/it/u=2214691242,2295609938&fm=253&fmt=auto&app=120&f=JPEG?w=800&h=973',
|
||||
'https://img1.baidu.com/it/u=3930123826,1131807820&fm=253&fmt=auto&app=138&f=JPEG?w=889&h=500',
|
||||
'https://img2.baidu.com/it/u=3998619741,1128428746&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=594',
|
||||
'https://img2.baidu.com/it/u=1507871502,2316279678&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=768',
|
||||
'https://img0.baidu.com/it/u=2245878765,4037513957&fm=253&fmt=auto&app=138&f=JPEG?w=617&h=411',
|
||||
'https://img1.baidu.com/it/u=3623293272,829752126&fm=253&fmt=auto&app=138&f=JPEG?w=285&h=285',
|
||||
'https://img2.baidu.com/it/u=1922261112,3647796435&fm=253&fmt=auto&app=120&f=JPEG?w=500&h=542',
|
||||
'https://img1.baidu.com/it/u=3970043028,2042301564&fm=253&fmt=auto&app=120&f=JPEG?w=500&h=889',
|
||||
'https://img2.baidu.com/it/u=3229384329,3046902124&fm=253&fmt=auto&app=120&f=JPEG?w=800&h=800',
|
||||
'https://img1.baidu.com/it/u=3113661564,2558849413&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=500',
|
||||
'https://img1.baidu.com/it/u=2361496550,3302335162&fm=253&fmt=auto&app=138&f=JPEG?w=333&h=500',
|
||||
'https://img1.baidu.com/it/u=270105183,1595166255&fm=253&fmt=auto&app=120&f=JPEG?w=800&h=500',
|
||||
'https://img1.baidu.com/it/u=4071105902,825241031&fm=253&fmt=auto&app=138&f=JPEG?w=235&h=340']
|
||||
list_length = len(id)
|
||||
videos = []
|
||||
for i in range(list_length):
|
||||
print(id[i])
|
||||
video = {
|
||||
"vod_id": id[i],
|
||||
"vod_name": name[i],
|
||||
"vod_pic": pic[i],
|
||||
"vod_remarks": '播放20个',
|
||||
}
|
||||
videos.append(video)
|
||||
|
||||
result = {'list': videos}
|
||||
|
||||
return result
|
||||
|
||||
def categoryContent(self, cid, pg, filter, ext):
|
||||
pass
|
||||
|
||||
def detailContent(self, ids):
|
||||
videos = []
|
||||
result = {}
|
||||
did = ids[0]
|
||||
for i in range(1, 21):
|
||||
playurl = ""
|
||||
for j in range(1, i + 1):
|
||||
playurl += f"{j}$/fenlei/{did}#"
|
||||
playurl = playurl[:-1]
|
||||
|
||||
videos.append({
|
||||
"vod_id": '',
|
||||
"vod_name": '',
|
||||
"vod_pic": "",
|
||||
"type_name": '',
|
||||
"vod_year": "",
|
||||
"vod_area": "",
|
||||
"vod_remarks": "",
|
||||
"vod_actor": "",
|
||||
"vod_director": "",
|
||||
"vod_content": "",
|
||||
"vod_play_from": "GK推荐",
|
||||
"vod_play_url": playurl
|
||||
})
|
||||
|
||||
result['list'] = videos
|
||||
return result
|
||||
|
||||
def playerContent(self, flag, id, vipFlags):
|
||||
result = {}
|
||||
response = requests.get(url=xurl + id, headers=headerx, allow_redirects=False)
|
||||
|
||||
location_header = response.headers.get('Location')
|
||||
if 'http' in location_header:
|
||||
purl = location_header
|
||||
else:
|
||||
purl = 'http:' + location_header
|
||||
result["parse"] = 0
|
||||
result["playUrl"] = ''
|
||||
result["url"] = purl
|
||||
result["header"] = headerx
|
||||
return result
|
||||
|
||||
def searchContentPage(self, key, quick, page):
|
||||
pass
|
||||
|
||||
def searchContent(self, key, quick):
|
||||
return self.searchContentPage(key, quick, '1')
|
||||
|
||||
def localProxy(self, params):
|
||||
if params['type'] == "m3u8":
|
||||
return self.proxyM3u8(params)
|
||||
elif params['type'] == "media":
|
||||
return self.proxyMedia(params)
|
||||
elif params['type'] == "ts":
|
||||
return self.proxyTs(params)
|
||||
return None
|
||||
@@ -0,0 +1,218 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# by @嗷呜
|
||||
import re
|
||||
import sys
|
||||
from urllib.parse import urlparse
|
||||
import base64
|
||||
from pyquery import PyQuery as pq
|
||||
sys.path.append('..')
|
||||
from base.spider import Spider
|
||||
|
||||
|
||||
class Spider(Spider):
|
||||
|
||||
def init(self, extend=""):
|
||||
self.host=self.gethost()
|
||||
self.headers.update({'referer': f'{self.host}/'})
|
||||
pass
|
||||
|
||||
def getName(self):
|
||||
pass
|
||||
|
||||
def isVideoFormat(self, url):
|
||||
pass
|
||||
|
||||
def manualVideoCheck(self):
|
||||
pass
|
||||
|
||||
def destroy(self):
|
||||
pass
|
||||
|
||||
headers = {
|
||||
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
|
||||
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
||||
'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="130", "Google Chrome";v="130"',
|
||||
'sec-ch-ua-platform': '"Android"',
|
||||
'user-agent': 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
|
||||
}
|
||||
|
||||
def homeContent(self, filter):
|
||||
data=self.getpq()
|
||||
result = {}
|
||||
classes = []
|
||||
filters = {"1": {"name": "类型","key": "tid","value": [{"n": "喜剧","v": 6},{"n": "爱情","v": 7},{"n": "恐怖","v": 8},{"n": "动作","v": 9},{"n": "科幻","v": 10},{"n": "战争","v": 11},{"n": "犯罪","v": 12},{"n": "动画","v": 13},{"n": "奇幻","v": 14},{"n": "剧情","v": 15},{"n": "冒险","v": 16},{"n": "悬疑","v": 17},{"n": "惊悚","v": 18},{"n": "其它","v": 19}]},"2": {"name": "类型","key": "tid","value": [{"n": "大陆剧","v": 20},{"n": "港剧","v": 21},{"n": "韩剧","v": 22},{"n": "美剧","v": 23},{"n": "日剧","v": 24},{"n": "英剧","v": 25},{"n": "台剧","v": 26},{"n": "其它","v": 27}]}}
|
||||
for k in data('.top_bar.clearfix a').items():
|
||||
j = k.attr('href')
|
||||
if j and 'list' in j:
|
||||
id = re.search(r'\d+', j).group(0)
|
||||
classes.append({
|
||||
'type_name': k.text(),
|
||||
'type_id': id
|
||||
})
|
||||
result['class'] = classes
|
||||
result['filters'] = filters
|
||||
result['list'] = self.getlist(data('.grid_box ul li'))
|
||||
return result
|
||||
|
||||
def homeVideoContent(self):
|
||||
pass
|
||||
|
||||
def categoryContent(self, tid, pg, filter, extend):
|
||||
data=self.getpq(f"/list/{extend.get('tid',tid)}-{pg}.html")
|
||||
result = {}
|
||||
result['list'] = self.getlist(data('.grid_box ul li'))
|
||||
result['page'] = pg
|
||||
result['pagecount'] = 9999
|
||||
result['limit'] = 90
|
||||
result['total'] = 999999
|
||||
return result
|
||||
|
||||
def detailContent(self, ids):
|
||||
data=self.getpq(ids[0])
|
||||
vod = {
|
||||
'vod_remarks': data('.grid_box.v_info_box p').text(),
|
||||
'vod_content': data('.p_txt.show_part').text().split('\n')[0],
|
||||
}
|
||||
n=list(data('.play_from ul li').items())
|
||||
p=list(data('ul.play_list li').items())
|
||||
ns,ps=[],[]
|
||||
for i,j in enumerate(n):
|
||||
ns.append(j.text())
|
||||
ps.append('#'.join([f"{k.text()}${k.attr('href')}" for k in list(p[i]('a').items())[::-1]]))
|
||||
vod['vod_play_from']='$$$'.join(ns)
|
||||
vod['vod_play_url']='$$$'.join(ps)
|
||||
return {'list':[vod]}
|
||||
|
||||
def searchContent(self, key, quick, pg="1"):
|
||||
pass
|
||||
|
||||
def playerContent(self, flag, id, vipFlags):
|
||||
data=self.getpq(id)
|
||||
try:
|
||||
surl=data('section[style*="padding-top"] iframe').eq(0).attr('src')
|
||||
sd=pq(self.fetch(surl,headers=self.headers).text)('body script').html()
|
||||
jdata=self.extract_values(sd)
|
||||
jdata['key']=self.hhh(jdata['key'])
|
||||
parsed_url = urlparse(surl)
|
||||
durl = parsed_url.scheme + "://" + parsed_url.netloc
|
||||
headers = {
|
||||
'accept': 'application/json, text/javascript, */*; q=0.01',
|
||||
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
||||
'cache-control': 'no-cache',
|
||||
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
'dnt': '1',
|
||||
'origin': durl,
|
||||
'pragma': 'no-cache',
|
||||
'priority': 'u=1, i',
|
||||
'referer': f'{surl}',
|
||||
'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="130", "Google Chrome";v="130"',
|
||||
'sec-ch-ua-mobile': '?1',
|
||||
'sec-ch-ua-platform': '"Android"',
|
||||
'sec-fetch-dest': 'empty',
|
||||
'sec-fetch-mode': 'cors',
|
||||
'sec-fetch-site': 'same-origin',
|
||||
'sec-fetch-storage-access': 'active',
|
||||
'user-agent': 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
|
||||
'x-requested-with': 'XMLHttpRequest',
|
||||
}
|
||||
jjb=self.post(f"{durl}/api.php",headers=headers,data=jdata).json()
|
||||
url,p=jjb['url'],0
|
||||
except Exception as e:
|
||||
self.log(f"失败: {e}")
|
||||
url,p=f'{self.host}{id}',1
|
||||
phd={
|
||||
'User-Agent': 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
|
||||
'sec-ch-ua-platform': '"Android"',
|
||||
'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="130", "Google Chrome";v="130"',
|
||||
'sec-fetch-dest': 'video',
|
||||
'referer': f'{self.host}/',
|
||||
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
||||
}
|
||||
return {'parse': p, 'url': url, 'header': phd}
|
||||
|
||||
def localProxy(self, param):
|
||||
pass
|
||||
|
||||
def liveContent(self, url):
|
||||
pass
|
||||
|
||||
def gethost(self):
|
||||
data=pq(self.fetch("http://shapp.us",headers=self.headers).text)
|
||||
for i in data('.content-top ul li').items():
|
||||
h=i('a').attr('href')
|
||||
if h:
|
||||
data = self.fetch(h, headers=self.headers, timeout=5)
|
||||
if data.status_code == 200:
|
||||
return h
|
||||
|
||||
def extract_values(self, text):
|
||||
url_match = re.search(r'var url = "([^"]+)"', text)
|
||||
url = url_match.group(1) if url_match else None
|
||||
t_match = re.search(r'var t = "([^"]+)"', text)
|
||||
t = t_match.group(1) if t_match else None
|
||||
key_match = re.search(r'var key = hhh\("([^"]+)"\)', text)
|
||||
key_param = key_match.group(1) if key_match else None
|
||||
act_match = re.search(r'var act = "([^"]+)"', text)
|
||||
act = act_match.group(1) if act_match else None
|
||||
play_match = re.search(r'var play = "([^"]+)"', text)
|
||||
play = play_match.group(1) if play_match else None
|
||||
return {
|
||||
"url": url,
|
||||
"t": t,
|
||||
"key": key_param,
|
||||
"act": act,
|
||||
"play": play
|
||||
}
|
||||
|
||||
def getlist(self,data):
|
||||
videos = []
|
||||
for i in data.items():
|
||||
videos.append({
|
||||
'vod_id': i('a').attr('href'),
|
||||
'vod_name': i('a').attr('title'),
|
||||
'vod_pic': i('a img').attr('data-original'),
|
||||
'vod_remarks': i('.v_note').text()
|
||||
})
|
||||
return videos
|
||||
|
||||
def getpq(self, path=''):
|
||||
data=self.fetch(f"{self.host}{path}",headers=self.headers).text
|
||||
try:
|
||||
return pq(data)
|
||||
except Exception as e:
|
||||
print(f"{str(e)}")
|
||||
return pq(data.encode('utf-8'))
|
||||
|
||||
def hhh(self, t):
|
||||
ee = {
|
||||
"0Oo0o0O0": "a", "1O0bO001": "b", "2OoCcO2": "c", "3O0dO0O3": "d",
|
||||
"4OoEeO4": "e", "5O0fO0O5": "f", "6OoGgO6": "g", "7O0hO0O7": "h",
|
||||
"8OoIiO8": "i", "9O0jO0O9": "j", "0OoKkO0": "k", "1O0lO0O1": "l",
|
||||
"2OoMmO2": "m", "3O0nO0O3": "n", "4OoOoO4": "o", "5O0pO0O5": "p",
|
||||
"6OoQqO6": "q", "7O0rO0O7": "r", "8OoSsO8": "s", "9O0tO0O9": "t",
|
||||
"0OoUuO0": "u", "1O0vO0O1": "v", "2OoWwO2": "w", "3O0xO0O3": "x",
|
||||
"4OoYyO4": "y", "5O0zO0O5": "z", "0OoAAO0": "A", "1O0BBO1": "B",
|
||||
"2OoCCO2": "C", "3O0DDO3": "D", "4OoEEO4": "E", "5O0FFO5": "F",
|
||||
"6OoGGO6": "G", "7O0HHO7": "H", "8OoIIO8": "I", "9O0JJO9": "J",
|
||||
"0OoKKO0": "K", "1O0LLO1": "L", "2OoMMO2": "M", "3O0NNO3": "N",
|
||||
"4OoOOO4": "O", "5O0PPO5": "P", "6OoQQO6": "Q", "7O0RRO7": "R",
|
||||
"8OoSSO8": "S", "9O0TTO9": "T", "0OoUO0": "U", "1O0VVO1": "V",
|
||||
"2OoWWO2": "W", "3O0XXO3": "X", "4OoYYO4": "Y", "5O0ZZO5": "Z"
|
||||
}
|
||||
n = ""
|
||||
o = base64.b64decode(t).decode('utf-8', errors='replace')
|
||||
i = 0
|
||||
while i < len(o):
|
||||
l = o[i]
|
||||
found = False
|
||||
for key, value in ee.items():
|
||||
if o[i:i + len(key)] == key:
|
||||
l = value
|
||||
i += len(key) - 1
|
||||
found = True
|
||||
break
|
||||
if not found:
|
||||
pass
|
||||
n += l
|
||||
i += 1
|
||||
return n
|
||||
@@ -0,0 +1,301 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# by @嗷呜
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from urllib.parse import quote
|
||||
from Crypto.Hash import MD5
|
||||
import requests
|
||||
sys.path.append('..')
|
||||
from base.spider import Spider
|
||||
|
||||
|
||||
class Spider(Spider):
|
||||
|
||||
def init(self, extend=""):
|
||||
self.session = requests.Session()
|
||||
self.session.headers.update(self.headers)
|
||||
self.session.cookies.update(self.cookie)
|
||||
self.get_ctoken()
|
||||
pass
|
||||
|
||||
def getName(self):
|
||||
pass
|
||||
|
||||
def isVideoFormat(self, url):
|
||||
pass
|
||||
|
||||
def manualVideoCheck(self):
|
||||
pass
|
||||
|
||||
def destroy(self):
|
||||
pass
|
||||
|
||||
host='https://www.youku.com'
|
||||
|
||||
shost='https://search.youku.com'
|
||||
|
||||
h5host='https://acs.youku.com'
|
||||
|
||||
ihost='https://v.youku.com'
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (; Windows 10.0.26100.3194_64 ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Electron/14.2.0 Safari/537.36 Node/14.17.0 YoukuDesktop/9.2.60 UOSYouku (2.0.1)-Electron(UTDID ZYmGMAAAACkDAMU8hbiMmYdd;CHANNEL official;ZREAL 0;BTYPE TM2013;BRAND TIMI;BUILDVER 9.2.60.1001)',
|
||||
'Referer': f'{host}/'
|
||||
}
|
||||
|
||||
cookie={
|
||||
"__ysuid": "17416134165380iB",
|
||||
"__aysid": "1741613416541WbD",
|
||||
"xlly_s": "1",
|
||||
"isI18n": "false",
|
||||
"cna": "bNdVIKmmsHgCAXW9W6yrQ1/s",
|
||||
"__ayft": "1741672162330",
|
||||
"__arpvid": "1741672162331FBKgrn-1741672162342",
|
||||
"__ayscnt": "1",
|
||||
"__aypstp": "1",
|
||||
"__ayspstp": "3",
|
||||
"tfstk": "gZbiib4JpG-6DqW-B98_2rwPuFrd1fTXQt3vHEp4YpJIBA3OgrWcwOi90RTOo9XVQ5tAM5NcK_CP6Ep97K2ce1XDc59v3KXAgGFLyzC11ET2n8U8yoyib67M3xL25e8gS8pbyzC1_ET4e8URWTsSnHv2uh8VTeJBgEuN3d-ELQAWuKWV36PHGpJ2uEWVTxvicLX1ewyUXYSekxMf-CxMEqpnoqVvshvP_pABOwvXjL5wKqeulm52np_zpkfCDGW9Ot4uKFIRwZtP7vP9_gfAr3KEpDWXSIfWRay-DHIc_Z-hAzkD1i5Ooi5LZ0O5YO_1mUc476YMI3R6xzucUnRlNe_zemKdm172xMwr2L7CTgIkbvndhFAVh3_YFV9Ng__52U4SQKIdZZjc4diE4EUxlFrfKmiXbBOHeP72v7sAahuTtWm78hRB1yV3tmg9bBOEhWVnq5KwOBL5."
|
||||
}
|
||||
|
||||
def homeContent(self, filter):
|
||||
result = {}
|
||||
categories = ["电视剧", "电影", "综艺", "动漫", "少儿", "纪录片", "文化", "亲子", "教育", "搞笑", "生活",
|
||||
"体育", "音乐", "游戏"]
|
||||
classes = [{'type_name': category, 'type_id': category} for category in categories]
|
||||
filters = {}
|
||||
self.typeid = {}
|
||||
with ThreadPoolExecutor(max_workers=len(categories)) as executor:
|
||||
tasks = {
|
||||
executor.submit(self.cf, {'type': category}, True): category
|
||||
for category in categories
|
||||
}
|
||||
|
||||
for future in as_completed(tasks):
|
||||
try:
|
||||
category = tasks[future]
|
||||
session, ft = future.result()
|
||||
filters[category] = ft
|
||||
self.typeid[category] = session
|
||||
except Exception as e:
|
||||
print(f"处理分类 {tasks[future]} 时出错: {str(e)}")
|
||||
|
||||
result['class'] = classes
|
||||
result['filters'] = filters
|
||||
return result
|
||||
|
||||
def homeVideoContent(self):
|
||||
try:
|
||||
vlist = []
|
||||
params={"ms_codes":"2019061000","params":"{\"debug\":0,\"gray\":0,\"pageNo\":1,\"utdid\":\"ZYmGMAAAACkDAMU8hbiMmYdd\",\"userId\":\"\",\"bizKey\":\"YOUKU_WEB\",\"appPackageKey\":\"com.youku.YouKu\",\"showNodeList\":0,\"reqSubNode\":0,\"nodeKey\":\"WEBHOME\",\"bizContext\":\"{\\\"spmA\\\":\\\"a2hja\\\"}\"}","system_info":"{\"device\":\"pcweb\",\"os\":\"pcweb\",\"ver\":\"1.0.0.0\",\"userAgent\":\"Mozilla/5.0 (; Windows 10.0.26100.3194_64 ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Electron/14.2.0 Safari/537.36 Node/14.17.0 YoukuDesktop/9.2.60 UOSYouku (2.0.1)-Electron(UTDID ZYmGMAAAACkDAMU8hbiMmYdd;CHANNEL official;ZREAL 0;BTYPE TM2013;BRAND TIMI;BUILDVER 9.2.60.1001)\",\"guid\":\"1590141704165YXe\",\"appPackageKey\":\"com.youku.pcweb\",\"young\":0,\"brand\":\"\",\"network\":\"\",\"ouid\":\"\",\"idfa\":\"\",\"scale\":\"\",\"operator\":\"\",\"resolution\":\"\",\"pid\":\"\",\"childGender\":0,\"zx\":0}"}
|
||||
data=self.getdata(f'{self.h5host}/h5/mtop.youku.columbus.home.query/1.0/',params)
|
||||
okey=list(data['data'].keys())[0]
|
||||
for i in data['data'][okey]['data']['nodes'][0]['nodes'][-1]['nodes'][0]['nodes']:
|
||||
if i.get('nodes') and i['nodes'][0].get('data'):
|
||||
i=i['nodes'][0]['data']
|
||||
if i.get('assignId'):
|
||||
vlist.append({
|
||||
'vod_id': i['assignId'],
|
||||
'vod_name': i.get('title'),
|
||||
'vod_pic': i.get('vImg') or i.get('img'),
|
||||
'vod_year': i.get('mark',{}).get('data',{}).get('text'),
|
||||
'vod_remarks': i.get('summary')
|
||||
})
|
||||
return {'list': vlist}
|
||||
except Exception as e:
|
||||
print(f"处理主页视频数据时出错: {str(e)}")
|
||||
return {'list': []}
|
||||
|
||||
def categoryContent(self, tid, pg, filter, extend):
|
||||
result = {}
|
||||
vlist = []
|
||||
result['page'] = pg
|
||||
result['limit'] = 90
|
||||
result['total'] = 999999
|
||||
pagecount = 9999
|
||||
params = {'type': tid}
|
||||
id = self.typeid[tid]
|
||||
params.update(extend)
|
||||
if pg == '1':
|
||||
id=self.cf(params)
|
||||
data=self.session.get(f'{self.host}/category/data?session={id}¶ms={quote(json.dumps(params))}&pageNo={pg}').json()
|
||||
try:
|
||||
data=data['data']['filterData']
|
||||
for i in data['listData']:
|
||||
if i.get('videoLink') and 's=' in i['videoLink']:
|
||||
vlist.append({
|
||||
'vod_id': i.get('videoLink').split('s=')[-1],
|
||||
'vod_name': i.get('title'),
|
||||
'vod_pic': i.get('img'),
|
||||
'vod_year': i.get('rightTagText'),
|
||||
'vod_remarks': i.get('summary')
|
||||
})
|
||||
self.typeid[tid]=quote(json.dumps(data['session']))
|
||||
except:
|
||||
pagecount=pg
|
||||
result['list'] = vlist
|
||||
result['pagecount'] = pagecount
|
||||
return result
|
||||
|
||||
def detailContent(self, ids):
|
||||
try:
|
||||
data=self.session.get(f'{self.ihost}/v_getvideo_info/?showId={ids[0]}').json()
|
||||
v=data['data']
|
||||
vod = {
|
||||
'type_name': v.get('showVideotype'),
|
||||
'vod_year': v.get('lastUpdate'),
|
||||
'vod_remarks': v.get('rc_title'),
|
||||
'vod_actor': v.get('_personNameStr'),
|
||||
'vod_content': v.get('showdesc'),
|
||||
'vod_play_from': '优酷',
|
||||
'vod_play_url': ''
|
||||
}
|
||||
params={"biz":"new_detail_web2","videoId":v.get('vid'),"scene":"web_page","componentVersion":"3","ip":data.get('ip'),"debug":0,"utdid":"ZYmGMAAAACkDAMU8hbiMmYdd","userId":0,"platform":"pc","nextSession":"","gray":0,"source":"pcNoPrev","showId":ids[0]}
|
||||
sdata,index=self.getinfo(params)
|
||||
pdata=sdata['nodes']
|
||||
if index > len(pdata):
|
||||
batch_size = len(pdata)
|
||||
total_batches = ((index + batch_size - 1) // batch_size) - 1
|
||||
ssj = json.loads(sdata['data']['session'])
|
||||
with ThreadPoolExecutor(max_workers=total_batches) as executor:
|
||||
futures = []
|
||||
for batch in range(total_batches):
|
||||
start = batch_size + 1 + (batch * batch_size)
|
||||
end = start + batch_size - 1
|
||||
next_session = ssj.copy()
|
||||
next_session.update({
|
||||
"itemStartStage": start,
|
||||
"itemEndStage": min(end, index)
|
||||
})
|
||||
current_params = params.copy()
|
||||
current_params['nextSession'] = json.dumps(next_session)
|
||||
futures.append((start, executor.submit(self.getvinfo, current_params)))
|
||||
futures.sort(key=lambda x: x[0])
|
||||
|
||||
for _, future in futures:
|
||||
try:
|
||||
result = future.result()
|
||||
pdata.extend(result['nodes'])
|
||||
except Exception as e:
|
||||
print(f"Error fetching data: {str(e)}")
|
||||
vod['vod_play_url'] = '#'.join([f"{i['data'].get('title')}${i['data']['action'].get('value')}" for i in pdata])
|
||||
return {'list': [vod]}
|
||||
except Exception as e:
|
||||
print(e)
|
||||
return {'list': [{'vod_play_from': '哎呀翻车啦', 'vod_play_url': f'呜呜呜${self.host}'}]}
|
||||
|
||||
def searchContent(self, key, quick, pg="1"):
|
||||
data=self.session.get(f'{self.shost}/api/search?pg={pg}&keyword={key}').json()
|
||||
vlist = []
|
||||
for i in data['pageComponentList']:
|
||||
if i.get('commonData') and (i['commonData'].get('showId') or i['commonData'].get('realShowId')):
|
||||
i=i['commonData']
|
||||
vlist.append({
|
||||
'vod_id': i.get('showId') or i.get('realShowId'),
|
||||
'vod_name': i['titleDTO'].get('displayName'),
|
||||
'vod_pic': i['posterDTO'].get('vThumbUrl'),
|
||||
'vod_year': i.get('feature'),
|
||||
'vod_remarks': i.get('updateNotice')
|
||||
})
|
||||
return {'list': vlist, 'page': pg}
|
||||
|
||||
def playerContent(self, flag, id, vipFlags):
|
||||
return {'jx':1,'parse': 1, 'url': f"{self.ihost}/video?vid={id}", 'header': ''}
|
||||
|
||||
def localProxy(self, param):
|
||||
pass
|
||||
|
||||
def cf(self,params,b=False):
|
||||
response = self.session.get(f'{self.host}/category/data?params={quote(json.dumps(params))}&optionRefresh=1&pageNo=1').json()
|
||||
data=response['data']['filterData']
|
||||
session=quote(json.dumps(data['session']))
|
||||
if b:
|
||||
return session,self.get_filter_data(data['filter']['filterData'][1:])
|
||||
return session
|
||||
|
||||
def process_key(self, key):
|
||||
if '_' not in key:
|
||||
return key
|
||||
parts = key.split('_')
|
||||
result = parts[0]
|
||||
for part in parts[1:]:
|
||||
if part:
|
||||
result += part[0].upper() + part[1:]
|
||||
return result
|
||||
|
||||
def get_filter_data(self, data):
|
||||
result = []
|
||||
try:
|
||||
for item in data:
|
||||
if not item.get('subFilter'):
|
||||
continue
|
||||
first_sub = item['subFilter'][0]
|
||||
if not first_sub.get('filterType'):
|
||||
continue
|
||||
filter_item = {
|
||||
'key': self.process_key(first_sub['filterType']),
|
||||
'name': first_sub['title'],
|
||||
'value': []
|
||||
}
|
||||
for sub in item['subFilter']:
|
||||
if 'value' in sub:
|
||||
filter_item['value'].append({
|
||||
'n': sub['title'],
|
||||
'v': sub['value']
|
||||
})
|
||||
if filter_item['value']:
|
||||
result.append(filter_item)
|
||||
|
||||
except Exception as e:
|
||||
print(f"处理筛选数据时出错: {str(e)}")
|
||||
|
||||
return result
|
||||
|
||||
def get_ctoken(self):
|
||||
data=self.session.get(f'{self.h5host}/h5/mtop.ykrec.recommendservice.recommend/1.0/?jsv=2.6.1&appKey=24679788')
|
||||
|
||||
def md5(self,t,text):
|
||||
h = MD5.new()
|
||||
token=self.session.cookies.get('_m_h5_tk').split('_')[0]
|
||||
data=f"{token}&{t}&24679788&{text}"
|
||||
h.update(data.encode('utf-8'))
|
||||
return h.hexdigest()
|
||||
|
||||
def getdata(self, url, params, recursion_count=0, max_recursion=3):
|
||||
data = json.dumps(params)
|
||||
t = int(time.time() * 1000)
|
||||
jsdata = {
|
||||
'appKey': '24679788',
|
||||
't': t,
|
||||
'sign': self.md5(t, data),
|
||||
'data': data
|
||||
}
|
||||
response = self.session.get(url, params=jsdata)
|
||||
if '令牌过期' in response.text:
|
||||
if recursion_count >= max_recursion:
|
||||
raise Exception("达到最大递归次数,无法继续请求")
|
||||
self.get_ctoken()
|
||||
return self.getdata(url, params, recursion_count + 1, max_recursion)
|
||||
else:
|
||||
return response.json()
|
||||
|
||||
def getvinfo(self,params):
|
||||
body = {
|
||||
"ms_codes": "2019030100",
|
||||
"params": json.dumps(params),
|
||||
"system_info": "{\"os\":\"iku\",\"device\":\"iku\",\"ver\":\"9.2.9\",\"appPackageKey\":\"com.youku.iku\",\"appPackageId\":\"pcweb\"}"
|
||||
}
|
||||
data = self.getdata(f'{self.h5host}/h5/mtop.youku.columbus.gateway.new.execute/1.0/', body)
|
||||
okey = list(data['data'].keys())[0]
|
||||
i = data['data'][okey]['data']
|
||||
return i
|
||||
|
||||
def getinfo(self,params):
|
||||
i = self.getvinfo(params)
|
||||
jdata=i['nodes'][0]['nodes'][3]
|
||||
info=i['data']['extra']['episodeTotal']
|
||||
if i['data']['extra']['showCategory'] in ['电影','游戏']:
|
||||
jdata = i['nodes'][0]['nodes'][4]
|
||||
return jdata,info
|
||||
|
||||
@@ -0,0 +1,249 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# by @嗷呜
|
||||
import json
|
||||
import random
|
||||
import sys
|
||||
from base64 import b64encode, b64decode
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from urllib.parse import quote
|
||||
sys.path.append('..')
|
||||
from base.spider import Spider
|
||||
|
||||
class Spider(Spider):
|
||||
|
||||
def init(self, extend=""):
|
||||
self.did = 'f8da348e186e6ee574d647918f5a7114'
|
||||
pass
|
||||
|
||||
def getName(self):
|
||||
pass
|
||||
|
||||
def isVideoFormat(self, url):
|
||||
pass
|
||||
|
||||
def manualVideoCheck(self):
|
||||
pass
|
||||
|
||||
def destroy(self):
|
||||
pass
|
||||
|
||||
rhost = 'https://www.iqiyi.com'
|
||||
|
||||
hhost='https://mesh.if.iqiyi.com'
|
||||
|
||||
dhost='https://miniapp.iqiyi.com'
|
||||
|
||||
headers = {
|
||||
'Origin': rhost,
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
|
||||
'Referer': f'{rhost}/',
|
||||
}
|
||||
|
||||
def homeContent(self, filter):
|
||||
result = {}
|
||||
cateManual = {
|
||||
"全部": "1009",
|
||||
"电影": "1",
|
||||
"剧集": "2",
|
||||
"综艺": "6",
|
||||
"动漫": "4",
|
||||
"儿童": "15",
|
||||
"微剧": "35",
|
||||
"纪录片": "3"
|
||||
}
|
||||
classes = []
|
||||
filters = {}
|
||||
for k in cateManual:
|
||||
classes.append({
|
||||
'type_name': k,
|
||||
'type_id': cateManual[k]
|
||||
})
|
||||
with ThreadPoolExecutor(max_workers=len(classes)) as executor:
|
||||
results = executor.map(self.getf, classes)
|
||||
for id, ft in results:
|
||||
if len(ft):filters[id] = ft
|
||||
result['class'] = classes
|
||||
result['filters'] = filters
|
||||
return result
|
||||
|
||||
def homeVideoContent(self):
|
||||
data=self.fetch(f'{self.hhost}/portal/lw/v5/channel/recommend?v=13.014.21150', headers=self.headers).json()
|
||||
vlist = []
|
||||
for i in data['items'][1:]:
|
||||
for j in i['video'][0]['data']:
|
||||
id = j.get('firstId')
|
||||
pic=j.get('prevue',{}).get('image_url') or j.get('album_image_url_hover')
|
||||
if id and pic:
|
||||
pu=j.get('prevue',{}).get('page_url') or j.get('page_url').split('?')[0]
|
||||
id = f'{id}@{self.e64(pu)}'
|
||||
vlist.append({
|
||||
'vod_id': id,
|
||||
'vod_name': j.get('display_name'),
|
||||
'vod_pic': pic,
|
||||
'vod_year': j.get('sns_score'),
|
||||
'vod_remarks': j.get('dq_updatestatus') or j.get('rank_prefix')
|
||||
})
|
||||
return {'list':vlist}
|
||||
|
||||
def categoryContent(self, tid, pg, filter, extend):
|
||||
if pg == "1":
|
||||
self.sid = ''
|
||||
new_data = {'mode':'24'}
|
||||
for key, value in extend.items():
|
||||
if value:
|
||||
key_value_pairs = self.d64(value).split(',')
|
||||
for pair in key_value_pairs:
|
||||
k, v = pair.split('=')
|
||||
if k in new_data:
|
||||
new_data[k] += "," + v
|
||||
else:
|
||||
new_data[k] = v
|
||||
path=f"/portal/lw/videolib/data?uid=&passport_id=&ret_num=60&version=13.034.21571&device_id={self.did}&channel_id={tid}&page_id={pg}&session={self.sid}&os=&conduit_id=&vip=0&auth=&recent_selected_tag=&ad=%5B%7B%22lm%22%3A%225%22%2C%22ai%22%3A%225%22%2C%22fp%22%3A%226%22%2C%22sei%22%3A%22S78ff51b694677e17af4b19368dadb7bd%22%2C%22position%22%3A%22library%22%7D%5D&adExt=%7B%22r%22%3A%221.2.1-ares6-pure%22%7D&dfp=a00b3c577e541c41149be7cde9320500b0a11307e61a8445448f7f4a9e895ced0f&filter={quote(json.dumps(new_data))}"
|
||||
data=self.fetch(f'{self.hhost}{path}', headers=self.headers).json()
|
||||
self.sid = data['session']
|
||||
videos = []
|
||||
for i in data['data']:
|
||||
id = i.get('firstId') or i.get('tv_id')
|
||||
if not id:
|
||||
id=i.get('play_url').split(';')[0].split('=')[-1]
|
||||
if id and not i.get('h'):
|
||||
id=f'{id}@{self.e64(i.get("page_url"))}'
|
||||
videos.append({
|
||||
'vod_id': id,
|
||||
'vod_name': i.get('display_name'),
|
||||
'vod_pic': i.get('album_image_url_hover'),
|
||||
'vod_year': i.get('sns_score'),
|
||||
'vod_remarks': i.get('dq_updatestatus') or i.get('pay_mark')
|
||||
})
|
||||
result = {}
|
||||
result['list'] = videos
|
||||
result['page'] = pg
|
||||
result['pagecount'] = 9999
|
||||
result['limit'] = 90
|
||||
result['total'] = 999999
|
||||
return result
|
||||
|
||||
def detailContent(self, ids):
|
||||
ids = ids[0].split('@')
|
||||
ids[-1] = self.d64(ids[-1])
|
||||
data = self.fetch(f'{self.dhost}/h5/mina/baidu/play/body/v1/{ids[0]}/', headers=self.headers).json()
|
||||
v=data['data']['playInfo']
|
||||
vod = {
|
||||
'vod_name': v.get('albumName'),
|
||||
'type_name': v.get('tags'),
|
||||
'vod_year': v.get('albumYear'),
|
||||
'vod_remarks': v.get('updateStrategy'),
|
||||
'vod_actor': v.get('mainActors'),
|
||||
'vod_director': v.get('directors'),
|
||||
'vod_content': v.get('albumDesc'),
|
||||
'vod_play_from': '爱奇艺',
|
||||
'vod_play_url': ''
|
||||
}
|
||||
if data.get('data') and data['data'].get('videoList') and data['data']['videoList'].get('videos'):
|
||||
purl=[f'{i["shortTitle"]}${i["pageUrl"]}' for i in data['data']['videoList']['videos']]
|
||||
pg=data['data']['videoList'].get('totalPages')
|
||||
if pg and pg > 1:
|
||||
id = v['albumId']
|
||||
pages = list(range(2, pg + 1))
|
||||
page_results = {}
|
||||
with ThreadPoolExecutor(max_workers=10) as executor:
|
||||
future_to_page = {
|
||||
executor.submit(self.fetch_page_data, page, id): page
|
||||
for page in pages
|
||||
}
|
||||
for future in as_completed(future_to_page):
|
||||
page = future_to_page[future]
|
||||
try:
|
||||
result = future.result()
|
||||
page_results[page] = result
|
||||
except Exception as e:
|
||||
print(f"Error fetching page {page}: {e}")
|
||||
for page in sorted(page_results.keys()):
|
||||
purl.extend(page_results[page])
|
||||
vod['vod_play_url'] = '#'.join(purl)
|
||||
else:
|
||||
vdata=self.fetch(f'{self.dhost}/h5/mina/baidu/play/head/v1/{ids[0]}/', headers=self.headers).json()
|
||||
v=vdata['data']['playInfo']
|
||||
vod = {
|
||||
'vod_name': v.get('shortTitle'),
|
||||
'type_name': v.get('channelName'),
|
||||
'vod_year': v.get('year'),
|
||||
'vod_remarks': v.get('focus'),
|
||||
'vod_actor': v.get('mainActors'),
|
||||
'vod_director': v.get('directors'),
|
||||
'vod_content': v.get('desc'),
|
||||
'vod_play_from': '爱奇艺',
|
||||
'vod_play_url': f'{v.get("shortTitle")}${ids[-1]}'
|
||||
}
|
||||
return {'list':[vod]}
|
||||
|
||||
def searchContent(self, key, quick, pg="1"):
|
||||
data=self.fetch(f'{self.hhost}/portal/lw/search/homePageV3?key={key}¤t_page={pg}&mode=1&source=input&suggest=&version=13.014.21150&pageNum={pg}&pageSize=25&pu=&u={self.did}&scale=150&token=&userVip=0&conduit=&vipType=-1&os=&osShortName=win10&dataType=&appMode=', headers=self.headers).json()
|
||||
videos = []
|
||||
vdata=data['data']['templates']
|
||||
for i in data['data']['templates']:
|
||||
if i.get('intentAlbumInfos'):
|
||||
vdata=[{'albumInfo': c} for c in i['intentAlbumInfos']]+vdata
|
||||
|
||||
for i in vdata:
|
||||
if i.get('albumInfo') and (i['albumInfo'].get('playQipuId','') or i['albumInfo'].get('qipuId')) and i['albumInfo'].get('pageUrl'):
|
||||
b=i['albumInfo']
|
||||
id=f"{(b.get('playQipuId','') or b.get('qipuId'))}@{self.e64(b.get('pageUrl'))}"
|
||||
videos.append({
|
||||
'vod_id': id,
|
||||
'vod_name': b.get('title'),
|
||||
'vod_pic': b.get('img'),
|
||||
'vod_year': (b.get('year',{}) or {}).get('value'),
|
||||
'vod_remarks': b.get('subscriptContent') or b.get('channel') or b.get('vipTips')
|
||||
})
|
||||
return {'list':videos,'page':pg}
|
||||
|
||||
def playerContent(self, flag, id, vipFlags):
|
||||
id=id.replace('http://m.','https://www.')
|
||||
return {'jx':1,'parse': 1, 'url': id, 'header': ''}
|
||||
|
||||
def localProxy(self, param):
|
||||
pass
|
||||
|
||||
def fetch_page_data(self, page, id):
|
||||
try:
|
||||
url = f'{self.dhost}/h5/mina/avlist/{page}/{id}/'
|
||||
data = self.fetch(url, headers=self.headers).json()
|
||||
return [f'{i["shortTitle"]}${i["pageUrl"]}' for i in data['data']['videoList']['videos']]
|
||||
except:
|
||||
return []
|
||||
|
||||
def getf(self,body):
|
||||
data=self.fetch(f'{self.hhost}/portal/lw/videolib/tag?channel_id={body["type_id"]}&tagAdd=&selected_tag_name=&version=13.014.21150&device={self.did}&uid=', headers=self.headers).json()
|
||||
ft = []
|
||||
# for i in data[:-1]:
|
||||
for i in data:
|
||||
try:
|
||||
value_array = [{"n": value['text'], "v": self.e64(value['tag_param'])} for value in i['tags'] if
|
||||
value.get('tag_param')]
|
||||
ft.append({"key": i['group'], "name": i['group'], "value": value_array})
|
||||
except:
|
||||
print(i)
|
||||
return (body['type_id'], ft)
|
||||
|
||||
def e64(self, text):
|
||||
try:
|
||||
text_bytes = text.encode('utf-8')
|
||||
encoded_bytes = b64encode(text_bytes)
|
||||
return encoded_bytes.decode('utf-8')
|
||||
except Exception as e:
|
||||
print(f"Base64编码错误: {str(e)}")
|
||||
return ""
|
||||
|
||||
def d64(self,encoded_text: str):
|
||||
try:
|
||||
encoded_bytes = encoded_text.encode('utf-8')
|
||||
decoded_bytes = b64decode(encoded_bytes)
|
||||
return decoded_bytes.decode('utf-8')
|
||||
except Exception as e:
|
||||
print(f"Base64解码错误: {str(e)}")
|
||||
return ""
|
||||
|
||||
def random_str(self,length=16):
|
||||
hex_chars = '0123456789abcdef'
|
||||
return ''.join(random.choice(hex_chars) for _ in range(length))
|
||||
@@ -0,0 +1,320 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# by @嗷呜
|
||||
import json
|
||||
import sys
|
||||
import uuid
|
||||
import copy
|
||||
sys.path.append('..')
|
||||
from base.spider import Spider
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
|
||||
class Spider(Spider):
|
||||
|
||||
def init(self, extend=""):
|
||||
self.dbody = {
|
||||
"page_params": {
|
||||
"channel_id": "",
|
||||
"filter_params": "sort=75",
|
||||
"page_type": "channel_operation",
|
||||
"page_id": "channel_list_second_page"
|
||||
}
|
||||
}
|
||||
self.body = self.dbody
|
||||
pass
|
||||
|
||||
def getName(self):
|
||||
pass
|
||||
|
||||
def isVideoFormat(self, url):
|
||||
pass
|
||||
|
||||
def manualVideoCheck(self):
|
||||
pass
|
||||
|
||||
def destroy(self):
|
||||
pass
|
||||
|
||||
host = 'https://v.qq.com'
|
||||
|
||||
apihost = 'https://pbaccess.video.qq.com'
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.5410.0 Safari/537.36',
|
||||
'origin': host,
|
||||
'referer': f'{host}/'
|
||||
}
|
||||
|
||||
def homeContent(self, filter):
|
||||
cdata = {
|
||||
"电视剧": "100113",
|
||||
"电影": "100173",
|
||||
"综艺": "100109",
|
||||
"纪录片": "100105",
|
||||
"动漫": "100119",
|
||||
"少儿": "100150",
|
||||
"短剧": "110755"
|
||||
}
|
||||
result = {}
|
||||
classes = []
|
||||
filters = {}
|
||||
for k in cdata:
|
||||
classes.append({
|
||||
'type_name': k,
|
||||
'type_id': cdata[k]
|
||||
})
|
||||
with ThreadPoolExecutor(max_workers=len(classes)) as executor:
|
||||
futures = [executor.submit(self.get_filter_data, item['type_id']) for item in classes]
|
||||
for future in futures:
|
||||
cid, data = future.result()
|
||||
if not data.get('data', {}).get('module_list_datas'):
|
||||
continue
|
||||
filter_dict = {}
|
||||
try:
|
||||
items = data['data']['module_list_datas'][-1]['module_datas'][-1]['item_data_lists']['item_datas']
|
||||
for item in items:
|
||||
if not item.get('item_params', {}).get('index_item_key'):
|
||||
continue
|
||||
params = item['item_params']
|
||||
filter_key = params['index_item_key']
|
||||
if filter_key not in filter_dict:
|
||||
filter_dict[filter_key] = {
|
||||
'key': filter_key,
|
||||
'name': params['index_name'],
|
||||
'value': []
|
||||
}
|
||||
filter_dict[filter_key]['value'].append({
|
||||
'n': params['option_name'],
|
||||
'v': params['option_value']
|
||||
})
|
||||
except (IndexError, KeyError):
|
||||
continue
|
||||
filters[cid] = list(filter_dict.values())
|
||||
result['class'] = classes
|
||||
result['filters'] = filters
|
||||
return result
|
||||
|
||||
def homeVideoContent(self):
|
||||
json_data = {'page_context':None,'page_params':{'page_id':'100101','page_type':'channel','skip_privacy_types':'0','support_click_scan':'1','new_mark_label_enabled':'1','ams_cookies':'',},'page_bypass_params':{'params':{'caller_id':'','data_mode':'default','page_id':'','page_type':'channel','platform_id':'2','user_mode':'default',},'scene':'channel','abtest_bypass_id':'',}}
|
||||
data = self.post(f'{self.apihost}/trpc.vector_layout.page_view.PageService/getPage',headers=self.headers, json=json_data).json()
|
||||
vlist = []
|
||||
for it in data['data']['CardList'][0]['children_list']['list']['cards']:
|
||||
if it.get('params'):
|
||||
p = it['params']
|
||||
tag = json.loads(p.get('uni_imgtag', '{}') or p.get('imgtag', '{}') or '{}')
|
||||
id = it.get('id') or p.get('cid')
|
||||
name = p.get('mz_title') or p.get('title')
|
||||
if name and 'http' not in id:
|
||||
vlist.append({
|
||||
'vod_id': id,
|
||||
'vod_name': name,
|
||||
'vod_pic': p.get('image_url'),
|
||||
'vod_year': tag.get('tag_2', {}).get('text'),
|
||||
'vod_remarks': tag.get('tag_4', {}).get('text')
|
||||
})
|
||||
return {'list': vlist}
|
||||
|
||||
def categoryContent(self, tid, pg, filter, extend):
|
||||
result = {}
|
||||
params = {
|
||||
"sort": extend.get('sort', '75'),
|
||||
"attr": extend.get('attr', '-1'),
|
||||
"itype": extend.get('itype', '-1'),
|
||||
"ipay": extend.get('ipay', '-1'),
|
||||
"iarea": extend.get('iarea', '-1'),
|
||||
"iyear": extend.get('iyear', '-1'),
|
||||
"theater": extend.get('theater', '-1'),
|
||||
"award": extend.get('award', '-1'),
|
||||
"recommend": extend.get('recommend', '-1')
|
||||
}
|
||||
if pg == '1':
|
||||
self.body = self.dbody.copy()
|
||||
self.body['page_params']['channel_id'] = tid
|
||||
self.body['page_params']['filter_params'] = self.josn_to_params(params)
|
||||
data = self.post(
|
||||
f'{self.apihost}/trpc.universal_backend_service.page_server_rpc.PageServer/GetPageData?video_appid=1000005&vplatform=2&vversion_name=8.9.10&new_mark_label_enabled=1',
|
||||
json=self.body, headers=self.headers).json()
|
||||
ndata = data['data']
|
||||
if ndata['has_next_page']:
|
||||
result['pagecount'] = 9999
|
||||
self.body['page_context'] = ndata['next_page_context']
|
||||
else:
|
||||
result['pagecount'] = int(pg)
|
||||
vlist = []
|
||||
for its in ndata['module_list_datas'][-1]['module_datas'][-1]['item_data_lists']['item_datas']:
|
||||
id = its.get('item_params', {}).get('cid')
|
||||
if id:
|
||||
p = its['item_params']
|
||||
tag = json.loads(p.get('uni_imgtag', '{}') or p.get('imgtag', '{}') or '{}')
|
||||
name = p.get('mz_title') or p.get('title')
|
||||
pic = p.get('new_pic_hz') or p.get('new_pic_vt')
|
||||
vlist.append({
|
||||
'vod_id': id,
|
||||
'vod_name': name,
|
||||
'vod_pic': pic,
|
||||
'vod_year': tag.get('tag_2', {}).get('text'),
|
||||
'vod_remarks': tag.get('tag_4', {}).get('text')
|
||||
})
|
||||
result['list'] = vlist
|
||||
result['page'] = pg
|
||||
result['limit'] = 90
|
||||
result['total'] = 999999
|
||||
return result
|
||||
|
||||
def detailContent(self, ids):
|
||||
vbody = {"page_params":{"req_from":"web","cid":ids[0],"vid":"","lid":"","page_type":"detail_operation","page_id":"detail_page_introduction"},"has_cache":1}
|
||||
body = {"page_params":{"req_from":"web_vsite","page_id":"vsite_episode_list","page_type":"detail_operation","id_type":"1","page_size":"","cid":ids[0],"vid":"","lid":"","page_num":"","page_context":"","detail_page_type":"1"},"has_cache":1}
|
||||
with ThreadPoolExecutor(max_workers=2) as executor:
|
||||
future_detail = executor.submit(self.get_vdata, vbody)
|
||||
future_episodes = executor.submit(self.get_vdata, body)
|
||||
vdata = future_detail.result()
|
||||
data = future_episodes.result()
|
||||
|
||||
pdata = self.process_tabs(data, body, ids)
|
||||
if not pdata:
|
||||
return self.handle_exception(None, "No pdata available")
|
||||
|
||||
try:
|
||||
star_list = vdata['data']['module_list_datas'][0]['module_datas'][0]['item_data_lists']['item_datas'][
|
||||
0].get('sub_items', {}).get('star_list', {}).get('item_datas', [])
|
||||
actors = [star['item_params']['name'] for star in star_list]
|
||||
names = ['腾讯视频', '预告片']
|
||||
plist, ylist = self.process_pdata(pdata, ids)
|
||||
if not plist:
|
||||
del names[0]
|
||||
if not ylist:
|
||||
del names[1]
|
||||
vod = self.build_vod(vdata, actors, plist, ylist, names)
|
||||
return {'list': [vod]}
|
||||
except Exception as e:
|
||||
return self.handle_exception(e, "Error processing detail")
|
||||
|
||||
def searchContent(self, key, quick, pg="1"):
|
||||
params = {
|
||||
"query": key,
|
||||
"appID": "3172",
|
||||
"appKey": "lGhFIPeD3HsO9xEp",
|
||||
"pageNum": int(pg) - 1,
|
||||
"pageSize": "10"
|
||||
}
|
||||
data = self.fetch(f"{self.apihost}/trpc.videosearch.smartboxServer.HttpRountRecall/Smartbox", params=params,headers=self.headers).json()
|
||||
vlist = []
|
||||
for k in data['data']['smartboxItemList']:
|
||||
if k.get('basicDoc') and k['basicDoc'].get('id'):
|
||||
img_tag = k.get('videoInfo', {}).get('imgTag')
|
||||
if img_tag is not None and isinstance(img_tag, str):
|
||||
try:
|
||||
tag = json.loads(img_tag)
|
||||
except json.JSONDecodeError as e:
|
||||
tag = {}
|
||||
else:
|
||||
tag = {}
|
||||
vlist.append({
|
||||
'vod_id': k['basicDoc']['id'],
|
||||
'vod_name': self.removeHtmlTags(k['basicDoc']['title']),
|
||||
'vod_pic': k['videoInfo']['imgUrl'],
|
||||
'vod_year': k['videoInfo'].get('typeName') + ' ' + tag.get('tag_2', {}).get('text', ''),
|
||||
'vod_remarks': tag.get('tag_4', {}).get('text', '')
|
||||
})
|
||||
return {'list': vlist, 'page': pg}
|
||||
|
||||
def playerContent(self, flag, id, vipFlags):
|
||||
ids = id.split('@')
|
||||
url = f"{self.host}/x/cover/{ids[0]}/{ids[1]}.html"
|
||||
return {'jx':1,'parse': 1, 'url': url, 'header': ''}
|
||||
|
||||
def localProxy(self, param):
|
||||
pass
|
||||
|
||||
def get_filter_data(self, cid):
|
||||
hbody = self.dbody.copy()
|
||||
hbody['page_params']['channel_id'] = cid
|
||||
data = self.post(
|
||||
f'{self.apihost}/trpc.universal_backend_service.page_server_rpc.PageServer/GetPageData?video_appid=1000005&vplatform=2&vversion_name=8.9.10&new_mark_label_enabled=1',
|
||||
json=hbody, headers=self.headers).json()
|
||||
return cid, data
|
||||
|
||||
def get_vdata(self, body):
|
||||
try:
|
||||
vdata = self.post(
|
||||
f'{self.apihost}/trpc.universal_backend_service.page_server_rpc.PageServer/GetPageData?video_appid=3000010&vplatform=2&vversion_name=8.2.96',
|
||||
json=body, headers=self.headers
|
||||
).json()
|
||||
return vdata
|
||||
except Exception as e:
|
||||
print(f"Error in get_vdata: {str(e)}")
|
||||
return {'data': {'module_list_datas': []}}
|
||||
|
||||
def process_pdata(self, pdata, ids):
|
||||
plist = []
|
||||
ylist = []
|
||||
for k in pdata:
|
||||
if k.get('item_id'):
|
||||
pid = f"{k['item_params']['union_title']}${ids[0]}@{k['item_id']}"
|
||||
if '预告' in k['item_params']['union_title']:
|
||||
ylist.append(pid)
|
||||
else:
|
||||
plist.append(pid)
|
||||
return plist, ylist
|
||||
|
||||
def build_vod(self, vdata, actors, plist, ylist, names):
|
||||
d = vdata['data']['module_list_datas'][0]['module_datas'][0]['item_data_lists']['item_datas'][0]['item_params']
|
||||
urls = []
|
||||
if plist:
|
||||
urls.append('#'.join(plist))
|
||||
if ylist:
|
||||
urls.append('#'.join(ylist))
|
||||
vod = {
|
||||
'type_name': d.get('sub_genre', ''),
|
||||
'vod_name': d.get('title', ''),
|
||||
'vod_year': d.get('year', ''),
|
||||
'vod_area': d.get('area_name', ''),
|
||||
'vod_remarks': d.get('holly_online_time', '') or d.get('hotval', ''),
|
||||
'vod_actor': ','.join(actors),
|
||||
'vod_content': d.get('cover_description', ''),
|
||||
'vod_play_from': '$$$'.join(names),
|
||||
'vod_play_url': '$$$'.join(urls)
|
||||
}
|
||||
return vod
|
||||
|
||||
def handle_exception(self, e, message):
|
||||
print(f"{message}: {str(e)}")
|
||||
return {'list': [{'vod_play_from': '哎呀翻车啦', 'vod_play_url': '翻车啦#555'}]}
|
||||
|
||||
def process_tabs(self, data, body, ids):
|
||||
try:
|
||||
pdata = data['data']['module_list_datas'][-1]['module_datas'][-1]['item_data_lists']['item_datas']
|
||||
tabs = data['data']['module_list_datas'][-1]['module_datas'][-1]['module_params'].get('tabs')
|
||||
if tabs and len(json.loads(tabs)):
|
||||
tabs = json.loads(tabs)
|
||||
remaining_tabs = tabs[1:]
|
||||
task_queue = []
|
||||
for tab in remaining_tabs:
|
||||
nbody = copy.deepcopy(body)
|
||||
nbody['page_params']['page_context'] = tab['page_context']
|
||||
task_queue.append(nbody)
|
||||
with ThreadPoolExecutor(max_workers=10) as executor:
|
||||
future_map = {executor.submit(self.get_vdata, task): idx for idx, task in enumerate(task_queue)}
|
||||
results = [None] * len(task_queue)
|
||||
for future in as_completed(future_map.keys()):
|
||||
idx = future_map[future]
|
||||
results[idx] = future.result()
|
||||
for result in results:
|
||||
if result:
|
||||
page_data = result['data']['module_list_datas'][-1]['module_datas'][-1]['item_data_lists'][
|
||||
'item_datas']
|
||||
pdata.extend(page_data)
|
||||
return pdata
|
||||
except Exception as e:
|
||||
print(f"Error processing episodes: {str(e)}")
|
||||
return []
|
||||
|
||||
def josn_to_params(self, params, skip_empty=False):
|
||||
query = []
|
||||
for k, v in params.items():
|
||||
if skip_empty and not v:
|
||||
continue
|
||||
query.append(f"{k}={v}")
|
||||
return "&".join(query)
|
||||
|
||||
|
||||
@@ -0,0 +1,205 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# by @嗷呜
|
||||
import sys
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
sys.path.append('..')
|
||||
from base.spider import Spider
|
||||
|
||||
class Spider(Spider):
|
||||
|
||||
def init(self, extend=""):
|
||||
pass
|
||||
|
||||
def getName(self):
|
||||
pass
|
||||
|
||||
def isVideoFormat(self, url):
|
||||
pass
|
||||
|
||||
def manualVideoCheck(self):
|
||||
pass
|
||||
|
||||
def destroy(self):
|
||||
pass
|
||||
|
||||
rhost='https://www.mgtv.com'
|
||||
|
||||
host='https://pianku.api.mgtv.com'
|
||||
|
||||
vhost='https://pcweb.api.mgtv.com'
|
||||
|
||||
mhost='https://dc.bz.mgtv.com'
|
||||
|
||||
shost='https://mobileso.bz.mgtv.com'
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.61 Chrome/126.0.6478.61 Not/A)Brand/8 Safari/537.36',
|
||||
'origin': rhost,
|
||||
'referer': f'{rhost}/'
|
||||
}
|
||||
|
||||
def homeContent(self, filter):
|
||||
result = {}
|
||||
cateManual = {
|
||||
"电影": "3",
|
||||
"电视剧": "2",
|
||||
"综艺": "1",
|
||||
"动画": "50",
|
||||
"少儿": "10",
|
||||
"纪录片": "51",
|
||||
"教育": "115"
|
||||
}
|
||||
classes = []
|
||||
filters = {}
|
||||
for k in cateManual:
|
||||
classes.append({
|
||||
'type_name': k,
|
||||
'type_id': cateManual[k]
|
||||
})
|
||||
with ThreadPoolExecutor(max_workers=len(classes)) as executor:
|
||||
results = executor.map(self.getf, classes)
|
||||
for id, ft in results:
|
||||
if len(ft):filters[id] = ft
|
||||
result['class'] = classes
|
||||
result['filters'] = filters
|
||||
return result
|
||||
|
||||
def homeVideoContent(self):
|
||||
data=self.fetch(f'{self.mhost}/dynamic/v1/channel/index/0/0/0/1000000/0/0/17/1354?type=17&version=5.0&t={str(int(time.time()*1000))}&_support=10000000', headers=self.headers).json()
|
||||
videoList = []
|
||||
for i in data['data']:
|
||||
if i.get('DSLList') and len(i['DSLList']):
|
||||
for j in i['DSLList']:
|
||||
if j.get('data') and j['data'].get('items') and len(j['data']['items']):
|
||||
for k in j['data']['items']:
|
||||
videoList.append({
|
||||
'vod_id': k["videoId"],
|
||||
'vod_name': k['videoName'],
|
||||
'vod_pic': k['img'],
|
||||
'vod_year': k.get('cornerTitle'),
|
||||
'vod_remarks': k.get('time') or k.get('desc'),
|
||||
})
|
||||
return {'list':videoList}
|
||||
|
||||
def categoryContent(self, tid, pg, filter, extend):
|
||||
body={
|
||||
'allowedRC': '1',
|
||||
'platform': 'pcweb',
|
||||
'channelId': tid,
|
||||
'pn': pg,
|
||||
'pc': '80',
|
||||
'hudong': '1',
|
||||
'_support': '10000000'
|
||||
}
|
||||
body.update(extend)
|
||||
data=self.fetch(f'{self.host}/rider/list/pcweb/v3', params=body, headers=self.headers).json()
|
||||
videoList = []
|
||||
for i in data['data']['hitDocs']:
|
||||
videoList.append({
|
||||
'vod_id': i["playPartId"],
|
||||
'vod_name': i['title'],
|
||||
'vod_pic': i['img'],
|
||||
'vod_year': (i.get('rightCorner',{}) or {}).get('text') or i.get('year'),
|
||||
'vod_remarks': i['updateInfo']
|
||||
})
|
||||
result = {}
|
||||
result['list'] = videoList
|
||||
result['page'] = pg
|
||||
result['pagecount'] = 9999
|
||||
result['limit'] = 90
|
||||
result['total'] = 999999
|
||||
return result
|
||||
|
||||
def detailContent(self, ids):
|
||||
vbody={'allowedRC': '1', 'vid': ids[0], 'type': 'b', '_support': '10000000'}
|
||||
vdata=self.fetch(f'{self.vhost}/video/info', params=vbody, headers=self.headers).json()
|
||||
d=vdata['data']['info']['detail']
|
||||
vod = {
|
||||
'vod_name': vdata['data']['info']['title'],
|
||||
'type_name': d.get('kind'),
|
||||
'vod_year': d.get('releaseTime'),
|
||||
'vod_area': d.get('area'),
|
||||
'vod_lang': d.get('language'),
|
||||
'vod_remarks': d.get('updateInfo'),
|
||||
'vod_actor': d.get('leader'),
|
||||
'vod_director': d.get('director'),
|
||||
'vod_content': d.get('story'),
|
||||
'vod_play_from': '芒果TV',
|
||||
'vod_play_url': ''
|
||||
}
|
||||
data,pdata=self.fetch_page_data('1', ids[0],True)
|
||||
pagecount=data['data'].get('total_page') or 1
|
||||
if int(pagecount)>1:
|
||||
pages = list(range(2, pagecount+1))
|
||||
page_results = {}
|
||||
with ThreadPoolExecutor(max_workers=10) as executor:
|
||||
future_to_page = {
|
||||
executor.submit(self.fetch_page_data, page, ids[0]): page
|
||||
for page in pages
|
||||
}
|
||||
for future in as_completed(future_to_page):
|
||||
page = future_to_page[future]
|
||||
try:
|
||||
result = future.result()
|
||||
page_results[page] = result
|
||||
except Exception as e:
|
||||
print(f"Error fetching page {page}: {e}")
|
||||
for page in sorted(page_results.keys()):
|
||||
pdata.extend(page_results[page])
|
||||
vod['vod_play_url'] = '#'.join(pdata)
|
||||
return {'list':[vod]}
|
||||
|
||||
def searchContent(self, key, quick, pg="1"):
|
||||
data=self.fetch(f'{self.shost}/applet/search/v1?channelCode=mobile-wxap&q={key}&pn={pg}&pc=10&_support=10000000', headers=self.headers).json()
|
||||
videoList = []
|
||||
for i in data['data']['contents']:
|
||||
if i.get('data') and len(i['data']):
|
||||
k = i['data'][0]
|
||||
if k.get('vid') and k.get('img'):
|
||||
try:
|
||||
videoList.append({
|
||||
'vod_id': k['vid'],
|
||||
'vod_name': k['title'],
|
||||
'vod_pic': k['img'],
|
||||
'vod_year': (i.get('rightTopCorner',{}) or {}).get('text') or i.get('year'),
|
||||
'vod_remarks': '/'.join(i.get('desc',[])),
|
||||
})
|
||||
except:
|
||||
print(k)
|
||||
return {'list':videoList,'page':pg}
|
||||
|
||||
def playerContent(self, flag, id, vipFlags):
|
||||
id=f'{self.rhost}{id}'
|
||||
return {'jx':1,'parse': 1, 'url': id, 'header': ''}
|
||||
|
||||
def localProxy(self, param):
|
||||
pass
|
||||
|
||||
def getf(self, body):
|
||||
params = {
|
||||
'allowedRC': '1',
|
||||
'channelId': body['type_id'],
|
||||
'platform': 'pcweb',
|
||||
'_support': '10000000',
|
||||
}
|
||||
data = self.fetch(f'{self.host}/rider/config/channel/v1', params=params, headers=self.headers).json()
|
||||
ft = []
|
||||
for i in data['data']['listItems']:
|
||||
try:
|
||||
value_array = [{"n": value['tagName'], "v": value['tagId']} for value in i['items'] if
|
||||
value.get('tagName')]
|
||||
ft.append({"key": i['eName'], "name": i['typeName'], "value": value_array})
|
||||
except:
|
||||
print(i)
|
||||
return body['type_id'], ft
|
||||
|
||||
def fetch_page_data(self, page, id, b=False):
|
||||
body = {'version': '5.5.35', 'video_id': id, 'page': page, 'size': '30',
|
||||
'platform': '4', 'src': 'mgtv', 'allowedRC': '1', '_support': '10000000'}
|
||||
data = self.fetch(f'{self.vhost}/episode/list', params=body, headers=self.headers).json()
|
||||
ldata = [f'{i["t3"]}${i["url"]}' for i in data['data']['list']]
|
||||
if b:
|
||||
return data, ldata
|
||||
else:
|
||||
return ldata
|
||||
@@ -0,0 +1,111 @@
|
||||
import re
|
||||
import asyncio
|
||||
from urllib.parse import unquote
|
||||
import aiohttp
|
||||
from pyquery import PyQuery as pq
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
|
||||
'Accept-Language': 'zh-CN,zh;q=0.9'
|
||||
}
|
||||
|
||||
async def fetch(session, url):
|
||||
async with session.get(url, headers=headers) as response:
|
||||
return await response.text()
|
||||
|
||||
def grtclass(data):
|
||||
classes = []
|
||||
vdata = []
|
||||
for i in data.items():
|
||||
j = i('a').attr('href')
|
||||
if j and ('type' in j or 'show' in j):
|
||||
id = re.search(r'\d+', j)
|
||||
if id:
|
||||
id = id.group(0)
|
||||
else:
|
||||
id = j.split('/')[-1].split('.')[0]
|
||||
if id not in vdata:
|
||||
vdata.append(id)
|
||||
classes.append({
|
||||
'type_name': i('a').text(),
|
||||
'type_id': id
|
||||
})
|
||||
return classes
|
||||
|
||||
def get_k(text,type):
|
||||
key = ''
|
||||
cates={"class": "类型,剧情", "area": "地区", "lang": "语言", "year": "年份,时间", "letter": "字母", "by": "排序","sort": "排序"}
|
||||
for i,x in cates.items():
|
||||
if type== 'wobg' and i in text:
|
||||
key = i
|
||||
break
|
||||
elif type == 'wogg':
|
||||
for j in x.split(','):
|
||||
if j in text:
|
||||
key = i
|
||||
break
|
||||
|
||||
if type == 'wobg':
|
||||
if not key:
|
||||
if 'id' in text:
|
||||
key = 'id'
|
||||
return key
|
||||
|
||||
def get_v(text,key,type):
|
||||
if type == 'wobg':
|
||||
return text.split(f'{key}/')[-1].split('/')[0].split('.')[0]
|
||||
else:
|
||||
v=text.split('/',-1)[-1].split('.')[0][1:].replace('-','')
|
||||
if v=='09':v='0-9'
|
||||
return v
|
||||
|
||||
async def c(session, host):
|
||||
html = await fetch(session, host)
|
||||
data = pq(html)
|
||||
classes = grtclass(data('.drop-content-items li'))
|
||||
if not len(classes): classes = grtclass(data('.nav-menu-items li'))
|
||||
return classes
|
||||
|
||||
async def get_ft(session, url,type):
|
||||
print(f"请求: {url}")
|
||||
html = await fetch(session, url)
|
||||
data = pq(html)
|
||||
ft = []
|
||||
for i in list(data('div.library-box.scroll-box').items())[1:]:
|
||||
n = i('a.library-item-first').text()
|
||||
c = i('.library-list a')
|
||||
if type == 'wobg':
|
||||
key = get_k(c.eq(0).attr('href'), type)
|
||||
else:
|
||||
key = get_k(n,type)
|
||||
ft.append({
|
||||
'name': n or key,
|
||||
'key': key,
|
||||
'value': [{'v': unquote(get_v(j.attr('href'),key,type)), 'n': j.text()} for j in c.items()]
|
||||
})
|
||||
return ft
|
||||
|
||||
async def main(host,type):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
categories = await c(session, host)
|
||||
print(f"分类: {categories}")
|
||||
tasks = []
|
||||
fts = {}
|
||||
if len(categories):
|
||||
for i in categories:
|
||||
path=f"/index.php/vod/show/id/{i['type_id']}.html" if type == 'wobg' else f"/vodtype/{i['type_id']}.html"
|
||||
task = asyncio.create_task(get_ft(session, f"{host}{path}",type))
|
||||
tasks.append((i['type_id'], task))
|
||||
for type_id, task in tasks:
|
||||
fts[type_id] = await task
|
||||
return {'class': categories, 'filters': fts}
|
||||
|
||||
if __name__ == '__main__':
|
||||
# url = 'http://wogg.xxooo.cf'
|
||||
url = 'http://2xiaopan.fun'
|
||||
types = ['wobg','wogg']
|
||||
loop = asyncio.get_event_loop()
|
||||
result = loop.run_until_complete(main(url, types[0]))
|
||||
print('分类筛选生成结果:')
|
||||
print(result)
|
||||
Reference in New Issue
Block a user