purge history

This commit is contained in:
GitHub Actions
2026-02-07 05:02:33 +00:00
commit d0bd20ce39
686 changed files with 294875 additions and 0 deletions
+335
View File
@@ -0,0 +1,335 @@
import requests
from bs4 import BeautifulSoup
import re
from base.spider import Spider
import sys
import json
import base64
import urllib.parse
from Crypto.Cipher import ARC4
from Crypto.Util.Padding import unpad
import binascii
sys.path.append('..')
# Base URL of the scraped site. It already ends with "/" and is prepended to
# relative links and used to build request URLs below.
xurl = "https://www.fullhd.xxx/zh/"
# Request headers (desktop Chrome User-Agent) passed to the page requests.
headerx = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36'
}
# Declared `global` in detailContent but never assigned in the visible code.
pm = ''
class Spider(Spider):
global xurl
global headerx
def getName(self):
    """Return the display name of this spider."""
    name = "首页"
    return name
def init(self, extend):
    """Initialisation hook; this spider performs no setup and returns None."""
    return None
def isVideoFormat(self, url):
    """Placeholder: no format check is implemented; always returns None."""
    return None
def manualVideoCheck(self):
    """Placeholder: no manual check is implemented; always returns None."""
    return None
def extract_middle_text(self, text, start_str, end_str, pl, start_index1: str = '', end_index2: str = ''):
    """Extract text located between two literal markers.

    Args:
        text: Text to search.
        start_str: Literal marker that opens a span.
        end_str: Literal marker that closes a span.
        pl: Extraction mode.
            0 - first span, with backslashes removed.
            1 - regex ``start_index1`` applied to the first span, matches
                joined with a single space.
            2 - like 1, but matches are joined with ``$$$``.
            3 - every span is collected; ``start_index1`` must yield
                ``(link, title)`` tuples, which become play-list entries
                joined by ``#`` within a span and ``$$$`` between spans.
        start_index1: Regular expression used by modes 1, 2 and 3.
        end_index2: Unused; kept so existing calls keep working.

    Returns:
        The extracted string. Always a ``str``: ``""`` is returned when a
        marker is missing, nothing matches, or the mode is unknown
        (previously modes 1 and 2 fell through and returned ``None``).
    """
    if pl == 3:
        spans = []
        while True:
            start_index = text.find(start_str)
            if start_index == -1:
                break
            end_index = text.find(end_str, start_index + len(start_str))
            if end_index == -1:
                break
            middle_text = text[start_index + len(start_str):end_index]
            spans.append(middle_text)
            # Remove the consumed span (and identical copies) so the next
            # search finds the following one.
            text = text.replace(start_str + middle_text + end_str, '')
        groups = []
        for span in spans:
            entries = []
            for match in re.findall(start_index1, span):
                link, title = match[0], match[1]
                found = re.search(r'(?:^|[^0-9])(\d+)(?:[^0-9]|$)', title)
                number = found.group(1) if found else 0
                # Relative links are prefixed with the site base URL.
                prefix = '' if 'http' in link else xurl
                entries.append(f"{'📽️' + title}${number}{prefix}{link}")
            groups.append('#'.join(entries))
        return '$$$'.join(groups)

    start_index = text.find(start_str)
    if start_index == -1:
        return ""
    end_index = text.find(end_str, start_index + len(start_str))
    if end_index == -1:
        return ""
    middle_text = text[start_index + len(start_str):end_index]
    if pl == 0:
        return middle_text.replace("\\", "")
    if pl == 1:
        return ' '.join(re.findall(start_index1, middle_text))
    if pl == 2:
        return '$$$'.join(f'{item}' for item in re.findall(start_index1, middle_text))
    return ""
def homeContent(self, filter):
    """Return the fixed list of categories shown on the home screen."""
    categories = (
        ("latest-updates", "最新视频🌠"),
        ("top-rated", "最佳视频🌠"),
        ("most-popular", "热门影片🌠"),
    )
    return {
        "class": [
            {"type_id": type_id, "type_name": label}
            for type_id, label in categories
        ]
    }
def homeVideoContent(self):
    """Fetch the site's front page and return the videos listed on it.

    Returns:
        ``{'list': [...]}`` whose entries carry ``vod_id``, ``vod_name``,
        ``vod_pic`` and ``vod_remarks``. The list is empty when the request
        or the parsing fails.
    """
    # Name used for a card whose link has no title attribute (the label the
    # original code applied on its first pass).
    fallback_name = "最新视频"
    videos = []
    try:
        detail = requests.get(url=xurl, headers=headerx, timeout=15)
        detail.encoding = "utf-8"
        doc = BeautifulSoup(detail.text, "lxml")
        # The previous version looped over three section labels but looked up
        # this same container on every pass, so each video was returned three
        # times. The container is queried once here.
        section = doc.find('div', id="list_videos_videos_watched_right_now_items")
        cards = section.find_all('div', class_="item") if section is not None else []
        for vod in cards:
            link = vod.find('a')
            img = vod.find('img', class_="lazyload")
            duration = vod.find('span', class_="duration")
            pic = img.get('data-src', '') if img is not None else ""
            if pic and 'http' not in pic:
                pic = xurl + pic
            videos.append({
                # .get() keeps one card without href from discarding the list.
                "vod_id": link.get('href', '') if link is not None else "",
                "vod_name": link.get('title', fallback_name) if link is not None else fallback_name,
                "vod_pic": pic,
                "vod_remarks": duration.text.strip() if duration is not None else "",
            })
        return {'list': videos}
    except Exception as e:
        print(f"Error in homeVideoContent: {str(e)}")
        return {'list': []}
def categoryContent(self, cid, pg, filter, ext):
    """Return one page of videos for the category ``cid``.

    Args:
        cid: Category path segment (one of the ``type_id`` values).
        pg: Page number (string or int); pages above 1 are appended to the URL.
        filter: Unused.
        ext: Unused.

    Returns:
        Dict with ``list`` (video entries), ``page`` and fixed paging
        placeholders. ``list`` is empty or partial if the request or the
        parsing fails; the error is printed.
    """
    videos = []
    try:
        # xurl ends with "/"; strip it so the path does not contain "//".
        base = xurl.rstrip('/')
        if pg and int(pg) > 1:
            url = f'{base}/{cid}/{pg}/'
        else:
            url = f'{base}/{cid}/'
        detail = requests.get(url=url, headers=headerx, timeout=15)
        detail.encoding = "utf-8"
        doc = BeautifulSoup(detail.text, "lxml")
        section = doc.find('div', class_="list-videos")
        cards = section.find_all('div', class_="item") if section is not None else []
        for vod in cards:
            link = vod.find('a')
            img = vod.find('img', class_="lazyload")
            duration = vod.find('span', class_="duration")
            pic = img.get('data-src', '') if img is not None else ""
            if pic and 'http' not in pic:
                pic = xurl + pic
            videos.append({
                # .get() keeps one card without href from aborting the page.
                "vod_id": link.get('href', '') if link is not None else "",
                "vod_name": link.get('title', '') if link is not None else "",
                "vod_pic": pic,
                "vod_remarks": duration.text.strip() if duration is not None else "",
            })
    except Exception as e:
        print(f"Error in categoryContent: {str(e)}")
    return {
        'list': videos,
        'page': pg,
        'pagecount': 9999,
        'limit': 90,
        'total': 999999
    }
def detailContent(self, ids):
    """Load a video page and return its detail record.

    Args:
        ids: List whose first element is the video URL or a link relative
            to the site base URL.

    Returns:
        ``{'list': [vod]}``; ``{'list': []}`` when ``ids`` is empty.
    """
    if not ids:
        return {'list': []}
    did = ids[0]
    if 'http' not in did:
        did = xurl + did
    res1 = requests.get(url=did, headers=headerx, timeout=15)
    res1.encoding = "utf-8"
    res = res1.text
    content = '👉' + (self.extract_middle_text(res, '<h1>', '</h1>', 0) or '')
    # Mode 1 may yield nothing when no performer links are present.
    yanuan = self.extract_middle_text(res, '<span>Pornstars:</span>', '</div>', 1, 'href=".*?">(.*?)</a>') or ''
    return {
        'list': [{
            "vod_id": did,
            "vod_actor": yanuan,
            "vod_director": '',
            "vod_content": content,
            "vod_play_from": '💗4K💗',
            # The page URL itself is the play id; playerContent resolves it.
            "vod_play_url": did
        }]
    }
def playerContent(self, flag, id, vipFlags):
    """Resolve a play id into a direct media URL.

    The page URL is the part of ``id`` starting at its first ``http``. The
    first ``<source>`` inside the page's ``<video>`` element is taken and up
    to two 302 redirects are followed.

    Args:
        flag: Unused.
        id: Play id containing the video page URL.
        vipFlags: Unused.

    Returns:
        Dict with ``parse`` (0), ``playUrl``, ``url`` and ``header``.
        ``url`` is empty when the id holds no URL or the page has no
        ``<source>``; both cases previously raised NameError/AttributeError.
    """
    result = {"parse": 0, "playUrl": '', "url": '', "header": headerx}
    marker = id.find('http')
    if marker == -1:
        return result
    page = requests.get(url=id[marker:], headers=headerx, timeout=15).text
    video_html = self.extract_middle_text(page, '<video', '</video>', 0) or ''
    source = BeautifulSoup(video_html.replace('\\', ''), 'html.parser').find('source')
    src_value = source.get('src') if source is not None else None
    if not src_value:
        return result
    # Start from the source URL so a non-redirecting response still plays;
    # `redirect_url` used to be unbound in that case.
    play_url = src_value
    for _ in range(2):
        response = requests.head(play_url, allow_redirects=False, timeout=15)
        if response.status_code != 302:
            break
        play_url = response.headers['Location']
    result["url"] = play_url
    return result
def searchContentPage(self, key, quick, page):
    """Search the site and return one page of results.

    Args:
        key: Search keyword; it is percent-encoded before being placed in
            the URL path.
        quick: Unused.
        page: Page number (string or int); empty values mean page 1.

    Returns:
        Dict with ``list`` (video entries), ``page`` and fixed paging
        placeholders. Errors are printed and yield an empty or partial list.
    """
    videos = []
    if not page:
        page = '1'
    # xurl ends with "/"; strip it so the path does not contain "//".
    base = xurl.rstrip('/')
    keyword = urllib.parse.quote(str(key), safe='')
    # Compare as text so an int page 1 is also treated as the first page.
    if str(page) == '1':
        url = f'{base}/search/{keyword}/'
    else:
        url = f'{base}/search/{keyword}/{str(page)}/'
    try:
        detail = requests.get(url=url, headers=headerx, timeout=15)
        detail.encoding = "utf-8"
        doc = BeautifulSoup(detail.text, "lxml")
        section = doc.find('div', class_="list-videos")
        cards = section.find_all('div', class_="item") if section is not None else []
        for vod in cards:
            link = vod.find('a')
            img = vod.find('img', class_="lazyload")
            duration = vod.find('span', class_="duration")
            pic = img.get('data-src', '') if img is not None else ""
            if pic and 'http' not in pic:
                pic = xurl + pic
            videos.append({
                "vod_id": link.get('href', '') if link is not None else "",
                "vod_name": link.get('title', '') if link is not None else "",
                "vod_pic": pic,
                "vod_remarks": duration.text.strip() if duration is not None else "",
            })
    except Exception as e:
        print(f"Error in searchContentPage: {str(e)}")
    return {
        'list': videos,
        'page': page,
        'pagecount': 9999,
        'limit': 90,
        'total': 999999
    }
def searchContent(self, key, quick, pg='1'):
    """Search for ``key`` and return the requested result page.

    Args:
        key: Search keyword.
        quick: Passed through to ``searchContentPage``.
        pg: Page number; defaults to the first page so two-argument calls
            keep working.
    """
    return self.searchContentPage(key, quick, pg)
def localProxy(self, params):
    """Route a local-proxy request to the handler selected by ``params['type']``.

    Returns the handler's result, or None for an unrecognised type.
    """
    kind = params['type']
    if kind == "m3u8":
        return self.proxyM3u8(params)
    if kind == "media":
        return self.proxyMedia(params)
    if kind == "ts":
        return self.proxyTs(params)
    return None
+155
View File
@@ -0,0 +1,155 @@
# -*- coding: utf-8 -*-
# @Author : Doubebly
# @Time : 2025/3/23 21:55
import base64
import sys
import time
import json
import requests
import re # 新增导入re模块
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
def getName(self):
    """Return the display name of this spider."""
    name = "Litv"
    return name
def init(self, extend):
    """Parse the JSON configuration string and record the proxy settings.

    Args:
        extend: JSON text; an optional ``proxy`` entry enables proxying.

    Sets ``self.extend``, ``self.extendDict``, ``self.proxy`` and
    ``self.is_proxy``. ``self.proxy`` is always assigned (None when no proxy
    is configured) so later reads cannot raise AttributeError.
    """
    self.extend = extend
    try:
        parsed = json.loads(extend)
    except (TypeError, ValueError):
        # Not a string, or not valid JSON: treat as "no configuration".
        parsed = {}
    # Valid JSON that is not an object (e.g. a list) carries no settings.
    self.extendDict = parsed if isinstance(parsed, dict) else {}
    self.proxy = self.extendDict.get('proxy', None)
    self.is_proxy = self.proxy is not None
def getDependence(self):
    """Return the list of dependencies; this spider has none."""
    return list()
def isVideoFormat(self, url):
    """Placeholder: no format check is implemented; always returns None."""
    return None
def manualVideoCheck(self):
    """Placeholder: no manual check is implemented; always returns None."""
    return None
def natural_sort_key(self, s):
    """Return a sort key that orders embedded numbers numerically.

    The string is split into alternating text and digit runs; digit runs
    become ints and text runs are lower-cased, so "CCTV2" sorts before
    "CCTV10".

    ``str.isdecimal`` is used rather than ``isdigit`` because it matches the
    character class ``\\d`` splits on. ``isdigit`` also accepts characters
    such as "²", which ``\\d`` does not isolate and ``int()`` rejects.
    """
    return [
        int(part) if part.isdecimal() else part.lower()
        for part in re.split(r'(\d+)', s)
    ]
def liveContent(self, url):
    """Download the channel list and render it as M3U text.

    Args:
        url: Unused; the channel list is always fetched from the fixed API.

    Returns:
        M3U text starting with ``#EXTM3U``. On failure the text contains a
        comment line describing the error instead of channels.
    """
    # Always start with the EXTM3U declaration so the result is valid M3U.
    a = ['#EXTM3U']
    try:
        base_url = "https://kzb29rda.com/prod-api/iptv/getIptvList?liveType=0&deviceType=1"
        response = requests.get(base_url, timeout=15)
        response.raise_for_status()  # raises on HTTP errors such as 404/500
        data = response.json()
        sorted_list = sorted(
            data.get('list', []),
            # `or ""` guards against a null channel name in the payload.
            key=lambda x: self.natural_sort_key(x.get("play_source_name") or "")
        )
        channels = []
        for item in sorted_list:
            channels.append(
                f'#EXTINF:-1 tvg-id="{item["play_source_name"]}" tvg-name="{item["play_source_name"]}" '
                f'tvg-logo="https://logo.doube.eu.org/{item["play_source_name"]}.png" group-title="",'
                f'{item["play_source_name"]}'
            )
            channels.append(item['play_source_url'])
        # Merge only after every entry was built, so a malformed item never
        # leaves a half-written list.
        a += channels
    except json.JSONDecodeError:
        # Must come before RequestException: in recent requests releases the
        # JSON error raised by response.json() is also a RequestException.
        print("响应内容不是有效的JSON")
        a.append('# 错误:无效的API响应')
    except requests.exceptions.RequestException as e:
        print(f"网络请求失败: {e}")
        a.append('# 错误:无法获取频道列表')
    except KeyError as e:
        print(f"数据解析错误,缺少字段: {e}")
        a.append('# 错误:数据格式异常')
    return '\n'.join(a)
def homeContent(self, filter):
    """Not implemented for this live-only spider; returns an empty dict."""
    return dict()
def homeVideoContent(self):
    """Not implemented; returns an empty dict."""
    return dict()
def categoryContent(self, cid, page, filter, ext):
    """Not implemented; returns an empty dict."""
    return dict()
def detailContent(self, did):
    """Not implemented; returns an empty dict."""
    return dict()
def searchContent(self, key, quick, page='1'):
    """Not implemented; returns an empty dict."""
    return dict()
def searchContentPage(self, keywords, quick, page):
    """Not implemented; returns an empty dict."""
    return dict()
def playerContent(self, flag, pid, vipFlags):
    """Not implemented; returns an empty dict."""
    return dict()
def localProxy(self, params):
    """Route a local-proxy request by ``params['type']``.

    "m3u8" and "ts" go to their handlers; any other type is answered with a
    302 redirect to a fixed demo video.
    """
    kind = params['type']
    if kind == "m3u8":
        return self.proxyM3u8(params)
    if kind == "ts":
        return self.get_ts(params)
    fallback = {'Location': 'https://sf1-cdn-tos.huoshanstatic.com/obj/media-fe/xgplayer_doc_video/mp4/xgplayer-demo-720p.mp4'}
    return [302, "text/plain", None, fallback]
def proxyM3u8(self, params):
    """Build a ten-segment HLS playlist for the stream named in ``params['pid']``.

    ``pid`` is "a,b,c"; the three fields are substituted into the segment
    URL template. The first sequence number is derived from the current
    time, and each following segment advances the sequence by 1 and the
    begin value by 4. When ``self.is_proxy`` is true every segment URL is
    wrapped in a local-proxy URL.

    Returns:
        ``[200, "application/vnd.apple.mpegurl", playlist_text]``.
    """
    fields = params['pid'].split(',')
    a, b, c = fields[0], fields[1], fields[2]
    first_seq = int(time.time() / 4 - 355017625)
    lines = [
        '#EXTM3U',
        '#EXT-X-VERSION:3',
        '#EXT-X-TARGETDURATION:4',
        f'#EXT-X-MEDIA-SEQUENCE:{first_seq}',
    ]
    for offset in range(10):
        seq = first_seq + offset
        begin = first_seq * 4 + offset * 4
        url = f'https://ntd-tgc.cdn.hinet.net/live/pool/{a}/litv-pc/{a}-avc1_6000000={b}-mp4a_134000_zho={c}-begin={begin}0000000-dur=40000000-seq={seq}.ts'
        if self.is_proxy:
            url = f'http://127.0.0.1:9978/proxy?do=py&type=ts&url={self.b64encode(url)}'
        lines.append('#EXTINF:4,')
        lines.append(url)
    return [200, "application/vnd.apple.mpegurl", '\n'.join(lines) + '\n']
def get_ts(self, params):
    """Download one media segment on behalf of the player.

    Args:
        params: Request parameters; ``params['url']`` is the Base64-encoded
            segment URL.

    Returns:
        ``[206, "application/octet-stream", body_bytes]``.
    """
    # NOTE(review): the target URL comes straight from the request
    # parameters; confirm that the proxy endpoint is only reachable locally.
    url = self.b64decode(params['url'])
    headers = {'User-Agent': 'Mozilla/5.0'}
    # init() only assigns self.proxy when a proxy is configured, so read it
    # defensively instead of risking AttributeError.
    proxies = getattr(self, 'proxy', None)
    # The whole body is read below, so streaming mode is not needed.
    response = requests.get(url, headers=headers, proxies=proxies, timeout=30)
    return [206, "application/octet-stream", response.content]
def destroy(self):
    """Teardown hook; only returns a status message."""
    message = '正在Destroy'
    return message
def b64encode(self, data):
    """Return the Base64 text of ``data`` (a str, encoded as UTF-8)."""
    raw = data.encode('utf-8')
    encoded = base64.b64encode(raw)
    return str(encoded, 'utf-8')
def b64decode(self, data):
    """Return the UTF-8 text encoded by the Base64 string ``data``."""
    raw = data.encode('utf-8')
    decoded = base64.b64decode(raw)
    return str(decoded, 'utf-8')
# Nothing is executed when the file is run directly.
if __name__ == '__main__':
    pass
+346
View File
@@ -0,0 +1,346 @@
"""
作者 乐哥 🚓 内容均从互联网收集而来 仅供交流学习使用 版权归原创者所有 如侵犯了您的权益 请通知作者 将及时删除侵权内容
====================lege====================
"""
import requests
from bs4 import BeautifulSoup
import re
from base.spider import Spider
import sys
import json
import base64
import urllib.parse
from Crypto.Cipher import ARC4
from Crypto.Util.Padding import unpad
import binascii
sys.path.append('..')
# Base URL of the scraped site. It already ends with "/" and is prepended to
# relative links and used to build request URLs below.
xurl = "https://www.fullhd.xxx/zh/"
# Request headers (desktop Chrome User-Agent) passed to the page requests.
headerx = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36'
}
# Declared `global` in detailContent but never assigned in the visible code.
pm = ''
class Spider(Spider):
global xurl
global headerx
def getName(self):
    """Return the display name of this spider."""
    name = "首页"
    return name
def init(self, extend):
    """Initialisation hook; this spider performs no setup and returns None."""
    return None
def isVideoFormat(self, url):
    """Placeholder: no format check is implemented; always returns None."""
    return None
def manualVideoCheck(self):
    """Placeholder: no manual check is implemented; always returns None."""
    return None
def extract_middle_text(self, text, start_str, end_str, pl, start_index1: str = '', end_index2: str = ''):
    """Extract text located between two literal markers.

    Args:
        text: Text to search.
        start_str: Literal marker that opens a span.
        end_str: Literal marker that closes a span.
        pl: Extraction mode.
            0 - first span, with backslashes removed.
            1 - regex ``start_index1`` applied to the first span, matches
                joined with a single space.
            2 - like 1, but matches are joined with ``$$$``.
            3 - every span is collected; ``start_index1`` must yield
                ``(link, title)`` tuples, which become play-list entries
                joined by ``#`` within a span and ``$$$`` between spans.
        start_index1: Regular expression used by modes 1, 2 and 3.
        end_index2: Unused; kept so existing calls keep working.

    Returns:
        The extracted string. Always a ``str``: ``""`` is returned when a
        marker is missing, nothing matches, or the mode is unknown
        (previously modes 1 and 2 fell through and returned ``None``).
    """
    if pl == 3:
        spans = []
        while True:
            start_index = text.find(start_str)
            if start_index == -1:
                break
            end_index = text.find(end_str, start_index + len(start_str))
            if end_index == -1:
                break
            middle_text = text[start_index + len(start_str):end_index]
            spans.append(middle_text)
            # Remove the consumed span (and identical copies) so the next
            # search finds the following one.
            text = text.replace(start_str + middle_text + end_str, '')
        groups = []
        for span in spans:
            entries = []
            for match in re.findall(start_index1, span):
                link, title = match[0], match[1]
                found = re.search(r'(?:^|[^0-9])(\d+)(?:[^0-9]|$)', title)
                number = found.group(1) if found else 0
                # Relative links are prefixed with the site base URL.
                prefix = '' if 'http' in link else xurl
                entries.append(f"{'📽️' + title}${number}{prefix}{link}")
            groups.append('#'.join(entries))
        return '$$$'.join(groups)

    start_index = text.find(start_str)
    if start_index == -1:
        return ""
    end_index = text.find(end_str, start_index + len(start_str))
    if end_index == -1:
        return ""
    middle_text = text[start_index + len(start_str):end_index]
    if pl == 0:
        return middle_text.replace("\\", "")
    if pl == 1:
        return ' '.join(re.findall(start_index1, middle_text))
    if pl == 2:
        return '$$$'.join(f'{item}' for item in re.findall(start_index1, middle_text))
    return ""
def homeContent(self, filter):
    """Return the fixed list of categories shown on the home screen."""
    categories = (
        ("latest-updates", "最新视频🌠"),
        ("top-rated", "最佳视频🌠"),
        ("most-popular", "热门影片🌠"),
    )
    return {
        "class": [
            {"type_id": type_id, "type_name": label}
            for type_id, label in categories
        ]
    }
def homeVideoContent(self):
    """Fetch the site's front page and return the videos of its first listing.

    Returns:
        ``{'list': [...]}`` whose entries carry ``vod_id``, ``vod_name``,
        ``vod_pic`` and ``vod_remarks``. The list is empty when the request
        or the parsing fails (the previous version returned None then).
    """
    videos = []
    try:
        detail = requests.get(url=xurl, headers=headerx, timeout=15)
        detail.encoding = "utf-8"
        doc = BeautifulSoup(detail.text, "lxml")
        containers = doc.find_all('div', class_="margin-fix")
        # Use the first container whenever one exists. The old check
        # (`len > 1`) left a result list in place for a single container,
        # which then failed on `.find_all`.
        cards = containers[0].find_all('div', class_="item") if containers else []
        for vod in cards:
            link = vod.find('a')
            img = vod.find('img', class_="lazyload")
            thumb = vod.find('div', class_="img thumb__img")
            pic = img.get('data-src', '') if img is not None else ""
            if pic and 'http' not in pic:
                pic = xurl + pic
            videos.append({
                "vod_id": link.get('href', '') if link is not None else "",
                "vod_name": link.get('title', '') if link is not None else "",
                "vod_pic": pic,
                "vod_remarks": thumb.text.strip() if thumb is not None else "",
            })
    except Exception as e:
        print(f"Error in homeVideoContent: {str(e)}")
    return {'list': videos}
def categoryContent(self, cid, pg, filter, ext):
    """Return one page of videos for the category ``cid``.

    Args:
        cid: Category path segment (one of the ``type_id`` values).
        pg: Page number (string or int); missing or invalid values mean 1.
        filter: Unused.
        ext: Unused.

    Returns:
        Dict with ``list`` (video entries), ``page`` and fixed paging
        placeholders. Errors are printed and yield an empty or partial list.
    """
    try:
        page = int(pg) if pg else 1
    except (TypeError, ValueError):
        page = 1
    # xurl ends with "/"; strip it so the path does not contain "//".
    base = xurl.rstrip('/')
    # Compare as int: the old `page == '1'` compared an int with a str and
    # was never true, so the first page always used the numbered URL.
    if page == 1:
        url = f'{base}/{cid}/'
    else:
        url = f'{base}/{cid}/{str(page)}/'
    videos = []
    try:
        detail = requests.get(url=url, headers=headerx, timeout=15)
        detail.encoding = "utf-8"
        doc = BeautifulSoup(detail.text, "lxml")
        for soup in doc.find_all('div', class_="margin-fix"):
            for vod in soup.find_all('div', class_="item"):
                link = vod.find('a')
                img = vod.find('img', class_="lazyload")
                thumb = vod.find('div', class_="img thumb__img")
                pic = img.get('data-src', '') if img is not None else ""
                if pic and 'http' not in pic:
                    pic = xurl + pic
                videos.append({
                    "vod_id": link.get('href', '') if link is not None else "",
                    "vod_name": link.get('title', '') if link is not None else "",
                    "vod_pic": pic,
                    "vod_remarks": thumb.text.strip() if thumb is not None else "",
                })
    except Exception as e:
        print(f"Error in categoryContent: {str(e)}")
    return {
        'list': videos,
        'page': pg,
        'pagecount': 9999,
        'limit': 90,
        'total': 999999
    }
def detailContent(self, ids):
    """Load a video page and return its detail record.

    Args:
        ids: List whose first element is the video URL or a link relative
            to the site base URL.

    Returns:
        ``{'list': [vod]}``; ``{'list': []}`` when ``ids`` is empty.
    """
    if not ids:
        return {'list': []}
    did = ids[0]
    if 'http' not in did:
        did = xurl + did
    res1 = requests.get(url=did, headers=headerx, timeout=15)
    res1.encoding = "utf-8"
    res = res1.text
    content = '资源来源于网络🚓侵权请联系删除👉' + (self.extract_middle_text(res, '<h1>', '</h1>', 0) or '')
    # Mode 1 may yield nothing when no performer links are present.
    yanuan = self.extract_middle_text(res, '<span>Pornstars:</span>', '</div>', 1, 'href=".*?">(.*?)</a>') or ''
    return {
        'list': [{
            "vod_id": did,
            "vod_actor": yanuan,
            "vod_director": '',
            "vod_content": content,
            "vod_play_from": '💗数逼毛💗',
            # The page URL itself is the play id; playerContent resolves it.
            "vod_play_url": did
        }]
    }
def playerContent(self, flag, id, vipFlags):
    """Resolve a play id into a direct media URL.

    The page URL is the part of ``id`` starting at its first ``http``. The
    first ``<source>`` inside the page's ``<video>`` element is taken and up
    to two 302 redirects are followed.

    Args:
        flag: Unused.
        id: Play id containing the video page URL.
        vipFlags: Unused.

    Returns:
        Dict with ``parse`` (0), ``playUrl``, ``url`` and ``header``.
        ``url`` is empty when the id holds no URL or the page has no
        ``<source>``; both cases previously raised NameError/AttributeError.
    """
    result = {"parse": 0, "playUrl": '', "url": '', "header": headerx}
    marker = id.find('http')
    if marker == -1:
        return result
    page = requests.get(url=id[marker:], headers=headerx, timeout=15).text
    video_html = self.extract_middle_text(page, '<video', '</video>', 0) or ''
    source = BeautifulSoup(video_html.replace('\\', ''), 'html.parser').find('source')
    src_value = source.get('src') if source is not None else None
    if not src_value:
        return result
    # Start from the source URL so a non-redirecting response still plays;
    # `redirect_url` used to be unbound in that case.
    play_url = src_value
    for _ in range(2):
        response = requests.head(play_url, allow_redirects=False, timeout=15)
        if response.status_code != 302:
            break
        play_url = response.headers['Location']
    result["url"] = play_url
    return result
def searchContentPage(self, key, quick, page):
    """Search the site and return one page of results.

    Args:
        key: Search keyword; it is percent-encoded before being placed in
            the URL path.
        quick: Unused.
        page: Page number (string or int); empty values mean page 1.

    Returns:
        Dict with ``list`` (video entries), ``page`` and fixed paging
        placeholders. Errors are printed and yield an empty or partial list
        instead of propagating.
    """
    videos = []
    if not page:
        page = '1'
    # xurl ends with "/"; strip it so the path does not contain "//".
    base = xurl.rstrip('/')
    keyword = urllib.parse.quote(str(key), safe='')
    # Compare as text so an int page 1 is also treated as the first page.
    if str(page) == '1':
        url = f'{base}/search/{keyword}/'
    else:
        url = f'{base}/search/{keyword}/{str(page)}/'
    try:
        detail = requests.get(url=url, headers=headerx, timeout=15)
        detail.encoding = "utf-8"
        doc = BeautifulSoup(detail.text, "lxml")
        for soup in doc.find_all('div', class_="margin-fix"):
            for vod in soup.find_all('div', class_="item"):
                link = vod.find('a')
                img = vod.find('img', class_="lazyload")
                thumb = vod.find('div', class_="img thumb__img")
                pic = img.get('data-src', '') if img is not None else ""
                if pic and 'http' not in pic:
                    pic = xurl + pic
                videos.append({
                    "vod_id": link.get('href', '') if link is not None else "",
                    "vod_name": link.get('title', '') if link is not None else "",
                    "vod_pic": pic,
                    "vod_remarks": thumb.text.strip() if thumb is not None else "",
                })
    except Exception as e:
        print(f"Error in searchContentPage: {str(e)}")
    return {
        'list': videos,
        'page': page,
        'pagecount': 9999,
        'limit': 90,
        'total': 999999
    }
def searchContent(self, key, quick, pg='1'):
    """Search for ``key`` and return the requested result page.

    Args:
        key: Search keyword.
        quick: Passed through to ``searchContentPage``.
        pg: Page number; defaults to the first page so two-argument calls
            keep working.
    """
    return self.searchContentPage(key, quick, pg)
def localProxy(self, params):
    """Route a local-proxy request to the handler selected by ``params['type']``.

    Returns the handler's result, or None for an unrecognised type.
    """
    kind = params['type']
    if kind == "m3u8":
        return self.proxyM3u8(params)
    if kind == "media":
        return self.proxyMedia(params)
    if kind == "ts":
        return self.proxyTs(params)
    return None
+276
View File
@@ -0,0 +1,276 @@
# coding=utf-8
# !/usr/bin/python
# by嗷呜
import json
import re
import sys
from pyquery import PyQuery as pq
from base64 import b64decode, b64encode
from requests import Session
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
# Proxy configuration: init() copies these entries into the requests Session,
# so session traffic for both schemes goes through this local address.
proxies = {
    'http': 'http://127.0.0.1:10172',
    'https': 'http://127.0.0.1:10172'
}
def init(self, extend=""):
    """Create the proxied HTTP session and detect the site host.

    Args:
        extend: Unused.

    Sets ``self.session`` and ``self.host`` and stores the referer in both
    ``self.headers`` and the session headers.
    """
    # The session must exist before gethost() runs, because gethost() sends
    # its probe request through self.session. Previously the order was
    # reversed, so host detection always failed and used the fallback.
    self.session = Session()
    self.session.headers.update(self.headers)
    self.session.proxies.update(self.proxies)  # route session traffic through the proxy
    self.host = self.gethost()
    referer = f'{self.host}/'
    self.headers['referer'] = referer
    self.session.headers['referer'] = referer
def getName(self):
    """Placeholder: no name is provided; always returns None."""
    return None
def isVideoFormat(self, url):
    """Placeholder: no format check is implemented; always returns None."""
    return None
def manualVideoCheck(self):
    """Placeholder: no manual check is implemented; always returns None."""
    return None
def destroy(self):
    """Teardown hook; nothing to release, always returns None."""
    return None
# Browser-like request headers (desktop Chrome 133 on Windows). init() adds a
# 'referer' entry and copies the dict into the session; gethost() sends it too.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'sec-ch-ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-full-version': '"133.0.6943.98"',
    'sec-ch-ua-arch': '"x86"',
    'sec-ch-ua-platform': '"Windows"',
    'sec-ch-ua-platform-version': '"19.0.0"',
    'sec-ch-ua-model': '""',
    'sec-ch-ua-full-version-list': '"Not(A:Brand";v="99.0.0.0", "Google Chrome";v="133.0.6943.98", "Chromium";v="133.0.6943.98"',
    'dnt': '1',
    'upgrade-insecure-requests': '1',
    'sec-fetch-site': 'none',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-user': '?1',
    'sec-fetch-dest': 'document',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'priority': 'u=0, i'
}
def homeContent(self, filter):
    """Return the fixed top-level categories and an empty filter map."""
    sections = (
        ("视频", "/video"),
        ("片单", "/playlists"),
        ("频道", "/channels"),
        ("分类", "/categories"),
        ("明星", "/pornstars"),
    )
    return {
        'class': [{'type_name': label, 'type_id': path} for label, path in sections],
        'filters': {},
    }
def homeVideoContent(self):
    """Return the videos of the site's "recommended" page.

    Returns:
        ``{'list': [...]}``; the list is empty when the page could not be
        loaded (getpq returns None in that case).
    """
    data = self.getpq('/recommended')
    if data is None:
        return {'list': []}
    vhtml = data("#recommendedListings .pcVideoListItem .phimage")
    return {'list': self.getlist(vhtml)}
def categoryContent(self, tid, pg, filter, extend):
    """Return one page of entries for a category or folder id.

    ``tid`` is either one of the top-level paths from homeContent or an id
    produced by an earlier call: ``<path>_this_video`` for a category's
    videos, or ``playlists_click_`` / ``director_click_`` /
    ``pornstars_click_`` followed by a link.

    Args:
        tid: Category or folder id.
        pg: Page number.
        filter: Unused.
        extend: Unused.

    Returns:
        Dict with ``list``, ``page`` and paging placeholders. ``list`` is
        empty when the page could not be loaded (getpq returns None), which
        previously raised TypeError.
    """
    result = {'page': pg, 'pagecount': 9999, 'limit': 90, 'total': 999999}
    page = str(pg)
    vdata = []

    def videos(doc, selector):
        """Video entries under selector; empty when the page failed to load."""
        return [] if doc is None else self.getlist(doc(selector))

    def nodes(doc, selector):
        """Matched elements; empty when the page failed to load."""
        return () if doc is None else doc(selector).items()

    if tid == '/video' or '_this_video' in tid:
        pagestr = '&' if '?' in tid else '?'
        tid = tid.split('_this_video')[0]
        data = self.getpq(f'{tid}{pagestr}page={pg}')
        vdata = videos(data, '#videoCategory .pcVideoListItem')
    elif tid == '/playlists':
        data = self.getpq(f'{tid}?page={pg}')
        for i in nodes(data, '#playListSection li'):
            href = i('.thumbnail-info-wrapper .display-block a').attr('href')
            if not href:
                continue  # an entry without a link cannot be opened
            vdata.append({
                'vod_id': 'playlists_click_' + href,
                'vod_name': i('.thumbnail-info-wrapper .display-block a').attr('title'),
                'vod_pic': i('.largeThumb').attr('src'),
                'vod_tag': 'folder',
                'vod_remarks': i('.playlist-videos .number').text(),
                'style': {"type": "rect", "ratio": 1.33}
            })
    elif tid == '/channels':
        data = self.getpq(f'{tid}?o=rk&page={pg}')
        for i in nodes(data, '#filterChannelsSection li .description'):
            href = i('.avatar a').attr('href')
            if not href:
                continue
            vdata.append({
                'vod_id': 'director_click_' + href,
                'vod_name': i('.avatar img').attr('alt'),
                'vod_pic': i('.avatar img').attr('src'),
                'vod_tag': 'folder',
                'vod_remarks': i('.descriptionContainer ul li').eq(-1).text(),
                'style': {"type": "rect", "ratio": 1.33}
            })
    elif tid == '/categories' and page == '1':
        # The category index is a single page.
        result['pagecount'] = 1
        data = self.getpq(f'{tid}')
        for i in nodes(data, '.categoriesListSection li .relativeWrapper'):
            href = i('a').attr('href')
            if not href:
                continue
            vdata.append({
                'vod_id': href + '_this_video',
                'vod_name': i('a').attr('alt'),
                'vod_pic': i('a img').attr('src'),
                'vod_tag': 'folder',
                'style': {"type": "rect", "ratio": 1.33}
            })
    elif tid == '/pornstars':
        data = self.getpq(f'{tid}?o=t&page={pg}')
        for i in nodes(data, '#popularPornstars .performerCard .wrap'):
            href = i('a').attr('href')
            if not href:
                continue
            vdata.append({
                'vod_id': 'pornstars_click_' + href,
                'vod_name': i('.performerCardName').text(),
                'vod_pic': i('a img').attr('src'),
                'vod_tag': 'folder',
                'vod_year': i('.performerVideosViewsCount span').eq(0).text(),
                'vod_remarks': i('.performerVideosViewsCount span').eq(-1).text(),
                'style': {"type": "rect", "ratio": 1.33}
            })
    elif 'playlists_click' in tid:
        tid = tid.split('click_')[-1]
        first_page = page == '1'
        if first_page or getattr(self, 'token', None) is None:
            # The playlist page carries the token the chunked endpoint needs.
            # It is also loaded when a later page is requested before page 1
            # was seen; self.token used to be missing in that case.
            hdata = self.getpq(tid)
            if hdata is not None:
                self.token = hdata('#searchInput').attr('data-token')
                if first_page:
                    vdata = self.getlist(hdata('#videoPlaylist .pcVideoListItem .phimage'))
        if not first_page:
            playlist_id = tid.split('playlist/')[-1]
            token = getattr(self, 'token', None)
            data = self.getpq(f'/playlist/viewChunked?id={playlist_id}&token={token}&page={pg}')
            vdata = videos(data, '.pcVideoListItem .phimage')
    elif 'director_click' in tid:
        tid = tid.split('click_')[-1]
        data = self.getpq(f'{tid}/videos?page={pg}')
        vdata = videos(data, '#showAllChanelVideos .pcVideoListItem .phimage')
    elif 'pornstars_click' in tid:
        tid = tid.split('click_')[-1]
        data = self.getpq(f'{tid}/videos?page={pg}')
        vdata = videos(data, '#mostRecentVideosSection .pcVideoListItem .phimage')
    result['list'] = vdata
    return result
def detailContent(self, ids):
    """Load a video page and return its detail record with play entries.

    Args:
        ids: List whose first element is the video path on the site.

    Returns:
        ``{'list': [vod]}``. Play entries are taken from the player's
        "mediaDefinitions" data; if none can be extracted, a single entry
        pointing at the page itself (parse flag 1) is used. Returns
        ``{'list': []}`` when the page could not be loaded.
    """
    url = f"{self.host}{ids[0]}"
    data = self.getpq(ids[0])
    if data is None:
        return {'list': []}
    vn = data('meta[property="og:title"]').attr('content')
    dtext = data('.userInfo .usernameWrap a')
    owner_href = dtext.attr('href')
    pdtitle = ''
    if owner_href:
        # Markup for a clickable uploader link; only built when a link exists.
        pdtitle = '[a=cr:' + json.dumps({'id': 'director_click_' + owner_href, 'name': dtext.text()}) + '/]' + dtext.text() + '[/a]'
    vod = {
        'vod_name': vn,
        'vod_director': pdtitle,
        'vod_remarks': (data('.userInfo').text() + ' / ' + data('.ratingInfo').text()).replace('\n', ' / '),
        'vod_play_from': 'Pornhub',
        'vod_play_url': ''
    }
    js_content = data("#player script").eq(0).text()
    # Fallback entry: let the client parse the page itself (flag 1).
    plist = [f"{vn}${self.e64(f'{1}@@@@{url}')}"]
    try:
        pattern = r'"mediaDefinitions":\s*(\[.*?\]),\s*"isVertical"'
        match = re.search(pattern, js_content, re.DOTALL)
        if match:
            udata = json.loads(match.group(1))
            # The last definition is skipped, as in the original code.
            direct = [
                f"{media['height']}${self.e64(f'{0}@@@@{media_url}')}"
                for media in udata[:-1]
                if (media_url := media.get('videoUrl'))
            ]
            # Keep the fallback when no definition carries a URL.
            if direct:
                plist = direct
    except Exception as e:
        print(f"提取mediaDefinitions失败: {str(e)}")
    vod['vod_play_url'] = '#'.join(plist)
    return {'list': [vod]}
def searchContent(self, key, quick, pg="1"):
    """Search videos by keyword.

    Args:
        key: Search keyword; encoded before being placed in the query string
            so characters such as "&" or "#" cannot alter the request.
        quick: Unused.
        pg: Page number.

    Returns:
        ``{'list': [...]}``; empty when the page could not be loaded.
    """
    from urllib.parse import quote_plus

    data = self.getpq(f'/video/search?search={quote_plus(str(key))}&page={pg}')
    if data is None:
        return {'list': []}
    return {'list': self.getlist(data('#videoSearchResult .pcVideoListItem .phimage'))}
def playerContent(self, flag, id, vipFlags):
    """Decode a play id into the player instruction dict.

    ``id`` is Base64 text of "<parse flag>@@@@<url>". The result carries
    the flag as an int, the URL and the headers to use for playback.
    """
    decoded = self.d64(id)
    pieces = decoded.split('@@@@')
    header_items = [
        ('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.5410.0 Safari/537.36'),
        ('pragma', 'no-cache'),
        ('cache-control', 'no-cache'),
        ('sec-ch-ua-platform', '"Windows"'),
        ('sec-ch-ua', '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"'),
        ('dnt', '1'),
        ('sec-ch-ua-mobile', '?0'),
        ('origin', self.host),
        ('sec-fetch-site', 'cross-site'),
        ('sec-fetch-mode', 'cors'),
        ('sec-fetch-dest', 'empty'),
        ('referer', f'{self.host}/'),
        ('accept-language', 'zh-CN,zh;q=0.9,en;q=0.8'),
        ('priority', 'u=1, i'),
    ]
    parse_flag = int(pieces[0])
    return {'parse': parse_flag, 'url': pieces[1], 'header': dict(header_items)}
def localProxy(self, param):
    """Placeholder: local proxying is not implemented; always returns None."""
    return None
def gethost(self):
    """Return the site origin the main domain redirects to.

    A request is sent to the main domain without following redirects and
    the ``Location`` header is returned without its trailing slash. The
    default domain is returned when the request fails or no absolute
    redirect is offered.
    """
    default_host = "https://www.pornhub.com"
    try:
        # Use the proxied session if init() already created it; otherwise
        # build a temporary one with the same proxy settings so this method
        # does not depend on call order.
        session = getattr(self, 'session', None)
        if session is None:
            session = Session()
            session.proxies.update(self.proxies)
        response = session.get(default_host, headers=self.headers, allow_redirects=False, timeout=15)
        # rstrip only removes a trailing "/"; the old `[:-1]` cut off a real
        # character when Location had no trailing slash.
        location = response.headers['Location'].rstrip('/')
        return location if location.startswith('http') else default_host
    except Exception as e:
        print(f"获取主页失败: {str(e)}")
        return default_host
def e64(self, text):
    """Return ``text`` as Base64; on any error print it and return ""."""
    try:
        return str(b64encode(text.encode('utf-8')), 'utf-8')
    except Exception as e:
        print(f"Base64编码错误: {str(e)}")
        return ""
def d64(self, encoded_text):
    """Decode Base64 ``encoded_text`` to a string; on any error print it and return ""."""
    try:
        return str(b64decode(encoded_text.encode('utf-8')), 'utf-8')
    except Exception as e:
        print(f"Base64解码错误: {str(e)}")
        return ""
def getlist(self, data):
    """Convert matched video thumbnails into list entries.

    Each element yielded by ``data.items()`` is queried for its link, title,
    image and a remark (the ``.bgShadeEffect`` text, or the ``.duration``
    text when that is empty).
    """
    def to_entry(node):
        return {
            'vod_id': node('a').attr('href'),
            'vod_name': node('a').attr('title'),
            'vod_pic': node('img').attr('src'),
            'vod_remarks': node('.bgShadeEffect').text() or node('.duration').text(),
            'style': {'ratio': 1.33, 'type': 'rect'}
        }

    return [to_entry(node) for node in data.items()]
def getpq(self, path):
    """Fetch ``self.host + path`` and return it parsed as a PyQuery document.

    Any failure is printed and None is returned instead.
    """
    try:
        page = self.session.get(f'{self.host}{path}')
        markup = page.text.encode('utf-8')
        return pq(markup)
    except Exception as e:
        print(f"请求失败: , {str(e)}")
        return None
+271
View File
@@ -0,0 +1,271 @@
# coding=utf-8
# !/usr/bin/python
# by嗷呜
import json
import sys
from base64 import b64decode, b64encode
from pyquery import PyQuery as pq
from requests import Session
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
def init(self, extend=""):
    """Detect the site host and create the HTTP session.

    Stores the host in ``self.host``, adds the matching referer to
    ``self.headers`` and copies those headers into a new session.
    """
    host = self.gethost()
    self.host = host
    self.headers['referer'] = f'{host}/'
    session = Session()
    session.headers.update(self.headers)
    self.session = session
def getName(self):
    """Placeholder: no name is provided; always returns None."""
    return None
def isVideoFormat(self, url):
    """Placeholder: no format check is implemented; always returns None."""
    return None
def manualVideoCheck(self):
    """Placeholder: no manual check is implemented; always returns None."""
    return None
def destroy(self):
    """Teardown hook; nothing to release, always returns None."""
    return None
# Browser-like request headers (desktop Chrome 133 on Windows). init() adds a
# 'referer' entry and copies the dict into the session; gethost() sends it too.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'sec-ch-ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-full-version': '"133.0.6943.98"',
    'sec-ch-ua-arch': '"x86"',
    'sec-ch-ua-platform': '"Windows"',
    'sec-ch-ua-platform-version': '"19.0.0"',
    'sec-ch-ua-model': '""',
    'sec-ch-ua-full-version-list': '"Not(A:Brand";v="99.0.0.0", "Google Chrome";v="133.0.6943.98", "Chromium";v="133.0.6943.98"',
    'dnt': '1',
    'upgrade-insecure-requests': '1',
    'sec-fetch-site': 'none',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-user': '?1',
    'sec-fetch-dest': 'document',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'priority': 'u=0, i'
}
def homeContent(self, filter):
    """Return the fixed top-level categories and their filters.

    Every category except "4K" gets one "类型" filter whose only option
    appends "/4k" to the category path.
    """
    cate_manual = {
        "4K": "/4k",
        "国产": "two_click_/categories/chinese",
        "最新": "/newest",
        "最佳": "/best",
        "频道": "/channels",
        "类别": "/categories",
        "明星": "/pornstars"
    }
    classes = [
        {'type_name': label, 'type_id': path}
        for label, path in cate_manual.items()
    ]
    filters = {
        path: [{'key': 'type', 'name': '类型', 'value': [{'n': '4K', 'v': '/4k'}]}]
        for label, path in cate_manual.items()
        if label != '4K'
    }
    return {'class': classes, 'filters': filters}
def homeVideoContent(self):
    """Return the video thumbnails listed on the site's start page."""
    page = self.getpq()
    cards = page(".thumb-list--sidebar .thumb-list__item")
    return {'list': self.getlist(cards)}
def categoryContent(self, tid, pg, filter, extend):
    """Return one page of entries for a category or folder id.

    ``tid`` is a top-level path from homeContent, ``two_click_<path>`` for a
    video listing, or ``one_click_<id>`` for one group of the category
    index.

    Args:
        tid: Category or folder id.
        pg: Page number.
        filter: Unused.
        extend: Selected filter values; ``type`` is appended to the listing
            path. May be None or empty.

    Returns:
        Dict with ``list``, ``page`` and paging placeholders.
    """
    vdata = []
    result = {'page': pg, 'pagecount': 9999, 'limit': 90, 'total': 999999}
    if tid in ['/4k', '/newest', '/best'] or 'two_click_' in tid:
        if 'two_click_' in tid:
            tid = tid.split('click_')[-1]
        type_suffix = (extend or {}).get("type", "")
        data = self.getpq(f'{tid}{type_suffix}/{pg}')
        vdata = self.getlist(data(".thumb-list--sidebar .thumb-list__item"))
    elif tid == '/channels':
        data = self.getpq(f'{tid}/{pg}')
        jsdata = self.getjsdata(data)
        for i in jsdata['channels']:
            vdata.append({
                'vod_id': f"two_click_" + i.get('channelURL'),
                'vod_name': i.get('channelName'),
                'vod_pic': i.get('siteLogoURL'),
                'vod_year': f'videos:{i.get("videoCount")}',
                'vod_tag': 'folder',
                'vod_remarks': f'subscribers:{i["subscriptionModel"].get("subscribers")}',
                'style': {'ratio': 1.33, 'type': 'rect'}
            })
    elif tid == '/categories':
        # Setting pagecount to the current page marks it as the last one.
        result['pagecount'] = pg
        data = self.getpq(tid)
        # Cached for the one_click_ branch below.
        self.cdata = self.getjsdata(data)
        for i in self.cdata['layoutPage']['store']['popular']['assignable']:
            vdata.append({
                'vod_id': "one_click_" + i.get('id'),
                'vod_name': i.get('name'),
                'vod_pic': '',
                'vod_tag': 'folder',
                'style': {'ratio': 1.33, 'type': 'rect'}
            })
    elif tid == '/pornstars':
        data = self.getpq(f'{tid}/{pg}')
        pdata = self.getjsdata(data)
        for i in pdata['pagesPornstarsComponent']['pornstarListProps']['pornstars']:
            vdata.append({
                'vod_id': f"two_click_" + i.get('pageURL'),
                'vod_name': i.get('name'),
                'vod_pic': i.get('imageThumbUrl'),
                'vod_remarks': i.get('translatedCountryName'),
                'vod_tag': 'folder',
                'style': {'ratio': 1.33, 'type': 'rect'}
            })
    elif 'one_click' in tid:
        result['pagecount'] = pg
        tid = tid.split('click_')[-1]
        # The category index is normally cached by a previous '/categories'
        # call. Load it on demand so opening a group first (e.g. after a
        # restart) does not fail on a missing attribute.
        cdata = getattr(self, 'cdata', None)
        if cdata is None:
            cdata = self.cdata = self.getjsdata(self.getpq('/categories'))
        for i in cdata['layoutPage']['store']['popular']['assignable']:
            if i.get('id') == tid:
                for j in i['items']:
                    vdata.append({
                        'vod_id': f"two_click_" + j.get('url'),
                        'vod_name': j.get('name'),
                        'vod_pic': j.get('thumb'),
                        'vod_tag': 'folder',
                        'style': {'ratio': 1.33, 'type': 'rect'}
                    })
    result['list'] = vdata
    return result
def detailContent(self, ids):
    """Load a video page and return its detail record with play entries.

    Play entries come from the page's ``xplayerSettings.sources`` data: the
    "standard" streams sorted by the number in their label (highest first),
    followed by the HLS streams. If that data cannot be read, one entry
    pointing at the page itself (parse flag 1) is used and the error is
    printed.
    """
    data = self.getpq(ids[0])
    djs = self.getjsdata(data)
    vn = data('meta[property="og:title"]').attr('content')
    tags = data('#video-tags-list-container')
    href = tags('a').attr('href')
    title = tags('span[class*="body-bold-"]').eq(0).text()
    pdtitle = ''
    if href:
        link_meta = json.dumps({'id': 'two_click_' + href, 'name': title})
        pdtitle = '[a=cr:' + link_meta + '/]' + title + '[/a]'
    vod = {
        'vod_name': vn,
        'vod_director': pdtitle,
        'vod_remarks': data('.rb-new__info').text(),
        'vod_play_from': 'Xhamster',
        'vod_play_url': ''
    }
    try:
        plist = []
        sources = djs['xplayerSettings']['sources']
        standard = sources.get('standard')

        def quality_rank(entry):
            # Order by the digits of the label, descending; ties by label.
            quality = entry.split('$')[0]
            digits = ''.join(ch for ch in quality if ch.isdigit())
            return -(int(digits) if digits else 0), quality

        if standard:
            for streams in standard.values():
                if not isinstance(streams, list):
                    continue
                for info in streams:
                    token = self.e64(f'0@@@@{info.get("url") or info.get("fallback")}')
                    plist.append(f"{info.get('label') or info.get('quality')}${token}")
        plist.sort(key=quality_rank)
        if sources.get('hls'):
            for format_type, info in sources['hls'].items():
                stream_url = info.get('url')
                if stream_url:
                    plist.append(f"{format_type}${self.e64(f'0@@@@{stream_url}')}")
    except Exception as e:
        plist = [f"{vn}${self.e64(f'1@@@@{ids[0]}')}"]
        print(f"获取视频信息失败: {str(e)}")
    vod['vod_play_url'] = '#'.join(plist)
    return {'list': [vod]}
def searchContent(self, key, quick, pg="1"):
    """Search videos by keyword.

    Args:
        key: Search keyword; percent-encoded because it becomes a URL path
            segment, where "/", "?" or "#" would change the request.
        quick: Unused.
        pg: Page number.

    Returns:
        ``{'list': [...], 'page': pg}``.
    """
    from urllib.parse import quote

    data = self.getpq(f'/search/{quote(str(key), safe="")}?page={pg}')
    return {'list': self.getlist(data(".thumb-list--sidebar .thumb-list__item")), 'page': pg}
def playerContent(self, flag, id, vipFlags):
    """Decode a play payload into the player instruction dict.

    Args:
        flag: Unused by this implementation.
        id: Payload produced by ``e64`` from ``"<parse flag>@@@@<url>"``.
        vipFlags: Unused by this implementation.

    Returns:
        ``{'parse': int, 'url': str, 'header': dict}``.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.5410.0 Safari/537.36',
        'pragma': 'no-cache',
        'cache-control': 'no-cache',
        'sec-ch-ua-platform': '"Windows"',
        'sec-ch-ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
        'dnt': '1',
        'sec-ch-ua-mobile': '?0',
        'origin': self.host,
        'sec-fetch-site': 'cross-site',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        'referer': f'{self.host}/',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'priority': 'u=1, i',
    }
    # Split once only: everything after the first separator is the URL,
    # even if the URL itself happens to contain '@@@@'.
    ids = self.d64(id).split('@@@@', 1)
    return {'parse': int(ids[0]), 'url': ids[1], 'header': headers}
def localProxy(self, param):
    """Placeholder: this spider serves nothing through the local proxy."""
    return None
def gethost(self):
    """Resolve the regional site origin from the redirect of xhamster.com.

    Returns:
        The ``Location`` header of the (unfollowed) redirect without a
        trailing slash, or ``"https://zn.xhamster.com"`` when the request
        fails or carries no such header.
    """
    try:
        response = self.fetch('https://xhamster.com', headers=self.headers, allow_redirects=False)
        # Callers build URLs as f'{host}{path}' and f'{host}/', so a trailing
        # slash here would yield '//' and an invalid Origin value.
        return response.headers['Location'].rstrip('/')
    except Exception as e:
        print(f"获取主页失败: {str(e)}")
        return "https://zn.xhamster.com"
def e64(self, text):
    """Return the Base64 text form of ``text`` (UTF-8), or '' on any error."""
    try:
        return b64encode(text.encode('utf-8')).decode('utf-8')
    except Exception as err:
        print(f"Base64编码错误: {str(err)}")
        return ""
def d64(self, encoded_text):
    """Decode Base64 text back to a UTF-8 string, or return '' on any error."""
    try:
        raw = b64decode(encoded_text.encode('utf-8'))
        return raw.decode('utf-8')
    except Exception as err:
        print(f"Base64解码错误: {str(err)}")
        return ""
def getlist(self, data):
    """Turn a selection of thumbnail nodes into a list of video cards.

    ``data`` must offer ``items()`` yielding callable nodes that accept a CSS
    selector and expose ``attr``/``text`` (as the PyQuery objects used in
    this file do).
    """
    return [
        {
            'vod_id': node('.role-pop').attr('href'),
            'vod_name': node('.video-thumb-info a').text(),
            'vod_pic': node('.role-pop img').attr('src'),
            # Only the first space-separated token of the views text is kept.
            'vod_year': node('.video-thumb-info .video-thumb-views').text().split(' ')[0],
            'vod_remarks': node('.role-pop div[data-role="video-duration"]').text(),
            'style': {'ratio': 1.33, 'type': 'rect'}
        }
        for node in data.items()
    ]
def getpq(self, path=''):
    """Fetch a page through the shared session and parse it with ``pq``.

    ``path`` is requested as-is when it starts with 'http'; otherwise it is
    appended to ``self.host``. If parsing the text fails, the UTF-8 encoded
    bytes are parsed instead.
    """
    base = self.host if not path.startswith('http') else ''
    html = self.session.get(f'{base}{path}').text
    try:
        doc = pq(html)
    except Exception as err:
        print(f"{str(err)}")
        doc = pq(html.encode('utf-8'))
    return doc
def getjsdata(self, data):
    """Extract the JSON object assigned to ``initials`` in the page script.

    Args:
        data: Parsed page; called with a CSS selector to find the script.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If the script text holds no valid JSON.
    """
    vhtml = data("script[id='initials-script']").text()
    payload = vhtml.split('initials=')[-1].strip()
    # Remove the statement terminator only when it is really there; cutting
    # the last character blindly would drop the closing brace otherwise.
    payload = payload.rstrip(';').rstrip()
    return json.loads(payload)
+263
View File
@@ -0,0 +1,263 @@
# coding=utf-8
# !/usr/bin/python
# by嗷呜
import json
import sys
from base64 import b64decode, b64encode
from pyquery import PyQuery as pq
from requests import Session
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
def init(self, extend=""):
    """Resolve the site host and prepare the shared HTTP session.

    ``extend`` is accepted but not used. The 'referer' entry is written into
    ``self.headers`` before those headers are copied onto the new session.
    """
    host = self.gethost()
    self.host = host
    self.headers['referer'] = f'{host}/'
    session = Session()
    self.session = session
    session.headers.update(self.headers)
def getName(self):
    """Placeholder: this spider reports no name (returns None)."""
    return None
def isVideoFormat(self, url):
    """Placeholder: no format sniffing is done; always returns None."""
    return None
def manualVideoCheck(self):
    """Placeholder: no manual video check is implemented; returns None."""
    return None
def destroy(self):
    """Placeholder: nothing is released on teardown; returns None."""
    return None
# Browser-like request headers (desktop Chrome 133 on Windows, navigation fetch).
# init() adds a 'referer' entry and copies the result onto the session;
# gethost() sends these headers with its redirect probe.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'sec-ch-ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-full-version': '"133.0.6943.98"',
'sec-ch-ua-arch': '"x86"',
'sec-ch-ua-platform': '"Windows"',
'sec-ch-ua-platform-version': '"19.0.0"',
'sec-ch-ua-model': '""',
'sec-ch-ua-full-version-list': '"Not(A:Brand";v="99.0.0.0", "Google Chrome";v="133.0.6943.98", "Chromium";v="133.0.6943.98"',
'dnt': '1',
'upgrade-insecure-requests': '1',
'sec-fetch-site': 'none',
'sec-fetch-mode': 'navigate',
'sec-fetch-user': '?1',
'sec-fetch-dest': 'document',
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
'priority': 'u=0, i'
}
def homeContent(self, filter):
    """Return the fixed category list and the per-category '4K' filter.

    ``filter`` is not consulted. Every category except '4K' itself gets a
    single 'type' filter whose only option appends '/4k'.
    """
    categories = {
        "4K": "/4k",
        "国产": "two_click_/categories/chinese",
        "最新": "/newest",
        "最佳": "/best",
        "频道": "/channels",
        "类别": "/categories",
        "明星": "/pornstars"
    }
    classes = [
        {'type_name': label, 'type_id': type_id}
        for label, type_id in categories.items()
    ]
    filters = {
        type_id: [{'key': 'type', 'name': '类型', 'value': [{'n': '4K', 'v': '/4k'}]}]
        for label, type_id in categories.items()
        if label != '4K'
    }
    return {'class': classes, 'filters': filters}
def homeVideoContent(self):
    """Return the video cards shown on the site's landing page."""
    landing = self.getpq()
    thumbs = landing(".thumb-list--sidebar .thumb-list__item")
    return {'list': self.getlist(thumbs)}
def categoryContent(self, tid, pg, filter, extend):
    """Return one page of cards for a category, folder or sub-folder.

    Args:
        tid: Category id. Plain paths ('/4k', '/newest', '/best') and ids
            containing 'two_click_' list videos; '/channels', '/categories'
            and '/pornstars' list folders; ids containing 'one_click' list
            the items of one entry of the categories page.
        pg: Page number (echoed back unchanged).
        filter: Unused by this implementation.
        extend: Selected filter values; only 'type' is read.

    Returns:
        Dict with 'page', 'pagecount', 'limit', 'total' and 'list'.
    """
    vdata = []
    result = {'page': pg, 'pagecount': 9999, 'limit': 90, 'total': 999999}
    if tid in ['/4k', '/newest', '/best'] or 'two_click_' in tid:
        if 'two_click_' in tid:
            tid = tid.split('click_')[-1]
        data = self.getpq(f'{tid}{extend.get("type","")}/{pg}')
        vdata = self.getlist(data(".thumb-list--sidebar .thumb-list__item"))
    elif tid == '/channels':
        jsdata = self.getjsdata(self.getpq(f'{tid}/{pg}'))
        for i in jsdata['channels']:
            # Tolerate channels that carry no subscription block.
            subscribers = (i.get('subscriptionModel') or {}).get('subscribers')
            vdata.append({
                'vod_id': "two_click_" + i.get('channelURL'),
                'vod_name': i.get('channelName'),
                'vod_pic': i.get('siteLogoURL'),
                'vod_year': f'videos:{i.get("videoCount")}',
                'vod_tag': 'folder',
                'vod_remarks': f'subscribers:{subscribers}',
                'style': {'ratio': 1.33, 'type': 'rect'}
            })
    elif tid == '/categories':
        # Single page of groups; the parsed data is kept for 'one_click'.
        result['pagecount'] = pg
        self.cdata = self.getjsdata(self.getpq(tid))
        for i in self.cdata['layoutPage']['store']['popular']['assignable']:
            vdata.append({
                'vod_id': "one_click_" + i.get('id'),
                'vod_name': i.get('name'),
                'vod_pic': '',
                'vod_tag': 'folder',
                'style': {'ratio': 1.33, 'type': 'rect'}
            })
    elif tid == '/pornstars':
        pdata = self.getjsdata(self.getpq(f'{tid}/{pg}'))
        for i in pdata['pagesPornstarsComponent']['pornstarListProps']['pornstars']:
            vdata.append({
                'vod_id': "two_click_" + i.get('pageURL'),
                'vod_name': i.get('name'),
                'vod_pic': i.get('imageThumbUrl'),
                'vod_remarks': i.get('translatedCountryName'),
                'vod_tag': 'folder',
                'style': {'ratio': 1.33, 'type': 'rect'}
            })
    elif 'one_click' in tid:
        result['pagecount'] = pg
        tid = tid.split('click_')[-1]
        # The group data is normally cached by a previous '/categories'
        # request; load it on demand when this id is opened directly.
        cdata = getattr(self, 'cdata', None)
        if cdata is None:
            cdata = self.cdata = self.getjsdata(self.getpq('/categories'))
        for i in cdata['layoutPage']['store']['popular']['assignable']:
            if i.get('id') == tid:
                for j in i['items']:
                    vdata.append({
                        'vod_id': "two_click_" + j.get('url'),
                        'vod_name': j.get('name'),
                        'vod_pic': j.get('thumb'),
                        'vod_tag': 'folder',
                        'style': {'ratio': 1.33, 'type': 'rect'}
                    })
    result['list'] = vdata
    return result
def detailContent(self, ids):
    """Build the detail record (title, tag link and play list) for one video.

    Args:
        ids: Sequence whose first element is the path/URL of the video page.

    Returns:
        ``{'list': [vod]}``; ``vod['vod_play_url']`` lists the HLS entries
        first, then the direct sources, as ``label$payload`` joined by '#'.
    """
    page_id = ids[0]
    data = self.getpq(page_id)
    vn = data('meta[property="og:title"]').attr('content')
    dtext = data('#video-tags-list-container')
    href = dtext('a').attr('href')
    title = dtext('span[class*="body-bold-"]').eq(0).text()
    pdtitle = ''
    if href:
        pdtitle = '[a=cr:' + json.dumps({'id': 'two_click_' + href, 'name': title}) + '/]' + title + '[/a]'
    vod = {
        'vod_name': vn,
        'vod_director': pdtitle,
        'vod_remarks': data('.rb-new__info').text(),
        'vod_play_from': 'Xhamster',
        'vod_play_url': ''
    }
    plist = []
    try:
        # The embedded JSON is parsed inside the try so that a page without
        # the script still reaches the parse=1 fallback below.
        sources = self.getjsdata(data)['xplayerSettings']['sources']
        for format_type, info in (sources.get('hls') or {}).items():
            if url := info.get('url'):
                plist.append(f"{format_type}${self.e64(f'0@@@@{url}')}")
        for value in (sources.get('standard') or {}).values():
            if not isinstance(value, list):
                continue
            for info in value:
                url = info.get('url') or info.get('fallback')
                if not url:
                    # Nothing playable; do not encode the text "None".
                    continue
                label = info.get('label') or info.get('quality')
                plist.append(f"{label}${self.e64(f'0@@@@{url}')}")
    except Exception as e:
        plist = []
        print(f"获取视频信息失败: {str(e)}")
    if not plist:
        # Fallback: hand the page itself to the player with parse flag 1.
        plist = [f"{vn}${self.e64(f'1@@@@{page_id}')}"]
    vod['vod_play_url'] = '#'.join(plist)
    return {'list': [vod]}
def searchContent(self, key, quick, pg="1"):
    """Search the site for ``key`` and return one page of result cards.

    Args:
        key: Search keyword as typed by the user.
        quick: Unused by this implementation.
        pg: Page number; echoed back unchanged in the result.

    Returns:
        ``{'list': [...], 'page': pg}``.
    """
    from urllib.parse import quote

    # The keyword is a path segment: escape it fully so that '/', '?' or '#'
    # inside it cannot redirect or truncate the request.
    keyword = quote(str(key), safe='')
    data = self.getpq(f'/search/{keyword}?page={pg}')
    return {'list': self.getlist(data(".thumb-list--sidebar .thumb-list__item")), 'page': pg}
def playerContent(self, flag, id, vipFlags):
    """Decode a play payload into the player instruction dict.

    Args:
        flag: Unused by this implementation.
        id: Payload produced by ``e64`` from ``"<parse flag>@@@@<url>"``.
        vipFlags: Unused by this implementation.

    Returns:
        ``{'parse': int, 'url': str, 'header': dict}``.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.5410.0 Safari/537.36',
        'pragma': 'no-cache',
        'cache-control': 'no-cache',
        'sec-ch-ua-platform': '"Windows"',
        'sec-ch-ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
        'dnt': '1',
        'sec-ch-ua-mobile': '?0',
        'origin': self.host,
        'sec-fetch-site': 'cross-site',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        'referer': f'{self.host}/',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'priority': 'u=1, i',
    }
    # maxsplit=1 keeps a URL intact even when it contains the separator.
    ids = self.d64(id).split('@@@@', 1)
    return {'parse': int(ids[0]), 'url': ids[1], 'header': headers}
def localProxy(self, param):
    """Placeholder: nothing is served through the local proxy; returns None."""
    return None
def gethost(self):
    """Resolve the regional site origin from the redirect of xhamster.com.

    Returns:
        The ``Location`` header of the (unfollowed) redirect without a
        trailing slash, or ``"https://zn.xhamster.com"`` when the request
        fails or carries no such header.
    """
    try:
        response = self.fetch('https://xhamster.com', headers=self.headers, allow_redirects=False)
        # The host is later joined as f'{host}{path}' and f'{host}/'; drop a
        # trailing slash so those joins never contain '//'.
        return response.headers['Location'].rstrip('/')
    except Exception as e:
        print(f"获取主页失败: {str(e)}")
        return "https://zn.xhamster.com"
def e64(self, text):
    """Encode ``text`` as UTF-8 and return its Base64 form ('' on failure)."""
    try:
        encoded = b64encode(text.encode('utf-8'))
    except Exception as err:
        print(f"Base64编码错误: {str(err)}")
        return ""
    return encoded.decode('utf-8')
def d64(self, encoded_text):
    """Inverse of ``e64``: Base64 text to UTF-8 string ('' on failure)."""
    try:
        return b64decode(encoded_text.encode('utf-8')).decode('utf-8')
    except Exception as err:
        print(f"Base64解码错误: {str(err)}")
        return ""
def getlist(self, data):
    """Map each thumbnail node of ``data`` to a video card dict."""
    def to_card(thumb):
        # The first space-separated token of the views text fills 'vod_year'.
        views = thumb('.video-thumb-info .video-thumb-views').text()
        return {
            'vod_id': thumb('.role-pop').attr('href'),
            'vod_name': thumb('.video-thumb-info a').text(),
            'vod_pic': thumb('.role-pop img').attr('src'),
            'vod_year': views.split(' ')[0],
            'vod_remarks': thumb('.role-pop div[data-role="video-duration"]').text(),
            'style': {'ratio': 1.33, 'type': 'rect'}
        }

    return [to_card(thumb) for thumb in data.items()]
def getpq(self, path=''):
    """Download ``path`` with the shared session and return it parsed by ``pq``.

    Absolute URLs (starting with 'http') are used unchanged; anything else is
    appended to ``self.host``. When parsing the text raises, the UTF-8 bytes
    of the same text are parsed instead.
    """
    prefix = '' if path.startswith('http') else self.host
    body = self.session.get(f'{prefix}{path}').text
    try:
        parsed = pq(body)
    except Exception as err:
        print(f"{str(err)}")
        parsed = pq(body.encode('utf-8'))
    return parsed
def getjsdata(self, data):
    """Extract the JSON object assigned to ``initials`` in the page script.

    Args:
        data: Parsed page; called with a CSS selector to find the script.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If the script text holds no valid JSON.
    """
    vhtml = data("script[id='initials-script']").text()
    payload = vhtml.split('initials=')[-1].strip()
    # Strip an optional trailing ';' instead of always cutting one character,
    # which would corrupt JSON that is not followed by a terminator.
    payload = payload.rstrip(';').rstrip()
    return json.loads(payload)
+731
View File
@@ -0,0 +1,731 @@
#coding=utf-8
#!/usr/bin/python
import re
import sys
import json
import time
from datetime import datetime
from urllib.parse import quote, unquote
import requests
sys.path.append('..')
from base.spider import Spider
class Spider(Spider): # 元类 默认的元类 type
def getName(self):
    """Return the display name of this source."""
    name = "B站视频"
    return name
def init(self, extend):
    """Parse the JSON settings string into ``self.extendDict``.

    Args:
        extend: JSON text of an object. Anything that is not valid JSON, or
            that decodes to a non-object value, results in empty settings.
    """
    try:
        parsed = json.loads(extend)
    except (TypeError, ValueError):
        # TypeError: not a str/bytes value; ValueError: malformed JSON.
        parsed = {}
    # The other methods index the settings by key, so only a dict is usable.
    self.extendDict = parsed if isinstance(parsed, dict) else {}
def isVideoFormat(self, url):
    """Placeholder: no format sniffing is done; always returns None."""
    return None
def manualVideoCheck(self):
    """Placeholder: no manual video check is implemented; returns None."""
    return None
def homeContent(self, filter):
    """Return the category list (and filters) configured for this source.

    Categories come from the remote JSON named by the 'json' setting
    ('classes' / 'filter' keys), else from the '#'-separated 'categories' or
    'type' setting, else a single built-in default.

    Args:
        filter: When truthy, filters from the remote JSON are included.

    Returns:
        ``{'class': [...], 'filters': {...}}``.
    """
    settings = self.extendDict
    result = {'filters': {}, 'class': []}

    # Load the remote configuration once; it supplies both cookie and classes.
    remote = None
    if 'json' in settings:
        remote = self.fetch(settings['json'], timeout=10).json()

    cookie = settings['cookie'] if 'cookie' in settings else ''
    if remote is not None and 'cookie' in remote:
        cookie = remote['cookie']
    if cookie == '':
        cookie = '{}'
    elif isinstance(cookie, str) and cookie.startswith('http'):
        # The setting may be a URL whose body is the cookie text.
        cookie = self.fetch(cookie, timeout=10).text.strip()
    if isinstance(cookie, dict):
        cookie = json.dumps(cookie, ensure_ascii=False)
    # Called for its side effect only (getCookie may fill the 'bblogin' cache).
    self.getCookie(cookie)

    if remote is not None:
        if 'classes' in remote:
            result['class'] = result['class'] + remote['classes']
        if filter and 'filter' in remote:
            result['filters'] = remote['filter']
    elif 'categories' in settings or 'type' in settings:
        raw = settings['categories'] if 'categories' in settings else settings['type']
        result['class'] = [{'type_name': cate, 'type_id': cate} for cate in raw.split('#')]
    if not result['class']:
        result['class'] = [{"type_name": "沙雕动漫", "type_id": "沙雕动漫"}]
    return result
def homeVideoContent(self):
    """Return recommended videos from the Bilibili home feed.

    The cookie setting is resolved first (inline value, remote JSON, or a URL
    to download), then the feed is requested. Items whose formatted duration
    is '00:00' are skipped. Any error while reading the feed items is
    swallowed and whatever was collected so far is returned.
    """
    result = {}
    settings = self.extendDict
    cookie = settings['cookie'] if 'cookie' in settings else ''
    if 'json' in settings:
        remote = self.fetch(settings['json'], timeout=10)
        if 'cookie' in remote.json():
            cookie = remote.json()['cookie']
    if cookie == '':
        cookie = '{}'
    elif type(cookie) == str and cookie.startswith('http'):
        cookie = self.fetch(cookie, timeout=10).text.strip()
    try:
        if type(cookie) == dict:
            cookie = json.dumps(cookie, ensure_ascii=False)
    except:
        pass
    cookie, imgKey, subKey = self.getCookie(cookie)
    feed_url = 'https://api.bilibili.com/x/web-interface/index/top/feed/rcmd?y_num=1&fresh_type=3&feed_version=SEO_VIDEO&fresh_idx_1h=1&fetch_row=1&fresh_idx=1&brush=0&homepage_ver=1&ps=20'
    response = requests.get(feed_url, cookies=cookie, headers=self.header, timeout=5)
    data = json.loads(self.cleanText(response.text))
    try:
        result['list'] = []
        for entry in data['data']['item']:
            aid = str(entry['id']).strip()
            title = self.removeHtmlTags(entry['title']).strip()
            img = entry['pic'].strip()
            remark = time.strftime('%H:%M:%S', time.gmtime(entry['duration']))
            if remark.startswith('00:'):
                # Drop the hours field when it is zero.
                remark = remark[3:]
            if remark == '00:00':
                continue
            result['list'].append({
                'vod_id': aid,
                'vod_name': title,
                'vod_pic': img,
                'vod_remarks': remark
            })
    except:
        pass
    return result
def categoryContent(self, cid, page, filter, ext):
    """Return one page of videos (or folders) for a category id.

    Supported ids: '动态' (dynamic feed), '收藏夹' (favourite folders),
    'fav&&&<media id>' (one favourite folder), 'UP主&&&<mid>' (uploads of one
    user), '历史记录' (watch history); any other id is used as a search
    keyword, optionally overridden by ``ext['tid']``.

    Args:
        cid: Category id as described above.
        page: Page number (int or numeric string).
        filter: Unused by this implementation.
        ext: Extra query parameters for the keyword search; 'tid' replaces
            the keyword, every other key is appended to the query.

    Returns:
        Dict with 'list', 'page', 'pagecount', 'limit' and 'total'.
    """
    page = int(page)
    videos = []
    pagecount = page

    # Resolve the cookie setting: inline value, remote JSON, or a URL.
    cookie = ''
    if 'cookie' in self.extendDict:
        cookie = self.extendDict['cookie']
    if 'json' in self.extendDict:
        remote = self.fetch(self.extendDict['json'], timeout=10).json()
        if 'cookie' in remote:
            cookie = remote['cookie']
    if cookie == '':
        cookie = '{}'
    elif isinstance(cookie, str) and cookie.startswith('http'):
        cookie = self.fetch(cookie, timeout=10).text.strip()
    if isinstance(cookie, dict):
        cookie = json.dumps(cookie, ensure_ascii=False)
    cookie, imgKey, subKey = self.getCookie(cookie)

    def load(url):
        # GET an API URL with the resolved cookies and decode its JSON body.
        r = self.fetch(url, cookies=cookie, headers=self.header, timeout=5)
        return json.loads(self.cleanText(r.text))

    def clock(seconds):
        # Seconds -> 'HH:MM:SS', without the hours field when it is zero.
        text = time.strftime('%H:%M:%S', time.gmtime(seconds))
        return text[3:] if text.startswith('00:') else text

    if cid == '动态':
        if page > 1:
            offset = self.getCache('offset') or ''
            url = f'https://api.bilibili.com/x/polymer/web-dynamic/v1/feed/all?timezone_offset=-480&type=all&offset={offset}&page={page}'
        else:
            url = f'https://api.bilibili.com/x/polymer/web-dynamic/v1/feed/all?timezone_offset=-480&type=all&page={page}'
        data = load(url)
        # Remember the cursor for the next page request.
        self.setCache('offset', data['data']['offset'])
        if data['data']['has_more']:
            pagecount = page + 1
        for vod in data['data']['items']:
            if vod['type'] != 'DYNAMIC_TYPE_AV':
                continue
            archive = vod['modules']['module_dynamic']['major']['archive']
            videos.append({
                "vod_id": str(archive['aid']).strip(),
                "vod_name": self.removeHtmlTags(archive['title']).strip(),
                "vod_pic": archive['cover'],
                "vod_remarks": archive['duration_text'].strip()
            })
    elif cid == "收藏夹":
        userid = self.getUserid(cookie)
        # Without a logged-in user there are no folders: fall through and
        # return a regular (empty) result dict rather than a tuple.
        if userid is not None:
            data = load(f'http://api.bilibili.com/x/v3/fav/folder/created/list-all?up_mid={userid}&jsonp=jsonp')
            for vod in data['data']['list']:
                videos.append({
                    "vod_id": f"fav&&&{vod['id']}",
                    "vod_name": vod['title'].strip(),
                    "vod_pic": 'https://api-lmteam.koyeb.app/files/shoucang.png',
                    "vod_tag": 'folder',
                    "vod_remarks": vod['media_count']
                })
    elif cid.startswith('fav&&&'):
        media_id = cid[6:]
        data = load(f'http://api.bilibili.com/x/v3/fav/resource/list?media_id={media_id}&pn={page}&ps=20&platform=web&type=0')
        pagecount = page + 1 if data['data']['has_more'] else page
        for vod in data['data']['medias']:
            videos.append({
                "vod_id": str(vod['id']).strip(),
                "vod_name": self.removeHtmlTags(vod['title']).replace("&quot;", '"'),
                "vod_pic": vod['cover'].strip(),
                "vod_remarks": clock(vod['duration'])
            })
    elif cid.startswith('UP主&&&'):
        mid = cid[6:]
        params = self.encWbi({'mid': mid, 'ps': 30, 'pn': page}, imgKey, subKey)
        query = '&'.join(f'{key}={quote(str(value))}' for key, value in params.items())
        data = load('https://api.bilibili.com/x/space/wbi/arc/search?' + query)
        pagecount = page + 1 if page < data['data']['page']['count'] else page
        if page == 1:
            # First entry opens the uploader's play list in detailContent,
            # which expects the 'UP主&&&<mid>' form.
            videos = [{"vod_id": f'UP主&&&{mid}', "vod_name": '播放列表'}]
        for vod in data['data']['list']['vlist']:
            parts = vod['length'].split(':')
            minutes = int(parts[0])
            if minutes >= 60:
                remark = f'{minutes // 60:02d}:{minutes % 60:02d}:{parts[1]}'
            else:
                remark = vod['length']
            videos.append({
                "vod_id": str(vod['aid']).strip(),
                "vod_name": self.removeHtmlTags(vod['title']).replace("&quot;", '"'),
                "vod_pic": vod['pic'].strip(),
                "vod_remarks": remark
            })
    elif cid == '历史记录':
        data = load(f'http://api.bilibili.com/x/v2/history?pn={page}')
        pagecount = page + 1 if len(data['data']) == 300 else page
        for vod in data['data']:
            if vod['duration'] <= 0:
                continue
            total = clock(vod['duration'])
            # NOTE(review): progress == -1 presumably means "watched to the
            # end"; only the total length is shown then - confirm.
            if vod['progress'] != -1:
                remark = clock(vod['progress']) + '|' + total
            else:
                remark = total
            videos.append({
                "vod_id": str(vod["aid"]).strip(),
                "vod_name": self.removeHtmlTags(vod["title"]).replace("&quot;", '"'),
                "vod_pic": vod["pic"].strip(),
                "vod_remarks": remark
            })
    else:
        keyword = cid
        extras = ''
        for key in ext or {}:
            if key == 'tid':
                keyword = ext[key]
                continue
            extras += f'&{key}={quote(str(ext[key]))}'
        url = (
            'https://api.bilibili.com/x/web-interface/search/type?search_type=video'
            f'&keyword={quote(str(keyword))}&page={page}{extras}'
        )
        data = load(url)
        pagecount = data['data']['numPages']
        for vod in data['data']['result']:
            if vod['type'] != 'video':
                continue
            parts = vod['duration'].split(':')
            minutes = int(parts[0])
            seconds = parts[1].rjust(2, '0') if len(parts[1]) == 1 else parts[1]
            if minutes >= 60:
                remark = f'{minutes // 60:02d}:{minutes % 60:02d}:{seconds}'
            else:
                remark = f'{minutes:02d}:{seconds}'
            videos.append({
                "vod_id": str(vod['aid']).strip(),
                "vod_name": self.removeHtmlTags(self.cleanText(vod['title'])),
                "vod_pic": 'https:' + vod['pic'].strip(),
                "vod_remarks": remark
            })

    lenvideos = len(videos)
    return {
        'list': videos,
        'page': page,
        'pagecount': pagecount,
        'limit': lenvideos,
        'total': lenvideos
    }
def detailContent(self, did):
    """Return the detail record for a video or for an uploader's play list.

    Args:
        did: Sequence whose first element is either an aid or
            'UP主&&&<mid>' (play list of that uploader).

    Returns:
        ``{'list': [vod]}``. For a video, 'vod_play_url' holds two groups
        separated by '$$$': the parts of the video, then related videos.
    """
    aid = did[0]

    def clock(seconds):
        # Seconds -> 'HH:MM:SS', without the hours field when it is zero.
        text = time.strftime('%H:%M:%S', time.gmtime(seconds))
        return text[3:] if text.startswith('00:') else text

    def label(raw):
        # '#' and '$' are separators in the play-url syntax; neutralise them.
        return self.removeHtmlTags(raw).strip().replace("#", "-").replace('$', '*')

    def uploader_link(mid, name):
        # json.dumps keeps the embedded object valid even when the name
        # contains quotes or backslashes.
        target = json.dumps({'id': f'UP主&&&{mid}', 'name': name}, ensure_ascii=False, separators=(',', ':'))
        return f'[a=cr:{target}/]{name}[/a]'

    if aid.startswith('UP主&&&'):
        bizId = aid[6:]
        oid = ''
        url = f'https://api.bilibili.com/x/v2/medialist/resource/list?mobi_app=web&type=1&oid={oid}&biz_id={bizId}&otype=1&ps=100&direction=false&desc=true&sort_field=1&tid=0&with_current=false'
        r = self.fetch(url, headers=self.header, timeout=5)
        videoList = r.json()['data']['media_list'] or []
        entries = [
            f"[{clock(video['duration'])}]/{label(video['title'])}$bvid&&&{video['bv_id']}"
            for video in videoList
        ]
        vod = {
            "vod_id": aid,
            "vod_name": '播放列表',
            'vod_play_from': 'B站视频',
            'vod_play_url': '#'.join(entries)
        }
        return {'list': [vod]}

    url = f"https://api.bilibili.com/x/web-interface/view?aid={aid}"
    r = self.fetch(url, headers=self.header, timeout=10)
    info = json.loads(self.cleanText(r.text))['data']
    if "staff" in info:
        director = ','.join(uploader_link(staff['mid'], staff['name']) for staff in info['staff'])
    else:
        director = uploader_link(info['owner']['mid'], info['owner']['name'])
    vod = {
        "vod_id": aid,
        "vod_name": self.removeHtmlTags(info['title']),
        "vod_pic": info['pic'],
        "type_name": info['tname'],
        "vod_year": datetime.fromtimestamp(info['pubdate']).strftime('%Y-%m-%d %H:%M:%S'),
        "vod_content": info['desc'].replace('\xa0', ' ').replace('\n\n', '\n').strip(),
        "vod_director": director
    }
    parts = [
        f"[{clock(video['duration'])}]/{label(video['part'])}${aid}_{video['cid']}"
        for video in info['pages']
    ]

    url = f'https://api.bilibili.com/x/web-interface/archive/related?aid={aid}'
    r = self.fetch(url, headers=self.header, timeout=5)
    related_list = json.loads(self.cleanText(r.text)).get('data') or []
    related = [
        '[{}]/{}${}_{}'.format(clock(video['duration']), label(video['title']), video['aid'], video['cid'])
        for video in related_list
    ]

    vod['vod_play_from'] = 'B站视频$$$相关视频'
    vod['vod_play_url'] = '#'.join(parts) + '$$$' + '#'.join(related)
    return {'list': [vod]}
def searchContent(self, key, quick):
    """Search for ``key``; delegates to ``searchContentPage`` for page '1'."""
    first_page = '1'
    return self.searchContentPage(key, quick, first_page)
def searchContentPage(self, key, quick, page):
    """Search Bilibili videos for ``key`` and return one page of cards.

    Args:
        key: Search keyword.
        quick: When truthy no request is made and an empty list is returned.
        page: Page number to request.

    Returns:
        ``{'list': [...]}``; always a dict, also when the API response
        carries no results.
    """
    videos = []
    if quick:
        return {'list': videos}

    # Resolve the cookie setting: inline value, remote JSON, or a URL.
    cookie = ''
    if 'cookie' in self.extendDict:
        cookie = self.extendDict['cookie']
    if 'json' in self.extendDict:
        remote = self.fetch(self.extendDict['json'], timeout=10).json()
        if 'cookie' in remote:
            cookie = remote['cookie']
    if cookie == '':
        cookie = '{}'
    elif isinstance(cookie, str) and cookie.startswith('http'):
        cookie = self.fetch(cookie, timeout=10).text.strip()
    if isinstance(cookie, dict):
        cookie = json.dumps(cookie, ensure_ascii=False)
    cookie, _, _ = self.getCookie(cookie)

    # Escape the keyword so '&', '#', spaces etc. stay part of its value.
    url = f'https://api.bilibili.com/x/web-interface/search/type?search_type=video&keyword={quote(str(key))}&page={page}'
    r = self.fetch(url, headers=self.header, cookies=cookie, timeout=5)
    jo = json.loads(self.cleanText(r.text))
    for vod in (jo.get('data') or {}).get('result') or []:
        aid = str(vod['aid']).strip()
        title = self.removeHtmlTags(self.cleanText(vod['title']))
        img = 'https:' + vod['pic'].strip()
        try:
            parts = vod['duration'].split(':')
            minutes = int(parts[0])
            seconds = parts[1]
        except (AttributeError, IndexError, KeyError, TypeError, ValueError):
            # Entries without a usable 'MM:SS' duration are skipped.
            continue
        if len(seconds) == 1:
            seconds = '0' + seconds
        if minutes >= 60:
            remark = f'{minutes // 60:02d}:{minutes % 60:02d}:{seconds}'
        else:
            remark = f'{minutes:02d}:{seconds}'
        videos.append({
            "vod_id": aid,
            "vod_name": title,
            "vod_pic": img,
            "vod_remarks": remark
        })
    return {'list': videos}
def playerContent(self, flag, pid, vipFlags):
    """Build the player instruction that routes playback via the local proxy.

    ``pid`` is either 'bvid&&&<bvid>' (aid and cid are looked up through the
    view API) or '<aid>_<cid>'. The returned URL points at the local proxy
    (type=mpd) and carries the quoted cookies, the quoted playurl API URL and
    the 'thread' setting.
    """
    if pid.startswith('bvid&&&'):
        view_url = "https://api.bilibili.com/x/web-interface/view?bvid={}".format(pid[7:])
        info = self.fetch(view_url, headers=self.header, timeout=10).json()['data']
        aid = info['aid']
        cid = info['cid']
    else:
        pieces = pid.split("_")
        aid = pieces[0]
        cid = pieces[1]
    url = 'https://api.bilibili.com/x/player/playurl?avid={}&cid={}&qn=120&fnval=4048&fnver=0&fourk=1'.format(aid, cid)

    extendDict = self.extendDict
    cookie = extendDict['cookie'] if 'cookie' in extendDict else ''
    if 'json' in extendDict:
        remote = self.fetch(extendDict['json'], timeout=10)
        if 'cookie' in remote.json():
            cookie = remote.json()['cookie']
    if cookie == '':
        cookie = '{}'
    elif type(cookie) == str and cookie.startswith('http'):
        cookie = self.fetch(cookie, timeout=10).text.strip()
    try:
        if type(cookie) == dict:
            cookie = json.dumps(cookie, ensure_ascii=False)
    except:
        pass
    cookiesDict, _, _ = self.getCookie(cookie)
    cookies = quote(json.dumps(cookiesDict))
    thread = str(extendDict['thread']) if 'thread' in extendDict else '0'
    return {
        "parse": 0,
        "playUrl": '',
        "url": f'http://127.0.0.1:9978/proxy?do=py&type=mpd&cookies={cookies}&url={quote(url)}&aid={aid}&cid={cid}&thread={thread}',
        "header": self.header,
        'danmaku': 'https://api.bilibili.com/x/v1/dm/list.so?oid={}'.format(cid),
        "format": 'application/dash+xml'
    }
def localProxy(self, params):
    """Dispatch a local-proxy request by ``params['type']``.

    'mpd' goes to ``proxyMpd`` and 'media' to ``proxyMedia``; any other type
    yields None.
    """
    kind = params['type']
    if kind == "mpd":
        return self.proxyMpd(params)
    if kind == "media":
        return self.proxyMedia(params)
    return None
def destroy(self):
    """Placeholder: nothing is released on teardown; returns None."""
    return None
def proxyMpd(self, params):
    """Serve the manifest (or the progressive stream) for a play request.

    Returns:
        ``[200, "application/dash+xml", mpd]`` for DASH content;
        ``[302, "video/MP2T", None, header]`` when the stream is handed to
        the local helper on port 7777; ``[206, ..., bytes]`` with the
        downloaded body otherwise; ``[404, "text/plain", ""]`` when
        ``getDash`` could not resolve anything.
    """
    content, durlinfos, mediaType = self.getDash(params)
    if mediaType == 'mpd':
        return [200, "application/dash+xml", content]
    if not content:
        # getDash reports failure as ('', {}, ''); there is nothing to serve.
        return [404, "text/plain", ""]
    durl = (durlinfos or {}).get('durl') or [{}]
    backups = durl[0].get('backup_url') or []
    # Use the first URL that is not on mcdn.bilivideo.cn; if all are, the
    # last candidate is used.
    url = ''
    for url in [content] + backups:
        if 'mcdn.bilivideo.cn' not in url:
            break
    header = self.header.copy()
    if 'range' in params:
        header['Range'] = params['range']
    if '127.0.0.1:7777' in url:
        header["Location"] = url
        return [302, "video/MP2T", None, header]
    r = requests.get(url, headers=header, stream=True)
    return [206, "application/octet-stream", r.content]
def proxyMedia(self, params, forceRefresh=False):
    """Download one DASH video or audio stream on behalf of the player.

    Args:
        params: Proxy query parameters; 'videoid' or 'audioid' selects the
            stream by index, an optional 'range' is forwarded upstream.
        forceRefresh: Accepted for compatibility; not used.

    Returns:
        ``[206, "application/octet-stream", bytes]`` on success, otherwise
        ``[404, "text/plain", ""]`` (no selector, unknown index, or no
        stream information available).
    """
    _, dashinfos, _ = self.getDash(params)
    if 'videoid' in params:
        kind, index = 'video', int(params['videoid'])
    elif 'audioid' in params:
        kind, index = 'audio', int(params['audioid'])
    else:
        return [404, "text/plain", ""]
    streams = (dashinfos or {}).get(kind) or []
    if not 0 <= index < len(streams):
        # getDash failed or the manifest has no such stream.
        return [404, "text/plain", ""]
    dashinfo = streams[index]
    backups = dashinfo.get('backupUrl') or []
    # Use the first URL that is not on mcdn.bilivideo.cn; if all are, the
    # last candidate is used.
    url = ''
    for url in [dashinfo['baseUrl']] + backups:
        if 'mcdn.bilivideo.cn' not in url:
            break
    if url == "":
        return [404, "text/plain", ""]
    header = self.header.copy()
    if 'range' in params:
        header['Range'] = params['range']
    r = requests.get(url, headers=header, stream=True)
    return [206, "application/octet-stream", r.content]
def getDash(self, params, forceRefresh=False):
    """Resolve the play information for one video and cache the outcome.

    Args:
        params: Proxy query parameters: 'aid', 'cid', 'url' (quoted playurl
            API URL), 'cookies' (JSON text) and optionally 'thread'.
        forceRefresh: Drop the cached entry and resolve again.

    Returns:
        ``(content, infos, kind)``:
        * kind 'mpd': ``content`` is a DASH manifest whose BaseURLs point
          back at the local proxy; ``infos`` is the API's 'dash' object.
        * kind 'mp4': ``content`` is the progressive URL (or the local
          multi-thread helper URL); ``infos`` is the API's 'data' object.
        * ``('', {}, '')`` when the API answers with a non-zero code.
    """
    aid = params['aid']
    cid = params['cid']
    url = unquote(params['url'])
    thread = params['thread'] if 'thread' in params else 0
    header = self.header.copy()
    cookies = json.loads(params['cookies'])
    key = f'bilivdmpdcache_{aid}_{cid}'
    if forceRefresh:
        self.delCache(key)
    else:
        cached = self.getCache(key)
        if cached:
            return cached['content'], cached['dashinfos'], cached['type']

    r = self.fetch(url, cookies=cookies, headers=header, timeout=5)
    data = json.loads(self.cleanText(r.text))
    if data['code'] != 0:
        return '', {}, ''

    def deadline_of(media_url):
        # Media URLs carry their expiry as 'deadline=<unix time>'.
        found = re.search(r'deadline=(\d+)', media_url or '')
        return int(found.group(1)) if found else None

    if 'dash' not in data['data']:
        purl = data['data']['durl'][0]['url']
        deadline = deadline_of(purl)
        expiresAt = deadline - 60 if deadline is not None else int(time.time()) + 600
        if int(thread) > 0:
            # Probe the local multi-thread helper; ask the host to start it
            # when it does not answer.
            try:
                self.fetch('http://127.0.0.1:7777')
            except Exception:
                self.fetch('http://127.0.0.1:9978/go')
            purl = f'http://127.0.0.1:7777?url={quote(purl)}&thread={thread}'
        self.setCache(key, {'content': purl, 'type': 'mp4', 'dashinfos': data['data'], 'expiresAt': expiresAt})
        return purl, data['data'], 'mp4'

    dashinfos = data['data']['dash']
    duration = dashinfos['duration']
    minBufferTime = dashinfos['minBufferTime']
    # Every stream is fetched through the local proxy (type=media).
    proxy_base = (
        'http://127.0.0.1:9978/proxy?do=py&type=media'
        f'&cookies={quote(json.dumps(cookies))}&url={quote(url)}&aid={aid}&cid={cid}'
    )
    deadlines = []

    def representation(attributes, base_url, media):
        segment = media['SegmentBase']
        return (
            f'<Representation {attributes}>\n'
            f'        <BaseURL>{base_url}</BaseURL>\n'
            f'        <SegmentBase indexRange="{segment["indexRange"]}">\n'
            f'          <Initialization range="{segment["Initialization"]}"/>\n'
            '        </SegmentBase>\n'
            '      </Representation>'
        )

    def note_deadline(media):
        deadline = deadline_of(media.get('baseUrl'))
        deadlines.append(deadline if deadline is not None else int(time.time()) + 600)

    video_parts = []
    # 'video' / 'audio' may be absent or null; treat both as "no streams".
    for videoid, video in enumerate(dashinfos.get('video') or []):
        note_deadline(video)
        attributes = (
            f'bandwidth="{video["bandwidth"]}" codecs="{video["codecs"]}" '
            f'frameRate="{video["frameRate"]}" height="{video["height"]}" '
            f'id="{video["id"]}" width="{video["width"]}"'
        )
        video_parts.append(representation(attributes, f'{proxy_base}&videoid={videoid}', video))

    audio_parts = []
    for audioid, audio in enumerate(dashinfos.get('audio') or []):
        note_deadline(audio)
        attributes = (
            f'audioSamplingRate="44100" bandwidth="{audio["bandwidth"]}" '
            f'codecs="{audio["codecs"]}" id="{audio["id"]}"'
        )
        audio_parts.append(representation(attributes, f'{proxy_base}&audioid={audioid}', audio))

    video_xml = '\n      '.join(video_parts)
    audio_xml = '\n      '.join(audio_parts)
    mpd = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" '
        f'type="static" mediaPresentationDuration="PT{duration}S" minBufferTime="PT{minBufferTime}S">\n'
        '  <Period>\n'
        '    <AdaptationSet mimeType="video/mp4" startWithSAP="1" scanType="progressive" segmentAlignment="true">\n'
        f'      {video_xml}\n'
        '    </AdaptationSet>\n'
        '    <AdaptationSet mimeType="audio/mp4" startWithSAP="1" segmentAlignment="true" lang="und">\n'
        f'      {audio_xml}\n'
        '    </AdaptationSet>\n'
        '  </Period>\n'
        '</MPD>'
    )
    # The proxy URLs contain '&'; escape them for XML (done once, reused).
    mpd = mpd.replace('&', '&amp;')
    # Expire one minute before the earliest stream URL does.
    expiresAt = min(deadlines, default=int(time.time()) + 600) - 60
    self.setCache(key, {'type': 'mpd', 'content': mpd, 'dashinfos': dashinfos, 'expiresAt': expiresAt})
    return mpd, dashinfos, 'mpd'
def getCookie(self, cookie):
    """Turn the cookie setting into a cookie dict plus the WBI signing keys.

    Args:
        cookie: Either JSON text of an object or a 'name=value; ...' string.

    Returns:
        ``(cookies, imgKey, subKey)``. The keys come from the 'bblogin'
        cache when present, otherwise from the nav API. When the nav API
        reports a non-zero code, ``cookies`` is replaced by the cookies the
        bilibili.com home page sets.
    """
    if '{' in cookie and '}' in cookie:
        cookies = json.loads(cookie)
    else:
        cookies = {}
        for segment in cookie.strip(';').split(';'):
            name, sep, value = segment.strip().partition('=')
            # Skip empty or malformed segments instead of failing on them.
            if sep and name:
                cookies[name] = value
    bblogin = self.getCache('bblogin')
    if bblogin:
        return cookies, bblogin['imgKey'], bblogin['subKey']

    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36"
    }
    r = requests.get("http://api.bilibili.com/x/web-interface/nav", cookies=cookies, headers=header, timeout=10)
    data = json.loads(r.text)

    def key_of(image_url):
        # The key is the file name of the image URL without its extension.
        return image_url.rsplit('/', 1)[-1].split('.')[0] if image_url else ''

    # NOTE(review): the nav response appears to carry 'wbi_img' for anonymous
    # sessions as well; the keys are used whenever present - confirm.
    wbi = (data.get('data') or {}).get('wbi_img') or {}
    imgKey = key_of(wbi.get('img_url'))
    subKey = key_of(wbi.get('sub_url'))
    if data.get("code") == 0:
        if imgKey and subKey:
            self.setCache('bblogin', {'imgKey': imgKey, 'subKey': subKey, 'expiresAt': int(time.time()) + 1200})
        return cookies, imgKey, subKey
    # Not logged in: continue with the cookies the home page hands out.
    r = self.fetch("https://www.bilibili.com/", headers=header, timeout=5)
    cookies = r.cookies.get_dict()
    return cookies, imgKey, subKey
def getUserid(self, cookie):
    """Return the mid of the account the cookies belong to.

    Yields None when the myinfo API answers with a non-zero code.
    """
    response = self.fetch('http://api.bilibili.com/x/space/myinfo', cookies=cookie, headers=self.header, timeout=5)
    payload = json.loads(self.cleanText(response.text))
    if payload['code'] != 0:
        return None
    return payload['data']['mid']
def removeHtmlTags(self, src):
    """Return ``src`` with every '<...>' tag (shortest match) removed."""
    return re.sub('<.*?>', '', src)
def encWbi(self, params, imgKey, subKey):
    """Return a WBI-signed copy of ``params``.

    A 'wts' timestamp is added, the parameters are sorted by key, the
    characters ``!'()*`` are removed from every value, and 'w_rid' (MD5 of
    the url-encoded query followed by the mixin key) is appended.

    Args:
        params: Query parameters; left unmodified.
        imgKey: First half of the key material.
        subKey: Second half of the key material.

    Returns:
        New dict of string values including 'wts' and 'w_rid'.

    Raises:
        IndexError: If ``imgKey + subKey`` is shorter than the positions
            used by the mixin table (64 characters).
    """
    from hashlib import md5
    from urllib.parse import urlencode
    mixinKeyEncTab = [46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11, 36, 20, 34, 44, 52]
    orig = imgKey + subKey
    # Shuffle the key material by the table and keep the first 32 characters.
    mixinKey = ''.join(orig[i] for i in mixinKeyEncTab)[:32]
    # Work on a copy so the caller's dict does not gain a 'wts' entry.
    signed = dict(params)
    signed['wts'] = round(time.time())
    signed = {
        k: ''.join(ch for ch in str(v) if ch not in "!'()*")
        for k, v in sorted(signed.items())
    }
    query = urlencode(signed)
    signed['w_rid'] = md5((query + mixinKey).encode()).hexdigest()
    return signed
# Class-level counter, initialised to 0.
# NOTE(review): where it is incremented is not visible in this section.
retry = 0
# Default request headers shared by the instance methods (read as
# ``self.header``, e.g. by getUserid).
header = {
    "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36",
    "Referer": "https://www.bilibili.com"
}
+159
View File
@@ -0,0 +1,159 @@
# -*- coding: utf-8 -*-
# by @嗷呜
# 温馨提示:官方APP数据是错误的,你们可以给官方反馈,然后就可以写APP
import re
import sys
from Crypto.Hash import MD5
sys.path.append("..")
import json
import time
from pyquery import PyQuery as pq
from base.spider import Spider
class Spider(Spider):
    """Spider for www.lreeok.vip.

    Pages are scraped with PyQuery; the JSON endpoints are called with a
    time-based MD5 signature produced by :meth:`getbody`.
    """

    def init(self, extend=""):
        """No per-instance setup is needed for this source."""
        pass

    def getName(self):
        pass

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass

    def action(self, action):
        pass

    def destroy(self):
        pass

    host = 'https://www.lreeok.vip'

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'sec-ch-ua-platform': '"macOS"',
        'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="134", "Google Chrome";v="134"',
        'Origin': host,
        'Referer': f"{host}/",
    }

    def homeContent(self, filter):
        """Return the category list and the home-page video list."""
        data = self.getpq(self.fetch(self.host, headers=self.headers).text)
        result = {}
        classes = []
        for k in data('.head-more.box a').items():
            href = k.attr('href')
            if not href or '/vod' not in href:
                continue
            # A '/vod' link without a numeric id cannot be used as a
            # category; skip it instead of crashing on ``None.group``.
            matched = re.search(r'\d+', href)
            if matched is None:
                continue
            classes.append({
                'type_name': k.text(),
                'type_id': matched.group(0)
            })
        result['class'] = classes
        result['list'] = self.getlist(data('.border-box.diy-center .public-list-div'))
        return result

    def homeVideoContent(self):
        pass

    def categoryContent(self, tid, pg, filter, extend):
        """Return one page of the category ``tid`` from the signed vod API."""
        body = {'type': tid, 'class': '', 'area': '', 'lang': '', 'version': '', 'state': '', 'letter': '', 'page': pg}
        data = self.post(f"{self.host}/index.php/api/vod", headers=self.headers, data=self.getbody(body)).json()
        result = {}
        result['list'] = data['list']
        result['page'] = pg
        # The API exposes no totals here, so open-ended paging values are used.
        result['pagecount'] = 9999
        result['limit'] = 90
        result['total'] = 999999
        return result

    def detailContent(self, ids):
        """Scrape the detail page of ``ids[0]`` into a single vod entry."""
        data = self.getpq(self.fetch(f"{self.host}/voddetail/{ids[0]}.html", headers=self.headers).text)
        v = data('.detail-info.lightSpeedIn .slide-info')
        vod = {
            'vod_year': v.eq(-1).text(),
            'vod_remarks': v.eq(0).text(),
            'vod_actor': v.eq(3).text(),
            'vod_director': v.eq(2).text(),
            'vod_content': data('.switch-box #height_limit').text()
        }
        np = data('.anthology.wow.fadeInUp')
        ndata = np('.anthology-tab .swiper-wrapper .swiper-slide')
        pdata = np('.anthology-list .anthology-list-box ul')
        play, names = [], []
        # Tab i (source name) is paired with episode list i.
        for i in range(len(ndata)):
            n = ndata.eq(i)('a')
            # Remove the nested <span> before reading the tab name.
            n('span').remove()
            names.append(n.text())
            vs = []
            for episode in pdata.eq(i)('li').items():
                vs.append(f"{episode.text()}${episode('a').attr('href')}")
            play.append('#'.join(vs))
        vod["vod_play_from"] = "$$$".join(names)
        vod["vod_play_url"] = "$$$".join(play)
        result = {"list": [vod]}
        return result

    def searchContent(self, key, quick, pg="1"):
        """Return search results for ``key`` on page ``pg``."""
        data = self.getpq(self.fetch(f"{self.host}/vodsearch/{key}----------{pg}---.html", headers=self.headers).text)
        return {'list': self.getlist(data('.row-right .search-box .public-list-bj')), 'page': pg}

    def playerContent(self, flag, id, vipFlags):
        """Resolve the play URL for the episode page path ``id``.

        Defaults to handing the page URL back with ``parse=1``. When the
        embedded player config holds a URL that is not already an
        m3u8/mp4 link, the okplay API is asked for the real address and
        ``parse`` becomes 0.
        """
        h, p = {"User-Agent": "okhttp/3.14.9"}, 1
        url = f"{self.host}{id}"
        data = self.getpq(self.fetch(url, headers=self.headers).text)
        try:
            jstr = data('.player .player-left script').eq(0).text()
            # The script assigns the player config as ``...aaa={json}``.
            jsdata = json.loads(jstr.split('aaa=')[-1])
            body = {'url': jsdata['url']}
            if not re.search(r'\.m3u8|\.mp4', body['url']):
                data = self.post(f"{self.host}/okplay/api_config.php", headers=self.headers,
                                 data=self.getbody(body)).json()
                url = data.get('url') or data.get('data', {}).get('url')
                p = 0
        except Exception as e:
            # Best effort: keep the page URL / parse=1 fallback on any failure.
            print('错误信息:', e)
        result = {}
        result["parse"] = p
        result["url"] = url
        result["header"] = h
        return result

    def localProxy(self, param):
        pass

    def getbody(self, params):
        """Add the ``time`` / ``key`` signature fields to ``params``.

        ``key`` is the MD5 hex digest of ``DS{time}DCC147D11943AF75``. The
        dict is updated in place and also returned.
        """
        t = int(time.time())
        h = MD5.new()
        h.update(f"DS{t}DCC147D11943AF75".encode('utf-8'))
        key = h.hexdigest()
        params.update({'time': t, 'key': key})
        return params

    def getlist(self, data):
        """Convert a PyQuery selection of list cards into vod dicts."""
        videos = []
        for i in data.items():
            href = i('a').attr('href')
            if not href:
                continue
            # Cards whose link has no numeric id are skipped rather than
            # raising AttributeError on a failed match.
            matched = re.search(r'\d+', href)
            if matched is None:
                continue
            img = i('img').attr('data-src')
            # Image paths carrying a 'url=' part are prefixed with the host.
            if img and 'url=' in img: img = f'{self.host}{img}'
            videos.append({
                'vod_id': matched.group(0),
                'vod_name': i('img').attr('alt'),
                'vod_pic': img,
                'vod_remarks': i('.public-prt').text() or i('.public-list-prb').text()
            })
        return videos

    def getpq(self, data):
        """Parse ``data`` with PyQuery, retrying on UTF-8 bytes if that fails."""
        try:
            return pq(data)
        except Exception as e:
            print(f"{str(e)}")
            return pq(data.encode('utf-8'))
+94
View File
@@ -0,0 +1,94 @@
# coding=utf-8
# !/usr/bin/python
import sys
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
    """Spider backed by the ``/api.php/app`` JSON API of g.c494.com."""

    def getName(self):
        return "mp"

    def init(self, extend=""):
        pass

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass

    def destroy(self):
        pass

    host = 'https://g.c494.com'

    header = {
        'User-Agent': 'Dart/2.10 (dart:io)',
        'platform_version': 'RP1A.200720.011',
        'version': '2.2.3',
        'copyright': 'xiaogui',
        'platform': 'android',
        'client_name': '576O5p+P5b2x6KeG',
    }

    def homeContent(self, filter):
        """Return the categories and their per-category filter groups.

        A filter entry is only created for categories that have at least
        one known, non-blank field in ``type_extend``.
        """
        data = self.fetch(f'{self.host}/api.php/app/nav?token=', headers=self.header).json()
        # Filter field -> display name.
        dy = {"class": "类型", "area": "地区", "lang": "语言", "year": "年份", "letter": "字母", "by": "排序",
              "sort": "排序"}
        filters = {}
        classes = []
        for item in data["list"]:
            type_id = str(item["type_id"])
            classes.append({"type_name": item["type_name"], "type_id": type_id})
            type_extend = item["type_extend"]
            # Only a mapping can carry filter fields; anything else means
            # "no filters" for this category.
            if not isinstance(type_extend, dict):
                continue
            groups = []
            for dkey, raw in type_extend.items():
                if dkey not in dy or raw.strip() == "":
                    continue
                # Values arrive as one comma-separated string.
                value_array = [{"n": value.strip(), "v": value.strip()}
                               for value in raw.split(",") if value.strip() != ""]
                groups.append({"key": dkey, "name": dy[dkey], "value": value_array})
            if groups:
                filters[type_id] = groups
        result = {}
        result["class"] = classes
        result["filters"] = filters
        return result

    def homeVideoContent(self):
        """Flatten every ``vlist`` of the index response into one list."""
        rsp = self.fetch(f"{self.host}/api.php/app/index_video?token=", headers=self.header)
        root = rsp.json()['list']
        videos = [item for vodd in root for item in vodd['vlist']]
        return {'list': videos}

    def categoryContent(self, tid, pg, filter, extend):
        """Return the API's category response for ``tid`` / ``pg`` unchanged."""
        parms = {"pg": pg, "tid": tid, "class": extend.get("class", ""), "area": extend.get("area", ""),
                 "lang": extend.get("lang", ""), "year": extend.get("year", ""), "token": ""}
        data = self.fetch(f'{self.host}/api.php/app/video', params=parms, headers=self.header).json()
        return data

    def detailContent(self, ids):
        """Return the detail record of ``ids[0]`` without its advert fields."""
        parms = {"id": ids[0], "token": ""}
        data = self.fetch(f'{self.host}/api.php/app/video_detail', params=parms, headers=self.header).json()
        vod = data['data']
        vod.pop('pause_advert_list', None)
        vod.pop('init_advert_list', None)
        vod.pop('vod_url_with_player', None)
        return {"list": [vod]}

    def searchContent(self, key, quick, pg='1'):
        """Return the API's search response for ``key`` unchanged."""
        parms = {'pg': pg, 'text': key, 'token': ''}
        data = self.fetch(f'{self.host}/api.php/app/search', params=parms, headers=self.header).json()
        return data

    def playerContent(self, flag, id, vipFlags):
        """Play ``id`` directly (parse=0) with an FFmpeg-style User-Agent."""
        # The header value must not repeat the header name: the previous
        # 'User-Agent: Lavf/58.12.100' sent "User-Agent: User-Agent: Lavf/...".
        return {"parse": 0, "url": id, "header": {'User-Agent': 'Lavf/58.12.100'}}

    def localProxy(self, param):
        pass
+172
View File
@@ -0,0 +1,172 @@
# coding=utf-8
# !/usr/bin/python
import sys
sys.path.append('')
from base.spider import Spider
from urllib.parse import quote
class Spider(Spider):
    """Spider backed by the item.xpgtv.com Android JSON API."""

    host = "http://item.xpgtv.com"

    header = {
        'User-Agent': 'okhttp/3.12.11',
        'token': 'ElEDlwCVgXcFHFhddiq2JKteHofExRBUrfNlmHrWetU3VVkxnzJAodl52N9EUFS+Dig2A/fBa/V9RuoOZRBjYvI+GW8kx3+xMlRecaZuECdb/3AdGkYpkjW3wCnpMQxf8vVeCz5zQLDr8l8bUChJiLLJLGsI+yiNskiJTZz9HiGBZhZuWh1mV1QgYah5CLTbSz8=',
        'token2': 'a0kEsBKRgTkBZ29NZ3WcNKN/C4T00RN/hNkmmGa5JMBeEENnqydLoetm/t8=',
        'user_id': 'XPGBOX',
        'version': 'XPGBOX com.phoenix.tv1.5.3',
        'timestamp': '1732286435',
        'hash': 'd9ab',
    }

    def getName(self):
        return "xpg"

    def init(self, extend=""):
        pass

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass

    def destroy(self):
        pass

    def homeContent(self, filter):
        """Return the categories plus their filter groups.

        Each category gets the filter groups named in ``dy`` that hold more
        than one value, plus a fixed three-way sort filter.
        """
        data = self.fetch(
            "{0}/api.php/v2.vod/androidtypes".format(self.host),
            headers=self.header,
        ).json()
        # Filter field -> display name.
        dy = {
            "classes": "类型",
            "areas": "地区",
            "years": "年份",
            "sortby": "排序",
        }
        # Display names for the three sort values injected below.
        demos = ['时间', '人气', '评分']
        filters = {}
        classes = []
        for item in data['data']:
            # Inject the sort options under the key ``dy`` actually looks
            # up; the former 'soryby' typo meant the sort filter was never
            # offered.
            item['sortby'] = ['updatetime', 'hits', 'score']
            type_id = str(item["type_id"])
            classes.append({"type_name": item["type_name"], "type_id": type_id})
            groups = []
            for dkey in item:
                if dkey not in dy or len(item[dkey]) <= 1:
                    continue
                value_array = [
                    {"n": demos[idx] if dkey == "sortby" else value.strip(), "v": value.strip()}
                    for idx, value in enumerate(item[dkey])
                    if value.strip() != ""
                ]
                groups.append({"key": dkey, "name": dy[dkey], "value": value_array})
            if groups:
                filters[type_id] = groups
        result = {}
        result["class"] = classes
        result["filters"] = filters
        return result

    def _brief(self, vod):
        """Map one API list record to the vod summary shape."""
        return {
            "vod_id": vod['id'],
            "vod_name": vod['name'],
            "vod_pic": vod['pic'],
            "vod_remarks": vod['score']
        }

    def homeVideoContent(self):
        """Flatten every section of the Android home feed into one list."""
        rsp = self.fetch("{0}/api.php/v2.main/androidhome".format(self.host), headers=self.header)
        root = rsp.json()['data']['list']
        videos = [self._brief(vod) for vodd in root for vod in vodd['list']]
        return {'list': videos}

    def categoryContent(self, tid, pg, filter, extend):
        """Return one page of category ``tid`` filtered by ``extend``."""
        parms = [f"page={pg}", f"type={tid}"]
        # The filter keys are the ones published by homeContent ('areas',
        # 'years', ...); the former 'areaes' / 'yeares' lookups raised
        # KeyError as soon as one of those filters was selected.
        if extend.get('areas'):
            parms.append(f"area={quote(extend['areas'])}")
        if extend.get('years'):
            parms.append(f"year={quote(extend['years'])}")
        if extend.get('sortby'):
            parms.append(f"sortby={extend['sortby']}")
        if extend.get('classes'):
            parms.append(f"class={quote(extend['classes'])}")
        url = '{0}/api.php/v2.vod/androidfilter10086?{1}'.format(self.host, "&".join(parms))
        rsp = self.fetch(url, headers=self.header)
        root = rsp.json()['data']
        result = {}
        result['list'] = [self._brief(vod) for vod in root]
        result['page'] = pg
        # The API exposes no totals here, so open-ended paging values are used.
        result['pagecount'] = 9999
        result['limit'] = 90
        result['total'] = 999999
        return result

    def detailContent(self, ids):
        """Return the detail entry of ``ids[0]`` with its episode play list."""
        id = ids[0]
        url = '{0}/api.php/v3.vod/androiddetail2?vod_id={1}'.format(self.host, id)
        rsp = self.fetch(url, headers=self.header)
        root = rsp.json()['data']
        node = root['urls']
        # Each episode is "<label>$<m3u8 url>".
        d = [it['key'] + "$" + f"http://c.xpgtv.net/m3u8/{it['url']}.m3u8" for it in node]
        vod = {
            "vod_name": root['name'],
            'vod_play_from': '小苹果',
            'vod_play_url': '#'.join(d),
        }
        return {'list': [vod]}

    def searchContent(self, key, quick, pg='1'):
        """Return search results for ``key`` on page ``pg``."""
        # Percent-encode the keyword so characters such as '&' or '#'
        # cannot truncate or corrupt the query string.
        url = '{0}/api.php/v2.vod/androidsearch10086?page={1}&wd={2}'.format(self.host, pg, quote(key))
        rsp = self.fetch(url, headers=self.header)
        root = rsp.json()['data']
        return {'list': [self._brief(vod) for vod in root]}

    def playerContent(self, flag, id, vipFlags):
        """Play ``id`` directly (parse=0) using the API request headers."""
        result = {}
        result["parse"] = 0
        result["url"] = id
        result["header"] = self.header
        return result

    def localProxy(self, param):
        pass
+72
View File
@@ -0,0 +1,72 @@
#coding=utf-8
#!/usr/bin/python
import sys
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
    """Spider that lists live platforms and their streams from the
    ``json.txt`` index served under ``base_url``."""

    def init(self, extend=""):
        self.base_url = 'http://api.hclyz.com:81/mf'

    def homeContent(self, filter):
        """Return the single fixed category."""
        classes = [{"type_name": "色播聚合", "type_id": "/json.txt"}]
        result = {"class": classes}
        return result

    @staticmethod
    def _viewer_count(item):
        """Sort key: the platform's ``Number`` as int, 0 when unusable."""
        try:
            return int(item.get('Number'))
        except (TypeError, ValueError):
            return 0

    def categoryContent(self, tid, pg, filter, extend):
        """Return every platform, ordered by descending ``Number``."""
        home = self.fetch(f'{self.base_url}/json.txt').json()
        # The first entry of "pingtai" is skipped, as before; a missing
        # key now yields an empty list instead of a TypeError.
        data = (home.get("pingtai") or [])[1:]
        videos = [
            {
                "vod_id": "/" + item['address'],
                "vod_name": item['title'],
                "vod_pic": item['xinimg'].replace("http://cdn.gcufbd.top/img/",
                                                  "https://slink.ltd/https://raw.githubusercontent.com/fish2018/lib/refs/heads/main/imgs/"),
                "vod_remarks": item.get('Number', ''),
                "style": {"type": "rect", "ratio": 1.33}
            } for item in sorted(data, key=self._viewer_count, reverse=True)
        ]
        result = {
            "page": pg,
            "pagecount": 1,
            "limit": len(videos),
            "total": len(videos),
            "list": videos
        }
        return result

    def detailContent(self, array):
        """Return the stream list of the platform file named by ``array[0]``."""
        id = array[0]
        # vod ids are stored with a leading "/"; strip it so the request
        # URL does not contain a double slash.
        data = self.fetch(f"{self.base_url}/{id.lstrip('/')}").json()
        zhubo = data['zhubo']
        playUrls = '#'.join([f"{vod['title']}${vod['address']}" for vod in zhubo])
        vod = [{
            "vod_play_from": 'sebo',
            "vod_play_url": playUrls,
            "vod_content": 'https://github.com/fish2018',
        }]
        result = {"list": vod}
        return result

    def playerContent(self, flag, id, vipFlags):
        """Play ``id`` directly (parse=0)."""
        result = {
            'parse': 0,
            'url': id
        }
        return result

    def getName(self):
        return '色播聚合'

    def homeVideoContent(self):
        pass

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass

    def searchContent(self, key, quick, pg="1"):
        """Search is not supported; ``pg`` is accepted so that callers
        passing a page number do not fail with a TypeError."""
        pass

    def destroy(self):
        pass

    def localProxy(self, param):
        pass
+591
View File
@@ -0,0 +1,591 @@
import json
import requests
import warnings
import re
import os
import time
from urllib3.exceptions import InsecureRequestWarning
from copy import deepcopy
from concurrent.futures import ThreadPoolExecutor
# Custom path or http(s) URL of jsm.json; leave empty to use the jsm.json
# in the current working directory.
jsm_file_path = ""
# Load jsm.json. Every failure is reported and leaves jsm_data empty.
jsm_data = {}
if jsm_file_path:
    if jsm_file_path.startswith(("http://", "https://")):
        try:
            # A timeout keeps an unresponsive host from hanging the script
            # at import time.
            response = requests.get(jsm_file_path, timeout=10)
            jsm_data = response.json()
        except Exception as e:
            print(f"从网络读取 jsm.json 配置文件失败: {str(e)}")
    else:
        if os.path.exists(jsm_file_path):
            try:
                with open(jsm_file_path, 'r', encoding='utf-8') as f:
                    jsm_data = json.load(f)
            except Exception as e:
                print(f"读取本地 jsm.json 配置文件失败: {str(e)}")
        else:
            print(f"本地 jsm.json 文件 {jsm_file_path} 不存在")
else:
    local_path = os.path.join(os.getcwd(), 'jsm.json')
    if os.path.exists(local_path):
        try:
            with open(local_path, 'r', encoding='utf-8') as f:
                jsm_data = json.load(f)
        except Exception as e:
            print(f"读取默认 jsm.json 配置文件失败: {str(e)}")
    else:
        print("默认的 jsm.json 文件不存在")
# Site mapping: Chinese site name -> key used in url.json.
site_mappings = {
    '立播': 'libo', '闪电':'shandian', '欧哥': 'ouge', '小米': 'xiaomi', '多多': 'duoduo',
    '蜡笔': 'labi', '至臻': 'zhizhen', '木偶':'mogg', '六趣': 'liuqu', '虎斑': 'huban',
    '下饭': 'xiafan', '玩偶': 'wogg', '星剧社':'star2', '二小': 'xhww'
}
# Proxy settings; the proxy is only used as a retry path when enabled.
proxy_config = {
    "enabled": False,
    "proxies": {
        "http": "http://127.0.0.1:7890",
        "https": "http://127.0.0.1:7890"
    }
}
# Input/output directories; an empty value means the current working directory.
file_path_config = {
    "input_dir": "",
    "output_dir": ""
}
# jsm mapping: site "key" in jsm.json -> key used in url.json.
jsm_mapping = {
    "Libvio": "libo",
    "Xiaomi": "xiaomi",
    "yydsys": "duoduo",
    "蜡笔网盘": "labi",
    "玩偶 | 蜡笔": "labi",
    "至臻|网盘": "zhizhen",
    "Huban": "huban",
    "Wogg": "wogg",
    "Mogg": "mogg",
    "玩偶 | 闪电uc": "shandian",
    "玩偶 | 二小": "xhww",
    "玩偶 | 小米": "xiaomi",
    "玩偶 | 多多": "duoduo",
    "玩偶 | 木偶": "mogg",
    "玩偶gg": "wogg",
    "星剧社": "star2"
}
# Sites whose probe URL gets a search path appended to the base URL.
search_path_config = {
    '闪电': '/index.php/vod/search.html?wd=仙台有树',
    '欧哥': '/index.php/vod/search.html?wd=仙台有树',
    '小米': '/index.php/vod/search.html?wd=仙台有树',
    '多多': '/index.php/vod/search.html?wd=仙台有树',
    '蜡笔': '/index.php/vod/search.html?wd=仙台有树',
    '至臻': '/index.php/vod/search.html?wd=仙台有树',
    '六趣': '/index.php/vod/search.html?wd=仙台有树',
    '虎斑': '/index.php/vod/search.html?wd=仙台有树',
    '下饭': '/index.php/vod/search.html?wd=仙台有树',
    '玩偶': '/vodsearch/-------------.html?wd=仙台有树',
    '木偶': '/index.php/vod/search.html?wd=仙台有树',
    '二小': '/index.php/vod/search.html?wd=仙台有树',
    '立播': '/search/-------------.html?wd=仙台有树&submit='
}
# Sites whose response must contain a keyword, and that keyword.
keyword_required_sites = {
    '闪电': 'class="search-stat"',
    '欧哥': 'class="search-stat"',
    '小米': 'class="search-stat"',
    '多多': 'class="search-stat"',
    '蜡笔': 'class="search-stat"',
    '至臻': 'class="search-stat"',
    '六趣': 'class="search-stat"',
    '虎斑': 'class="search-stat"',
    '下饭': 'class="search-stat"',
    '玩偶': 'class="search-stat"',
    '木偶': 'class="search-stat"',
    '二小': 'class="search-stat"',
    '立播': 'class="stui-screen"'
}
# Optional per-URL weights; URLs not listed here get the default weight of 50.
url_weight_config = {
    "木偶": {
        "https://aliii.deno.dev": 60,
        "http://149.88.87.72:5666": 60
    },
    "至臻": {
        "http://www.xhww.net": 10,
        "http://xhww.net": 10
    },
    "立播": {
        "https://libvio.mov": 60,
        "https://www.libvio.cc": 60
    }
}
# Fallback URLs, merged in after the remote and local data sources.
fallback_url_config = {
    "立播": [
        "https://libvio.mov",
        "https://www.libvio.cc",
        "https://libvio.la",
        "https://libvio.pro",
        "https://libvio.fun",
        "https://libvio.me",
        "https://libvio.in",
        "https://libvio.site",
        "https://libvio.art",
        "https://libvio.com",
        "https://libvio.vip",
        "https://libvio.pw",
        "https://libvio.link"
    ],
    "闪电": [
        "http://1.95.79.193",
        "http://1.95.79.193:666"
    ],
    "欧哥": [
        "https://woog.nxog.eu.org"
    ],
    "小米": [
        "http://www.54271.fun",
        "https://www.milvdou.fun",
        "http://www.54271.fun",
        "https://www.mucpan.cc",
        "https://mucpan.cc",
        "http://milvdou.fun"
    ],
    "多多": [
        "https://tv.yydsys.top",
        "https://tv.yydsys.cc",
        "https://tv.214521.xyz",
        "http://155.248.200.65"
    ],
    "蜡笔": [
        "http://feimaoai.site",
        "https://feimao666.fun",
        "http://feimao888.fun"
    ],
    "至臻": [
        "https://mihdr.top",
        "http://www.miqk.cc",
        "http://www.xhww.net",
        "http://xhww.net",
        "https://xiaomiai.site"
    ],
    "六趣": [
        "https://wp.0v.fit"
    ],
    "虎斑": [
        "http://103.45.162.207:20720"
    ],
    "下饭": [
        "http://txfpan.top",
        "http://www.xn--ghqy10g1w0a.xyz"
    ],
    "玩偶": [
        "https://wogg.xxooo.cf",
        "https://wogg.333232.xyz",
        "https://www.wogg.one",
        "https://www.wogg.lol",
        "https://www.wogg.net"
    ],
    "木偶": [
        "https://tv.91muou.icu",
        "https://mo.666291.xyz",
        "https://mo.muouso.fun",
        "https://aliii.deno.dev",
        "http://149.88.87.72:5666"
    ],
    "星剧社": [
        "https://mlink.cc/520TV"
    ],
    "二小": [
        "https://xhww.net",
        "https://www.xhww.net"
    ]
}
# Global state: the site name of the most recent log banner (see log_message).
last_site = None
def log_message(message, site_name=None, step="", max_error_length=80):
    """Print a decorated log line.

    A banner is printed whenever ``site_name`` differs from the previous
    one. The first known status tag found in ``message`` gets its emoji
    appended; messages without a tag get a trailing loudspeaker. Error and
    connection-failure messages are truncated to ``max_error_length``
    characters. With a ``step`` the line is prefixed by a timestamp and the
    step name.
    """
    global last_site
    status_emojis = {
        '[开始]': '🚀', '[成功]': '', '[完成]': '🎉', '[失败]': '',
        '[超时]': '', '[警告]': '⚠️', '[错误]': '🚨', '[信息]': '',
        '[选择]': '🔍', '[连接失败]': '🔌'
    }

    # Banner on site change.
    if site_name and site_name != last_site:
        rule = '' + '=' * 38 + ''
        print(f"\n{rule}")
        print(f"🌐 [站点: {site_name}]")
        print(f"{rule}")
        last_site = site_name

    # Decorate the first status tag present, in table order.
    found_tag = next((tag for tag in status_emojis if tag in message), None)
    if found_tag is None:
        message = f"{message} 📢"
    else:
        message = message.replace(found_tag, f"{found_tag} {status_emojis[found_tag]}")

    # Keep overly long error messages short.
    is_error = "[连接失败]" in message or "[错误]" in message
    if is_error and len(message) > max_error_length:
        message = message[:max_error_length] + "..."

    if step:
        print(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] [{step}] {message}")
    else:
        print(message)
def test_url(url, site_name=None):
    """Probe a candidate site URL.

    The site's search path (if configured) is appended to ``url`` and the
    page is fetched directly; when that request raises and the proxy is
    enabled, it is retried once through the proxy.

    Args:
        url: Base URL of the candidate.
        site_name: Site name used to look up the search path and the
            keyword that must appear in the response.

    Returns:
        ``(latency_seconds, has_keyword)`` for an HTTP 200 answer, otherwise
        ``(None, None)``. ``has_keyword`` is ``True`` when the site has no
        keyword requirement.
    """
    search_path = search_path_config.get(site_name)
    # Named ``target`` so the local no longer shadows this function.
    target = url.strip() + search_path if search_path else url.strip()
    keyword = keyword_required_sites.get(site_name)
    request_kwargs = {
        "timeout": 7,
        "verify": False,
        "headers": {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'},
    }

    def _measure(response):
        """Return (latency, has_keyword) for a successful response."""
        has_keyword = keyword in response.text if keyword else True
        return response.elapsed.total_seconds(), has_keyword

    # The context manager closes the session's pooled connections; this
    # function runs once per candidate URL, so leaking them adds up.
    with requests.Session() as session:
        adapter = requests.adapters.HTTPAdapter(max_retries=2)
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        try:
            # Direct request.
            response = session.get(target, **request_kwargs)
            if response.status_code == 200:
                latency, has_keyword = _measure(response)
                log_msg = f"直接访问成功 | 延迟: {latency:.2f}s"
                if keyword:
                    # The marker used to be an empty string for both
                    # outcomes, which made this part of the log useless.
                    log_msg += f" | 关键字: {'✓' if has_keyword else '✗'}"
                log_message(f"[成功] {target} {log_msg}", site_name, "URL测试")
                return latency, has_keyword
            log_message(f"[失败] HTTP状态码 {response.status_code}", site_name, "URL测试")
            return None, None
        except requests.RequestException as e:
            error_type = "[超时]" if isinstance(e, requests.Timeout) else "[连接失败]"
            log_message(f"{error_type} {str(e)}", site_name, "URL测试")
            # Retry through the proxy.
            if proxy_config["enabled"]:
                try:
                    response = session.get(target, proxies=proxy_config["proxies"], **request_kwargs)
                    if response.status_code == 200:
                        latency, has_keyword = _measure(response)
                        log_message(f"[成功] 代理访问成功 | 延迟: {latency:.2f}s | 关键字: {'✓' if has_keyword else '✗'}",
                                    site_name, "URL测试")
                        return latency, has_keyword
                    log_message(f"[失败] HTTP状态码 {response.status_code}", site_name, "URL测试")
                except Exception as proxy_e:
                    log_message(f"[失败] 代理访问错误: {str(proxy_e)}", site_name, "URL测试")
            return None, None
def get_best_url(urls, site_name=None, existing_url=None):
    """Pick the best reachable URL among ``urls``.

    A non-list ``urls`` is returned untouched. Otherwise every URL is
    probed concurrently with ``test_url`` and scored as
    ``weight * 0.6 + 40 / (latency + 0.1)``; candidates are ranked by
    keyword presence, then score, then latency. When nothing is reachable,
    ``existing_url`` (or ``None``) is returned.
    """
    if not isinstance(urls, list):
        return urls

    site_weights = url_weight_config.get(site_name, {})
    # Heaviest first; equal weights keep their input order.
    ranked = sorted(((u, site_weights.get(u, 50)) for u in urls),
                    key=lambda pair: pair[1], reverse=True)

    def probe(pair):
        """Probe one (url, weight) pair; None when unreachable."""
        candidate_url, weight = pair
        latency, has_keyword = test_url(candidate_url, site_name)
        if latency is None:
            return None
        return {
            "url": candidate_url,
            "latency": latency,
            "has_keyword": has_keyword,
            "weight": weight,
            "score": (weight * 0.6) + ((1 / (latency + 0.1)) * 40)
        }

    with ThreadPoolExecutor() as pool:
        candidates = [outcome for outcome in pool.map(probe, ranked) if outcome]

    if not candidates:
        if existing_url:
            log_message(f"[警告] 无可用URL,使用现有配置: {existing_url}", site_name, "URL选择")
            return existing_url
        log_message("[错误] 无可用URL且无历史配置", site_name, "URL选择")
        return None

    # Ranking: keyword present > score > latency.
    ordered = sorted(candidates,
                     key=lambda c: (-c['has_keyword'], -c['score'], c['latency']))
    report_lines = [
        f"{c['url']} | 权重:{c['weight']} 延迟:{c['latency']:.2f}s 评分:{c['score']:.1f}"
        for c in ordered
    ]
    log_message("候选URL评估结果:\n" + "\n".join(report_lines), site_name, "URL选择")
    winner = ordered[0]
    log_message(f"[选择] 最优URL: {winner['url']} (评分: {winner['score']:.1f})", site_name, "URL选择")
    return winner['url']
def get_star2_real_url(source_url):
    """Extract the real star2.cn address from the page at ``source_url``.

    Returns the first matching URL with surrounding whitespace and any
    trailing slash removed, or ``None`` when no link is found or the
    request raises.
    """
    try:
        response = requests.get(
            source_url,
            timeout=8,
            verify=False,
            headers={'Referer': 'https://mlink.cc/'}
        )
        if response.status_code == 200:
            # Case-insensitive search for a star2.cn URL inside an
            # href / src / data-url attribute.
            match = re.search(
                r'''(?i)(?:href|src|data-?url)=["'](https?://[^"']*?star2\.cn[^"']*)["']''',
                response.text
            )
            if match:
                real_url = match.group(1).strip().rstrip('/')
                log_message(f"[成功] 提取真实链接: {real_url}", "星剧社", "链接解析")
                return real_url
        log_message("[失败] 未找到有效链接", "星剧社", "链接解析")
    except Exception as e:
        log_message(f"[错误] 解析失败: {str(e)}", "星剧社", "链接解析")
    return None
def merge_url_data(*dicts):
    """Merge ``site -> url(s)`` mappings, de-duplicating each site's list.

    Falsy sources are skipped. A non-list value is treated as one URL.
    First-seen order is preserved within each site.
    """
    combined = {}
    for source in dicts:
        if not source:
            continue
        for site, urls in source.items():
            bucket = combined.setdefault(site, [])
            if isinstance(urls, list):
                bucket.extend(urls)
            else:
                bucket.append(urls)
    unique = {}
    for site, collected in combined.items():
        # dict.fromkeys drops duplicates while keeping insertion order.
        unique[site] = list(dict.fromkeys(collected))
    return unique
def get_file_path(filename, is_input=True):
    """Return ``filename`` joined onto the configured input or output
    directory, using the current working directory when that is empty."""
    config_key = "input_dir" if is_input else "output_dir"
    base_dir = file_path_config.get(config_key, "")
    if not base_dir:
        base_dir = os.getcwd()
    return os.path.join(base_dir, filename)
def load_existing_config():
    """Return the parsed contents of the existing url.json.

    Yields an empty dict when the file is missing or cannot be read; a
    read failure is logged.
    """
    url_path = get_file_path('url.json')
    if not os.path.exists(url_path):
        return {}
    try:
        with open(url_path, 'r', encoding='utf-8') as handle:
            return json.load(handle)
    except Exception as e:
        log_message(f"[错误] 读取现有配置失败: {str(e)}", step="配置加载")
    return {}
def get_api_urls():
    """Build a ``jsm key -> URL`` mapping from the local url.json.

    Returns an empty dict when the file is missing or is not valid JSON.
    """
    API_FILE_PATH = get_file_path('url.json')
    try:
        with open(API_FILE_PATH, 'r', encoding='utf-8') as handle:
            api_data = json.load(handle)
    except FileNotFoundError:
        print("未找到 url.json 文件,请检查文件路径。")
        return {}
    except json.JSONDecodeError:
        print("url.json 文件格式错误,请检查文件内容。")
        return {}
    print("成功读取 url.json 文件")
    # Derive url_mapping from jsm_mapping.
    url_mapping = {}
    for jsm_key, url_key in jsm_mapping.items():
        url_mapping[jsm_key] = api_data.get(url_key)
    print("生成的 url_mapping:", url_mapping)
    return url_mapping
def replace_urls(data, urls):
    """Replace the URL part of each known site's ``ext`` field in ``data``.

    ``urls`` maps url.json keys to the chosen URLs. ``data`` is modified in
    place and also returned.
    """
    # Translate urls (keyed by url.json key) into a mapping keyed by jsm site key.
    api_urls = {
        jsm_key: urls.get(jsm_value)
        for jsm_key, jsm_value in jsm_mapping.items()
    }
    sites = data.get('sites', [])
    replaced_count = 0
    for item in sites:
        if isinstance(item, dict):
            key = item.get('key')
            ext = item.get('ext')
            new_url = api_urls.get(key)
            old_url = None
            if new_url and isinstance(ext, str):
                # ext is split on "$$$"; the second part is replaced only
                # when it already looks like an http(s) URL.
                parts = ext.split('$$$')
                if len(parts) > 1 and parts[1].strip().startswith('http'):
                    old_url = parts[1]
                    parts[1] = new_url
                    item['ext'] = '$$$'.join(parts)
                    replaced_count += 1
                    print(f"成功替换 {key} 的链接: {old_url} -> {new_url}")
                    if 'url' in item:
                        del item['url']  # remove the url field
            # NOTE(review): old_url is only assigned when new_url is truthy,
            # so this condition looks unreachable — confirm.
            if old_url and not new_url:
                print(f"未成功替换 {key} 的链接,原链接: {old_url}")
        else:
            print(f"跳过非字典类型的 item: {item}")
    print(f"总共替换了 {replaced_count} 个链接。")
    return data
def update_jsm_config(urls):
    """Write a copy of ``jsm_data`` with refreshed URLs to the output jsm.json.

    Returns ``True`` on success, ``False`` when ``jsm_data`` is empty or
    writing fails.
    """
    global jsm_data
    if not jsm_data:
        log_message("[错误] jsm_data 为空,无法更新配置", step="配置更新")
        return False

    # Work on a deep copy so the loaded configuration stays untouched.
    refreshed = replace_urls(deepcopy(jsm_data), urls)
    succeeded = False
    try:
        target_path = get_file_path('jsm.json', is_input=False)
        os.makedirs(os.path.dirname(target_path), exist_ok=True)
        with open(target_path, 'w', encoding='utf-8') as handle:
            json.dump(refreshed, handle, ensure_ascii=False, indent=4)
        log_message("[完成] jsm.json 配置文件更新成功", step="配置更新")
        succeeded = True
    except Exception as e:
        log_message(f"[错误] 更新 jsm.json 配置文件失败: {str(e)}", step="配置更新")
    return succeeded
def process_urls():
    """Run the whole URL refresh flow.

    Steps: load the existing url.json, collect candidate URLs (remote
    yuan.json, local yuan.json, built-in fallbacks), pick the best URL per
    site, write yuan.json / url.json, update jsm.json and print a summary.

    Returns:
        ``True`` when at least one site ended up with a URL.
    """
    log_message("[开始] 启动URL更新流程", step="主流程")
    # Existing configuration.
    existing_config = load_existing_config()
    # Data sources.
    data_sources = []
    try:
        remote_data = requests.get(
            'https://github.catvod.com/https://raw.githubusercontent.com/celin1286/xiaosa/main/yuan.json',
            timeout=10
        ).json()
        # Only a mapping can be merged; anything else would crash
        # merge_url_data outside of this try block.
        if not isinstance(remote_data, dict):
            raise ValueError("remote yuan.json is not a JSON object")
        data_sources.append(remote_data)
        log_message("[成功] 远程数据加载完成", step="数据收集")
    except Exception as e:
        log_message(f"[错误] 远程数据获取失败: {str(e)}", step="数据收集")
    local_path = get_file_path('yuan.json')
    if os.path.exists(local_path):
        try:
            with open(local_path, 'r', encoding='utf-8') as f:
                local_data = json.load(f)
            if not isinstance(local_data, dict):
                raise ValueError("local yuan.json is not a JSON object")
            data_sources.append(local_data)
            log_message("[成功] 本地数据加载完成", step="数据收集")
        except Exception as e:
            log_message(f"[错误] 本地数据读取失败: {str(e)}", step="数据收集")
    data_sources.append(fallback_url_config)
    merged_data = merge_url_data(*data_sources)
    # Result storage.
    result = {'url': {}}
    stats = {'total': 0, 'success': 0, 'failed': [], 'changed': []}
    for cn_name, urls in merged_data.items():
        stats['total'] += 1
        site_key = site_mappings.get(cn_name)
        if site_key is None:
            # Without a mapping the result would be stored under a None
            # key (serialised as "null") and overwrite other unknown sites.
            stats['failed'].append(cn_name)
            log_message("[警告] 未配置站点映射,已跳过", cn_name, "结果处理")
            continue
        existing_url = existing_config.get(site_key, '')
        if cn_name == '星剧社':
            best_source = get_best_url(urls, cn_name, existing_url)
            resolved = get_star2_real_url(best_source) if best_source else None
            # Keep the known address when extraction fails, matching the
            # fallback used for every other site.
            final_url = resolved or existing_url
        else:
            final_url = get_best_url(urls, cn_name, existing_url) or existing_url
        if final_url:
            result['url'][site_key] = final_url
            if existing_url and existing_url != final_url:
                # Separator added: the two URLs used to be glued together.
                stats['changed'].append(f"{cn_name}: {existing_url} -> {final_url}")
                log_message("[更新] 配置变更检测", cn_name, "结果处理")
            stats['success'] += 1
        else:
            stats['failed'].append(cn_name)
            log_message("[警告] 无可用URL", cn_name, "结果处理")
    # Persist files.
    output_files = {
        'yuan.json': merged_data,
        'url.json': result['url']
    }
    for filename, data in output_files.items():
        try:
            path = get_file_path(filename, is_input=False)
            os.makedirs(os.path.dirname(path), exist_ok=True)
            with open(path, 'w', encoding='utf-8') as f:
                json.dump(data, f, ensure_ascii=False, indent=2)
            log_message(f"[成功] 保存文件: {path}", step="数据持久化")
        except Exception as e:
            log_message(f"[错误] 文件保存失败: {str(e)}", step="数据持久化")
    # jsm update.
    log_message("[开始] 启动jsm配置更新", step="主流程")
    update_success = update_jsm_config(result['url'])
    log_message(
        f"[{'成功' if update_success else '失败'}] jsm配置更新完成",
        step="主流程"
    )
    # Summary report.
    log_message(
        f"[完成] 处理结果: {stats['success']}/{stats['total']} 成功\n"
        f"url.json变更项 ({len(stats['changed'])}):\n" + "\n".join(stats['changed']) + "\n"
        f"url.json失败项 ({len(stats['failed'])}): {', '.join(stats['failed']) if stats['failed'] else ''}",
        step="统计报告"
    )
    return stats['success'] > 0
def main():
    """Script entry point: silence InsecureRequestWarning, then run the
    URL update flow."""
    # The probes in this script request with verify=False.
    warnings.simplefilter('ignore', InsecureRequestWarning)
    process_urls()
# Run only when executed as a script; report the total wall-clock time.
if __name__ == "__main__":
    start_time = time.time()
    main()
    elapsed = time.time() - start_time
    print(f"总耗时: {elapsed:.2f} 秒")
+276
View File
@@ -0,0 +1,276 @@
# coding=utf-8
# !/usr/bin/python
# by嗷呜
import json
import sys
from base64 import b64decode, b64encode
from pyquery import PyQuery as pq
from requests import Session
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
def init(self, extend=""):
    """Set up proxy, host and the shared HTTP session.

    ``extend`` is an optional JSON string; its ``proxy`` entry, when
    truthy, is stored on ``self.proxy`` (otherwise an empty string).
    """
    configured_proxy = json.loads(extend).get('proxy') if extend else None
    self.proxy = configured_proxy if configured_proxy else ''
    self.host = self.gethost()
    self.headers['referer'] = f'{self.host}/'
    self.session = Session()
    self.session.headers.update(self.headers)
def getName(self):
    """Not implemented for this source; returns ``None``."""
    return None
def isVideoFormat(self, url):
    """Not implemented for this source; returns ``None``."""
    return None
def manualVideoCheck(self):
    """Not implemented for this source; returns ``None``."""
    return None
def destroy(self):
    """Nothing to release; returns ``None``."""
    return None
# Browser-like default request headers. init() adds a 'referer' entry to
# this dict and copies it into the shared session.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'sec-ch-ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-full-version': '"133.0.6943.98"',
    'sec-ch-ua-arch': '"x86"',
    'sec-ch-ua-platform': '"Windows"',
    'sec-ch-ua-platform-version': '"19.0.0"',
    'sec-ch-ua-model': '""',
    'sec-ch-ua-full-version-list': '"Not(A:Brand";v="99.0.0.0", "Google Chrome";v="133.0.6943.98", "Chromium";v="133.0.6943.98"',
    'dnt': '1',
    'upgrade-insecure-requests': '1',
    'sec-fetch-site': 'none',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-user': '?1',
    'sec-fetch-dest': 'document',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'priority': 'u=0, i'
}
def homeContent(self, filter):
    """Return the fixed category list and a 4K toggle filter.

    Every category except ``4K`` itself gets one filter group whose only
    option appends ``/4k`` to the category path.
    """
    cateManual = {
        "4K": "/4k",
        "国产": "two_click_/categories/chinese",
        "最新": "/newest",
        "最佳": "/best",
        "频道": "/channels",
        "类别": "/categories",
        "明星": "/pornstars"
    }
    classes = [
        {'type_name': label, 'type_id': path}
        for label, path in cateManual.items()
    ]
    filters = {
        path: [{'key': 'type', 'name': '类型', 'value': [{'n': '4K', 'v': '/4k'}]}]
        for label, path in cateManual.items()
        if label != '4K'
    }
    return {'class': classes, 'filters': filters}
def homeVideoContent(self):
    """Return the video cards found on the default page."""
    page = self.getpq()
    cards = page(".thumb-list--sidebar .thumb-list__item")
    return {'list': self.getlist(cards)}
def categoryContent(self, tid, pg, filter, extend):
    """Return one page of the listing identified by ``tid``.

    ``tid`` is a plain path (``/4k``, ``/newest``, ``/best``, ``/channels``,
    ``/categories``, ``/pornstars``) or a folder id carrying one of two
    prefixes:

    * ``two_click_<path>`` - a video listing at ``<path>``.
    * ``one_click_<id>`` - a category group whose items are folders.

    Returns:
        dict with ``page``, ``pagecount``, ``limit``, ``total`` and ``list``.
    """
    vdata = []
    result = {}
    result['page'] = pg
    # Open-ended paging by default; the single-page branches override it.
    result['pagecount'] = 9999
    result['limit'] = 90
    result['total'] = 999999
    if tid in ['/4k', '/newest', '/best'] or 'two_click_' in tid:
        if 'two_click_' in tid: tid = tid.split('click_')[-1]
        # The optional "type" filter appends "/4k" to the path.
        data = self.getpq(f'{tid}{extend.get("type", "")}/{pg}')
        vdata = self.getlist(data(".thumb-list--sidebar .thumb-list__item"))
    elif tid == '/channels':
        data = self.getpq(f'{tid}/{pg}')
        jsdata = self.getjsdata(data)
        for i in jsdata['channels']:
            vdata.append({
                'vod_id': "two_click_" + i.get('channelURL'),
                'vod_name': i.get('channelName'),
                'vod_pic': i.get('siteLogoURL'),
                'vod_year': f'videos:{i.get("videoCount")}',
                'vod_tag': 'folder',
                'vod_remarks': f'subscribers:{i["subscriptionModel"].get("subscribers")}',
                'style': {'ratio': 1.33, 'type': 'rect'}
            })
    elif tid == '/categories':
        # Single page: report the current page as the last one.
        result['pagecount'] = pg
        data = self.getpq(tid)
        # Cached for the follow-up 'one_click_' request.
        self.cdata = self.getjsdata(data)
        for i in self.cdata['layoutPage']['store']['popular']['assignable']:
            vdata.append({
                'vod_id': "one_click_" + i.get('id'),
                'vod_name': i.get('name'),
                'vod_pic': '',
                'vod_tag': 'folder',
                'style': {'ratio': 1.33, 'type': 'rect'}
            })
    elif tid == '/pornstars':
        data = self.getpq(f'{tid}/{pg}')
        pdata = self.getjsdata(data)
        for i in pdata['pagesPornstarsComponent']['pornstarListProps']['pornstars']:
            vdata.append({
                'vod_id': "two_click_" + i.get('pageURL'),
                'vod_name': i.get('name'),
                'vod_pic': i.get('imageThumbUrl'),
                'vod_remarks': i.get('translatedCountryName'),
                'vod_tag': 'folder',
                'style': {'ratio': 1.33, 'type': 'rect'}
            })
    elif 'one_click' in tid:
        result['pagecount'] = pg
        tid = tid.split('click_')[-1]
        # The category data is normally cached by a previous '/categories'
        # request. Load it on demand when this folder is opened first,
        # instead of failing with AttributeError.
        if getattr(self, 'cdata', None) is None:
            self.cdata = self.getjsdata(self.getpq('/categories'))
        for i in self.cdata['layoutPage']['store']['popular']['assignable']:
            if i.get('id') == tid:
                for j in i['items']:
                    vdata.append({
                        'vod_id': "two_click_" + j.get('url'),
                        'vod_name': j.get('name'),
                        'vod_pic': j.get('thumb'),
                        'vod_tag': 'folder',
                        'style': {'ratio': 1.33, 'type': 'rect'}
                    })
    result['list'] = vdata
    return result
def detailContent(self, ids):
    """Return the detail record for the video page at ``ids[0]``.

    The play list is built from ``xplayerSettings.sources`` in the page's
    embedded JSON. ``standard`` entries come first, ordered by the number in
    their label (highest first), followed by the ``hls`` entries. Each entry
    is ``<label>$<token>`` where the token is the base64 of
    ``<parse>@@@@<url>``, the format ``playerContent`` decodes.

    If anything goes wrong while reading the sources, including a missing or
    unparsable embedded JSON, the play list falls back to a single entry that
    points at the page itself with parse flag 1.

    Args:
        ids: Sequence whose first element is the video page path or URL.

    Returns:
        ``{'list': [vod]}`` with a single vod dict.
    """
    data = self.getpq(ids[0])
    vn = data('meta[property="og:title"]').attr('content')
    dtext = data('#video-tags-list-container')
    href = dtext('a').attr('href')
    title = dtext('span[class*="body-bold-"]').eq(0).text()
    pdtitle = ''
    if href:
        # Link markup carrying a 'two_click_' category id.
        # NOTE(review): presumably rendered as a clickable link by the host app; confirm.
        pdtitle = '[a=cr:' + json.dumps({'id': 'two_click_' + href, 'name': title}) + '/]' + title + '[/a]'
    vod = {
        'vod_name': vn,
        'vod_director': pdtitle,
        'vod_remarks': data('.rb-new__info').text(),
        'vod_play_from': 'Xhamster',
        'vod_play_url': ''
    }

    def get_sort_key(entry):
        # Sort by the digits of the label before '$', largest first,
        # with the label itself as tie-breaker.
        quality = entry.split('$')[0]
        digits = ''.join(ch for ch in quality if ch.isdigit())
        return -(int(digits) if digits else 0), quality

    try:
        # Parsing the embedded JSON sits inside the try so that a page
        # without usable player data still reaches the fallback below.
        sources = self.getjsdata(data)['xplayerSettings']['sources']
        plist = []
        for value in (sources.get('standard') or {}).values():
            if isinstance(value, list):
                for info in value:
                    token = self.e64(f'0@@@@{info.get("url") or info.get("fallback")}')
                    plist.append(f"{info.get('label') or info.get('quality')}${token}")
        plist.sort(key=get_sort_key)
        for format_type, info in (sources.get('hls') or {}).items():
            if url := info.get('url'):
                plist.append(f"{format_type}${self.e64(f'0@@@@{url}')}")
    except Exception as e:
        plist = [f"{vn}${self.e64(f'1@@@@{ids[0]}')}"]
        print(f"获取视频信息失败: {str(e)}")
    vod['vod_play_url'] = '#'.join(plist)
    return {'list': [vod]}
def searchContent(self, key, quick, pg="1"):
    """Search the site for ``key`` and return one page of video entries.

    Args:
        key: Raw search text. It is percent-encoded before being placed in
            the URL path, so characters such as ``#``, ``?``, ``/`` or ``&``
            cannot truncate or alter the request.
        quick: Unused by this method.
        pg: Page number, passed through as the ``page`` query parameter.

    Returns:
        ``{'list': [...], 'page': pg}``.
    """
    from urllib.parse import quote

    data = self.getpq(f'/search/{quote(key, safe="")}?page={pg}')
    return {'list': self.getlist(data(".thumb-list--sidebar .thumb-list__item")), 'page': pg}
def playerContent(self, flag, id, vipFlags):
    """Decode a play token into the playback descriptor.

    ``id`` is a base64 token that decodes to ``<parse>@@@@<url>``. The
    result carries the integer parse flag, the URL prefixed with
    ``self.proxy``, and a fixed set of request headers whose ``origin`` and
    ``referer`` point at ``self.host``. ``flag`` and ``vipFlags`` are not
    used here.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.5410.0 Safari/537.36',
        'pragma': 'no-cache',
        'cache-control': 'no-cache',
        'sec-ch-ua-platform': '"Windows"',
        'sec-ch-ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
        'dnt': '1',
        'sec-ch-ua-mobile': '?0',
        'origin': self.host,
        'sec-fetch-site': 'cross-site',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        'referer': f'{self.host}/',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'priority': 'u=1, i',
    }
    # parts[0] is the parse flag, parts[1] the media or page URL.
    parts = self.d64(id).split('@@@@')
    parse_mode = int(parts[0])
    target = f'{self.proxy}{parts[1]}'
    return {'parse': parse_mode, 'url': target, 'header': request_headers}
def localProxy(self, param):
    """No-op: this spider does not implement a local proxy; returns None."""
    return None
def gethost(self):
    """Resolve the site host from the redirect issued by xhamster.com.

    Requests ``https://xhamster.com`` (prefixed with ``self.proxy``) without
    following redirects and returns the ``Location`` response header. Any
    failure, including a response without that header, is printed and the
    default ``https://zn.xhamster.com`` is returned instead.
    """
    try:
        redirect = self.fetch(
            f'{self.proxy}https://xhamster.com',
            headers=self.headers,
            allow_redirects=False,
        )
        resolved = redirect.headers['Location']
    except Exception as e:
        print(f"获取主页失败: {str(e)}")
        resolved = "https://zn.xhamster.com"
    return resolved
def e64(self, text):
    """Return the base64 encoding of ``text`` (UTF-8) as a str.

    Any error is printed and an empty string is returned instead.
    """
    try:
        return b64encode(text.encode('utf-8')).decode('utf-8')
    except Exception as e:
        print(f"Base64编码错误: {str(e)}")
        return ""
def d64(self, encoded_text):
    """Decode base64 ``encoded_text`` back into a UTF-8 str.

    Any error (bad padding, invalid UTF-8, non-str input) is printed and an
    empty string is returned instead.
    """
    try:
        return b64decode(encoded_text.encode('utf-8')).decode('utf-8')
    except Exception as e:
        print(f"Base64解码错误: {str(e)}")
        return ""
def getlist(self, data):
    """Convert thumbnail nodes into vod entries.

    ``data`` must offer ``.items()`` yielding one queryable node per
    thumbnail. Each entry takes its link and image from ``.role-pop``, its
    name from the info link, the first space-separated token of the views
    text as ``vod_year``, and the duration text as ``vod_remarks``.
    """
    return [
        {
            'vod_id': thumb('.role-pop').attr('href'),
            'vod_name': thumb('.video-thumb-info a').text(),
            'vod_pic': thumb('.role-pop img').attr('src'),
            'vod_year': thumb('.video-thumb-info .video-thumb-views').text().split(' ')[0],
            'vod_remarks': thumb('.role-pop div[data-role="video-duration"]').text(),
            'style': {'ratio': 1.33, 'type': 'rect'}
        }
        for thumb in data.items()
    ]
def getpq(self, path=''):
    """Fetch ``path`` through ``self.session`` and parse the body with ``pq``.

    ``path`` is prefixed with ``self.host`` unless it already starts with
    ``http``; ``self.proxy`` is prepended in both cases. If parsing the
    response text fails, the error is printed and the UTF-8 encoded bytes of
    the same text are parsed instead.
    """
    if path.startswith('http'):
        base = ''
    else:
        base = self.host
    markup = self.session.get(f'{self.proxy}{base}{path}').text
    try:
        document = pq(markup)
    except Exception as e:
        print(f"{str(e)}")
        # Retry with bytes when the str form is rejected by the parser.
        document = pq(markup.encode('utf-8'))
    return document
def getjsdata(self, data):
    """Extract the JSON assigned to ``initials`` in the page's init script.

    Reads the text of ``script[id='initials-script']``, keeps what follows
    the last ``initials=``, drops the final character and parses the rest as
    JSON.
    """
    script_text = data("script[id='initials-script']").text()
    payload = script_text.rpartition('initials=')[2]
    # The last character is dropped before parsing (presumably the trailing ';').
    return json.loads(payload[:-1])