// JS/lib/util.ym.js
// Utility helpers: random string generation, URL joining and the
// "pdfh / pdfa / pd" HTML extraction helpers built on cheerio.
import 'assets://js/lib/uri.min.js';
import cheerio from 'assets://js/lib/cheerio.min.js';
import 'assets://js/lib/crypto-js.js';

// Alphabet used by randomStr. The last 10 characters are the digits.
// NOTE(review): 'a' appears twice and 'i'/'I' are absent - confirm whether the
// duplicate 'a' is intentional before changing it (it affects the output distribution).
const charStr = 'abacdefghjklmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ0123456789';

/**
 * Pick a random integer in [min, max].
 * When `i` is 0 (first character of a string) the pick is repeated until it
 * falls outside the trailing 10 positions of `charStr` (the digits).
 *
 * @param {number} min - lowest index, inclusive
 * @param {number} max - highest index, inclusive
 * @param {number} i - position of the character being generated
 * @returns {number} the chosen index
 */
export function randIndex(min, max, i) {
    let index = Math.floor(Math.random() * (max - min + 1) + min);
    const numStart = charStr.length - 10;
    // Loose equality kept on purpose so existing callers passing e.g. '0' behave as before.
    if (i == 0 && index >= numStart) {
        index = randIndex(min, max, i);
    }
    return index;
}

/**
 * Build a random string from `charStr`; the first character is never a digit.
 *
 * @param {number} [len] - desired length; any falsy value means 15
 * @returns {string} the generated string
 */
export function randomStr(len) {
    const min = 0;
    const max = charStr.length - 1;
    len = len || 15;
    let out = '';
    for (let i = 0; i < len; i++) {
        out += charStr[randIndex(min, max, i)];
    }
    return out;
}

/**
 * Resolve `url` against `base` using the global `Uri` class.
 * Absolute http(s) URLs are used as-is; '://...' and '//...' inherit the base
 * protocol; anything else is appended to the base protocol/host/port.
 * A missing path or query on the result is filled in from `base`.
 *
 * @param {string} base - base URL (falsy is treated as '')
 * @param {string} url - URL to resolve (falsy is treated as '')
 * @returns {string} the joined URL
 */
export function urljoin(base, url) {
    base = base || '';
    url = url || '';
    // NOTE(review): String.prototype.rstrip is not standard JavaScript; it is
    // presumably provided by the host environment - confirm.
    const baseU = new Uri(base.trim().rstrip('/'));
    url = url.trim().rstrip('/');
    let u;
    if (url.startsWith('http://') || url.startsWith('https://')) {
        u = new Uri(url);
    } else if (url.startsWith('://')) {
        u = new Uri(baseU.protocol() + url);
    } else if (url.startsWith('//')) {
        u = new Uri(baseU.protocol() + ':' + url);
    } else {
        const port = baseU.port() ? ':' + baseU.port() : '';
        u = new Uri(baseU.protocol() + '://' + baseU.host() + port + '/' + url);
    }
    if ((!u.path() || u.path().trim().length === 0) && baseU.path()) {
        u.path(baseU.path());
    }
    if (!u.query() && baseU.query()) {
        u.query(baseU.query());
    }
    return u.toString();
}

// Attribute names whose values are treated as URLs.
const DOM_CHECK_ATTR = /(url|src|href|data-original|data-src)$/;
// Markers showing a selector segment already picks a specific element.
// These must NOT carry the `g` flag: `.test()` on a global regex advances
// `lastIndex`, so consecutive calls would give alternating wrong answers.
const SELECT_REGEX = /:eq|:lt|:gt|#/;
const SELECT_REGEX_A = /:eq|:lt|:gt/;

/**
 * Extract a single value from HTML using a '&&'-separated rule.
 * The last rule segment is the option: 'Text', 'Html' or an attribute name.
 * Earlier segments are selectors; each one without :eq/:lt/:gt/# gets ':eq(0)'.
 *
 * @param {string|{rr: Function, ele: *}} html - HTML text, or an item produced by pdfa
 * @param {string} parse - the rule
 * @param {string} [base_url] - when given, URL-like attribute values are made absolute
 * @returns {string} the extracted value ('' when the rule is blank)
 */
export function pdfh(html, parse, base_url) {
    if (!parse || !parse.trim()) {
        return '';
    }
    const eleFind = typeof html === 'object';
    let option;
    if (eleFind && parse.startsWith('body&&')) {
        parse = parse.slice(6);
        if (parse.indexOf('&&') < 0) {
            // Only an option remains: apply it to the element itself.
            option = parse.trim();
            parse = '*=*';
        }
    }
    if (parse.indexOf('&&') > -1) {
        const sp = parse.split('&&');
        option = sp.pop();
        parse = sp
            .map((sel) => (SELECT_REGEX.test(sel) ? sel : sel + ':eq(0)'))
            .join(' ');
    }
    let result = '';
    const $ = eleFind ? html.rr : cheerio.load(html);
    let ret;
    if (eleFind) {
        ret = (parse === '*=*' || $(html.ele).is(parse)) ? html.ele : $(html.ele).find(parse);
    } else {
        ret = $(parse);
    }
    if (option) {
        if (option === 'Text') {
            result = $(ret).text();
        } else if (option === 'Html') {
            result = $(ret).html();
        } else {
            result = $(ret).attr(option);
        }
        if (result && base_url && DOM_CHECK_ATTR.test(option)) {
            if (/http/.test(result)) {
                // Drop anything in front of the first 'http'.
                result = result.slice(result.indexOf('http'));
            } else {
                result = urljoin(base_url, result);
            }
        }
    } else {
        result = $(ret).toString();
    }
    return result;
}

/**
 * Select a list of elements from HTML using a '&&'-separated selector rule.
 * Every segment except the last gets ':eq(0)' unless it already has :eq/:lt/:gt.
 *
 * @param {string|{rr: Function, ele: *}} html - HTML text, or an item produced by pdfa
 * @param {string} parse - the rule
 * @returns {Array<{rr: Function, ele: *}>} one entry per matched element
 */
export function pdfa(html, parse) {
    if (!parse || !parse.trim()) {
        return [];
    }
    const eleFind = typeof html === 'object';
    if (parse.indexOf('&&') > -1) {
        const sp = parse.split('&&');
        const last = sp.length - 1;
        parse = sp
            .map((sel, idx) => (idx < last && !SELECT_REGEX_A.test(sel) ? sel + ':eq(0)' : sel))
            .join(' ');
    }
    const $ = eleFind ? html.rr : cheerio.load(html);
    let ret;
    if (eleFind) {
        ret = $(html.ele).is(parse) ? html.ele : $(html.ele).find(parse);
    } else {
        ret = $(parse);
    }
    const result = [];
    if (ret) {
        ret.each(function (idx, ele) {
            result.push({ rr: $, ele: ele });
        });
    }
    return result;
}

const defaultParser = {
    pdfh: pdfh,
    pdfa: pdfa,
    /**
     * Like pdfh, but when the rule ends in a URL-like attribute name the
     * value is made absolute against the global MY_URL.
     *
     * NOTE(review): `uri` is accepted but not used; MY_URL is always the
     * base. Kept as-is to avoid changing results - confirm the intent.
     */
    pd(html, parse, uri) {
        // `pd` is also exposed as a bare global, where `this` is undefined
        // in strict (module) code; fall back to defaultParser in that case.
        const parser = (this && typeof this.pdfh === 'function') ? this : defaultParser;
        let ret = parser.pdfh(html, parse);
        if (DOM_CHECK_ATTR.test(parse)) {
            if (/http/.test(ret)) {
                ret = ret.slice(ret.indexOf('http'));
            } else {
                ret = urljoin(MY_URL, ret);
            }
        }
        return ret;
    },
};

globalThis.randIndex = randIndex;
globalThis.randomStr = randomStr;
globalThis.urljoin = urljoin;
globalThis.joinUrl = urljoin;
globalThis.defaultParser = defaultParser;
globalThis.pdfa = defaultParser.pdfa;
globalThis.pdfh = defaultParser.pdfh;
globalThis.pd = defaultParser.pd;