Начало обработки по ссылкам

This commit is contained in:
2024-09-04 16:12:03 +03:00
parent 48b4d6cccf
commit 04374fef40
10 changed files with 633 additions and 103 deletions

View File

@ -68,4 +68,14 @@ class ContingentFacade
$con['org_id'] = $orgId;
}
}
/**
* Summary of getContingentFromLink
* @param string $html
* @return array
*/
// public function getContingentFromLink(string $html): array
// {
// $parser = new ContingentParser($html);
// $
// }
}

View File

@ -7,6 +7,7 @@ use DOMXPath;
class ContingentParser
{
private ?DOMXPath $xpath;
private DOMDocument $dom;
private const TEMPLATE = '//tr[@itemprop="eduChislen"]//';
private const ENCODING = "UTF-8";
private const FIELDS = [
@ -20,15 +21,15 @@ class ContingentParser
/**
 * Builds the parser for the given HTML markup.
 *
 * The DOM document is always created and stored on the instance. When
 * $html is empty the XPath helper is left as null; otherwise the markup is
 * passed through setEncoding(), loaded into the document, and an XPath
 * helper is created over it.
 *
 * @param string $html HTML markup to parse.
 */
public function __construct(string $html)
{
    // Keep libxml parse errors internal instead of emitting PHP warnings.
    libxml_use_internal_errors(true);
    // The document lives on the instance so other methods can read it
    // directly; a leftover local $dom would leave $this->dom unpopulated.
    $this->dom = new DOMDocument(
        encoding: self::ENCODING
    );
    if (empty($html)) {
        $this->xpath = null;
    } else {
        $this->setEncoding($html);
        $this->dom->loadHTML($html);
        $this->xpath = new DOMXPath($this->dom);
    }
}
@ -45,32 +46,11 @@ class ContingentParser
}
$html = mb_convert_encoding($html,'HTML-ENTITIES','UTF-8');
}
/**
 * Runs one XPath lookup per entry of self::FIELDS.
 *
 * A field mapped to a single tag stores the result of querying that tag.
 * A field mapped to a pair of tags queries both and keeps the result with
 * more matched nodes (the second one when the counts are equal).
 *
 * @return array Query results keyed by field name.
 */
private function parse() : array
{
    $result = [];
    foreach (self::FIELDS as $field => $tag) {
        // Attribute predicate shared by every query for this field.
        $predicate = "[@itemprop=\"$field\"]";
        if (is_array($tag)) {
            $first = $this->xpath->query(self::TEMPLATE . $tag[0] . $predicate);
            $second = $this->xpath->query(self::TEMPLATE . $tag[1] . $predicate);
            $result[$field] = $first->length > $second->length ? $first : $second;
            continue;
        }
        $result[$field] = $this->xpath->query(self::TEMPLATE . $tag . $predicate);
    }
    return $result;
}
public function getDataTable() : array
{
if (empty($this->xpath)) return [];
$data = $this->parse();
$data = $this->parseContingent();
$records = [];
if ($data == null) return [];
@ -96,4 +76,32 @@ class ContingentParser
}
return $records;
}
/**
 * Collects the XPath query result for every field listed in self::FIELDS.
 *
 * When a field maps to a single tag, that tag is queried directly. When it
 * maps to two candidate tags, both are queried and the result with the
 * larger node count is kept; on a tie the second candidate is used.
 *
 * @return array Query results indexed by field name.
 */
private function parseContingent() : array
{
    // Queries elements of the given tag carrying the field's itemprop.
    $select = fn ($tagName, $field) => $this->xpath->query(
        self::TEMPLATE . $tagName . "[@itemprop=\"$field\"]"
    );

    $nodes = [];
    foreach (self::FIELDS as $field => $tag) {
        if (!is_array($tag)) {
            $nodes[$field] = $select($tag, $field);
            continue;
        }

        $primary = $select($tag[0], $field);
        $fallback = $select($tag[1], $field);
        if ($primary->length > $fallback->length) {
            $nodes[$field] = $primary;
        } else {
            $nodes[$field] = $fallback;
        }
    }
    return $nodes;
}
/**
 * Returns the href of the first <a> element in the loaded document.
 *
 * @return string The href attribute value, or an empty string when the
 *                document contains no <a> element or that element has no
 *                href attribute.
 */
public function getLink(): string
{
    $firstAnchor = $this->dom->getElementsByTagName('a')->item(0);
    // item(0) yields null for an empty node list, so guard before reading
    // the attribute instead of calling a method on null.
    if (!$firstAnchor instanceof \DOMElement) {
        return '';
    }
    return $firstAnchor->getAttribute('href');
}
}