sveden-parser/ContingentParser/Parser/ContingentParser.php

107 lines
3.1 KiB
PHP
Raw Normal View History

2024-08-08 12:32:27 +02:00
<?php
namespace ContingentParser\Parser;
2024-08-08 12:32:27 +02:00
use DOMDocument;
use DOMXPath;
2024-08-08 12:32:27 +02:00
/**
 * Extracts the contingent table from an HTML page.
 *
 * The markup is loaded into a DOMDocument and queried with XPath for cells
 * located inside rows marked itemprop="eduChislen" and carrying the itemprop
 * values listed in FIELDS.
 */
class ContingentParser
{
    /** XPath prefix selecting any descendant of a row marked itemprop="eduChislen". */
    private const TEMPLATE = '//tr[@itemprop="eduChislen"]//';

    /** Encoding the markup is normalised to before it is parsed. */
    private const ENCODING = "UTF-8";

    /** Convmap for mb_encode_numericentity(): every code point above ASCII. */
    private const NON_ASCII_CONVMAP = [0x80, 0x10FFFF, 0, 0x1FFFFF];

    /**
     * itemprop value => tag name(s) that may carry it. When several tags are
     * listed, the tag yielding the most matches is used.
     */
    private const FIELDS = [
        "eduCode" => "td",
        "eduName" => "td",
        "eduLevel" => "td",
        "eduForm" => "td",
        "numberAll" => ["th", "td"],
    ];

    /** XPath handle over the loaded document; null when nothing was loaded. */
    private ?DOMXPath $xpath;

    private DOMDocument $dom;

    /**
     * @param string $html Raw page markup; detected as either UTF-8 or windows-1251.
     */
    public function __construct(string $html)
    {
        // Collect parse problems internally instead of emitting PHP warnings.
        libxml_use_internal_errors(true);

        $this->dom = new DOMDocument(encoding: self::ENCODING);
        $this->xpath = null;

        if (trim($html) === '') {
            return;
        }

        $loaded = $this->dom->loadHTML($this->normalizeEncoding($html));
        // Drop the buffered parse errors so they do not accumulate across instances.
        libxml_clear_errors();

        if ($loaded) {
            $this->xpath = new DOMXPath($this->dom);
        }
    }

    /**
     * Returns the markup as pure-ASCII UTF-8 with every non-ASCII character
     * written as a numeric character reference, so the parser result does not
     * depend on charset guessing.
     */
    private function normalizeEncoding(string $html): string
    {
        $detected = mb_detect_encoding($html, 'UTF-8, windows-1251');

        // Detection may fail (false); in that case the input is treated as UTF-8.
        if ($detected !== false && $detected !== self::ENCODING) {
            $html = mb_convert_encoding($html, self::ENCODING, $detected);
            // Keep a declared charset in step with the converted bytes,
            // whatever letter case the page used for the label.
            $html = str_ireplace('windows-1251', self::ENCODING, $html);
        }

        // Replacement for the 'HTML-ENTITIES' pseudo-encoding deprecated in PHP 8.2.
        return mb_encode_numericentity($html, self::NON_ASCII_CONVMAP, self::ENCODING);
    }

    /**
     * Builds one record per table row.
     *
     * @return array List of ContingentRow::getData() results; empty when no
     *               document was loaded, when any field has no cells, or when
     *               the fields do not all have the same number of cells.
     */
    public function getDataTable(): array
    {
        if ($this->xpath === null) {
            return [];
        }

        $columns = $this->parseContingent();
        $rowCount = $columns['eduName']->length;
        if ($rowCount === 0) {
            return [];
        }

        // Cells are paired by position, so every column must be equally long.
        foreach ($columns as $cells) {
            if ($cells->length !== $rowCount) {
                return [];
            }
        }

        $records = [];
        for ($i = 0; $i < $rowCount; $i++) {
            $contingentRow = new ContingentRow(
                $columns['eduCode']->item($i)->textContent,
                $columns['eduName']->item($i)->textContent,
                $columns['eduLevel']->item($i)->textContent,
                $columns['eduForm']->item($i)->textContent,
                (int)$columns['numberAll']->item($i)->textContent
            );
            $records[] = $contingentRow->getData();
        }

        return $records;
    }

    /**
     * Runs one XPath query per configured field.
     *
     * @return array Node lists keyed by field name.
     */
    private function parseContingent(): array
    {
        $data = [];
        foreach (self::FIELDS as $field => $tags) {
            $best = null;
            foreach ((array)$tags as $tag) {
                $nodes = $this->xpath->query(
                    self::TEMPLATE . $tag . "[@itemprop=\"$field\"]"
                );
                // On a tie the later tag wins, so an earlier tag is kept only
                // when it has strictly more matches.
                if ($best === null || $nodes->length >= $best->length) {
                    $best = $nodes;
                }
            }
            $data[$field] = $best;
        }

        return $data;
    }

    /**
     * Returns the href attribute of the first anchor in the document.
     *
     * @return string The href value, or an empty string when the document has
     *                no anchor or the anchor has no href attribute.
     */
    public function getLink(): string
    {
        $anchor = $this->dom->getElementsByTagName('a')->item(0);

        return $anchor instanceof \DOMElement ? $anchor->getAttribute('href') : '';
    }
}