Добавил абстракции и первые варианты парсинга результатов приёма

This commit is contained in:
2024-10-10 11:51:24 +03:00
parent 984f6bda0a
commit 3b6fecec6c
310 changed files with 7831 additions and 44954 deletions

View File

@ -0,0 +1,81 @@
<?php
namespace SvedenParser\PriemParser;
use SvedenParser\Color;
use SvedenParser\Http\UrlBuilder;
use SvedenParser\Manager;
use SvedenParser\Printer;
/**
 * Collects admission ("priem") results for one organisation site.
 *
 * The education page is downloaded first; when it contains a link to a
 * dedicated admission-results page, that page is downloaded and parsed
 * instead of the education page itself.
 */
final class PriemManager extends Manager
{
    /**
     * Wires the admission-specific repository and service into the manager.
     */
    public function __construct()
    {
        parent::__construct();
        $this->repository = new PriemRepository();
        $this->service = new PriemService();
    }

    /**
     * Downloads, parses and reports the admission table of a single site.
     *
     * Nothing is done when isExit($site) is true or when the education page
     * cannot be downloaded. When no valid data is obtained, "No result" is
     * printed and the organisation id and URL are written to the HTML logger.
     *
     * @param array $site Site record; the keys 'org_id' and 'site' are read here.
     */
    public function collectData(array $site): void
    {
        if ($this->isExit($site)) {
            return;
        }

        ['org_id' => $orgId, 'site' => $url] = $site;
        $url = UrlBuilder::build($url);
        Printer::println(implode(' ', $site), Color::GREEN);

        $html = $this->httpClient->getContentOfSite(
            $url,
            $site,
            'sveden/education/'
        );
        if (!$html) {
            return;
        }

        $uri = $this->service->getLink($html);
        Printer::println($uri, Color::YELLOW);
        if ($uri) {
            $html = $this->fetchPriemPage($uri, $url, $site, $html);
        }

        // The follow-up download may fail as well; an empty response must not
        // reach the parser, so it is treated as "nothing parsed".
        $priem = $html
            ? $this->service->getData(
                $html,
                $this->repository->specialties(),
                $orgId
            )
            : [];

        if ($priem && $this->service->isValidData($priem)) {
            // TODO: persist via $this->repository->insert($priem); for now
            // the parsed rows are only printed.
            Printer::print_r($priem, Color::BLUE);
        } else {
            Printer::println("No result", Color::RED);
            $this->htmlLogger->log("$orgId $url");
        }
        Printer::println();
    }

    /**
     * Downloads the page that the admission-results link points to.
     *
     * Absolute http(s) links are requested as-is. Other links accepted by
     * UrlBuilder::checkUri() are resolved against the site: root-relative
     * links ("/...") against the site URL, everything else against the
     * "sveden/education/" section. Links that match neither rule leave the
     * already downloaded markup untouched.
     *
     * @param string $uri      Link target found on the education page.
     * @param mixed  $url      Site URL as produced by UrlBuilder::build().
     * @param array  $site     Site record passed through to the HTTP client.
     * @param mixed  $fallback Markup returned when the link cannot be resolved.
     *
     * @return mixed Response of the HTTP client for the linked page, or $fallback.
     */
    private function fetchPriemPage(string $uri, $url, array $site, $fallback)
    {
        $absoluteUrl = '/^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*)$/';

        if (preg_match($absoluteUrl, $uri)) {
            return $this->httpClient->getContentOfSite($uri, $site);
        }

        if (!UrlBuilder::checkUri($uri)) {
            return $fallback;
        }

        $path = str_starts_with($uri, '/') ? $uri : "sveden/education/$uri";

        return $this->httpClient->getContentOfSite($url, $site, $path);
    }
}

View File

@ -0,0 +1,97 @@
<?php
namespace SvedenParser\PriemParser;
use SvedenParser\Color;
use SvedenParser\Parser;
use SvedenParser\Printer;
/**
 * Extracts admission ("priem") rows and the admission-results link from a
 * page, using the DOM and XPath objects provided by the base Parser.
 */
final class PriemParser extends Parser
{
    /** XPath prefix selecting descendants of every admission table row. */
    private const TEMPLATE = '//tr[@itemprop="eduPriem"]//';

    /** Map of itemprop name => tag that carries it inside a row. */
    private const FIELDS = [
        "eduCode" => "td",
        "eduName" => "td",
        "eduLevel" => "td",
        "eduForm" => "td",
        "numberBF" => "td",
        "numberBR" => "td",
        "numberBM" => "td",
        "numberP" => "td",
        "score" => "td"
    ];

    /**
     * Builds the list of admission records found in the document.
     *
     * The table is accepted only when every field was found at least once
     * and all fields occur the same number of times; otherwise an empty
     * array is returned. Rows that PriemRow rejects are reported through
     * Printer and skipped.
     *
     * @return array List of arrays as produced by PriemRow::getData().
     */
    public function getDataTable(): array
    {
        if (!$this->xpath) {
            return [];
        }

        $data = $this->parse();
        if (!$data) {
            return [];
        }

        $rowCount = null;
        foreach ($data as $nodes) {
            // DOMXPath::query() yields false on failure; treat it like a missing column.
            if ($nodes === false || $nodes->length === 0) {
                return [];
            }
            $rowCount ??= $nodes->length;
            if ($nodes->length !== $rowCount) {
                return [];
            }
        }

        $records = [];
        for ($i = 0; $i < $rowCount; $i++) {
            try {
                $row = new PriemRow(
                    $data['eduCode']->item($i)->textContent,
                    $data['eduName']->item($i)->textContent,
                    $data['eduLevel']->item($i)->textContent,
                    $data['eduForm']->item($i)->textContent,
                    $data['score']->item($i)->textContent,
                    [
                        $data['numberBF']->item($i)->textContent,
                        $data['numberBR']->item($i)->textContent,
                        $data['numberBM']->item($i)->textContent,
                        $data['numberP']->item($i)->textContent,
                    ],
                );
                $records[] = $row->getData();
            } catch (\Exception | \TypeError $e) {
                // A TypeError is raised by PHP 8 arithmetic on non-numeric
                // cell text; one malformed row must not abort the whole table.
                Printer::println($e->getMessage(), Color::RED);
            }
        }

        return $records;
    }

    /**
     * Runs one XPath query per configured field.
     *
     * @return array Map of field name => result of DOMXPath::query().
     */
    protected function parse(): array
    {
        $data = [];
        foreach (self::FIELDS as $field => $tag) {
            if (is_array($tag)) {
                // Fields described by several candidate tags are not
                // supported yet and are skipped.
                continue;
            }
            $data[$field] = $this->xpath->query(
                self::TEMPLATE . $tag . "[@itemprop=\"$field\"]"
            );
        }

        return $data;
    }

    /**
     * Finds the href of the first anchor whose text mentions the
     * admission-results page.
     *
     * The letters "ё" and "е" are treated as the same letter, so both
     * spellings of the caption ("приёма" / "приема") are recognised.
     *
     * @return string The raw href attribute, or '' when no anchor matches.
     */
    public function getLink(): string
    {
        $needle = 'Информация о результатах приема';

        foreach ($this->dom->getElementsByTagName('a') as $anchor) {
            $caption = str_replace('ё', 'е', $anchor->textContent);
            if (str_contains($caption, $needle)) {
                return $anchor->getAttribute('href');
            }
        }

        return '';
    }
}

View File

@ -0,0 +1,11 @@
<?php
namespace SvedenParser\PriemParser;
use SvedenParser\Repository;
/**
 * Repository for admission ("priem") data.
 */
final class PriemRepository extends Repository
{
    /**
     * Stores parsed admission rows.
     *
     * The method currently has an empty body, so calling it has no effect.
     *
     * @param array $data Rows to store.
     */
    public function insert(array $data): void
    {
        // No persistence logic yet.
    }
}

View File

@ -0,0 +1,54 @@
<?php
namespace SvedenParser\PriemParser;
/**
 * One row of the admission ("priem") table, normalised from raw cell text.
 */
class PriemRow
{
    /**
     * Position inside the $contingent list that is left out of the budget
     * total (the numberP cell in PriemParser; presumably paid places).
     */
    private const NON_BUDGET_INDEX = 3;

    private int $all;
    private int $budget;
    private float $avgScore;

    /**
     * @param string $eduCode    Raw specialty code cell.
     * @param string $eduName    Raw specialty name cell.
     * @param string $eduLevel   Raw education level cell.
     * @param string $eduForm    Raw education form cell.
     * @param string $avgScore   Raw average score cell; a decimal comma is accepted.
     *                           Text that is not a number is stored as 0.0.
     * @param array  $contingent List of raw count cells; every entry is added to the
     *                           total, and every entry except index 3 to the budget total.
     *
     * @throws \Exception When the average score is negative.
     */
    public function __construct(
        private string $eduCode,
        private string $eduName,
        private string $eduLevel,
        private string $eduForm,
        string $avgScore,
        array $contingent,
    ) {
        // Normalise before validating: comparing the raw cell text with 0
        // falls back to a string comparison for values such as " 75,5 " and
        // would wrongly reject them as negative.
        $score = (float) str_replace(',', '.', trim($avgScore));
        if ($score < 0) {
            throw new \Exception('Недействительная средняя сумма набранных баллов обучающихся!');
        }

        $this->eduCode = trim($eduCode);
        $this->eduName = trim($eduName);
        $this->eduLevel = trim($eduLevel);
        $this->eduForm = trim($eduForm);
        $this->avgScore = $score;
        $this->calcContingent($contingent);
    }

    /**
     * Returns the row as an associative array.
     *
     * @return array{spec_code: string, spec_name: string, edu_level: string, edu_forms: string, avgScore: float, contongent: int, budget: int}
     */
    public function getData(): array
    {
        return [
            'spec_code' => $this->eduCode,
            'spec_name' => $this->eduName,
            'edu_level' => $this->eduLevel,
            'edu_forms' => $this->eduForm,
            'avgScore' => $this->avgScore,
            // NOTE(review): the key is spelled 'contongent'; kept unchanged
            // because consumers of this array may already rely on it.
            'contongent' => $this->all,
            'budget' => $this->budget,
        ];
    }

    /**
     * Sums the count cells into the overall and the budget totals.
     *
     * Every cell is cast to int first, so empty or non-numeric text counts
     * as 0 instead of raising a TypeError during the addition.
     *
     * @param array $contingent List of raw count cells.
     */
    private function calcContingent(array $contingent): void
    {
        $all = 0;
        $budget = 0;
        foreach ($contingent as $key => $cell) {
            $count = (int) $cell;
            $all += $count;
            if ($key !== self::NON_BUDGET_INDEX) {
                $budget += $count;
            }
        }

        $this->all = $all;
        $this->budget = $budget;
    }
}

View File

@ -0,0 +1,42 @@
<?php
namespace SvedenParser\PriemParser;
use SvedenParser\Service;
/**
 * Service layer around PriemParser for admission ("priem") data.
 */
final class PriemService extends Service
{
    /**
     * Parse the admission table and enrich the rows.
     *
     * The rows returned by PriemParser::getDataTable() are handed to
     * addSpecId() and addOrgId() before being returned (presumably both
     * modify the rows in place, since their return values are not used here).
     *
     * @param string $html        Markup of the university page.
     * @param array  $specialties List of specialties.
     * @param int    $orgId       Organisation identifier.
     *
     * @return array Admission rows.
     */
    public function getData(string $html, array $specialties, int $orgId): array
    {
        $rows = (new PriemParser($html))->getDataTable();

        $this->addSpecId($rows, $specialties);
        $this->addOrgId($rows, $orgId);

        return $rows;
    }

    /**
     * Check whether the admission records are valid.
     *
     * No per-row check is performed at the moment: the counter is fixed at
     * 1, so every dataset is reported as valid.
     *
     * @param array $contingent Admission rows to validate.
     *
     * @return bool Always true in the current implementation.
     */
    public function isValidData(array $contingent): bool
    {
        $count = 1;

        return (bool) $count;
    }

    /**
     * Look up the link to the admission-results page.
     *
     * @param string $html Markup to search.
     *
     * @return string Result of PriemParser::getLink() for that markup.
     */
    public function getLink(string $html): string
    {
        return (new PriemParser($html))->getLink();
    }
}