добавил абстракций и первые варианты для парсинга результатов приема
This commit is contained in:
81
src/PriemParser/PriemManager.php
Normal file
81
src/PriemParser/PriemManager.php
Normal file
@ -0,0 +1,81 @@
|
||||
<?php
|
||||
namespace SvedenParser\PriemParser;
|
||||
|
||||
use SvedenParser\Color;
|
||||
use SvedenParser\Http\UrlBuilder;
|
||||
use SvedenParser\Manager;
|
||||
use SvedenParser\Printer;
|
||||
|
||||
/**
 * Drives the collection of admission ("priem") results for a single site:
 * downloads the education section, follows the link to the admission-results
 * page when one is present, parses the table and reports the outcome.
 */
final class PriemManager extends Manager
{
    /**
     * Wires the admission-specific repository and service into the base manager.
     */
    public function __construct()
    {
        parent::__construct();
        $this->repository = new PriemRepository();
        $this->service = new PriemService();
    }

    /**
     * Collect the admission data of one organisation.
     *
     * @param array $site Site record; the 'org_id' and 'site' keys are read here.
     */
    public function collectData(array $site): void
    {
        if ($this->isExit($site)) {
            return;
        }

        ['org_id' => $orgId, 'site' => $url] = $site;
        $url = UrlBuilder::build($url);
        Printer::println(implode(' ', $site), Color::GREEN);

        $html = $this->httpClient->getContentOfSite($url, $site, 'sveden/education/');
        if (!$html) {
            return;
        }

        $uri = $this->service->getLink($html);
        Printer::println($uri, Color::YELLOW);

        if ($uri) {
            $linkedHtml = $this->fetchPriemPage($url, $site, $uri);
            // A failed follow-up request must not replace the markup we already
            // have: getData() requires a string, so keep the education page.
            if ($linkedHtml) {
                $html = $linkedHtml;
            }
        }

        // Extract the rows of the admission table.
        $priem = $this->service->getData(
            $html,
            $this->repository->specialties(),
            $orgId
        );

        if ($priem && $this->service->isValidData($priem)) {
            // Storing in the database is not wired up yet; the rows are only printed.
            // TODO: call $this->repository->insert($priem) once insert() is implemented.
            Printer::print_r($priem, Color::BLUE);
        } else {
            Printer::println("No result", Color::RED);
            $this->htmlLogger->log("$orgId $url");
        }
        Printer::println();
    }

    /**
     * Download the page the admission-results link points to.
     *
     * Absolute http(s) links are requested as they are. Other links are requested
     * only when UrlBuilder::checkUri() accepts them: relative to the site root when
     * they start with '/', otherwise relative to the education section.
     *
     * @param mixed  $url  Base URL of the site, as returned by UrlBuilder::build()
     * @param array  $site Site record forwarded to the HTTP client
     * @param string $uri  Value of the link's href attribute
     * @return mixed Whatever the HTTP client returned, or null when no request was made
     */
    private function fetchPriemPage($url, array $site, string $uri)
    {
        $absoluteUrl = '/^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*)$/';

        if (preg_match($absoluteUrl, $uri)) {
            return $this->httpClient->getContentOfSite($uri, $site);
        }

        if (!UrlBuilder::checkUri($uri)) {
            return null;
        }

        $path = str_starts_with($uri, '/') ? $uri : "sveden/education/$uri";

        return $this->httpClient->getContentOfSite($url, $site, $path);
    }
}
|
97
src/PriemParser/PriemParser.php
Normal file
97
src/PriemParser/PriemParser.php
Normal file
@ -0,0 +1,97 @@
|
||||
<?php
|
||||
namespace SvedenParser\PriemParser;
|
||||
|
||||
use SvedenParser\Color;
|
||||
use SvedenParser\Parser;
|
||||
use SvedenParser\Printer;
|
||||
|
||||
/**
 * Extracts admission ("priem") results from the microdata-annotated table of a
 * page and locates the link leading to the admission-results page.
 */
final class PriemParser extends Parser
{
    /** XPath prefix selecting descendants of every row marked itemprop="eduPriem". */
    private const TEMPLATE = '//tr[@itemprop="eduPriem"]//';

    /** Map of itemprop name => tag expected to carry it. */
    private const FIELDS = [
        "eduCode" => "td",
        "eduName" => "td",
        "eduLevel" => "td",
        "eduForm" => "td",
        "numberBF" => "td",
        "numberBR" => "td",
        "numberBM" => "td",
        "numberP" => "td",
        "score" => "td"
    ];

    /**
     * Read the admission table into a list of row arrays.
     *
     * The table is rejected as a whole (empty result) when any column is missing
     * or when the columns do not all contain the same number of cells, because
     * cells are paired across columns by position.
     *
     * @return array List of PriemRow::getData() arrays; empty when nothing usable was found
     */
    public function getDataTable(): array
    {
        if (!$this->xpath) {
            return [];
        }

        $data = $this->parse();
        if (!$data) {
            return [];
        }

        $rowCount = $data['eduName']->length;
        foreach ($data as $cells) {
            if ($cells->length === 0 || $cells->length !== $rowCount) {
                return [];
            }
        }

        $records = [];
        for ($i = 0; $i < $rowCount; $i++) {
            // Text of the i-th cell of the given column.
            $cell = static fn (string $field): string => $data[$field]->item($i)->textContent;

            try {
                $row = new PriemRow(
                    $cell('eduCode'),
                    $cell('eduName'),
                    $cell('eduLevel'),
                    $cell('eduForm'),
                    $cell('score'),
                    [
                        $cell('numberBF'),
                        $cell('numberBR'),
                        $cell('numberBM'),
                        $cell('numberP'),
                    ],
                );
                $records[] = $row->getData();
            } catch (\Exception | \TypeError $e) {
                // A malformed cell can surface as a TypeError from numeric handling;
                // skip that row and report it instead of aborting the whole table.
                Printer::println($e->getMessage(), Color::RED);
            }
        }

        return $records;
    }

    /**
     * Run one XPath query per configured field.
     *
     * @return array Map of field name => result of the XPath query for that field
     */
    protected function parse(): array
    {
        $data = [];
        foreach (self::FIELDS as $field => $tag) {
            $data[$field] = $this->xpath->query(
                self::TEMPLATE . $tag . "[@itemprop=\"$field\"]"
            );
        }

        return $data;
    }

    /**
     * Find the href of the first anchor whose text names the admission-results page.
     *
     * The caption is matched case-insensitively, with either "е" or "ё" in
     * "приёма" and with any run of whitespace between the words, since link
     * captions vary in spelling and are often wrapped across lines.
     *
     * @return string Trimmed href value, or an empty string when no anchor matches
     */
    public function getLink(): string
    {
        $caption = '/Информация\s+о\s+результатах\s+при[её]ма/iu';

        foreach ($this->dom->getElementsByTagName('a') as $anchor) {
            if (preg_match($caption, $anchor->textContent) === 1) {
                return trim($anchor->getAttribute('href'));
            }
        }

        return '';
    }
}
|
11
src/PriemParser/PriemRepository.php
Normal file
11
src/PriemParser/PriemRepository.php
Normal file
@ -0,0 +1,11 @@
|
||||
<?php
|
||||
namespace SvedenParser\PriemParser;
|
||||
use SvedenParser\Repository;
|
||||
|
||||
/**
 * Repository for admission ("priem") results.
 */
final class PriemRepository extends Repository
{
    /**
     * Store admission rows.
     *
     * Currently a no-op: the body is empty, so nothing is written anywhere.
     *
     * @param array $data Rows to store
     */
    public function insert(array $data): void
    {
        // Intentionally empty for now.
    }
}
|
54
src/PriemParser/PriemRow.php
Normal file
54
src/PriemParser/PriemRow.php
Normal file
@ -0,0 +1,54 @@
|
||||
<?php
|
||||
namespace SvedenParser\PriemParser;
|
||||
|
||||
/**
 * One row of the admission-results table: the textual attributes are trimmed,
 * the average score is parsed to a float and the per-source counts are summed.
 */
class PriemRow
{
    /** Index inside the $contingent list that is left out of the budget subtotal. */
    private const NON_BUDGET_INDEX = 3;

    private int $all;
    private int $budget;
    private float $avgScore;

    /**
     * @param string $eduCode    Specialty code cell
     * @param string $eduName    Specialty name cell
     * @param string $eduLevel   Education level cell
     * @param string $eduForm    Education form cell
     * @param string $avgScore   Average score cell; a decimal comma is accepted
     * @param array  $contingent Count cells; every entry is added to the total and
     *                           every entry except index 3 to the budget subtotal
     *
     * @throws \InvalidArgumentException When the parsed average score is negative
     */
    public function __construct(
        private string $eduCode,
        private string $eduName,
        private string $eduLevel,
        private string $eduForm,
        string $avgScore,
        array $contingent,
    ) {
        // Parse first and validate the number: comparing the raw cell text with 0
        // falls back to string ordering for values such as "4,5" or "-".
        $score = (float)str_replace(',', '.', trim($avgScore));
        if ($score < 0) {
            throw new \InvalidArgumentException('Недействительная средняя сумма набранных баллов обучающихся!');
        }

        $this->eduCode = trim($eduCode);
        $this->eduName = trim($eduName);
        $this->eduLevel = trim($eduLevel);
        $this->eduForm = trim($eduForm);
        $this->avgScore = $score;
        $this->calcContingent($contingent);
    }

    /**
     * Export the row as an associative array.
     *
     * @return array Keys: spec_code, spec_name, edu_level, edu_forms, avgScore,
     *               contingent, budget
     */
    public function getData(): array
    {
        return [
            'spec_code' => $this->eduCode,
            'spec_name' => $this->eduName,
            'edu_level' => $this->eduLevel,
            'edu_forms' => $this->eduForm,
            'avgScore' => $this->avgScore,
            // Key spelling corrected from the earlier misspelt 'contongent'.
            'contingent' => $this->all,
            'budget' => $this->budget,
        ];
    }

    /**
     * Sum the count cells into the total and the budget subtotal.
     *
     * Each cell is cast to int before use, so an empty or non-numeric cell counts
     * as 0 for both sums instead of raising a TypeError in the addition.
     *
     * @param array $contingent Count cells, indexed from 0
     */
    private function calcContingent(array $contingent): void
    {
        $all = 0;
        $budget = 0;
        foreach ($contingent as $index => $cell) {
            $count = (int)$cell;
            $all += $count;
            if ($index !== self::NON_BUDGET_INDEX) {
                $budget += $count;
            }
        }

        $this->all = $all;
        $this->budget = $budget;
    }
}
|
42
src/PriemParser/PriemService.php
Normal file
42
src/PriemParser/PriemService.php
Normal file
@ -0,0 +1,42 @@
|
||||
<?php
|
||||
namespace SvedenParser\PriemParser;
|
||||
use SvedenParser\Service;
|
||||
|
||||
/**
 * Service layer for admission ("priem") results: turns page markup into rows
 * and exposes the link lookup of the parser.
 */
final class PriemService extends Service
{
    /**
     * Get the admission data of one organisation.
     *
     * The rows parsed from the markup are handed to the inherited addSpecId()
     * and addOrgId() helpers, in that order. Their return values are not used,
     * so they presumably modify the rows in place (confirm against Service).
     *
     * @param string $html        Markup of the university page
     * @param array  $specialties Array of specialties
     * @param int    $orgId       Organisation identifier
     * @return array
     */
    public function getData(string $html, array $specialties, int $orgId): array
    {
        $rows = (new PriemParser($html))->getDataTable();

        $this->addSpecId($rows, $specialties);
        $this->addOrgId($rows, $orgId);

        return $rows;
    }

    /**
     * Check whether the admission records are valid.
     *
     * Currently a stub: the argument is not inspected and every data set is
     * accepted. A per-row count check was drafted earlier but is disabled.
     *
     * @param array $contingent Array of counts per specialty
     * @return bool Always true
     */
    public function isValidData(array $contingent): bool
    {
        return true;
    }

    /**
     * Find the link to the admission-results page in the given markup.
     *
     * @param string $html Markup to search
     * @return string Whatever PriemParser::getLink() returns for this markup
     */
    public function getLink(string $html): string
    {
        return (new PriemParser($html))->getLink();
    }
}
|
Reference in New Issue
Block a user