102 lines
3.3 KiB
PHP
Raw Normal View History

2024-08-29 21:57:53 +03:00
<?php
namespace ContingentParser;
2024-09-03 20:16:34 +03:00
use ContingentParser\Database\DatabaseFacade;
use ContingentParser\Http\HttpClientFacade;
use ContingentParser\Http\UrlBuilder;
use ContingentParser\Logger\HtmlLogger;
use ContingentParser\Parser\ContingentFacade;
2024-08-29 21:57:53 +03:00
/**
 * Entry point that wires together the database, HTTP, parsing and logging
 * facades used to collect contingent data for organisations.
 */
class Facade
{
    private DatabaseFacade $databaseFacade;
    private HttpClientFacade $httpClientFacade;
    private ContingentFacade $contingentFacade;
    private HtmlLogger $htmlLogger;

    /**
     * Creates the collaborating facades and the logger that records
     * organisations for which no usable contingent data was obtained.
     */
    public function __construct()
    {
        $this->databaseFacade = new DatabaseFacade();
        $this->httpClientFacade = new HttpClientFacade();
        $this->contingentFacade = new ContingentFacade();
        $this->htmlLogger = new HtmlLogger('log/html.log');
    }

    /**
     * Returns the list of sites to process.
     *
     * @param array $params
     * Sites for which updated URLs are needed. When empty, the list is
     * taken from DatabaseFacade::getSitesFromNiimko(); otherwise $params is
     * forwarded to DatabaseFacade::getSitesFromMiccedu().
     * @return array
     */
    public function getSites(array $params = []) : array
    {
        if (empty($params)) {
            return $this->databaseFacade->getSitesFromNiimko();
        }

        return $this->databaseFacade->getSitesFromMiccedu($params);
    }

    /**
     * Collects, from the page microdata, the data of the table
     * "Information on the number of students" in the "Education" section.
     *
     * @param array $site
     * Site record holding the organisation id ('org_id') and URL ('site').
     * @return void
     */
    public function collectDataFromContingent(array $site) : void
    {
        ['org_id' => $orgId, 'site' => $url] = $site;

        // The organisation has no site URL: there is nothing to fetch.
        // (The guard must look at the URL; the $site array itself is never
        // empty once it carries the two keys read above.)
        if (empty($url)) {
            return;
        }

        // Already present in the database.
        // NOTE(review): the comparison is left loose because the id types on
        // both sides are not visible here (string vs int) — confirm before
        // switching to strict mode.
        if (in_array($orgId, $this->databaseFacade->universities())) {
            return;
        }

        $urlBuilder = new UrlBuilder();
        $url = $urlBuilder->build($url);
        Printer::println(implode(' ', $site), 'green');

        $html = $this->httpClientFacade->processEducationContingentSites(
            $url,
            $site
        );

        $contingent = $this->contingentFacade->getContingent(
            $html,
            $this->databaseFacade->specialties(),
            $orgId
        );

        $isUsable = !empty($contingent)
            && $this->contingentFacade->isValidContingent($contingent);

        if ($isUsable) {
            Printer::print_r($contingent, 'blue');
            // Persisting the result is currently switched off:
            // $this->databaseFacade->insertContingent($contingent);
        } else {
            // Empty and invalid results are reported the same way.
            Printer::println("No result", 'red');
            $this->htmlLogger->log("$orgId $url");
        }

        Printer::println();
    }

    /**
     * Reads a log file and rebuilds the list of sites recorded in it.
     *
     * Each line is split on single spaces; the third field is taken as the
     * organisation id and the fourth (when present) as the site URL.
     * Unreadable files yield an empty list; blank lines and lines without
     * an organisation id field are skipped.
     *
     * @param string $path Path to the log file.
     * @return array List of ['org_id' => string, 'site' => string] records.
     */
    public function getExclusionSites(string $path) : array
    {
        if (!is_readable($path)) {
            return [];
        }

        // Strip line endings so the last field does not carry a "\n".
        $logs = file($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($logs === false) {
            return [];
        }

        $result = [];
        foreach ($logs as $log) {
            $data = explode(' ', $log);
            if (!isset($data[2])) {
                // Malformed line: no organisation id to report.
                continue;
            }
            $result[] = [
                'org_id' => $data[2],
                'site' => $data[3] ?? '',
            ];
        }

        return $result;
    }
}