112 lines
3.7 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
namespace ContingentParser;
use ContingentParser\Database\DatabaseFacade;
use ContingentParser\Http\HttpClientFacade;
use ContingentParser\Http\UrlBuilder;
use ContingentParser\Logger\HtmlLogger;
use ContingentParser\Parser\ContingentFacade;
final class Facade
{
    /**
     * Matches a link that is already an absolute http(s) URL, as opposed to
     * a path that has to be combined with the site's base URL.
     */
    private const ABSOLUTE_URL_PATTERN = '/^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*)$/';

    private DatabaseFacade $databaseFacade;
    private HttpClientFacade $httpClientFacade;
    private ContingentFacade $contingentFacade;
    private UrlBuilder $urlBuilder;
    private HtmlLogger $htmlLogger;

    /**
     * Creates the collaborators used by the facade. Pages that yield no
     * usable result are logged to 'log/html.log'.
     */
    public function __construct()
    {
        $this->databaseFacade = new DatabaseFacade();
        $this->httpClientFacade = new HttpClientFacade();
        $this->urlBuilder = new UrlBuilder();
        $this->contingentFacade = new ContingentFacade();
        $this->htmlLogger = new HtmlLogger('log/html.log');
    }

    /**
     * Returns the list of sites to process.
     *
     * With no arguments the list comes from DatabaseFacade::getSitesFromNiimko();
     * otherwise $params is forwarded to DatabaseFacade::getSitesFromMiccedu().
     *
     * @param array $params Sites whose URLs need to be refreshed
     * @return array
     */
    public function getSites(array $params = []): array
    {
        if (!$params) {
            return $this->databaseFacade->getSitesFromNiimko();
        }

        return $this->databaseFacade->getSitesFromMiccedu($params);
    }

    /**
     * Collects, from page microdata, the rows of the
     * "Information on the number of students" table in the "Education" section
     * and stores them in the database.
     *
     * Flow: skip the site if isExit() says so, download the start page, ask
     * ContingentFacade for a link to follow, download that page when a usable
     * link is found, then parse and either insert the result or log a miss.
     *
     * @param array $site Site record holding the organisation id ('org_id')
     *                    and its URL ('site')
     * @return void
     */
    public function collectDataFromContingent(array $site): void
    {
        ['org_id' => $orgId, 'site' => $url] = $site;

        if ($this->isExit($site)) {
            return;
        }

        $url = $this->urlBuilder->build($url);
        Printer::println(implode(' ', $site), Color::GREEN);

        $html = $this->httpClientFacade->getContentOfSite($url, $site);

        $uri = $this->contingentFacade->getLink($html);
        if ($uri) {
            if (preg_match(self::ABSOLUTE_URL_PATTERN, $uri)) {
                // The link is a complete URL, so request it directly.
                // Previously this branch requested $url again, which just
                // re-downloaded the start page and ignored the link.
                // NOTE(review): assumes getContentOfSite() accepts any absolute
                // URL as its first argument — confirm against HttpClientFacade.
                $html = $this->httpClientFacade->getContentOfSite($uri, $site);
            } elseif ($this->urlBuilder->checkUri($uri)) {
                // Not an absolute URL: pass it alongside the base URL.
                $html = $this->httpClientFacade->getContentOfSite($url, $site, $uri);
            }
            // Otherwise the link is unusable; keep parsing the start page.
        }

        // Extract the student-headcount table from the downloaded HTML.
        $contingent = $this->contingentFacade->getContingent(
            $html,
            $this->databaseFacade->specialties(),
            $orgId
        );

        if ($this->contingentFacade->isValidContingent($contingent)
            && $contingent
        ) {
            // Persist the parsed rows.
            Printer::print_r($contingent, Color::BLUE);
            $this->databaseFacade->insertContingent($contingent);
        } else {
            Printer::println("No result", Color::RED);
            $this->htmlLogger->log("$orgId $url");
        }

        Printer::println();
    }

    /**
     * Tells whether a site should be skipped.
     *
     * @param array $site Site record with 'org_id' and 'site' keys
     * @return bool True when the site has no URL or its organisation id is
     *              already listed by DatabaseFacade::universities()
     */
    private function isExit(array $site): bool
    {
        // No URL recorded for the university site.
        if (!$site['site']) {
            return true;
        }

        // Already present in the database.
        // Loose comparison is kept on purpose: the types of 'org_id' and of the
        // universities() entries are not visible here — TODO confirm before
        // switching to strict mode.
        if (in_array($site['org_id'], $this->databaseFacade->universities())) {
            return true;
        }

        return false;
    }
}