Начало обработки по ссылкам

This commit is contained in:
2024-09-04 16:12:03 +03:00
parent 48b4d6cccf
commit 04374fef40
10 changed files with 633 additions and 103 deletions

View File

@ -0,0 +1,29 @@
<?php
namespace ContingentParser;
/**
 * ANSI escape sequences used to colour console output.
 *
 * Every case is backed by the raw escape sequence that is written to the
 * terminal. WHITE is backed by "\033[0m" and doubles as the fallback
 * returned for unrecognised colour names.
 */
enum Color : string
{
    case WHITE = "\033[0m";
    case GREEN = "\033[92m";
    case RED = "\033[91m";
    case BLUE = "\033[94m";

    /**
     * Resolve a colour name to its enum case.
     *
     * The lookup ignores letter case and surrounding whitespace, so
     * 'green', 'GREEN' and ' Green ' all resolve to the same case.
     *
     * @param string $color Colour name: 'green', 'red' or 'blue'.
     * @return Color The matching case, or WHITE for any other value (including '').
     */
    public static function create(string $color) : Color
    {
        return match (strtolower(trim($color))) {
            'green' => self::GREEN,
            'red' => self::RED,
            'blue' => self::BLUE,
            default => self::WHITE,
        };
    }

    /**
     * Return the escape sequence backing this case.
     *
     * @return string The raw backing value of the case.
     */
    public function tostring() : string
    {
        return $this->value;
    }
}

View File

@ -8,10 +8,10 @@ use PDO;
final class Database
{
private PDO $_pdo;
private static $_logFile = 'log/database.log';
private DatabaseConfig $_databaseConfig;
private DatabaseLogger $_logger;
private PDO $pdo;
private static $logfile = 'log/database.log';
private DatabaseConfig $databaseConfig;
private DatabaseLogger $logger;
/**
* Конструктор
* @param \ContingentParser\Database\DatabaseConfig $config
@ -19,23 +19,23 @@ final class Database
*/
public function __construct(DatabaseConfig $config)
{
$this->_logger = new DatabaseLogger(self::$_logFile);
$this->_databaseConfig = $config;
$this->logger = new DatabaseLogger(self::$logfile);
$this->databaseConfig = $config;
try {
$dsn = $this->_databaseConfig->getDsn();
$username = $this->_databaseConfig->getUsername();
$password = $this->_databaseConfig->getPassword();
$this->_pdo = new PDO(
$dsn = $this->databaseConfig->getDsn();
$username = $this->databaseConfig->getUsername();
$password = $this->databaseConfig->getPassword();
$this->pdo = new PDO(
$dsn,
$username,
$password,
[PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION]
);
$message = "Подключение к {$this->_databaseConfig->getDBName()} успешно!";
$this->_logger->log($message);
$message = "Подключение к {$this->databaseConfig->getDBName()} успешно!";
$this->logger->log($message);
} catch (PDOException $e) {
$message = "Ошибка подключения к {$this->_databaseConfig->getDBName()}: {$e->getMessage()}";
$this->_logger->log($message);
$message = "Ошибка подключения к {$this->databaseConfig->getDBName()}: {$e->getMessage()}";
$this->logger->log($message);
}
}
/**
@ -43,8 +43,8 @@ final class Database
*/
public function __destruct()
{
$message = "Подключение к {$this->_databaseConfig->getDBName()} прервано!";
$this->_logger->log($message);
$message = "Подключение к {$this->databaseConfig->getDBName()} прервано!";
$this->logger->log($message);
}
/**
* Выборка данных из базы
@ -57,7 +57,7 @@ final class Database
public function select(string $sql, array $params = []) : array
{
try {
$stmt = $this->_pdo->prepare($sql);
$stmt = $this->pdo->prepare($sql);
if (!empty($params)) {
for ($i = 0; $i < count($params); $i++) {
$stmt->bindParam(":v".($i+1), $params[$i]);
@ -67,7 +67,7 @@ final class Database
$array = $stmt->fetchAll(PDO::FETCH_ASSOC);
} catch (PDOException $e) {
$message = "Ошибка запроса: " . $e->getMessage();
$this->_logger->log($message);
$this->logger->log($message);
} finally {
return $array;
}
@ -83,7 +83,7 @@ final class Database
public function insert(string $sql, array $params)
{
try {
$stmt = $this->_pdo->prepare($sql);
$stmt = $this->pdo->prepare($sql);
$count = 1;
$size = count($params[0]);
foreach ($params as $param) {
@ -95,10 +95,10 @@ final class Database
$size += count($param);
}
$stmt->execute();
$this->_logger->log("Запрос выполнен успешно!");
$this->logger->log("Запрос выполнен успешно!");
} catch (PDOException $e) {
$message = "Ошибка запроса:" . $e->getMessage();
$this->_logger->log($message);
$this->logger->log($message);
// При ошибке запроса сохраняем валидные данные в yaml-файл
if ($e->getCode() === "HY000") {
$yaml = Yaml::dump($params);
@ -117,16 +117,16 @@ final class Database
public function update(string $sql, array $params)
{
try {
$stmt = $this->_pdo->prepare($sql);
$stmt = $this->pdo->prepare($sql);
$count = count($params);
for ($i = 0; $i < $count; $i++) {
$stmt->bindParam(":v".($i+1), $params[$i]);
}
// $stmt->execute();
$this->_logger->log("Запрос выполнен успешно!");
$this->logger->log("Запрос выполнен успешно!");
} catch (PDOException $e) {
$message = "Ошибка запроса:" . $e->getMessage();
$this->_logger->log($message);
$this->logger->log($message);
}
}
}

View File

@ -118,7 +118,7 @@ class DatabaseFacade
*/
public function specialties() : array
{
    // Return this object's own specialties property; a falsy value
    // (e.g. null or an empty array) is normalised to an empty array.
    return $this->specialties ?: [];
}
/**
* Публичное получение id вузов, занесенных в базу opendata
@ -126,7 +126,7 @@ class DatabaseFacade
*/
public function universities() : array
{
    // Return this object's own universities property; a falsy value
    // (e.g. null or an empty array) is normalised to an empty array.
    return $this->universities ?: [];
}
/**
* Извлечение кодов специальности из базы данных niimko

View File

@ -59,7 +59,7 @@ class Facade
return;
}
$url = $this->urlBuilder->build($url);
print(implode(' ', $site). "\033[0m" . PHP_EOL);
Printer::println(implode(' ', $site), 'green');
$html = $this->httpClientFacade->processEducationContingentSites(
$url,
$site
@ -71,22 +71,23 @@ class Facade
$orgId
);
if (empty($contingent)) {
print("\033[91mNo result\033[0m\n");
$this->htmlLogger->log("$orgId $url");
if ($contingent) {
// $contingent = $this->contingentFacade->getContingentFromLink($html);
// if ($contingent) {
Printer::println("No result", 'red');
$this->htmlLogger->log("$orgId $url");
// }
} else {
if ($this->contingentFacade->isValidContingent($contingent)) {
// Заносим в базу
print_r($contingent);
// $this->databaseFacade->insertContingent($contingent);
Printer::print_r($contingent, 'blue');
$this->databaseFacade->insertContingent($contingent);
} else {
$this->htmlLogger->log("$orgId $url");
print("\033[91mNo result\033[0m\n");
Printer::println("No result", 'red');
}
}
unset($contingent);
print(PHP_EOL);
Printer::println();
}
public function getExclusionSites(string $path) : array

View File

@ -2,6 +2,7 @@
namespace ContingentParser\Http;
use ContingentParser\Logger\HttpLogger;
use ContingentParser\Printer;
use CurlHandle;
/**
* Summary of CurlHelper
@ -38,7 +39,7 @@ final class CurlHelper
curl_setopt($this->curl, CURLOPT_CONNECTTIMEOUT, 90);
}
/**
* Прекратить сессии
* Прекратить сессию
*/
public function __destruct()
{
@ -55,7 +56,6 @@ final class CurlHelper
if ($this->checkLocation($this->url, $html)) {
$html = $this->getContent();
}
$this->reportError();
return $html;
}
/**
@ -77,14 +77,14 @@ final class CurlHelper
* Сообщить об ошибке
* @return void
*/
private function reportError() : void
public function reportError() : void
{
$httpLogger = new HttpLogger('log/http-curl.log');
$httpCode = curl_getinfo($this->curl, CURLINFO_HTTP_CODE);
if ($httpCode != 200 && $httpCode != 0) {
print("\033[91mHTTP-code: $httpCode\033[0m\n");
Printer::println("HTTP-code: $httpCode", 'red');
$message = implode(' ', $this->site) . ' HTTP-code(' . $httpCode.')';
$httpLogger->log($message, $httpCode);
} else if ($httpCode == 0) {
@ -93,7 +93,7 @@ final class CurlHelper
$message .= " cURL error ({$errno}): ".curl_strerror($errno);
$httpLogger->log($message);
} else {
print("\033[94mHTTP-code: $httpCode\033[0m\n");
Printer::println("HTTP-code: $httpCode", 'blue');
}
}
}

View File

@ -1,12 +1,8 @@
<?php
namespace ContingentParser\Http;
use ContingentParser\Printer;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\ClientException;
use GuzzleHttp\Exception\ConnectException;
use GuzzleHttp\Exception\RequestException;
use GuzzleHttp\Exception\ServerException;
use GuzzleHttp\Psr7\Exception\MalformedUriException;
use GuzzleHttp\TransferStats;
final class HttpClientFacade
@ -17,10 +13,8 @@ final class HttpClientFacade
public function __construct() {}
/**
* Обработка численности обучающихся
* @param string $url
* URL сайта
* @param array $site
* Идентификатор организации, и базовый URL
* @param string $url URL сайта
* @param array $site Идентификатор организации, и базовый URL
* @return string
*/
public function processEducationContingentSites(
@ -35,24 +29,19 @@ final class HttpClientFacade
$redirectUrl = $stats->getEffectiveUri();
}
]);
print("Redirect $url -> $redirectUrl" . PHP_EOL);
Printer::println("Redirect $url -> $redirectUrl");
$url .= substr($url, -1) == '/' ? '':'/';
$url .= "sveden/education/";
print("Parsing for $url" . PHP_EOL);
$url .= "sveden/education/study";
Printer::println("Parsing for $url");
$response = $client->get($url);
$httpCode = $response->getStatusCode();
print("\033[94mHTTP-code: $httpCode\033[0m\n");
Printer::println("HTTP-code: $httpCode", 'blue');
$html = $response->getBody()->getContents();
} catch (ClientException
| RequestException
| ConnectException
| ServerException
| MalformedUriException $e
} catch (\Exception $e
) {
print("\033[91mHTTP-code: ". $e->getCode(). "\033[0m\n");
Printer::println("HTTP-code: ".$e->getCode(), 'red');
$html = $this->handleException($url, $site);
} finally {
return $html;
@ -60,23 +49,24 @@ final class HttpClientFacade
}
/**
* Обработка исключения
* Повторная попытка спомощью CurlHelper
* @param string $url
* URL сайта
* Повторная попытка с помощью CurlHelper
* @param string $url URL сайта
* @param array $site
* @return string
*/
private function handleException(string $url, array $site) : string
private function handleException(string $url, array $site): string
{
$curlHelper = new CurlHelper($url, $site);
return $curlHelper->getContent();
$html = $curlHelper->getContent();
$curlHelper->reportError();
return $html;
}
/**
* Создать клиента с базовым URL
* @param string $url
* @return \GuzzleHttp\Client
*/
private function createClient(string $url) : Client
private function createClient(string $url): Client
{
$this->config = $this->config() + ["base_uri" => $url];
return new Client($this->config);

View File

@ -68,4 +68,14 @@ class ContingentFacade
$con['org_id'] = $orgId;
}
}
/**
* Summary of getContingentFromLink
* @param string $html
* @return array
*/
// public function getContingentFromLink(string $html): array
// {
// $parser = new ContingentParser($html);
// $
// }
}

View File

@ -7,6 +7,7 @@ use DOMXPath;
class ContingentParser
{
private ?DOMXPath $xpath;
private DOMDocument $dom;
private const TEMPLATE = '//tr[@itemprop="eduChislen"]//';
private const ENCODING = "UTF-8";
private const FIELDS = [
@ -20,15 +21,15 @@ class ContingentParser
public function __construct(string $html)
{
libxml_use_internal_errors(true);
$dom = new DOMDocument(
$this->dom = new DOMDocument(
encoding: self::ENCODING
);
if (empty($html)) {
$this->xpath = null;
} else {
$this->setEncoding($html);
$dom->loadHTML($html);
$this->xpath = new DOMXPath($dom);
$this->dom->loadHTML($html);
$this->xpath = new DOMXPath($this->dom);
}
}
@ -45,32 +46,11 @@ class ContingentParser
}
$html = mb_convert_encoding($html,'HTML-ENTITIES','UTF-8');
}
/**
 * Run one XPath query per entry of self::FIELDS and collect the results.
 *
 * Every query is built as self::TEMPLATE . <tag> . '[@itemprop="<field>"]'.
 * Assumes $this->xpath is non-null; the visible caller checks this first.
 *
 * @return array Query results keyed by the FIELDS keys.
 */
private function parse() : array
{
$data = [];
foreach (self::FIELDS as $field => $tag) {
if (!is_array($tag)) {
// Scalar entry: a single tag name, a single query.
$data[$field] = $this->xpath->query(
self::TEMPLATE . $tag . "[@itemprop=\"$field\"]"
);
} else {
// Array entry: query with element 0 and element 1 separately
// (named $th / $td here; presumably header vs. data cells, confirm against FIELDS).
$th = $this->xpath->query(
self::TEMPLATE . $tag[0] . "[@itemprop=\"$field\"]"
);
$td = $this->xpath->query(
self::TEMPLATE . $tag[1] . "[@itemprop=\"$field\"]"
);
// Keep whichever result has more nodes; on a tie the second ($td) wins.
$data[$field] = $th->length > $td->length ? $th : $td;
}
}
return $data;
}
public function getDataTable() : array
{
if (empty($this->xpath)) return [];
$data = $this->parse();
$data = $this->parseContingent();
$records = [];
if ($data == null) return [];
@ -96,4 +76,32 @@ class ContingentParser
}
return $records;
}
/**
 * Query the document once per entry of self::FIELDS.
 *
 * Each XPath expression is self::TEMPLATE, followed by a tag name, followed
 * by an itemprop predicate for the field. When a FIELDS entry is an array,
 * its elements 0 and 1 are queried separately and the result holding more
 * nodes is kept (the second one on a tie).
 *
 * Assumes $this->xpath is non-null; the visible caller checks this first.
 *
 * @return array Query results keyed by the FIELDS keys.
 */
private function parseContingent() : array
{
    $resultsByField = [];
    foreach (self::FIELDS as $field => $tag) {
        $predicate = "[@itemprop=\"$field\"]";

        if (is_array($tag)) {
            $first = $this->xpath->query(self::TEMPLATE . $tag[0] . $predicate);
            $second = $this->xpath->query(self::TEMPLATE . $tag[1] . $predicate);
            $resultsByField[$field] = $first->length > $second->length ? $first : $second;
            continue;
        }

        $resultsByField[$field] = $this->xpath->query(self::TEMPLATE . $tag . $predicate);
    }
    return $resultsByField;
}
/**
 * Debug helper: dump the href attribute of the first <a> element of the document.
 *
 * NOTE(review): this looks like work in progress. The value is only dumped
 * and the method always returns an empty string; confirm the intended
 * return value before relying on it.
 *
 * @return string Always '' in the current implementation.
 */
public function getLink(): string
{
    $firstAnchor = $this->dom->getElementsByTagName('a')->item(0);

    // item(0) is null when the document has no <a> element (including the
    // empty-HTML case, where nothing is loaded into the DOM); bail out
    // instead of calling a method on null.
    if ($firstAnchor === null) {
        return '';
    }

    var_dump($firstAnchor->getAttribute('href'));
    return '';
}
}

View File

@ -0,0 +1,26 @@
<?php
namespace ContingentParser;
/**
 * Static helpers that write values to standard output wrapped in the
 * escape sequences provided by Color.
 */
class Printer
{
    /**
     * Write text preceded by the sequence for $color and followed by the
     * Color::WHITE sequence, without a trailing newline.
     *
     * @param string $text  Text to write.
     * @param string $color Colour name passed to Color::create().
     */
    public static function print(string $text = '', string $color = '') : void
    {
        $prefix = Color::create($color)->tostring();
        $suffix = Color::WHITE->tostring();
        print($prefix . $text . $suffix);
    }

    /**
     * Same as print(), followed by PHP_EOL.
     *
     * @param string $text  Text to write.
     * @param string $color Colour name passed to Color::create().
     */
    public static function println(string $text = '', string $color = '') : void
    {
        self::print($text, $color);
        print(PHP_EOL);
    }

    /**
     * Dump a value with the built-in print_r(), wrapped in the sequence
     * for $color and the Color::WHITE sequence.
     *
     * @param mixed  $value Value to dump.
     * @param string $color Colour name passed to Color::create().
     */
    public static function print_r(mixed $value, string $color = '') : void
    {
        print(Color::create($color)->tostring());
        print_r($value);
        print(Color::WHITE->tostring());
    }
}