Начало обработки по ссылкам
This commit is contained in:
29
ContingentParser/Color.php
Normal file
29
ContingentParser/Color.php
Normal file
@ -0,0 +1,29 @@
|
||||
<?php
|
||||
namespace ContingentParser;
|
||||
|
||||
/**
 * Console colours, each backed by the raw ANSI escape sequence that is
 * written to the terminal.
 *
 * WHITE is backed by "\033[0m" (the ANSI "reset attributes" sequence), so it
 * also serves as the fallback returned for unrecognised colour names.
 */
enum Color : string
{
    case WHITE = "\033[0m";
    case GREEN = "\033[92m";
    case RED = "\033[91m";
    case BLUE = "\033[94m";

    /**
     * Translate a colour name into the matching enum case.
     *
     * The lookup is exact and case-sensitive: only 'green', 'red' and 'blue'
     * are recognised. Any other string (including '') yields Color::WHITE.
     *
     * @param string $color Colour name
     * @return Color
     */
    public static function create(string $color) : Color
    {
        return match ($color) {
            'green' => self::GREEN,
            'red' => self::RED,
            'blue' => self::BLUE,
            default => self::WHITE,
        };
    }

    /**
     * Return the escape sequence backing this case.
     *
     * @return string
     */
    public function tostring() : string
    {
        return $this->value;
    }
}
|
@ -8,10 +8,10 @@ use PDO;
|
||||
|
||||
final class Database
|
||||
{
|
||||
private PDO $_pdo;
|
||||
private static $_logFile = 'log/database.log';
|
||||
private DatabaseConfig $_databaseConfig;
|
||||
private DatabaseLogger $_logger;
|
||||
private PDO $pdo;
|
||||
private static $logfile = 'log/database.log';
|
||||
private DatabaseConfig $databaseConfig;
|
||||
private DatabaseLogger $logger;
|
||||
/**
|
||||
* Конструктор
|
||||
* @param \ContingentParser\Database\DatabaseConfig $config
|
||||
@ -19,23 +19,23 @@ final class Database
|
||||
*/
|
||||
public function __construct(DatabaseConfig $config)
{
    // The logger is created first so that a failed connection can be logged.
    $this->logger = new DatabaseLogger(self::$logfile);
    $this->databaseConfig = $config;
    try {
        $dsn = $this->databaseConfig->getDsn();
        $username = $this->databaseConfig->getUsername();
        $password = $this->databaseConfig->getPassword();
        // ERRMODE_EXCEPTION makes PDO report failures as PDOException.
        $this->pdo = new PDO(
            $dsn,
            $username,
            $password,
            [PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION]
        );
        $message = "Подключение к {$this->databaseConfig->getDBName()} успешно!";
        $this->logger->log($message);
    } catch (PDOException $e) {
        // NOTE(review): the failure is only logged, so $this->pdo stays
        // unassigned and later queries on this object will fail — confirm
        // that this best-effort behaviour is intended.
        $message = "Ошибка подключения к {$this->databaseConfig->getDBName()}: {$e->getMessage()}";
        $this->logger->log($message);
    }
}
|
||||
/**
|
||||
@ -43,8 +43,8 @@ final class Database
|
||||
*/
|
||||
public function __destruct()
{
    // Write a "connection closed" entry for the configured database when the
    // object is destroyed.
    $message = "Подключение к {$this->databaseConfig->getDBName()} прервано!";
    $this->logger->log($message);
}
|
||||
/**
|
||||
* Выборка данных из базы
|
||||
@ -57,7 +57,7 @@ final class Database
|
||||
public function select(string $sql, array $params = []) : array
|
||||
{
|
||||
try {
|
||||
$stmt = $this->_pdo->prepare($sql);
|
||||
$stmt = $this->pdo->prepare($sql);
|
||||
if (!empty($params)) {
|
||||
for ($i = 0; $i < count($params); $i++) {
|
||||
$stmt->bindParam(":v".($i+1), $params[$i]);
|
||||
@ -67,7 +67,7 @@ final class Database
|
||||
$array = $stmt->fetchAll(PDO::FETCH_ASSOC);
|
||||
} catch (PDOException $e) {
|
||||
$message = "Ошибка запроса: " . $e->getMessage();
|
||||
$this->_logger->log($message);
|
||||
$this->logger->log($message);
|
||||
} finally {
|
||||
return $array;
|
||||
}
|
||||
@ -83,7 +83,7 @@ final class Database
|
||||
public function insert(string $sql, array $params)
|
||||
{
|
||||
try {
|
||||
$stmt = $this->_pdo->prepare($sql);
|
||||
$stmt = $this->pdo->prepare($sql);
|
||||
$count = 1;
|
||||
$size = count($params[0]);
|
||||
foreach ($params as $param) {
|
||||
@ -95,10 +95,10 @@ final class Database
|
||||
$size += count($param);
|
||||
}
|
||||
$stmt->execute();
|
||||
$this->_logger->log("Запрос выполнен успешно!");
|
||||
$this->logger->log("Запрос выполнен успешно!");
|
||||
} catch (PDOException $e) {
|
||||
$message = "Ошибка запроса:" . $e->getMessage();
|
||||
$this->_logger->log($message);
|
||||
$this->logger->log($message);
|
||||
// При ошибке запроса сохраняем валидные данные в yaml-файл
|
||||
if ($e->getCode() === "HY000") {
|
||||
$yaml = Yaml::dump($params);
|
||||
@ -117,16 +117,16 @@ final class Database
|
||||
public function update(string $sql, array $params)
{
    // Prepares $sql and binds $params positionally to the named placeholders
    // :v1, :v2, ... (so $params is expected to be a 0-based list).
    try {
        $stmt = $this->pdo->prepare($sql);
        $count = count($params);
        for ($i = 0; $i < $count; $i++) {
            $stmt->bindParam(":v".($i+1), $params[$i]);
        }
        // NOTE(review): execution is currently disabled, so the success
        // message below is logged without the statement being run — confirm
        // before relying on this method.
        // $stmt->execute();
        $this->logger->log("Запрос выполнен успешно!");
    } catch (PDOException $e) {
        $message = "Ошибка запроса:" . $e->getMessage();
        $this->logger->log($message);
    }
}
|
||||
}
|
@ -118,7 +118,7 @@ class DatabaseFacade
|
||||
*/
|
||||
public function specialties() : array
{
    // Return the specialties themselves (not the universities list), falling
    // back to an empty array when the property is falsy.
    return $this->specialties ?: [];
}
|
||||
/**
|
||||
* Публичное получение id вузов, занесенных в базу opendata
|
||||
@ -126,7 +126,7 @@ class DatabaseFacade
|
||||
*/
|
||||
public function universities() : array
{
    // Return the universities themselves (not the specialties list), falling
    // back to an empty array when the property is falsy.
    return $this->universities ?: [];
}
|
||||
/**
|
||||
* Извлечение кодов специальности из базы данных niimko
|
||||
|
@ -59,7 +59,7 @@ class Facade
|
||||
return;
|
||||
}
|
||||
$url = $this->urlBuilder->build($url);
|
||||
print(implode(' ', $site). "\033[0m" . PHP_EOL);
|
||||
Printer::println(implode(' ', $site), 'green');
|
||||
$html = $this->httpClientFacade->processEducationContingentSites(
|
||||
$url,
|
||||
$site
|
||||
@ -71,22 +71,23 @@ class Facade
|
||||
$orgId
|
||||
);
|
||||
|
||||
if (empty($contingent)) {
|
||||
print("\033[91mNo result\033[0m\n");
|
||||
$this->htmlLogger->log("$orgId $url");
|
||||
if ($contingent) {
|
||||
// $contingent = $this->contingentFacade->getContingentFromLink($html);
|
||||
// if ($contingent) {
|
||||
Printer::println("No result", 'red');
|
||||
$this->htmlLogger->log("$orgId $url");
|
||||
// }
|
||||
} else {
|
||||
if ($this->contingentFacade->isValidContingent($contingent)) {
|
||||
// Заносим в базу
|
||||
print_r($contingent);
|
||||
// $this->databaseFacade->insertContingent($contingent);
|
||||
Printer::print_r($contingent, 'blue');
|
||||
$this->databaseFacade->insertContingent($contingent);
|
||||
} else {
|
||||
$this->htmlLogger->log("$orgId $url");
|
||||
print("\033[91mNo result\033[0m\n");
|
||||
Printer::println("No result", 'red');
|
||||
}
|
||||
}
|
||||
|
||||
unset($contingent);
|
||||
print(PHP_EOL);
|
||||
Printer::println();
|
||||
}
|
||||
|
||||
public function getExclusionSites(string $path) : array
|
||||
|
@ -2,6 +2,7 @@
|
||||
namespace ContingentParser\Http;
|
||||
|
||||
use ContingentParser\Logger\HttpLogger;
|
||||
use ContingentParser\Printer;
|
||||
use CurlHandle;
|
||||
/**
|
||||
* Summary of CurlHelper
|
||||
@ -38,7 +39,7 @@ final class CurlHelper
|
||||
curl_setopt($this->curl, CURLOPT_CONNECTTIMEOUT, 90);
|
||||
}
|
||||
/**
|
||||
* Прекратить сессии
|
||||
* Прекратить сессию
|
||||
*/
|
||||
public function __destruct()
|
||||
{
|
||||
@ -55,7 +56,6 @@ final class CurlHelper
|
||||
if ($this->checkLocation($this->url, $html)) {
|
||||
$html = $this->getContent();
|
||||
}
|
||||
$this->reportError();
|
||||
return $html;
|
||||
}
|
||||
/**
|
||||
@ -77,14 +77,14 @@ final class CurlHelper
|
||||
* Сообщить об ошибке
|
||||
* @return void
|
||||
*/
|
||||
private function reportError() : void
|
||||
public function reportError() : void
|
||||
{
|
||||
$httpLogger = new HttpLogger('log/http-curl.log');
|
||||
|
||||
$httpCode = curl_getinfo($this->curl, CURLINFO_HTTP_CODE);
|
||||
|
||||
if ($httpCode != 200 && $httpCode != 0) {
|
||||
print("\033[91mHTTP-code: $httpCode\033[0m\n");
|
||||
Printer::println("HTTP-code: $httpCode", 'red');
|
||||
$message = implode(' ', $this->site) . ' HTTP-code(' . $httpCode.')';
|
||||
$httpLogger->log($message, $httpCode);
|
||||
} else if ($httpCode == 0) {
|
||||
@ -93,7 +93,7 @@ final class CurlHelper
|
||||
$message .= " cURL error ({$errno}): ".curl_strerror($errno);
|
||||
$httpLogger->log($message);
|
||||
} else {
|
||||
print("\033[94mHTTP-code: $httpCode\033[0m\n");
|
||||
Printer::println("HTTP-code: $httpCode", 'blue');
|
||||
}
|
||||
}
|
||||
}
|
@ -1,12 +1,8 @@
|
||||
<?php
|
||||
namespace ContingentParser\Http;
|
||||
|
||||
use ContingentParser\Printer;
|
||||
use GuzzleHttp\Client;
|
||||
use GuzzleHttp\Exception\ClientException;
|
||||
use GuzzleHttp\Exception\ConnectException;
|
||||
use GuzzleHttp\Exception\RequestException;
|
||||
use GuzzleHttp\Exception\ServerException;
|
||||
use GuzzleHttp\Psr7\Exception\MalformedUriException;
|
||||
use GuzzleHttp\TransferStats;
|
||||
|
||||
final class HttpClientFacade
|
||||
@ -17,10 +13,8 @@ final class HttpClientFacade
|
||||
public function __construct() {}
|
||||
/**
|
||||
* Обработка численности обучающихся
|
||||
* @param string $url
|
||||
* URL сайта
|
||||
* @param array $site
|
||||
* Идентификатор организации, и базовый URL
|
||||
* @param string $url URL сайта
|
||||
* @param array $site Идентификатор организации, и базовый URL
|
||||
* @return string
|
||||
*/
|
||||
public function processEducationContingentSites(
|
||||
@ -35,24 +29,19 @@ final class HttpClientFacade
|
||||
$redirectUrl = $stats->getEffectiveUri();
|
||||
}
|
||||
]);
|
||||
|
||||
print("Redirect $url -> $redirectUrl" . PHP_EOL);
|
||||
Printer::println("Redirect $url -> $redirectUrl");
|
||||
$url .= substr($url, -1) == '/' ? '':'/';
|
||||
$url .= "sveden/education/";
|
||||
print("Parsing for $url" . PHP_EOL);
|
||||
$url .= "sveden/education/study";
|
||||
Printer::println("Parsing for $url");
|
||||
|
||||
$response = $client->get($url);
|
||||
$httpCode = $response->getStatusCode();
|
||||
print("\033[94mHTTP-code: $httpCode\033[0m\n");
|
||||
Printer::println("HTTP-code: $httpCode", 'blue');
|
||||
|
||||
$html = $response->getBody()->getContents();
|
||||
} catch (ClientException
|
||||
| RequestException
|
||||
| ConnectException
|
||||
| ServerException
|
||||
| MalformedUriException $e
|
||||
} catch (\Exception $e
|
||||
) {
|
||||
print("\033[91mHTTP-code: ". $e->getCode(). "\033[0m\n");
|
||||
Printer::println("HTTP-code: ".$e->getCode(), 'red');
|
||||
$html = $this->handleException($url, $site);
|
||||
} finally {
|
||||
return $html;
|
||||
@ -60,23 +49,24 @@ final class HttpClientFacade
|
||||
}
|
||||
/**
|
||||
* Обработка исключения
|
||||
* Повторная попытка спомощью CurlHelper
|
||||
* @param string $url
|
||||
* URL сайта
|
||||
* Повторная попытка с помощью CurlHelper
|
||||
* @param string $url URL сайта
|
||||
* @param array $site
|
||||
* @return string
|
||||
*/
|
||||
private function handleException(string $url, array $site): string
{
    // Retry the request through CurlHelper.
    $curlHelper = new CurlHelper($url, $site);
    $html = $curlHelper->getContent();
    // Reporting must happen after the fetch and before returning; an early
    // return of getContent() would skip it.
    $curlHelper->reportError();
    return $html;
}
|
||||
/**
|
||||
* Создать клиента с базовым URL
|
||||
* @param string $url
|
||||
* @return \GuzzleHttp\Client
|
||||
*/
|
||||
private function createClient(string $url) : Client
|
||||
private function createClient(string $url): Client
|
||||
{
|
||||
$this->config = $this->config() + ["base_uri" => $url];
|
||||
return new Client($this->config);
|
||||
|
@ -68,4 +68,14 @@ class ContingentFacade
|
||||
$con['org_id'] = $orgId;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Summary of getContingentFromLink
|
||||
* @param string $html
|
||||
* @return array
|
||||
*/
|
||||
// public function getContingentFromLink(string $html): array
|
||||
// {
|
||||
// $parser = new ContingentParser($html);
|
||||
// $
|
||||
// }
|
||||
}
|
@ -7,6 +7,7 @@ use DOMXPath;
|
||||
class ContingentParser
|
||||
{
|
||||
private ?DOMXPath $xpath;
|
||||
private DOMDocument $dom;
|
||||
private const TEMPLATE = '//tr[@itemprop="eduChislen"]//';
|
||||
private const ENCODING = "UTF-8";
|
||||
private const FIELDS = [
|
||||
@ -20,15 +21,15 @@ class ContingentParser
|
||||
public function __construct(string $html)
{
    // Collect libxml parse problems internally instead of emitting warnings.
    libxml_use_internal_errors(true);
    // The document is kept on the instance so other methods (e.g. getLink)
    // can query it directly.
    $this->dom = new DOMDocument(
        encoding: self::ENCODING
    );
    if (empty($html)) {
        // Nothing to parse: getDataTable() treats a null xpath as "no data".
        $this->xpath = null;
    } else {
        $this->setEncoding($html);
        $this->dom->loadHTML($html);
        $this->xpath = new DOMXPath($this->dom);
    }
}
|
||||
|
||||
@ -45,32 +46,11 @@ class ContingentParser
|
||||
}
|
||||
$html = mb_convert_encoding($html,'HTML-ENTITIES','UTF-8');
|
||||
}
|
||||
private function parse() : array
|
||||
{
|
||||
$data = [];
|
||||
foreach (self::FIELDS as $field => $tag) {
|
||||
if (!is_array($tag)) {
|
||||
$data[$field] = $this->xpath->query(
|
||||
self::TEMPLATE . $tag . "[@itemprop=\"$field\"]"
|
||||
);
|
||||
} else {
|
||||
$th = $this->xpath->query(
|
||||
self::TEMPLATE . $tag[0] . "[@itemprop=\"$field\"]"
|
||||
);
|
||||
$td = $this->xpath->query(
|
||||
self::TEMPLATE . $tag[1] . "[@itemprop=\"$field\"]"
|
||||
);
|
||||
$data[$field] = $th->length > $td->length ? $th : $td;
|
||||
}
|
||||
}
|
||||
return $data;
|
||||
}
|
||||
|
||||
public function getDataTable() : array
|
||||
{
|
||||
if (empty($this->xpath)) return [];
|
||||
|
||||
$data = $this->parse();
|
||||
$data = $this->parseContingent();
|
||||
$records = [];
|
||||
if ($data == null) return [];
|
||||
|
||||
@ -96,4 +76,32 @@ class ContingentParser
|
||||
}
|
||||
return $records;
|
||||
}
|
||||
|
||||
/**
 * Run one XPath query per entry of self::FIELDS and collect the results.
 *
 * For a scalar tag the result of the single query is stored. For a pair of
 * tags both are queried and the first result is kept only when it has
 * strictly more nodes than the second; otherwise the second is kept.
 *
 * @return array Query results keyed by field name
 */
private function parseContingent() : array
{
    $result = [];
    foreach (self::FIELDS as $field => $tag) {
        // Attribute predicate shared by every query for this field.
        $predicate = "[@itemprop=\"$field\"]";

        if (!is_array($tag)) {
            $result[$field] = $this->xpath->query(self::TEMPLATE . $tag . $predicate);
            continue;
        }

        $firstNodes = $this->xpath->query(self::TEMPLATE . $tag[0] . $predicate);
        $secondNodes = $this->xpath->query(self::TEMPLATE . $tag[1] . $predicate);
        if ($firstNodes->length > $secondNodes->length) {
            $result[$field] = $firstNodes;
        } else {
            $result[$field] = $secondNodes;
        }
    }
    return $result;
}
|
||||
|
||||
/**
 * Return the href attribute of the first <a> element in the document.
 *
 * @return string The href value, or '' when the document has no <a>
 *                element (or the first one has no href attribute)
 */
public function getLink(): string
{
    $anchors = $this->dom->getElementsByTagName('a');
    $firstAnchor = $anchors->item(0);
    // item() yields null when the index is out of range, i.e. no anchors.
    if ($firstAnchor === null) {
        return '';
    }
    return $firstAnchor->getAttribute('href');
}
|
||||
}
|
26
ContingentParser/Printer.php
Normal file
26
ContingentParser/Printer.php
Normal file
@ -0,0 +1,26 @@
|
||||
<?php
|
||||
namespace ContingentParser;
|
||||
|
||||
/**
 * Static helpers for writing coloured text to standard output.
 *
 * Each helper emits the escape sequence for the requested colour, then the
 * payload, then the Color::WHITE sequence. Colour names are resolved through
 * Color::create().
 */
class Printer
{
    /**
     * Print text in the given colour without a trailing newline.
     *
     * @param string $text  Text to output
     * @param string $color Colour name understood by Color::create()
     * @return void
     */
    public static function print(string $text = '', string $color = '') : void
    {
        $start = Color::create($color)->tostring();
        $reset = Color::WHITE->tostring();
        print($start . $text . $reset);
    }

    /**
     * Print text in the given colour followed by PHP_EOL.
     *
     * @param string $text  Text to output
     * @param string $color Colour name understood by Color::create()
     * @return void
     */
    public static function println(string $text = '', string $color = '') : void
    {
        self::print($text, $color);
        print(PHP_EOL);
    }

    /**
     * Dump a value with the built-in print_r(), wrapped in the given colour.
     *
     * @param mixed  $value Value to dump
     * @param string $color Colour name understood by Color::create()
     * @return void
     */
    public static function print_r(mixed $value, string $color = '') : void
    {
        print(Color::create($color)->tostring());
        print_r($value);
        print(Color::WHITE->tostring());
    }
}
|
Reference in New Issue
Block a user