Все переписано на фасады

This commit is contained in:
Alexander 2024-09-03 15:41:45 +03:00
parent 74ba48620c
commit 48b4d6cccf
34 changed files with 1036 additions and 807 deletions

View File

@ -0,0 +1,132 @@
<?php
namespace ContingentParser\Database;
use ContingentParser\Logger\DatabaseLogger;
use Symfony\Component\Yaml\Yaml;
use PDOException;
use PDO;
final class Database
{
    private PDO $_pdo;
    private static $_logFile = 'log/database.log';
    private DatabaseConfig $_databaseConfig;
    private DatabaseLogger $_logger;

    /**
     * Opens a PDO connection using the given configuration and logs the outcome.
     *
     * A connection failure is logged and NOT rethrown, so the object can exist
     * without a usable connection; in that case the first query attempt fails
     * when the uninitialised PDO property is accessed.
     *
     * @param \ContingentParser\Database\DatabaseConfig $config
     * Connection settings (DSN, credentials, database name)
     */
    public function __construct(DatabaseConfig $config)
    {
        $this->_logger = new DatabaseLogger(self::$_logFile);
        $this->_databaseConfig = $config;
        try {
            $this->_pdo = new PDO(
                $this->_databaseConfig->getDsn(),
                $this->_databaseConfig->getUsername(),
                $this->_databaseConfig->getPassword(),
                [PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION]
            );
            $message = "Подключение к {$this->_databaseConfig->getDBName()} успешно!";
            $this->_logger->log($message);
        } catch (PDOException $e) {
            $message = "Ошибка подключения к {$this->_databaseConfig->getDBName()}: {$e->getMessage()}";
            $this->_logger->log($message);
        }
    }

    /**
     * Logs that the connection is being dropped.
     */
    public function __destruct()
    {
        $message = "Подключение к {$this->_databaseConfig->getDBName()} прервано!";
        $this->_logger->log($message);
    }

    /**
     * Runs a SELECT statement and returns all rows.
     *
     * Values are bound to the placeholders :v1, :v2, ... in array order.
     * A PDOException is logged and results in an empty array.
     *
     * @param string $sql
     * SQL query with :vN placeholders
     * @param array $params
     * Values for the placeholders, in order
     * @return array
     * Rows as associative arrays; empty on failure
     */
    public function select(string $sql, array $params = []) : array
    {
        // Initialised up front so the failure path still returns a valid array
        // (previously the variable was undefined and returned from `finally`).
        $rows = [];
        try {
            $stmt = $this->_pdo->prepare($sql);
            $this->bindSequential($stmt, $params);
            $stmt->execute();
            $rows = $stmt->fetchAll(PDO::FETCH_ASSOC);
        } catch (PDOException $e) {
            $message = "Ошибка запроса: " . $e->getMessage();
            $this->_logger->log($message);
        }
        return $rows;
    }

    /**
     * Runs a multi-row INSERT statement.
     *
     * Every row's values are bound, row after row, to consecutive
     * placeholders :v1, :v2, ... On a PDOException the error is logged and,
     * when the error code is "HY000", the rows are appended to
     * not-recorded-in-db.yaml so they can be replayed later.
     *
     * @param string $sql
     * SQL query with :vN placeholders
     * @param array $params
     * List of rows; each row is an array of column values
     * @return void
     */
    public function insert(string $sql, array $params)
    {
        try {
            $stmt = $this->_pdo->prepare($sql);
            $offset = 0;
            foreach ($params as $row) {
                $offset = $this->bindSequential($stmt, $row, $offset);
            }
            $stmt->execute();
            $this->_logger->log("Запрос выполнен успешно!");
        } catch (PDOException $e) {
            $message = "Ошибка запроса:" . $e->getMessage();
            $this->_logger->log($message);
            // Keep the already-validated rows in a YAML file when the query fails
            if ($e->getCode() === "HY000") {
                $yaml = Yaml::dump($params);
                file_put_contents('not-recorded-in-db.yaml', $yaml, FILE_APPEND);
            }
        }
    }

    /**
     * Prepares an UPDATE statement and binds its parameters.
     *
     * NOTE(review): execution is intentionally left disabled, exactly as in
     * the original code, yet the success message is still logged. Confirm
     * whether the update is meant to run before re-enabling execute().
     *
     * @param string $sql
     * SQL query with :vN placeholders
     * @param array $params
     * Values for the placeholders, in order
     * @return void
     */
    public function update(string $sql, array $params)
    {
        try {
            $stmt = $this->_pdo->prepare($sql);
            $this->bindSequential($stmt, $params);
            // $stmt->execute();
            $this->_logger->log("Запрос выполнен успешно!");
        } catch (PDOException $e) {
            $message = "Ошибка запроса:" . $e->getMessage();
            $this->_logger->log($message);
        }
    }

    /**
     * Binds values to consecutive :vN placeholders, starting at :v(offset+1).
     *
     * bindValue() is used instead of bindParam() so nothing depends on
     * references to loop variables staying alive until execute().
     *
     * @param \PDOStatement $stmt
     * Prepared statement to bind on
     * @param array $values
     * Values to bind; keys are ignored, order is kept
     * @param int $offset
     * Number of placeholders already bound
     * @return int
     * Number of placeholders bound so far (offset for the next call)
     */
    private function bindSequential(\PDOStatement $stmt, array $values, int $offset = 0) : int
    {
        foreach (array_values($values) as $value) {
            $offset++;
            $stmt->bindValue(":v{$offset}", $value);
        }
        return $offset;
    }
}

View File

@ -0,0 +1,62 @@
<?php
namespace ContingentParser\Database;
final class DatabaseConfig
{
    /** Keys every database section of the .env file must define. */
    private const REQUIRED_KEYS = [
        'DB_DRIVER',
        'DB_HOST',
        'DB_NAME',
        'DB_PORT',
        'DB_CHARSET',
        'DB_USERNAME',
        'DB_PASSWORD',
    ];

    private string $_driver;
    private string $_host;
    private string $_dbname;
    private string $_port;
    private string $_charset;
    private string $_username;
    private string $_password;

    /**
     * Loads the connection settings of one database from the .env file.
     *
     * @param string $db
     * Section name in the .env file (case-insensitive, e.g. "opendata")
     * @throws \RuntimeException
     * When the .env file cannot be parsed or the section is missing/incomplete
     */
    public function __construct(string $db)
    {
        $config = $this->getDataEnv($db);
        $this->_driver = $config['DB_DRIVER'];
        $this->_host = $config['DB_HOST'];
        $this->_dbname = $config['DB_NAME'];
        $this->_port = $config['DB_PORT'];
        $this->_charset = $config["DB_CHARSET"];
        $this->_username = $config['DB_USERNAME'];
        $this->_password = $config['DB_PASSWORD'];
    }

    /**
     * Reads the section for $db from the .env file in the current working
     * directory (parsed as an INI file with sections).
     *
     * @param string $db
     * Section name; upper-cased before the lookup
     * @return array
     * Key/value pairs of the section
     * @throws \RuntimeException
     * When the file is unreadable, or the section or a required key is absent
     */
    private function getDataEnv(string $db) : array
    {
        $envVars = parse_ini_file('.env', true);
        if ($envVars === false) {
            throw new \RuntimeException('Unable to read or parse the .env file');
        }
        $section = strtoupper($db);
        if (!isset($envVars[$section]) || !is_array($envVars[$section])) {
            throw new \RuntimeException("Section [{$section}] is missing in the .env file");
        }
        $config = $envVars[$section];
        $missing = array_diff(self::REQUIRED_KEYS, array_keys($config));
        if (!empty($missing)) {
            throw new \RuntimeException(
                "Section [{$section}] of the .env file lacks: " . implode(', ', $missing)
            );
        }
        return $config;
    }

    /**
     * @return string Database name
     */
    public function getDBName(): string
    {
        return $this->_dbname;
    }

    /**
     * Builds the PDO DSN in the form
     * driver:host=...;dbname=...;charset=...;port=...
     *
     * @return string
     */
    public function getDsn() : string
    {
        return $this->_driver.":host=".$this->_host
            .";dbname=".$this->_dbname
            .";charset=".$this->_charset
            .";port=".$this->_port;
    }

    /**
     * @return string User name for the connection
     */
    public function getUsername() : string
    {
        return $this->_username;
    }

    /**
     * @return string Password for the connection
     */
    public function getPassword() : string
    {
        return $this->_password;
    }
}

View File

@ -0,0 +1,214 @@
<?php
namespace ContingentParser\Database;
use NilPortugues\Sql\QueryBuilder\Builder\GenericBuilder;
class DatabaseFacade
{
    private GenericBuilder $builder;
    private Database $opendata;
    private Database $niimko;
    private array $specialties;
    private array $universities;

    /**
     * Connects to both databases and preloads the specialty list and the
     * ids of organisations that already have contingent records.
     */
    public function __construct()
    {
        $this->niimko = new Database(new DatabaseConfig('niimko'));
        $this->opendata = new Database(new DatabaseConfig('opendata'));
        $this->specialties = $this->getSpecialties();
        $this->universities = $this->getUniversities();
    }

    /**
     * Fetches organisation ids and site URLs from the niimko database.
     *
     * @return array
     * Rows with the keys org_id and site
     */
    public function getSitesFromNiimko() : array
    {
        /*
        SELECT kod AS org_id, site FROM niimko.s_vuzes
        WHERE ootype = 'vuz' AND deleted = 'n' AND fake = 'n' AND country = 'RU'
        */
        $builder = new GenericBuilder();
        $params = ['vuz', 'n', 'n', 'RU'];
        $query = $builder->select()
            ->setTable('s_vuzes')
            ->setColumns(['org_id' => 'kod', 'site'])
            ->where('AND')
            ->equals('ootype', 'vuz')
            ->equals('deleted', 'n')
            ->equals('fake', 'n')
            ->equals('country', 'RU')
            ->end();
        $sql = $builder->write($query);
        return $this->niimko->select($sql, $params);
    }

    /**
     * Fetches site URLs from the miccedu_monitoring table of the opendata
     * database for the given organisations.
     *
     * @param array $params
     * Sites with outdated URLs; each item must contain the key org_id
     * @return array
     * Rows with the keys org_id and site
     */
    public function getSitesFromMiccedu(array $params) : array
    {
        /*
        SELECT site, vuzkod AS org_id FROM opendata.miccedu_monitoring
        WHERE year = 2023 AND (vuzkod = :val1 OR vuzkod = :val2 OR ...)
        */
        $builder = new GenericBuilder();
        $year = 2023;
        // Reduce every site record to its integer organisation id
        foreach ($params as $key => $org) {
            $params[$key] = (int)$org['org_id'];
        }
        $query = $builder->select()
            ->setTable('miccedu_monitoring')
            ->setColumns(['org_id' => 'vuzkod','site'])
            ->where('AND')
            ->equals('year', $year)
            ->subWhere('OR');
        foreach ($params as $orgId) {
            $query->equals('vuzkod', $orgId);
        }
        $query = $query->end();
        $sql = $builder->write($query);
        // The year is the first placeholder in the generated statement
        array_unshift($params, $year);
        return $this->opendata->select($sql, $params);
    }

    /**
     * Inserts contingent records into the opendata database with a single
     * multi-row INSERT.
     *
     * The builder produces the statement for the first row; placeholder
     * groups for the remaining rows are appended by hand, continuing the
     * :vN numbering (assumes the builder numbers its placeholders :v1..:vK
     * for K columns, as the expected SQL below shows).
     *
     * @param array $contingent
     * List of records; every record must have the same columns
     * @return void
     */
    public function insertContingent(array $contingent) : void
    {
        /*
        INSERT INTO sveden_education_contingent
        (org_id, spec_id, spec_code, spec_name, edu_level, edu_forms, contingent)
        VALUES
        (:v1, :v2, :v3, :v4, :v5, :v6, :v7)
        ...
        */
        // Nothing to insert; also protects the $contingent[0] access below
        if (empty($contingent)) {
            return;
        }
        $contingent = array_values($contingent);
        $builder = new GenericBuilder();
        $columnCount = count($contingent[0]);
        $rowCount = count($contingent);
        $query = $builder->insert()
            ->setTable('sveden_education_contingent')
            ->setValues(
                $contingent[0]
            );
        $sql = $builder->write($query);
        // Placeholder groups for rows 2..N; the group width follows the
        // column count instead of being fixed to seven.
        $next = $columnCount;
        for ($row = 1; $row < $rowCount; $row++) {
            $placeholders = [];
            for ($col = 0; $col < $columnCount; $col++) {
                $placeholders[] = ':v' . (++$next);
            }
            $sql .= ' (' . implode(', ', $placeholders) . ")\n";
        }
        $sql = preg_replace('/\)\s*VALUES\s*/', ') VALUES ', $sql);
        $sql = preg_replace('/\)\s*\(/', '), (', $sql);
        $this->opendata->insert($sql, $contingent);
    }

    /**
     * Returns the preloaded specialties.
     *
     * @return array
     * Rows with the keys spec_id and spec_code
     */
    public function specialties() : array
    {
        // Previously returned $this->universities by mistake
        return $this->specialties;
    }

    /**
     * Returns the ids of organisations already stored in opendata.
     *
     * @return array
     * Flat list of org_id values
     */
    public function universities() : array
    {
        // Previously returned $this->specialties by mistake
        return $this->universities;
    }

    /**
     * Fetches specialty ids and codes from the niimko database.
     *
     * @return array
     * Rows with the keys spec_id and spec_code
     */
    private function getSpecialties() : array
    {
        /*
        SELECT id AS spec_id, kod AS spec_code FROM niimko.s_specs
        WHERE oopkodes = 'gos3p'
        */
        $builder = new GenericBuilder();
        $params = ['gos3p'];
        $query = $builder->select()
            ->setTable('s_specs')
            ->setColumns(['spec_id' =>'id', 'spec_code' => 'kod'])
            ->where()
            ->equals('oopkodes','gos3p')
            ->end();
        $sql = $builder->write($query);
        return $this->niimko->select($sql, $params);
    }

    /**
     * Fetches the distinct organisation ids present in
     * sveden_education_contingent.
     *
     * @return array
     * Flat list of org_id values
     */
    private function getUniversities() : array
    {
        /*
        SELECT DISTINCT org_id FROM sveden_education_contingent
        */
        $builder = new GenericBuilder();
        $query = $builder->select()
            ->setTable('sveden_education_contingent')
            ->setColumns(['org_id'])
            ->where()
            ->greaterThan('org_id', 0)
            ->end();
        $sql = $builder->write($query);
        // The WHERE clause is stripped again and DISTINCT is injected by
        // editing the generated SQL text.
        $sql = preg_replace("/ WHERE.*/", '', $sql);
        $sql = preg_replace('/SELECT/', 'SELECT DISTINCT', $sql);
        $universities = $this->opendata->select($sql);
        return array_column($universities, 'org_id');
    }

    /**
     * Builds a bulk UPDATE of site URLs in niimko.s_vuzes and passes it to
     * the opendata connection.
     *
     * NOTE(review): the code consumes $params as a FLAT list
     * [kod1, site1, kod2, site2, ...] (it pairs consecutive elements and
     * re-appends the even-indexed ones for the IN clause), not as the list
     * of ['org_id' => ..., 'site' => ...] records the original comment
     * described. It also targets niimko.s_vuzes through the opendata
     * connection. Confirm both with the caller.
     *
     * @param array $params
     * Flat list of alternating organisation codes and site URLs
     * @return void
     */
    public function updateSitesOpendata(array $params) : void
    {
        /*
        UPDATE niimko.s_vuzes
        SET site = CASE kod
            WHEN :v1 THEN :v2
            WHEN :v3 THEN :v4
            ...
            ELSE kod
            END
        WHERE kod IN (:v5, :v6...)
        */
        $count = count($params);
        // Append the codes (even positions) once more for the IN (...) list
        for ($i = 0; $i < $count; $i++) {
            if ($i % 2 == 0) {
                $params[] = $params[$i];
            }
        }
        $sql = "UPDATE niimko.s_vuzes\nSET site = CASE kod\n";
        for ($i = 0; $i < $count;) {
            $sql .= "WHEN :v".++$i." THEN :v".++$i."\n";
        }
        $sql .= "ELSE kod\nEND\nWHERE kod in(";
        $total = count($params);
        for ($i = $count; $i < $total;) {
            $sql .= ":v".++$i.",\n";
        }
        $sql = rtrim($sql,",\n") .")\n";
        $this->opendata->update($sql, $params);
    }
}

105
ContingentParser/Facade.php Normal file
View File

@ -0,0 +1,105 @@
<?php
namespace ContingentParser;
use ContingentParser\Database\DatabaseFacade;
use ContingentParser\Http\HttpClientFacade;
use ContingentParser\Http\UrlBuilder;
use ContingentParser\Logger\HtmlLogger;
use ContingentParser\Parser\ContingentFacade;
class Facade
{
    private DatabaseFacade $databaseFacade;
    private HttpClientFacade $httpClientFacade;
    private ContingentFacade $contingentFacade;
    private UrlBuilder $urlBuilder;
    private HtmlLogger $htmlLogger;

    /**
     * Creates the collaborating facades and the HTML problem log.
     */
    public function __construct()
    {
        $this->databaseFacade = new DatabaseFacade();
        $this->httpClientFacade = new HttpClientFacade();
        $this->urlBuilder = new UrlBuilder();
        $this->contingentFacade = new ContingentFacade();
        $this->htmlLogger = new HtmlLogger('log/html.log');
    }

    /**
     * Returns the list of sites to process.
     *
     * @param array $params
     * Sites whose URLs need refreshing; when empty, all sites are taken
     * from niimko, otherwise the URLs are looked up in miccedu
     * @return array
     */
    public function getSites(array $params = []) : array
    {
        if (empty($params)) {
            return $this->databaseFacade->getSitesFromNiimko();
        }
        return $this->databaseFacade->getSitesFromMiccedu($params);
    }

    /**
     * Collects the contingent table from the microdata of the site's
     * "sveden/education" section, prints progress to stdout and logs sites
     * that yield no usable data.
     *
     * @param array $site
     * Site record with the keys org_id and site (URL)
     * @return void
     */
    public function collectDataFromContingent(array $site) : void
    {
        ['org_id' => $orgId, 'site' => $url] = $site;
        // The organisation has no site URL. The check must look at the URL:
        // the $site record itself is never empty at this point.
        if (empty($url)) {
            // $httpLogger->log($orgId);
            return;
        }
        // Already stored in the database
        if (in_array($orgId, $this->databaseFacade->universities())) {
            return;
        }
        $url = $this->urlBuilder->build($url);
        print(implode(' ', $site). "\033[0m" . PHP_EOL);
        $html = $this->httpClientFacade->processEducationContingentSites(
            $url,
            $site
        );
        $contingent = $this->contingentFacade->getContingent(
            $html,
            $this->databaseFacade->specialties(),
            $orgId
        );
        if (empty($contingent)
            || !$this->contingentFacade->isValidContingent($contingent)
        ) {
            print("\033[91mNo result\033[0m\n");
            $this->htmlLogger->log("$orgId $url");
        } else {
            // Store in the database (the insert is currently disabled)
            print_r($contingent);
            // $this->databaseFacade->insertContingent($contingent);
        }
        unset($contingent);
        print(PHP_EOL);
    }

    /**
     * Reads a log file and returns the sites recorded in it.
     *
     * Each line is split on single spaces; the third field is taken as the
     * organisation id and the fourth, when present, as the site URL.
     * Lines without a third field are skipped.
     *
     * @param string $path
     * Path to the log file
     * @return array
     * List of ['org_id' => string, 'site' => string]
     */
    public function getExclusionSites(string $path) : array
    {
        $logs = file($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($logs === false) {
            return [];
        }
        $result = [];
        foreach ($logs as $log) {
            $data = explode(' ', $log);
            if (!isset($data[2])) {
                continue;
            }
            $result[] = [
                'org_id' => $data[2],
                'site' => $data[3] ?? ''
            ];
        }
        return $result;
    }
}

View File

@ -0,0 +1,99 @@
<?php
namespace ContingentParser\Http;
use ContingentParser\Logger\HttpLogger;
use CurlHandle;
/**
* Summary of CurlHelper
*/
final class CurlHelper
{
    /** Upper bound for re-requests triggered by a Location header. */
    private const MAX_LOCATION_HOPS = 10;

    private CurlHandle|bool $curl;
    private string $url;
    private array $site;

    /**
     * Initialises the cURL session.
     *
     * The response is returned as a string including the headers,
     * redirects are followed, and TLS peer/host verification is disabled.
     *
     * @param string $url
     * URL to fetch
     * @param array $site
     * Organisation id and base URL; used only in log messages
     */
    public function __construct(string $url, array $site)
    {
        $this->url = $url;
        $this->site = $site;
        $this->curl = curl_init();
        curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($this->curl, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($this->curl, CURLOPT_HEADER, true);
        curl_setopt($this->curl, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($this->curl, CURLOPT_USERAGENT,
            'Mozilla/5.0 (X11; Linux x86_64) '
            .'AppleWebKit/537.36 (KHTML, like Gecko) '
            .'Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36'
        );
        curl_setopt($this->curl, CURLOPT_SSL_VERIFYHOST, false);
        curl_setopt($this->curl, CURLOPT_CONNECTTIMEOUT, 90);
    }

    /**
     * Closes the cURL session.
     */
    public function __destruct()
    {
        curl_close($this->curl);
    }

    /**
     * Fetches the page and returns the raw response (headers included).
     *
     * While the response carries a Location header the request is repeated
     * against that target, at most MAX_LOCATION_HOPS times; the original
     * recursion had no limit. The outcome is reported once, after the last
     * request.
     *
     * @return string
     * Response text; empty string when the transfer failed
     */
    public function getContent() : string
    {
        $html = '';
        for ($hop = 0; $hop <= self::MAX_LOCATION_HOPS; $hop++) {
            curl_setopt($this->curl, CURLOPT_URL, $this->url);
            $response = curl_exec($this->curl);
            // curl_exec() returns false on failure
            $html = is_string($response) ? $response : '';
            if (!$this->checkLocation($this->url, $html)) {
                break;
            }
        }
        $this->reportError();
        return $html;
    }

    /**
     * Looks for a Location header and, when found, replaces $url with its
     * sanitised value.
     *
     * The pattern is anchored to the start of a line so that
     * "Content-Location:" headers and "location:" inside the page body do
     * not count as redirects.
     *
     * @param string $url
     * Current URL; overwritten with the redirect target (by reference)
     * @param string $html
     * Raw response including headers
     * @return bool
     * True when a non-empty redirect target was found
     */
    private function checkLocation(string &$url, string $html) : bool
    {
        if (preg_match('/^location:(.*?)\n/im', $html, $matches) !== 1) {
            return false;
        }
        // Drop everything except characters expected in a URL
        $target = preg_replace("/[^a-z0-9\-:.\/,]/iu", '', $matches[1]);
        if (!$target) {
            return false;
        }
        $url = $target;
        return true;
    }

    /**
     * Prints the HTTP status of the last transfer and logs failures to
     * log/http-curl.log.
     *
     * @return void
     */
    private function reportError() : void
    {
        $httpLogger = new HttpLogger('log/http-curl.log');
        $httpCode = curl_getinfo($this->curl, CURLINFO_HTTP_CODE);
        if ($httpCode != 200 && $httpCode != 0) {
            print("\033[91mHTTP-code: $httpCode\033[0m\n");
            $message = implode(' ', $this->site) . ' HTTP-code(' . $httpCode.')';
            $httpLogger->log($message, $httpCode);
        } elseif ($httpCode == 0) {
            // Status 0: no HTTP response was received; report the cURL error
            $errno = curl_errno($this->curl);
            $message = implode(' ', $this->site);
            $message .= " cURL error ({$errno}): ".curl_strerror($errno);
            $httpLogger->log($message);
        } else {
            print("\033[94mHTTP-code: $httpCode\033[0m\n");
        }
    }
}

View File

@ -0,0 +1,110 @@
<?php
namespace ContingentParser\Http;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\ClientException;
use GuzzleHttp\Exception\ConnectException;
use GuzzleHttp\Exception\RequestException;
use GuzzleHttp\Exception\ServerException;
use GuzzleHttp\Psr7\Exception\MalformedUriException;
use GuzzleHttp\TransferStats;
final class HttpClientFacade
{
    private Client $client;
    private array $config;

    public function __construct() {}

    /**
     * Downloads the "sveden/education/" page of a site.
     *
     * The base URL is requested first (its effective URI is printed), then
     * the education page. When Guzzle fails at any step, the education
     * page is retried through CurlHelper.
     *
     * @param string $url
     * Base URL of the site
     * @param array $site
     * Organisation id and base URL; used for logging by the fallback
     * @return string
     * Page markup
     */
    public function processEducationContingentSites(
        string $url,
        array $site
    ) : string {
        // Built up front so the cURL fallback always targets the education
        // page, even when the very first request fails.
        $educationUrl = $url . (substr($url, -1) == '/' ? '' : '/')
            . "sveden/education/";
        $redirectUrl = null;
        try {
            $client = $this->createClient($url);
            // Request the base URI
            $client->get('', [
                'on_stats' => function (TransferStats $stats) use (&$redirectUrl) {
                    $redirectUrl = $stats->getEffectiveUri();
                }
            ]);
            print("Redirect $url -> $redirectUrl" . PHP_EOL);
            print("Parsing for $educationUrl" . PHP_EOL);
            $response = $client->get($educationUrl);
            $httpCode = $response->getStatusCode();
            print("\033[94mHTTP-code: $httpCode\033[0m\n");
            return $response->getBody()->getContents();
        } catch (ClientException
            | RequestException
            | ConnectException
            | ServerException
            | MalformedUriException $e
        ) {
            print("\033[91mHTTP-code: ". $e->getCode(). "\033[0m\n");
            return $this->handleException($educationUrl, $site);
        }
    }

    /**
     * Fallback: retries the download with CurlHelper.
     *
     * @param string $url
     * URL to fetch
     * @param array $site
     * Organisation id and base URL
     * @return string
     */
    private function handleException(string $url, array $site) : string
    {
        $curlHelper = new CurlHelper($url, $site);
        return $curlHelper->getContent();
    }

    /**
     * Creates a Guzzle client bound to the given base URL.
     *
     * @param string $url
     * @return \GuzzleHttp\Client
     */
    private function createClient(string $url) : Client
    {
        $this->config = $this->config() + ["base_uri" => $url];
        return new Client($this->config);
    }

    /**
     * Client configuration.
     *
     * Note: the debug log is opened in "w" mode on every call, so
     * log/debug-http.log is truncated each time a client is created.
     *
     * @return array
     */
    private function config() : array
    {
        return [
            'force_ip_resolve' => 'v4',
            'debug' => fopen("log/debug-http.log", "w"),
            // Was misspelled 'allow_directs', which Guzzle does not recognise
            'allow_redirects' => [
                'max' => 5,
                'strict' => true,
                'referer' => true,
                'protocols' => ['http', 'https'],
                'track_redirects' => true
            ],
            'connect_timeout' => 300.0,
            'verify' => false,
            'headers' => [
                'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) '
                    .'AppleWebKit/537.36 (KHTML, like Gecko) '
                    .'Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36',
                'Content-Type' => 'text/html;charset=utf-8'
            ]
        ];
    }
}

View File

@ -0,0 +1,30 @@
<?php
namespace ContingentParser\Http;
class UrlBuilder
{
    public function __construct() {}

    /**
     * Normalises a raw site address to "<scheme>://<host>/".
     *
     * The input is lower-cased and stripped of whitespace, "www/" becomes
     * "www.", a leading "http:\\" is dropped, and "http://" is prepended
     * when no http/https scheme is present. Path and query are discarded;
     * finally every underscore in the result is replaced with a slash.
     *
     * @param string $url
     * Raw URL
     * @return string
     */
    public function build(string $url) : string
    {
        $address = preg_replace('/\s+/', '', trim(strtolower($url)));
        $address = str_replace(["www/", "http:\\\\"], ["www.", ""], $address);
        if (preg_match('#^https?://#', $address) !== 1) {
            $address = "http://$address";
        }
        $parts = parse_url($address);
        $base = $parts['scheme'] . '://' . $parts['host'] . '/';
        return trim(str_replace("_", "/", $base));
    }
}

View File

@ -0,0 +1,12 @@
<?php
namespace ContingentParser\Logger;
final class DatabaseLogger extends Logger
{
    /**
     * Appends "[Y-m-d H:i:s] message" as one line to the log file.
     *
     * @param string $message
     * Text to record
     * @return void
     */
    public function log(string $message) : void
    {
        $line = sprintf("[%s] %s\n", date('Y-m-d H:i:s'), $message);
        file_put_contents($this->_path, $line, FILE_APPEND);
    }
}
}

View File

@ -0,0 +1,12 @@
<?php
namespace ContingentParser\Logger;
class HtmlLogger extends Logger
{
    /**
     * Appends "[Y-m-d H:i:s] message" as one line to the log file.
     *
     * @param string $message
     * Text to record
     * @return void
     */
    public function log(string $message) : void
    {
        $line = sprintf("[%s] %s\n", date('Y-m-d H:i:s'), $message);
        file_put_contents($this->_path, $line, FILE_APPEND);
    }
}
}

View File

@ -0,0 +1,70 @@
<?php
namespace ContingentParser\Logger;
final class HttpLogger extends Logger
{
    /** Reason phrases appended to log lines, keyed by HTTP status code. */
    private const ARR_HTTP_STATUS_CODE = [
        100 => 'Continue',
        101 => 'Switching Protocols',
        200 => 'OK',
        201 => 'Created',
        202 => 'Accepted',
        203 => 'Non-Authoritative Information',
        204 => 'No Content',
        205 => 'Reset Content',
        206 => 'Partial Content',
        300 => 'Multiple Choices',
        301 => 'Moved Permanently',
        302 => 'Found',
        303 => 'See Other',
        304 => 'Not Modified',
        305 => 'Use Proxy',
        306 => 'Switch Proxy',
        307 => 'Temporary Redirect',
        308 => 'Permanent Redirect',
        400 => 'Bad Request',
        401 => 'Unauthorized',
        402 => 'Payment Required',
        403 => 'Forbidden',
        404 => 'Not Found',
        405 => 'Method Not Allowed',
        406 => 'Not Acceptable',
        407 => 'Proxy Authentication Required',
        408 => 'Request Timeout',
        409 => 'Conflict',
        410 => 'Gone',
        411 => 'Length Required',
        412 => 'Precondition Failed',
        413 => 'Payload Too Large',
        414 => 'URI Too Long',
        415 => 'Unsupported Media Type',
        416 => 'Range Not Satisfiable',
        417 => 'Expectation Failed',
        418 => 'I\'m a teapot',
        429 => 'Too Many Requests',
        451 => 'Unavailable For Legal Reasons',
        500 => 'Internal Server Error',
        501 => 'Not Implemented',
        502 => 'Bad Gateway',
        503 => 'Service Unavailable',
        504 => 'Gateway Timeout',
        505 => 'HTTP Version Not Supported',
        506 => 'Variant Also Negotiates',
        507 => 'Insufficient Storage',
        508 => 'Loop Detected',
        509 => 'Bandwidth Limit Exceeded',
        510 => 'Not Extended',
        511 => 'Network Authentication Required'
    ];

    /**
     * Appends "[Y-m-d H:i:s] message" to the log file; when a status code
     * is given, its reason phrase is appended after the message.
     *
     * @param string $message
     * Text to record
     * @param int|null $httpCode
     * HTTP status code; null or 0 means "no code". Codes missing from the
     * table are recorded as "Unknown Status" instead of raising an
     * undefined-index warning.
     * @return void
     */
    public function log(string $message, ?int $httpCode = null) : void
    {
        $date = date('Y-m-d H:i:s');
        if (empty($httpCode)) {
            $logMessage = "[$date] $message\n";
        } else {
            $reason = self::ARR_HTTP_STATUS_CODE[$httpCode] ?? 'Unknown Status';
            $logMessage = "[$date] $message " . $reason . "\n";
        }
        file_put_contents($this->_path, $logMessage, FILE_APPEND);
    }
}

View File

@ -0,0 +1,12 @@
<?php
namespace ContingentParser\Logger;
abstract class Logger
{
    /**
     * @param string $_path
     * Path of the log file; kept for use by the concrete loggers
     */
    public function __construct(protected string $_path)
    {
    }
}
}

View File

@ -0,0 +1,71 @@
<?php
namespace ContingentParser\Parser;
class ContingentFacade
{
    /**
     * Parses the contingent table from the page markup and enriches every
     * record with the specialty id and the organisation id.
     *
     * @param string $html
     * Markup of the organisation's page
     * @param array $specialties
     * Specialty rows with the keys spec_id and spec_code
     * @param int $orgId
     * Organisation id
     * @return array
     * Contingent records
     */
    public function getContingent(
        string $html,
        array $specialties,
        int $orgId
    ) : array {
        $records = (new ContingentParser($html))->getDataTable();
        $this->addSpecId($records, $specialties);
        $this->addOrgId($records, $orgId);
        return $records;
    }

    /**
     * Tells whether the records carry any students at all: the sum of the
     * "contingent" fields must be non-zero.
     *
     * @param array $contingent
     * Contingent records
     * @return bool
     */
    public function isValidContingent(array $contingent) : bool
    {
        $total = 0;
        foreach ($contingent as $record) {
            $total += $record['contingent'];
        }
        return (bool) $total;
    }

    /**
     * Adds spec_id to every record, looked up by the record's spec_code;
     * null when the code is unknown.
     *
     * @param array $contingent
     * Contingent records (modified in place)
     * @param array $specialties
     * Specialty rows with the keys spec_id and spec_code
     * @return void
     */
    private function addSpecId(array &$contingent, array $specialties) : void
    {
        $idByCode = array_column($specialties, 'spec_id', 'spec_code');
        foreach ($contingent as &$record) {
            $record['spec_id'] = $idByCode[$record['spec_code']] ?? null;
        }
        unset($record);
    }

    /**
     * Adds the organisation id to every record.
     *
     * @param array $contingent
     * Contingent records (modified in place)
     * @param int $orgId
     * Organisation id
     * @return void
     */
    private function addOrgId(array &$contingent, int $orgId): void
    {
        foreach (array_keys($contingent) as $index) {
            $contingent[$index]['org_id'] = $orgId;
        }
    }
}

View File

@ -1,71 +1,77 @@
<?php
namespace App\Library;
namespace ContingentParser\Parser;
use App\Library\ContingentRow;
use DOMDocument;
use DOMXPath;
class ContingentParser
{
private \DOMXPath $xpath;
private string $template;
private ?DOMXPath $xpath;
private const TEMPLATE = '//tr[@itemprop="eduChislen"]//';
private const ENCODING = "UTF-8";
private const FIELDS = [
"eduCode" => "td",
"eduName" => "td",
"eduLevel" => "td",
"eduForm" => "td",
// "numberBF" => "th",
// "numberBFF" => "th",
// "numberBR" => "th",
// "numberBRF" => "th",
// "numberBM" => "th",
// "numberBMF" => "th",
// "numberP" => "th",
// "numberPF" => "th",
"numberAll" => ["th", "td"]
];
public function __construct(string $html, string $template)
public function __construct(string $html)
{
libxml_use_internal_errors(true);
$dom = new \DOMDocument(
// encoding: "UTF-8"
$dom = new DOMDocument(
encoding: self::ENCODING
);
if (empty($html)) {
$this->xpath = null;
} else {
$this->setEncoding($html);
$dom->loadHTML($html);
$this->xpath = new DOMXPath($dom);
}
}
private function setEncoding(string &$html) : void
{
$encoding = mb_detect_encoding($html, 'UTF-8, windows-1251');
if ($encoding != "UTF-8") {
if ($encoding != self::ENCODING) {
$html = mb_convert_encoding(
$html,
'UTF-8',
self::ENCODING,
$encoding
);
$html = str_replace('windows-1251','utf-8', $html);
$html = str_replace('windows-1251',self::ENCODING, $html);
}
$dom->loadHTML(mb_convert_encoding($html,'HTML-ENTITIES','UTF-8'));
$this->xpath = new \DOMXPath($dom);
$this->template = $template;
$html = mb_convert_encoding($html,'HTML-ENTITIES','UTF-8');
}
private function parse(): array
private function parse() : array
{
$data = array();
$data = [];
foreach (self::FIELDS as $field => $tag) {
if (!is_array($tag)) {
$data[$field] = $this->xpath->query($this->template . $tag . "[@itemprop=\"$field\"]");
$data[$field] = $this->xpath->query(
self::TEMPLATE . $tag . "[@itemprop=\"$field\"]"
);
} else {
$th = $this->xpath->query($this->template . $tag[0] . "[@itemprop=\"$field\"]");
$td = $this->xpath->query($this->template . $tag[1] . "[@itemprop=\"$field\"]");
$th = $this->xpath->query(
self::TEMPLATE . $tag[0] . "[@itemprop=\"$field\"]"
);
$td = $this->xpath->query(
self::TEMPLATE . $tag[1] . "[@itemprop=\"$field\"]"
);
$data[$field] = $th->length > $td->length ? $th : $td;
}
}
return $data;
}
public function getDataTable() : array
{
if (empty($this->xpath)) return [];
$data = $this->parse();
$records = array();
// var_dump($data);
// exit(0);
$records = [];
if ($data == null) return [];
$equal = $data['eduName']->length;

View File

@ -1,6 +1,6 @@
<?php
namespace App\Library;
// Специальность, направление подготовки
namespace ContingentParser\Parser;
class ContingentRow
{
public function __construct(

View File

@ -1,37 +0,0 @@
<?php
use App\Library\ContingentManager;
use App\Library\Database;
use App\Library\DatabaseConfig;
use Symfony\Component\Yaml\Yaml;
// При ошибке в запросе к БД. Распарсингованные данные
// сохраняются в yaml-файле. Скрипт парсит этот файл и заносит в БД
require_once "vendor/autoload.php";
/**
 * Returns the nesting depth of an array: 1 for an array without nested
 * arrays, plus 1 for every further level of nesting.
 */
function array_depth(array $array)
{
    $depths = [1];
    foreach ($array as $item) {
        if (is_array($item)) {
            $depths[] = array_depth($item) + 1;
        }
    }
    return max($depths);
}
// Load the records that could not be written to the database earlier.
$data = Yaml::parse(file_get_contents(__DIR__ . '/not-recorded-in-db.yaml'));
$db = new Database(new DatabaseConfig('opendata'));
// $orgs = ContingentManager::getInstance()->getOrgs($dbOpendata);
// Depth 2 means the file holds a single batch (a list of records);
// otherwise every top-level element is treated as a batch of its own.
if(array_depth($data) == 2){
ContingentManager::getInstance()->insertContingent($db, $data);
} else {
foreach ($data as $value) {
ContingentManager::getInstance()->insertContingent($db, $value);
}
}

View File

@ -1,215 +0,0 @@
<?php
use App\Library\ContingentManager;
use App\Library\DatabaseConfig;
use App\Library\Logger;
use GuzzleHttp\Psr7\Exception\MalformedUriException;
use GuzzleHttp\Exception\ClientException;
use GuzzleHttp\Exception\ConnectException;
use GuzzleHttp\Exception\RequestException;
use GuzzleHttp\Exception\ServerException;
use App\Library\ContingentParser;
use App\Library\Database;
use GuzzleHttp\Client;
use GuzzleHttp\TransferStats;
use Symfony\Component\Yaml\Yaml;
// Per-day log files under ../log/<Y-m-d>/ relative to this script.
$pathLogErrorHtml = __DIR__.'/../log/'. date('Y-m-d') . '/error-html.log';
$pathLogErrorHttp = __DIR__.'/../log/'. date('Y-m-d') . '/error-http-curl.log';
$pathErrorHttp = __DIR__.'/../log/'. date('Y-m-d') . '/error-http.log';
// One connection per database section of the configuration.
$dbOpendata = new Database(new DatabaseConfig('opendata'));
$dbNiimko = new Database(new DatabaseConfig('niimko'));
// Sites to process, read through the niimko connection.
$sites = ContingentManager::getInstance()->getSites($dbNiimko);
// $specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);
// $orgs = ContingentManager::getInstance()->getOrgs($dbOpendata);
// $exceptionsOrgHtml = ContingentManager::getInstance()->getExceptionsHtml('select-html-error.log');
// $exceptionsOrgHttpCurl = ContingentManager::getInstance()->getExceptionsHttpCurl('select-http-error.log');
// print_r($exceptionsOrgHttpCurl);
// echo count($exceptionsOrgHttpCurl) . " - http-error sites" . PHP_EOL;
// $start = 794;
// for ($i = $start; $i < count($sites); $i++) {
// // Нет URL сайта вуза
// if (empty($sites[$i]['site'])) {
// $message = implode(' ', $sites[$i]);
// Logger::log($pathLogErrorHttp, $message);
// Logger::log($pathErrorHttp, implode(' ', $sites[$i]));
// continue;
// }
// // Уже в базе
// if (in_array($sites[$i]['org_id'], $orgs)) {
// continue;
// }
// // С ошибками разметки игнорируем
// if (in_array($sites[$i]['org_id'], $exceptionsOrgHtml)) {
// continue;
// }
// // Без ошибок http игнорируем
// if (!in_array($sites[$i]['org_id'], $exceptionsOrgHttpCurl)) {
// continue;
// }
// try {
// $baseUri = ContingentManager::getInstance()->buildBaseUri($sites[$i]['site']);
// $client = new Client([
// 'force_ip_resolve' => 'v4',
// 'debug' => fopen("debug-http.log", "a"),
// 'base_uri' => $baseUri,
// 'allow_directs' => [
// 'max' => 5,
// 'strict' => true,
// 'referer' => true,
// 'protocols' => ['http', 'https'],
// 'track_redirects' => true
// ],
// 'connect_timeout' => 300.0,
// 'verify' => false,
// 'headers' => [
// 'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36',
// 'Content-Type' => 'text/html;charset=utf-8'
// ]
// ]);
// $response = $client->get('', [
// 'on_stats' => function (TransferStats $stats) use (&$url) {
// $url = $stats->getEffectiveUri();
// }
// ]);
// print(($i+1). '. ' . implode(' ', $sites[$i]) . PHP_EOL);
// if (substr($url, -1) == '/') {
// $url = $url."sveden/education/";
// } else {
// $url = $url."/sveden/education/";
// }
// echo $url .PHP_EOL;
// $response = $client->get($url, [
// 'on_stats' => function (TransferStats $stats) use (&$url) {
// $url = $stats->getEffectiveUri();
// }
// ]);
// echo $url . PHP_EOL;
// $html = $response->getBody()->getContents();
// if (empty($html)) {
// $message = implode(' ', $sites[$i]);
// Logger::log($pathLogErrorHtml, $message);
// continue;
// }
// $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
// $contingent = $parser->getDataTable();
// // Добавляем поле spec_id по spec_code
// ContingentManager::getInstance()->addSpecId($contingent, $specializations);
// // Добавляем поле org_id
// ContingentManager::getInstance()->addOrgId($contingent, $sites[$i]['org_id']);
// print_r($contingent);
// if (empty($contingent)) {
// $message = implode(' ', $sites[$i]);
// Logger::log($pathLogErrorHtml, $message);
// } else {
// $set = ContingentManager::getInstance()->checkContingent($contingent);
// if ($set) {
// // Заносим в базу
// ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent);
// } else {
// $message = implode(' ', $sites[$i]);
// Logger::log($pathLogErrorHtml, $message);
// }
// unset($contingent);
// }
// } catch (ClientException
// | RequestException
// | ConnectException
// | ServerException
// | MalformedUriException $e
// ) {
// $message = implode(' ', $sites[$i]) . " " . $e->getCode() . " " . $e->getMessage();
// Logger::log($pathLogErrorHttp, $message);
// Logger::log($pathErrorHttp, implode(' ', $sites[$i]));
// }
// }
$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);
$orgs = ContingentManager::getInstance()->getOrgs($dbOpendata);
// $sites = Yaml::parse(file_get_contents(dirname(__FILE__) ."/sites.yaml"));
// print_r($sites);

// Walk over every site: download its "sveden/education/" page with cURL,
// parse the contingent table and store valid results in the database.
$siteCount = count($sites);
for ($i = 0; $i < $siteCount; $i++) {
    // The organisation has no site URL
    if (empty($sites[$i]['site'])) {
        // $message = implode(' ', $sites[$i]);
        // Logger::log($pathLogErrorHttp, $message);
        // Logger::log($pathErrorHttp, implode(' ', $sites[$i]));
        continue;
    }
    // Already stored in the database
    if (in_array($sites[$i]['org_id'], $orgs)) {
        continue;
    }
    // Disabled filter: skip sites with markup errors
    // if (in_array($sites[$i]['org_id'], $exceptionsOrgHtml)) {
    //     continue;
    // }
    // Disabled filter: skip sites without HTTP errors
    // if (!in_array($sites[$i]['org_id'], $exceptionsOrgHttpCurl)) {
    //     continue;
    // }
    print(($i+1). '. ' . implode(' ', $sites[$i]) . PHP_EOL);

    $uri = trim(ContingentManager::getInstance()->buildBaseUri($sites[$i]['site']));
    $uri = str_replace("_","/", $uri);
    if (substr($uri, -1) == '/') {
        $uri = $uri."sveden/education/";
    } else {
        $uri = $uri."/sveden/education/";
    }
    echo $uri . PHP_EOL;

    $ch = curl_init($uri);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_HEADER, false);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    $html = curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    // Read the error number and close the handle before branching, so the
    // handle is released on the error path as well.
    $errno = curl_errno($ch);
    curl_close($ch);

    if ($httpCode != 200) {
        $message = implode(' ', $sites[$i]);
        $message .= " cURL error ({$errno}): ".curl_strerror($errno);
        Logger::log($pathLogErrorHttp, $message);
        unset($httpCode);
        continue;
    }
    echo "HTTP-code: " . $httpCode . PHP_EOL;
    if (empty($html)) continue;

    $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
    $contingent = $parser->getDataTable();
    // Add the spec_id field, looked up by spec_code
    ContingentManager::getInstance()->addSpecId($contingent, $specializations);
    // Add the org_id field
    ContingentManager::getInstance()->addOrgId($contingent, $sites[$i]['org_id']);

    if (empty($contingent)) {
        echo "empty". PHP_EOL;
        $message = implode(' ', $sites[$i]);
        Logger::log($pathLogErrorHtml, $message);
    } else {
        print_r($contingent);
        $set = ContingentManager::getInstance()->checkContingent($contingent);
        if ($set) {
            // Store in the database
            ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent);
        } else {
            $message = implode(' ', $sites[$i]);
            Logger::log($pathLogErrorHtml, $message);
        }
        unset($contingent);
        unset($httpCode);
    }
}

View File

@ -1,187 +0,0 @@
<?php
namespace App\Library;
use NilPortugues\Sql\QueryBuilder\Builder\GenericBuilder;
/**
 * Singleton helper that builds the SQL queries and massages the data used
 * while collecting "contingent" (student headcount) tables from
 * organisation web sites.
 */
final class ContingentManager
{
    /** Lazily created shared instance. */
    private static ?ContingentManager $instance = null;

    /** Query builder used to generate the SELECT statements. */
    private ?GenericBuilder $builder;

    private function __construct()
    {
        $this->builder = new GenericBuilder();
    }

    /**
     * Returns the shared instance, creating it on first use.
     */
    public static function getInstance() : ContingentManager
    {
        self::$instance ??= new self();
        return self::$instance;
    }

    /**
     * Selects organisation ids and site URLs from s_vuzes.
     *
     * Equivalent SQL:
     *   select kod as org_id, site from s_vuzes
     *   where ootype = 'vuz' and deleted = 'n' and fake = 'n' and country = 'RU'
     *
     * @param Database $db Connection the query is executed on.
     * @return array Rows as returned by Database::selectQuery().
     */
    public function getSites(Database $db): array
    {
        // Values are passed separately; they must stay in the same order as
        // the equals() calls below.
        $params = ['vuz', 'n', 'n', 'RU'];
        $query = $this->builder->select()
            ->setTable('s_vuzes')
            ->setColumns(['org_id' => 'kod', 'site'])
            ->where('AND')
            ->equals('ootype', 'vuz')
            ->equals('deleted', 'n')
            ->equals('fake', 'n')
            ->equals('country', 'RU')
            ->end();
        $sql = $this->builder->write($query);

        return $db->selectQuery($sql, $params);
    }

    /**
     * Selects site URLs from miccedu_monitoring for the given organisations.
     *
     * Equivalent SQL:
     *   select site, vuzkod as org_id from miccedu_monitoring
     *   where year = :year and (vuzkod = :val1 or vuzkod = :val2 or ...)
     *
     * @param Database $db     Connection the query is executed on.
     * @param array    $params Rows that each carry an 'org_id' entry.
     * @param int      $year   Monitoring year to filter on (2023 by default,
     *                         the value that used to be hard-coded).
     * @return array Rows as returned by Database::selectQuery().
     */
    public function getSitesFromMiccedu(Database $db, array $params, int $year = 2023) : array
    {
        $orgIds = [];
        foreach ($params as $org) {
            $orgIds[] = (int)$org['org_id'];
        }

        $query = $this->builder->select()
            ->setTable('miccedu_monitoring')
            ->setColumns(['org_id' => 'vuzkod','site'])
            ->where('AND')
            ->equals('year', $year)
            ->subWhere('OR');
        foreach ($orgIds as $orgId) {
            $query->equals('vuzkod', $orgId);
        }
        $query = $query->end();
        $sql = $this->builder->writeFormatted($query);

        // The year is the first condition in the query, so it goes first.
        return $db->selectQuery($sql, [$year, ...$orgIds]);
    }

    /**
     * Inserts all contingent rows with a single multi-row INSERT.
     *
     * Placeholders are named ":<column><row number>" (row numbers start
     * at 1) and are generated from the column list, so the order of keys
     * inside a row can no longer shift values into the wrong column.
     *
     * @param Database $db         Connection the statement is executed on.
     * @param array    $contingent Rows keyed by the column names below.
     */
    public function insertContingent(Database $db, array $contingent) : void
    {
        // Nothing to insert: a "values" clause without tuples is invalid SQL.
        if ($contingent === []) {
            return;
        }

        $columns = ['spec_code', 'spec_name', 'edu_level', 'edu_forms', 'contingent', 'spec_id', 'org_id'];
        // Re-index so that placeholder numbers match the row positions
        // Database::insertQuery() binds against.
        $rows = array_values($contingent);

        $tuples = [];
        foreach (array_keys($rows) as $index) {
            $placeholders = [];
            foreach ($columns as $column) {
                $placeholders[] = ':' . $column . ($index + 1);
            }
            $tuples[] = '(' . implode(',', $placeholders) . ')';
        }

        $sql = "insert into sveden_education_contingent"
            . "(" . implode(',', $columns) . ") values"
            . implode(',', $tuples);
        $db->insertQuery($sql, $rows);
    }

    /**
     * Selects specialisation ids and codes from s_specs.
     *
     * Equivalent SQL:
     *   select id, kod from s_specs where oopkodes = 'gos3p'
     *
     * @param Database $db Connection the query is executed on.
     * @return array Rows with 'id' and 'kod' entries.
     */
    public function getSpecializations(Database $db) : array
    {
        $params = ['gos3p'];
        $query = $this->builder->select()
            ->setTable('s_specs')
            ->setColumns(['id', 'kod'])
            ->where()
            ->equals('oopkodes','gos3p')
            ->end();
        $sql = $this->builder->write($query);

        return $db->selectQuery($sql, $params);
    }

    /**
     * Returns the distinct org_id values already stored in
     * sveden_education_contingent.
     *
     * @param Database $db Connection the query is executed on.
     * @return array Flat list of org_id values.
     */
    public function getOrgs(Database $db) : array
    {
        $sql = 'SELECT DISTINCT org_id FROM sveden_education_contingent';
        $orgs = [];
        foreach ($db->selectQuery($sql) as $row) {
            $orgs[] = $row['org_id'];
        }
        return $orgs;
    }

    /**
     * Builds "http://<host>/sveden/education/" from a site value.
     *
     * The scheme is always forced to http and any path of the input is
     * dropped. An input that cannot be parsed no longer triggers array
     * access warnings; it yields a URL with an empty host instead.
     *
     * @param string $url Site address, with or without a scheme.
     * @return string The education page URL.
     */
    public function buildBaseUri(string $url): string
    {
        // Stray whitespace / line breaks would otherwise end up in the host.
        $url = trim($url);
        // Repair the "www/" typo seen in source data.
        $url = str_replace("www/", "www.", $url);
        if (!str_contains($url, 'https://') && !str_contains($url, 'http://')) {
            $url = "http://$url";
        }
        $url = str_replace("https://", "http://", $url);

        $parts = parse_url($url);
        if (!is_array($parts)) {
            $parts = [];
        }
        $base = ($parts['scheme'] ?? 'http') . '://' . ($parts['host'] ?? '') . '/';
        // Underscores in the rebuilt base are turned into slashes
        // (kept from the original implementation).
        $base = str_replace("_", "/", $base);

        return trim($base . "sveden/education/");
    }

    /**
     * Adds a 'spec_id' entry to every contingent row by matching its
     * 'spec_code' against the 'kod' of the specialisations. Rows without
     * a match get null; if several specialisations match, the last wins.
     *
     * @param array $contingent      Rows to extend (modified in place).
     * @param array $specializations Rows with 'id' and 'kod' entries.
     */
    public function addSpecId(array &$contingent, array $specializations) : void
    {
        foreach ($contingent as $key => $row) {
            $specId = null;
            foreach ($specializations as $spec) {
                if ($row['spec_code'] == $spec['kod']) {
                    $specId = $spec['id'];
                }
            }
            $contingent[$key]['spec_id'] = $specId;
        }
    }

    /**
     * Adds the same 'org_id' entry to every contingent row.
     *
     * Iterates over the actual keys, so arrays that are not 0-based lists
     * are handled as well.
     *
     * @param array $contingent Rows to extend (modified in place).
     * @param int   $orgId      Organisation id to store.
     */
    public function addOrgId(array &$contingent, int $orgId) : void
    {
        foreach (array_keys($contingent) as $key) {
            $contingent[$key]['org_id'] = $orgId;
        }
    }

    /**
     * Tells whether the rows carry any headcount at all.
     *
     * @param array $contingent Rows with a 'contingent' entry.
     * @return bool True when the sum of all 'contingent' values is non-zero.
     */
    public function checkContingent(array $contingent) : bool
    {
        $count = 0;
        foreach ($contingent as $value) {
            $count += $value['contingent'];
        }
        return (bool)$count;
    }

    /**
     * Reads a log file and returns the third space-separated token of
     * every line (the org_id in lines written as "<date> <time> <org_id> ...").
     *
     * @param string $filename Path of the log file.
     * @return array List of tokens; empty when the file cannot be read.
     */
    public function getExceptionsHtml(string $filename) : array
    {
        $lines = is_readable($filename) ? file($filename) : false;
        if ($lines === false) {
            return [];
        }

        $errorSites = [];
        foreach ($lines as $line) {
            $tokens = explode(' ', $line);
            // Skip lines that are too short to contain the token.
            if (isset($tokens[2])) {
                $errorSites[] = rtrim($tokens[2], "\r\n");
            }
        }
        return $errorSites;
    }

    /**
     * Reads a log file and returns org_id/site pairs for every line that
     * starts with a YYYY-MM-DD date and has a non-empty fourth token.
     *
     * @param string $filename Path of the log file.
     * @return array List of ['org_id' => ..., 'site' => ...] rows; empty
     *               when the file cannot be read.
     */
    public function getExceptionsHttpCurl(string $filename) : array
    {
        $lines = is_readable($filename) ? file($filename) : false;
        if ($lines === false) {
            return [];
        }

        $orgHttpError = [];
        foreach ($lines as $line) {
            $data = explode(' ', rtrim($line, "\r\n"));
            // Need date, time, org_id and a non-empty site.
            if (count($data) < 4 || $data[3] === '') {
                continue;
            }
            if (preg_match("/^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$/", $data[0])) {
                $orgHttpError[] = ['org_id' => $data[2], 'site' => $data[3]];
            }
        }
        return $orgHttpError;
    }
}

View File

@ -1,84 +0,0 @@
<?php
namespace App\Library;
use App\Library\DatabaseConfig;
use App\Library\Logger;
use PDOException;
use PDO;
use Symfony\Component\Yaml\Yaml;
/**
 * Thin PDO wrapper that logs connection and query events to a log file.
 */
class Database
{
    private PDO $pdo;
    private static $logFile = 'database.log';
    private DatabaseConfig $config;

    /**
     * Opens the connection described by the configuration.
     *
     * A connection failure is only logged; $pdo then stays uninitialised,
     * so any later query on this object will fail.
     *
     * @param DatabaseConfig $config Connection settings.
     */
    public function __construct(DatabaseConfig $config)
    {
        $this->config = $config;
        try {
            $dsn = $this->config->getDsn();
            $username = $this->config->getUsername();
            $password = $this->config->getPassword();
            $this->pdo = new PDO(
                $dsn,
                $username,
                $password,
                [PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION]
            );
            $message = "Подключение к ". $this->config->getDBName() ." успешно!";
            Logger::log(self::$logFile, $message);
        } catch (PDOException $e) {
            $message = "Ошибка подключения к ". $this->config->getDBName() .": " . $e->getMessage();
            Logger::log(self::$logFile, $message);
        }
    }

    /**
     * Logs that the connection is going away.
     */
    public function __destruct()
    {
        $message = "Подключение к ". $this->config->getDBName() ." прервано!";
        Logger::log(self::$logFile, $message);
    }

    /**
     * Runs a SELECT and returns all rows as associative arrays.
     *
     * Values are bound positionally to the placeholders :v1, :v2, ...
     * A PDOException is logged and an empty array is returned.
     *
     * @param string $sql    Statement with :vN placeholders.
     * @param array  $params Values in placeholder order.
     * @return array Fetched rows, or [] when the query failed.
     */
    public function selectQuery(string $sql, array $params = []) : array
    {
        // Defined up front so a failed query returns [] instead of an
        // undefined variable (which violated the array return type).
        $rows = [];
        try {
            $stmt = $this->pdo->prepare($sql);
            // array_values(): placeholder numbers follow position, not keys.
            foreach (array_values($params) as $i => $value) {
                // Explicit parentheses: ":v" . $i + 1 only means this on PHP 8+.
                $stmt->bindValue(':v' . ($i + 1), $value);
            }
            $stmt->execute();
            $rows = $stmt->fetchAll(PDO::FETCH_ASSOC);
        } catch (PDOException $e) {
            $message = "Ошибка запроса: " . $e->getMessage();
            Logger::log(self::$logFile, $message);
        }
        // Returned after the try/catch rather than from "finally", which
        // would silently discard any other in-flight throwable.
        return $rows;
    }

    /**
     * Executes a multi-row INSERT of contingent rows.
     *
     * For row number N (starting at 1) the placeholders :spec_codeN,
     * :spec_nameN, :edu_formsN, :edu_levelN, :contingentN, :org_idN and
     * :spec_idN are bound. On a PDOException the error is logged and, for
     * error code HY000, the rows are appended to not-recorded-in-db.yaml
     * next to this file.
     *
     * @param string $sql    INSERT statement with the placeholders above.
     * @param array  $params List of rows keyed by field name.
     */
    public function insertQuery(string $sql, array $params)
    {
        $fields = ['spec_code', 'spec_name', 'edu_forms', 'edu_level', 'contingent', 'org_id', 'spec_id'];
        try {
            $stmt = $this->pdo->prepare($sql);
            foreach (array_values($params) as $i => $row) {
                foreach ($fields as $field) {
                    // A missing field is bound as NULL.
                    $stmt->bindValue(':' . $field . ($i + 1), $row[$field] ?? null);
                }
            }
            $stmt->execute();
            Logger::log(self::$logFile, "Запрос выполнен успешно!");
        } catch (PDOException $e) {
            $message = "Ошибка запроса:" . $e->getMessage();
            Logger::log(self::$logFile, $message);
            // On a query error keep the valid data in a YAML file
            if ($e->getCode() === "HY000") {
                $yaml = Yaml::dump($params);
                file_put_contents(__DIR__ . '/not-recorded-in-db.yaml', $yaml, FILE_APPEND);
            }
        }
    }
}

View File

@ -1,54 +0,0 @@
<?php
namespace App\Library;
/**
 * Connection settings for one database, read from a section of the
 * ".env" INI file in the current working directory.
 */
class DatabaseConfig
{
    /** Keys every section has to provide. */
    private const REQUIRED_KEYS = [
        'DB_DRIVER',
        'DB_HOST',
        'DB_NAME',
        'DB_PORT',
        'DB_CHARSET',
        'DB_USERNAME',
        'DB_PASSWORD',
    ];

    private string $driver;
    private string $host;
    private string $dbname;
    private string $port;
    private string $charset;
    private string $username;
    private string $password;

    /**
     * Loads the section named after $db (upper-cased) from ".env".
     *
     * @param string $db Section name, case-insensitive on the caller side.
     * @throws \RuntimeException When ".env" cannot be parsed, the section
     *                           is missing, or a required key is absent.
     */
    public function __construct(string $db)
    {
        $envVars = parse_ini_file('.env', true);
        if ($envVars === false) {
            throw new \RuntimeException('Cannot read or parse the .env file');
        }

        $section = strtoupper($db);
        $config = $envVars[$section] ?? null;
        if (!is_array($config)) {
            throw new \RuntimeException("Section [{$section}] not found in the .env file");
        }

        $missing = array_diff(self::REQUIRED_KEYS, array_keys($config));
        if ($missing !== []) {
            throw new \RuntimeException(
                "Section [{$section}] of the .env file lacks: " . implode(', ', $missing)
            );
        }

        $this->driver = (string)$config['DB_DRIVER'];
        $this->host = (string)$config['DB_HOST'];
        $this->dbname = (string)$config['DB_NAME'];
        $this->port = (string)$config['DB_PORT'];
        $this->charset = (string)$config["DB_CHARSET"];
        $this->username = (string)$config['DB_USERNAME'];
        $this->password = (string)$config['DB_PASSWORD'];
    }

    /**
     * @return string Database name (DB_NAME).
     */
    public function getDBName(): string
    {
        return $this->dbname;
    }

    /**
     * @return string DSN of the form
     *                "<driver>:host=<host>;dbname=<name>;charset=<charset>;port=<port>".
     */
    public function getDsn() : string
    {
        return $this->driver.":host=".$this->host
            .";dbname=".$this->dbname
            .";charset=".$this->charset
            .";port=".$this->port;
    }

    /**
     * @return string User name (DB_USERNAME).
     */
    public function getUsername() : string
    {
        return $this->username;
    }

    /**
     * @return string Password (DB_PASSWORD).
     */
    public function getPassword() : string
    {
        return $this->password;
    }
}

View File

@ -1,11 +0,0 @@
<?php
namespace App\Library;
/**
 * Minimal file logger.
 */
class Logger
{
    /**
     * Appends "<Y-m-d H:i:s> <message>" plus a line break to the file.
     *
     * The target directory is created when it does not exist yet, so
     * dated paths such as "log/<date>/error.log" work on the first write
     * of a day instead of failing and losing the message.
     *
     * @param string $path    Log file path.
     * @param string $message Text to append.
     */
    public static function log($path, $message)
    {
        $directory = dirname((string)$path);
        // The second is_dir() covers a directory created concurrently.
        if (!is_dir($directory) && !mkdir($directory, 0775, true) && !is_dir($directory)) {
            error_log("Logger: cannot create directory {$directory}");
        }

        $log = date('Y-m-d H:i:s') . ' ' . $message;
        file_put_contents($path, $log . PHP_EOL, FILE_APPEND);
    }
}

View File

@ -1,27 +0,0 @@
<?php
// Number of students
// broken down by the different forms of budget funding
class Size
{
    // Total [students]; starts at 0 so getData() is safe before update()
    private int $all = 0;
    // Of which foreign citizens; starts at 0 for the same reason
    private int $foreigners = 0;

    public function __construct() {}

    /**
     * Stores both counters, converting numeric strings to integers.
     *
     * @param int|string $all        Total number of students.
     * @param int|string $foreigners Number of foreign citizens among them.
     */
    public function update(
        int|string $all,
        int|string $foreigners
    ): void {
        $this->all = (int)$all;
        $this->foreigners = (int)$foreigners;
    }

    /**
     * Returns both counters keyed by their Russian captions.
     *
     * @return array Two entries: the total and the foreign citizens count.
     */
    public function getData(): array{
        // NOTE: the second key contains a line break inside the string
        // literal; it is kept as is because consumers may rely on the key.
        return [
            "Всего" => $this->all,
            "Из них численность обучающихся,
являющихся иностранными гражданами" => $this->foreigners
        ];
    }
}

View File

@ -1,13 +1,12 @@
{
"require": {
"guzzlehttp/guzzle": "^7.0",
"nilportugues/sql-query-builder": "^1.8",
"symfony/yaml": "^7.1"
"symfony/yaml": "^7.1",
"nilportugues/sql-query-builder": "^1.8"
},
"autoload": {
"psr-4": {
"App\\": "app/",
"App\\Library\\": "app/library"
"ContingentParser\\": "ContingentParser/"
}
}
}

2
composer.lock generated
View File

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "171744ca730de2b7a0ee21a1d982266c",
"content-hash": "03ebe40648d54e718198609886034355",
"packages": [
{
"name": "guzzlehttp/guzzle",

View File

@ -1,11 +0,0 @@
-- Table of contingent (student headcount) rows.
-- Each row links an organisation (org_id) and a specialisation (spec_id)
-- to the values described by the column comments, which translate to:
--   spec_code  - code
--   spec_name  - name
--   edu_level  - education level
--   edu_forms  - forms of study
--   contingent - total number of students (the only mandatory data column)
create table sveden_education_contingent
(
id serial not null primary key,
org_id int null,
spec_id int null,
spec_code varchar(100) null comment 'Код',
spec_name text null comment 'Наименование',
edu_level text null comment 'Уровень образования',
edu_forms text null comment 'Формы обучения',
contingent int not null comment 'Общая численность обучающихся'
);

View File

@ -0,0 +1,10 @@
-- Table of contingent (student headcount) rows.
-- Each row links an organisation (org_id) and a specialisation (spec_id)
-- to the values described by the column comments, which translate to:
--   spec_code  - code
--   spec_name  - name
--   edu_level  - education level
--   edu_forms  - forms of study
--   contingent - total number of students (the only mandatory data column)
CREATE TABLE sveden_education_contingent(
id SERIAL NOT NULL PRIMARY KEY,
org_id INT NULL,
spec_id INT NULL,
spec_code VARCHAR(100) NULL COMMENT 'Код',
spec_name TEXT NULL COMMENT 'Наименование',
edu_level TEXT NULL COMMENT 'Уровень образования',
edu_forms TEXT NULL COMMENT 'Формы обучения',
contingent INT NOT NULL COMMENT 'Общая численность обучающихся'
);

View File

@ -1,109 +0,0 @@
<?php
use App\Library\ContingentManager;
use App\Library\ContingentParser;
use App\Library\Database;
use App\Library\DatabaseConfig;
use App\Library\Logger;
use Symfony\Component\Yaml\Yaml;
require_once(dirname(__FILE__) ."/vendor/autoload.php");
// require_once(dirname(__FILE__) ."/test.php");
$pathLogErrorHttp = __DIR__.'/log/'. date('Y-m-d') . '/error-http-curl.log';
$pathLogErrorHtml = __DIR__.'/log/'. date('Y-m-d') . '/error-html.log';
// $sites = ContingentManager::getInstance()->getExceptionsHttpCurl('select-http-error.log');
// print_r($sites);
$dbOpendata = new Database(new DatabaseConfig('opendata'));
$dbNiimko = new Database(new DatabaseConfig('niimko'));
// $sites = ContingentManager::getInstance()->getSites($dbNiimko);
$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);
$orgs = ContingentManager::getInstance()->getOrgs($dbOpendata);
// $sites = Yaml::parse(file_get_contents(dirname(__FILE__) ."/sites.yaml"));
$sites = ContingentManager::getInstance()->getExceptionsHttpCurl('log/2024-08-28/error-http-curl.log');
$sites = ContingentManager::getInstance()->getSitesFromMiccedu($dbOpendata, $sites);
// print_r($sites);
// Browser-like User-Agent sent with every request; it does not change
// between iterations, so it is built once.
$ua = 'Mozilla/5.0 (X11; Linux x86_64) '
    .'AppleWebKit/537.36 (KHTML, like Gecko) '
    .'Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36';

// For every site: download the education page, parse the contingent
// table and store it, logging HTTP and markup problems on the way.
foreach ($sites as $i => $site) {
    // The organisation has no site URL: log the row and move on.
    // (The message used to be logged without being assigned first.)
    if (empty($site['site'])) {
        $message = implode(' ', $site);
        Logger::log($pathLogErrorHttp, $message);
        continue;
    }
    // Already in the database
    if (in_array($site['org_id'], $orgs)) {
        continue;
    }
    // Optional filter: ignore organisations with markup errors
    // if (in_array($site['org_id'], $exceptionsOrgHtml)) {
    //     continue;
    // }
    // Optional filter: ignore organisations without http errors
    // if (!in_array($site['org_id'], $exceptionsOrgHttpCurl)) {
    //     continue;
    // }
    print(($i+1). '. ' . implode(' ', $site) . PHP_EOL);
    $uri = ContingentManager::getInstance()->buildBaseUri($site['site']);
    echo $uri . PHP_EOL;

    $ch = curl_init($uri);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_HEADER, false);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_USERAGENT, $ua);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 90);
    $html = curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $errno = curl_errno($ch);
    // Read everything needed from the handle, then close it on every
    // path (it used to stay open whenever a request failed).
    curl_close($ch);

    if ($httpCode != 200) {
        $message = implode(' ', $site);
        if ($httpCode != 0) {
            // The server answered with a non-200 status.
            $message .= ' ' . $httpCode;
        } else {
            // No HTTP status at all: transport-level cURL failure.
            $message .= " cURL error ({$errno}): ".curl_strerror($errno);
        }
        Logger::log($pathLogErrorHttp, $message);
        continue;
    }
    echo "HTTP-code: " . $httpCode . PHP_EOL;
    if (empty($html)) {
        continue;
    }

    $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
    $contingent = $parser->getDataTable();
    // Add the spec_id field looked up by spec_code
    ContingentManager::getInstance()->addSpecId($contingent, $specializations);
    // Add the org_id field
    ContingentManager::getInstance()->addOrgId($contingent, $site['org_id']);

    if (empty($contingent)) {
        echo "empty". PHP_EOL;
        $message = implode(' ', $site);
        Logger::log($pathLogErrorHtml, $message);
        continue;
    }

    print_r($contingent);
    if (ContingentManager::getInstance()->checkContingent($contingent)) {
        // Store in the database
        ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent);
    } else {
        // All headcounts are zero: treat as a markup problem.
        $message = implode(' ', $site);
        Logger::log($pathLogErrorHtml, $message);
    }
}

View File

@ -1,3 +0,0 @@
<?php
require_once 'vendor/autoload.php';
require_once 'app/app.php';

14
main.php Normal file
View File

@ -0,0 +1,14 @@
<?php
use Symfony\Component\Yaml\Yaml;
require_once(dirname(__FILE__) ."/vendor/autoload.php");
use ContingentParser\Facade;
$facade = new Facade();
// Alternative source of the site list: $sites = $facade->getSites();
$sitesFile = dirname(__FILE__) ."/sites.yaml";
$yaml = file_get_contents($sitesFile);
if ($yaml === false) {
    // Without the list there is nothing to process.
    fwrite(STDERR, "Cannot read {$sitesFile}" . PHP_EOL);
    exit(1);
}
$sites = Yaml::parse($yaml);
// An empty YAML document parses to null; treat it as an empty list.
if (!is_array($sites)) {
    $sites = [];
}
// Process every site, printing its 1-based position in green first.
// foreach keeps working when the YAML keys are not 0..n-1.
$position = 0;
foreach ($sites as $site) {
    $position++;
    print("\033[92m" . $position . ' ');
    $facade->collectDataFromContingent($site);
}

15
test.php Normal file
View File

@ -0,0 +1,15 @@
<?php
use ContingentParser\Database\DatabaseFacade;
use ContingentParser\Facade;
require_once(dirname(__FILE__) ."/vendor/autoload.php");
// Collect the first two space-separated fields of every log line into
// one flat list: [field1, field2, field1, field2, ...].
$data = [];
$lines = file(dirname(__FILE__) ."/update-sities.log");
if ($lines === false) {
    // Unreadable file: continue with an empty list.
    $lines = [];
}
foreach ($lines as $line) {
    $fields = explode(" ", rtrim($line, "\r\n"));
    // Skip blank or malformed lines that lack the second field.
    if (count($fields) < 2) {
        continue;
    }
    $data[] = $fields[0];
    $data[] = trim($fields[1]);
}
$databaseFacade = new DatabaseFacade();
$databaseFacade->updateSitesOpendata($data);

2
vendor/autoload.php vendored
View File

@ -22,4 +22,4 @@ if (PHP_VERSION_ID < 50600) {
require_once __DIR__ . '/composer/autoload_real.php';
return ComposerAutoloaderInit045658d81f6d9d3243e731dda7bf04d1::getLoader();
return ComposerAutoloaderInit03ebe40648d54e718198609886034355::getLoader();

View File

@ -8,13 +8,12 @@ $baseDir = dirname($vendorDir);
return array(
'Symfony\\Polyfill\\Ctype\\' => array($vendorDir . '/symfony/polyfill-ctype'),
'Symfony\\Component\\Yaml\\' => array($vendorDir . '/symfony/yaml'),
'Psr\\Http\\Message\\' => array($vendorDir . '/psr/http-factory/src', $vendorDir . '/psr/http-message/src'),
'Psr\\Http\\Message\\' => array($vendorDir . '/psr/http-message/src', $vendorDir . '/psr/http-factory/src'),
'Psr\\Http\\Client\\' => array($vendorDir . '/psr/http-client/src'),
'NilPortugues\\Sql\\QueryFormatter\\' => array($vendorDir . '/nilportugues/sql-query-formatter/src'),
'NilPortugues\\Sql\\QueryBuilder\\' => array($vendorDir . '/nilportugues/sql-query-builder/src'),
'GuzzleHttp\\Psr7\\' => array($vendorDir . '/guzzlehttp/psr7/src'),
'GuzzleHttp\\Promise\\' => array($vendorDir . '/guzzlehttp/promises/src'),
'GuzzleHttp\\' => array($vendorDir . '/guzzlehttp/guzzle/src'),
'App\\Library\\' => array($baseDir . '/app/library'),
'App\\' => array($baseDir . '/app'),
'ContingentParser\\' => array($baseDir . '/ContingentParser'),
);

View File

@ -2,7 +2,7 @@
// autoload_real.php @generated by Composer
class ComposerAutoloaderInit045658d81f6d9d3243e731dda7bf04d1
class ComposerAutoloaderInit03ebe40648d54e718198609886034355
{
private static $loader;
@ -24,16 +24,16 @@ class ComposerAutoloaderInit045658d81f6d9d3243e731dda7bf04d1
require __DIR__ . '/platform_check.php';
spl_autoload_register(array('ComposerAutoloaderInit045658d81f6d9d3243e731dda7bf04d1', 'loadClassLoader'), true, true);
spl_autoload_register(array('ComposerAutoloaderInit03ebe40648d54e718198609886034355', 'loadClassLoader'), true, true);
self::$loader = $loader = new \Composer\Autoload\ClassLoader(\dirname(__DIR__));
spl_autoload_unregister(array('ComposerAutoloaderInit045658d81f6d9d3243e731dda7bf04d1', 'loadClassLoader'));
spl_autoload_unregister(array('ComposerAutoloaderInit03ebe40648d54e718198609886034355', 'loadClassLoader'));
require __DIR__ . '/autoload_static.php';
call_user_func(\Composer\Autoload\ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1::getInitializer($loader));
call_user_func(\Composer\Autoload\ComposerStaticInit03ebe40648d54e718198609886034355::getInitializer($loader));
$loader->register(true);
$filesToLoad = \Composer\Autoload\ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1::$files;
$filesToLoad = \Composer\Autoload\ComposerStaticInit03ebe40648d54e718198609886034355::$files;
$requireFile = \Closure::bind(static function ($fileIdentifier, $file) {
if (empty($GLOBALS['__composer_autoload_files'][$fileIdentifier])) {
$GLOBALS['__composer_autoload_files'][$fileIdentifier] = true;

View File

@ -4,7 +4,7 @@
namespace Composer\Autoload;
class ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1
class ComposerStaticInit03ebe40648d54e718198609886034355
{
public static $files = array (
'7b11c4dc42b3b3023073cb14e519683c' => __DIR__ . '/..' . '/ralouphie/getallheaders/src/getallheaders.php',
@ -35,10 +35,9 @@ class ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1
'GuzzleHttp\\Promise\\' => 19,
'GuzzleHttp\\' => 11,
),
'A' =>
'C' =>
array (
'App\\Library\\' => 12,
'App\\' => 4,
'ContingentParser\\' => 17,
),
);
@ -53,8 +52,8 @@ class ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1
),
'Psr\\Http\\Message\\' =>
array (
0 => __DIR__ . '/..' . '/psr/http-factory/src',
1 => __DIR__ . '/..' . '/psr/http-message/src',
0 => __DIR__ . '/..' . '/psr/http-message/src',
1 => __DIR__ . '/..' . '/psr/http-factory/src',
),
'Psr\\Http\\Client\\' =>
array (
@ -80,13 +79,9 @@ class ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1
array (
0 => __DIR__ . '/..' . '/guzzlehttp/guzzle/src',
),
'App\\Library\\' =>
'ContingentParser\\' =>
array (
0 => __DIR__ . '/../..' . '/app/library',
),
'App\\' =>
array (
0 => __DIR__ . '/../..' . '/app',
0 => __DIR__ . '/../..' . '/ContingentParser',
),
);
@ -97,9 +92,9 @@ class ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1
public static function getInitializer(ClassLoader $loader)
{
return \Closure::bind(function () use ($loader) {
$loader->prefixLengthsPsr4 = ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1::$prefixLengthsPsr4;
$loader->prefixDirsPsr4 = ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1::$prefixDirsPsr4;
$loader->classMap = ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1::$classMap;
$loader->prefixLengthsPsr4 = ComposerStaticInit03ebe40648d54e718198609886034355::$prefixLengthsPsr4;
$loader->prefixDirsPsr4 = ComposerStaticInit03ebe40648d54e718198609886034355::$prefixDirsPsr4;
$loader->classMap = ComposerStaticInit03ebe40648d54e718198609886034355::$classMap;
}, null, ClassLoader::class);
}

View File

@ -1,9 +1,9 @@
<?php return array(
'root' => array(
'name' => '__root__',
'pretty_version' => 'dev-main',
'version' => 'dev-main',
'reference' => 'bf76820498b17a25905a2317da680fb2eb81cf2f',
'pretty_version' => '1.0.0+no-version-set',
'version' => '1.0.0.0',
'reference' => null,
'type' => 'library',
'install_path' => __DIR__ . '/../../',
'aliases' => array(),
@ -11,9 +11,9 @@
),
'versions' => array(
'__root__' => array(
'pretty_version' => 'dev-main',
'version' => 'dev-main',
'reference' => 'bf76820498b17a25905a2317da680fb2eb81cf2f',
'pretty_version' => '1.0.0+no-version-set',
'version' => '1.0.0.0',
'reference' => null,
'type' => 'library',
'install_path' => __DIR__ . '/../../',
'aliases' => array(),