Все переписано на фасады
This commit is contained in:
parent
74ba48620c
commit
48b4d6cccf
132
ContingentParser/Database/Database.php
Normal file
132
ContingentParser/Database/Database.php
Normal file
@ -0,0 +1,132 @@
|
||||
<?php
|
||||
namespace ContingentParser\Database;
|
||||
|
||||
use ContingentParser\Logger\DatabaseLogger;
|
||||
use Symfony\Component\Yaml\Yaml;
|
||||
use PDOException;
|
||||
use PDO;
|
||||
|
||||
/**
 * Thin PDO wrapper used by the parser to run SELECT / INSERT / UPDATE
 * statements. Every outcome is written to log/database.log.
 *
 * All methods bind values to named placeholders of the form :v1, :v2, ...
 */
final class Database
{
    private PDO $_pdo;
    private static $_logFile = 'log/database.log';
    private DatabaseConfig $_databaseConfig;
    private DatabaseLogger $_logger;

    /**
     * Opens the connection described by the given configuration.
     *
     * A connection failure is logged and not rethrown, so $_pdo stays
     * uninitialised in that case.
     * NOTE(review): any later query on such an instance fails with an Error
     * (uninitialised typed property), which the PDOException handlers below
     * do not catch — confirm whether the constructor should rethrow instead.
     *
     * @param \ContingentParser\Database\DatabaseConfig $config
     * Database connection settings
     */
    public function __construct(DatabaseConfig $config)
    {
        $this->_logger = new DatabaseLogger(self::$_logFile);
        $this->_databaseConfig = $config;
        try {
            $dsn = $this->_databaseConfig->getDsn();
            $username = $this->_databaseConfig->getUsername();
            $password = $this->_databaseConfig->getPassword();
            $this->_pdo = new PDO(
                $dsn,
                $username,
                $password,
                [PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION]
            );
            $message = "Подключение к {$this->_databaseConfig->getDBName()} успешно!";
            $this->_logger->log($message);
        } catch (PDOException $e) {
            $message = "Ошибка подключения к {$this->_databaseConfig->getDBName()}: {$e->getMessage()}";
            $this->_logger->log($message);
        }
    }

    /**
     * Logs that the connection is being dropped.
     */
    public function __destruct()
    {
        $message = "Подключение к {$this->_databaseConfig->getDBName()} прервано!";
        $this->_logger->log($message);
    }

    /**
     * Runs a SELECT statement and returns all rows as associative arrays.
     *
     * @param string $sql
     * SQL text with :v1, :v2, ... placeholders
     * @param array $params
     * Values for the placeholders, in placeholder order
     * @return array
     * Fetched rows; an empty array when the query fails (the failure is logged)
     */
    public function select(string $sql, array $params = []) : array
    {
        // Defined up front so a failed query returns an empty list instead of
        // an undefined variable (which violated the array return type).
        $rows = [];
        try {
            $stmt = $this->_pdo->prepare($sql);
            foreach (array_values($params) as $index => $value) {
                $stmt->bindValue(':v' . ($index + 1), $value);
            }
            $stmt->execute();
            $rows = $stmt->fetchAll(PDO::FETCH_ASSOC);
        } catch (PDOException $e) {
            $message = "Ошибка запроса: " . $e->getMessage();
            $this->_logger->log($message);
        }

        // Returned after the try block (not from `finally`) so that exceptions
        // other than PDOException are not silently discarded.
        return $rows;
    }

    /**
     * Runs a multi-row INSERT statement.
     *
     * @param string $sql
     * SQL text with :v1, :v2, ... placeholders numbered consecutively across rows
     * @param array $params
     * List of rows; each row is an array of values for one VALUES tuple
     * @return void
     */
    public function insert(string $sql, array $params)
    {
        try {
            $stmt = $this->_pdo->prepare($sql);
            // Placeholders run consecutively over all rows: the first row takes
            // :v1..:vN, the second :v(N+1)..:v(2N), and so on.
            $placeholder = 0;
            foreach ($params as $row) {
                foreach (array_values($row) as $value) {
                    $stmt->bindValue(':v' . ++$placeholder, $value);
                }
            }
            $stmt->execute();
            $this->_logger->log("Запрос выполнен успешно!");
        } catch (PDOException $e) {
            $message = "Ошибка запроса:" . $e->getMessage();
            $this->_logger->log($message);
            // On a general error (SQLSTATE HY000) keep the parsed data in a
            // YAML file so that it is not lost.
            if ($e->getCode() === "HY000") {
                $yaml = Yaml::dump($params);
                file_put_contents('not-recorded-in-db.yaml', $yaml, FILE_APPEND);
            }
        }
    }

    /**
     * Prepares an UPDATE statement and binds its values.
     *
     * @param string $sql
     * SQL text with :v1, :v2, ... placeholders
     * @param array $params
     * Values for the placeholders, in placeholder order
     * @return void
     */
    public function update(string $sql, array $params)
    {
        try {
            $stmt = $this->_pdo->prepare($sql);
            foreach (array_values($params) as $index => $value) {
                $stmt->bindValue(':v' . ($index + 1), $value);
            }
            // NOTE(review): execution is disabled, so the statement is only
            // prepared and bound, yet success is still logged below. Confirm
            // whether this is a deliberate dry-run before re-enabling.
            // $stmt->execute();
            $this->_logger->log("Запрос выполнен успешно!");
        } catch (PDOException $e) {
            $message = "Ошибка запроса:" . $e->getMessage();
            $this->_logger->log($message);
        }
    }
}
|
62
ContingentParser/Database/DatabaseConfig.php
Normal file
62
ContingentParser/Database/DatabaseConfig.php
Normal file
@ -0,0 +1,62 @@
|
||||
<?php
|
||||
namespace ContingentParser\Database;
|
||||
|
||||
/**
 * Connection settings for one database, read from a section of the `.env`
 * file in the current working directory (parsed as an INI file with sections).
 */
final class DatabaseConfig
{
    /** Keys every database section must define. */
    private const REQUIRED_KEYS = [
        'DB_DRIVER',
        'DB_HOST',
        'DB_NAME',
        'DB_PORT',
        'DB_CHARSET',
        'DB_USERNAME',
        'DB_PASSWORD',
    ];

    private string $_driver;
    private string $_host;
    private string $_dbname;
    private string $_port;
    private string $_charset;
    private string $_username;
    private string $_password;

    /**
     * Loads the settings of the section named after $db (case-insensitive).
     *
     * @param string $db
     * Section name in `.env`, e.g. "niimko"
     * @throws \RuntimeException
     * When `.env` cannot be parsed, or the section or one of its keys is missing
     */
    public function __construct(string $db)
    {
        $config = $this->getDataEnv($db);

        // Fail with a readable message instead of a TypeError on the typed
        // property assignments below.
        foreach (self::REQUIRED_KEYS as $key) {
            if (!array_key_exists($key, $config)) {
                throw new \RuntimeException(
                    "Database configuration '{$db}': key {$key} is missing in .env"
                );
            }
        }

        $this->_driver = $config['DB_DRIVER'];
        $this->_host = $config['DB_HOST'];
        $this->_dbname = $config['DB_NAME'];
        $this->_port = $config['DB_PORT'];
        $this->_charset = $config['DB_CHARSET'];
        $this->_username = $config['DB_USERNAME'];
        $this->_password = $config['DB_PASSWORD'];
    }

    /**
     * Reads `.env` and returns the section that belongs to $db.
     *
     * @param string $db
     * Section name; upper-cased before the lookup
     * @return array
     * Key/value pairs of the section, or an empty array when it does not exist
     * @throws \RuntimeException
     * When `.env` cannot be read or parsed
     */
    private function getDataEnv(string $db) : array
    {
        $envVars = parse_ini_file('.env', true);
        if ($envVars === false) {
            throw new \RuntimeException('Unable to read or parse the .env file');
        }

        $section = $envVars[strtoupper($db)] ?? [];

        // A top-level key with the same name is a scalar, not a section.
        return is_array($section) ? $section : [];
    }

    /**
     * @return string Database name
     */
    public function getDBName(): string
    {
        return $this->_dbname;
    }

    /**
     * @return string PDO DSN in the form driver:host=...;dbname=...;charset=...;port=...
     */
    public function getDsn() : string
    {
        return $this->_driver.":host=".$this->_host
            .";dbname=".$this->_dbname
            .";charset=".$this->_charset
            .";port=".$this->_port;
    }

    /**
     * @return string User name for the connection
     */
    public function getUsername() : string
    {
        return $this->_username;
    }

    /**
     * @return string Password for the connection
     */
    public function getPassword() : string
    {
        return $this->_password;
    }
}
|
214
ContingentParser/Database/DatabaseFacade.php
Normal file
214
ContingentParser/Database/DatabaseFacade.php
Normal file
@ -0,0 +1,214 @@
|
||||
<?php
|
||||
namespace ContingentParser\Database;
|
||||
|
||||
use NilPortugues\Sql\QueryBuilder\Builder\GenericBuilder;
|
||||
|
||||
/**
 * Facade over the two databases used by the parser ("niimko" and "opendata").
 * Builds the SQL statements and delegates their execution to Database.
 */
class DatabaseFacade
{
    private Database $opendata;
    private Database $niimko;
    private array $specialties;
    private array $universities;

    /**
     * Connects to both databases and preloads the specialty list and the
     * ids of organisations that already have contingent records.
     */
    public function __construct()
    {
        $this->niimko = new Database(new DatabaseConfig('niimko'));
        $this->opendata = new Database(new DatabaseConfig('opendata'));
        $this->specialties = $this->getSpecialties();
        $this->universities = $this->getUniversities();
    }

    /**
     * Fetches site URLs from the niimko database.
     *
     * @return array
     * Rows with the keys org_id and site
     */
    public function getSitesFromNiimko() : array
    {
        /*
        SELECT kod AS org_id, site FROM niimko.s_vuzes
        WHERE ootype = 'vuz' AND deleted = 'n' AND fake = 'n'
        */
        $builder = new GenericBuilder();
        // Values for the placeholders, in the order of the equals() calls below.
        $params = ['vuz', 'n', 'n', 'RU'];
        $query = $builder->select()
            ->setTable('s_vuzes')
            ->setColumns(['org_id' => 'kod', 'site'])
            ->where('AND')
            ->equals('ootype', 'vuz')
            ->equals('deleted', 'n')
            ->equals('fake', 'n')
            ->equals('country', 'RU')
            ->end();
        $sql = $builder->write($query);
        $sites = $this->niimko->select($sql, $params);

        return $sites;
    }

    /**
     * Fetches site URLs from the miccedu_monitoring table of the opendata
     * database.
     *
     * @param array $params
     * Sites with outdated URLs; each entry must carry the key org_id
     * @return array
     * Rows with the keys org_id and site
     */
    public function getSitesFromMiccedu(array $params) : array
    {
        /*
        SELECT site, vuzkod AS org_id FROM opendata.miccedu_monitoring
        WHERE year = 2023 AND (vuzkod = :val1 OR vuzkod = :val2 OR ...)
        */
        $builder = new GenericBuilder();
        $year = 2023;
        // Reduce every entry to its integer organisation id.
        foreach ($params as $key => $org) {
            $params[$key] = (int)$org['org_id'];
        }
        $query = $builder->select()
            ->setTable('miccedu_monitoring')
            ->setColumns(['org_id' => 'vuzkod','site'])
            ->where('AND')
            ->equals('year', $year)
            ->subWhere('OR');
        foreach ($params as $orgId) {
            $query->equals('vuzkod', $orgId);
        }
        $query = $query->end();
        $sql = $builder->write($query);
        // The year is the first condition, so its value goes first.
        array_unshift($params, $year);
        $sites = $this->opendata->select($sql, $params);

        return $sites;
    }

    /**
     * Inserts student contingent records into the opendata database.
     *
     * @param array $contingent
     * List of contingent records, one per specialty
     * @return void
     */
    public function insertContingent(array $contingent) : void
    {
        /*
        INSERT INTO sveden_education_contingent
        (org_id, spec_id, spec_code, spec_name, edu_level, edu_forms, contingent)
        VALUES
        (:v1, :v2, :v3, :v4, :v5, :v6, :v7)
        ...
        */
        // Nothing to insert; also avoids reading $contingent[0] below.
        if ($contingent === []) {
            return;
        }
        $builder = new GenericBuilder();
        $countAtributes = count($contingent[0]);
        $size = $countAtributes * (count($contingent) - 1);
        // The builder produces the statement for the first row only.
        $query = $builder->insert()
            ->setTable('sveden_education_contingent')
            ->setValues(
                $contingent[0]
            );
        $sql = $builder->write($query);
        // Append one placeholder tuple per remaining row.
        // NOTE(review): each tuple is hard-coded to seven placeholders, so this
        // only lines up when every record has exactly seven attributes.
        for ($i = $countAtributes; $i <= $size;) {
            $sql .= " (:v".(++$i).", :v".(++$i).", :v".(++$i).", :v"
                .(++$i).", :v".(++$i).", :v".(++$i).", :v".(++$i).")\n";
        }
        // Normalise whitespace and separate the tuples with commas.
        $sql = preg_replace('/\)\s*VALUES\s*/', ') VALUES ', $sql);
        $sql = preg_replace('/\)\s*\(/', '), (', $sql);
        $this->opendata->insert($sql, $contingent);
    }

    /**
     * Returns the specialties loaded in the constructor.
     *
     * @return array
     * Rows with the keys spec_id and spec_code
     */
    public function specialties() : array
    {
        // Previously returned $this->universities by mistake.
        return $this->specialties;
    }

    /**
     * Returns the ids of organisations already stored in the opendata database.
     *
     * @return array
     * List of org_id values
     */
    public function universities() : array
    {
        // Previously returned $this->specialties by mistake.
        return $this->universities;
    }

    /**
     * Fetches specialty ids and codes from the niimko database.
     *
     * @return array
     * Rows with the keys spec_id and spec_code
     */
    private function getSpecialties() : array
    {
        /*
        SELECT id AS spec_id, kod AS spec_code FROM niimko.s_specs
        WHERE oopkodes = 'gos3p'
        */
        $builder = new GenericBuilder();
        $params = ['gos3p'];
        $query = $builder->select()
            ->setTable('s_specs')
            ->setColumns(['spec_id' =>'id', 'spec_code' => 'kod'])
            ->where()
            ->equals('oopkodes','gos3p')
            ->end();
        $sql = $builder->write($query);
        $specialties = $this->niimko->select($sql, $params);

        return $specialties;
    }

    /**
     * Fetches the ids of organisations that already have contingent records
     * in the opendata database.
     *
     * @return array
     * List of org_id values
     */
    private function getUniversities() : array
    {
        /*
        SELECT DISTINCT org_id FROM sveden_education_contingent
        */
        $builder = new GenericBuilder();
        $query = $builder->select()
            ->setTable('sveden_education_contingent')
            ->setColumns(['org_id'])
            ->where()
            ->greaterThan('org_id', 0)
            ->end();
        $sql = $builder->write($query);
        // The WHERE clause is stripped again and DISTINCT is patched into the
        // generated text, leaving a plain SELECT DISTINCT without parameters.
        $sql = preg_replace("/ WHERE.*/", '', $sql);
        $sql = preg_replace('/SELECT/', 'SELECT DISTINCT', $sql);
        $universities = $this->opendata->select($sql);

        return array_column($universities, 'org_id');
    }

    /**
     * Builds and submits a bulk UPDATE of site URLs in niimko.s_vuzes.
     *
     * NOTE(review): the original description declared $params as
     * [['org_id' => ..., 'site' => ...], ...], but the code below treats it as
     * a flat list [kod1, site1, kod2, site2, ...] — confirm against callers.
     * NOTE(review): the statement targets niimko.s_vuzes but is sent through
     * the opendata connection — confirm this is intended.
     *
     * @param array $params
     * Flat list of alternating organisation codes and site URLs
     * @return void
     */
    public function updateSitesOpendata(array $params) : void
    {
        /*
        UPDATE niimko.s_vuzes
        SET site = CASE kod
            WHEN :v1 THEN :v2
            WHEN :v3 THEN :v4
            ...
            ELSE kod
            END
        WHERE kod IN (:v1, :v2...)
        */
        $count = count($params);
        // Append every even-indexed value (the codes) once more; these copies
        // feed the placeholders of the IN (...) list.
        for ($i = 0; $i < $count; $i++) {
            if ($i % 2 == 0) {
                $params[] = $params[$i];
            }
        }
        $sql = "UPDATE niimko.s_vuzes\nSET site = CASE kod\n";

        // One WHEN/THEN pair per (code, site) pair.
        for ($i = 0; $i < $count;) {
            $sql .= "WHEN :v".++$i." THEN :v".++$i."\n";
        }
        $sql .= "ELSE kod\nEND\nWHERE kod in(";
        // Placeholders for the appended codes continue after the CASE pairs.
        for ($i = $count++; $i < count($params);) {
            $sql .= ":v".++$i.",\n";
        }
        $sql = rtrim($sql,",\n") .")\n";

        $this->opendata->update($sql, $params);
    }
}
|
105
ContingentParser/Facade.php
Normal file
105
ContingentParser/Facade.php
Normal file
@ -0,0 +1,105 @@
|
||||
<?php
|
||||
namespace ContingentParser;
|
||||
|
||||
use ContingentParser\Database\DatabaseFacade;
|
||||
use ContingentParser\Http\HttpClientFacade;
|
||||
use ContingentParser\Http\UrlBuilder;
|
||||
use ContingentParser\Logger\HtmlLogger;
|
||||
use ContingentParser\Parser\ContingentFacade;
|
||||
|
||||
/**
 * Top-level facade of the parser: obtains site lists, downloads the
 * "sveden/education" page of a site and extracts the student contingent table.
 */
class Facade
{
    private DatabaseFacade $databaseFacade;
    private HttpClientFacade $httpClientFacade;
    private ContingentFacade $contingentFacade;
    private UrlBuilder $urlBuilder;
    private HtmlLogger $htmlLogger;

    /**
     * Creates the collaborating facades and the HTML logger.
     */
    public function __construct()
    {
        $this->databaseFacade = new DatabaseFacade();
        $this->httpClientFacade = new HttpClientFacade();
        $this->urlBuilder = new UrlBuilder();
        $this->contingentFacade = new ContingentFacade();
        $this->htmlLogger = new HtmlLogger('log/html.log');
    }

    /**
     * Returns the list of sites to process.
     *
     * @param array $params
     * Sites that need refreshed URLs; when empty, all sites are read from niimko
     * @return array
     */
    public function getSites(array $params = []) : array
    {
        if (empty($params)) {
            return $this->databaseFacade->getSitesFromNiimko();
        }

        return $this->databaseFacade->getSitesFromMiccedu($params);
    }

    /**
     * Collects the "student contingent" table of the "Education" section
     * from the microdata markup of one site.
     *
     * @param array $site
     * Site record with the keys org_id and site (URL)
     * @return void
     */
    public function collectDataFromContingent(array $site) : void
    {
        $orgId = $site['org_id'] ?? null;
        $url = $site['site'] ?? null;
        // The organisation has no site URL. The original guard tested $site
        // (the whole record) and therefore never skipped such records.
        if ($url === null || trim((string) $url) === '') {
            return;
        }
        // Already stored in the database.
        if (in_array($orgId, $this->databaseFacade->universities())) {
            return;
        }
        $url = $this->urlBuilder->build($url);
        print(implode(' ', $site). "\033[0m" . PHP_EOL);
        $html = $this->httpClientFacade->processEducationContingentSites(
            $url,
            $site
        );

        $contingent = $this->contingentFacade->getContingent(
            $html,
            $this->databaseFacade->specialties(),
            $orgId
        );

        if (!empty($contingent)
            && $this->contingentFacade->isValidContingent($contingent)
        ) {
            // NOTE(review): the insert is disabled and the records are only
            // printed — confirm before re-enabling.
            print_r($contingent);
            // $this->databaseFacade->insertContingent($contingent);
        } else {
            // Nothing parsed, or every contingent value is zero.
            print("\033[91mNo result\033[0m\n");
            $this->htmlLogger->log("$orgId $url");
        }

        print(PHP_EOL);
    }

    /**
     * Reads a log file and rebuilds the site records it mentions.
     *
     * Each line is split on single spaces; the third token is taken as the
     * organisation id and the fourth as the site URL.
     *
     * @param string $path
     * Path of the log file
     * @return array
     * List of ['org_id' => ..., 'site' => ...] records; empty when the file
     * cannot be read
     */
    public function getExclusionSites(string $path) : array
    {
        // Strip line endings so the last token does not carry a newline.
        $logs = file($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($logs === false) {
            return [];
        }
        $result = [];
        foreach ($logs as $log) {
            $data = explode(' ', $log);
            // Skip lines that do not even contain an organisation id.
            if (!isset($data[2])) {
                continue;
            }
            $result[] = [
                'org_id' => $data[2],
                'site' => $data[3] ?? ''
            ];
        }
        return $result;
    }
}
|
99
ContingentParser/Http/CurlHelper.php
Normal file
99
ContingentParser/Http/CurlHelper.php
Normal file
@ -0,0 +1,99 @@
|
||||
<?php
|
||||
namespace ContingentParser\Http;
|
||||
|
||||
use ContingentParser\Logger\HttpLogger;
|
||||
use CurlHandle;
|
||||
/**
 * Fallback HTTP client based on the cURL extension.
 * Downloads one URL, re-requests the address found in a Location header and
 * logs HTTP / cURL failures to log/http-curl.log.
 */
final class CurlHelper
{
    /** Upper bound for manual re-requests triggered by Location headers. */
    private const MAX_REDIRECTS = 5;

    private CurlHandle|bool $curl;
    private string $url;
    private array $site;

    /**
     * Initialises the cURL session.
     *
     * NOTE: certificate and host verification are switched off
     * (CURLOPT_SSL_VERIFYPEER / CURLOPT_SSL_VERIFYHOST = false).
     *
     * @param string $url
     * URL to download
     * @param array $site
     * Organisation id and base URL of the site (used in log messages)
     */
    public function __construct(string $url, array $site)
    {
        $this->url = $url;
        $this->site = $site;

        $this->curl = curl_init();
        curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($this->curl, CURLOPT_SSL_VERIFYPEER, false);
        // Response headers are included in the output; checkLocation() reads them.
        curl_setopt($this->curl, CURLOPT_HEADER, true);
        curl_setopt($this->curl, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($this->curl, CURLOPT_USERAGENT,
            'Mozilla/5.0 (X11; Linux x86_64) '
            .'AppleWebKit/537.36 (KHTML, like Gecko) '
            .'Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36'
        );
        curl_setopt($this->curl, CURLOPT_SSL_VERIFYHOST, false);
        curl_setopt($this->curl, CURLOPT_CONNECTTIMEOUT, 90);
    }

    /**
     * Closes the cURL session.
     */
    public function __destruct()
    {
        if ($this->curl instanceof CurlHandle) {
            curl_close($this->curl);
        }
    }

    /**
     * Downloads the page and returns the raw response (headers included).
     *
     * When the response contains a Location header, the request is repeated
     * for that address, at most MAX_REDIRECTS times. The original version
     * recursed without a limit and reported the outcome once per recursion
     * level; the outcome is now reported once, for the last request.
     *
     * @return string
     * Response text; an empty string when the transfer failed
     */
    public function getContent() : string
    {
        $html = '';
        for ($attempt = 0; $attempt <= self::MAX_REDIRECTS; $attempt++) {
            curl_setopt($this->curl, CURLOPT_URL, $this->url);
            $response = curl_exec($this->curl);
            // curl_exec() returns false on failure.
            $html = is_string($response) ? $response : '';
            if (!$this->checkLocation($this->url, $html)) {
                break;
            }
        }
        $this->reportError();
        return $html;
    }

    /**
     * Looks for a Location header in the response and, when one is found,
     * replaces $url with its sanitised value.
     *
     * @param string $url
     * Current URL; overwritten with the redirect target when one is found
     * @param string $html
     * Raw response text including headers
     * @return bool
     * True when a non-empty redirect target was found
     */
    private function checkLocation(string &$url, string $html) : bool
    {
        preg_match('/location:(.*?)\n/i', $html, $matches);
        if (empty($matches)) return false;
        $target = $matches[1];
        // Keep only characters from the allowed set (also strips the
        // surrounding whitespace and the trailing carriage return).
        $target = preg_replace("/[^a-z0-9\-:.\/,]/iu", '', $target);
        $url = $target ? $target : $url;

        return $target ? true : false;
    }

    /**
     * Prints the HTTP status of the last request and logs failures.
     *
     * @return void
     */
    private function reportError() : void
    {
        $httpLogger = new HttpLogger('log/http-curl.log');

        $httpCode = curl_getinfo($this->curl, CURLINFO_HTTP_CODE);

        if ($httpCode != 200 && $httpCode != 0) {
            print("\033[91mHTTP-code: $httpCode\033[0m\n");
            $message = implode(' ', $this->site) . ' HTTP-code(' . $httpCode.')';
            $httpLogger->log($message, $httpCode);
        } elseif ($httpCode == 0) {
            // No HTTP status was recorded: log the cURL error instead.
            $errno = curl_errno($this->curl);
            $message = implode(' ', $this->site);
            $message .= " cURL error ({$errno}): ".curl_strerror($errno);
            $httpLogger->log($message);
        } else {
            print("\033[94mHTTP-code: $httpCode\033[0m\n");
        }
    }
}
|
110
ContingentParser/Http/HttpClientFacade.php
Normal file
110
ContingentParser/Http/HttpClientFacade.php
Normal file
@ -0,0 +1,110 @@
|
||||
<?php
|
||||
namespace ContingentParser\Http;
|
||||
|
||||
use GuzzleHttp\Client;
|
||||
use GuzzleHttp\Exception\ClientException;
|
||||
use GuzzleHttp\Exception\ConnectException;
|
||||
use GuzzleHttp\Exception\RequestException;
|
||||
use GuzzleHttp\Exception\ServerException;
|
||||
use GuzzleHttp\Psr7\Exception\MalformedUriException;
|
||||
use GuzzleHttp\TransferStats;
|
||||
|
||||
/**
 * Downloads the "sveden/education" page of a site with Guzzle and falls back
 * to CurlHelper when Guzzle reports an error.
 */
final class HttpClientFacade
{
    private Client $client;
    private array $config;

    public function __construct() {}

    /**
     * Downloads the student contingent page of a site.
     *
     * @param string $url
     * Base URL of the site
     * @param array $site
     * Organisation id and base URL (passed on to the cURL fallback for logging)
     * @return string
     * Page markup as returned by Guzzle or by the cURL fallback
     */
    public function processEducationContingentSites(
        string $url,
        array $site
    ) : string {
        // Defined up front so that every path returns a string.
        $html = '';
        $redirectUrl = null;
        try {
            $client = $this->createClient($url);
            // Request the base URI first to learn the effective address.
            $client->get('', [
                'on_stats' => function (TransferStats $stats) use (&$redirectUrl) {
                    $redirectUrl = $stats->getEffectiveUri();
                }
            ]);

            print("Redirect $url -> $redirectUrl" . PHP_EOL);
            $url .= substr($url, -1) == '/' ? '':'/';
            $url .= "sveden/education/";
            print("Parsing for $url" . PHP_EOL);

            $response = $client->get($url);
            $httpCode = $response->getStatusCode();
            print("\033[94mHTTP-code: $httpCode\033[0m\n");

            $html = $response->getBody()->getContents();
        } catch (ClientException
            | RequestException
            | ConnectException
            | ServerException
            | MalformedUriException $e
        ) {
            print("\033[91mHTTP-code: ". $e->getCode(). "\033[0m\n");
            // $url may or may not already carry the "sveden/education/" suffix,
            // depending on which request failed.
            $html = $this->handleException($url, $site);
        }

        // Returned after the try block (not from `finally`) so that unexpected
        // exception types are not replaced by an undefined-variable failure.
        return $html;
    }

    /**
     * Handles a Guzzle failure by retrying the download with CurlHelper.
     *
     * @param string $url
     * URL to download
     * @param array $site
     * Organisation id and base URL
     * @return string
     */
    private function handleException(string $url, array $site) : string
    {
        $curlHelper = new CurlHelper($url, $site);
        return $curlHelper->getContent();
    }

    /**
     * Creates a Guzzle client with the given base URI.
     *
     * @param string $url
     * @return \GuzzleHttp\Client
     */
    private function createClient(string $url) : Client
    {
        $this->config = $this->config() + ["base_uri" => $url];
        return new Client($this->config);
    }

    /**
     * Builds the client configuration.
     *
     * NOTE(review): the debug stream is opened with mode "w" on every call, so
     * the file is truncated for each new client and the handle is never
     * closed — confirm whether that is intended.
     *
     * @return array
     */
    private function config() : array
    {
        return [
            'force_ip_resolve' => 'v4',
            'debug' => fopen("log/debug-http.log", "w"),
            // Was misspelled "allow_directs", so these redirect settings were
            // never seen by Guzzle.
            'allow_redirects' => [
                'max' => 5,
                'strict' => true,
                'referer' => true,
                'protocols' => ['http', 'https'],
                'track_redirects' => true
            ],
            'connect_timeout' => 300.0,
            // TLS certificate verification is disabled.
            'verify' => false,
            'headers' => [
                'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) '
                    .'AppleWebKit/537.36 (KHTML, like Gecko) '
                    .'Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36',
                'Content-Type' => 'text/html;charset=utf-8'
            ]
        ];
    }
}
|
30
ContingentParser/Http/UrlBuilder.php
Normal file
30
ContingentParser/Http/UrlBuilder.php
Normal file
@ -0,0 +1,30 @@
|
||||
<?php
|
||||
namespace ContingentParser\Http;
|
||||
|
||||
/**
 * Normalises raw site addresses into "<scheme>://<host>/" form.
 */
class UrlBuilder
{
    public function __construct() {}

    /**
     * Builds the base URL of a site from a raw address.
     *
     * The address is lower-cased, stripped of all whitespace, a few known
     * typos are repaired, "http://" is prepended when no http/https scheme is
     * present, and everything after the host is dropped.
     *
     * @param string $url
     * Raw address
     * @return string
     * Address in the form scheme://host/
     */
    public function build(string $url) : string
    {
        // Lower-case and remove surrounding as well as inner whitespace.
        $address = preg_replace('/\s+/', '', trim(strtolower($url)));

        // Repair "www/" typed instead of "www." and drop a back-slashed scheme;
        // the replacements are applied in this order.
        $address = str_replace(["www/", "http:\\\\"], ["www.", ""], $address);

        $hasScheme = preg_match('#^https?://#', $address);
        if (!$hasScheme) {
            $address = "http://$address";
        }

        // Keep scheme and host only.
        $parts = parse_url($address);
        $base = $parts['scheme'] . '://' . $parts['host'] . '/';

        // Underscores are turned into slashes.
        return trim(str_replace("_", "/", $base));
    }
}
|
12
ContingentParser/Logger/DatabaseLogger.php
Normal file
12
ContingentParser/Logger/DatabaseLogger.php
Normal file
@ -0,0 +1,12 @@
|
||||
<?php
|
||||
namespace ContingentParser\Logger;
|
||||
|
||||
/**
 * Logger for database events.
 */
final class DatabaseLogger extends Logger
{
    /**
     * Appends one timestamped line to the log file.
     *
     * @param string $message
     * Text to record
     * @return void
     */
    public function log(string $message) : void
    {
        $line = sprintf("[%s] %s\n", date('Y-m-d H:i:s'), $message);
        file_put_contents($this->_path, $line, FILE_APPEND);
    }
}
|
12
ContingentParser/Logger/HtmlLogger.php
Normal file
12
ContingentParser/Logger/HtmlLogger.php
Normal file
@ -0,0 +1,12 @@
|
||||
<?php
|
||||
namespace ContingentParser\Logger;
|
||||
|
||||
/**
 * Logger for pages whose markup produced no usable result.
 */
class HtmlLogger extends Logger
{
    /**
     * Appends one timestamped line to the log file.
     *
     * @param string $message
     * Text to record
     * @return void
     */
    public function log(string $message) : void
    {
        $line = sprintf("[%s] %s\n", date('Y-m-d H:i:s'), $message);
        file_put_contents($this->_path, $line, FILE_APPEND);
    }
}
|
70
ContingentParser/Logger/HttpLogger.php
Normal file
70
ContingentParser/Logger/HttpLogger.php
Normal file
@ -0,0 +1,70 @@
|
||||
<?php
|
||||
namespace ContingentParser\Logger;
|
||||
|
||||
/**
 * Logger for HTTP failures; can append the reason phrase of a status code.
 */
final class HttpLogger extends Logger
{
    /** Reason phrases by HTTP status code. */
    private const ARR_HTTP_STATUS_CODE = [
        100 => 'Continue',
        101 => 'Switching Protocols',
        200 => 'OK',
        201 => 'Created',
        202 => 'Accepted',
        203 => 'Non-Authoritative Information',
        204 => 'No Content',
        205 => 'Reset Content',
        206 => 'Partial Content',
        300 => 'Multiple Choices',
        301 => 'Moved Permanently',
        302 => 'Found',
        303 => 'See Other',
        304 => 'Not Modified',
        305 => 'Use Proxy',
        306 => 'Switch Proxy',
        307 => 'Temporary Redirect',
        308 => 'Permanent Redirect',
        400 => 'Bad Request',
        401 => 'Unauthorized',
        402 => 'Payment Required',
        403 => 'Forbidden',
        404 => 'Not Found',
        405 => 'Method Not Allowed',
        406 => 'Not Acceptable',
        407 => 'Proxy Authentication Required',
        408 => 'Request Timeout',
        409 => 'Conflict',
        410 => 'Gone',
        411 => 'Length Required',
        412 => 'Precondition Failed',
        413 => 'Payload Too Large',
        414 => 'URI Too Long',
        415 => 'Unsupported Media Type',
        416 => 'Range Not Satisfiable',
        417 => 'Expectation Failed',
        418 => 'I\'m a teapot',
        429 => 'Too Many Requests',
        451 => 'Unavailable For Legal Reasons',
        500 => 'Internal Server Error',
        501 => 'Not Implemented',
        502 => 'Bad Gateway',
        503 => 'Service Unavailable',
        504 => 'Gateway Timeout',
        505 => 'HTTP Version Not Supported',
        506 => 'Variant Also Negotiates',
        507 => 'Insufficient Storage',
        508 => 'Loop Detected',
        509 => 'Bandwidth Limit Exceeded',
        510 => 'Not Extended',
        511 => 'Network Authentication Required'
    ];

    /**
     * Appends one timestamped line to the log file.
     *
     * @param string $message
     * Text to record
     * @param int|null $httpCode
     * Optional HTTP status code; when given and non-zero, its reason phrase is
     * appended (an empty phrase for codes missing from the table)
     * @return void
     */
    public function log(string $message, ?int $httpCode = null) : void
    {
        $date = date('Y-m-d H:i:s');
        $logMessage = "[$date] $message";
        if (!empty($httpCode)) {
            // Unknown codes no longer raise an "undefined array key" warning.
            $logMessage .= ' ' . (self::ARR_HTTP_STATUS_CODE[$httpCode] ?? '');
        }
        file_put_contents($this->_path, $logMessage . "\n", FILE_APPEND);
    }
}
|
12
ContingentParser/Logger/Logger.php
Normal file
12
ContingentParser/Logger/Logger.php
Normal file
@ -0,0 +1,12 @@
|
||||
<?php
|
||||
namespace ContingentParser\Logger;
|
||||
|
||||
/**
 * Base class of the file loggers; stores the path of the log file.
 */
abstract class Logger
{
    /** Path of the file the concrete logger appends to. */
    protected string $_path;

    /**
     * @param string $path
     * Path of the log file
     */
    public function __construct(string $path)
    {
        $this->_path = $path;
    }
}
|
71
ContingentParser/Parser/ContingentFacade.php
Normal file
71
ContingentParser/Parser/ContingentFacade.php
Normal file
@ -0,0 +1,71 @@
|
||||
<?php
|
||||
namespace ContingentParser\Parser;
|
||||
|
||||
/**
 * Facade over ContingentParser: extracts contingent records from a page and
 * enriches them with specialty and organisation ids.
 */
class ContingentFacade
{
    /**
     * Extracts the contingent records of one organisation.
     *
     * @param string $html
     * Markup of the organisation's page
     * @param array $specialties
     * Specialty rows with the keys spec_id and spec_code
     * @param int $orgId
     * Organisation id
     * @return array
     * Records returned by the parser, each extended with spec_id and org_id
     */
    public function getContingent(
        string $html,
        array $specialties,
        int $orgId
    ) : array {
        $records = (new ContingentParser($html))->getDataTable();
        $this->addSpecId($records, $specialties);
        $this->addOrgId($records, $orgId);

        return $records;
    }

    /**
     * Checks whether the records carry any contingent at all.
     *
     * @param array $contingent
     * Contingent records, each with the key contingent
     * @return bool
     * True when the sum of all contingent values is non-zero
     */
    public function isValidContingent(array $contingent) : bool
    {
        $total = 0;
        foreach ($contingent as $record) {
            $total += $record['contingent'];
        }

        return (bool) $total;
    }

    /**
     * Adds the specialty id to every record.
     *
     * @param array $contingent
     * Contingent records, each with the key spec_code
     * @param array $specialties
     * Specialty rows with the keys spec_id and spec_code
     * @return void
     */
    private function addSpecId(array &$contingent, array $specialties) : void
    {
        // spec_code => spec_id lookup table.
        $idByCode = array_column($specialties, 'spec_id', 'spec_code');
        foreach ($contingent as &$record) {
            // Codes without a known specialty get null.
            $record['spec_id'] = $idByCode[$record['spec_code']] ?? null;
        }
        unset($record);
    }

    /**
     * Adds the organisation id to every record.
     *
     * @param array $contingent
     * Contingent records
     * @param int $orgId
     * Organisation id
     * @return void
     */
    private function addOrgId(array &$contingent, int $orgId): void
    {
        foreach (array_keys($contingent) as $index) {
            $contingent[$index]['org_id'] = $orgId;
        }
    }
}
|
@ -1,71 +1,77 @@
|
||||
<?php
|
||||
namespace App\Library;
|
||||
namespace ContingentParser\Parser;
|
||||
|
||||
use App\Library\ContingentRow;
|
||||
use DOMDocument;
|
||||
use DOMXPath;
|
||||
|
||||
class ContingentParser
|
||||
{
|
||||
private \DOMXPath $xpath;
|
||||
private string $template;
|
||||
private ?DOMXPath $xpath;
|
||||
private const TEMPLATE = '//tr[@itemprop="eduChislen"]//';
|
||||
private const ENCODING = "UTF-8";
|
||||
private const FIELDS = [
|
||||
"eduCode" => "td",
|
||||
"eduName" => "td",
|
||||
"eduLevel" => "td",
|
||||
"eduForm" => "td",
|
||||
// "numberBF" => "th",
|
||||
// "numberBFF" => "th",
|
||||
// "numberBR" => "th",
|
||||
// "numberBRF" => "th",
|
||||
// "numberBM" => "th",
|
||||
// "numberBMF" => "th",
|
||||
// "numberP" => "th",
|
||||
// "numberPF" => "th",
|
||||
"numberAll" => ["th", "td"]
|
||||
];
|
||||
|
||||
public function __construct(string $html, string $template)
|
||||
public function __construct(string $html)
|
||||
{
|
||||
libxml_use_internal_errors(true);
|
||||
$dom = new \DOMDocument(
|
||||
// encoding: "UTF-8"
|
||||
$dom = new DOMDocument(
|
||||
encoding: self::ENCODING
|
||||
);
|
||||
if (empty($html)) {
|
||||
$this->xpath = null;
|
||||
} else {
|
||||
$this->setEncoding($html);
|
||||
$dom->loadHTML($html);
|
||||
$this->xpath = new DOMXPath($dom);
|
||||
}
|
||||
}
|
||||
|
||||
private function setEncoding(string &$html) : void
|
||||
{
|
||||
$encoding = mb_detect_encoding($html, 'UTF-8, windows-1251');
|
||||
if ($encoding != "UTF-8") {
|
||||
if ($encoding != self::ENCODING) {
|
||||
$html = mb_convert_encoding(
|
||||
$html,
|
||||
'UTF-8',
|
||||
self::ENCODING,
|
||||
$encoding
|
||||
);
|
||||
$html = str_replace('windows-1251','utf-8', $html);
|
||||
$html = str_replace('windows-1251',self::ENCODING, $html);
|
||||
}
|
||||
$dom->loadHTML(mb_convert_encoding($html,'HTML-ENTITIES','UTF-8'));
|
||||
|
||||
$this->xpath = new \DOMXPath($dom);
|
||||
$this->template = $template;
|
||||
$html = mb_convert_encoding($html,'HTML-ENTITIES','UTF-8');
|
||||
}
|
||||
|
||||
private function parse(): array
|
||||
private function parse() : array
|
||||
{
|
||||
$data = array();
|
||||
$data = [];
|
||||
foreach (self::FIELDS as $field => $tag) {
|
||||
if (!is_array($tag)) {
|
||||
$data[$field] = $this->xpath->query($this->template . $tag . "[@itemprop=\"$field\"]");
|
||||
$data[$field] = $this->xpath->query(
|
||||
self::TEMPLATE . $tag . "[@itemprop=\"$field\"]"
|
||||
);
|
||||
} else {
|
||||
$th = $this->xpath->query($this->template . $tag[0] . "[@itemprop=\"$field\"]");
|
||||
$td = $this->xpath->query($this->template . $tag[1] . "[@itemprop=\"$field\"]");
|
||||
$th = $this->xpath->query(
|
||||
self::TEMPLATE . $tag[0] . "[@itemprop=\"$field\"]"
|
||||
);
|
||||
$td = $this->xpath->query(
|
||||
self::TEMPLATE . $tag[1] . "[@itemprop=\"$field\"]"
|
||||
);
|
||||
$data[$field] = $th->length > $td->length ? $th : $td;
|
||||
}
|
||||
|
||||
}
|
||||
return $data;
|
||||
}
|
||||
|
||||
public function getDataTable() : array
|
||||
{
|
||||
if (empty($this->xpath)) return [];
|
||||
|
||||
$data = $this->parse();
|
||||
$records = array();
|
||||
// var_dump($data);
|
||||
// exit(0);
|
||||
$records = [];
|
||||
if ($data == null) return [];
|
||||
|
||||
$equal = $data['eduName']->length;
|
@ -1,6 +1,6 @@
|
||||
<?php
|
||||
namespace App\Library;
|
||||
// Специальность, направление подготовки
|
||||
namespace ContingentParser\Parser;
|
||||
|
||||
class ContingentRow
|
||||
{
|
||||
public function __construct(
|
@ -1,37 +0,0 @@
|
||||
<?php
|
||||
use App\Library\ContingentManager;
|
||||
use App\Library\Database;
|
||||
use App\Library\DatabaseConfig;
|
||||
use Symfony\Component\Yaml\Yaml;
|
||||
// При ошибке в запросе к БД. Распарсингованные данные
|
||||
// сохраняются в yaml-файле. Скрипт парсит этот файл и заносит в БД
|
||||
require_once "vendor/autoload.php";
|
||||
|
||||
/**
 * Returns the nesting depth of an array.
 *
 * A flat (or empty) array has depth 1; every level of nested arrays adds one.
 *
 * @param array $array Array to inspect.
 * @return int Depth of the deepest nested branch.
 */
function array_depth(array $array)
{
    return array_reduce(
        $array,
        // Only array elements can deepen the structure; scalars keep the running maximum.
        static fn ($deepest, $element) => is_array($element)
            ? max($deepest, array_depth($element) + 1)
            : $deepest,
        1
    );
}
|
||||
|
||||
// Re-import rows that could not be written to the database earlier and were
// dumped to a YAML file instead.
$yamlPath = __DIR__ . '/not-recorded-in-db.yaml';
$rawYaml = is_readable($yamlPath) ? file_get_contents($yamlPath) : false;
if ($rawYaml === false) {
    fwrite(STDERR, "Cannot read {$yamlPath}" . PHP_EOL);
    exit(1);
}

$data = Yaml::parse($rawYaml);
// An empty file parses to null; there is nothing to import in that case.
if (!is_array($data) || $data === []) {
    exit(0);
}

$db = new Database(new DatabaseConfig('opendata'));
$manager = ContingentManager::getInstance();

if (array_depth($data) == 2) {
    // Depth 2: the file holds a single list of rows.
    $manager->insertContingent($db, $data);
} else {
    // Deeper structure: every top-level entry is its own list of rows.
    foreach ($data as $value) {
        if (is_array($value)) {
            $manager->insertContingent($db, $value);
        }
    }
}
|
215
app/app.php
215
app/app.php
@ -1,215 +0,0 @@
|
||||
<?php
|
||||
use App\Library\ContingentManager;
|
||||
use App\Library\DatabaseConfig;
|
||||
use App\Library\Logger;
|
||||
use GuzzleHttp\Psr7\Exception\MalformedUriException;
|
||||
use GuzzleHttp\Exception\ClientException;
|
||||
use GuzzleHttp\Exception\ConnectException;
|
||||
use GuzzleHttp\Exception\RequestException;
|
||||
use GuzzleHttp\Exception\ServerException;
|
||||
use App\Library\ContingentParser;
|
||||
use App\Library\Database;
|
||||
use GuzzleHttp\Client;
|
||||
use GuzzleHttp\TransferStats;
|
||||
use Symfony\Component\Yaml\Yaml;
|
||||
|
||||
// Log files are grouped in a per-day directory next to the application directory.
$logDir = __DIR__ . '/../log/' . date('Y-m-d');
$pathLogErrorHtml = $logDir . '/error-html.log';
$pathLogErrorHttp = $logDir . '/error-http-curl.log';
$pathErrorHttp = $logDir . '/error-http.log';

// One connection per database section configured in .env.
$dbOpendata = new Database(new DatabaseConfig('opendata'));
$dbNiimko = new Database(new DatabaseConfig('niimko'));

// List of organisations (org_id, site) to crawl.
$sites = ContingentManager::getInstance()->getSites($dbNiimko);
|
||||
// $specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);
|
||||
// $orgs = ContingentManager::getInstance()->getOrgs($dbOpendata);
|
||||
|
||||
// $exceptionsOrgHtml = ContingentManager::getInstance()->getExceptionsHtml('select-html-error.log');
|
||||
// $exceptionsOrgHttpCurl = ContingentManager::getInstance()->getExceptionsHttpCurl('select-http-error.log');
|
||||
|
||||
// print_r($exceptionsOrgHttpCurl);
|
||||
|
||||
// echo count($exceptionsOrgHttpCurl) . " - http-error sites" . PHP_EOL;
|
||||
|
||||
// $start = 794;
|
||||
// for ($i = $start; $i < count($sites); $i++) {
|
||||
// // Нет URL сайта вуза
|
||||
// if (empty($sites[$i]['site'])) {
|
||||
// $message = implode(' ', $sites[$i]);
|
||||
// Logger::log($pathLogErrorHttp, $message);
|
||||
// Logger::log($pathErrorHttp, implode(' ', $sites[$i]));
|
||||
// continue;
|
||||
// }
|
||||
// // Уже в базе
|
||||
// if (in_array($sites[$i]['org_id'], $orgs)) {
|
||||
// continue;
|
||||
// }
|
||||
// // С ошибками разметки игнорируем
|
||||
// if (in_array($sites[$i]['org_id'], $exceptionsOrgHtml)) {
|
||||
// continue;
|
||||
// }
|
||||
// // Без ошибок http игнорируем
|
||||
// if (!in_array($sites[$i]['org_id'], $exceptionsOrgHttpCurl)) {
|
||||
// continue;
|
||||
// }
|
||||
// try {
|
||||
// $baseUri = ContingentManager::getInstance()->buildBaseUri($sites[$i]['site']);
|
||||
// $client = new Client([
|
||||
// 'force_ip_resolve' => 'v4',
|
||||
// 'debug' => fopen("debug-http.log", "a"),
|
||||
// 'base_uri' => $baseUri,
|
||||
// 'allow_directs' => [
|
||||
// 'max' => 5,
|
||||
// 'strict' => true,
|
||||
// 'referer' => true,
|
||||
// 'protocols' => ['http', 'https'],
|
||||
// 'track_redirects' => true
|
||||
// ],
|
||||
// 'connect_timeout' => 300.0,
|
||||
// 'verify' => false,
|
||||
// 'headers' => [
|
||||
// 'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36',
|
||||
// 'Content-Type' => 'text/html;charset=utf-8'
|
||||
// ]
|
||||
// ]);
|
||||
// $response = $client->get('', [
|
||||
// 'on_stats' => function (TransferStats $stats) use (&$url) {
|
||||
// $url = $stats->getEffectiveUri();
|
||||
// }
|
||||
// ]);
|
||||
|
||||
// print(($i+1). '. ' . implode(' ', $sites[$i]) . PHP_EOL);
|
||||
// if (substr($url, -1) == '/') {
|
||||
// $url = $url."sveden/education/";
|
||||
// } else {
|
||||
// $url = $url."/sveden/education/";
|
||||
// }
|
||||
// echo $url .PHP_EOL;
|
||||
// $response = $client->get($url, [
|
||||
// 'on_stats' => function (TransferStats $stats) use (&$url) {
|
||||
// $url = $stats->getEffectiveUri();
|
||||
// }
|
||||
// ]);
|
||||
// echo $url . PHP_EOL;
|
||||
|
||||
// $html = $response->getBody()->getContents();
|
||||
// if (empty($html)) {
|
||||
// $message = implode(' ', $sites[$i]);
|
||||
// Logger::log($pathLogErrorHtml, $message);
|
||||
// continue;
|
||||
// }
|
||||
// $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
|
||||
// $contingent = $parser->getDataTable();
|
||||
|
||||
// // Добавляем поле spec_id по spec_code
|
||||
// ContingentManager::getInstance()->addSpecId($contingent, $specializations);
|
||||
|
||||
// // Добавляем поле org_id
|
||||
// ContingentManager::getInstance()->addOrgId($contingent, $sites[$i]['org_id']);
|
||||
// print_r($contingent);
|
||||
// if (empty($contingent)) {
|
||||
// $message = implode(' ', $sites[$i]);
|
||||
// Logger::log($pathLogErrorHtml, $message);
|
||||
// } else {
|
||||
// $set = ContingentManager::getInstance()->checkContingent($contingent);
|
||||
// if ($set) {
|
||||
// // Заносим в базу
|
||||
// ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent);
|
||||
// } else {
|
||||
// $message = implode(' ', $sites[$i]);
|
||||
// Logger::log($pathLogErrorHtml, $message);
|
||||
// }
|
||||
// unset($contingent);
|
||||
// }
|
||||
// } catch (ClientException
|
||||
// | RequestException
|
||||
// | ConnectException
|
||||
// | ServerException
|
||||
// | MalformedUriException $e
|
||||
// ) {
|
||||
// $message = implode(' ', $sites[$i]) . " " . $e->getCode() . " " . $e->getMessage();
|
||||
// Logger::log($pathLogErrorHttp, $message);
|
||||
// Logger::log($pathErrorHttp, implode(' ', $sites[$i]));
|
||||
// }
|
||||
// }
|
||||
|
||||
// Reference data used to enrich the parsed rows and to skip organisations
// that already have rows in the target table.
$manager = ContingentManager::getInstance();
$specializations = $manager->getSpecializations($dbNiimko);
$orgs = $manager->getOrgs($dbOpendata);

foreach ($sites as $i => $site) {
    // No site URL is known for this organisation.
    if (empty($site['site'])) {
        continue;
    }
    // Already in the database. Loose comparison is kept deliberately:
    // NOTE(review): org_id may arrive as int or numeric string depending on the PDO driver - confirm.
    if (in_array($site['org_id'], $orgs)) {
        continue;
    }

    $siteInfo = implode(' ', $site);
    print(($i + 1) . '. ' . $siteInfo . PHP_EOL);

    // buildBaseUri() already returns "<scheme>://<host>/sveden/education/".
    // Appending the path a second time here produced
    // ".../sveden/education/sveden/education/", so the URI is used as returned.
    $uri = $manager->buildBaseUri($site['site']);
    echo $uri . PHP_EOL;

    $ch = curl_init($uri);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_HEADER, false);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    $html = curl_exec($ch);

    // Collect everything needed from the handle, then release it before any
    // early "continue" so that failed requests do not leak handles.
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $errno = curl_errno($ch);
    curl_close($ch);

    if ($httpCode != 200) {
        // Transport failures carry a cURL error number; HTTP failures carry a status code.
        $message = $errno !== 0
            ? $siteInfo . " cURL error ({$errno}): " . curl_strerror($errno)
            : $siteInfo . ' ' . $httpCode;
        Logger::log($pathLogErrorHttp, $message);
        continue;
    }

    echo "HTTP-code: " . $httpCode . PHP_EOL;
    if (empty($html)) {
        continue;
    }

    $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
    $contingent = $parser->getDataTable();
    // Add spec_id (looked up by spec_code) and org_id to every row.
    $manager->addSpecId($contingent, $specializations);
    $manager->addOrgId($contingent, $site['org_id']);

    if (empty($contingent)) {
        echo "empty". PHP_EOL;
        Logger::log($pathLogErrorHtml, $siteInfo);
        continue;
    }

    print_r($contingent);
    if ($manager->checkContingent($contingent)) {
        // Store the rows in the database.
        $manager->insertContingent($dbOpendata, $contingent);
    } else {
        // All contingent values are zero: treated as a markup problem.
        Logger::log($pathLogErrorHtml, $siteInfo);
    }
}
|
@ -1,187 +0,0 @@
|
||||
<?php
|
||||
namespace App\Library;
|
||||
use NilPortugues\Sql\QueryBuilder\Builder\GenericBuilder;
|
||||
|
||||
/**
 * Singleton with helper operations for the contingent crawler: reading
 * organisation/site lists, enriching parsed rows, writing them to the
 * database and reading back error logs.
 */
final class ContingentManager
{
    private static ?ContingentManager $instance = null;
    private ?GenericBuilder $builder;

    private function __construct()
    {
        $this->builder = new GenericBuilder();
    }

    /**
     * Returns the shared instance, creating it on first use.
     */
    public static function getInstance() : ContingentManager
    {
        self::$instance ??= new self();
        return self::$instance;
    }

    /**
     * Selects org_id (column "kod") and site from s_vuzes for
     * ootype = 'vuz', deleted = 'n', fake = 'n', country = 'RU'.
     *
     * @param Database $db Connection to query.
     * @return array Rows as returned by Database::selectQuery().
     */
    public function getSites(Database $db): array
    {
        // Values are passed positionally; their order must match the
        // order of the equals() calls below.
        $params = ['vuz', 'n', 'n', 'RU'];
        $query = $this->builder->select()
            ->setTable('s_vuzes')
            ->setColumns(['org_id' => 'kod', 'site'])
            ->where('AND')
            ->equals('ootype', 'vuz')
            ->equals('deleted', 'n')
            ->equals('fake', 'n')
            ->equals('country', 'RU')
            ->end();
        $sql = $this->builder->write($query);

        return $db->selectQuery($sql, $params);
    }

    /**
     * Selects org_id (column "vuzkod") and site from miccedu_monitoring for
     * the given year, restricted to the given organisations.
     *
     * @param Database $db     Connection to query.
     * @param array    $params Rows, each containing an 'org_id' entry.
     * @param int      $year   Monitoring year to read (defaults to 2023).
     * @return array Rows as returned by Database::selectQuery(); empty when
     *               no organisations are given.
     */
    public function getSitesFromMiccedu(Database $db, array $params, int $year = 2023) : array
    {
        $orgIds = [];
        foreach ($params as $org) {
            $orgIds[] = (int)$org['org_id'];
        }
        // Without any id the OR group would be empty and the query would no
        // longer be restricted to the requested organisations.
        if ($orgIds === []) {
            return [];
        }

        $query = $this->builder->select()
            ->setTable('miccedu_monitoring')
            ->setColumns(['org_id' => 'vuzkod','site'])
            ->where('AND')
            ->equals('year', $year)
            ->subWhere('OR');
        foreach ($orgIds as $orgId) {
            $query->equals('vuzkod', $orgId);
        }
        $query = $query->end();
        $sql = $this->builder->writeFormatted($query);

        // Positional values: the year first, then the ids in the order they were added.
        return $db->selectQuery($sql, array_merge([$year], $orgIds));
    }

    /**
     * Inserts all rows into sveden_education_contingent with one statement.
     *
     * Placeholders are named ":<column><row number>" (row numbers start at 1),
     * which is the naming Database::insertQuery() binds against.
     *
     * @param Database $db         Connection to write to.
     * @param array    $contingent List of rows keyed by column name.
     */
    public function insertContingent(Database $db, array $contingent) : void
    {
        // An empty list would otherwise produce a statement without any values.
        if ($contingent === []) {
            return;
        }
        $contingent = array_values($contingent);
        $columns = ['spec_code', 'spec_name', 'edu_level', 'edu_forms', 'contingent', 'spec_id', 'org_id'];

        $tuples = [];
        foreach (array_keys($contingent) as $i) {
            // Placeholders follow the column list, not the key order of the
            // row, so values can never land in the wrong column.
            $placeholders = array_map(
                static fn (string $column): string => ":{$column}" . ($i + 1),
                $columns
            );
            $tuples[] = '(' . implode(',', $placeholders) . ')';
        }
        $sql = "insert into sveden_education_contingent"
            . "(" . implode(',', $columns) . ") values"
            . implode(',', $tuples);

        $db->insertQuery($sql, $contingent);
    }

    /**
     * Selects id and kod from s_specs where oopkodes = 'gos3p'.
     *
     * @param Database $db Connection to query.
     * @return array Rows as returned by Database::selectQuery().
     */
    public function getSpecializations(Database $db) : array
    {
        $params = ['gos3p'];
        $query = $this->builder->select()
            ->setTable('s_specs')
            ->setColumns(['id', 'kod'])
            ->where()
            ->equals('oopkodes','gos3p')
            ->end();
        $sql = $this->builder->write($query);

        return $db->selectQuery($sql, $params);
    }

    /**
     * Returns the distinct org_id values already present in
     * sveden_education_contingent.
     *
     * @param Database $db Connection to query.
     * @return array Flat list of org_id values.
     */
    public function getOrgs(Database $db) : array
    {
        $sql = 'SELECT DISTINCT org_id FROM sveden_education_contingent';

        return array_column($db->selectQuery($sql), 'org_id');
    }

    /**
     * Builds "http://<host>/sveden/education/" from a stored site value.
     *
     * The scheme is forced to http, any path of the input is dropped, a
     * "www/" typo is repaired to "www." and underscores are replaced by
     * slashes.
     *
     * @param string $url Site value (with or without scheme).
     * @return string Normalised URL ending in "sveden/education/".
     */
    public function buildBaseUri(string $url): string
    {
        $url = str_replace("www/", "www.", $url);
        if (!str_contains($url, 'https://') && !str_contains($url, 'http://')) {
            $url = "http://$url";
        }
        $url = str_replace("https://", "http://", $url);

        // parse_url() returns false for seriously malformed input and omits
        // missing components; fall back to neutral values so the caller gets
        // a string that simply fails to resolve instead of PHP warnings.
        $parts = parse_url($url);
        $scheme = is_array($parts) ? ($parts['scheme'] ?? 'http') : 'http';
        $host = is_array($parts) ? ($parts['host'] ?? '') : '';

        $url = $scheme . '://' . $host . '/';
        $url = str_replace("_", "/", $url);
        $url = $url . "sveden/education/";

        return trim($url);
    }

    /**
     * Adds a 'spec_id' entry to every row, looked up by the row's
     * 'spec_code' in the specialisation list (null when nothing matches;
     * the last matching specialisation wins).
     *
     * @param array $contingent      Rows to modify in place.
     * @param array $specializations Rows with 'id' and 'kod' entries.
     */
    public function addSpecId(array &$contingent, array $specializations) : void
    {
        foreach ($contingent as $key => $row) {
            $specId = null;
            foreach ($specializations as $spec) {
                if ($row['spec_code'] == $spec['kod']) {
                    $specId = $spec['id'];
                }
            }
            $contingent[$key]['spec_id'] = $specId;
        }
    }

    /**
     * Adds the given 'org_id' to every row.
     *
     * @param array $contingent Rows to modify in place.
     * @param int   $orgId      Organisation id to set.
     */
    public function addOrgId(array &$contingent, int $orgId) : void
    {
        // Iterate over the actual keys so non-contiguous lists are handled
        // without creating stray entries.
        foreach ($contingent as &$row) {
            $row['org_id'] = $orgId;
        }
        unset($row);
    }

    /**
     * Tells whether the rows contain any non-zero contingent in total.
     *
     * @param array $contingent Rows with a 'contingent' entry.
     * @return bool True when the sum of all 'contingent' values is non-zero.
     */
    public function checkContingent(array $contingent) : bool
    {
        $total = 0;
        foreach ($contingent as $row) {
            $total += $row['contingent'];
        }
        return (bool)$total;
    }

    /**
     * Reads a log file and returns the third space-separated token of every
     * line (the position of org_id in lines written as
     * "<date> <time> <org_id> ...").
     *
     * @param string $filename Log file to read.
     * @return array List of tokens; empty when the file cannot be read.
     */
    public function getExceptionsHtml(string $filename) : array
    {
        $lines = file($filename, FILE_IGNORE_NEW_LINES);
        if ($lines === false) {
            return [];
        }
        $errorSites = [];
        foreach ($lines as $line) {
            $tokens = explode(' ', $line);
            // Skip lines that are too short to carry an id.
            if (isset($tokens[2])) {
                $errorSites[] = $tokens[2];
            }
        }
        return $errorSites;
    }

    /**
     * Reads a log file and returns org_id/site pairs for every line that
     * starts with a YYYY-MM-DD date and has a non-empty fourth token.
     *
     * @param string $filename Log file to read.
     * @return array List of ['org_id' => ..., 'site' => ...] entries; empty
     *               when the file cannot be read.
     */
    public function getExceptionsHttpCurl(string $filename) : array
    {
        $lines = file($filename, FILE_IGNORE_NEW_LINES);
        if ($lines === false) {
            return [];
        }
        $orgHttpError = [];
        foreach ($lines as $line) {
            $tokens = explode(' ', $line);
            // Lines without a site (missing or empty fourth token) are ignored.
            if (!isset($tokens[3]) || $tokens[3] === '') {
                continue;
            }
            if (preg_match("/^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$/", $tokens[0])) {
                $orgHttpError[] = ['org_id' => $tokens[2], 'site' => $tokens[3]];
            }
        }
        return $orgHttpError;
    }
}
|
@ -1,84 +0,0 @@
|
||||
<?php
|
||||
namespace App\Library;
|
||||
|
||||
use App\Library\DatabaseConfig;
|
||||
use App\Library\Logger;
|
||||
use PDOException;
|
||||
use PDO;
|
||||
use Symfony\Component\Yaml\Yaml;
|
||||
/**
 * Thin PDO wrapper that logs connection and query results to a log file.
 */
class Database
{
    // Stays null when the connection attempt in the constructor failed.
    private ?PDO $pdo = null;
    private static $logFile = 'database.log';
    private DatabaseConfig $config;

    /**
     * Opens the connection described by the configuration. A connection
     * failure is logged, not thrown; later queries then log and do nothing.
     *
     * @param DatabaseConfig $config Connection settings.
     */
    public function __construct(DatabaseConfig $config)
    {
        $this->config = $config;
        try {
            $this->pdo = new PDO(
                $this->config->getDsn(),
                $this->config->getUsername(),
                $this->config->getPassword(),
                [PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION]
            );
            $message = "Подключение к ". $this->config->getDBName() ." успешно!";
            Logger::log(self::$logFile, $message);
        } catch (PDOException $e) {
            $message = "Ошибка подключения к ". $this->config->getDBName() .": " . $e->getMessage();
            Logger::log(self::$logFile, $message);
        }
    }

    /**
     * Logs that the connection object is being released.
     */
    public function __destruct()
    {
        $message = "Подключение к ". $this->config->getDBName() ." прервано!";
        Logger::log(self::$logFile, $message);
    }

    /**
     * Runs a SELECT and returns all rows as associative arrays.
     *
     * Values are bound positionally to the placeholders :v1, :v2, ...
     *
     * @param string $sql    Statement using :v<N> placeholders.
     * @param array  $params Values in placeholder order.
     * @return array Fetched rows; empty on any failure (which is logged).
     */
    public function selectQuery(string $sql, array $params = []) : array
    {
        if ($this->pdo === null) {
            Logger::log(self::$logFile, "Ошибка запроса: нет подключения к " . $this->config->getDBName());
            return [];
        }
        try {
            $stmt = $this->pdo->prepare($sql);
            foreach (array_values($params) as $i => $value) {
                $stmt->bindValue(":v" . ($i + 1), $value);
            }
            $stmt->execute();
            return $stmt->fetchAll(PDO::FETCH_ASSOC);
        } catch (PDOException $e) {
            // No "return" inside "finally": that would also discard
            // unrelated errors and return an undefined variable.
            $message = "Ошибка запроса: " . $e->getMessage();
            Logger::log(self::$logFile, $message);
            return [];
        }
    }

    /**
     * Runs a multi-row INSERT for contingent rows.
     *
     * For row number N (starting at 1) the placeholders :spec_codeN,
     * :spec_nameN, :edu_formsN, :edu_levelN, :contingentN, :org_idN and
     * :spec_idN are bound. When the statement fails with code "HY000" the
     * rows are appended to not-recorded-in-db.yaml next to this file.
     *
     * @param string $sql    INSERT statement with the placeholders above.
     * @param array  $params List of rows keyed by column name.
     */
    public function insertQuery(string $sql, array $params)
    {
        if ($this->pdo === null) {
            Logger::log(self::$logFile, "Ошибка запроса: нет подключения к " . $this->config->getDBName());
            return;
        }
        $fields = ['spec_code', 'spec_name', 'edu_forms', 'edu_level', 'contingent', 'org_id', 'spec_id'];
        try {
            $stmt = $this->pdo->prepare($sql);
            foreach (array_values($params) as $i => $row) {
                foreach ($fields as $field) {
                    $stmt->bindValue(":{$field}" . ($i + 1), $row[$field] ?? null);
                }
            }
            $stmt->execute();
            Logger::log(self::$logFile, "Запрос выполнен успешно!");
        } catch (PDOException $e) {
            $message = "Ошибка запроса:" . $e->getMessage();
            Logger::log(self::$logFile, $message);
            // Keep the valid rows in a YAML file so they can be re-imported later.
            if ($e->getCode() === "HY000") {
                $yaml = Yaml::dump($params);
                file_put_contents(__DIR__ . '/not-recorded-in-db.yaml', $yaml, FILE_APPEND);
            }
        }
    }
}
|
@ -1,54 +0,0 @@
|
||||
<?php
|
||||
namespace App\Library;
|
||||
|
||||
/**
 * Connection settings for one database, read from a section of the ".env"
 * INI file in the current working directory.
 */
class DatabaseConfig
{
    private const REQUIRED_KEYS = [
        'DB_DRIVER',
        'DB_HOST',
        'DB_NAME',
        'DB_PORT',
        'DB_CHARSET',
        'DB_USERNAME',
        'DB_PASSWORD',
    ];

    private string $driver;
    private string $host;
    private string $dbname;
    private string $port;
    private string $charset;
    private string $username;
    private string $password;

    /**
     * @param string $db Section name in .env (matched in upper case).
     * @throws \RuntimeException         When .env cannot be read or parsed.
     * @throws \InvalidArgumentException When the section or one of its keys is missing.
     */
    public function __construct(string $db)
    {
        $envVars = parse_ini_file('.env', true);
        if ($envVars === false) {
            throw new \RuntimeException('Cannot read database settings from .env');
        }

        $section = strtoupper($db);
        $config = $envVars[$section] ?? null;
        if (!is_array($config)) {
            throw new \InvalidArgumentException("Section [{$section}] not found in .env");
        }
        // Fail with a clear message instead of assigning null to typed properties.
        foreach (self::REQUIRED_KEYS as $key) {
            if (!isset($config[$key])) {
                throw new \InvalidArgumentException("Key {$key} is missing in section [{$section}] of .env");
            }
        }

        $this->driver = $config['DB_DRIVER'];
        $this->host = $config['DB_HOST'];
        $this->dbname = $config['DB_NAME'];
        $this->port = $config['DB_PORT'];
        $this->charset = $config['DB_CHARSET'];
        $this->username = $config['DB_USERNAME'];
        $this->password = $config['DB_PASSWORD'];
    }

    /**
     * @return string Database name (DB_NAME).
     */
    public function getDBName(): string
    {
        return $this->dbname;
    }

    /**
     * @return string PDO DSN: "<driver>:host=...;dbname=...;charset=...;port=...".
     */
    public function getDsn() : string
    {
        return "{$this->driver}:host={$this->host}"
            . ";dbname={$this->dbname}"
            . ";charset={$this->charset}"
            . ";port={$this->port}";
    }

    /**
     * @return string User name (DB_USERNAME).
     */
    public function getUsername() : string
    {
        return $this->username;
    }

    /**
     * @return string Password (DB_PASSWORD).
     */
    public function getPassword() : string
    {
        return $this->password;
    }
}
|
@ -1,11 +0,0 @@
|
||||
<?php
|
||||
namespace App\Library;
|
||||
|
||||
/**
 * Minimal file logger: appends timestamped lines to a file.
 */
class Logger
{
    /**
     * Appends "<Y-m-d H:i:s> <message>" followed by a line break to the file.
     *
     * The target directory is created when it does not exist yet, so
     * per-day log directories work without manual setup.
     *
     * @param string $path    Log file to append to.
     * @param string $message Text to log.
     */
    public static function log($path, $message)
    {
        $directory = dirname($path);
        // The second is_dir() covers another process creating the directory
        // between the check and mkdir().
        if (!is_dir($directory) && !mkdir($directory, 0775, true) && !is_dir($directory)) {
            return;
        }
        $log = date('Y-m-d H:i:s') . ' ' . $message;
        // LOCK_EX keeps lines from concurrent writers from interleaving.
        file_put_contents($path, $log . PHP_EOL, FILE_APPEND | LOCK_EX);
    }
}
|
@ -1,27 +0,0 @@
|
||||
<?php
|
||||
/**
 * Number of students: the total and the part who are foreign citizens.
 */
class Size
{
    // Total number of students. Starts at 0 so getData() is safe before update().
    private int $all = 0;
    // Of those, the number of foreign citizens.
    private int $foreigners = 0;

    public function __construct() {}

    /**
     * Replaces both counters; numeric strings are cast to int.
     *
     * @param int|string $all        Total number of students.
     * @param int|string $foreigners Number of foreign citizens among them.
     */
    public function update(
        int|string $all,
        int|string $foreigners
    ): void {
        $this->all = (int)$all;
        $this->foreigners = (int)$foreigners;
    }

    /**
     * Returns both counters keyed by their Russian labels.
     *
     * NOTE(review): the second key contains a line break, reproduced here
     * from the original multi-line literal; any indentation that followed
     * the break in the original file is not visible - confirm before relying on the key.
     *
     * @return array<string, int>
     */
    public function getData(): array
    {
        return [
            "Всего" => $this->all,
            "Из них численность обучающихся,\nявляющихся иностранными гражданами" => $this->foreigners,
        ];
    }
}
|
@ -1,13 +1,12 @@
|
||||
{
|
||||
"require": {
|
||||
"guzzlehttp/guzzle": "^7.0",
|
||||
"nilportugues/sql-query-builder": "^1.8",
|
||||
"symfony/yaml": "^7.1"
|
||||
"symfony/yaml": "^7.1",
|
||||
"nilportugues/sql-query-builder": "^1.8"
|
||||
},
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"App\\": "app/",
|
||||
"App\\Library\\": "app/library"
|
||||
"ContingentParser\\": "ContingentParser/"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
2
composer.lock
generated
2
composer.lock
generated
@ -4,7 +4,7 @@
|
||||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
||||
"This file is @generated automatically"
|
||||
],
|
||||
"content-hash": "171744ca730de2b7a0ee21a1d982266c",
|
||||
"content-hash": "03ebe40648d54e718198609886034355",
|
||||
"packages": [
|
||||
{
|
||||
"name": "guzzlehttp/guzzle",
|
||||
|
@ -1,11 +0,0 @@
|
||||
create table sveden_education_contingent
|
||||
(
|
||||
id serial not null primary key,
|
||||
org_id int null,
|
||||
spec_id int null,
|
||||
spec_code varchar(100) null comment 'Код',
|
||||
spec_name text null comment 'Наименование',
|
||||
edu_level text null comment 'Уровень образования',
|
||||
edu_forms text null comment 'Формы обучения',
|
||||
contingent int not null comment 'Общая численность обучающихся'
|
||||
);
|
10
create_table_sveden_education_contingent.sql
Normal file
10
create_table_sveden_education_contingent.sql
Normal file
@ -0,0 +1,10 @@
|
||||
CREATE TABLE sveden_education_contingent(
|
||||
id SERIAL NOT NULL PRIMARY KEY,
|
||||
org_id INT NULL,
|
||||
spec_id INT NULL,
|
||||
spec_code VARCHAR(100) NULL COMMENT 'Код',
|
||||
spec_name TEXT NULL COMMENT 'Наименование',
|
||||
edu_level TEXT NULL COMMENT 'Уровень образования',
|
||||
edu_forms TEXT NULL COMMENT 'Формы обучения',
|
||||
contingent INT NOT NULL COMMENT 'Общая численность обучающихся'
|
||||
);
|
109
curl-helper.php
109
curl-helper.php
@ -1,109 +0,0 @@
|
||||
<?php
|
||||
use App\Library\ContingentManager;
|
||||
use App\Library\ContingentParser;
|
||||
use App\Library\Database;
|
||||
use App\Library\DatabaseConfig;
|
||||
use App\Library\Logger;
|
||||
use Symfony\Component\Yaml\Yaml;
|
||||
|
||||
require_once(dirname(__FILE__) ."/vendor/autoload.php");
|
||||
// require_once(dirname(__FILE__) ."/test.php");
|
||||
|
||||
|
||||
// Retry crawl for organisations whose earlier request failed: the failed ids
// are read from an error log, their sites are looked up again and the
// education page of each site is fetched with cURL and parsed.
$logDir = __DIR__ . '/log/' . date('Y-m-d');
$pathLogErrorHttp = $logDir . '/error-http-curl.log';
$pathLogErrorHtml = $logDir . '/error-html.log';

$manager = ContingentManager::getInstance();
$dbOpendata = new Database(new DatabaseConfig('opendata'));
$dbNiimko = new Database(new DatabaseConfig('niimko'));
$specializations = $manager->getSpecializations($dbNiimko);
$orgs = $manager->getOrgs($dbOpendata);
$sites = $manager->getExceptionsHttpCurl('log/2024-08-28/error-http-curl.log');
$sites = $manager->getSitesFromMiccedu($dbOpendata, $sites);

$ua = 'Mozilla/5.0 (X11; Linux x86_64) '
    .'AppleWebKit/537.36 (KHTML, like Gecko) '
    .'Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36';

foreach ($sites as $i => $site) {
    $siteInfo = implode(' ', $site);

    // No site URL is known for this organisation. The message is built here;
    // previously an undefined (or stale) $message was written to the log.
    if (empty($site['site'])) {
        Logger::log($pathLogErrorHttp, $siteInfo);
        continue;
    }
    // Already in the database. Loose comparison is kept deliberately:
    // NOTE(review): org_id may arrive as int or numeric string depending on the PDO driver - confirm.
    if (in_array($site['org_id'], $orgs)) {
        continue;
    }

    print(($i + 1) . '. ' . $siteInfo . PHP_EOL);
    $uri = $manager->buildBaseUri($site['site']);
    echo $uri . PHP_EOL;

    $ch = curl_init($uri);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_HEADER, false);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_USERAGENT, $ua);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 90);
    $html = curl_exec($ch);

    // Collect everything needed from the handle, then release it before any
    // early "continue" so that failed requests do not leak handles.
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $errno = curl_errno($ch);
    curl_close($ch);

    if ($httpCode != 200 && $httpCode != 0) {
        // The server answered, but not with 200: log the status code.
        Logger::log($pathLogErrorHttp, $siteInfo . ' ' . $httpCode);
        continue;
    }
    if ($httpCode == 0) {
        // No HTTP response at all: log the transport error.
        $message = $siteInfo . " cURL error ({$errno}): " . curl_strerror($errno);
        Logger::log($pathLogErrorHttp, $message);
        continue;
    }

    echo "HTTP-code: " . $httpCode . PHP_EOL;
    if (empty($html)) {
        continue;
    }

    $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
    $contingent = $parser->getDataTable();
    // Add spec_id (looked up by spec_code) and org_id to every row.
    $manager->addSpecId($contingent, $specializations);
    $manager->addOrgId($contingent, $site['org_id']);

    if (empty($contingent)) {
        echo "empty". PHP_EOL;
        Logger::log($pathLogErrorHtml, $siteInfo);
        continue;
    }

    print_r($contingent);
    if ($manager->checkContingent($contingent)) {
        // Store the rows in the database.
        $manager->insertContingent($dbOpendata, $contingent);
    } else {
        // All contingent values are zero: treated as a markup problem.
        Logger::log($pathLogErrorHtml, $siteInfo);
    }
}
|
@ -1,3 +0,0 @@
|
||||
<?php
|
||||
require_once 'vendor/autoload.php';
|
||||
require_once 'app/app.php';
|
14
main.php
Normal file
14
main.php
Normal file
@ -0,0 +1,14 @@
|
||||
<?php
|
||||
use ContingentParser\Facade;
use Symfony\Component\Yaml\Yaml;

require_once(dirname(__FILE__) ."/vendor/autoload.php");

// Entry point: reads the list of sites from sites.yaml and lets the facade
// collect contingent data for each of them.
$facade = new Facade();

// Alternative source of the list: $sites = $facade->getSites();
$sitesFile = dirname(__FILE__) ."/sites.yaml";
$yaml = is_readable($sitesFile) ? file_get_contents($sitesFile) : false;
if ($yaml === false) {
    fwrite(STDERR, "Cannot read {$sitesFile}" . PHP_EOL);
    exit(1);
}

$sites = Yaml::parse($yaml);
// An empty or scalar document is not a list of sites.
if (!is_array($sites)) {
    fwrite(STDERR, "{$sitesFile} does not contain a list of sites" . PHP_EOL);
    exit(1);
}

// array_values() keeps the 1-based counter correct even for keyed input.
foreach (array_values($sites) as $i => $site) {
    // "\033[92m" switches the terminal to bright green before the counter.
    print("\033[92m" . ($i+1). ' ');
    $facade->collectDataFromContingent($site);
}
|
15
test.php
Normal file
15
test.php
Normal file
@ -0,0 +1,15 @@
|
||||
<?php
|
||||
use ContingentParser\Database\DatabaseFacade;
use ContingentParser\Facade;

require_once(dirname(__FILE__) ."/vendor/autoload.php");

// Reads "<first token> <second token>" pairs from update-sities.log and
// passes them, flattened into one list, to DatabaseFacade::updateSitesOpendata().
$logFile = dirname(__FILE__) ."/update-sities.log";
$lines = is_readable($logFile)
    ? file($logFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES)
    : false;
if ($lines === false) {
    fwrite(STDERR, "Cannot read {$logFile}" . PHP_EOL);
    exit(1);
}

$data = [];
foreach ($lines as $line) {
    $tokens = explode(" ", $line);
    // Lines without a second token carry no pair to update.
    if (!isset($tokens[1])) {
        continue;
    }
    $data[] = $tokens[0];
    $data[] = trim($tokens[1]);
}

$databaseFacade = new DatabaseFacade();
$databaseFacade->updateSitesOpendata($data);
|
2
vendor/autoload.php
vendored
2
vendor/autoload.php
vendored
@ -22,4 +22,4 @@ if (PHP_VERSION_ID < 50600) {
|
||||
|
||||
require_once __DIR__ . '/composer/autoload_real.php';
|
||||
|
||||
return ComposerAutoloaderInit045658d81f6d9d3243e731dda7bf04d1::getLoader();
|
||||
return ComposerAutoloaderInit03ebe40648d54e718198609886034355::getLoader();
|
||||
|
5
vendor/composer/autoload_psr4.php
vendored
5
vendor/composer/autoload_psr4.php
vendored
@ -8,13 +8,12 @@ $baseDir = dirname($vendorDir);
|
||||
return array(
|
||||
'Symfony\\Polyfill\\Ctype\\' => array($vendorDir . '/symfony/polyfill-ctype'),
|
||||
'Symfony\\Component\\Yaml\\' => array($vendorDir . '/symfony/yaml'),
|
||||
'Psr\\Http\\Message\\' => array($vendorDir . '/psr/http-factory/src', $vendorDir . '/psr/http-message/src'),
|
||||
'Psr\\Http\\Message\\' => array($vendorDir . '/psr/http-message/src', $vendorDir . '/psr/http-factory/src'),
|
||||
'Psr\\Http\\Client\\' => array($vendorDir . '/psr/http-client/src'),
|
||||
'NilPortugues\\Sql\\QueryFormatter\\' => array($vendorDir . '/nilportugues/sql-query-formatter/src'),
|
||||
'NilPortugues\\Sql\\QueryBuilder\\' => array($vendorDir . '/nilportugues/sql-query-builder/src'),
|
||||
'GuzzleHttp\\Psr7\\' => array($vendorDir . '/guzzlehttp/psr7/src'),
|
||||
'GuzzleHttp\\Promise\\' => array($vendorDir . '/guzzlehttp/promises/src'),
|
||||
'GuzzleHttp\\' => array($vendorDir . '/guzzlehttp/guzzle/src'),
|
||||
'App\\Library\\' => array($baseDir . '/app/library'),
|
||||
'App\\' => array($baseDir . '/app'),
|
||||
'ContingentParser\\' => array($baseDir . '/ContingentParser'),
|
||||
);
|
||||
|
10
vendor/composer/autoload_real.php
vendored
10
vendor/composer/autoload_real.php
vendored
@ -2,7 +2,7 @@
|
||||
|
||||
// autoload_real.php @generated by Composer
|
||||
|
||||
class ComposerAutoloaderInit045658d81f6d9d3243e731dda7bf04d1
|
||||
class ComposerAutoloaderInit03ebe40648d54e718198609886034355
|
||||
{
|
||||
private static $loader;
|
||||
|
||||
@ -24,16 +24,16 @@ class ComposerAutoloaderInit045658d81f6d9d3243e731dda7bf04d1
|
||||
|
||||
require __DIR__ . '/platform_check.php';
|
||||
|
||||
spl_autoload_register(array('ComposerAutoloaderInit045658d81f6d9d3243e731dda7bf04d1', 'loadClassLoader'), true, true);
|
||||
spl_autoload_register(array('ComposerAutoloaderInit03ebe40648d54e718198609886034355', 'loadClassLoader'), true, true);
|
||||
self::$loader = $loader = new \Composer\Autoload\ClassLoader(\dirname(__DIR__));
|
||||
spl_autoload_unregister(array('ComposerAutoloaderInit045658d81f6d9d3243e731dda7bf04d1', 'loadClassLoader'));
|
||||
spl_autoload_unregister(array('ComposerAutoloaderInit03ebe40648d54e718198609886034355', 'loadClassLoader'));
|
||||
|
||||
require __DIR__ . '/autoload_static.php';
|
||||
call_user_func(\Composer\Autoload\ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1::getInitializer($loader));
|
||||
call_user_func(\Composer\Autoload\ComposerStaticInit03ebe40648d54e718198609886034355::getInitializer($loader));
|
||||
|
||||
$loader->register(true);
|
||||
|
||||
$filesToLoad = \Composer\Autoload\ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1::$files;
|
||||
$filesToLoad = \Composer\Autoload\ComposerStaticInit03ebe40648d54e718198609886034355::$files;
|
||||
$requireFile = \Closure::bind(static function ($fileIdentifier, $file) {
|
||||
if (empty($GLOBALS['__composer_autoload_files'][$fileIdentifier])) {
|
||||
$GLOBALS['__composer_autoload_files'][$fileIdentifier] = true;
|
||||
|
25
vendor/composer/autoload_static.php
vendored
25
vendor/composer/autoload_static.php
vendored
@ -4,7 +4,7 @@
|
||||
|
||||
namespace Composer\Autoload;
|
||||
|
||||
class ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1
|
||||
class ComposerStaticInit03ebe40648d54e718198609886034355
|
||||
{
|
||||
public static $files = array (
|
||||
'7b11c4dc42b3b3023073cb14e519683c' => __DIR__ . '/..' . '/ralouphie/getallheaders/src/getallheaders.php',
|
||||
@ -35,10 +35,9 @@ class ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1
|
||||
'GuzzleHttp\\Promise\\' => 19,
|
||||
'GuzzleHttp\\' => 11,
|
||||
),
|
||||
'A' =>
|
||||
'C' =>
|
||||
array (
|
||||
'App\\Library\\' => 12,
|
||||
'App\\' => 4,
|
||||
'ContingentParser\\' => 17,
|
||||
),
|
||||
);
|
||||
|
||||
@ -53,8 +52,8 @@ class ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1
|
||||
),
|
||||
'Psr\\Http\\Message\\' =>
|
||||
array (
|
||||
0 => __DIR__ . '/..' . '/psr/http-factory/src',
|
||||
1 => __DIR__ . '/..' . '/psr/http-message/src',
|
||||
0 => __DIR__ . '/..' . '/psr/http-message/src',
|
||||
1 => __DIR__ . '/..' . '/psr/http-factory/src',
|
||||
),
|
||||
'Psr\\Http\\Client\\' =>
|
||||
array (
|
||||
@ -80,13 +79,9 @@ class ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1
|
||||
array (
|
||||
0 => __DIR__ . '/..' . '/guzzlehttp/guzzle/src',
|
||||
),
|
||||
'App\\Library\\' =>
|
||||
'ContingentParser\\' =>
|
||||
array (
|
||||
0 => __DIR__ . '/../..' . '/app/library',
|
||||
),
|
||||
'App\\' =>
|
||||
array (
|
||||
0 => __DIR__ . '/../..' . '/app',
|
||||
0 => __DIR__ . '/../..' . '/ContingentParser',
|
||||
),
|
||||
);
|
||||
|
||||
@ -97,9 +92,9 @@ class ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1
|
||||
public static function getInitializer(ClassLoader $loader)
|
||||
{
|
||||
return \Closure::bind(function () use ($loader) {
|
||||
$loader->prefixLengthsPsr4 = ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1::$prefixLengthsPsr4;
|
||||
$loader->prefixDirsPsr4 = ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1::$prefixDirsPsr4;
|
||||
$loader->classMap = ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1::$classMap;
|
||||
$loader->prefixLengthsPsr4 = ComposerStaticInit03ebe40648d54e718198609886034355::$prefixLengthsPsr4;
|
||||
$loader->prefixDirsPsr4 = ComposerStaticInit03ebe40648d54e718198609886034355::$prefixDirsPsr4;
|
||||
$loader->classMap = ComposerStaticInit03ebe40648d54e718198609886034355::$classMap;
|
||||
|
||||
}, null, ClassLoader::class);
|
||||
}
|
||||
|
12
vendor/composer/installed.php
vendored
12
vendor/composer/installed.php
vendored
@ -1,9 +1,9 @@
|
||||
<?php return array(
|
||||
'root' => array(
|
||||
'name' => '__root__',
|
||||
'pretty_version' => 'dev-main',
|
||||
'version' => 'dev-main',
|
||||
'reference' => 'bf76820498b17a25905a2317da680fb2eb81cf2f',
|
||||
'pretty_version' => '1.0.0+no-version-set',
|
||||
'version' => '1.0.0.0',
|
||||
'reference' => null,
|
||||
'type' => 'library',
|
||||
'install_path' => __DIR__ . '/../../',
|
||||
'aliases' => array(),
|
||||
@ -11,9 +11,9 @@
|
||||
),
|
||||
'versions' => array(
|
||||
'__root__' => array(
|
||||
'pretty_version' => 'dev-main',
|
||||
'version' => 'dev-main',
|
||||
'reference' => 'bf76820498b17a25905a2317da680fb2eb81cf2f',
|
||||
'pretty_version' => '1.0.0+no-version-set',
|
||||
'version' => '1.0.0.0',
|
||||
'reference' => null,
|
||||
'type' => 'library',
|
||||
'install_path' => __DIR__ . '/../../',
|
||||
'aliases' => array(),
|
||||
|
Loading…
x
Reference in New Issue
Block a user