ПОЛНЫЙ РЕФАКТОРИНГ

This commit is contained in:
2024-08-29 21:57:53 +03:00
commit 88d85865a0
299 changed files with 52566 additions and 0 deletions

View File

@ -0,0 +1,86 @@
<?php
namespace ContingentParser\Database;
use ContingentParser\Logger\DatabaseLogger;
use Symfony\Component\Yaml\Yaml;
use PDOException;
use PDO;
/**
 * Thin wrapper around a PDO connection that logs connection events and query
 * failures through DatabaseLogger.
 *
 * A failed connection attempt is logged rather than thrown; the instance then
 * has no connection and select()/insert() log the problem instead of crashing.
 */
final class Database
{
    /** Row fields bound by insert(); each one is bound as ":<field><row number>". */
    private const INSERT_FIELDS = [
        'spec_code',
        'spec_name',
        'edu_forms',
        'edu_level',
        'contingent',
        'org_id',
        'spec_id',
    ];

    private static string $_logFile = 'log/database.log';
    /** Stays null when the connection attempt in the constructor failed. */
    private ?PDO $_pdo = null;
    private DatabaseConfig $_databaseConfig;
    private DatabaseLogger $_logger;

    /**
     * Opens the connection described by $config and logs the outcome.
     *
     * @param DatabaseConfig $config Source of the DSN, user name and password.
     */
    public function __construct(DatabaseConfig $config)
    {
        $this->_logger = new DatabaseLogger(self::$_logFile);
        $this->_databaseConfig = $config;
        try {
            $this->_pdo = new PDO(
                $this->_databaseConfig->getDsn(),
                $this->_databaseConfig->getUsername(),
                $this->_databaseConfig->getPassword(),
                [PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION]
            );
            $message = "Подключение к {$this->_databaseConfig->getDBName()} успешно!";
            $this->_logger->log($message);
        } catch (PDOException $e) {
            $message = "Ошибка подключения к {$this->_databaseConfig->getDBName()}: {$e->getMessage()}";
            $this->_logger->log($message);
        }
    }

    /**
     * Logs that the connection is going away.
     */
    public function __destruct()
    {
        $message = "Подключение к {$this->_databaseConfig->getDBName()} прервано!";
        $this->_logger->log($message);
    }

    /**
     * Runs a SELECT statement and returns all rows as associative arrays.
     *
     * Each entry of $params is bound to the placeholder ":v<key>", so the
     * array is expected to be keyed from 1 (":v1", ":v2", ...). Entries whose
     * placeholder does not occur in $sql are skipped, because binding an
     * unknown placeholder makes PDO reject the statement.
     *
     * @param string $sql    Statement with ":v<N>" placeholders.
     * @param array  $params Values keyed by placeholder number.
     *
     * @return array Fetched rows; empty when the query fails or there is no connection.
     */
    public function select(string $sql, array $params = []) : array
    {
        $rows = [];
        if ($this->_pdo === null) {
            $this->_logger->log("Ошибка запроса: нет подключения к базе данных");
            return $rows;
        }
        try {
            $stmt = $this->_pdo->prepare($sql);
            foreach ($params as $index => $value) {
                $placeholder = ":v$index";
                // (?!\d) keeps ":v1" from matching inside ":v10".
                $pattern = '/' . preg_quote($placeholder, '/') . '(?!\d)/';
                if (preg_match($pattern, $sql) === 1) {
                    $stmt->bindValue($placeholder, $value);
                }
            }
            $stmt->execute();
            $rows = $stmt->fetchAll(PDO::FETCH_ASSOC);
        } catch (PDOException $e) {
            $message = "Ошибка запроса: " . $e->getMessage();
            $this->_logger->log($message);
        }
        return $rows;
    }

    /**
     * Executes a multi-row INSERT statement.
     *
     * For the row at position i (0-based) every field listed in INSERT_FIELDS
     * is bound to ":<field><i + 1>"; a field missing from a row is bound as
     * NULL. When the statement fails with SQLSTATE "HY000", or when there is
     * no connection at all, the rows are appended to a YAML file instead.
     *
     * @param string $sql    Statement with ":<field><N>" placeholders.
     * @param array  $params List of rows (associative arrays).
     */
    public function insert(string $sql, array $params)
    {
        if ($this->_pdo === null) {
            $this->_logger->log("Ошибка запроса: нет подключения к базе данных");
            $this->saveUnrecorded($params);
            return;
        }
        try {
            $stmt = $this->_pdo->prepare($sql);
            foreach (array_values($params) as $i => $row) {
                $number = $i + 1;
                foreach (self::INSERT_FIELDS as $field) {
                    $stmt->bindValue(":{$field}{$number}", $row[$field] ?? null);
                }
            }
            $stmt->execute();
            $this->_logger->log("Запрос выполнен успешно!");
        } catch (PDOException $e) {
            $message = "Ошибка запроса:" . $e->getMessage();
            $this->_logger->log($message);
            // On a general driver error keep the valid data in a YAML file.
            if ($e->getCode() === "HY000") {
                $this->saveUnrecorded($params);
            }
        }
    }

    /**
     * Appends rows that could not be stored in the database to a YAML file.
     */
    private function saveUnrecorded(array $rows) : void
    {
        // NOTE(review): the leading slash puts this file in the filesystem
        // root; confirm that this location is intended.
        $yaml = Yaml::dump($rows);
        file_put_contents('/not-recorded-in-db.yaml', $yaml, FILE_APPEND);
    }
}

View File

@ -0,0 +1,62 @@
<?php
namespace ContingentParser\Database;
/**
 * Connection settings for one database, read from a section of the ".env"
 * INI file in the current working directory.
 */
final class DatabaseConfig
{
    /** Keys every database section of the .env file has to define. */
    private const REQUIRED_KEYS = [
        'DB_DRIVER',
        'DB_HOST',
        'DB_NAME',
        'DB_PORT',
        'DB_CHARSET',
        'DB_USERNAME',
        'DB_PASSWORD',
    ];

    private string $_driver;
    private string $_host;
    private string $_dbname;
    private string $_port;
    private string $_charset;
    private string $_username;
    private string $_password;

    /**
     * @param string $db Name of the .env section to load (matched in upper case).
     *
     * @throws \RuntimeException When the file cannot be parsed or the section
     *                           lacks one of the required keys.
     */
    public function __construct(string $db)
    {
        $config = $this->getDataEnv($db);
        $missing = array_diff(self::REQUIRED_KEYS, array_keys($config));
        if ($missing !== []) {
            throw new \RuntimeException(
                "В файле .env для \"{$db}\" не заданы параметры: " . implode(', ', $missing)
            );
        }
        $this->_driver = $config['DB_DRIVER'];
        $this->_host = $config['DB_HOST'];
        $this->_dbname = $config['DB_NAME'];
        $this->_port = $config['DB_PORT'];
        $this->_charset = $config["DB_CHARSET"];
        $this->_username = $config['DB_USERNAME'];
        $this->_password = $config['DB_PASSWORD'];
    }

    /**
     * Returns the key/value pairs of the section named after $db, or an empty
     * array when the file has no such section.
     *
     * @throws \RuntimeException When the .env file cannot be read or parsed.
     */
    private function getDataEnv(string $db) : array
    {
        $envVars = parse_ini_file('.env', true);
        if ($envVars === false) {
            throw new \RuntimeException("Не удалось прочитать файл конфигурации .env");
        }
        $section = $envVars[strtoupper($db)] ?? [];
        // A top-level scalar with the same name is not a section.
        return is_array($section) ? $section : [];
    }

    /** Database (schema) name. */
    public function getDBName(): string
    {
        return $this->_dbname;
    }

    /** Builds the PDO DSN: "<driver>:host=...;dbname=...;charset=...;port=...". */
    public function getDsn() : string
    {
        return $this->_driver.":host=".$this->_host
            .";dbname=".$this->_dbname
            .";charset=".$this->_charset
            .";port=".$this->_port;
    }

    /** User name for the connection. */
    public function getUsername() : string
    {
        return $this->_username;
    }

    /** Password for the connection. */
    public function getPassword() : string
    {
        return $this->_password;
    }
}

215
ContingentParser/Facade.php Normal file
View File

@ -0,0 +1,215 @@
<?php
namespace ContingentParser;
use ContingentParser\Database\Database;
use ContingentParser\Parser\ContingentParser;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\ClientException;
use GuzzleHttp\Exception\ConnectException;
use GuzzleHttp\Exception\RequestException;
use GuzzleHttp\Exception\ServerException;
use GuzzleHttp\Psr7\Exception\MalformedUriException;
use GuzzleHttp\TransferStats;
use NilPortugues\Sql\QueryBuilder\Builder\GenericBuilder;
/**
 * Entry point of the parsing pipeline: reads reference data from the
 * database, downloads the "sveden/education" page of a site and turns its
 * contingent table into rows.
 */
class Facade
{
    private GenericBuilder $_builder;

    public function __construct()
    {
        $this->_builder = new GenericBuilder();
    }

    /**
     * Loads organisation ids and site addresses.
     *
     * SELECT kod AS org_id, site FROM s_vuzes
     * WHERE ootype = 'vuz' AND deleted = 'n' AND fake = 'n' AND country = 'RU'
     *
     * @return array Rows as returned by Database::select().
     */
    public function getSitesFromDatabase(Database $db) : array
    {
        // Values are listed in the order the conditions are added below and
        // keyed from 1, as Database::select() requires.
        $params = [1 => 'vuz', 'n', 'n', 'RU'];
        $query = $this->_builder->select()
            ->setTable('s_vuzes')
            ->setColumns(['org_id' => 'kod', 'site'])
            ->where('AND')
            ->equals('ootype', 'vuz')
            ->equals('deleted', 'n')
            ->equals('fake', 'n')
            ->equals('country', 'RU')
            ->end();
        $sql = $this->_builder->write($query);
        return $db->select($sql, $params);
    }

    /**
     * Loads specialty ids and codes.
     *
     * SELECT id AS spec_id, kod AS spec_code FROM s_specs
     * WHERE oopkodes = 'gos3p'
     *
     * @return array Rows as returned by Database::select().
     */
    public function getSpecialtiesFromDatabase(Database $db) : array
    {
        $params = [1 => 'gos3p'];
        $query = $this->_builder->select()
            ->setTable('s_specs')
            ->setColumns(['spec_id' =>'id', 'spec_code' => 'kod'])
            ->where()
            ->equals('oopkodes','gos3p')
            ->end();
        $sql = $this->_builder->write($query);
        return $db->select($sql, $params);
    }

    /**
     * Loads the distinct organisation ids already present in
     * sveden_education_contingent.
     *
     * SELECT DISTINCT org_id FROM sveden_education_contingent
     *
     * @return array Rows as returned by Database::select().
     */
    public function getUniversitiesFromDatabase(Database $db) : array
    {
        $query = $this->_builder->select()
            ->setTable('sveden_education_contingent')
            ->setColumns(['org_id']);
        $sql = $this->_builder->write($query);
        // DISTINCT is patched into the generated text; the statement has no
        // placeholders, so nothing is bound.
        $sql = preg_replace('/^\s*SELECT\b/', 'SELECT DISTINCT', $sql, 1) ?? $sql;
        return $db->select($sql);
    }

    /**
     * Normalises a raw site address to "<scheme>://<host>/".
     *
     * The input is lower-cased, stripped of whitespace, repaired for the
     * "www/" and "http:\\" typos and prefixed with "https://" when it has no
     * scheme.
     *
     * @return string The base URI, or an empty string when no host can be
     *                extracted from $url.
     */
    public function getBaseUri(string $url) : string
    {
        $url = trim(strtolower($url));
        $url = preg_replace('/\s+/', '', $url);
        $url = str_replace("www/", "www.", $url);
        $url = str_replace("http:\\\\", "", $url);
        if (!preg_match('#^https?://#', $url)) {
            $url = "https://$url";
        }
        $parts = parse_url($url);
        // parse_url() yields false or no host for input such as "https://".
        if (!is_array($parts) || !isset($parts['scheme'], $parts['host'])) {
            return '';
        }
        $url = $parts['scheme'] . '://' . $parts['host'] . '/';
        // NOTE(review): underscores in the host are turned into slashes;
        // confirm which malformed addresses this is meant to repair.
        $url = str_replace("_", "/", $url);
        return trim($url);
    }

    /**
     * Downloads the "sveden/education/" page of a site.
     *
     * The base URI is requested first so that redirects are followed; the
     * page path is then appended to the effective URI of that request.
     * Progress is printed to standard output.
     *
     * @param string $uri  Base URI of the site.
     * @param array  $site Site row (not used here).
     *
     * @return string Page HTML, or an empty string when a request fails with
     *                one of the caught Guzzle exceptions.
     */
    public function handleEducationContingentSites(
        string $uri,
        array $site
    ) : string {
        $html = '';
        $effectiveUri = null;
        try {
            $client = new Client(
                $this->setConfigClient($uri)
            );
            // Request the base URI to learn where it finally redirects to.
            $client->get('', [
                'on_stats' => function (TransferStats $stats) use (&$effectiveUri) {
                    $effectiveUri = $stats->getEffectiveUri();
                }
            ]);
            $url = (string) ($effectiveUri ?? $uri);
            print("Redirect $uri -> $url" . PHP_EOL);
            if (substr($url, -1) == '/') {
                $url .= "sveden/education/";
            } else {
                $url .= "/sveden/education/";
            }
            print("Parsing for $url" . PHP_EOL);
            $response = $client->get($url);
            $html = $response->getBody()->getContents();
        } catch (ClientException
            | RequestException
            | ConnectException
            | ServerException
            | MalformedUriException $e
        ) {
            $html = '';
        }
        return $html;
    }

    /**
     * Builds the Guzzle client options for the given base URI.
     */
    private function setConfigClient(string $baseUri) : array
    {
        return [
            'force_ip_resolve' => 'v4',
            // Guzzle writes its debug output to this stream.
            'debug' => fopen("debug-http.log", "a"),
            'base_uri' => $baseUri,
            'allow_redirects' => [
                'max' => 5,
                'strict' => true,
                'referer' => true,
                'protocols' => ['http', 'https'],
                'track_redirects' => true
            ],
            'connect_timeout' => 300.0,
            // NOTE(review): TLS certificate verification is switched off;
            // confirm this is acceptable for the crawled sites.
            'verify' => false,
            'headers' => [
                'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36',
                'Content-Type' => 'text/html;charset=utf-8'
            ]
        ];
    }

    /**
     * Parses the contingent table out of $html and tags each row with $orgId.
     *
     * @param string     $html        Page HTML.
     * @param array|null $specialties Specialty rows (currently unused).
     * @param int        $orgId       Organisation id stored in every row.
     *
     * @return array List of contingent rows.
     */
    public function getContingent(
        string $html,
        ?array $specialties,
        int $orgId
    ) : array {
        $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
        $contingent = $parser->getDataTable();
        $this->addOrgId($contingent, $orgId);
        return $contingent;
    }

    /**
     * Adds "spec_id" to every row by looking its "spec_code" up in
     * $specialties; rows with an unknown code get null.
     */
    private function addSpecId(array &$contingent, array $specialties) : void
    {
        $specIdMap = array_column($specialties, 'spec_id', 'spec_code');
        foreach ($contingent as $key => $con) {
            $contingent[$key]['spec_id'] = $specIdMap[$con['spec_code']] ?? null;
        }
    }

    /**
     * Adds "org_id" to every row.
     */
    private function addOrgId(array &$contingent, int $orgId): void
    {
        foreach ($contingent as &$con) {
            $con['org_id'] = $orgId;
        }
        // Drop the reference so later writes to $con cannot touch the last row.
        unset($con);
    }

    /**
     * Tells whether the rows carry any students at all, i.e. whether the sum
     * of their "contingent" values is non-zero.
     */
    public function isValidContingent(array $contingent) : bool
    {
        return (bool) array_sum(array_column($contingent, 'contingent'));
    }

    /**
     * Prints a multi-row INSERT statement for the given rows.
     *
     * The builder produces the statement with the first tuple (:v1 ... :v7);
     * one more comma-separated tuple of consecutively numbered placeholders
     * is appended per additional row. The statement is only echoed, not
     * executed. Nothing is printed for an empty list.
     */
    public function insertContingent(array $contingent) : void
    {
        if ($contingent === []) {
            return;
        }
        $columns = [
            'org_id' => '',
            'spec_id' => '',
            'edu_code' => '',
            'edu_name' => '',
            'edu_form' => '',
            'edu_level' => '',
            'contingent' => ''
        ];
        $query = $this->_builder->insert()
            ->setTable('sveden_education_contingent')
            ->setValues($columns);
        $sql = rtrim($this->_builder->writeFormatted($query));
        $perRow = count($columns);
        $rowCount = count($contingent);
        for ($row = 1; $row < $rowCount; $row++) {
            $first = $row * $perRow + 1;
            $placeholders = [];
            for ($n = $first; $n < $first + $perRow; $n++) {
                $placeholders[] = ":v$n";
            }
            $sql .= ",\n (" . implode(', ', $placeholders) . ")";
        }
        echo $sql . "\n";
    }
}

View File

@ -0,0 +1,12 @@
<?php
namespace ContingentParser\Logger;
/**
 * Logger that appends timestamped lines to the file given to the constructor.
 */
final class DatabaseLogger extends Logger
{
    /**
     * Appends "[Y-m-d H:i:s] <message>" followed by a newline to the log file.
     *
     * The directory of the log file is created when it does not exist yet,
     * and the append takes an exclusive lock so concurrent writers do not
     * interleave their lines.
     */
    public function log(string $message) : void
    {
        $directory = dirname($this->_path);
        if (!is_dir($directory)) {
            mkdir($directory, 0775, true);
        }
        $date = date('Y-m-d H:i:s');
        $logMessage = "[$date] $message\n";
        file_put_contents($this->_path, $logMessage, FILE_APPEND | LOCK_EX);
    }
}

View File

@ -0,0 +1,10 @@
<?php
namespace ContingentParser\Logger;
/**
 * Logger intended for HTTP activity. It currently records nothing: log() is
 * an empty stub.
 */
final class HttpLogger extends Logger
{
/**
 * Accepts a message and discards it.
 *
 * @param string $message Text that would be logged.
 */
public function log(string $message) : void
{
// Intentionally empty: no HTTP logging is implemented yet.
}
}

View File

@ -0,0 +1,12 @@
<?php
namespace ContingentParser\Logger;
/**
 * Base class of all loggers: remembers the target path and leaves the actual
 * writing to subclasses.
 */
abstract class Logger
{
    /** Path handed to the constructor; subclasses decide how to use it. */
    protected string $_path;

    /**
     * Records one message.
     *
     * @param string $message Text to log.
     */
    abstract public function log(string $message) : void;

    /**
     * @param string $path Path the logger writes to.
     */
    public function __construct(string $path)
    {
        $this->_path = $path;
    }
}

View File

@ -0,0 +1,89 @@
<?php
namespace ContingentParser\Parser;
use DOMDocument;
use DOMXPath;
/**
 * Extracts the student-contingent table from an HTML page.
 *
 * Cells are located by their itemprop attribute below the XPath prefix given
 * as $template (for example '//tr[@itemprop="eduChislen"]//').
 */
class ContingentParser
{
    private const ENCODING = "UTF-8";
    /** itemprop name => tag name(s) the cell may use. */
    private const FIELDS = [
        "eduCode" => "td",
        "eduName" => "td",
        "eduLevel" => "td",
        "eduForm" => "td",
        "numberAll" => ["th", "td"]
    ];

    private DOMXPath $xpath;
    private string $template;

    /**
     * @param string $html     Page markup; may be empty, which yields no rows.
     * @param string $template XPath prefix that is put in front of each cell query.
     */
    public function __construct(string $html, string $template)
    {
        // Markup problems are collected by libxml instead of raising warnings.
        libxml_use_internal_errors(true);
        $dom = new DOMDocument(
            encoding: self::ENCODING
        );
        $this->setEncoding($html);
        // loadHTML() throws ValueError for an empty string, so an empty page
        // is represented by an empty document.
        if (trim($html) !== '') {
            $dom->loadHTML($html);
        }
        // Free the collected errors; they are never inspected.
        libxml_clear_errors();
        $this->xpath = new DOMXPath($dom);
        $this->template = $template;
    }

    /**
     * Converts $html to UTF-8 in place when it is detected as Windows-1251
     * and rewrites the charset label inside the markup to match.
     */
    private function setEncoding(string &$html) : void
    {
        $encoding = mb_detect_encoding($html, 'UTF-8, windows-1251');
        if ($encoding === false || $encoding == self::ENCODING) {
            return;
        }
        $html = mb_convert_encoding(
            $html,
            self::ENCODING,
            $encoding
        );
        // Case-insensitive: the label is often written "Windows-1251".
        $html = str_ireplace('windows-1251', self::ENCODING, $html);
    }

    /**
     * Runs one XPath query per field.
     *
     * When a field may use several tags, the tag with the most matches wins;
     * on a tie the later tag is taken.
     *
     * @return array Field name => DOMNodeList, or an empty array when a query
     *               could not be evaluated.
     */
    private function parse() : array
    {
        $data = [];
        foreach (self::FIELDS as $field => $tags) {
            $best = null;
            foreach ((array) $tags as $tag) {
                $nodes = $this->xpath->query($this->template . $tag . "[@itemprop=\"$field\"]");
                if ($nodes === false) {
                    return [];
                }
                if ($best === null || $nodes->length >= $best->length) {
                    $best = $nodes;
                }
            }
            $data[$field] = $best;
        }
        return $data;
    }

    /**
     * Builds the table rows.
     *
     * The table is accepted only when every field was found and all fields
     * have the same number of cells; otherwise an empty array is returned.
     * An exception thrown by the ContingentRow constructor is not caught.
     *
     * @return array List of rows as produced by ContingentRow::getData().
     */
    public function getDataTable() : array
    {
        $data = $this->parse();
        if ($data === []) {
            return [];
        }
        $expected = $data['eduName']->length;
        if ($expected === 0) {
            return [];
        }
        foreach ($data as $nodes) {
            if ($nodes->length !== $expected) {
                return [];
            }
        }
        $records = [];
        for ($i = 0; $i < $expected; $i++) {
            $contingentRow = new ContingentRow(
                $data['eduCode']->item($i)->textContent,
                $data['eduName']->item($i)->textContent,
                $data['eduLevel']->item($i)->textContent,
                $data['eduForm']->item($i)->textContent,
                (int)$data['numberAll']->item($i)->textContent
            );
            $records[] = $contingentRow->getData();
        }
        return $records;
    }
}

View File

@ -0,0 +1,33 @@
<?php
namespace ContingentParser\Parser;
/**
 * One row of the contingent table: specialty code and name, education level
 * and form, and the number of students.
 */
class ContingentRow
{
    private string $eduCode;
    private string $eduName;
    private string $eduLevel;
    private string $eduForm;
    private int $contingent;

    /**
     * Stores the text fields with surrounding whitespace removed.
     *
     * @throws \Exception When $contingent is negative.
     */
    public function __construct(
        string $eduCode,
        string $eduName,
        string $eduLevel,
        string $eduForm,
        int $contingent
    ) {
        if ($contingent < 0) {
            throw new \Exception("Недействительная численность обучающихся!");
        }
        [$this->eduCode, $this->eduName, $this->eduLevel, $this->eduForm] = array_map(
            'trim',
            [$eduCode, $eduName, $eduLevel, $eduForm]
        );
        $this->contingent = $contingent;
    }

    /**
     * Returns the row as an associative array keyed by column name.
     */
    public function getData() : array
    {
        $row = [];
        $row["spec_code"] = $this->eduCode;
        $row["spec_name"] = $this->eduName;
        $row["edu_level"] = $this->eduLevel;
        $row["edu_forms"] = $this->eduForm;
        $row["contingent"] = $this->contingent;
        return $row;
    }
}

View File

@ -0,0 +1 @@
<?php