Добавлен класс ContingentManager и начата обработка неудачных попыток парсинга (запись в yaml-файл)

This commit is contained in:
2024-08-13 16:05:24 +03:00
parent bf76820498
commit f06eee3e95
36 changed files with 4662 additions and 67 deletions

View File

@ -1,96 +1,57 @@
<?php
namespace App;
use App\Library\ContingentManager;
use App\Library\DatabaseConfig;
use GuzzleHttp\Exception\ClientException;
use GuzzleHttp\Exception\ConnectException;
use GuzzleHttp\Exception\RequestException;
use GuzzleHttp\Exception\ServerException;
use GuzzleHttp\RequestOptions;
use NilPortugues\Sql\QueryBuilder\Builder\GenericBuilder;
use App\Library\ContingentParser;
use App\Library\Database;
use GuzzleHttp\Client;
use Symfony\Component\Yaml\Yaml;
$dbOpendata = new Database(new DatabaseConfig('opendata'));
$dbNiimko = new Database(new DatabaseConfig('niimko'));
$builder = new GenericBuilder();
// select kod as org_id, site from niimko.s_vuzes
// where ootype = 'vuz' and deleted = 'n' and fake = 'n'
$params = ['vuz', 'n', 'n'];
$query = $builder->select()
->setTable('s_vuzes')
->setColumns(['org_id' => 'kod', 'site'])
->where('AND')
->equals('ootype', 'vuz')
->equals('deleted', 'n')
->equals('fake', 'n')
->end();
$sql = $builder->write($query);
$sites = $dbNiimko->executeQuery($sql, $params);
// select id, kod from niimko.s_specs where oopkodes = 'gos3p'
$params = ['gos3p'];
$query = $builder->select()
->setTable('s_specs')
->setColumns(['id', 'kod'])
->where()
->equals('oopkodes','gos3p')
->end();
$sql = $builder->write($query);
$specializations = $dbNiimko->executeQuery($sql, $params);
$sites = ContingentManager::getInstance()->getSites($dbNiimko);
$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);
// print_r($sites);
// print_r($specializations);
// $sites = [ ['site' => "http://marsu.ru"], ['site' => "http://voenmeh.ru"], ['site' => "http://angtu.ru"] ];
$i = 0;
$status = null;
$succes = 0;
foreach ($sites as $site) {
$failed = array();
for ($i = 0; $i < 100; $i++) {
try {
$client = new Client([
RequestOptions::ALLOW_REDIRECTS => [
'max' => 10, // allow at most 10 redirects.
'strict' => true, // use "strict" RFC compliant redirects.
'referer' => true, // add a Referer header
'max' => 10,
'strict' => true,
'referer' => true,
'track_redirects' => true,
],
]
]);
$baseURL = $sites[$i]['site'];
$url = ContingentManager::getInstance()->buildURL($baseURL);
print(($i+1).". Current url: $url\n");
$route = "{$site['site']}/sveden/education/";
$route = str_replace("http","https", $route);
$route = str_replace("www.","", $route);
print(++$i.". Current url: $route\n");
$response = $client->get($route);
print("StatusCode: ".$response->getStatusCode() . "\n");
$response = $client->get($url, ['timeout' => 300]);
$status = $response->getStatusCode();
$html = $response->getBody()->getContents();
$parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
$contingent = $parser->getDataTable();
// Добавляем поле spec_id по spec_code
foreach ($contingent as $key => $con) {
$needle = $con['spec_code'];
foreach ($specializations as $spec) {
if ($needle == $spec['kod']) {
$con['spec_id'] = $spec['id'];
}
}
$contingent[$key]['spec_id'] = $con['spec_id'];
}
ContingentManager::getInstance()->addSpecId($contingent, $specializations);
// Добавляем поле org_id
foreach ($contingent as $key => $spec) {
$contingent[$key]['org_id'] = $site['org_id'];
}
ContingentManager::getInstance()->addOrgId($contingent, $sites[$i]['org_id']);
print_r($contingent);
if ($response->getStatusCode() == 200 && !empty($contingent)){
print("Succes: ".++$succes."\n");
}
} catch (ClientException $e) {
$response = $e->getCode();
} catch (RequestException $e) {
@ -99,8 +60,19 @@ foreach ($sites as $site) {
$response = $e->getCode();
} catch (ServerException $e) {
$response = $e->getCode();
} finally {
if ($status == 200 && !empty($contingent)) {
$status = 0;
print("Succes: ".++$succes."\n");
// TODO - здесь заносим в базу
} else {
// Сайты, которые распарсить не удалось
$failed[] = $sites[$i];
}
}
}
$yaml = Yaml::dump($failed);
file_put_contents(__DIR__ . '/../failed.yaml', $yaml);
// Чтобы не дублировались в базе
// $dbOpendata->insert('sveden_education_contingent', $data);

View File

@ -0,0 +1,88 @@
<?php
namespace App\Library;
use NilPortugues\Sql\QueryBuilder\Builder\GenericBuilder;
/**
 * Singleton helper that loads reference data for the contingent parser and
 * enriches parsed contingent rows with identifiers.
 *
 * Obtain the shared instance through {@see ContingentManager::getInstance()}.
 */
final class ContingentManager
{
    /** Lazily created shared instance; null until getInstance() is first called. */
    private static ?ContingentManager $instance = null;

    /** SQL builder used to compose the SELECT statements. */
    private ?GenericBuilder $builder;

    private function __construct()
    {
        $this->builder = new GenericBuilder();
    }

    /**
     * Returns the shared instance, creating it on first use.
     */
    public static function getInstance(): ContingentManager
    {
        self::$instance ??= new self();

        return self::$instance;
    }

    /**
     * Loads the organisations whose sites should be parsed.
     *
     * Equivalent SQL:
     *   select kod as org_id, site from niimko.s_vuzes
     *   where ootype = 'vuz' and deleted = 'n' and fake = 'n'
     *
     * @param Database $db Connection the query is executed on.
     *
     * @return array Whatever Database::executeQuery() returns for the query
     *               (presumably rows with `org_id` and `site` keys; confirm
     *               against Database).
     */
    public function getSites(Database $db): array
    {
        // Must list the same values, in the same order, as the equals() calls
        // below: they are handed to executeQuery() as the bound parameters.
        $params = ['vuz', 'n', 'n'];
        $query = $this->builder->select()
            ->setTable('s_vuzes')
            ->setColumns(['org_id' => 'kod', 'site'])
            ->where('AND')
            ->equals('ootype', 'vuz')
            ->equals('deleted', 'n')
            ->equals('fake', 'n')
            ->end();
        $sql = $this->builder->write($query);

        return $db->executeQuery($sql, $params);
    }

    /**
     * Loads the specialization reference list.
     *
     * Equivalent SQL:
     *   select id, kod from niimko.s_specs where oopkodes = 'gos3p'
     *
     * @param Database $db Connection the query is executed on.
     *
     * @return array Whatever Database::executeQuery() returns for the query
     *               (presumably rows with `id` and `kod` keys; confirm
     *               against Database).
     */
    public function getSpecializations(Database $db): array
    {
        // Must mirror the value passed to equals() below.
        $params = ['gos3p'];
        $query = $this->builder->select()
            ->setTable('s_specs')
            ->setColumns(['id', 'kod'])
            ->where()
            ->equals('oopkodes', 'gos3p')
            ->end();
        $sql = $this->builder->write($query);

        return $db->executeQuery($sql, $params);
    }

    /**
     * Builds the HTTPS URL of the "/sveden/education/" page for a site.
     *
     * The scheme is forced to https, a leading "www." host prefix is dropped,
     * and surrounding whitespace / trailing slashes on the input are ignored.
     * An input that already starts with "https://" is kept as is instead of
     * receiving a second scheme prefix.
     *
     * @param string $url Site address, with or without scheme.
     *
     * @return string Normalized URL ending in "/sveden/education/".
     */
    public function buildURL(string $url): string
    {
        // TODO: introduce a proper base_url
        $base = rtrim(trim($url), '/');

        // One anchored replacement covers every case: an optional http(s)
        // scheme and an optional "www." prefix are swapped for "https://".
        // The pattern can match the empty string, so scheme-less input simply
        // gets the prefix prepended.
        $normalized = preg_replace('#^(?:https?://)?(?:www\.)?#i', 'https://', $base)
            ?? "https://{$base}";

        return "{$normalized}/sveden/education/";
    }

    /**
     * Adds a `spec_id` field to every contingent row, resolved from the row's
     * `spec_code` via the specialization list.
     *
     * Rows whose code has no match get `spec_id = null`; a value is never
     * carried over from a previously processed row. When several
     * specializations share a code, the last one in the list wins.
     *
     * @param array $contingent      Rows to enrich; modified in place.
     * @param array $specializations Rows with `id` and `kod` keys.
     */
    public function addSpecId(array &$contingent, array $specializations): void
    {
        // code => id lookup table, built once instead of rescanning the
        // specialization list for every contingent row.
        $idByCode = array_column($specializations, 'id', 'kod');

        foreach ($contingent as $key => $row) {
            $code = $row['spec_code'] ?? null;
            $contingent[$key]['spec_id'] = is_scalar($code)
                ? ($idByCode[(string) $code] ?? null)
                : null;
        }
    }

    /**
     * Adds the given `org_id` to every contingent row.
     *
     * @param array $contingent Rows to enrich; modified in place.
     * @param int   $orgId      Organisation identifier to assign.
     */
    public function addOrgId(array &$contingent, int $orgId): void
    {
        // Iterate over the actual keys so arrays with gaps or string keys are
        // handled and no phantom rows are created.
        foreach (array_keys($contingent) as $key) {
            $contingent[$key]['org_id'] = $orgId;
        }
    }
}

View File

@ -19,7 +19,7 @@ class Database
$password,
[PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION]
);
echo "Подлючено успешно!\n";
print("Подлючено успешно!\n");
} catch (PDOException $e) {
echo "Ошибка подключения:". $e->getMessage() . "\n";
}
@ -34,9 +34,9 @@ class Database
{
try {
$stmt = $this->pdo->prepare($sql);
$params = array_values($params);
for ($i = 0; $i < count($params); $i++) {
$stmt->bindParam(":v".$i++, $params[$i]);
// $params = array_values($params);
for ($i = 0; $i < count($params); $i++) {
$stmt->bindParam(":v".$i+1, $params[$i]);
}
$stmt->execute();
$array = $stmt->fetchAll(PDO::FETCH_ASSOC);