Добавлен класс ContingentManager и начата обработка неудачных попыток парсинга (запись в yaml-файл)
This commit is contained in:
86
app/app.php
86
app/app.php
@ -1,96 +1,57 @@
|
||||
<?php
|
||||
namespace App;
|
||||
|
||||
use App\Library\ContingentManager;
|
||||
use App\Library\DatabaseConfig;
|
||||
use GuzzleHttp\Exception\ClientException;
|
||||
use GuzzleHttp\Exception\ConnectException;
|
||||
use GuzzleHttp\Exception\RequestException;
|
||||
use GuzzleHttp\Exception\ServerException;
|
||||
use GuzzleHttp\RequestOptions;
|
||||
use NilPortugues\Sql\QueryBuilder\Builder\GenericBuilder;
|
||||
use App\Library\ContingentParser;
|
||||
use App\Library\Database;
|
||||
use GuzzleHttp\Client;
|
||||
use Symfony\Component\Yaml\Yaml;
|
||||
|
||||
$dbOpendata = new Database(new DatabaseConfig('opendata'));
|
||||
$dbNiimko = new Database(new DatabaseConfig('niimko'));
|
||||
|
||||
$builder = new GenericBuilder();
|
||||
|
||||
// select kod as org_id, site from niimko.s_vuzes
|
||||
// where ootype = 'vuz' and deleted = 'n' and fake = 'n'
|
||||
$params = ['vuz', 'n', 'n'];
|
||||
$query = $builder->select()
|
||||
->setTable('s_vuzes')
|
||||
->setColumns(['org_id' => 'kod', 'site'])
|
||||
->where('AND')
|
||||
->equals('ootype', 'vuz')
|
||||
->equals('deleted', 'n')
|
||||
->equals('fake', 'n')
|
||||
->end();
|
||||
$sql = $builder->write($query);
|
||||
$sites = $dbNiimko->executeQuery($sql, $params);
|
||||
|
||||
// select id, kod from niimko.s_specs where oopkodes = 'gos3p'
|
||||
$params = ['gos3p'];
|
||||
$query = $builder->select()
|
||||
->setTable('s_specs')
|
||||
->setColumns(['id', 'kod'])
|
||||
->where()
|
||||
->equals('oopkodes','gos3p')
|
||||
->end();
|
||||
$sql = $builder->write($query);
|
||||
$specializations = $dbNiimko->executeQuery($sql, $params);
|
||||
|
||||
$sites = ContingentManager::getInstance()->getSites($dbNiimko);
|
||||
$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);
|
||||
// print_r($sites);
|
||||
// print_r($specializations);
|
||||
|
||||
// $sites = [ ['site' => "http://marsu.ru"], ['site' => "http://voenmeh.ru"], ['site' => "http://angtu.ru"] ];
|
||||
$i = 0;
|
||||
$status = null;
|
||||
$succes = 0;
|
||||
foreach ($sites as $site) {
|
||||
$failed = array();
|
||||
for ($i = 0; $i < 100; $i++) {
|
||||
try {
|
||||
$client = new Client([
|
||||
RequestOptions::ALLOW_REDIRECTS => [
|
||||
'max' => 10, // allow at most 10 redirects.
|
||||
'strict' => true, // use "strict" RFC compliant redirects.
|
||||
'referer' => true, // add a Referer header
|
||||
'max' => 10,
|
||||
'strict' => true,
|
||||
'referer' => true,
|
||||
'track_redirects' => true,
|
||||
],
|
||||
]
|
||||
]);
|
||||
$baseURL = $sites[$i]['site'];
|
||||
$url = ContingentManager::getInstance()->buildURL($baseURL);
|
||||
print(($i+1).". Current url: $url\n");
|
||||
|
||||
$route = "{$site['site']}/sveden/education/";
|
||||
$route = str_replace("http","https", $route);
|
||||
$route = str_replace("www.","", $route);
|
||||
print(++$i.". Current url: $route\n");
|
||||
|
||||
$response = $client->get($route);
|
||||
print("StatusCode: ".$response->getStatusCode() . "\n");
|
||||
$response = $client->get($url, ['timeout' => 300]);
|
||||
$status = $response->getStatusCode();
|
||||
|
||||
$html = $response->getBody()->getContents();
|
||||
$parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
|
||||
$contingent = $parser->getDataTable();
|
||||
|
||||
// Добавляем поле spec_id по spec_code
|
||||
foreach ($contingent as $key => $con) {
|
||||
$needle = $con['spec_code'];
|
||||
foreach ($specializations as $spec) {
|
||||
if ($needle == $spec['kod']) {
|
||||
$con['spec_id'] = $spec['id'];
|
||||
}
|
||||
}
|
||||
$contingent[$key]['spec_id'] = $con['spec_id'];
|
||||
}
|
||||
ContingentManager::getInstance()->addSpecId($contingent, $specializations);
|
||||
|
||||
// Добавляем поле org_id
|
||||
foreach ($contingent as $key => $spec) {
|
||||
$contingent[$key]['org_id'] = $site['org_id'];
|
||||
}
|
||||
ContingentManager::getInstance()->addOrgId($contingent, $sites[$i]['org_id']);
|
||||
|
||||
print_r($contingent);
|
||||
if ($response->getStatusCode() == 200 && !empty($contingent)){
|
||||
print("Succes: ".++$succes."\n");
|
||||
}
|
||||
} catch (ClientException $e) {
|
||||
$response = $e->getCode();
|
||||
} catch (RequestException $e) {
|
||||
@ -99,8 +60,19 @@ foreach ($sites as $site) {
|
||||
$response = $e->getCode();
|
||||
} catch (ServerException $e) {
|
||||
$response = $e->getCode();
|
||||
} finally {
|
||||
if ($status == 200 && !empty($contingent)) {
|
||||
$status = 0;
|
||||
print("Succes: ".++$succes."\n");
|
||||
// TODO - здесь заносим в базу
|
||||
} else {
|
||||
// Сайты, которые распарсить не удалось
|
||||
$failed[] = $sites[$i];
|
||||
}
|
||||
}
|
||||
}
|
||||
$yaml = Yaml::dump($failed);
|
||||
file_put_contents(__DIR__ . '/../failed.yaml', $yaml);
|
||||
|
||||
// Чтобы не дублировались в базе
|
||||
// $dbOpendata->insert('sveden_education_contingent', $data);
|
||||
|
88
app/library/ContingentManager.php
Normal file
88
app/library/ContingentManager.php
Normal file
@ -0,0 +1,88 @@
|
||||
<?php
|
||||
namespace App\Library;
|
||||
use NilPortugues\Sql\QueryBuilder\Builder\GenericBuilder;
|
||||
|
||||
/**
 * Singleton helper used by the contingent parsing script.
 *
 * It loads the reference lists (sites and specializations) through a
 * Database instance, builds the URL of the page to parse and enriches the
 * parsed contingent rows with `spec_id` and `org_id`.
 */
final class ContingentManager
{
    /** Lazily created shared instance; null until getInstance() is first called. */
    private static ?ContingentManager $instance = null;

    /** Query builder used to generate the SELECT statements. */
    private GenericBuilder $builder;

    private function __construct()
    {
        $this->builder = new GenericBuilder();
    }

    /**
     * Returns the shared instance, creating it on first use.
     */
    public static function getInstance(): ContingentManager
    {
        return self::$instance ??= new self();
    }

    /**
     * Loads the organisations whose sites should be parsed.
     *
     * Equivalent SQL:
     *   select kod as org_id, site from niimko.s_vuzes
     *   where ootype = 'vuz' and deleted = 'n' and fake = 'n'
     *
     * @param Database $db connection the query is executed on
     *
     * @return array whatever Database::executeQuery() returns for the query;
     *               callers read the `org_id` and `site` keys of each row
     */
    public function getSites(Database $db): array
    {
        // Values are passed positionally, in the same order as the
        // equals() conditions below.
        $params = ['vuz', 'n', 'n'];
        $query = $this->builder->select()
            ->setTable('s_vuzes')
            ->setColumns(['org_id' => 'kod', 'site'])
            ->where('AND')
            ->equals('ootype', 'vuz')
            ->equals('deleted', 'n')
            ->equals('fake', 'n')
            ->end();
        $sql = $this->builder->write($query);

        return $db->executeQuery($sql, $params);
    }

    /**
     * Loads the specialization reference list.
     *
     * Equivalent SQL:
     *   select id, kod from niimko.s_specs where oopkodes = 'gos3p'
     *
     * @param Database $db connection the query is executed on
     *
     * @return array whatever Database::executeQuery() returns for the query;
     *               addSpecId() reads the `id` and `kod` keys of each row
     */
    public function getSpecializations(Database $db): array
    {
        $params = ['gos3p'];
        $query = $this->builder->select()
            ->setTable('s_specs')
            ->setColumns(['id', 'kod'])
            ->where()
            ->equals('oopkodes', 'gos3p')
            ->end();
        $sql = $this->builder->write($query);

        return $db->executeQuery($sql, $params);
    }

    /**
     * Builds the HTTPS URL of the "/sveden/education/" page for a site.
     *
     * Any existing `http://` or `https://` scheme is replaced by `https://`,
     * a leading `www.` is dropped from the host and trailing slashes of the
     * base URL are removed so the result never contains `//sveden`.
     *
     * @param string $url site address, with or without a scheme
     *
     * @return string normalised URL ending in `/sveden/education/`
     */
    public function buildURL(string $url): string
    {
        // TODO: introduce a configurable base_url instead of the fixed path.
        $host = trim($url);

        // Strip the scheme first; an already-https address must not end up
        // as "https://https://...".
        $host = preg_replace('#^https?://#i', '', $host) ?? $host;

        // Only a leading "www." belongs to the host prefix; occurrences
        // elsewhere in the address are left alone.
        if (stripos($host, 'www.') === 0) {
            $host = substr($host, 4);
        }

        $host = rtrim($host, '/');

        return "https://{$host}/sveden/education/";
    }

    /**
     * Adds a `spec_id` field to every contingent row, looked up by `spec_code`.
     *
     * Rows whose code is missing or has no counterpart in $specializations
     * get `spec_id = null` (they no longer inherit the id of a previous row).
     * When several specializations share a code, the last one wins.
     *
     * @param array $contingent      rows to enrich, modified in place
     * @param array $specializations rows with `id` and `kod` keys
     */
    public function addSpecId(array &$contingent, array $specializations): void
    {
        // kod => id lookup table, built once instead of rescanning the
        // whole list for every contingent row.
        $idByCode = array_column($specializations, 'id', 'kod');

        foreach ($contingent as $key => $row) {
            $code = $row['spec_code'] ?? null;
            $contingent[$key]['spec_id'] = (is_string($code) || is_int($code))
                ? ($idByCode[$code] ?? null)
                : null;
        }
    }

    /**
     * Adds the same `org_id` field to every contingent row.
     *
     * @param array $contingent rows to enrich, modified in place
     * @param int   $orgId      organisation identifier to store
     */
    public function addOrgId(array &$contingent, int $orgId): void
    {
        // Iterate over the real keys: the array is not guaranteed to be a
        // 0..n-1 list.
        foreach (array_keys($contingent) as $key) {
            $contingent[$key]['org_id'] = $orgId;
        }
    }
}
|
@ -19,7 +19,7 @@ class Database
|
||||
$password,
|
||||
[PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION]
|
||||
);
|
||||
echo "Подлючено успешно!\n";
|
||||
print("Подлючено успешно!\n");
|
||||
} catch (PDOException $e) {
|
||||
echo "Ошибка подключения:". $e->getMessage() . "\n";
|
||||
}
|
||||
@ -34,9 +34,9 @@ class Database
|
||||
{
|
||||
try {
|
||||
$stmt = $this->pdo->prepare($sql);
|
||||
$params = array_values($params);
|
||||
for ($i = 0; $i < count($params); $i++) {
|
||||
$stmt->bindParam(":v".$i++, $params[$i]);
|
||||
// $params = array_values($params);
|
||||
for ($i = 0; $i < count($params); $i++) {
|
||||
$stmt->bindParam(":v".$i+1, $params[$i]);
|
||||
}
|
||||
$stmt->execute();
|
||||
$array = $stmt->fetchAll(PDO::FETCH_ASSOC);
|
||||
|
Reference in New Issue
Block a user