2024-08-08 12:32:27 +02:00
|
|
|
<?php
|
|
|
|
namespace App;
|
|
|
|
|
2024-08-13 15:05:24 +02:00
|
|
|
use App\Library\ContingentManager;
|
2024-08-13 12:15:21 +02:00
|
|
|
use App\Library\DatabaseConfig;
|
2024-08-12 14:14:49 +02:00
|
|
|
use GuzzleHttp\Exception\ClientException;
|
|
|
|
use GuzzleHttp\Exception\ConnectException;
|
|
|
|
use GuzzleHttp\Exception\RequestException;
|
|
|
|
use GuzzleHttp\Exception\ServerException;
|
2024-08-12 15:58:12 +02:00
|
|
|
use GuzzleHttp\RequestOptions;
|
2024-08-08 15:38:54 +02:00
|
|
|
use App\Library\ContingentParser;
|
2024-08-08 12:32:27 +02:00
|
|
|
use App\Library\Database;
|
|
|
|
use GuzzleHttp\Client;
|
2024-08-13 15:05:24 +02:00
|
|
|
use Symfony\Component\Yaml\Yaml;
|
2024-08-08 12:32:27 +02:00
|
|
|
|
2024-08-13 12:15:21 +02:00
|
|
|
// Database handle built from the 'opendata' configuration. The active code
// visible in this script does not read it; only the commented-out
// insert/update/select statements at the bottom refer to it.
$opendataConfig = new DatabaseConfig('opendata');
$dbOpendata = new Database($opendataConfig);

// Database handle built from the 'niimko' configuration; both lookups below
// are made through it.
$niimkoConfig = new DatabaseConfig('niimko');
$dbNiimko = new Database($niimkoConfig);

// Site records to crawl. Each record is later read by its 'site' and
// 'org_id' keys.
$sites = ContingentManager::getInstance()->getSites($dbNiimko);

// Specialization reference data, later passed to addSpecId().
$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);
|
2024-08-12 14:14:49 +02:00
|
|
|
// print_r($sites);
|
2024-08-12 15:58:12 +02:00
|
|
|
// print_r($specializations);
|
|
|
|
|
2024-08-13 15:05:24 +02:00
|
|
|
// HTTP status of the site currently being processed. It stays null until a
// response arrives, or holds the exception code when the request fails.
$status = null;
// Number of sites that returned HTTP 200 and produced a non-empty table.
$succes = 0;
// Site records that could not be fetched or parsed; dumped to YAML after the loop.
$failed = [];

// The client options are identical for every site, so the client is built once
// instead of once per iteration.
$client = new Client([
    RequestOptions::ALLOW_REDIRECTS => [
        'max' => 10,
        'strict' => true,
        'referer' => true,
        'track_redirects' => true,
    ],
]);

// Process at most 100 sites, but never index past the end of $sites.
// NOTE(review): assumes $sites is a zero-based list, as the original indexing did.
$siteLimit = min(100, count($sites));

for ($i = 0; $i < $siteLimit; $i++) {
    $site = $sites[$i];

    // Reset per-site state so a failed request is never judged by the status
    // or the table left over from a previous site.
    $status = null;
    $contingent = [];

    try {
        $url = ContingentManager::getInstance()->buildURL($site['site']);
        print(($i+1).". Current url: $url\n");

        $response = $client->get($url, ['timeout' => 300]);
        $status = $response->getStatusCode();

        $html = $response->getBody()->getContents();
        $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
        $contingent = $parser->getDataTable();

        // Add the spec_id field, resolved from spec_code.
        ContingentManager::getInstance()->addSpecId($contingent, $specializations);

        // Add the org_id field.
        ContingentManager::getInstance()->addOrgId($contingent, $site['org_id']);

        print_r($contingent);
    } catch (ClientException | ServerException | RequestException | ConnectException $e) {
        // All four Guzzle failures are handled identically. Listing them in one
        // multi-catch avoids unreachable catch blocks, whatever the hierarchy
        // of the installed Guzzle version. Keep the code in $status so the
        // check below treats the site as failed.
        $status = $e->getCode();
    }

    if ($status === 200 && !empty($contingent)) {
        print("Succes: ".++$succes."\n");
        // TODO: store $contingent in the database here.
    } else {
        // Sites that could not be parsed.
        $failed[] = $site;
    }
}
|
2024-08-13 15:05:24 +02:00
|
|
|
// Persist the list of sites that could not be processed, for later inspection
// or a retry run. The file is written one directory above this script.
$yaml = Yaml::dump($failed);
$failedPath = __DIR__ . '/../failed.yaml';

// file_put_contents() returns false on failure. Raise an error so a lost
// report shows up in the exit status instead of only as a PHP warning.
if (file_put_contents($failedPath, $yaml) === false) {
    throw new \RuntimeException("Unable to write failed sites report to {$failedPath}");
}
|
2024-08-12 14:14:49 +02:00
|
|
|
|
|
|
|
// To avoid duplicate rows in the database
|
2024-08-08 15:38:54 +02:00
|
|
|
// $dbOpendata->insert('sveden_education_contingent', $data);
|
2024-08-12 14:14:49 +02:00
|
|
|
// $dbOpendata->update('sveden_education_contingent', $specializations);
|
|
|
|
// $data = $dbOpendata->select('sveden_education_contingent');
|