sveden-parser/app/app.php

80 lines
2.8 KiB
PHP
Raw Normal View History

2024-08-08 12:32:27 +02:00
<?php
namespace App;
use App\Library\ContingentManager;
use App\Library\DatabaseConfig;
use GuzzleHttp\Exception\ClientException;
use GuzzleHttp\Exception\ConnectException;
use GuzzleHttp\Exception\RequestException;
use GuzzleHttp\Exception\ServerException;
use GuzzleHttp\RequestOptions;
use App\Library\ContingentParser;
2024-08-08 12:32:27 +02:00
use App\Library\Database;
use GuzzleHttp\Client;
use Symfony\Component\Yaml\Yaml;
2024-08-08 12:32:27 +02:00
// Run bookkeeping:
//   $status - HTTP status of the request currently being processed
//   $succes - number of sites processed successfully so far
//   $failed - rows from $sites that could not be processed
$failed = [];
$succes = 0;
$status = null;

// Two database handles, one per named configuration.
$dbOpendata = new Database(new DatabaseConfig('opendata'));
$dbNiimko = new Database(new DatabaseConfig('niimko'));

// Reference data is read through the 'niimko' handle: the list of sites to
// visit and the list of specializations.
$sites = ContingentManager::getInstance()->getSites($dbNiimko);
$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);

// Debug aid - uncomment to inspect what was loaded:
// print_r($sites);
// print_r($specializations);
// Fetch each site's page, parse the contingent table out of it and sort the
// site into "success" or "failed".

// One HTTP client serves the whole run: its redirect policy never changes
// between sites, so it is built once instead of on every iteration.
$client = new Client([
    RequestOptions::ALLOW_REDIRECTS => [
        'max' => 10,
        'strict' => true,
        'referer' => true,
        'track_redirects' => true,
    ],
]);

// Process at most the first 100 sites, but never index past the end of
// $sites when fewer rows were loaded.
$siteLimit = min(100, count($sites));

for ($i = 0; $i < $siteLimit; $i++) {
    // Reset per-site state so that a failing site is never judged by the
    // status or the parsed data left over from the previous iteration.
    $status = null;
    $contingent = [];

    try {
        $baseURL = $sites[$i]['site'];
        $url = ContingentManager::getInstance()->buildURL($baseURL);
        print(($i+1).". Current url: $url\n");

        $response = $client->get($url, ['timeout' => 300]);
        $status = $response->getStatusCode();
        $html = $response->getBody()->getContents();

        $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
        $contingent = $parser->getDataTable();
        // Add the spec_id field, looked up by spec_code
        ContingentManager::getInstance()->addSpecId($contingent, $specializations);
        // Add the org_id field
        ContingentManager::getInstance()->addOrgId($contingent, $sites[$i]['org_id']);
        print_r($contingent);
    } catch (RequestException | ConnectException $e) {
        // ClientException and ServerException are subclasses of
        // RequestException, so they are covered here as well.
        // ConnectException is listed explicitly because in Guzzle 7 it no
        // longer extends RequestException.
        // Record the error code as the status so the check below treats the
        // site as failed.
        $status = $e->getCode();
    } finally {
        if ($status === 200 && !empty($contingent)) {
            print("Succes: ".++$succes."\n");
            // TODO: write $contingent to the database here
        } else {
            // Sites that could not be parsed
            $failed[] = $sites[$i];
        }
    }
}
// Dump the rows collected in $failed as YAML into failed.yaml, located one
// directory above this script.
$yaml = Yaml::dump($failed);
$failedReportPath = __DIR__ . '/../failed.yaml';
if (file_put_contents($failedReportPath, $yaml) === false) {
    // file_put_contents() returns false when the write fails; report it
    // explicitly instead of ignoring the result. The run itself is not aborted.
    error_log("Could not write failed-sites report to {$failedReportPath}");
}
// To avoid duplicate rows in the database
// $dbOpendata->insert('sveden_education_contingent', $data);
// $dbOpendata->update('sveden_education_contingent', $specializations);
// $data = $dbOpendata->select('sveden_education_contingent');