121 lines
4.1 KiB
PHP
121 lines
4.1 KiB
PHP
<?php
|
||
namespace App;
|
||
|
||
use App\Library\ContingentManager;
|
||
use App\Library\DatabaseConfig;
|
||
use App\Library\Logger;
|
||
use GuzzleHttp\Exception\ClientException;
|
||
use GuzzleHttp\Exception\ConnectException;
|
||
use GuzzleHttp\Exception\RequestException;
|
||
use GuzzleHttp\Exception\ServerException;
|
||
use App\Library\ContingentParser;
|
||
use App\Library\Database;
|
||
use GuzzleHttp\Client;
|
||
|
||
// Log files used by this script: one for pages whose content or markup could
// not be used, one for HTTP/cURL-level failures.
$pathLogErrorHtml = 'error-html.log';
$pathLogErrorHttp = 'error-http-curl.log';

// Database handles, each built from a named configuration.
$opendataConfig = new DatabaseConfig('opendata');
$dbOpendata = new Database($opendataConfig);

$niimkoConfig = new DatabaseConfig('niimko');
$dbNiimko = new Database($niimkoConfig);

// Reference data for the run: the sites to crawl and the specializations come
// from the "niimko" database, the list of organisations from "opendata".
$sites = ContingentManager::getInstance()->getSites($dbNiimko);
$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);
$orgs = ContingentManager::getInstance()->getOrgs($dbOpendata);
|
||
// Debug output of the loaded reference data (disabled).
// print_r($sites);
// print_r($specializations);
// print_r($orgs);
|
||
|
||
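// Disabled: rebuild $errorSites from the two error logs. Every log line is
// split on spaces and its third field ($arr[2], presumably the org_id) is
// collected unless it is already present in $orgs.
// NOTE(review): the second pass reads 'error-http.log', while the active HTTP
// log path is 'error-http-curl.log' — confirm the file name before re-enabling.
// $errorSites = [];
// $filename = 'error-html.log';
// $array = file($filename);
// for ($i = 0; $i < count($array); $i++) {
//     $arr = explode(' ', $array[$i]);
//     if (!in_array($arr[2], $orgs)) {
//         $errorSites[] = $arr[2];
//     }
// }
// $filename = 'error-http.log';
// $array = file($filename);
// for ($i = 0; $i < count($array); $i++) {
//     $arr = explode(' ', $array[$i]);
//     if (!in_array($arr[2], $orgs)) {
//         $errorSites[] = $arr[2];
//     }
// }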
|
||
|
||
// Index of the first entry of $sites to process; earlier entries are skipped.
// NOTE(review): 999 looks like a manual resume offset — confirm before a full run.
$start = 999;

// The client options do not depend on the site, so a single client is built
// once and reused for every request instead of being recreated per iteration.
$client = new Client([
    // Follow redirects and record the redirect chain. `track_redirects` is a
    // sub-option of `allow_redirects`; as a top-level key it has no effect.
    'allow_redirects' => [
        'track_redirects' => true,
    ],
    'connect_timeout' => 300.0,
    // NOTE(review): TLS certificate verification is switched off — presumably
    // because some target sites have broken certificates; confirm this is intended.
    'verify' => false,
    // 'http_errors' is left at its default (true), so 4xx/5xx responses raise
    // exceptions that are caught and logged below.
    'headers' => [
        'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36',
        'Content-Type' => 'text/html;charset=utf-8',
    ],
]);

$total = count($sites);

for ($i = $start; $i < $total; $i++) {
    $site = $sites[$i];

    // "<org_id> <site>" — the line written to the logs whenever this site
    // cannot be processed.
    $siteLabel = $site['org_id'] . ' ' . $site['site'];

    // The organisation has no site URL.
    if (empty($site['site'])) {
        Logger::log($pathLogErrorHttp, $siteLabel);
        continue;
    }

    // Already in the database.
    // Loose comparison is kept on purpose: the ids come from two different
    // databases and their scalar types are not visible here.
    if (in_array($site['org_id'], $orgs)) {
        continue;
    }

    // Skip sites with known markup errors (disabled together with the code
    // that builds $errorSites).
    // if (in_array($site['org_id'], $errorSites)) {
    //     continue;
    // }

    try {
        $url = ContingentManager::getInstance()->buildURL($site['site']);
        print(($i + 1) . ". Current url: $url\n");

        $response = $client->get($url);
        echo $response->getStatusCode() . PHP_EOL;

        $html = $response->getBody()->getContents();
        if (empty($html)) {
            Logger::log($pathLogErrorHtml, $siteLabel);
            continue;
        }

        $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
        $contingent = $parser->getDataTable();

        // Add the spec_id field, looked up by spec_code.
        ContingentManager::getInstance()->addSpecId($contingent, $specializations);

        // Add the org_id field.
        ContingentManager::getInstance()->addOrgId($contingent, $site['org_id']);
        print_r($contingent);

        // Nothing was extracted from the page.
        if (empty($contingent)) {
            Logger::log($pathLogErrorHtml, $siteLabel);
            continue;
        }

        if (ContingentManager::getInstance()->checkContingent($contingent)) {
            // Store the rows in the database.
            ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent);
        } else {
            Logger::log($pathLogErrorHtml, $siteLabel);
        }
        unset($contingent);
    } catch (RequestException | ConnectException $e) {
        // ClientException and ServerException are subclasses of RequestException,
        // so they are handled here as well. ConnectException is listed separately
        // because it is not a RequestException in every Guzzle major version.
        $message = implode(' ', $site) . "\t" . $e->getCode() . "\t" . $e->getMessage();
        Logger::log($pathLogErrorHttp, $message);
    }
}
|