2024-08-08 13:32:27 +03:00
|
|
|
|
<?php
|
|
|
|
|
namespace App;
|
|
|
|
|
|
2024-08-13 16:05:24 +03:00
|
|
|
|
use App\Library\ContingentManager;
|
2024-08-13 13:15:21 +03:00
|
|
|
|
use App\Library\DatabaseConfig;
|
2024-08-16 15:00:37 +03:00
|
|
|
|
use App\Library\Logger;
|
2024-08-22 17:03:25 +03:00
|
|
|
|
use GuzzleHttp\Psr7\Exception\MalformedUriException;
|
2024-08-12 15:14:49 +03:00
|
|
|
|
use GuzzleHttp\Exception\ClientException;
|
|
|
|
|
use GuzzleHttp\Exception\ConnectException;
|
|
|
|
|
use GuzzleHttp\Exception\RequestException;
|
|
|
|
|
use GuzzleHttp\Exception\ServerException;
|
2024-08-08 16:38:54 +03:00
|
|
|
|
use App\Library\ContingentParser;
|
2024-08-08 13:32:27 +03:00
|
|
|
|
use App\Library\Database;
|
|
|
|
|
use GuzzleHttp\Client;
|
2024-08-22 17:03:25 +03:00
|
|
|
|
use Psr\Http\Message\RequestInterface;
|
|
|
|
|
use Psr\Http\Message\ResponseInterface;
|
|
|
|
|
use Psr\Http\Message\UriInterface;
|
2024-08-08 13:32:27 +03:00
|
|
|
|
|
2024-08-16 15:00:37 +03:00
|
|
|
|
// Log files used by this script:
//   - $pathLogErrorHtml: sites whose page was empty, produced no contingent rows,
//     or failed the contingent check;
//   - $pathLogErrorHttp: sites without a URL and sites whose HTTP request failed.
$pathLogErrorHtml = 'error-html.log';
$pathLogErrorHttp = 'error-http-curl.log';

// Two database handles: 'niimko' supplies the site and specialization lists,
// 'opendata' supplies the already-stored orgs and receives the parsed contingent.
$dbOpendata = new Database(new DatabaseConfig('opendata'));
$dbNiimko = new Database(new DatabaseConfig('niimko'));

// Each $sites entry is read below via its 'org_id' and 'site' keys.
$sites = ContingentManager::getInstance()->getSites($dbNiimko);
$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);

// Values compared against each site's org_id to skip organisations already stored.
$orgs = ContingentManager::getInstance()->getOrgs($dbOpendata);

// Values compared against each site's org_id, built from the two error logs.
// The path variables are passed (instead of repeating the literals) so the files
// that are written by Logger::log and the files that are read here cannot drift apart.
// NOTE(review): presumably these are org ids parsed out of earlier log lines - confirm.
$exceptionsOrgHtml = ContingentManager::getInstance()->getExceptionsHtml($pathLogErrorHtml);
$exceptionsOrgHttpCurl = ContingentManager::getInstance()->getExceptionsHttpCurl($pathLogErrorHttp);

// Index of the first entry of $sites to process.
$start = 0;
|
2024-08-16 13:44:49 +03:00
|
|
|
|
|
|
|
|
|
// Main pass: for every site starting at $start, download /sveden/education/,
// parse the contingent table, enrich it with spec_id / org_id and store it.
// Failures are appended to the HTML or HTTP error log and the loop moves on.

// Open the Guzzle debug log once for the whole run instead of once per site.
// fopen() returns false on failure; that value is passed through to 'debug' as before.
$debugLog = fopen("debug-http.log", "a");

// Redirect callback handed to Guzzle: prints every hop that is followed.
// $res is unused but kept so the closure matches the three arguments it is declared with.
$onRedirect = static function (
    RequestInterface $request,
    ResponseInterface $res,
    UriInterface $uri
) {
    echo 'Redirecting! ' . $request->getUri() . ' to ' . $uri . "\n";
};

// Loop-invariant: $sites is not modified inside the loop.
$siteCount = count($sites);

for ($i = $start; $i < $siteCount; $i++) {
    $site = $sites[$i];

    // No URL for this organisation's site.
    if (empty($site['site'])) {
        $message = $site['org_id'] . ' ' . $site['site'];
        Logger::log($pathLogErrorHttp, $message);
        continue;
    }

    // Already in the database.
    if (in_array($site['org_id'], $orgs)) {
        continue;
    }

    // Listed in the HTML error exceptions: ignore.
    if (in_array($site['org_id'], $exceptionsOrgHtml)) {
        continue;
    }

    // NOTE(review): only organisations that ARE listed in the HTTP/cURL exceptions
    // get processed - this looks like a deliberate "retry the failed ones" pass.
    // Confirm before removing or inverting this condition.
    if (!in_array($site['org_id'], $exceptionsOrgHttpCurl)) {
        continue;
    }

    try {
        $baseUri = ContingentManager::getInstance()->buildBaseUri($site['site']);

        $client = new Client([
            'force_ip_resolve' => 'v4',
            'debug' => $debugLog,
            'base_uri' => $baseUri,
            // Was misspelled 'allow_directs', so none of these redirect
            // settings were ever applied.
            'allow_redirects' => [
                'max' => 5,
                'strict' => true,
                'referer' => true,
                'protocols' => ['http', 'https'],
                'on_redirect' => $onRedirect,
                'track_redirects' => true,
            ],
            'connect_timeout' => 300.0,
            // TLS certificate verification is disabled for every request.
            'verify' => false,
            // 'http_errors' is left at its default so that 4xx/5xx responses
            // are thrown and end up in the catch block below.
            'headers' => [
                'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36',
                'Content-Type' => 'text/html;charset=utf-8',
            ],
        ]);

        print(($i + 1) . '. ' . implode(' ', $site) . "\n");

        $response = $client->get('/sveden/education/');
        echo $response->getStatusCode() . PHP_EOL;

        // Header filled in by Guzzle when 'track_redirects' is on. The name
        // previously carried a stray leading apostrophe and never matched.
        var_dump($response->getHeaderLine('X-Guzzle-Redirect-History') . PHP_EOL);

        $html = $response->getBody()->getContents();
        if (empty($html)) {
            $message = implode(' ', $site);
            Logger::log($pathLogErrorHtml, $message);
            continue;
        }

        $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
        $contingent = $parser->getDataTable();

        // Add the spec_id field based on spec_code.
        ContingentManager::getInstance()->addSpecId($contingent, $specializations);

        // Add the org_id field.
        ContingentManager::getInstance()->addOrgId($contingent, $site['org_id']);

        print_r($contingent);

        if (empty($contingent)) {
            $message = implode(' ', $site);
            Logger::log($pathLogErrorHtml, $message);
        } else {
            $set = ContingentManager::getInstance()->checkContingent($contingent);
            if ($set) {
                // Store in the database.
                ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent);
            } else {
                $message = implode(' ', $site);
                Logger::log($pathLogErrorHtml, $message);
            }
            unset($contingent);
        }
    } catch (
        ClientException
        | RequestException
        | ConnectException
        | ServerException
        | MalformedUriException $e
    ) {
        // Transport / HTTP status / bad URI problems: record site, code and message.
        $message = implode(' ', $site) . "\t" . $e->getCode() . "\t" . $e->getMessage();
        Logger::log($pathLogErrorHttp, $message);
    }
}

// Release the last client before closing the stream it was configured with.
unset($client);
if (is_resource($debugLog)) {
    fclose($debugLog);
}
|