Добавлена выборка актуальных сайтов по непройденным вариантам
This commit is contained in:
273
app/app.php
273
app/app.php
@ -1,6 +1,4 @@
|
||||
<?php
|
||||
namespace App;
|
||||
|
||||
use App\Library\ContingentManager;
|
||||
use App\Library\DatabaseConfig;
|
||||
use App\Library\Logger;
|
||||
@ -12,32 +10,140 @@ use GuzzleHttp\Exception\ServerException;
|
||||
use App\Library\ContingentParser;
|
||||
use App\Library\Database;
|
||||
use GuzzleHttp\Client;
|
||||
use Psr\Http\Message\RequestInterface;
|
||||
use Psr\Http\Message\ResponseInterface;
|
||||
use Psr\Http\Message\UriInterface;
|
||||
use GuzzleHttp\TransferStats;
|
||||
use Symfony\Component\Yaml\Yaml;
|
||||
|
||||
$pathLogErrorHtml = 'error-html.log';
|
||||
$pathLogErrorHttp = 'error-http-curl.log';
|
||||
$pathLogErrorHtml = __DIR__.'/../log/'. date('Y-m-d') . '/error-html.log';
|
||||
$pathLogErrorHttp = __DIR__.'/../log/'. date('Y-m-d') . '/error-http-curl.log';
|
||||
$pathErrorHttp = __DIR__.'/../log/'. date('Y-m-d') . '/error-http.log';
|
||||
|
||||
$dbOpendata = new Database(new DatabaseConfig('opendata'));
|
||||
$dbNiimko = new Database(new DatabaseConfig('niimko'));
|
||||
|
||||
$sites = ContingentManager::getInstance()->getSites($dbNiimko);
|
||||
$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);
|
||||
$orgs = ContingentManager::getInstance()->getOrgs($dbOpendata);
|
||||
// $specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);
|
||||
// $orgs = ContingentManager::getInstance()->getOrgs($dbOpendata);
|
||||
|
||||
$exceptionsOrgHtml = ContingentManager::getInstance()->getExceptionsHtml('error-html.log');
|
||||
$exceptionsOrgHttpCurl = ContingentManager::getInstance()->getExceptionsHttpCurl('error-http-curl.log');
|
||||
// $exceptionsOrgHtml = ContingentManager::getInstance()->getExceptionsHtml('select-html-error.log');
|
||||
// $exceptionsOrgHttpCurl = ContingentManager::getInstance()->getExceptionsHttpCurl('select-http-error.log');
|
||||
|
||||
// print_r($exceptionsOrgHttpCurl);
|
||||
|
||||
$start = 0;
|
||||
// echo count($exceptionsOrgHttpCurl) . " - http-error sites" . PHP_EOL;
|
||||
|
||||
for ($i = $start; $i < count($sites); $i++) {
|
||||
// $start = 794;
|
||||
// for ($i = $start; $i < count($sites); $i++) {
|
||||
// // Нет URL сайта вуза
|
||||
// if (empty($sites[$i]['site'])) {
|
||||
// $message = implode(' ', $sites[$i]);
|
||||
// Logger::log($pathLogErrorHttp, $message);
|
||||
// Logger::log($pathErrorHttp, implode(' ', $sites[$i]));
|
||||
// continue;
|
||||
// }
|
||||
// // Уже в базе
|
||||
// if (in_array($sites[$i]['org_id'], $orgs)) {
|
||||
// continue;
|
||||
// }
|
||||
// // С ошибками разметки игнорируем
|
||||
// if (in_array($sites[$i]['org_id'], $exceptionsOrgHtml)) {
|
||||
// continue;
|
||||
// }
|
||||
// // Без ошибок http игнорируем
|
||||
// if (!in_array($sites[$i]['org_id'], $exceptionsOrgHttpCurl)) {
|
||||
// continue;
|
||||
// }
|
||||
// try {
|
||||
// $baseUri = ContingentManager::getInstance()->buildBaseUri($sites[$i]['site']);
|
||||
// $client = new Client([
|
||||
// 'force_ip_resolve' => 'v4',
|
||||
// 'debug' => fopen("debug-http.log", "a"),
|
||||
// 'base_uri' => $baseUri,
|
||||
// 'allow_directs' => [
|
||||
// 'max' => 5,
|
||||
// 'strict' => true,
|
||||
// 'referer' => true,
|
||||
// 'protocols' => ['http', 'https'],
|
||||
// 'track_redirects' => true
|
||||
// ],
|
||||
// 'connect_timeout' => 300.0,
|
||||
// 'verify' => false,
|
||||
// 'headers' => [
|
||||
// 'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36',
|
||||
// 'Content-Type' => 'text/html;charset=utf-8'
|
||||
// ]
|
||||
// ]);
|
||||
// $response = $client->get('', [
|
||||
// 'on_stats' => function (TransferStats $stats) use (&$url) {
|
||||
// $url = $stats->getEffectiveUri();
|
||||
// }
|
||||
// ]);
|
||||
|
||||
// print(($i+1). '. ' . implode(' ', $sites[$i]) . PHP_EOL);
|
||||
// if (substr($url, -1) == '/') {
|
||||
// $url = $url."sveden/education/";
|
||||
// } else {
|
||||
// $url = $url."/sveden/education/";
|
||||
// }
|
||||
// echo $url .PHP_EOL;
|
||||
// $response = $client->get($url, [
|
||||
// 'on_stats' => function (TransferStats $stats) use (&$url) {
|
||||
// $url = $stats->getEffectiveUri();
|
||||
// }
|
||||
// ]);
|
||||
// echo $url . PHP_EOL;
|
||||
|
||||
// $html = $response->getBody()->getContents();
|
||||
// if (empty($html)) {
|
||||
// $message = implode(' ', $sites[$i]);
|
||||
// Logger::log($pathLogErrorHtml, $message);
|
||||
// continue;
|
||||
// }
|
||||
// $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
|
||||
// $contingent = $parser->getDataTable();
|
||||
|
||||
// // Добавляем поле spec_id по spec_code
|
||||
// ContingentManager::getInstance()->addSpecId($contingent, $specializations);
|
||||
|
||||
// // Добавляем поле org_id
|
||||
// ContingentManager::getInstance()->addOrgId($contingent, $sites[$i]['org_id']);
|
||||
// print_r($contingent);
|
||||
// if (empty($contingent)) {
|
||||
// $message = implode(' ', $sites[$i]);
|
||||
// Logger::log($pathLogErrorHtml, $message);
|
||||
// } else {
|
||||
// $set = ContingentManager::getInstance()->checkContingent($contingent);
|
||||
// if ($set) {
|
||||
// // Заносим в базу
|
||||
// ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent);
|
||||
// } else {
|
||||
// $message = implode(' ', $sites[$i]);
|
||||
// Logger::log($pathLogErrorHtml, $message);
|
||||
// }
|
||||
// unset($contingent);
|
||||
// }
|
||||
// } catch (ClientException
|
||||
// | RequestException
|
||||
// | ConnectException
|
||||
// | ServerException
|
||||
// | MalformedUriException $e
|
||||
// ) {
|
||||
// $message = implode(' ', $sites[$i]) . " " . $e->getCode() . " " . $e->getMessage();
|
||||
// Logger::log($pathLogErrorHttp, $message);
|
||||
// Logger::log($pathErrorHttp, implode(' ', $sites[$i]));
|
||||
// }
|
||||
// }
|
||||
|
||||
$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);
|
||||
$orgs = ContingentManager::getInstance()->getOrgs($dbOpendata);
|
||||
// $sites = Yaml::parse(file_get_contents(dirname(__FILE__) ."/sites.yaml"));
|
||||
// print_r($sites);
|
||||
|
||||
for ($i = 0; $i < count($sites); $i++) {
|
||||
// Нет URL сайта вуза
|
||||
if (empty($sites[$i]['site'])) {
|
||||
$message = $sites[$i]['org_id'] . ' ' . $sites[$i]['site'];
|
||||
Logger::log($pathLogErrorHttp, $message);
|
||||
// $message = implode(' ', $sites[$i]);
|
||||
// Logger::log($pathLogErrorHttp, $message);
|
||||
// Logger::log($pathErrorHttp, implode(' ', $sites[$i]));
|
||||
continue;
|
||||
}
|
||||
// Уже в базе
|
||||
@ -45,82 +151,65 @@ for ($i = $start; $i < count($sites); $i++) {
|
||||
continue;
|
||||
}
|
||||
// С ошибками разметки игнорируем
|
||||
if (in_array($sites[$i]['org_id'], $exceptionsOrgHtml)) {
|
||||
continue;
|
||||
// if (in_array($sites[$i]['org_id'], $exceptionsOrgHtml)) {
|
||||
// continue;
|
||||
// }
|
||||
// Без ошибок http игнорируем
|
||||
// if (!in_array($sites[$i]['org_id'], $exceptionsOrgHttpCurl)) {
|
||||
// continue;
|
||||
// }
|
||||
print(($i+1). '. ' . implode(' ', $sites[$i]) . PHP_EOL);
|
||||
$uri = trim(ContingentManager::getInstance()->buildBaseUri($sites[$i]['site']));
|
||||
$uri = str_replace("_","/", $uri);
|
||||
if (substr($uri, -1) == '/') {
|
||||
$uri = $uri."sveden/education/";
|
||||
} else {
|
||||
$uri = $uri."/sveden/education/";
|
||||
}
|
||||
if (!in_array($sites[$i]['org_id'], $exceptionsOrgHttpCurl)) {
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
$baseUri = ContingentManager::getInstance()->buildBaseUri($sites[$i]['site']);
|
||||
$onRedirect = function(
|
||||
RequestInterface $request,
|
||||
ResponseInterface $res,
|
||||
UriInterface $uri
|
||||
) {
|
||||
echo 'Redirecting! ' . $request->getUri() . ' to ' . $uri . "\n";
|
||||
};
|
||||
$client = new Client([
|
||||
'force_ip_resolve' => 'v4',
|
||||
'debug' => fopen("debug-http.log", "a"),
|
||||
'base_uri' => $baseUri,
|
||||
'allow_directs' => [
|
||||
'max' => 5,
|
||||
'strict' => true,
|
||||
'referer' => true,
|
||||
'protocols' => ['http', 'https'],
|
||||
'on_redirect' => $onRedirect,
|
||||
'track_redirects' => true
|
||||
],
|
||||
'connect_timeout' => 300.0,
|
||||
'verify' => false,
|
||||
// 'http_errors' => false,
|
||||
'headers' => [
|
||||
'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36',
|
||||
'Content-Type' => 'text/html;charset=utf-8'
|
||||
]
|
||||
]);
|
||||
print(($i+1). '. ' . implode(' ', $sites[$i]) . "\n");
|
||||
$response = $client->get('/sveden/education/');
|
||||
echo $response->getStatusCode() .PHP_EOL;
|
||||
var_dump($response->getHeaderLine("'X-Guzzle-Redirect-History") . PHP_EOL);
|
||||
|
||||
$html = $response->getBody()->getContents();
|
||||
if (empty($html)) {
|
||||
$message = implode(' ', $sites[$i]);
|
||||
Logger::log($pathLogErrorHtml, $message);
|
||||
continue;
|
||||
}
|
||||
$parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
|
||||
$contingent = $parser->getDataTable();
|
||||
|
||||
// Добавляем поле spec_id по spec_code
|
||||
ContingentManager::getInstance()->addSpecId($contingent, $specializations);
|
||||
|
||||
// Добавляем поле org_id
|
||||
ContingentManager::getInstance()->addOrgId($contingent, $sites[$i]['org_id']);
|
||||
print_r($contingent);
|
||||
if (empty($contingent)) {
|
||||
$message = implode(' ', $sites[$i]);
|
||||
Logger::log($pathLogErrorHtml, $message);
|
||||
} else {
|
||||
$set = ContingentManager::getInstance()->checkContingent($contingent);
|
||||
if ($set) {
|
||||
// Заносим в базу
|
||||
ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent);
|
||||
} else {
|
||||
$message = implode(' ', $sites[$i]);
|
||||
Logger::log($pathLogErrorHtml, $message);
|
||||
}
|
||||
unset($contingent);
|
||||
}
|
||||
} catch (ClientException
|
||||
| RequestException
|
||||
| ConnectException
|
||||
| ServerException
|
||||
| MalformedUriException $e
|
||||
) {
|
||||
$message = implode(' ', $sites[$i]) . "\t" . $e->getCode() . "\t" . $e->getMessage();
|
||||
echo $uri . PHP_EOL;
|
||||
$ch = curl_init($uri);
|
||||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
||||
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
|
||||
curl_setopt($ch, CURLOPT_HEADER, false);
|
||||
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
|
||||
$html = curl_exec($ch);
|
||||
$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
|
||||
if ($httpCode != 200) {
|
||||
$errno = curl_errno($ch);
|
||||
$message = implode(' ', $sites[$i]);
|
||||
$message .= " cURL error ({$errno}): ".curl_strerror($errno);
|
||||
Logger::log($pathLogErrorHttp, $message);
|
||||
}
|
||||
}
|
||||
unset($httpCode);
|
||||
continue;
|
||||
}
|
||||
|
||||
curl_close($ch);
|
||||
echo "HTTP-code: " . $httpCode . PHP_EOL;
|
||||
if (empty($html)) continue;
|
||||
|
||||
$parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
|
||||
$contingent = $parser->getDataTable();
|
||||
// Добавляем поле spec_id по spec_code
|
||||
ContingentManager::getInstance()->addSpecId($contingent, $specializations);
|
||||
// Добавляем поле org_id
|
||||
ContingentManager::getInstance()->addOrgId($contingent, $sites[$i]['org_id']);
|
||||
|
||||
|
||||
if (empty($contingent)) {
|
||||
echo "empty". PHP_EOL;
|
||||
$message = implode(' ', $sites[$i]);
|
||||
Logger::log($pathLogErrorHtml, $message);
|
||||
} else {
|
||||
print_r($contingent);
|
||||
$set = ContingentManager::getInstance()->checkContingent($contingent);
|
||||
if ($set) {
|
||||
// Заносим в базу
|
||||
ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent);
|
||||
} else {
|
||||
$message = implode(' ', $sites[$i]);
|
||||
Logger::log($pathLogErrorHtml, $message);
|
||||
}
|
||||
unset($contingent);
|
||||
unset($httpCode);
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user