Добавлена выборка актуальных сайтов по непройденным вариантам

This commit is contained in:
2024-08-29 09:36:04 +03:00
parent 5e0f59f8c5
commit 74ba48620c
4 changed files with 237 additions and 150 deletions

View File

@ -7,45 +7,8 @@ use App\Library\Logger;
use Symfony\Component\Yaml\Yaml;
require_once(dirname(__FILE__) ."/vendor/autoload.php");
// require_once(dirname(__FILE__) ."/test.php");
/**
 * Execute a cURL request and follow HTTP redirects manually.
 *
 * The handle is switched to CURLOPT_HEADER + CURLOPT_RETURNTRANSFER so the
 * response headers can be inspected; the `Location` header of every redirect
 * response is resolved against the last effective URL and requested again on
 * the same handle.
 *
 * NOTE: the request is re-sent with whatever method/options are set on the
 * handle; no POST-to-GET conversion is performed on redirects.
 *
 * @param resource|\CurlHandle $ch Initialised cURL handle (URL already set).
 *
 * @return string|false Response body of the final (non-redirect) response,
 *                      or false when the transfer fails or more than 20
 *                      consecutive requests would be needed.
 */
function curl_redir_exec($ch)
{
    // Upper bound on requests per call; a local counter replaces the former
    // static one so no state leaks between calls.
    $maxRequests = 20;
    $redirectCodes = [301, 302, 303, 307, 308];

    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    for ($attempt = 0; $attempt < $maxRequests; $attempt++) {
        $response = curl_exec($ch);
        if ($response === false) {
            // Transport-level failure (DNS, timeout, TLS, ...): nothing to parse.
            return false;
        }

        // HTTP separates headers from the body with CRLF CRLF, so splitting on
        // "\n\n" never matches; let cURL tell us where the headers end.
        $headerSize = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
        $header = (string) substr($response, 0, $headerSize);
        $body = (string) substr($response, $headerSize);

        $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        if (!in_array($httpCode, $redirectCodes, true)) {
            return $body;
        }

        // Header names are case-insensitive (HTTP/2 sends them lowercase).
        $found = [];
        if (!preg_match('/^Location:[ \t]*(.*?)[ \t]*\r?$/mi', $header, $found) || $found[1] === '') {
            // Redirect status without a usable target: return what we have
            // instead of re-requesting the same URL until the limit is hit.
            return $body;
        }

        $target = parse_url($found[1]);
        if (!$target) {
            // Could not process the URL to redirect to.
            return $body;
        }

        $last = parse_url((string) curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));
        if (!is_array($last)) {
            $last = [];
        }

        // Fill in every component missing from a relative Location value
        // using the URL that was just requested.
        $scheme = !empty($target['scheme']) ? $target['scheme'] : ($last['scheme'] ?? 'http');

        if (!empty($target['host'])) {
            $host = $target['host'];
            $port = $target['port'] ?? null;
        } else {
            $host = $last['host'] ?? '';
            $port = $last['port'] ?? null;
        }
        if ($host === '') {
            return $body;
        }

        $lastPath = (isset($last['path']) && $last['path'] !== '') ? $last['path'] : '/';
        $targetPath = $target['path'] ?? '';
        if ($targetPath === '') {
            $path = $lastPath;
        } elseif ($targetPath[0] !== '/') {
            // Path-relative reference: resolve against the directory of the last path.
            $path = preg_replace('#[^/]*$#', '', $lastPath) . $targetPath;
        } else {
            $path = $targetPath;
        }

        $query = (isset($target['query']) && $target['query'] !== '') ? '?' . $target['query'] : '';

        $newUrl = $scheme . '://' . $host . ($port !== null ? ':' . $port : '') . $path . $query;
        curl_setopt($ch, CURLOPT_URL, $newUrl);
    }

    // Too many redirects.
    return false;
}
// Log file paths under <script dir>/log/<current date, Y-m-d>/:
// one file for HTTP/cURL errors and one for HTML errors (per the file names).
$pathLogErrorHttp = __DIR__.'/log/'. date('Y-m-d') . '/error-http-curl.log';
$pathLogErrorHtml = __DIR__.'/log/'. date('Y-m-d') . '/error-html.log';
@ -58,8 +21,9 @@ $dbNiimko = new Database(new DatabaseConfig('niimko'));
// Alternative site source (disabled): fetch the site list via $dbNiimko.
// $sites = ContingentManager::getInstance()->getSites($dbNiimko);
// Reference data obtained through ContingentManager: specializations via
// $dbNiimko, organisations via $dbOpendata.
$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);
$orgs = ContingentManager::getInstance()->getOrgs($dbOpendata);
// NOTE(review): this value of $sites is overwritten below without being read
// in between, so as written the YAML parse is wasted work; confirm which
// source of sites is intended.
$sites = Yaml::parse(file_get_contents(dirname(__FILE__) ."/sites.yaml"));
// $sites = ContingentManager::getInstance()->getExceptionsHttpCurl('log/2024-08-27/error-http-curl.log');
// $sites = Yaml::parse(file_get_contents(dirname(__FILE__) ."/sites.yaml"));
// Build the site list from an earlier run's cURL error log. The log path
// (including its date) is hard-coded; presumably these are sites that failed
// previously and are being retried; confirm against getExceptionsHttpCurl().
$sites = ContingentManager::getInstance()->getExceptionsHttpCurl('log/2024-08-28/error-http-curl.log');
// Pass that list, together with $dbOpendata, through getSitesFromMiccedu();
// its result replaces $sites and is what the loop below iterates over.
$sites = ContingentManager::getInstance()->getSitesFromMiccedu($dbOpendata, $sites);
// print_r($sites);
for ($i = 0; $i < count($sites); $i++) {
// Нет URL сайта вуза
@ -82,23 +46,22 @@ for ($i = 0; $i < count($sites); $i++) {
// continue;
// }
print(($i+1). '. ' . implode(' ', $sites[$i]) . PHP_EOL);
$uri = trim(ContingentManager::getInstance()->buildBaseUri($sites[$i]['site']));
$uri = str_replace("_","/", $uri);
if (substr($uri, -1) == '/') {
$uri = $uri."sveden/education/";
} else {
$uri = $uri."/sveden/education/";
}
$uri = ContingentManager::getInstance()->buildBaseUri($sites[$i]['site']);
echo $uri . PHP_EOL;
$ua = 'Mozilla/5.0 (X11; Linux x86_64) '
.'AppleWebKit/537.36 (KHTML, like Gecko) '
.'Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36';
// $html = get_content($uri);
$ch = curl_init($uri);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_HEADER, false);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36');
curl_setopt($ch, CURLOPT_USERAGENT, $ua);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 90);
$html = curl_exec($ch);
$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
if ($httpCode != 200 && $httpCode != 0) {
$message = implode(' ', $sites[$i]) . ' ' . $httpCode;