= $curl_max_loops) { $curl_loops = 0; return false; } curl_setopt($ch, CURLOPT_HEADER, true); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); $data = curl_exec($ch); list($header, $data) = explode("\n\n", $data, 2); $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE); if ($http_code == 301 || $http_code == 302) { $matches = []; preg_match('/Location:(.*?)\n/', $header, $matches); $url = @parse_url(trim(array_pop($matches))); if (!$url) { //couldn't process the url to redirect to $curl_loops = 0; return $data; } $last_url = parse_url(curl_getinfo($ch, CURLINFO_EFFECTIVE_URL)); if (!$url['scheme']) $url['scheme'] = $last_url['scheme']; if (!$url['host']) $url['host'] = $last_url['host']; if (!$url['path']) $url['path'] = $last_url['path']; $new_url = $url['scheme'] . '://' . $url['host'] . $url['path'] . ($url['query']?'?'.$url['query']:''); curl_setopt($ch, CURLOPT_URL, $new_url); // debug('Redirecting to', $new_url); return curl_redir_exec($ch); } else { $curl_loops=0; return $data; } } $pathLogErrorHttp = __DIR__.'/log/'. date('Y-m-d') . '/error-http-curl.log'; $pathLogErrorHtml = __DIR__.'/log/'. date('Y-m-d') . 
'/error-html.log';

/*
 * Crawl script: for every site listed in sites.yaml, download the
 * "<base>/sveden/education/" page, extract the rows marked
 * itemprop="eduChislen", and insert the resulting records into the
 * "opendata" database. Sites that cannot be fetched are written to the HTTP
 * error log; sites whose page yields no usable table are written to the HTML
 * error log.
 */

// Alternative ways of populating $sites, currently disabled:
// $sites = ContingentManager::getInstance()->getExceptionsHttpCurl('select-http-error.log');
// print_r($sites);

$dbOpendata = new Database(new DatabaseConfig('opendata'));
$dbNiimko = new Database(new DatabaseConfig('niimko'));

// $sites = ContingentManager::getInstance()->getSites($dbNiimko);
$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);
$orgs = ContingentManager::getInstance()->getOrgs($dbOpendata);
$sites = Yaml::parse(file_get_contents(__DIR__ . "/sites.yaml"));
// $sites = ContingentManager::getInstance()->getExceptionsHttpCurl('log/2024-08-27/error-http-curl.log');
// print_r($sites);

// $sites is not modified inside the loop, so its size is computed once.
$siteCount = count($sites);

for ($i = 0; $i < $siteCount; $i++) {
    $site = isset($sites[$i]) ? $sites[$i] : null;

    // The entry has no site URL: record the entry itself in the HTTP error log.
    // The message is built from the current entry so that a value left over
    // from a previously processed site is never logged by mistake.
    if (empty($site['site'])) {
        Logger::log($pathLogErrorHttp, implode(' ', (array) $site));
        continue;
    }

    // Already in the database.
    // NOTE(review): the comparison is intentionally left loose; the types of
    // the ids coming from YAML and from getOrgs() are not visible here.
    if (in_array($site['org_id'], $orgs)) {
        continue;
    }

    // Ignore organisations with markup errors (disabled):
    // if (in_array($sites[$i]['org_id'], $exceptionsOrgHtml)) {
    //     continue;
    // }

    // Ignore organisations without HTTP errors (disabled):
    // if (!in_array($sites[$i]['org_id'], $exceptionsOrgHttpCurl)) {
    //     continue;
    // }

    // Space-separated dump of the entry; used for console output and log lines.
    $siteLabel = implode(' ', $site);
    echo ($i + 1) . '. ' . $siteLabel . PHP_EOL;

    // Build the target URL: base URI with every "_" turned into "/", followed
    // by "sveden/education/" with exactly one slash inserted when needed.
    $uri = trim(ContingentManager::getInstance()->buildBaseUri($site['site']));
    $uri = str_replace("_", "/", $uri);
    $uri .= (substr($uri, -1) === '/' ? '' : '/') . "sveden/education/";
    echo $uri . PHP_EOL;

    $ch = curl_init($uri);
    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_HEADER => false,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_USERAGENT => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36',
        // NOTE(review): certificate and host verification are switched off, so
        // the downloaded page is not authenticated. Kept as in the original.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => false,
    ]);

    $html = curl_exec($ch);
    // Collect everything needed from the handle first, then release it on
    // every path (the error branches below leave the iteration early).
    $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $errno = curl_errno($ch);
    curl_close($ch);

    // Status 0 means no HTTP response was received: log the cURL error instead.
    if ($httpCode === 0) {
        Logger::log($pathLogErrorHttp, $siteLabel . " cURL error ({$errno}): " . curl_strerror($errno));
        continue;
    }

    // Any final status other than 200 is logged together with the code.
    if ($httpCode !== 200) {
        Logger::log($pathLogErrorHttp, $siteLabel . ' ' . $httpCode);
        continue;
    }

    echo "HTTP-code: " . $httpCode . PHP_EOL;

    // Nothing to parse (empty body or failed transfer): skip without logging.
    if (empty($html)) {
        continue;
    }

    $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
    $contingent = $parser->getDataTable();

    // Add the spec_id field looked up by spec_code.
    // NOTE(review): the return values are unused, so both helpers presumably
    // modify $contingent by reference; confirm against ContingentManager.
    ContingentManager::getInstance()->addSpecId($contingent, $specializations);
    // Add the org_id field.
    ContingentManager::getInstance()->addOrgId($contingent, $site['org_id']);

    if (empty($contingent)) {
        echo "empty". PHP_EOL;
        Logger::log($pathLogErrorHtml, $siteLabel);
        continue;
    }

    print_r($contingent);

    if (ContingentManager::getInstance()->checkContingent($contingent)) {
        // Insert into the database.
        ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent);
    } else {
        // The extracted table did not pass validation.
        Logger::log($pathLogErrorHtml, $siteLabel);
    }
}