getExceptionsHttpCurl('select-http-error.log');
// print_r($sites);

// -----------------------------------------------------------------------------
// Re-crawl script: for every site listed in a previous run's cURL error log,
// download the page, extract the rows marked itemprop="eduChislen" and, when
// the extracted data passes validation, insert it into the "opendata" database.
//
// NOTE(review): $pathLogErrorHttp and $pathLogErrorHtml are used below but are
// not assigned in this part of the file — presumably defined earlier; confirm.
// -----------------------------------------------------------------------------

$dbOpendata = new Database(new DatabaseConfig('opendata'));
$dbNiimko = new Database(new DatabaseConfig('niimko'));

// Alternative site source (disabled):
// $sites = ContingentManager::getInstance()->getSites($dbNiimko);
$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);
$orgs = ContingentManager::getInstance()->getOrgs($dbOpendata);

// Alternative site source (disabled):
// $sites = Yaml::parse(file_get_contents(dirname(__FILE__) ."/sites.yaml"));
$sites = ContingentManager::getInstance()->getExceptionsHttpCurl('log/2024-08-28/error-http-curl.log');
$sites = ContingentManager::getInstance()->getSitesFromMiccedu($dbOpendata, $sites);
// print_r($sites);

// User-Agent sent with every request.
$ua = 'Mozilla/5.0 (X11; Linux x86_64) '
    . 'AppleWebKit/537.36 (KHTML, like Gecko) '
    . 'Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36';

// $sites is not modified inside the loop, so its size is computed once.
$siteCount = count($sites);

for ($i = 0; $i < $siteCount; $i++) {
    $site = $sites[$i];

    // No site URL for this organisation: record it in the HTTP error log and skip.
    // (Previously this logged $message, which was never assigned in this branch,
    // so the entry was either undefined or left over from an earlier iteration.)
    if (empty($site['site'])) {
        Logger::log($pathLogErrorHttp, implode(' ', $site));
        continue;
    }

    // Already in the database.
    if (in_array($site['org_id'], $orgs)) {
        continue;
    }

    // Ignore organisations with markup errors (disabled).
    // if (in_array($sites[$i]['org_id'], $exceptionsOrgHtml)) {
    //     continue;
    // }

    // Ignore organisations without HTTP errors (disabled).
    // if (!in_array($sites[$i]['org_id'], $exceptionsOrgHttpCurl)) {
    //     continue;
    // }

    // Space-joined site record; reused for console output and every log entry.
    $siteLine = implode(' ', $site);

    print(($i+1). '. ' . $siteLine . PHP_EOL);

    $uri = ContingentManager::getInstance()->buildBaseUri($site['site']);
    echo $uri . PHP_EOL;

    // $html = get_content($uri);
    $ch = curl_init($uri);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // NOTE(review): peer and host certificate verification are switched off,
    // so HTTPS responses are not authenticated. Confirm this is intentional.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_HEADER, false);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_USERAGENT, $ua);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
    // Only the connect phase is bounded; no overall CURLOPT_TIMEOUT is set.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 90);

    // Read everything needed from the handle, then close it right away so
    // that no exit path below (including the error `continue`s) can leak it.
    $html = curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $errno = curl_errno($ch);
    curl_close($ch);

    // Status code 0: handled as a cURL-level failure and logged with the
    // cURL error number and its description.
    if ($httpCode == 0) {
        $message = $siteLine;
        $message .= " cURL error ({$errno}): ".curl_strerror($errno);
        Logger::log($pathLogErrorHttp, $message);
        continue;
    }

    // Any status other than 200 is logged together with the code and skipped.
    if ($httpCode != 200) {
        Logger::log($pathLogErrorHttp, $siteLine . ' ' . $httpCode);
        continue;
    }

    echo "HTTP-code: " . $httpCode . PHP_EOL;

    // Empty (or failed) body: nothing to parse.
    if (empty($html)) {
        continue;
    }

    $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
    $contingent = $parser->getDataTable();

    // Add the spec_id field based on spec_code.
    ContingentManager::getInstance()->addSpecId($contingent, $specializations);
    // Add the org_id field.
    ContingentManager::getInstance()->addOrgId($contingent, $site['org_id']);

    // Nothing extracted: record the site in the HTML (markup) error log.
    if (empty($contingent)) {
        echo "empty". PHP_EOL;
        Logger::log($pathLogErrorHtml, $siteLine);
        continue;
    }

    print_r($contingent);

    if (ContingentManager::getInstance()->checkContingent($contingent)) {
        // Insert into the database.
        ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent);
    } else {
        // Extracted data failed the check: record it in the HTML error log.
        Logger::log($pathLogErrorHtml, $siteLine);
    }
}