getSites($dbNiimko);
$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);
$orgs = ContingentManager::getInstance()->getOrgs($dbOpendata);

// Debug output (disabled):
// print_r($sites);
// print_r($specializations);
// print_r($org);

// Disabled: build a list of org ids taken from earlier error logs so that
// those sites can be skipped (see the disabled check inside the loop).
// $errorSites = [];
// $filename = 'error-html.log';
// $array = file($filename);
// for ($i = 0; $i < count($array); $i++) {
//     $arr = explode(' ', $array[$i]);
//     if (!in_array($arr[2], $orgs)) {
//         $errorSites[] = $arr[2];
//     }
// }
// $filename = 'error-http.log';
// $array = file($filename);
// for ($i = 0; $i < count($array); $i++) {
//     $arr = explode(' ', $array[$i]);
//     if (!in_array($arr[2], $orgs)) {
//         $errorSites[] = $arr[2];
//     }
// }

// The client configuration does not depend on the site, so a single client is
// created once and reused for every request instead of once per iteration.
$client = new Client([
    // Guzzle reads 'track_redirects' only as a sub-option of 'allow_redirects';
    // the previous top-level 'allow_directs' / 'track_redirects' keys were ignored.
    'allow_redirects' => [
        'track_redirects' => true,
    ],
    'connect_timeout' => 300.0,
    // TLS certificate verification is disabled for the crawled sites.
    'verify' => false,
    // 'http_errors' => false,
    'headers' => [
        'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36',
        'Content-Type' => 'text/html;charset=utf-8',
    ],
]);

// Index in $sites at which processing starts; earlier entries are skipped.
// NOTE(review): presumably set by hand to resume an interrupted run — confirm.
$start = 999;
$siteCount = count($sites);

for ($i = $start; $i < $siteCount; $i++) {
    $site = $sites[$i];
    // Line written to the error logs for this site: "<org_id> <site>".
    $logLine = $site['org_id'] . ' ' . $site['site'];

    // The organisation has no site URL: log it and move on.
    if (empty($site['site'])) {
        Logger::log($pathLogErrorHttp, $logLine);
        continue;
    }

    // Already in the database.
    if (in_array($site['org_id'], $orgs)) {
        continue;
    }

    // Ignore sites with markup errors (disabled).
    // if (in_array($sites[$i]['org_id'], $errorSites)) {
    //     continue;
    // }

    try {
        $url = ContingentManager::getInstance()->buildURL($site['site']);
        print(($i+1).". Current url: $url\n");

        $response = $client->get($url);
        echo $response->getStatusCode() .PHP_EOL;

        $html = $response->getBody()->getContents();
        // An empty body cannot be parsed; record it as an HTML error.
        if ($html === '') {
            Logger::log($pathLogErrorHtml, $logLine);
            continue;
        }

        $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
        $contingent = $parser->getDataTable();

        // Add the spec_id field based on spec_code.
        ContingentManager::getInstance()->addSpecId($contingent, $specializations);
        // Add the org_id field.
        ContingentManager::getInstance()->addOrgId($contingent, $site['org_id']);
        print_r($contingent);

        if (empty($contingent)) {
            Logger::log($pathLogErrorHtml, $logLine);
        } else {
            $set = ContingentManager::getInstance()->checkContingent($contingent);
            if ($set) {
                // Insert into the database.
                ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent);
            } else {
                Logger::log($pathLogErrorHtml, $logLine);
            }
            unset($contingent);
        }
    } catch (ClientException | RequestException | ConnectException | ServerException $e) {
        // HTTP/transport failure: log every field of the site row, tab-separated
        // from the error code and message.
        $message = implode(' ', $site)
            . "\t" . $e->getCode()
            . "\t" . $e->getMessage();
        Logger::log($pathLogErrorHttp, $message);
    }
}