getSites($dbNiimko);
// NOTE(review): the statement above starts before this chunk; presumably it
// assigns the list of sites to $sites (rows with 'org_id' and 'site' keys,
// as read by the loop below) — confirm against the full file.

/*
 * Contingent import pass.
 *
 * For every row in $sites the script:
 *   1. skips rows without a site URL (logged to $pathLogErrorHttp);
 *   2. skips organisations already listed in $orgs;
 *   3. skips organisations listed in the HTML-error exception log;
 *   4. skips organisations NOT listed in the HTTP/cURL exception log;
 *   5. downloads "/sveden/education/" from the site, parses the
 *      tr[@itemprop="eduChislen"] rows and, if checkContingent() accepts
 *      the result, inserts it via insertContingent($dbOpendata, ...).
 * Empty or rejected results are logged to $pathLogErrorHtml; Guzzle transport
 * and HTTP errors are logged to $pathLogErrorHttp.
 */
$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);
$orgs = ContingentManager::getInstance()->getOrgs($dbOpendata);
$exceptionsOrgHtml = ContingentManager::getInstance()->getExceptionsHtml('error-html.log');
$exceptionsOrgHttpCurl = ContingentManager::getInstance()->getExceptionsHttpCurl('error-http-curl.log');

// Open the Guzzle debug log once for the whole run. The original opened a new
// handle on every iteration and never closed it. If fopen() fails it returns
// false, which Guzzle treats as "debug disabled".
$debugLog = fopen("debug-http.log", "a");

// Redirect callback shared by every client; it only reports the hop on stdout.
$onRedirect = static function (
    RequestInterface $request,
    ResponseInterface $res,
    UriInterface $uri
) {
    echo 'Redirecting! ' . $request->getUri() . ' to ' . $uri . "\n";
};

// Change $start to resume an interrupted run from a given index.
$start = 0;
$siteCount = count($sites);

for ($i = $start; $i < $siteCount; $i++) {
    $site = $sites[$i];

    // No URL for the university's site.
    if (empty($site['site'])) {
        $message = $site['org_id'] . ' ' . $site['site'];
        Logger::log($pathLogErrorHttp, $message);
        continue;
    }

    // Already in the database.
    if (in_array($site['org_id'], $orgs)) {
        continue;
    }

    // Sites with known markup errors are ignored.
    if (in_array($site['org_id'], $exceptionsOrgHtml)) {
        continue;
    }

    // NOTE(review): only organisations that ARE in the HTTP/cURL error list
    // are processed, which looks like a retry pass over earlier failures —
    // confirm this inversion is intentional.
    if (!in_array($site['org_id'], $exceptionsOrgHttpCurl)) {
        continue;
    }

    try {
        $baseUri = ContingentManager::getInstance()->buildBaseUri($site['site']);

        $client = new Client([
            'force_ip_resolve' => 'v4',
            'debug' => $debugLog,
            'base_uri' => $baseUri,
            // Fixed key: was 'allow_directs', which Guzzle silently ignores,
            // so none of the redirect settings below were ever applied.
            'allow_redirects' => [
                'max' => 5,
                'strict' => true,
                'referer' => true,
                'protocols' => ['http', 'https'],
                'on_redirect' => $onRedirect,
                'track_redirects' => true,
            ],
            'connect_timeout' => 300.0,
            // NOTE(review): TLS certificate verification is disabled here;
            // kept as in the original — confirm this is acceptable.
            'verify' => false,
            // 'http_errors' is left at its default, so 4xx/5xx responses
            // throw and end up in the catch block below.
            'headers' => [
                'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36',
                'Content-Type' => 'text/html;charset=utf-8',
            ],
        ]);

        print(($i + 1) . '. ' . implode(' ', $site) . "\n");

        $response = $client->get('/sveden/education/');
        echo $response->getStatusCode() . PHP_EOL;
        // Fixed header name: the original had a stray apostrophe inside the
        // string, so the redirect history was always reported as empty.
        var_dump($response->getHeaderLine('X-Guzzle-Redirect-History') . PHP_EOL);

        $html = $response->getBody()->getContents();
        if (empty($html)) {
            $message = implode(' ', $site);
            Logger::log($pathLogErrorHtml, $message);
            continue;
        }

        $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
        $contingent = $parser->getDataTable();

        // Add the spec_id field looked up by spec_code.
        // (Return values are unused, so these helpers presumably modify
        // $contingent by reference — verify against ContingentManager.)
        ContingentManager::getInstance()->addSpecId($contingent, $specializations);
        // Add the org_id field.
        ContingentManager::getInstance()->addOrgId($contingent, $site['org_id']);
        print_r($contingent);

        if (empty($contingent)) {
            $message = implode(' ', $site);
            Logger::log($pathLogErrorHtml, $message);
        } else {
            $set = ContingentManager::getInstance()->checkContingent($contingent);
            if ($set) {
                // Insert into the database.
                ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent);
            } else {
                $message = implode(' ', $site);
                Logger::log($pathLogErrorHtml, $message);
            }
            unset($contingent);
        }
    } catch (
        ClientException
        | RequestException
        | ConnectException
        | ServerException
        | MalformedUriException $e
    ) {
        // Transport/HTTP failure: record the site, error code and message.
        $message = implode(' ', $site) . "\t" . $e->getCode() . "\t" . $e->getMessage();
        Logger::log($pathLogErrorHttp, $message);
    }
}

// Release the debug log handle opened before the loop.
if (is_resource($debugLog)) {
    fclose($debugLog);
}