getSites($dbNiimko);

/*
 * Contingent scraping pass.
 *
 * For every site in $sites (starting at $start) the script downloads the page,
 * extracts the rows matching the eduChislen itemprop markup with
 * ContingentParser, enriches them with spec_id / org_id and validates them.
 * Sites that fail are appended to one of two log files:
 *   - error-http.log: no site URL, or the request did not end with HTTP 200;
 *   - error-html.log: the page was fetched but yielded no valid contingent data.
 * Organizations already stored in sveden_education_contingent, or already
 * present in either log, are skipped.
 */
$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);

$sql = 'SELECT DISTINCT org_id FROM sveden_education_contingent';
$org = $dbOpendata->selectQuery($sql);
print_r($sites);

// Ids of organizations that already have contingent rows in the database.
$orgs = [];
foreach ($org as $o) {
    $orgs[] = $o['org_id'];
}
unset($org);

// Read the logs from the same location they are written to below, so the
// skip-list does not depend on the current working directory.
$httpLog = __DIR__ . '/../error-http.log';
$htmlLog = __DIR__ . '/../error-html.log';

// Appends one "<date> <time> <org_id> <site>" line to the given log file.
$logFailure = function ($logFile, $site) {
    $line = date('Y-m-d H:i:s') . ' ' . $site['org_id'] . ' ' . $site['site'];
    file_put_contents($logFile, $line . PHP_EOL, FILE_APPEND);
};

// Collect org ids that failed on earlier runs. Splitting a log line on spaces
// gives date, time, org_id, site, so the id is the third field.
$errorSites = [];
foreach ([$htmlLog, $httpLog] as $logFile) {
    if (!is_readable($logFile)) {
        // Nothing has been logged yet (e.g. first run).
        continue;
    }
    $lines = file($logFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    if ($lines === false) {
        continue;
    }
    foreach ($lines as $line) {
        $fields = explode(' ', $line);
        // Loose comparison on purpose: ids read from the log are strings, while
        // the database rows may carry integers (not visible from here).
        if (isset($fields[2]) && !in_array($fields[2], $orgs)) {
            $errorSites[] = $fields[2];
        }
    }
}

// One client is enough for the whole run; its options do not vary per site.
// Headers are a top-level request option: nested under allow_redirects (as
// before) they were never sent. Only User-Agent is kept; the former
// Content-Type / charset entries described a request body, which a GET lacks.
$client = new Client([
    RequestOptions::ALLOW_REDIRECTS => [
        'max' => 10,
        'strict' => true,
        'referer' => true,
        'track_redirects' => true,
    ],
    RequestOptions::HEADERS => [
        'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36',
    ],
    RequestOptions::TIMEOUT => 300,
]);

// NOTE(review): hard-coded starting offset, presumably a resume point from an
// earlier partial run; confirm before the next full pass.
$start = 600;
$total = count($sites);

for ($i = $start; $i < $total; $i++) {
    $site = $sites[$i];

    // The organization has no site URL.
    if (empty($site['site'])) {
        $logFailure($httpLog, $site);
        continue;
    }
    // Already in the database.
    if (in_array($site['org_id'], $orgs)) {
        continue;
    }
    // Failed on an earlier run (request or markup error): ignore.
    if (in_array($site['org_id'], $errorSites)) {
        continue;
    }

    // Reset per-site state so the finally block never classifies this site
    // using values left over from the previous iteration.
    $status = null;
    $contingent = [];

    try {
        $url = ContingentManager::getInstance()->buildURL($site['site']);
        print(($i + 1) . ". \nCurrent url: $url\n");

        $response = $client->get($url);
        $status = $response->getStatusCode();
        $html = $response->getBody()->getContents();

        $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
        $contingent = $parser->getDataTable();
        // Add the spec_id field, resolved from spec_code.
        ContingentManager::getInstance()->addSpecId($contingent, $specializations);
        // Add the org_id field.
        ContingentManager::getInstance()->addOrgId($contingent, $site['org_id']);
        print_r($contingent);
    } catch (ConnectException $e) {
        // Listed before RequestException so it is handled whether or not it
        // is a subclass of it in the installed Guzzle version.
        $status = 0;
    } catch (RequestException $e) {
        // Also covers ClientException (4xx) and ServerException (5xx), which
        // are subclasses of RequestException.
        $status = 0;
    } finally {
        if ($status !== 200) {
            $logFailure($httpLog, $site);
        } elseif (empty($contingent)) {
            $logFailure($htmlLog, $site);
        } else {
            if (ContingentManager::getInstance()->checkContingent($contingent)) {
                // Insert into the database (currently disabled).
                // ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent);
            } else {
                $logFailure($htmlLog, $site);
            }
        }
        unset($contingent);
    }
}