From 48b4d6cccff4375101848746ccdf42d16363a978 Mon Sep 17 00:00:00 2001 From: Alexander Date: Tue, 3 Sep 2024 15:41:45 +0300 Subject: [PATCH] =?UTF-8?q?=D0=92=D1=81=D0=B5=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D0=BF=D0=B8=D1=81=D0=B0=D0=BD=D0=BE=20=D0=BD=D0=B0=20=D1=84?= =?UTF-8?q?=D0=B0=D1=81=D0=B0=D0=B4=D1=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ContingentParser/Database/Database.php | 132 +++++++++++ ContingentParser/Database/DatabaseConfig.php | 62 +++++ ContingentParser/Database/DatabaseFacade.php | 214 +++++++++++++++++ ContingentParser/Facade.php | 105 +++++++++ ContingentParser/Http/CurlHelper.php | 99 ++++++++ ContingentParser/Http/HttpClientFacade.php | 110 +++++++++ ContingentParser/Http/UrlBuilder.php | 30 +++ ContingentParser/Logger/DatabaseLogger.php | 12 + ContingentParser/Logger/HtmlLogger.php | 12 + ContingentParser/Logger/HttpLogger.php | 70 ++++++ ContingentParser/Logger/Logger.php | 12 + ContingentParser/Parser/ContingentFacade.php | 71 ++++++ .../Parser}/ContingentParser.php | 70 +++--- .../Parser}/ContingentRow.php | 4 +- add_recording.php | 37 --- app/app.php | 215 ------------------ app/library/ContingentManager.php | 187 --------------- app/library/Database.php | 84 ------- app/library/DatabaseConfig.php | 54 ----- app/library/Logger.php | 11 - app/library/Size.php | 27 --- composer.json | 7 +- composer.lock | 2 +- create_table.sql | 11 - create_table_sveden_education_contingent.sql | 10 + curl-helper.php | 109 --------- index.php | 3 - main.php | 14 ++ test.php | 15 ++ vendor/autoload.php | 2 +- vendor/composer/autoload_psr4.php | 5 +- vendor/composer/autoload_real.php | 10 +- vendor/composer/autoload_static.php | 25 +- vendor/composer/installed.php | 12 +- 34 files changed, 1036 insertions(+), 807 deletions(-) create mode 100644 ContingentParser/Database/Database.php create mode 100644 ContingentParser/Database/DatabaseConfig.php create mode 100644 ContingentParser/Database/DatabaseFacade.php create mode 100644 ContingentParser/Facade.php create mode 100644 ContingentParser/Http/CurlHelper.php create mode 100644 ContingentParser/Http/HttpClientFacade.php create mode 100644 ContingentParser/Http/UrlBuilder.php create mode 100644 ContingentParser/Logger/DatabaseLogger.php create mode 100644 ContingentParser/Logger/HtmlLogger.php create mode 100644 ContingentParser/Logger/HttpLogger.php create mode 100644 ContingentParser/Logger/Logger.php create mode 100644 ContingentParser/Parser/ContingentFacade.php rename {app/library => ContingentParser/Parser}/ContingentParser.php (54%) rename {app/library => ContingentParser/Parser}/ContingentRow.php (90%) delete mode 100644 add_recording.php delete mode 100644 app/app.php delete mode 100644 app/library/ContingentManager.php delete mode 100644 app/library/Database.php delete mode 100644 app/library/DatabaseConfig.php delete mode 100644 app/library/Logger.php delete mode 100644 app/library/Size.php delete mode 100644 create_table.sql create mode 100644 create_table_sveden_education_contingent.sql delete mode 100644 curl-helper.php delete mode 100644 index.php create mode 100644 main.php create mode 100644 test.php diff --git a/ContingentParser/Database/Database.php b/ContingentParser/Database/Database.php new file mode 100644 index 0000000..152a734 --- /dev/null +++ b/ContingentParser/Database/Database.php @@ -0,0 +1,132 @@ +_logger = new DatabaseLogger(self::$_logFile); + $this->_databaseConfig = $config; + try { + $dsn = $this->_databaseConfig->getDsn(); + $username = $this->_databaseConfig->getUsername(); + $password = $this->_databaseConfig->getPassword(); + $this->_pdo = new PDO( + $dsn, + $username, + $password, + [PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION] + ); + $message = "Подключение к {$this->_databaseConfig->getDBName()} успешно!"; + $this->_logger->log($message); + } catch (PDOException $e) { + $message = "Ошибка подключения к {$this->_databaseConfig->getDBName()}: {$e->getMessage()}"; + $this->_logger->log($message); + } + } + /** + * Сообщение о разрыве соединения + */ + public function __destruct() + { + $message = "Подключение к {$this->_databaseConfig->getDBName()} прервано!"; + $this->_logger->log($message); + } + /** + * Выборка данных из базы + * @param string $sql + * SQL-запрос + * @param array $params + * Параметры запроса + * @return array + */ + public function select(string $sql, array $params = []) : array + { + try { + $stmt = $this->_pdo->prepare($sql); + if (!empty($params)) { + for ($i = 0; $i < count($params); $i++) { + $stmt->bindParam(":v".($i+1), $params[$i]); + } + } + $stmt->execute(); + $array = $stmt->fetchAll(PDO::FETCH_ASSOC); + } catch (PDOException $e) { + $message = "Ошибка запроса: " . $e->getMessage(); + $this->_logger->log($message); + } finally { + return $array; + } + } + /** + * Добавление данных в базу + * @param string $sql + * SQL-запрос + * @param array $params + * Параметры запроса + * @return void + */ + public function insert(string $sql, array $params) + { + try { + $stmt = $this->_pdo->prepare($sql); + $count = 1; + $size = count($params[0]); + foreach ($params as $param) { + for ($i = $count; $i <= $size; $i++) { + $param = array_values($param); + $stmt->bindParam(":v$i", $param[$i-$count]); + } + $count += count($param); + $size += count($param); + } + $stmt->execute(); + $this->_logger->log("Запрос выполнен успешно!"); + } catch (PDOException $e) { + $message = "Ошибка запроса:" . $e->getMessage(); + $this->_logger->log($message); + // При ошибке запроса сохраняем валидные данные в yaml-файл + if ($e->getCode() === "HY000") { + $yaml = Yaml::dump($params); + file_put_contents('not-recorded-in-db.yaml', $yaml, FILE_APPEND); + } + } + } + /** + * Обновление данных в базе + * @param string $sql + * SQL-запрос + * @param array $params + * Параметры запроса + * @return void + */ + public function update(string $sql, array $params) + { + try { + $stmt = $this->_pdo->prepare($sql); + $count = count($params); + for ($i = 0; $i < $count; $i++) { + $stmt->bindParam(":v".($i+1), $params[$i]); + } + // $stmt->execute(); + $this->_logger->log("Запрос выполнен успешно!"); + } catch (PDOException $e) { + $message = "Ошибка запроса:" . $e->getMessage(); + $this->_logger->log($message); + } + } +} \ No newline at end of file diff --git a/ContingentParser/Database/DatabaseConfig.php b/ContingentParser/Database/DatabaseConfig.php new file mode 100644 index 0000000..c578ccf --- /dev/null +++ b/ContingentParser/Database/DatabaseConfig.php @@ -0,0 +1,62 @@ +getDataEnv($db); + + $this->_driver = $config['DB_DRIVER']; + $this->_host = $config['DB_HOST']; + $this->_dbname = $config['DB_NAME']; + $this->_port = $config['DB_PORT']; + $this->_charset = $config["DB_CHARSET"]; + $this->_username = $config['DB_USERNAME']; + $this->_password = $config['DB_PASSWORD']; + } + + private function getDataEnv(string $db) : array + { + $envVars = parse_ini_file('.env', true); + $db = strtoupper($db); + $config = []; + foreach ($envVars as $dbname => $dbconfig) { + if ($dbname == $db) { + $config = $dbconfig; + } + } + return $config; + } + + public function getDBName(): string + { + return $this->_dbname; + } + + public function getDsn() : string + { + return $this->_driver.":host=".$this->_host + .";dbname=".$this->_dbname + .";charset=".$this->_charset + .";port=".$this->_port; + } + + public function getUsername() : string + { + return $this->_username; + } + + public function getPassword() : string + { + return $this->_password; + } +} \ No newline at end of file diff --git a/ContingentParser/Database/DatabaseFacade.php b/ContingentParser/Database/DatabaseFacade.php new file mode 100644 index 0000000..705350c --- /dev/null +++ b/ContingentParser/Database/DatabaseFacade.php @@ -0,0 +1,214 @@ +niimko = new Database(new DatabaseConfig('niimko')); + $this->opendata = new Database(new DatabaseConfig('opendata')); + $this->specialties = $this->getSpecialties(); + $this->universities = $this->getUniversities(); + } + /** + * Извлечение URL сайтов из базы данных niimko + * @return array + */ + public function getSitesFromNiimko() : array + { + /* + SELECT kod AS org_id, site FROM niimko.s_vuzes + WHERE ootype = 'vuz' AND deleted = 'n' AND fake = 'n' + */ + $builder = new GenericBuilder(); + $params = ['vuz', 'n', 'n', 'RU']; + $query = $builder->select() + ->setTable('s_vuzes') + ->setColumns(['org_id' => 'kod', 'site']) + ->where('AND') + ->equals('ootype', 'vuz') + ->equals('deleted', 'n') + ->equals('fake', 'n') + ->equals('country', 'RU') + ->end(); + $sql = $builder->write($query); + $sites = $this->niimko->select($sql, $params); + + return $sites; + } + /** + * Извлечение сайтов базы данных opendata + * из таблицы miccedu_monitoring. + * @param array $params + * Сайты, у которых устаревшие URL + * @return array + */ + public function getSitesFromMiccedu(array $params) : array + { + /* + SELECT site, vuzkod AS org_id FROM opendata.miccedu_monitoring + WHERE year = 2023 AND (vuzkod = :val1 OR vuzkod = :val2 OR ...) + */ + $builder = new GenericBuilder(); + $year = 2023; + foreach ($params as $key => $org) { + $params[$key] = (int)$org['org_id']; + } + $query = $builder->select() + ->setTable('miccedu_monitoring') + ->setColumns(['org_id' => 'vuzkod','site']) + ->where('AND') + ->equals('year', $year) + ->subWhere('OR'); + foreach ($params as $orgId) { + $query->equals('vuzkod', $orgId); + } + $query = $query->end(); + $sql = $builder->write($query); + array_unshift($params, $year); + $sites = $this->opendata->select($sql, $params); + + return $sites; + } + /** + * Внесение данных численности обучающихся в базу данных opendata + * @param array $contingent + * Массив записей численности по специальностям + * @return void + */ + public function insertContingent(array $contingent) : void + { + /* + INSERT INTO sveden_education_contingent + (org_id, spec_id, spec_code, spec_name, edu_level, edu_forms, contingent) + VALUES + (:v1, :v2, :v3, :v4, :v5, :v6, :v7) + ... + */ + $builder = new GenericBuilder(); + $countAtributes = count($contingent[0]); + $size = $countAtributes * (count($contingent) - 1); + $query = $builder->insert() + ->setTable('sveden_education_contingent') + ->setValues( + $contingent[0] + ); + $sql = $builder->write($query); + for ($i = $countAtributes; $i <= $size;) { + $sql .= " (:v".(++$i).", :v".(++$i).", :v".(++$i).", :v" + .(++$i).", :v".(++$i).", :v".(++$i).", :v".(++$i).")\n"; + } + $sql = preg_replace('/\)\s*VALUES\s*/', ') VALUES ', $sql); + $sql = preg_replace('/\)\s*\(/', '), (', $sql); + $this->opendata->insert($sql, $contingent); + } + /** + * Публичное получение специальностей + * @return array + */ + public function specialties() : array + { + return $this->specialties ? $this->universities : []; + } + /** + * Публичное получение id вузов, занесенных в базу opendata + * @return array + */ + public function universities() : array + { + return $this->universities ? $this->specialties : []; + } + /** + * Извлечение кодов специальности из базы данных niimko + * @return array + */ + private function getSpecialties() : array + { + /* + SELECT id AS spec_id, kod AS spec_code FROM niimko.s_specs + WHERE oopkodes = 'gos3p' + */ + $builder = new GenericBuilder(); + $params = ['gos3p']; + $query = $builder->select() + ->setTable('s_specs') + ->setColumns(['spec_id' =>'id', 'spec_code' => 'kod']) + ->where() + ->equals('oopkodes','gos3p') + ->end(); + $sql = $builder->write($query); + $specialties = $this->niimko->select($sql, $params); + + return $specialties; + } + /** + * Извлечение id вузов, занесенных в базу opendata + * @return array + */ + private function getUniversities() : array + { + /* + SELECT DISTINCT org_id FROM sveden_education_contingent + */ + $builder = new GenericBuilder(); + $query = $builder->select() + ->setTable('sveden_education_contingent') + ->setColumns(['org_id']) + ->where() + ->greaterThan('org_id', 0) + ->end(); + $sql = $builder->write($query); + $sql = preg_replace("/ WHERE.*/", '', $sql); + $sql = preg_replace('/SELECT/', 'SELECT DISTINCT', $sql); + $universities = $this->opendata->select($sql); + + return array_column($universities, 'org_id'); + } + /** + * Обновление сайтов в базе данных niimko + * @param array $params + * Массив [['org_id' => val1, 'site' => val1,],...] + * @return void + */ + public function updateSitesOpendata(array $params) : void + { + /* + UPDATE niimko.s_vuzes + SET site = CASE kod + WHEN :v1 THEN :v2 + WHEN :v3 THEN :v4 + ... + ELSE kod + END + WHERE kod IN (:v1, :v2...) + */ + $count = count($params); + for ($i = 0; $i < $count; $i++) { + if ($i % 2 == 0) { + $params[] = $params[$i]; + } + } + $sql = "UPDATE niimko.s_vuzes\nSET site = CASE kod\n"; + + for ($i = 0; $i < $count;) { + $sql .= "WHEN :v".++$i." THEN :v".++$i."\n"; + } + $sql .= "ELSE kod\nEND\nWHERE kod in("; + for ($i = $count++; $i < count($params);) { + $sql .= ":v".++$i.",\n"; + } + $sql = rtrim($sql,",\n") .")\n"; + + $this->opendata->update($sql, $params); + } +} \ No newline at end of file diff --git a/ContingentParser/Facade.php b/ContingentParser/Facade.php new file mode 100644 index 0000000..3caf8fd --- /dev/null +++ b/ContingentParser/Facade.php @@ -0,0 +1,105 @@ +databaseFacade = new DatabaseFacade(); + $this->httpClientFacade = new HttpClientFacade(); + $this->urlBuilder = new UrlBuilder(); + $this->contingentFacade = new ContingentFacade(); + $this->htmlLogger = new HtmlLogger('log/html.log'); + } + /** + * Получить массив сайтов + * @param array $params + * Массив сайтов, у которых нужны обновиленные URL + * @return array + */ + public function getSites(array $params = []) : array + { + if (empty($params)) { + return $this->databaseFacade->getSitesFromNiimko(); + } else { + return $this->databaseFacade->getSitesFromMiccedu($params); + } + } + /** + * Cобирает из микроразметки данные таблицы + * "Информация о численности обучающихся" в разделе "Образование" + * @param array $site + * Сайт содержащий id организации и URL + * @return void + */ + public function collectDataFromContingent(array $site) : void + { + list('org_id' => $orgId, 'site' => $url) = $site; + // Нет URL сайта вуза + if (empty($site)) { + // $httpLogger->log($orgId); + return; + } + // Уже в базе + if (in_array($orgId, $this->databaseFacade->universities())) { + return; + } + $url = $this->urlBuilder->build($url); + print(implode(' ', $site). "\033[0m" . PHP_EOL); + $html = $this->httpClientFacade->processEducationContingentSites( + $url, + $site + ); + + $contingent = $this->contingentFacade->getContingent( + $html, + $this->databaseFacade->specialties(), + $orgId + ); + + if (empty($contingent)) { + print("\033[91mNo result\033[0m\n"); + $this->htmlLogger->log("$orgId $url"); + } else { + if ($this->contingentFacade->isValidContingent($contingent)) { + // Заносим в базу + print_r($contingent); + // $this->databaseFacade->insertContingent($contingent); + } else { + $this->htmlLogger->log("$orgId $url"); + print("\033[91mNo result\033[0m\n"); + } + } + + unset($contingent); + print(PHP_EOL); + } + + public function getExclusionSites(string $path) : array + { + $logs = file($path); + $result = []; + foreach ($logs as $log) { + $data = explode(' ', $log); + $result[] = [ + 'org_id' => $data[2], + 'site' => $data[3] ? $data[3] : '' + ]; + } + return $result; + } +} \ No newline at end of file diff --git a/ContingentParser/Http/CurlHelper.php b/ContingentParser/Http/CurlHelper.php new file mode 100644 index 0000000..32c3403 --- /dev/null +++ b/ContingentParser/Http/CurlHelper.php @@ -0,0 +1,99 @@ +url = $url; + $this->site = $site; + + $this->curl = curl_init(); + curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, true); + curl_setopt($this->curl, CURLOPT_SSL_VERIFYPEER, false); + curl_setopt($this->curl, CURLOPT_HEADER, true); + curl_setopt($this->curl, CURLOPT_FOLLOWLOCATION, true); + curl_setopt($this->curl, CURLOPT_USERAGENT, + 'Mozilla/5.0 (X11; Linux x86_64) ' + .'AppleWebKit/537.36 (KHTML, like Gecko) ' + .'Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36' + ); + curl_setopt($this->curl, CURLOPT_SSL_VERIFYHOST, false); + curl_setopt($this->curl, CURLOPT_CONNECTTIMEOUT, 90); + } + /** + * Прекратить сессии + */ + public function __destruct() + { + curl_close($this->curl); + } + /** + * Получить html-разметку + * @return string + */ + public function getContent() : string + { + curl_setopt($this->curl, CURLOPT_URL, $this->url); + $html = curl_exec($this->curl); + if ($this->checkLocation($this->url, $html)) { + $html = $this->getContent(); + } + $this->reportError(); + return $html; + } + /** + * Summary of checkLocation + * @param string $html + * @return bool + */ + private function checkLocation(string &$url, string $html) : bool + { + preg_match('/location:(.*?)\n/i', $html, $matches); + if (empty($matches)) return false; + $target = $matches[1]; + $target = preg_replace("/[^a-z0-9\-:.\/,]/iu", '', $target); + $url = $target ? $target : $url; + + return $target ? true : false; + } + /** + * Сообщить об ошибке + * @return void + */ + private function reportError() : void + { + $httpLogger = new HttpLogger('log/http-curl.log'); + + $httpCode = curl_getinfo($this->curl, CURLINFO_HTTP_CODE); + + if ($httpCode != 200 && $httpCode != 0) { + print("\033[91mHTTP-code: $httpCode\033[0m\n"); + $message = implode(' ', $this->site) . ' HTTP-code(' . $httpCode.')'; + $httpLogger->log($message, $httpCode); + } else if ($httpCode == 0) { + $errno = curl_errno($this->curl); + $message = implode(' ', $this->site); + $message .= " cURL error ({$errno}): ".curl_strerror($errno); + $httpLogger->log($message); + } else { + print("\033[94mHTTP-code: $httpCode\033[0m\n"); + } + } +} \ No newline at end of file diff --git a/ContingentParser/Http/HttpClientFacade.php b/ContingentParser/Http/HttpClientFacade.php new file mode 100644 index 0000000..5f0b359 --- /dev/null +++ b/ContingentParser/Http/HttpClientFacade.php @@ -0,0 +1,110 @@ +createClient($url); + // Запрос по базовому uri + $response = $client->get('', [ + 'on_stats' => function (TransferStats $stats) use (&$redirectUrl) { + $redirectUrl = $stats->getEffectiveUri(); + } + ]); + + print("Redirect $url -> $redirectUrl" . PHP_EOL); + $url .= substr($url, -1) == '/' ? '':'/'; + $url .= "sveden/education/"; + print("Parsing for $url" . PHP_EOL); + + $response = $client->get($url); + $httpCode = $response->getStatusCode(); + print("\033[94mHTTP-code: $httpCode\033[0m\n"); + + $html = $response->getBody()->getContents(); + } catch (ClientException + | RequestException + | ConnectException + | ServerException + | MalformedUriException $e + ) { + print("\033[91mHTTP-code: ". $e->getCode(). "\033[0m\n"); + $html = $this->handleException($url, $site); + } finally { + return $html; + } + } + /** + * Обработка исключения + * Повторная попытка спомощью CurlHelper + * @param string $url + * URL сайта + * @param array $site + * @return string + */ + private function handleException(string $url, array $site) : string + { + $curlHelper = new CurlHelper($url, $site); + return $curlHelper->getContent(); + } + /** + * Создать клиента с базовым URL + * @param string $url + * @return \GuzzleHttp\Client + */ + private function createClient(string $url) : Client + { + $this->config = $this->config() + ["base_uri" => $url]; + return new Client($this->config); + } + /** + * Конфигурация клиента + * @return array + */ + private function config() : array + { + return [ + 'force_ip_resolve' => 'v4', + 'debug' => fopen("log/debug-http.log", "w"), + 'allow_directs' => [ + 'max' => 5, + 'strict' => true, + 'referer' => true, + 'protocols' => ['http', 'https'], + 'track_redirects' => true + ], + 'connect_timeout' => 300.0, + 'verify' => false, + 'headers' => [ + 'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) ' + .'AppleWebKit/537.36 (KHTML, like Gecko) ' + .'Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36', + 'Content-Type' => 'text/html;charset=utf-8' + ] + ]; + } +} \ No newline at end of file diff --git a/ContingentParser/Http/UrlBuilder.php b/ContingentParser/Http/UrlBuilder.php new file mode 100644 index 0000000..6f63cfb --- /dev/null +++ b/ContingentParser/Http/UrlBuilder.php @@ -0,0 +1,30 @@ + https:// + $url = trim(strtolower($url)); + $url = preg_replace('/\s+/', '', $url); + $url = str_replace("www/", "www.", $url); + $url = str_replace("http:\\\\", "", $url); + if (!preg_match('#^https?://#', $url)) { + $url = "http://$url"; + } + // $url = str_replace("http://", "https://", $url); + $arr = parse_url($url); + $url = $arr['scheme'] . '://' . $arr['host'] . '/'; + // $url = str_replace("www.", "", $url); + $url = str_replace("_", "/", $url); + return trim($url); + } +} \ No newline at end of file diff --git a/ContingentParser/Logger/DatabaseLogger.php b/ContingentParser/Logger/DatabaseLogger.php new file mode 100644 index 0000000..e335edc --- /dev/null +++ b/ContingentParser/Logger/DatabaseLogger.php @@ -0,0 +1,12 @@ +_path, $logMessage, FILE_APPEND); + } +} \ No newline at end of file diff --git a/ContingentParser/Logger/HtmlLogger.php b/ContingentParser/Logger/HtmlLogger.php new file mode 100644 index 0000000..d95f835 --- /dev/null +++ b/ContingentParser/Logger/HtmlLogger.php @@ -0,0 +1,12 @@ +_path, $logMessage, FILE_APPEND); + } +} \ No newline at end of file diff --git a/ContingentParser/Logger/HttpLogger.php b/ContingentParser/Logger/HttpLogger.php new file mode 100644 index 0000000..0cbe1d7 --- /dev/null +++ b/ContingentParser/Logger/HttpLogger.php @@ -0,0 +1,70 @@ + 'Continue', + 101 => 'Switching Protocols', + 200 => 'OK', + 201 => 'Created', + 202 => 'Accepted', + 203 => 'Non-Authoritative Information', + 204 => 'No Content', + 205 => 'Reset Content', + 206 => 'Partial Content', + 300 => 'Multiple Choices', + 301 => 'Moved Permanently', + 302 => 'Found', + 303 => 'See Other', + 304 => 'Not Modified', + 305 => 'Use Proxy', + 306 => 'Switch Proxy', + 307 => 'Temporary Redirect', + 400 => 'Bad Request', + 401 => 'Unauthorized', + 402 => 'Payment Required', + 403 => 'Forbidden', + 404 => 'Not Found', + 405 => 'Method Not Allowed', + 406 => 'Not Acceptable', + 407 => 'Proxy Authentication Required', + 408 => 'Request Timeout', + 409 => 'Conflict', + 410 => 'Gone', + 411 => 'Length Required', + 412 => 'Precondition Failed', + 413 => 'Payload Too Large', + 414 => 'URI Too Long', + 415 => 'Unsupported Media Type', + 416 => 'Range Not Satisfiable', + 417 => 'Expectation Failed', + 418 => 'I\'m a teapot', + 429 => 'Too Many Requests', + 451 => 'Unavailable For Legal Reasons', + 500 => 'Internal Server Error', + 501 => 'Not Implemented', + 502 => 'Bad Gateway', + 503 => 'Service Unavailable', + 504 => 'Gateway Timeout', + 505 => 'HTTP Version Not Supported', + 506 => 'Variant Also Negotiates', + 507 => 'Insufficient Storage', + 508 => 'Loop Detected', + 509 => 'Bandwidth Limit Exceeded', + 510 => 'Not Extended', + 511 => 'Network Authentication Required' + ); + public function log(string $message, int $httpCode = null) : void + { + $date = date('Y-m-d H:i:s'); + if (empty($httpCode)) { + $logMessage = "[$date] $message\n"; + file_put_contents($this->_path, $logMessage, FILE_APPEND); + } else { + $logMessage = "[$date] $message " + .self::ARR_HTTP_STATUS_CODE[$httpCode]."\n"; + file_put_contents($this->_path, $logMessage, FILE_APPEND); + } + } +} \ No newline at end of file diff --git a/ContingentParser/Logger/Logger.php b/ContingentParser/Logger/Logger.php new file mode 100644 index 0000000..daacbf0 --- /dev/null +++ b/ContingentParser/Logger/Logger.php @@ -0,0 +1,12 @@ +_path = $path; + } +} \ No newline at end of file diff --git a/ContingentParser/Parser/ContingentFacade.php b/ContingentParser/Parser/ContingentFacade.php new file mode 100644 index 0000000..be58451 --- /dev/null +++ b/ContingentParser/Parser/ContingentFacade.php @@ -0,0 +1,71 @@ +getDataTable(); + $this->addSpecId($contingent, $specialties); + $this->addOrgId($contingent, $orgId); + + return $contingent; + } + /** + * Проверка на валидность записи численнести + * @param array $contingent + * Массив численности по специальностям + * @return bool + */ + public function isValidContingent(array $contingent) : bool + { + $count = 0; + foreach ($contingent as $value) { + $count += $value['contingent']; + } + return $count ? true : false; + } + /** + * Добавить идентификатор специальности в запись численности + * @param array $contingent + * Массив численности по специальностям + * @param array $specialties + * Массив специальностей + * @return void + */ + private function addSpecId(array &$contingent, array $specialties) : void + { + $specIdMap = array_column($specialties, 'spec_id', 'spec_code'); + foreach ($contingent as $key => $con) { + $contingent[$key]['spec_id'] = $specIdMap[$con['spec_code']] ?? null; + } + } + /** + * Добавить идентификатор организации в запись численности + * @param array $contingent + * Массив численности по специальностям + * @param int $orgId + * Идентифиактор организации + * @return void + */ + private function addOrgId(array &$contingent, int $orgId): void + { + foreach ($contingent as &$con) { + $con['org_id'] = $orgId; + } + } +} \ No newline at end of file diff --git a/app/library/ContingentParser.php b/ContingentParser/Parser/ContingentParser.php similarity index 54% rename from app/library/ContingentParser.php rename to ContingentParser/Parser/ContingentParser.php index a4d4f21..3fdad89 100644 --- a/app/library/ContingentParser.php +++ b/ContingentParser/Parser/ContingentParser.php @@ -1,71 +1,77 @@ "td", "eduName" => "td", "eduLevel" => "td", "eduForm" => "td", - // "numberBF" => "th", - // "numberBFF" => "th", - // "numberBR" => "th", - // "numberBRF" => "th", - // "numberBM" => "th", - // "numberBMF" => "th", - // "numberP" => "th", - // "numberPF" => "th", "numberAll" => ["th", "td"] ]; - public function __construct(string $html, string $template) + public function __construct(string $html) { libxml_use_internal_errors(true); - $dom = new \DOMDocument( - // encoding: "UTF-8" + $dom = new DOMDocument( + encoding: self::ENCODING ); + if (empty($html)) { + $this->xpath = null; + } else { + $this->setEncoding($html); + $dom->loadHTML($html); + $this->xpath = new DOMXPath($dom); + } + } + + private function setEncoding(string &$html) : void + { $encoding = mb_detect_encoding($html, 'UTF-8, windows-1251'); - if ($encoding != "UTF-8") { + if ($encoding != self::ENCODING) { $html = mb_convert_encoding( $html, - 'UTF-8', + self::ENCODING, $encoding ); - $html = str_replace('windows-1251','utf-8', $html); + $html = str_replace('windows-1251',self::ENCODING, $html); } - $dom->loadHTML(mb_convert_encoding($html,'HTML-ENTITIES','UTF-8')); - - $this->xpath = new \DOMXPath($dom); - $this->template = $template; + $html = mb_convert_encoding($html,'HTML-ENTITIES','UTF-8'); } - - private function parse(): array + private function parse() : array { - $data = array(); + $data = []; foreach (self::FIELDS as $field => $tag) { if (!is_array($tag)) { - $data[$field] = $this->xpath->query($this->template . $tag . "[@itemprop=\"$field\"]"); + $data[$field] = $this->xpath->query( + self::TEMPLATE . $tag . "[@itemprop=\"$field\"]" + ); } else { - $th = $this->xpath->query($this->template . $tag[0] . "[@itemprop=\"$field\"]"); - $td = $this->xpath->query($this->template . $tag[1] . "[@itemprop=\"$field\"]"); + $th = $this->xpath->query( + self::TEMPLATE . $tag[0] . "[@itemprop=\"$field\"]" + ); + $td = $this->xpath->query( + self::TEMPLATE . $tag[1] . "[@itemprop=\"$field\"]" + ); $data[$field] = $th->length > $td->length ? $th : $td; } - } return $data; } public function getDataTable() : array { + if (empty($this->xpath)) return []; + $data = $this->parse(); - $records = array(); - // var_dump($data); - // exit(0); + $records = []; if ($data == null) return []; $equal = $data['eduName']->length; diff --git a/app/library/ContingentRow.php b/ContingentParser/Parser/ContingentRow.php similarity index 90% rename from app/library/ContingentRow.php rename to ContingentParser/Parser/ContingentRow.php index e362597..017470c 100644 --- a/app/library/ContingentRow.php +++ b/ContingentParser/Parser/ContingentRow.php @@ -1,6 +1,6 @@ $max_depth) { - $max_depth = $depth; - } - } - } - - return $max_depth; -} - -$data = Yaml::parse(file_get_contents(__DIR__ . '/not-recorded-in-db.yaml')); -$db = new Database(new DatabaseConfig('opendata')); -// $orgs = ContingentManager::getInstance()->getOrgs($dbOpendata); - -if(array_depth($data) == 2){ - ContingentManager::getInstance()->insertContingent($db, $data); -} else { - foreach ($data as $value) { - ContingentManager::getInstance()->insertContingent($db, $value); - } -} diff --git a/app/app.php b/app/app.php deleted file mode 100644 index 6da9e2e..0000000 --- a/app/app.php +++ /dev/null @@ -1,215 +0,0 @@ -getSites($dbNiimko); -// $specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko); -// $orgs = ContingentManager::getInstance()->getOrgs($dbOpendata); - -// $exceptionsOrgHtml = ContingentManager::getInstance()->getExceptionsHtml('select-html-error.log'); -// $exceptionsOrgHttpCurl = ContingentManager::getInstance()->getExceptionsHttpCurl('select-http-error.log'); - -// print_r($exceptionsOrgHttpCurl); - -// echo count($exceptionsOrgHttpCurl) . " - http-error sites" . PHP_EOL; - -// $start = 794; -// for ($i = $start; $i < count($sites); $i++) { -// // Нет URL сайта вуза -// if (empty($sites[$i]['site'])) { -// $message = implode(' ', $sites[$i]); -// Logger::log($pathLogErrorHttp, $message); -// Logger::log($pathErrorHttp, implode(' ', $sites[$i])); -// continue; -// } -// // Уже в базе -// if (in_array($sites[$i]['org_id'], $orgs)) { -// continue; -// } -// // С ошибками разметки игнорируем -// if (in_array($sites[$i]['org_id'], $exceptionsOrgHtml)) { -// continue; -// } -// // Без ошибок http игнорируем -// if (!in_array($sites[$i]['org_id'], $exceptionsOrgHttpCurl)) { -// continue; -// } -// try { -// $baseUri = ContingentManager::getInstance()->buildBaseUri($sites[$i]['site']); -// $client = new Client([ -// 'force_ip_resolve' => 'v4', -// 'debug' => fopen("debug-http.log", "a"), -// 'base_uri' => $baseUri, -// 'allow_directs' => [ -// 'max' => 5, -// 'strict' => true, -// 'referer' => true, -// 'protocols' => ['http', 'https'], -// 'track_redirects' => true -// ], -// 'connect_timeout' => 300.0, -// 'verify' => false, -// 'headers' => [ -// 'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36', -// 'Content-Type' => 'text/html;charset=utf-8' -// ] -// ]); -// $response = $client->get('', [ -// 'on_stats' => function (TransferStats $stats) use (&$url) { -// $url = $stats->getEffectiveUri(); -// } -// ]); - -// print(($i+1). '. ' . implode(' ', $sites[$i]) . PHP_EOL); -// if (substr($url, -1) == '/') { -// $url = $url."sveden/education/"; -// } else { -// $url = $url."/sveden/education/"; -// } -// echo $url .PHP_EOL; -// $response = $client->get($url, [ -// 'on_stats' => function (TransferStats $stats) use (&$url) { -// $url = $stats->getEffectiveUri(); -// } -// ]); -// echo $url . PHP_EOL; - -// $html = $response->getBody()->getContents(); -// if (empty($html)) { -// $message = implode(' ', $sites[$i]); -// Logger::log($pathLogErrorHtml, $message); -// continue; -// } -// $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//'); -// $contingent = $parser->getDataTable(); - -// // Добавляем поле spec_id по spec_code -// ContingentManager::getInstance()->addSpecId($contingent, $specializations); - -// // Добавляем поле org_id -// ContingentManager::getInstance()->addOrgId($contingent, $sites[$i]['org_id']); -// print_r($contingent); -// if (empty($contingent)) { -// $message = implode(' ', $sites[$i]); -// Logger::log($pathLogErrorHtml, $message); -// } else { -// $set = ContingentManager::getInstance()->checkContingent($contingent); -// if ($set) { -// // Заносим в базу -// ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent); -// } else { -// $message = implode(' ', $sites[$i]); -// Logger::log($pathLogErrorHtml, $message); -// } -// unset($contingent); -// } -// } catch (ClientException -// | RequestException -// | ConnectException -// | ServerException -// | MalformedUriException $e -// ) { -// $message = implode(' ', $sites[$i]) . " " . $e->getCode() . " " . $e->getMessage(); -// Logger::log($pathLogErrorHttp, $message); -// Logger::log($pathErrorHttp, implode(' ', $sites[$i])); -// } -// } - -$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko); -$orgs = ContingentManager::getInstance()->getOrgs($dbOpendata); -// $sites = Yaml::parse(file_get_contents(dirname(__FILE__) ."/sites.yaml")); -// print_r($sites); - -for ($i = 0; $i < count($sites); $i++) { - // Нет URL сайта вуза - if (empty($sites[$i]['site'])) { - // $message = implode(' ', $sites[$i]); - // Logger::log($pathLogErrorHttp, $message); - // Logger::log($pathErrorHttp, implode(' ', $sites[$i])); - continue; - } - // Уже в базе - if (in_array($sites[$i]['org_id'], $orgs)) { - continue; - } - // С ошибками разметки игнорируем - // if (in_array($sites[$i]['org_id'], $exceptionsOrgHtml)) { - // continue; - // } - // Без ошибок http игнорируем - // if (!in_array($sites[$i]['org_id'], $exceptionsOrgHttpCurl)) { - // continue; - // } - print(($i+1). '. ' . implode(' ', $sites[$i]) . PHP_EOL); - $uri = trim(ContingentManager::getInstance()->buildBaseUri($sites[$i]['site'])); - $uri = str_replace("_","/", $uri); - if (substr($uri, -1) == '/') { - $uri = $uri."sveden/education/"; - } else { - $uri = $uri."/sveden/education/"; - } - echo $uri . PHP_EOL; - $ch = curl_init($uri); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); - curl_setopt($ch, CURLOPT_HEADER, false); - curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); - $html = curl_exec($ch); - $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); - if ($httpCode != 200) { - $errno = curl_errno($ch); - $message = implode(' ', $sites[$i]); - $message .= " cURL error ({$errno}): ".curl_strerror($errno); - Logger::log($pathLogErrorHttp, $message); - unset($httpCode); - continue; - } - - curl_close($ch); - echo "HTTP-code: " . $httpCode . PHP_EOL; - if (empty($html)) continue; - - $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//'); - $contingent = $parser->getDataTable(); - // Добавляем поле spec_id по spec_code - ContingentManager::getInstance()->addSpecId($contingent, $specializations); - // Добавляем поле org_id - ContingentManager::getInstance()->addOrgId($contingent, $sites[$i]['org_id']); - - - if (empty($contingent)) { - echo "empty". PHP_EOL; - $message = implode(' ', $sites[$i]); - Logger::log($pathLogErrorHtml, $message); - } else { - print_r($contingent); - $set = ContingentManager::getInstance()->checkContingent($contingent); - if ($set) { - // Заносим в базу - ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent); - } else { - $message = implode(' ', $sites[$i]); - Logger::log($pathLogErrorHtml, $message); - } - unset($contingent); - unset($httpCode); - } -} \ No newline at end of file diff --git a/app/library/ContingentManager.php b/app/library/ContingentManager.php deleted file mode 100644 index f20d1c0..0000000 --- a/app/library/ContingentManager.php +++ /dev/null @@ -1,187 +0,0 @@ -builder = new GenericBuilder(); - } - - public static function getInstance() : ContingentManager - { - self::$instance ??= new self(); - return self::$instance; - } - - public function getSites(Database $db): array - { - // select kod as org_id, site from niimko.s_vuzes - // where ootype = 'vuz' and deleted = 'n' and fake = 'n' - $params = ['vuz', 'n', 'n', 'RU']; - $query = $this->builder->select() - ->setTable('s_vuzes') - ->setColumns(['org_id' => 'kod', 'site']) - ->where('AND') - ->equals('ootype', 'vuz') - ->equals('deleted', 'n') - ->equals('fake', 'n') - ->equals('country', 'RU') - ->end(); - $sql = $this->builder->write($query); - $sites = $db->selectQuery($sql, $params); - - return $sites; - } - - public function getSitesFromMiccedu(Database $db, array $params) : array - { - // select site, vuzkod as org_id from opendata.miccedu_monitoring - // where year = 2023 and (vuzkod = :val1 or vuzkod = :val2 or ...) - $year = 2023; - foreach ($params as $key => $org) { - $params[$key] = (int)$org['org_id']; - } - $query = $this->builder->select() - ->setTable('miccedu_monitoring') - ->setColumns(['org_id' => 'vuzkod','site']) - ->where('AND') - ->equals('year', $year) - ->subWhere('OR'); - foreach ($params as $orgId) { - $query->equals('vuzkod', $orgId); - } - $query = $query->end(); - $sql = $this->builder->writeFormatted($query); - array_unshift($params, $year); - $sites = $db->selectQuery($sql, $params); - - return $sites; - } - - public function insertContingent(Database $db, array $contingent) : void - { - $params = ['spec_code', 'spec_name', 'edu_level', 'edu_forms', 'contingent', 'spec_id', 'org_id']; - $sql = "insert into sveden_education_contingent" - ."(". implode(',', $params) .") values"; - for ($i = 0; $i < count($contingent); $i++) { - $sql .= "("; - foreach ($contingent[$i] as $key => $value) { - $sql .= ":$key". ($i+1).","; - } - $sql = substr_replace($sql,"),", -1); - } - $sql = substr_replace($sql,"", -1); - - $db->insertQuery($sql, $contingent); - } - - public function getSpecializations(Database $db) : array - { - // select id, kod from niimko.s_specs where oopkodes = 'gos3p' - $params = ['gos3p']; - $query = $this->builder->select() - ->setTable('s_specs') - ->setColumns(['id', 'kod']) - ->where() - ->equals('oopkodes','gos3p') - ->end(); - $sql = $this->builder->write($query); - $specializations = $db->selectQuery($sql, $params); - - return $specializations; - } - - public function getOrgs(Database $db) : array - { - $sql = 'SELECT DISTINCT org_id FROM sveden_education_contingent'; - $org = $db->selectQuery($sql); - $orgs = []; - foreach ($org as $o) { - $orgs[] = $o['org_id']; - } - return $orgs; - } - - public function buildBaseUri(string $url): string - { - // Строит -> https:// - $url = str_replace("www/", "www.", $url); - if (strpos($url,'https://') === false - && strpos($url,'http://') === false - ) { - $url = "http://$url"; - } - $url = str_replace("https://", "http://", $url); - $arr = parse_url($url); - $url = $arr['scheme'] .'://'. $arr['host'] . '/'; - - // $url = str_replace("www.", "", $url); - $url = str_replace("_","/", $url); - $url = $url."sveden/education/"; - - return trim($url); - } - - public function addSpecId(array &$contingent, array $specializations) : void - { - foreach ($contingent as $key => $con) { - $buf = null; - $needle = $con['spec_code']; - foreach ($specializations as $spec) { - if ($needle == $spec['kod']) { - $buf = $spec['id']; - } - } - $contingent[$key]['spec_id'] = $buf; - unset($buf); - } - } - - public function addOrgId(array &$contingent, int $orgId) : void - { - for($i = 0; $i < count($contingent); $i++) { - $contingent[$i]['org_id'] = $orgId; - } - } - - public function checkContingent(array $contingent) : bool - { - $count = 0; - foreach ($contingent as $value) { - $count += $value['contingent']; - } - return $count ? true : false; - } - - public function getExceptionsHtml(string $filename) : array - { - $errorSites = []; - $array = file($filename); - for ($i = 0; $i < count($array); $i++) { - $arr = explode(' ', $array[$i]); - $errorSites[] = $arr[2]; - } - return $errorSites; - } - - public function getExceptionsHttpCurl(string $filename) : array - { - $array = file($filename); - $orgHttpError = []; - foreach ($array as $str) { - $data = explode (' ', $str); - if (preg_match("/^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$/", $data[0]) - && $data[3] != PHP_EOL) { - // $orgHttpError[] = $data[2]; - $orgHttpError[] = ['org_id' => $data[2], 'site' => $data[3]]; - } - } - // $orgHttpError = array_unique($orgHttpError); - ksort($orgHttpError); - return $orgHttpError; - } -} \ No newline at end of file diff --git a/app/library/Database.php b/app/library/Database.php deleted file mode 100644 index 6a20415..0000000 --- a/app/library/Database.php +++ /dev/null @@ -1,84 +0,0 @@ -config = $config; - try { - $dsn = $this->config->getDsn(); - $username = $this->config->getUsername(); - $password = $this->config->getPassword(); - $this->pdo = new PDO( - $dsn, - $username, - $password, - [PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION] - ); - $message = "Подключение к ". $this->config->getDBName() ." успешно!"; - Logger::log(self::$logFile, $message); - } catch (PDOException $e) { - $message = "Ошибка подключения к ". $this->config->getDBName() .": " . $e->getMessage(); - Logger::log(self::$logFile, $message); - } - } - - public function __destruct() - { - $message = "Подключение к ". $this->config->getDBName() ." прервано!"; - Logger::log(self::$logFile, $message); - } - - public function selectQuery(string $sql, array $params = []) : array - { - try { - $stmt = $this->pdo->prepare($sql); - // $params = array_values($params); - for ($i = 0; $i < count($params); $i++) { - $stmt->bindParam(":v".$i+1, $params[$i]); - } - $stmt->execute(); - $array = $stmt->fetchAll(PDO::FETCH_ASSOC); - } catch (PDOException $e) { - $message = "Ошибка запроса: " . $e->getMessage(); - Logger::log(self::$logFile, $message); - } finally { - return $array; - } - } - - public function insertQuery(string $sql, array $params) - { - try { - $stmt = $this->pdo->prepare($sql); - for ($i = 0; $i < count($params); $i++) { - $stmt->bindParam(":spec_code".$i+1, $params[$i]['spec_code']); - $stmt->bindParam(":spec_name".$i+1, $params[$i]['spec_name']); - $stmt->bindParam(":edu_forms".$i+1, $params[$i]['edu_forms']); - $stmt->bindParam(":edu_level".$i+1, $params[$i]['edu_level']); - $stmt->bindParam(":contingent".$i+1, $params[$i]['contingent']); - $stmt->bindParam(":org_id".$i+1, $params[$i]['org_id']); - $stmt->bindParam(":spec_id".$i+1, $params[$i]['spec_id']); - } - $stmt->execute(); - Logger::log(self::$logFile, "Запрос выполнен успешно!"); - } catch (PDOException $e) { - $message = "Ошибка запроса:" . $e->getMessage(); - Logger::log(self::$logFile, $message); - // При ошибке запроса сохраняем валидные данные в yaml-файл - if ($e->getCode() === "HY000") { - $yaml = Yaml::dump($params); - file_put_contents(__DIR__ . '/not-recorded-in-db.yaml', $yaml, FILE_APPEND); - } - } - } -} \ No newline at end of file diff --git a/app/library/DatabaseConfig.php b/app/library/DatabaseConfig.php deleted file mode 100644 index ccceed1..0000000 --- a/app/library/DatabaseConfig.php +++ /dev/null @@ -1,54 +0,0 @@ - $dbconfig) { - if ($dbname == $db) { - $config = $dbconfig; - } - } - $this->driver = $config['DB_DRIVER']; - $this->host = $config['DB_HOST']; - $this->dbname = $config['DB_NAME']; - $this->port = $config['DB_PORT']; - $this->charset = $config["DB_CHARSET"]; - $this->username = $config['DB_USERNAME']; - $this->password = $config['DB_PASSWORD']; - } - - public function getDBName(): string - { - return $this->dbname; - } - - public function getDsn() : string - { - return $this->driver.":host=".$this->host - .";dbname=".$this->dbname - .";charset=".$this->charset - .";port=".$this->port; - } - - public function getUsername() : string - { - return $this->username; - } - - public function getPassword() : string - { - return $this->password; - } -} \ No newline at end of file diff --git a/app/library/Logger.php b/app/library/Logger.php deleted file mode 100644 index c4ea404..0000000 --- a/app/library/Logger.php +++ /dev/null @@ -1,11 +0,0 @@ -all = (int)$all; - $this->foreigners = (int)$foreigners; - } - - public function getData(): array{ - return [ - "Всего" => $this->all, - "Из них численность обучающихся, - являющихся иностранными гражданами" => $this->foreigners - ]; - } -} \ No newline at end of file diff --git a/composer.json b/composer.json index 51c5f61..5bca03d 100644 --- a/composer.json +++ b/composer.json @@ -1,13 +1,12 @@ { "require": { "guzzlehttp/guzzle": "^7.0", - "nilportugues/sql-query-builder": "^1.8", - "symfony/yaml": "^7.1" + "symfony/yaml": "^7.1", + "nilportugues/sql-query-builder": "^1.8" }, "autoload": { "psr-4": { - "App\\": "app/", - "App\\Library\\": "app/library" + "ContingentParser\\": "ContingentParser/" } } } diff --git a/composer.lock b/composer.lock index d17072c..9d70939 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "171744ca730de2b7a0ee21a1d982266c", + "content-hash": "03ebe40648d54e718198609886034355", "packages": [ { "name": "guzzlehttp/guzzle", diff --git a/create_table.sql b/create_table.sql deleted file mode 100644 index 7bd26af..0000000 --- a/create_table.sql +++ /dev/null @@ -1,11 +0,0 @@ -create table sveden_education_contingent -( - id serial not null primary key, - org_id int null, - spec_id int null, - spec_code varchar(100) null comment 'Код', - spec_name text null comment 'Наименование', - edu_level text null comment 'Уровень образования', - edu_forms text null comment 'Формы обучения', - contingent int not null comment 'Общая численность обучающихся' -); \ No newline at end of file diff --git a/create_table_sveden_education_contingent.sql b/create_table_sveden_education_contingent.sql new file mode 100644 index 0000000..7992f73 --- /dev/null +++ b/create_table_sveden_education_contingent.sql @@ -0,0 +1,10 @@ +CREATE TABLE sveden_education_contingent( + id SERIAL NOT NULL PRIMARY KEY, + org_id INT NULL, + spec_id INT NULL, + spec_code VARCHAR(100) NULL COMMENT 'Код', + spec_name TEXT NULL COMMENT 'Наименование', + edu_level TEXT NULL COMMENT 'Уровень образования', + edu_forms TEXT NULL COMMENT 'Формы обучения', + contingent INT NOT NULL COMMENT 'Общая численность обучающихся' +); \ No newline at end of file diff --git a/curl-helper.php b/curl-helper.php deleted file mode 100644 index fe4c82c..0000000 --- a/curl-helper.php +++ /dev/null @@ -1,109 +0,0 @@ -getExceptionsHttpCurl('select-http-error.log'); -// print_r($sites); - -$dbOpendata = new Database(new DatabaseConfig('opendata')); -$dbNiimko = new Database(new DatabaseConfig('niimko')); -// $sites = ContingentManager::getInstance()->getSites($dbNiimko); -$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko); -$orgs = ContingentManager::getInstance()->getOrgs($dbOpendata); -// $sites = Yaml::parse(file_get_contents(dirname(__FILE__) ."/sites.yaml")); -$sites = ContingentManager::getInstance()->getExceptionsHttpCurl('log/2024-08-28/error-http-curl.log'); -$sites = ContingentManager::getInstance()->getSitesFromMiccedu($dbOpendata, $sites); -// print_r($sites); -for ($i = 0; $i < count($sites); $i++) { - // Нет URL сайта вуза - if (empty($sites[$i]['site'])) { - // $message = implode(' ', $sites[$i]); - Logger::log($pathLogErrorHttp, $message); - // Logger::log($pathErrorHttp, implode(' ', $sites[$i])); - continue; - } - // Уже в базе - if (in_array($sites[$i]['org_id'], $orgs)) { - continue; - } - // С ошибками разметки игнорируем - // if (in_array($sites[$i]['org_id'], $exceptionsOrgHtml)) { - // continue; - // } - // Без ошибок http игнорируем - // if (!in_array($sites[$i]['org_id'], $exceptionsOrgHttpCurl)) { - // continue; - // } - print(($i+1). '. ' . implode(' ', $sites[$i]) . PHP_EOL); - $uri = ContingentManager::getInstance()->buildBaseUri($sites[$i]['site']); - echo $uri . PHP_EOL; - $ua = 'Mozilla/5.0 (X11; Linux x86_64) ' - .'AppleWebKit/537.36 (KHTML, like Gecko) ' - .'Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36'; - // $html = get_content($uri); - $ch = curl_init($uri); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); - curl_setopt($ch, CURLOPT_HEADER, false); - curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); - curl_setopt($ch, CURLOPT_USERAGENT, $ua); - curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false); - curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 90); - $html = curl_exec($ch); - - $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); - if ($httpCode != 200 && $httpCode != 0) { - $message = implode(' ', $sites[$i]) . ' ' . $httpCode; - Logger::log($pathLogErrorHttp, $message); - unset($httpCode); - continue; - } else if ($httpCode == 0) { - $errno = curl_errno($ch); - $message = implode(' ', $sites[$i]); - $message .= " cURL error ({$errno}): ".curl_strerror($errno); - Logger::log($pathLogErrorHttp, $message); - unset($httpCode); - continue; - } - - curl_close($ch); - echo "HTTP-code: " . $httpCode . PHP_EOL; - if (empty($html)) continue; - - $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//'); - $contingent = $parser->getDataTable(); - // Добавляем поле spec_id по spec_code - ContingentManager::getInstance()->addSpecId($contingent, $specializations); - // Добавляем поле org_id - ContingentManager::getInstance()->addOrgId($contingent, $sites[$i]['org_id']); - - - if (empty($contingent)) { - echo "empty". PHP_EOL; - $message = implode(' ', $sites[$i]); - Logger::log($pathLogErrorHtml, $message); - } else { - print_r($contingent); - $set = ContingentManager::getInstance()->checkContingent($contingent); - if ($set) { - // Заносим в базу - ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent); - } else { - $message = implode(' ', $sites[$i]); - Logger::log($pathLogErrorHtml, $message); - } - unset($contingent); - unset($httpCode); - } -} diff --git a/index.php b/index.php deleted file mode 100644 index e4dd67b..0000000 --- a/index.php +++ /dev/null @@ -1,3 +0,0 @@ -getSites(); +$sites = Yaml::parse(file_get_contents(dirname(__FILE__) ."/sites.yaml")); + +for ($i = 0; $i < count($sites); $i++) { + print("\033[92m" . ($i+1). ' '); + $facade->collectDataFromContingent($sites[$i]); +} \ No newline at end of file diff --git a/test.php b/test.php new file mode 100644 index 0000000..829e9b0 --- /dev/null +++ b/test.php @@ -0,0 +1,15 @@ +updateSitesOpendata($data); diff --git a/vendor/autoload.php b/vendor/autoload.php index 53b3630..58e49c4 100644 --- a/vendor/autoload.php +++ b/vendor/autoload.php @@ -22,4 +22,4 @@ if (PHP_VERSION_ID < 50600) { require_once __DIR__ . '/composer/autoload_real.php'; -return ComposerAutoloaderInit045658d81f6d9d3243e731dda7bf04d1::getLoader(); +return ComposerAutoloaderInit03ebe40648d54e718198609886034355::getLoader(); diff --git a/vendor/composer/autoload_psr4.php b/vendor/composer/autoload_psr4.php index 9f2a292..d6af76c 100644 --- a/vendor/composer/autoload_psr4.php +++ b/vendor/composer/autoload_psr4.php @@ -8,13 +8,12 @@ $baseDir = dirname($vendorDir); return array( 'Symfony\\Polyfill\\Ctype\\' => array($vendorDir . '/symfony/polyfill-ctype'), 'Symfony\\Component\\Yaml\\' => array($vendorDir . '/symfony/yaml'), - 'Psr\\Http\\Message\\' => array($vendorDir . '/psr/http-factory/src', $vendorDir . '/psr/http-message/src'), + 'Psr\\Http\\Message\\' => array($vendorDir . '/psr/http-message/src', $vendorDir . '/psr/http-factory/src'), 'Psr\\Http\\Client\\' => array($vendorDir . '/psr/http-client/src'), 'NilPortugues\\Sql\\QueryFormatter\\' => array($vendorDir . '/nilportugues/sql-query-formatter/src'), 'NilPortugues\\Sql\\QueryBuilder\\' => array($vendorDir . '/nilportugues/sql-query-builder/src'), 'GuzzleHttp\\Psr7\\' => array($vendorDir . '/guzzlehttp/psr7/src'), 'GuzzleHttp\\Promise\\' => array($vendorDir . '/guzzlehttp/promises/src'), 'GuzzleHttp\\' => array($vendorDir . '/guzzlehttp/guzzle/src'), - 'App\\Library\\' => array($baseDir . '/app/library'), - 'App\\' => array($baseDir . '/app'), + 'ContingentParser\\' => array($baseDir . '/ContingentParser'), ); diff --git a/vendor/composer/autoload_real.php b/vendor/composer/autoload_real.php index 7600014..0b224ce 100644 --- a/vendor/composer/autoload_real.php +++ b/vendor/composer/autoload_real.php @@ -2,7 +2,7 @@ // autoload_real.php @generated by Composer -class ComposerAutoloaderInit045658d81f6d9d3243e731dda7bf04d1 +class ComposerAutoloaderInit03ebe40648d54e718198609886034355 { private static $loader; @@ -24,16 +24,16 @@ class ComposerAutoloaderInit045658d81f6d9d3243e731dda7bf04d1 require __DIR__ . '/platform_check.php'; - spl_autoload_register(array('ComposerAutoloaderInit045658d81f6d9d3243e731dda7bf04d1', 'loadClassLoader'), true, true); + spl_autoload_register(array('ComposerAutoloaderInit03ebe40648d54e718198609886034355', 'loadClassLoader'), true, true); self::$loader = $loader = new \Composer\Autoload\ClassLoader(\dirname(__DIR__)); - spl_autoload_unregister(array('ComposerAutoloaderInit045658d81f6d9d3243e731dda7bf04d1', 'loadClassLoader')); + spl_autoload_unregister(array('ComposerAutoloaderInit03ebe40648d54e718198609886034355', 'loadClassLoader')); require __DIR__ . '/autoload_static.php'; - call_user_func(\Composer\Autoload\ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1::getInitializer($loader)); + call_user_func(\Composer\Autoload\ComposerStaticInit03ebe40648d54e718198609886034355::getInitializer($loader)); $loader->register(true); - $filesToLoad = \Composer\Autoload\ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1::$files; + $filesToLoad = \Composer\Autoload\ComposerStaticInit03ebe40648d54e718198609886034355::$files; $requireFile = \Closure::bind(static function ($fileIdentifier, $file) { if (empty($GLOBALS['__composer_autoload_files'][$fileIdentifier])) { $GLOBALS['__composer_autoload_files'][$fileIdentifier] = true; diff --git a/vendor/composer/autoload_static.php b/vendor/composer/autoload_static.php index fe494e9..14e6e23 100644 --- a/vendor/composer/autoload_static.php +++ b/vendor/composer/autoload_static.php @@ -4,7 +4,7 @@ namespace Composer\Autoload; -class ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1 +class ComposerStaticInit03ebe40648d54e718198609886034355 { public static $files = array ( '7b11c4dc42b3b3023073cb14e519683c' => __DIR__ . '/..' . '/ralouphie/getallheaders/src/getallheaders.php', @@ -35,10 +35,9 @@ class ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1 'GuzzleHttp\\Promise\\' => 19, 'GuzzleHttp\\' => 11, ), - 'A' => + 'C' => array ( - 'App\\Library\\' => 12, - 'App\\' => 4, + 'ContingentParser\\' => 17, ), ); @@ -53,8 +52,8 @@ class ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1 ), 'Psr\\Http\\Message\\' => array ( - 0 => __DIR__ . '/..' . '/psr/http-factory/src', - 1 => __DIR__ . '/..' . '/psr/http-message/src', + 0 => __DIR__ . '/..' . '/psr/http-message/src', + 1 => __DIR__ . '/..' . '/psr/http-factory/src', ), 'Psr\\Http\\Client\\' => array ( @@ -80,13 +79,9 @@ class ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1 array ( 0 => __DIR__ . '/..' . '/guzzlehttp/guzzle/src', ), - 'App\\Library\\' => + 'ContingentParser\\' => array ( - 0 => __DIR__ . '/../..' . '/app/library', - ), - 'App\\' => - array ( - 0 => __DIR__ . '/../..' . '/app', + 0 => __DIR__ . '/../..' . '/ContingentParser', ), ); @@ -97,9 +92,9 @@ class ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1 public static function getInitializer(ClassLoader $loader) { return \Closure::bind(function () use ($loader) { - $loader->prefixLengthsPsr4 = ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1::$prefixLengthsPsr4; - $loader->prefixDirsPsr4 = ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1::$prefixDirsPsr4; - $loader->classMap = ComposerStaticInit045658d81f6d9d3243e731dda7bf04d1::$classMap; + $loader->prefixLengthsPsr4 = ComposerStaticInit03ebe40648d54e718198609886034355::$prefixLengthsPsr4; + $loader->prefixDirsPsr4 = ComposerStaticInit03ebe40648d54e718198609886034355::$prefixDirsPsr4; + $loader->classMap = ComposerStaticInit03ebe40648d54e718198609886034355::$classMap; }, null, ClassLoader::class); } diff --git a/vendor/composer/installed.php b/vendor/composer/installed.php index 68353ed..04377ec 100644 --- a/vendor/composer/installed.php +++ b/vendor/composer/installed.php @@ -1,9 +1,9 @@ array( 'name' => '__root__', - 'pretty_version' => 'dev-main', - 'version' => 'dev-main', - 'reference' => 'bf76820498b17a25905a2317da680fb2eb81cf2f', + 'pretty_version' => '1.0.0+no-version-set', + 'version' => '1.0.0.0', + 'reference' => null, 'type' => 'library', 'install_path' => __DIR__ . '/../../', 'aliases' => array(), @@ -11,9 +11,9 @@ ), 'versions' => array( '__root__' => array( - 'pretty_version' => 'dev-main', - 'version' => 'dev-main', - 'reference' => 'bf76820498b17a25905a2317da680fb2eb81cf2f', + 'pretty_version' => '1.0.0+no-version-set', + 'version' => '1.0.0.0', + 'reference' => null, 'type' => 'library', 'install_path' => __DIR__ . '/../../', 'aliases' => array(),