При неудаче в запросе к БД валидные данные сохраняются в yaml-файл (script.php)
This commit is contained in:
120
app/app.php
120
app/app.php
@@ -8,54 +8,42 @@ use GuzzleHttp\Exception\ClientException;
|
||||
use GuzzleHttp\Exception\ConnectException;
|
||||
use GuzzleHttp\Exception\RequestException;
|
||||
use GuzzleHttp\Exception\ServerException;
|
||||
use GuzzleHttp\RequestOptions;
|
||||
use App\Library\ContingentParser;
|
||||
use App\Library\Database;
|
||||
use GuzzleHttp\Client;
|
||||
|
||||
$pathLogErrorHtml = 'error-html.log';
|
||||
$pathLogErrorHttp = 'error-http.log';
|
||||
|
||||
Logger::log($pathLogErrorHtml, 'start');
|
||||
Logger::log($pathLogErrorHttp, 'start');
|
||||
$pathLogErrorHttp = 'error-http-curl.log';
|
||||
|
||||
$dbOpendata = new Database(new DatabaseConfig('opendata'));
|
||||
$dbNiimko = new Database(new DatabaseConfig('niimko'));
|
||||
exit(0);
|
||||
|
||||
$sites = ContingentManager::getInstance()->getSites($dbNiimko);
|
||||
$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);
|
||||
$sql = 'SELECT DISTINCT org_id FROM sveden_education_contingent';
|
||||
$org = $dbOpendata->selectQuery($sql);
|
||||
print_r($sites);
|
||||
$orgs = ContingentManager::getInstance()->getOrgs($dbOpendata);
|
||||
// print_r($sites);
|
||||
// print_r($specializations);
|
||||
// print_r($org);
|
||||
$orgs = [];
|
||||
foreach ($org as $o) {
|
||||
$orgs[] = $o['org_id'];
|
||||
}
|
||||
unset($org);
|
||||
|
||||
$errorSites = [];
|
||||
$filename = 'error-html.log';
|
||||
$array = file($filename);
|
||||
for ($i = 0; $i < count($array); $i++) {
|
||||
$arr = explode(' ', $array[$i]);
|
||||
if (!in_array($arr[2], $orgs)) {
|
||||
$errorSites[] = $arr[2];
|
||||
}
|
||||
}
|
||||
$filename = 'error-http.log';
|
||||
$array = file($filename);
|
||||
for ($i = 0; $i < count($array); $i++) {
|
||||
$arr = explode(' ', $array[$i]);
|
||||
if (!in_array($arr[2], $orgs)) {
|
||||
$errorSites[] = $arr[2];
|
||||
}
|
||||
}
|
||||
// $errorSites = [];
|
||||
// $filename = 'error-html.log';
|
||||
// $array = file($filename);
|
||||
// for ($i = 0; $i < count($array); $i++) {
|
||||
// $arr = explode(' ', $array[$i]);
|
||||
// if (!in_array($arr[2], $orgs)) {
|
||||
// $errorSites[] = $arr[2];
|
||||
// }
|
||||
// }
|
||||
// $filename = 'error-http.log';
|
||||
// $array = file($filename);
|
||||
// for ($i = 0; $i < count($array); $i++) {
|
||||
// $arr = explode(' ', $array[$i]);
|
||||
// if (!in_array($arr[2], $orgs)) {
|
||||
// $errorSites[] = $arr[2];
|
||||
// }
|
||||
// }
|
||||
|
||||
$status = null;
|
||||
|
||||
$start = 600;
|
||||
$start = 91;
|
||||
|
||||
for ($i = $start; $i < count($sites); $i++) {
|
||||
// Нет URL сайта вуза
|
||||
@@ -69,26 +57,19 @@ for ($i = $start; $i < count($sites); $i++) {
|
||||
continue;
|
||||
}
|
||||
// С ошибками разметки игнорируем
|
||||
if (in_array($sites[$i]['org_id'], $errorSites)) {
|
||||
continue;
|
||||
}
|
||||
// if (in_array($sites[$i]['org_id'], $errorSites)) {
|
||||
// continue;
|
||||
// }
|
||||
try {
|
||||
$client = new Client([
|
||||
RequestOptions::ALLOW_REDIRECTS => [
|
||||
'max' => 10,
|
||||
'strict' => true,
|
||||
'referer' => true,
|
||||
'allow_directs' => true,
|
||||
'track_redirects' => true,
|
||||
'headers' => [
|
||||
'User-Agent' => '
|
||||
Mozilla/5.0 (X11; Linux x86_64)
|
||||
AppleWebKit/537.36 (KHTML, like Gecko)
|
||||
Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36
|
||||
',
|
||||
'Content-Type' => 'text/html;',
|
||||
'charset' => 'utf-8'
|
||||
]
|
||||
'allow_directs' => true,
|
||||
'track_redirects' => true,
|
||||
'verify' => false,
|
||||
// 'http_errors' => false,
|
||||
'headers' => [
|
||||
'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36',
|
||||
'Content-Type' => 'text/html;charset=utf-8',
|
||||
'timeout' => 300
|
||||
]
|
||||
]);
|
||||
|
||||
@@ -96,8 +77,8 @@ for ($i = $start; $i < count($sites); $i++) {
|
||||
$url = ContingentManager::getInstance()->buildURL($url);
|
||||
print(($i+1).". Current url: $url\n");
|
||||
|
||||
$response = $client->get($url, ['timeout' => 300]);
|
||||
$status = $response->getStatusCode();
|
||||
$response = $client->get($url);
|
||||
echo $response->getStatusCode() .PHP_EOL;
|
||||
|
||||
$html = $response->getBody()->getContents();
|
||||
$parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
|
||||
@@ -109,34 +90,31 @@ for ($i = $start; $i < count($sites); $i++) {
|
||||
// Добавляем поле org_id
|
||||
ContingentManager::getInstance()->addOrgId($contingent, $sites[$i]['org_id']);
|
||||
print_r($contingent);
|
||||
} catch (ClientException $e) {
|
||||
$status = 0;
|
||||
} catch (RequestException $e) {
|
||||
$status = 0;
|
||||
} catch (ConnectException $e) {
|
||||
$status = 0;
|
||||
} catch (ServerException $e) {
|
||||
$status = 0;
|
||||
} finally {
|
||||
if ($status != 200) {
|
||||
$message = $sites[$i]['org_id'] . ' ' . $sites[$i]['site'];
|
||||
Logger::log($pathLogErrorHttp, $message);
|
||||
} else if (empty($contingent)) {
|
||||
if (empty($contingent)) {
|
||||
$message = $sites[$i]['org_id'] . ' ' . $sites[$i]['site'];
|
||||
Logger::log($pathLogErrorHtml, $message);
|
||||
} else {
|
||||
$set = ContingentManager::getInstance()->checkContingent($contingent);
|
||||
if ($set) {
|
||||
// Заносим в базу
|
||||
// ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent);
|
||||
ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent);
|
||||
} else {
|
||||
$message = $sites[$i]['org_id'] . ' ' . $sites[$i]['site'];
|
||||
Logger::log($pathLogErrorHtml, $message);
|
||||
}
|
||||
unset($contingent);
|
||||
}
|
||||
} catch (ClientException $e) {
|
||||
$message = implode(' ', $sites[$i]) . "\t" . $e->getCode() . "\t" . $e->getMessage();
|
||||
Logger::log($pathLogErrorHttp, $message);
|
||||
} catch (RequestException $e) {
|
||||
$message = implode(' ', $sites[$i]) . "\t" . $e->getCode() . "\t" . $e->getMessage();
|
||||
Logger::log($pathLogErrorHttp, $message);
|
||||
} catch (ConnectException $e) {
|
||||
$message = implode(' ', $sites[$i]) . "\t" . $e->getCode() . "\t" . $e->getMessage();
|
||||
Logger::log($pathLogErrorHttp, $message);
|
||||
} catch (ServerException $e) {
|
||||
$message = implode(' ', $sites[$i]) . "\t" . $e->getCode() . "\t" . $e->getMessage();
|
||||
Logger::log($pathLogErrorHttp, $message);
|
||||
}
|
||||
}
|
||||
|
||||
Logger::log($pathLogErrorHtml, 'stop');
|
||||
Logger::log($pathLogErrorHttp, 'stop');
|
@@ -70,6 +70,17 @@ final class ContingentManager
|
||||
return $specializations;
|
||||
}
|
||||
|
||||
/**
 * Collects the distinct organisation ids found in the
 * sveden_education_contingent table.
 *
 * @param Database $db Connection on which the SELECT is executed.
 * @return array Flat list with the org_id value of every returned row.
 */
public function getOrgs(Database $db) : array
{
    $rows = $db->selectQuery('SELECT DISTINCT org_id FROM sveden_education_contingent');

    // Flatten the result rows into a plain list of ids.
    $ids = [];
    foreach ($rows as $row) {
        $ids[] = $row['org_id'];
    }

    return $ids;
}
|
||||
|
||||
public function buildURL(string $url): string
|
||||
{
|
||||
// Строит -> https://<base_uri>/sveden/education/
|
||||
|
@@ -52,16 +52,15 @@ class ContingentParser
|
||||
{
|
||||
$data = $this->parse();
|
||||
$records = array();
|
||||
|
||||
if ($data == null) return [];
|
||||
|
||||
// var_dump($data['eduName']->item(0));
|
||||
// exit(0);
|
||||
|
||||
$equel = $data['eduName']->length;
|
||||
$equal = $data['eduName']->length;
|
||||
foreach ($data as $field) {
|
||||
if ($field->length == 0) {
|
||||
return [];
|
||||
}
|
||||
if ($field->length != $equel) {
|
||||
if ($field->length != $equal) {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
@@ -5,6 +5,7 @@ use App\Library\DatabaseConfig;
|
||||
use App\Library\Logger;
|
||||
use PDOException;
|
||||
use PDO;
|
||||
use Symfony\Component\Yaml\Yaml;
|
||||
class Database
|
||||
{
|
||||
private PDO $pdo;
|
||||
@@ -66,7 +67,10 @@ class Database
|
||||
}
|
||||
$stmt->execute();
|
||||
Logger::log(self::$logFile, "Запрос выполнен успешно!");
|
||||
} catch (PDOException $e) {
|
||||
} catch (PDOException $e) {
|
||||
// При ошибке запроса сохраняем валидные данные в yaml-файл
|
||||
$yaml = Yaml::dump($params);
|
||||
file_put_contents(__DIR__ . '/not-recorded-in-db.yaml', $yaml);
|
||||
$message = "Ошибка запроса:" . $e->getMessage();
|
||||
Logger::log(self::$logFile, $message);
|
||||
}
|
||||
|
Reference in New Issue
Block a user