Обновлены настройки Client, дозапись в базу
This commit is contained in:
parent
fd5fe013b0
commit
30733f294c
@ -7,7 +7,8 @@ use Symfony\Component\Yaml\Yaml;
|
|||||||
// сохраняются в yaml-файле. Скрипт парсит этот файл и заносит в БД
|
// сохраняются в yaml-файле. Скрипт парсит этот файл и заносит в БД
|
||||||
require_once "vendor/autoload.php";
|
require_once "vendor/autoload.php";
|
||||||
|
|
||||||
function array_depth(array $array) {
|
function array_depth(array $array)
|
||||||
|
{
|
||||||
$max_depth = 1;
|
$max_depth = 1;
|
||||||
|
|
||||||
foreach ($array as $value) {
|
foreach ($array as $value) {
|
79
app/app.php
79
app/app.php
@ -4,6 +4,7 @@ namespace App;
|
|||||||
use App\Library\ContingentManager;
|
use App\Library\ContingentManager;
|
||||||
use App\Library\DatabaseConfig;
|
use App\Library\DatabaseConfig;
|
||||||
use App\Library\Logger;
|
use App\Library\Logger;
|
||||||
|
use GuzzleHttp\Psr7\Exception\MalformedUriException;
|
||||||
use GuzzleHttp\Exception\ClientException;
|
use GuzzleHttp\Exception\ClientException;
|
||||||
use GuzzleHttp\Exception\ConnectException;
|
use GuzzleHttp\Exception\ConnectException;
|
||||||
use GuzzleHttp\Exception\RequestException;
|
use GuzzleHttp\Exception\RequestException;
|
||||||
@ -11,6 +12,10 @@ use GuzzleHttp\Exception\ServerException;
|
|||||||
use App\Library\ContingentParser;
|
use App\Library\ContingentParser;
|
||||||
use App\Library\Database;
|
use App\Library\Database;
|
||||||
use GuzzleHttp\Client;
|
use GuzzleHttp\Client;
|
||||||
|
use GuzzleHttp\Psr7\Request;
|
||||||
|
use Psr\Http\Message\RequestInterface;
|
||||||
|
use Psr\Http\Message\ResponseInterface;
|
||||||
|
use Psr\Http\Message\UriInterface;
|
||||||
|
|
||||||
$pathLogErrorHtml = 'error-html.log';
|
$pathLogErrorHtml = 'error-html.log';
|
||||||
$pathLogErrorHttp = 'error-http-curl.log';
|
$pathLogErrorHttp = 'error-http-curl.log';
|
||||||
@ -21,29 +26,13 @@ $dbNiimko = new Database(new DatabaseConfig('niimko'));
|
|||||||
$sites = ContingentManager::getInstance()->getSites($dbNiimko);
|
$sites = ContingentManager::getInstance()->getSites($dbNiimko);
|
||||||
$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);
|
$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);
|
||||||
$orgs = ContingentManager::getInstance()->getOrgs($dbOpendata);
|
$orgs = ContingentManager::getInstance()->getOrgs($dbOpendata);
|
||||||
// print_r($sites);
|
|
||||||
// print_r($specializations);
|
|
||||||
// print_r($org);
|
|
||||||
|
|
||||||
// $errorSites = [];
|
$exceptionsOrgHtml = ContingentManager::getInstance()->getExceptionsHtml('error-html.log');
|
||||||
// $filename = 'error-html.log';
|
$exceptionsOrgHttpCurl = ContingentManager::getInstance()->getExceptionsHttpCurl('error-http-curl.log');
|
||||||
// $array = file($filename);
|
|
||||||
// for ($i = 0; $i < count($array); $i++) {
|
|
||||||
// $arr = explode(' ', $array[$i]);
|
|
||||||
// if (!in_array($arr[2], $orgs)) {
|
|
||||||
// $errorSites[] = $arr[2];
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// $filename = 'error-http.log';
|
|
||||||
// $array = file($filename);
|
|
||||||
// for ($i = 0; $i < count($array); $i++) {
|
|
||||||
// $arr = explode(' ', $array[$i]);
|
|
||||||
// if (!in_array($arr[2], $orgs)) {
|
|
||||||
// $errorSites[] = $arr[2];
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
$start = 999;
|
// print_r($exceptionsOrgHttpCurl);
|
||||||
|
|
||||||
|
$start = 0;
|
||||||
|
|
||||||
for ($i = $start; $i < count($sites); $i++) {
|
for ($i = $start; $i < count($sites); $i++) {
|
||||||
// Нет URL сайта вуза
|
// Нет URL сайта вуза
|
||||||
@ -57,13 +46,33 @@ for ($i = $start; $i < count($sites); $i++) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// С ошибками разметки игнорируем
|
// С ошибками разметки игнорируем
|
||||||
// if (in_array($sites[$i]['org_id'], $errorSites)) {
|
if (in_array($sites[$i]['org_id'], $exceptionsOrgHtml)) {
|
||||||
// continue;
|
continue;
|
||||||
// }
|
}
|
||||||
|
if (!in_array($sites[$i]['org_id'], $exceptionsOrgHttpCurl)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
|
$baseUri = ContingentManager::getInstance()->buildBaseUri($sites[$i]['site']);
|
||||||
|
$onRedirect = function(
|
||||||
|
RequestInterface $request,
|
||||||
|
ResponseInterface $res,
|
||||||
|
UriInterface $uri
|
||||||
|
) {
|
||||||
|
echo 'Redirecting! ' . $request->getUri() . ' to ' . $uri . "\n";
|
||||||
|
};
|
||||||
$client = new Client([
|
$client = new Client([
|
||||||
'allow_directs' => true,
|
'force_ip_resolve' => 'v4',
|
||||||
'track_redirects' => true,
|
'debug' => fopen("debug-http.log", "a"),
|
||||||
|
'base_uri' => $baseUri,
|
||||||
|
'allow_directs' => [
|
||||||
|
'max' => 5,
|
||||||
|
'strict' => true,
|
||||||
|
'referer' => true,
|
||||||
|
'protocols' => ['http', 'https'],
|
||||||
|
'on_redirect' => $onRedirect,
|
||||||
|
'track_redirects' => true
|
||||||
|
],
|
||||||
'connect_timeout' => 300.0,
|
'connect_timeout' => 300.0,
|
||||||
'verify' => false,
|
'verify' => false,
|
||||||
// 'http_errors' => false,
|
// 'http_errors' => false,
|
||||||
@ -72,17 +81,14 @@ for ($i = $start; $i < count($sites); $i++) {
|
|||||||
'Content-Type' => 'text/html;charset=utf-8'
|
'Content-Type' => 'text/html;charset=utf-8'
|
||||||
]
|
]
|
||||||
]);
|
]);
|
||||||
|
print(($i+1). '. ' . implode(' ', $sites[$i]) . "\n");
|
||||||
$url = $sites[$i]['site'];
|
$response = $client->get('/sveden/education/');
|
||||||
$url = ContingentManager::getInstance()->buildURL($url);
|
|
||||||
print(($i+1).". Current url: $url\n");
|
|
||||||
|
|
||||||
$response = $client->get($url);
|
|
||||||
echo $response->getStatusCode() .PHP_EOL;
|
echo $response->getStatusCode() .PHP_EOL;
|
||||||
|
var_dump($response->getHeaderLine("'X-Guzzle-Redirect-History") . PHP_EOL);
|
||||||
|
|
||||||
$html = $response->getBody()->getContents();
|
$html = $response->getBody()->getContents();
|
||||||
if (empty($html)) {
|
if (empty($html)) {
|
||||||
$message = $sites[$i]['org_id'] . ' ' . $sites[$i]['site'];
|
$message = implode(' ', $sites[$i]);
|
||||||
Logger::log($pathLogErrorHtml, $message);
|
Logger::log($pathLogErrorHtml, $message);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -96,7 +102,7 @@ for ($i = $start; $i < count($sites); $i++) {
|
|||||||
ContingentManager::getInstance()->addOrgId($contingent, $sites[$i]['org_id']);
|
ContingentManager::getInstance()->addOrgId($contingent, $sites[$i]['org_id']);
|
||||||
print_r($contingent);
|
print_r($contingent);
|
||||||
if (empty($contingent)) {
|
if (empty($contingent)) {
|
||||||
$message = $sites[$i]['org_id'] . ' ' . $sites[$i]['site'];
|
$message = implode(' ', $sites[$i]);
|
||||||
Logger::log($pathLogErrorHtml, $message);
|
Logger::log($pathLogErrorHtml, $message);
|
||||||
} else {
|
} else {
|
||||||
$set = ContingentManager::getInstance()->checkContingent($contingent);
|
$set = ContingentManager::getInstance()->checkContingent($contingent);
|
||||||
@ -104,7 +110,7 @@ for ($i = $start; $i < count($sites); $i++) {
|
|||||||
// Заносим в базу
|
// Заносим в базу
|
||||||
ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent);
|
ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent);
|
||||||
} else {
|
} else {
|
||||||
$message = $sites[$i]['org_id'] . ' ' . $sites[$i]['site'];
|
$message = implode(' ', $sites[$i]);
|
||||||
Logger::log($pathLogErrorHtml, $message);
|
Logger::log($pathLogErrorHtml, $message);
|
||||||
}
|
}
|
||||||
unset($contingent);
|
unset($contingent);
|
||||||
@ -112,7 +118,8 @@ for ($i = $start; $i < count($sites); $i++) {
|
|||||||
} catch (ClientException
|
} catch (ClientException
|
||||||
| RequestException
|
| RequestException
|
||||||
| ConnectException
|
| ConnectException
|
||||||
| ServerException $e
|
| ServerException
|
||||||
|
| MalformedUriException $e
|
||||||
) {
|
) {
|
||||||
$message = implode(' ', $sites[$i]) . "\t" . $e->getCode() . "\t" . $e->getMessage();
|
$message = implode(' ', $sites[$i]) . "\t" . $e->getCode() . "\t" . $e->getMessage();
|
||||||
Logger::log($pathLogErrorHttp, $message);
|
Logger::log($pathLogErrorHttp, $message);
|
||||||
|
@ -81,21 +81,12 @@ final class ContingentManager
|
|||||||
return $orgs;
|
return $orgs;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function buildURL(string $url): string
|
public function buildBaseUri(string $url): string
|
||||||
{
|
{
|
||||||
// Строит -> https://<base_uri>/sveden/education/
|
// Строит -> https://<base_uri>
|
||||||
$offset = strpos($url, '/', strlen('http://'));
|
if (strpos($url,'https://') === false && strpos($url,'http://') === false) {
|
||||||
if ($offset) {
|
$url = "http://$url";
|
||||||
$url = substr_replace($url, '', $offset);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$url = "$url/sveden/education/";
|
|
||||||
if (str_contains($url, "http://")) {
|
|
||||||
$url = str_replace("http://","https://", $url);
|
|
||||||
} else {
|
|
||||||
$url = "https://$url";
|
|
||||||
}
|
|
||||||
$url = str_replace("www.","", $url);
|
|
||||||
|
|
||||||
return $url;
|
return $url;
|
||||||
}
|
}
|
||||||
@ -130,4 +121,32 @@ final class ContingentManager
|
|||||||
}
|
}
|
||||||
return $count ? true : false;
|
return $count ? true : false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Collects the third space-separated field of every line of a log file.
 *
 * The caller compares the returned values with `org_id`, so the log is
 * presumably written as "<date> <time> <org_id> <site> ..." - confirm
 * against Logger::log().
 *
 * A missing or unreadable file yields an empty list instead of an error,
 * so a first run without any log does not abort. Lines that have fewer
 * than three fields (blank lines, truncated records) are skipped.
 *
 * @param string $filename Path to the log file.
 *
 * @return array List of extracted fields, in file order, duplicates kept.
 */
public function getExceptionsHtml(string $filename) : array
{
    if (!is_readable($filename)) {
        return [];
    }

    // Strip line endings up front so the last field never carries "\n".
    $lines = file($filename, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    if ($lines === false) {
        return [];
    }

    $errorSites = [];
    foreach ($lines as $line) {
        $fields = explode(' ', $line);
        // Guard against short lines: index 2 may simply not exist.
        if (isset($fields[2]) && $fields[2] !== '') {
            $errorSites[] = $fields[2];
        }
    }

    return $errorSites;
}
|
||||||
|
|
||||||
|
/**
 * Extracts the unique, sorted third fields of the dated records in a log file.
 *
 * Only lines whose first space-separated field is a YYYY-MM-DD date are
 * treated as records; anything else (e.g. continuation lines of a
 * multi-line message) is ignored. A record that ends right after the
 * separator following the third field - i.e. its fourth field is empty -
 * is skipped as well.
 *
 * The caller compares the returned values with `org_id`, so the log is
 * presumably written as "<date> <time> <org_id> <site> ..." - confirm
 * against Logger::log().
 *
 * A missing or unreadable file yields an empty list instead of an error.
 *
 * @param string $filename Path to the log file.
 *
 * @return array Sorted list of distinct third fields.
 */
public function getExceptionsHttpCurl(string $filename) : array
{
    if (!is_readable($filename)) {
        return [];
    }

    // Line endings are stripped here, so an empty fourth field shows up
    // as '' regardless of the platform's newline convention.
    $lines = file($filename, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    if ($lines === false) {
        return [];
    }

    $datePattern = "/^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$/";

    $orgHttpError = [];
    foreach ($lines as $line) {
        $fields = explode(' ', $line);

        // Too short to carry a third field at all.
        if (!isset($fields[2]) || $fields[2] === '') {
            continue;
        }
        // Not a dated record.
        if (preg_match($datePattern, $fields[0]) !== 1) {
            continue;
        }
        // Record stops right after the third field's separator.
        if (count($fields) === 4 && $fields[3] === '') {
            continue;
        }

        $orgHttpError[] = $fields[2];
    }

    $orgHttpError = array_unique($orgHttpError);
    sort($orgHttpError);

    return $orgHttpError;
}
|
||||||
}
|
}
|
@ -27,15 +27,18 @@ class ContingentParser
|
|||||||
{
|
{
|
||||||
libxml_use_internal_errors(true);
|
libxml_use_internal_errors(true);
|
||||||
$dom = new \DOMDocument(
|
$dom = new \DOMDocument(
|
||||||
encoding: "UTF-8"
|
// encoding: "UTF-8"
|
||||||
);
|
);
|
||||||
$html = mb_convert_encoding(
|
if (mb_detect_encoding($html, 'UTF-8, windows-1251') != "UTF-8") {
|
||||||
$html,
|
$html = mb_convert_encoding(
|
||||||
'UTF-8',
|
$html,
|
||||||
mb_detect_encoding($html, 'UTF-8, windows-1251')
|
'UTF-8',
|
||||||
);
|
mb_detect_encoding($html, 'UTF-8, windows-1251')
|
||||||
$html = str_replace('windows-1251','utf-8', $html);
|
);
|
||||||
$dom->loadHTML($html);
|
$html = str_replace('windows-1251','utf-8', $html);
|
||||||
|
} else {
|
||||||
|
$dom->loadHTML(mb_convert_encoding($html,'HTML-ENTITIES','UTF-8'));
|
||||||
|
}
|
||||||
$this->xpath = new \DOMXPath($dom);
|
$this->xpath = new \DOMXPath($dom);
|
||||||
$this->template = $template;
|
$this->template = $template;
|
||||||
}
|
}
|
||||||
|
@ -68,18 +68,13 @@ class Database
|
|||||||
$stmt->execute();
|
$stmt->execute();
|
||||||
Logger::log(self::$logFile, "Запрос выполнен успешно!");
|
Logger::log(self::$logFile, "Запрос выполнен успешно!");
|
||||||
} catch (PDOException $e) {
|
} catch (PDOException $e) {
|
||||||
// При ошибке запроса сохраняем валидные данные в yaml-файл
|
|
||||||
$yaml = Yaml::dump([$params]);
|
|
||||||
file_put_contents(__DIR__ . '/not-recorded-in-db.yaml', $yaml, FILE_APPEND);
|
|
||||||
$message = "Ошибка запроса:" . $e->getMessage();
|
$message = "Ошибка запроса:" . $e->getMessage();
|
||||||
Logger::log(self::$logFile, $message);
|
Logger::log(self::$logFile, $message);
|
||||||
// TODO узнать код ошибки
|
// При ошибке запроса сохраняем валидные данные в yaml-файл
|
||||||
// SQLSTATE[HY000]: General error: 2006 MySQL server has gone away
|
if ($e->getCode() === "HY000") {
|
||||||
// if ($e->getCode() == 0) {
|
$yaml = Yaml::dump($params);
|
||||||
// $yaml = Yaml::dump($params);
|
file_put_contents(__DIR__ . '/not-recorded-in-db.yaml', $yaml, FILE_APPEND);
|
||||||
// file_put_contents(__DIR__ . '/not-recorded-in-db.yaml', $yaml);
|
}
|
||||||
// exit(1);
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
x
Reference in New Issue
Block a user