Перенес код
This commit is contained in:
99
ContingentParser/Http/CurlHelper.php
Normal file
99
ContingentParser/Http/CurlHelper.php
Normal file
@ -0,0 +1,99 @@
|
||||
<?php
|
||||
namespace ContingentParser\Http;
|
||||
|
||||
use ContingentParser\Logger\HttpLogger;
|
||||
use ContingentParser\Printer;
|
||||
use CurlHandle;
|
||||
/**
 * Wrapper around a single cURL session that downloads one page and reports
 * the outcome of the last transfer.
 *
 * NOTE(review): TLS peer and host verification are switched off in the
 * constructor. That looks deliberate (sites with broken certificates), but it
 * makes every download open to interception - confirm it is still wanted.
 */
final class CurlHelper
{
    /**
     * Upper bound on the number of extra requests getContent() issues while
     * chasing Location headers.
     */
    private const MAX_REDIRECTS = 10;

    private CurlHandle $curl;
    private string $url;
    private array $site;

    /**
     * Opens the cURL session and applies the transfer options.
     *
     * @param string $url
     * URL to download.
     * @param array $site
     * Organisation identifier and base URL of the site; joined with spaces
     * when a log message is built.
     *
     * @throws \RuntimeException When the cURL session cannot be created.
     */
    public function __construct(string $url, array $site)
    {
        $this->url = $url;
        $this->site = $site;

        $handle = curl_init();
        if ($handle === false) {
            throw new \RuntimeException('Unable to initialise a cURL session');
        }
        $this->curl = $handle;

        curl_setopt_array($this->curl, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_SSL_VERIFYPEER => false,
            // Response headers are part of the returned string; getContent()
            // relies on that to look for Location headers.
            CURLOPT_HEADER => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_USERAGENT => 'Mozilla/5.0 (X11; Linux x86_64) '
                . 'AppleWebKit/537.36 (KHTML, like Gecko) '
                . 'Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36',
            CURLOPT_SSL_VERIFYHOST => false,
            CURLOPT_CONNECTTIMEOUT => 90,
        ]);
    }

    /**
     * Ends the session.
     *
     * curl_close() has no effect on PHP 8, so the handle is simply released.
     * The isset() guard covers an instance whose constructor threw before the
     * handle was stored.
     */
    public function __destruct()
    {
        if (isset($this->curl)) {
            unset($this->curl);
        }
    }

    /**
     * Downloads the page.
     *
     * While the header section of a response carries an absolute Location
     * header, the request is repeated against that address, at most
     * MAX_REDIRECTS extra times.
     *
     * @return string Raw response (headers followed by the body), or an empty
     *                string when the transfer fails.
     */
    public function getContent() : string
    {
        $html = '';

        for ($attempt = 0; $attempt <= self::MAX_REDIRECTS; $attempt++) {
            curl_setopt($this->curl, CURLOPT_URL, $this->url);

            $response = curl_exec($this->curl);
            $html = is_string($response) ? $response : '';

            // Only the header section is inspected, so "location:" text
            // inside the page body cannot trigger another request.
            $headerSize = (int) curl_getinfo($this->curl, CURLINFO_HEADER_SIZE);
            $headers = substr($html, 0, $headerSize);

            if (!$this->checkLocation($this->url, $headers)) {
                break;
            }
        }

        return $html;
    }

    /**
     * Looks for a redirect target in a block of response headers.
     *
     * Only an absolute http(s) target is accepted. A relative target is
     * ignored because it cannot be requested on its own.
     *
     * @param string $url     Replaced with the redirect target when one is accepted.
     * @param string $headers Raw response headers.
     *
     * @return bool True when $url was replaced and the request should be repeated.
     */
    private function checkLocation(string &$url, string $headers) : bool
    {
        // Anchored to the start of a line so that "Content-Location:" is not
        // mistaken for "Location:".
        if (!preg_match('/^location:[ \t]*(\S+)/im', $headers, $matches)) {
            return false;
        }

        $target = $matches[1];
        if (!preg_match('#^https?://#i', $target)) {
            return false;
        }

        $url = $target;

        return true;
    }

    /**
     * Prints and/or logs the outcome of the last transfer.
     *
     * - HTTP code 0 (no response): the cURL error is logged.
     * - HTTP code 200: the code is printed in blue.
     * - Any other code: the code is printed in red and logged.
     *
     * @return void
     */
    public function reportError() : void
    {
        $httpLogger = new HttpLogger('log/http-curl.log');

        $httpCode = (int) curl_getinfo($this->curl, CURLINFO_HTTP_CODE);

        if ($httpCode === 0) {
            $errno = curl_errno($this->curl);
            $message = implode(' ', $this->site);
            $message .= " cURL error ({$errno}): " . curl_strerror($errno);
            $httpLogger->log($message);

            return;
        }

        if ($httpCode === 200) {
            Printer::println("HTTP-code: $httpCode", 'blue');

            return;
        }

        Printer::println("HTTP-code: $httpCode", 'red');
        $message = implode(' ', $this->site) . ' HTTP-code(' . $httpCode . ')';
        $httpLogger->log($message, $httpCode);
    }
}
|
112
ContingentParser/Http/HttpClientFacade.php
Normal file
112
ContingentParser/Http/HttpClientFacade.php
Normal file
@ -0,0 +1,112 @@
|
||||
<?php
|
||||
namespace ContingentParser\Http;
|
||||
|
||||
use ContingentParser\Printer;
|
||||
use GuzzleHttp\Client;
|
||||
use GuzzleHttp\Exception\ClientException;
|
||||
use GuzzleHttp\Exception\ConnectException;
|
||||
use GuzzleHttp\Exception\RequestException;
|
||||
use GuzzleHttp\Exception\ServerException;
|
||||
use GuzzleHttp\Psr7\Exception\MalformedUriException;
|
||||
use GuzzleHttp\TransferStats;
|
||||
|
||||
/**
 * Facade over Guzzle that downloads the "sveden/education/" page of a site
 * and falls back to CurlHelper when the Guzzle request fails.
 */
final class HttpClientFacade
{
    private array $config;

    public function __construct() {}

    /**
     * Downloads the education section of a site.
     *
     * The base URL is requested first so the effective (post-redirect) address
     * can be printed. Then "<url>/sveden/education/" is requested and its body
     * returned. When one of the caught Guzzle exceptions is raised, the
     * download is retried through CurlHelper with the URL as it stands at that
     * moment. Any other exception propagates to the caller.
     *
     * @param string $url
     * Base URL of the site.
     * @param array $site
     * Organisation identifier and base URL; passed through to CurlHelper.
     *
     * @return string Page content.
     */
    public function processEducationContingentSites(
        string $url,
        array $site
    ) : string {
        $redirectUrl = null;

        try {
            $client = $this->createClient($url);

            // Request against the base URI, only to learn where it ends up.
            $client->get('', [
                'on_stats' => static function (TransferStats $stats) use (&$redirectUrl) : void {
                    $redirectUrl = $stats->getEffectiveUri();
                },
            ]);
            Printer::println("Redirect $url -> $redirectUrl");

            if (!str_ends_with($url, '/')) {
                $url .= '/';
            }
            $url .= "sveden/education/";
            Printer::println("Parsing for $url");

            $response = $client->get($url);
            $httpCode = $response->getStatusCode();
            Printer::println("HTTP-code: $httpCode", 'blue');

            return $response->getBody()->getContents();
        } catch (ClientException
            | RequestException
            | ConnectException
            | ServerException
            | MalformedUriException $e
        ) {
            Printer::println("HTTP-code: ".$e->getCode(), 'blue');

            return $this->handleException($url, $site);
        }
    }

    /**
     * Fallback used after a Guzzle failure: retries the download through
     * CurlHelper and lets it report the result of the transfer.
     *
     * @param string $url
     * URL to download.
     * @param array $site
     * Organisation identifier and base URL; passed through to CurlHelper.
     *
     * @return string Content returned by CurlHelper::getContent().
     */
    private function handleException(string $url, array $site) : string
    {
        $curlHelper = new CurlHelper($url, $site);
        $html = $curlHelper->getContent();
        $curlHelper->reportError();

        return $html;
    }

    /**
     * Creates a Guzzle client whose base URI is the given URL.
     *
     * @param string $url
     *
     * @return \GuzzleHttp\Client
     */
    private function createClient(string $url) : Client
    {
        $this->config = $this->config() + ["base_uri" => $url];

        return new Client($this->config);
    }

    /**
     * Client configuration shared by every request.
     *
     * NOTE(review): the debug log is opened in "w" mode on every call, so it
     * is truncated each time a client is created - confirm that is intended.
     * NOTE(review): 'verify' => false disables TLS certificate verification.
     *
     * @return array
     */
    private function config() : array
    {
        return [
            'force_ip_resolve' => 'v4',
            'debug' => fopen("log/debug-http.log", "w"),
            // The key must be spelled "allow_redirects"; a misspelled key is
            // not a Guzzle option and the settings below would be ignored.
            'allow_redirects' => [
                'max' => 5,
                'strict' => true,
                'referer' => true,
                'protocols' => ['http', 'https'],
                'track_redirects' => true,
            ],
            'connect_timeout' => 300.0,
            'verify' => false,
            'headers' => [
                'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) '
                    .'AppleWebKit/537.36 (KHTML, like Gecko) '
                    .'Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36',
                'Content-Type' => 'text/html;charset=utf-8',
            ],
        ];
    }
}
|
30
ContingentParser/Http/UrlBuilder.php
Normal file
30
ContingentParser/Http/UrlBuilder.php
Normal file
@ -0,0 +1,30 @@
|
||||
<?php
|
||||
namespace ContingentParser\Http;
|
||||
|
||||
/**
 * Normalises loosely formatted site addresses into a "<scheme>://<host>/"
 * base URL.
 */
class UrlBuilder
{
    public function __construct() {}

    /**
     * Builds the base URL of a site.
     *
     * The input is lower-cased and stripped of all whitespace. Two typos are
     * repaired ("www/" becomes "www.", a "http:\\" prefix is dropped) and
     * "http://" is prepended when no http(s) scheme is present. Only the
     * scheme and host are kept; path, query, port and fragment are discarded.
     *
     * @param string $url
     * Original address.
     *
     * @return string Base URL ending with "/", or an empty string when no host
     *                can be extracted from the input.
     */
    public function build(string $url) : string
    {
        $url = trim(strtolower($url));
        $url = preg_replace('/\s+/', '', $url);
        $url = str_replace(['www/', 'http:\\\\'], ['www.', ''], $url);

        if (!preg_match('#^https?://#', $url)) {
            $url = "http://$url";
        }

        // parse_url() returns false for seriously malformed input and omits
        // the "host" key when there is none; neither can yield a base URL.
        $parts = parse_url($url);
        if (!is_array($parts) || !isset($parts['scheme'], $parts['host'])) {
            return '';
        }

        $url = $parts['scheme'] . '://' . $parts['host'] . '/';

        // NOTE(review): underscores are turned into slashes here; presumably a
        // workaround for a quirk in the source data - confirm.
        $url = str_replace("_", "/", $url);

        return trim($url);
    }
}
|
Reference in New Issue
Block a user