2024-09-03 15:41:45 +03:00
|
|
|
|
<?php
|
|
|
|
|
namespace ContingentParser\Http;
|
|
|
|
|
|
2024-09-04 16:12:03 +03:00
|
|
|
|
use ContingentParser\Printer;
|
2024-09-03 15:41:45 +03:00
|
|
|
|
use GuzzleHttp\Client;
|
|
|
|
|
use GuzzleHttp\TransferStats;
|
|
|
|
|
|
|
|
|
|
/**
 * Facade over the Guzzle HTTP client that downloads the
 * "sveden/education/study" page of an organisation's site.
 *
 * If the Guzzle request throws, the download is retried through CurlHelper.
 */
final class HttpClientFacade
{
    private Client $client;
    private array $config;

    public function __construct() {}

    /**
     * Download the student-contingent page of a site.
     *
     * The base URL is requested first (only to log the effective URI that
     * Guzzle reports for it), then "sveden/education/study" is appended to
     * the URL and that page is fetched.
     *
     * @param string $url  Site URL
     * @param array  $site Organisation identifier and base URL
     *                     (passed through to CurlHelper unchanged)
     * @return string Response body, or the content obtained by the
     *                CurlHelper fallback when the Guzzle request throws
     */
    public function processEducationContingentSites(
        string $url,
        array $site
    ) : string {
        // Written by the on_stats callback below; initialised so the log
        // line never reads an undefined variable.
        $redirectUrl = null;

        try {
            $client = $this->createClient($url);

            // Request the base URI; the response itself is not needed,
            // only the effective URI captured from the transfer stats.
            $client->get('', [
                'on_stats' => function (TransferStats $stats) use (&$redirectUrl) {
                    $redirectUrl = $stats->getEffectiveUri();
                },
            ]);
            Printer::println("Redirect $url -> $redirectUrl");

            // Make sure exactly one slash separates the site URL and the path.
            $url .= substr($url, -1) === '/' ? '' : '/';
            $url .= "sveden/education/study";
            Printer::println("Parsing for $url");

            $response = $client->get($url);
            $httpCode = $response->getStatusCode();
            Printer::println("HTTP-code: $httpCode", 'blue');

            // Return from the try/catch branches rather than from a
            // `finally` block: a `return` inside `finally` discards any
            // throwable still in flight (e.g. one raised by the fallback).
            return $response->getBody()->getContents();
        } catch (\Exception $e) {
            Printer::println("HTTP-code: ".$e->getCode(), 'red');

            // $url is whatever it was when the exception was thrown: the
            // bare site URL if the first request failed, the full page URL
            // if the second one did.
            return $this->handleException($url, $site);
        }
    }

    /**
     * Handle a failed Guzzle request by retrying with CurlHelper.
     *
     * @param string $url  Site URL
     * @param array  $site Forwarded to CurlHelper as-is
     * @return string Content returned by CurlHelper::getContent()
     */
    private function handleException(string $url, array $site): string
    {
        $curlHelper = new CurlHelper($url, $site);
        $html = $curlHelper->getContent();
        // Must run after getContent(); presumably reports the outcome of
        // that attempt — confirm against CurlHelper.
        $curlHelper->reportError();

        return $html;
    }

    /**
     * Create a client whose base URI is the given URL.
     *
     * The merged configuration is also stored in $this->config.
     *
     * @param string $url
     * @return \GuzzleHttp\Client
     */
    private function createClient(string $url): Client
    {
        $this->config = $this->config() + ["base_uri" => $url];

        return new Client($this->config);
    }

    /**
     * Client configuration.
     *
     * Note: every call reopens log/debug-http.log in "w" mode, which
     * truncates the file.
     *
     * @return array Guzzle request options
     */
    private function config() : array
    {
        return [
            'force_ip_resolve' => 'v4',
            'debug' => fopen("log/debug-http.log", "w"),
            // The key must be spelled "allow_redirects"; Guzzle silently
            // ignores unknown option names and falls back to its defaults.
            'allow_redirects' => [
                'max' => 5,
                'strict' => true,
                'referer' => true,
                'protocols' => ['http', 'https'],
                'track_redirects' => true,
            ],
            'connect_timeout' => 300.0,
            // NOTE(review): TLS certificate verification is disabled here;
            // confirm this is intentional for the crawled sites.
            'verify' => false,
            'headers' => [
                'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) '
                    .'AppleWebKit/537.36 (KHTML, like Gecko) '
                    .'Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36',
                'Content-Type' => 'text/html;charset=utf-8',
            ],
        ];
    }
}
|