Исправлена обработка разметки с кодировкой windows-1251

This commit is contained in:
Alexander 2024-08-21 14:51:01 +03:00
parent 29eeb5bf77
commit 48f540b8af
5 changed files with 22 additions and 20 deletions

2
.gitignore vendored
View File

@@ -1,2 +0,0 @@
.log
.yaml

View File

@@ -64,12 +64,12 @@ for ($i = $start; $i < count($sites); $i++) {
$client = new Client([ $client = new Client([
'allow_directs' => true, 'allow_directs' => true,
'track_redirects' => true, 'track_redirects' => true,
'connect_timeout' => 300.0,
'verify' => false, 'verify' => false,
// 'http_errors' => false, // 'http_errors' => false,
'headers' => [ 'headers' => [
'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36', 'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36',
'Content-Type' => 'text/html;charset=utf-8', 'Content-Type' => 'text/html;charset=utf-8'
'timeout' => 300
] ]
]); ]);
@@ -109,16 +109,11 @@ for ($i = $start; $i < count($sites); $i++) {
} }
unset($contingent); unset($contingent);
} }
} catch (ClientException $e) { } catch (ClientException
$message = implode(' ', $sites[$i]) . "\t" . $e->getCode() . "\t" . $e->getMessage(); | RequestException
Logger::log($pathLogErrorHttp, $message); | ConnectException
} catch (RequestException $e) { | ServerException $e
$message = implode(' ', $sites[$i]) . "\t" . $e->getCode() . "\t" . $e->getMessage(); ) {
Logger::log($pathLogErrorHttp, $message);
} catch (ConnectException $e) {
$message = implode(' ', $sites[$i]) . "\t" . $e->getCode() . "\t" . $e->getMessage();
Logger::log($pathLogErrorHttp, $message);
} catch (ServerException $e) {
$message = implode(' ', $sites[$i]) . "\t" . $e->getCode() . "\t" . $e->getMessage(); $message = implode(' ', $sites[$i]) . "\t" . $e->getCode() . "\t" . $e->getMessage();
Logger::log($pathLogErrorHttp, $message); Logger::log($pathLogErrorHttp, $message);
} }

View File

@@ -26,8 +26,16 @@ class ContingentParser
public function __construct(string $html, string $template) public function __construct(string $html, string $template)
{ {
libxml_use_internal_errors(true); libxml_use_internal_errors(true);
$dom = new \DOMDocument(); $dom = new \DOMDocument(
$dom->loadHTML(mb_convert_encoding($html,'HTML-ENTITIES','UTF-8')); encoding: "UTF-8"
);
$html = mb_convert_encoding(
$html,
'UTF-8',
mb_detect_encoding($html, 'UTF-8, windows-1251')
);
$html = str_replace('windows-1251','utf-8', $html);
$dom->loadHTML($html);
$this->xpath = new \DOMXPath($dom); $this->xpath = new \DOMXPath($dom);
$this->template = $template; $this->template = $template;
} }
@@ -52,7 +60,8 @@ class ContingentParser
{ {
$data = $this->parse(); $data = $this->parse();
$records = array(); $records = array();
// var_dump($data);
// exit(0);
if ($data == null) return []; if ($data == null) return [];
$equal = $data['eduName']->length; $equal = $data['eduName']->length;

View File

@@ -69,7 +69,7 @@ class Database
Logger::log(self::$logFile, "Запрос выполнен успешно!"); Logger::log(self::$logFile, "Запрос выполнен успешно!");
} catch (PDOException $e) { } catch (PDOException $e) {
// При ошибке запроса сохраняем валидные данные в yaml-файл // При ошибке запроса сохраняем валидные данные в yaml-файл
$yaml = Yaml::dump($params); $yaml = Yaml::dump([$params]);
file_put_contents(__DIR__ . '/not-recorded-in-db.yaml', $yaml, FILE_APPEND); file_put_contents(__DIR__ . '/not-recorded-in-db.yaml', $yaml, FILE_APPEND);
$message = "Ошибка запроса:" . $e->getMessage(); $message = "Ошибка запроса:" . $e->getMessage();
Logger::log(self::$logFile, $message); Logger::log(self::$logFile, $message);

View File

@@ -31,6 +31,6 @@ if(array_depth($data) == 2){
ContingentManager::getInstance()->insertContingent($db, $data); ContingentManager::getInstance()->insertContingent($db, $data);
} else { } else {
foreach ($data as $value) { foreach ($data as $value) {
// ContingentManager::getInstance()->insertContingent($db, $value); ContingentManager::getInstance()->insertContingent($db, $value);
} }
} }