Исправлена обработка разметки с кодировкой windows-1251
This commit is contained in:
parent
29eeb5bf77
commit
48f540b8af
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,2 +0,0 @@
|
|||||||
.log
|
|
||||||
.yaml
|
|
19
app/app.php
19
app/app.php
@@ -64,12 +64,12 @@ for ($i = $start; $i < count($sites); $i++) {
|
|||||||
$client = new Client([
|
$client = new Client([
|
||||||
'allow_directs' => true,
|
'allow_directs' => true,
|
||||||
'track_redirects' => true,
|
'track_redirects' => true,
|
||||||
|
'connect_timeout' => 300.0,
|
||||||
'verify' => false,
|
'verify' => false,
|
||||||
// 'http_errors' => false,
|
// 'http_errors' => false,
|
||||||
'headers' => [
|
'headers' => [
|
||||||
'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36',
|
'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 YaBrowser/24.6.0.0 Safari/537.36',
|
||||||
'Content-Type' => 'text/html;charset=utf-8',
|
'Content-Type' => 'text/html;charset=utf-8'
|
||||||
'timeout' => 300
|
|
||||||
]
|
]
|
||||||
]);
|
]);
|
||||||
|
|
||||||
@@ -109,16 +109,11 @@ for ($i = $start; $i < count($sites); $i++) {
|
|||||||
}
|
}
|
||||||
unset($contingent);
|
unset($contingent);
|
||||||
}
|
}
|
||||||
} catch (ClientException $e) {
|
} catch (ClientException
|
||||||
$message = implode(' ', $sites[$i]) . "\t" . $e->getCode() . "\t" . $e->getMessage();
|
| RequestException
|
||||||
Logger::log($pathLogErrorHttp, $message);
|
| ConnectException
|
||||||
} catch (RequestException $e) {
|
| ServerException $e
|
||||||
$message = implode(' ', $sites[$i]) . "\t" . $e->getCode() . "\t" . $e->getMessage();
|
) {
|
||||||
Logger::log($pathLogErrorHttp, $message);
|
|
||||||
} catch (ConnectException $e) {
|
|
||||||
$message = implode(' ', $sites[$i]) . "\t" . $e->getCode() . "\t" . $e->getMessage();
|
|
||||||
Logger::log($pathLogErrorHttp, $message);
|
|
||||||
} catch (ServerException $e) {
|
|
||||||
$message = implode(' ', $sites[$i]) . "\t" . $e->getCode() . "\t" . $e->getMessage();
|
$message = implode(' ', $sites[$i]) . "\t" . $e->getCode() . "\t" . $e->getMessage();
|
||||||
Logger::log($pathLogErrorHttp, $message);
|
Logger::log($pathLogErrorHttp, $message);
|
||||||
}
|
}
|
||||||
|
@@ -26,8 +26,16 @@ class ContingentParser
|
|||||||
public function __construct(string $html, string $template)
|
public function __construct(string $html, string $template)
|
||||||
{
|
{
|
||||||
libxml_use_internal_errors(true);
|
libxml_use_internal_errors(true);
|
||||||
$dom = new \DOMDocument();
|
$dom = new \DOMDocument(
|
||||||
$dom->loadHTML(mb_convert_encoding($html,'HTML-ENTITIES','UTF-8'));
|
encoding: "UTF-8"
|
||||||
|
);
|
||||||
|
$html = mb_convert_encoding(
|
||||||
|
$html,
|
||||||
|
'UTF-8',
|
||||||
|
mb_detect_encoding($html, 'UTF-8, windows-1251')
|
||||||
|
);
|
||||||
|
$html = str_replace('windows-1251','utf-8', $html);
|
||||||
|
$dom->loadHTML($html);
|
||||||
$this->xpath = new \DOMXPath($dom);
|
$this->xpath = new \DOMXPath($dom);
|
||||||
$this->template = $template;
|
$this->template = $template;
|
||||||
}
|
}
|
||||||
@@ -52,7 +60,8 @@ class ContingentParser
|
|||||||
{
|
{
|
||||||
$data = $this->parse();
|
$data = $this->parse();
|
||||||
$records = array();
|
$records = array();
|
||||||
|
// var_dump($data);
|
||||||
|
// exit(0);
|
||||||
if ($data == null) return [];
|
if ($data == null) return [];
|
||||||
|
|
||||||
$equal = $data['eduName']->length;
|
$equal = $data['eduName']->length;
|
||||||
|
@@ -69,7 +69,7 @@ class Database
|
|||||||
Logger::log(self::$logFile, "Запрос выполнен успешно!");
|
Logger::log(self::$logFile, "Запрос выполнен успешно!");
|
||||||
} catch (PDOException $e) {
|
} catch (PDOException $e) {
|
||||||
// При ошибке запроса сохраняем валидные данные в yaml-файл
|
// При ошибке запроса сохраняем валидные данные в yaml-файл
|
||||||
$yaml = Yaml::dump($params);
|
$yaml = Yaml::dump([$params]);
|
||||||
file_put_contents(__DIR__ . '/not-recorded-in-db.yaml', $yaml, FILE_APPEND);
|
file_put_contents(__DIR__ . '/not-recorded-in-db.yaml', $yaml, FILE_APPEND);
|
||||||
$message = "Ошибка запроса:" . $e->getMessage();
|
$message = "Ошибка запроса:" . $e->getMessage();
|
||||||
Logger::log(self::$logFile, $message);
|
Logger::log(self::$logFile, $message);
|
||||||
|
@@ -31,6 +31,6 @@ if(array_depth($data) == 2){
|
|||||||
ContingentManager::getInstance()->insertContingent($db, $data);
|
ContingentManager::getInstance()->insertContingent($db, $data);
|
||||||
} else {
|
} else {
|
||||||
foreach ($data as $value) {
|
foreach ($data as $value) {
|
||||||
// ContingentManager::getInstance()->insertContingent($db, $value);
|
ContingentManager::getInstance()->insertContingent($db, $value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user