Добавлен для пробы curl-helper.php, работающий с библиотекой cURL
This commit is contained in:
parent
30733f294c
commit
04406fb444
@ -12,7 +12,6 @@ use GuzzleHttp\Exception\ServerException;
|
|||||||
use App\Library\ContingentParser;
|
use App\Library\ContingentParser;
|
||||||
use App\Library\Database;
|
use App\Library\Database;
|
||||||
use GuzzleHttp\Client;
|
use GuzzleHttp\Client;
|
||||||
use GuzzleHttp\Psr7\Request;
|
|
||||||
use Psr\Http\Message\RequestInterface;
|
use Psr\Http\Message\RequestInterface;
|
||||||
use Psr\Http\Message\ResponseInterface;
|
use Psr\Http\Message\ResponseInterface;
|
||||||
use Psr\Http\Message\UriInterface;
|
use Psr\Http\Message\UriInterface;
|
||||||
|
@ -29,11 +29,12 @@ class ContingentParser
|
|||||||
$dom = new \DOMDocument(
|
$dom = new \DOMDocument(
|
||||||
// encoding: "UTF-8"
|
// encoding: "UTF-8"
|
||||||
);
|
);
|
||||||
if (mb_detect_encoding($html, 'UTF-8, windows-1251') != "UTF-8") {
|
$encoding = mb_detect_encoding($html, 'UTF-8, windows-1251');
|
||||||
|
if ($encoding != "UTF-8") {
|
||||||
$html = mb_convert_encoding(
|
$html = mb_convert_encoding(
|
||||||
$html,
|
$html,
|
||||||
'UTF-8',
|
'UTF-8',
|
||||||
mb_detect_encoding($html, 'UTF-8, windows-1251')
|
$encoding
|
||||||
);
|
);
|
||||||
$html = str_replace('windows-1251','utf-8', $html);
|
$html = str_replace('windows-1251','utf-8', $html);
|
||||||
} else {
|
} else {
|
||||||
|
100
curl-helper.php
Normal file
100
curl-helper.php
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
<?php

/*
 * Trial helper: walks the sites listed in sites.yaml, downloads each site's
 * "/sveden/education/" page with the cURL extension, extracts the
 * eduChislen table rows through ContingentParser and stores the result via
 * ContingentManager.
 *
 * Sites that fail on the HTTP level are written to error-http-curl.log;
 * sites whose markup yields no usable data are written to error-html.log.
 */

use App\Library\ContingentManager;
use App\Library\ContingentParser;
use App\Library\Database;
use App\Library\DatabaseConfig;
use App\Library\Logger;
use Symfony\Component\Yaml\Yaml;

require_once(dirname(__FILE__) ."/vendor/autoload.php");

// NOTE(review): the log directory is resolved one level ABOVE this file,
// while vendor/ and sites.yaml are resolved next to it — confirm the layout.
$pathLogErrorHttp = __DIR__.'/../log/'. date('Y-m-d') . '/error-http-curl.log';
$pathLogErrorHtml = __DIR__.'/../log/'. date('Y-m-d') . '/error-html.log';

// A stray "$" used to sit here. PHP glued it to the following statement,
// turning it into the variable-variable assignment "$$dbOpendata = ...",
// which left $dbOpendata itself undefined for the rest of the script.
$dbOpendata = new Database(new DatabaseConfig('opendata'));
$dbNiimko = new Database(new DatabaseConfig('niimko'));
// Debug output kept from the original trial script.
var_dump($dbOpendata);

// Resolved once instead of on every call inside the loop.
$manager = ContingentManager::getInstance();

$specializations = $manager->getSpecializations($dbNiimko);
$orgs = $manager->getOrgs($dbOpendata);

$sitesYaml = file_get_contents(dirname(__FILE__) ."/sites.yaml");
if ($sitesYaml === false) {
    // Without the site list there is nothing to process.
    fwrite(STDERR, 'Cannot read sites.yaml' . PHP_EOL);
    exit(1);
}

$sites = Yaml::parse($sitesYaml);
if (!is_array($sites)) {
    // An empty or scalar YAML document: treat it as "no sites".
    $sites = [];
}
print_r($sites);

// array_values() keeps the 0-based numbering the progress output relies on.
foreach (array_values($sites) as $i => $site) {
    // The organisation has no site URL.
    if (empty($site['site'])) {
        continue;
    }
    // Already in the database. The comparison is intentionally loose:
    // the types of YAML-parsed ids and ids in $orgs are not visible here.
    if (in_array($site['org_id'], $orgs)) {
        continue;
    }

    print(($i+1). '. ' . implode(' ', $site) . PHP_EOL);

    $uri = trim($manager->buildBaseUri($site['site']));
    $uri = str_replace("_","/", $uri);
    // Exactly one slash between the base URI and the section path.
    $uri = rtrim($uri, '/') . '/sveden/education/';
    echo $uri . PHP_EOL;

    $ch = curl_init($uri);
    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER => true,
        // NOTE(review): certificate verification is switched off, so the
        // downloaded pages are not protected against tampering in transit.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_HEADER => false,
        CURLOPT_FOLLOWLOCATION => true,
    ]);
    $html = curl_exec($ch);

    // Collect everything needed from the handle first, then release it on
    // every path (it used to leak whenever the status was not 200).
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $errno = curl_errno($ch);
    curl_close($ch);

    if ($httpCode != 200) {
        // $errno is 0 when the transfer itself succeeded but the server
        // answered with a non-200 status.
        $message = implode(' ', $site);
        $message .= " cURL error ({$errno}): ".curl_strerror($errno);
        Logger::log($pathLogErrorHttp, $message);
        continue;
    }

    echo "HTTP-code: " . $httpCode . PHP_EOL;
    if (empty($html)) {
        continue;
    }

    $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
    $contingent = $parser->getDataTable();
    // Add the spec_id field looked up by spec_code.
    // (presumably both helpers modify $contingent by reference — verify)
    $manager->addSpecId($contingent, $specializations);
    // Add the org_id field.
    $manager->addOrgId($contingent, $site['org_id']);

    if (empty($contingent)) {
        echo "empty". PHP_EOL;
        Logger::log($pathLogErrorHtml, implode(' ', $site));
        continue;
    }

    print_r($contingent);
    if ($manager->checkContingent($contingent)) {
        // Store in the database.
        $manager->insertContingent($dbOpendata, $contingent);
    } else {
        Logger::log($pathLogErrorHtml, implode(' ', $site));
    }
}
|
Loading…
x
Reference in New Issue
Block a user