diff --git a/app/app.php b/app/app.php
index ec5ead4..39a1a0c 100644
--- a/app/app.php
+++ b/app/app.php
@@ -12,7 +12,6 @@ use GuzzleHttp\Exception\ServerException;
 use App\Library\ContingentParser;
 use App\Library\Database;
 use GuzzleHttp\Client;
-use GuzzleHttp\Psr7\Request;
 use Psr\Http\Message\RequestInterface;
 use Psr\Http\Message\ResponseInterface;
 use Psr\Http\Message\UriInterface;
diff --git a/app/library/ContingentParser.php b/app/library/ContingentParser.php
index 60017c3..dd50758 100644
--- a/app/library/ContingentParser.php
+++ b/app/library/ContingentParser.php
@@ -29,11 +29,12 @@ class ContingentParser
         $dom = new \DOMDocument(
             // encoding: "UTF-8"
         );
-        if (mb_detect_encoding($html, 'UTF-8, windows-1251') != "UTF-8") {
+        $encoding = mb_detect_encoding($html, 'UTF-8, windows-1251');
+        if ($encoding != "UTF-8") {
             $html = mb_convert_encoding(
                 $html,
                 'UTF-8',
-                mb_detect_encoding($html, 'UTF-8, windows-1251')
+                $encoding
             );
             $html = str_replace('windows-1251','utf-8', $html);
         } else {
diff --git a/curl-helper.php b/curl-helper.php
new file mode 100644
index 0000000..a0ad887
--- /dev/null
+++ b/curl-helper.php
@@ -0,0 +1,103 @@
+getExceptionsHttpCurl('select-http-error.log');
+// print_r($sites);
+
+$dbOpendata = new Database(new DatabaseConfig('opendata'));
+$dbNiimko = new Database(new DatabaseConfig('niimko'));
+var_dump($dbOpendata);
+// $sites = ContingentManager::getInstance()->getSites($dbNiimko);
+$specializations = ContingentManager::getInstance()->getSpecializations($dbNiimko);
+$orgs = ContingentManager::getInstance()->getOrgs($dbOpendata);
+$sites = Yaml::parse(file_get_contents(dirname(__FILE__) ."/sites.yaml"));
+print_r($sites);
+
+// Walk every site entry, fetch its /sveden/education/ page with cURL, parse the
+// contingent table out of the HTML and store it; failures are written to the logs.
+for ($i = 0; $i < count($sites); $i++) {
+    // No site URL is recorded for this organisation
+    if (empty($sites[$i]['site'])) {
+        // $message = implode(' ', $sites[$i]);
+        // Logger::log($pathLogErrorHttp, $message);
+        // Logger::log($pathErrorHttp, implode(' ', $sites[$i]));
+        continue;
+    }
+    // Already in the database
+    if (in_array($sites[$i]['org_id'], $orgs)) {
+        continue;
+    }
+    // Ignore organisations with markup errors
+    // if (in_array($sites[$i]['org_id'], $exceptionsOrgHtml)) {
+    //     continue;
+    // }
+    // Ignore organisations without HTTP errors
+    // if (!in_array($sites[$i]['org_id'], $exceptionsOrgHttpCurl)) {
+    //     continue;
+    // }
+    print(($i+1). '. ' . implode(' ', $sites[$i]) . PHP_EOL);
+    $uri = trim(ContingentManager::getInstance()->buildBaseUri($sites[$i]['site']));
+    $uri = str_replace("_","/", $uri);
+    if (substr($uri, -1) == '/') {
+        $uri = $uri."sveden/education/";
+    } else {
+        $uri = $uri."/sveden/education/";
+    }
+    echo $uri . PHP_EOL;
+    $ch = curl_init($uri);
+    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
+    // NOTE(review): TLS peer verification is switched off here - confirm this is intended
+    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
+    curl_setopt($ch, CURLOPT_HEADER, false);
+    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
+    $html = curl_exec($ch);
+    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
+    // Capture the error code while the handle is open, then close it before any
+    // early `continue` so failed requests do not leave handles open.
+    $errno = curl_errno($ch);
+    curl_close($ch);
+    // A false body means the transfer itself failed, even if a 200 status line arrived
+    if ($html === false || $httpCode != 200) {
+        $message = implode(' ', $sites[$i]);
+        $message .= " cURL error ({$errno}): ".curl_strerror($errno);
+        Logger::log($pathLogErrorHttp, $message);
+        continue;
+    }
+
+    echo "HTTP-code: " . $httpCode . PHP_EOL;
+    if (empty($html)) continue;
+
+    $parser = new ContingentParser($html, '//tr[@itemprop="eduChislen"]//');
+    $contingent = $parser->getDataTable();
+    // Add the spec_id field, looked up by spec_code
+    ContingentManager::getInstance()->addSpecId($contingent, $specializations);
+    // Add the org_id field
+    ContingentManager::getInstance()->addOrgId($contingent, $sites[$i]['org_id']);
+
+
+    if (empty($contingent)) {
+        echo "empty". PHP_EOL;
+        $message = implode(' ', $sites[$i]);
+        Logger::log($pathLogErrorHtml, $message);
+    } else {
+        print_r($contingent);
+        $set = ContingentManager::getInstance()->checkContingent($contingent);
+        if ($set) {
+            // Insert into the database
+            ContingentManager::getInstance()->insertContingent($dbOpendata, $contingent);
+        } else {
+            $message = implode(' ', $sites[$i]);
+            Logger::log($pathLogErrorHtml, $message);
+        }
+        unset($contingent);
+        unset($httpCode);
+    }
+}