FIX Allow to configure CurlLinkChecker request headers (#64)

* Allow to configure CurlLinkChecker request headers
* Make adding headers more explicit via array
* Remove extra character in comment
* Fix lint
* Align headers array with php docs
This commit is contained in:
Alex Saelens 2020-07-07 10:25:25 +12:00 committed by GitHub
parent 05ded71c02
commit 9282ed2e2d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 35 additions and 2 deletions

View File

@ -110,3 +110,19 @@ following config in config.yml.
SilverStripe\ExternalLinks\Tasks\CurlLinkChecker:
bypass_cache: 1
```
## Headers
You may want to send custom headers with the cURL request (e.g. a `user-agent`), so that websites do not reject the request on the assumption that it comes from a bot.
You can set them with the following config in config.yml.
```yaml
# Headers
SilverStripe\ExternalLinks\Tasks\CurlLinkChecker:
headers:
- 'user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0'
- 'accept-encoding: gzip, deflate, br'
- 'referer: https://www.domain.com/'
- 'sec-fetch-mode: navigate'
...
```

View File

@ -31,6 +31,14 @@ class CurlLinkChecker implements LinkChecker
*/
private static $bypass_cache = false;
/**
* Custom HTTP headers to send with each cURL request made by this checker.
*
* Each entry is a complete header line in 'name: value' form, e.g.
* 'user-agent: Mozilla/5.0 ...'. When the list is non-empty it is handed
* unchanged to CURLOPT_HTTPHEADER; when empty, no custom headers are set.
*
* @config
* @var array
*/
private static $headers = [];
/**
* Return cache
*
@ -66,11 +74,19 @@ class CurlLinkChecker implements LinkChecker
// No cached result so just request
$handle = curl_init($href);
curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($handle, CURLOPT_TIMEOUT, 10);
if ($this->config()->get('follow_location')) {
curl_setopt($handle, CURLOPT_FOLLOWLOCATION, true);
}
curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($handle, CURLOPT_TIMEOUT, 10);
// Add headers
$headers = (array) $this->config()->get('headers');
if (!empty($headers)) {
curl_setopt($handle, CURLOPT_HTTPHEADER, $headers);
}
// Retrieve http code
curl_exec($handle);
$httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
curl_close($handle);
@ -79,6 +95,7 @@ class CurlLinkChecker implements LinkChecker
// Cache result
$this->getCache()->set($cacheKey, $httpCode);
}
return $httpCode;
}
}