Allow to configure CurlLinkChecker request headers

This commit is contained in:
Alexandre Saelens 2020-01-22 09:54:03 +13:00
parent 3746cb1368
commit eeb8060af6
2 changed files with 54 additions and 2 deletions

View File

@ -110,3 +110,19 @@ following config in config.yml.
SilverStripe\ExternalLinks\Tasks\CurlLinkChecker:
bypass_cache: 1
```
## Headers
You may want to set headers to be sent with the cURL request (e.g. a user agent) so that websites do not reject the request because they think it comes from a bot.
You can set them with the following config in config.yml.
```yaml
# Headers
SilverStripe\ExternalLinks\Tasks\CurlLinkChecker:
user_agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0"
headers:
- "accept-encoding: gzip, deflate, br"
- "referer: https://www.domain.com/"
- "sec-fetch-mode: navigate"
...
```

View File

@ -31,6 +31,24 @@ class CurlLinkChecker implements LinkChecker
*/
private static $bypass_cache = false;
/**
* User agent string sent with the cURL request.
* Empty by default, in which case no user agent option is set on the
* request. Override via YAML config.
*
* @config
* @var string
*/
private static $user_agent = '';
/**
* Custom HTTP headers to send with the cURL request, each given as a
* "name: value" string (e.g. "referer: https://www.domain.com/").
* A single non-array value is sent as one header. Override via YAML config.
*
* @config
* @var array
*/
private static $headers = [];
/**
* Return cache
*
@ -66,11 +84,28 @@ class CurlLinkChecker implements LinkChecker
// No cached result so just request
$handle = curl_init($href);
curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($handle, CURLOPT_TIMEOUT, 10);
if ($this->config()->get('follow_location')) {
curl_setopt($handle, CURLOPT_FOLLOWLOCATION, true);
}
curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($handle, CURLOPT_TIMEOUT, 10);
// Add user agent
$userAgent = trim($this->config()->get('user_agent'));
if ($userAgent) {
curl_setopt($handle, CURLOPT_USERAGENT , $userAgent);
}
// Other headers
if ($headers = $this->config()->get('headers')) {
if (is_array($headers)) {
curl_setopt($handle, CURLOPT_HTTPHEADER , $headers);
} else {
curl_setopt($handle, CURLOPT_HTTPHEADER , array($headers));
}
}
// Retrieve http code
curl_exec($handle);
$httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
curl_close($handle);
@ -79,6 +114,7 @@ class CurlLinkChecker implements LinkChecker
// Cache result
$this->getCache()->set($cacheKey, $httpCode);
}
return $httpCode;
}
}