<?php

/**
 * Sitemaps are a way to tell Google about pages on your site that they might
 * not otherwise discover. In its simplest terms, an XML Sitemap—usually called
 * Sitemap, with a capital S—is a list of the pages on your website. Creating
 * and submitting a Sitemap helps make sure that Google knows about all the
 * pages on your site, including URLs that may not be discoverable by Google's
 * normal crawling process.
 *
 * GoogleSitemap should handle requests to 'sitemap.xml';
 * the other two classes are used to render the sitemap.
 *
 * You can notify ("ping") Google about a changed sitemap
 * automatically whenever a new page is published or unpublished.
 * By default, Google is not notified, and will pick up your new
 * sitemap whenever the GoogleBot visits your website.
 *
 * Enabling notification of Google after every publish (in your _config.php):
 *
 * <example>
 * GoogleSitemap::enable_google_notification();
 * </example>
 *
 * @see http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=34609
 *
 * @package googlesitemaps
 */
class GoogleSitemap extends Controller {

	/**
	 * Controller actions that may be requested through a URL.
	 *
	 * @var array
	 */
	public static $allowed_actions = array(
		'index'
	);

	/**
	 * Master switch. When false, {@link index()} answers with a 404 and
	 * {@link ping()} does nothing.
	 *
	 * @var boolean
	 */
	protected static $enabled = true;

	/**
	 * Whether {@link ping()} is allowed to contact Google.
	 *
	 * @var boolean
	 */
	protected static $google_notification_enabled = false;

	/**
	 * When true, {@link getPages()} only returns pages whose "ShowInSearch"
	 * column equals 1.
	 *
	 * @var boolean
	 */
	protected static $use_show_in_search = true;

	/**
	 * List of DataObject class names to include. As well as the change
	 * frequency and priority of each class.
	 *
	 * Keyed by class name; each value is an array with the keys
	 * 'frequency' and 'priority'.
	 *
	 * @var array
	 */
	private static $dataobjects = array();

	/**
	 * Decorates the given DataObject with {@link GoogleSitemapDecorator}
	 * and pushes the class name to the registered DataObjects.
	 * Note that all registered DataObjects need the method AbsoluteLink().
	 *
	 * Registering a class that is already registered is a no-op; the
	 * originally stored frequency and priority are kept.
	 *
	 * @param string $className  name of DataObject to register
	 * @param string $changeFreq how often is this DataObject updated?
	 *                           Possible values:
	 *                           always, hourly, daily, weekly, monthly, yearly, never
	 *                           A falsy value falls back to 'monthly'.
	 * @param string $priority   How important is this DataObject in comparison to other urls?
	 *                           Possible values: 0.1, 0.2 ... , 0.9, 1.0
	 *                           A falsy value falls back to '0.6'.
	 *
	 * @return void
	 */
	public static function register_dataobject($className, $changeFreq = 'monthly', $priority = '0.6') {
		if (!self::is_registered($className)) {
			Object::add_extension($className, 'GoogleSitemapDecorator');

			self::$dataobjects[$className] = array(
				'frequency' => ($changeFreq) ? $changeFreq : 'monthly',
				'priority' => ($priority) ? $priority : '0.6'
			);
		}
	}

	/**
	 * Checks whether the given class name is already registered or not.
	 *
	 * @param string $className Name of DataObject to check
	 *
	 * @return bool
	 */
	public static function is_registered($className) {
		return isset(self::$dataobjects[$className]);
	}

	/**
	 * Unregisters a class from the sitemap. Mostly used for the test suite.
	 *
	 * Only the registry entry is removed; the extension added by
	 * {@link register_dataobject()} is not taken off the class again.
	 *
	 * @param string $className Name of DataObject to unregister
	 *
	 * @return void
	 */
	public static function unregister_dataobject($className) {
		unset(self::$dataobjects[$className]);
	}

	/**
	 * Returns a list containing each viewable {@link DataObject} instance of
	 * the registered class names.
	 *
	 * Every returned object gets its ChangeFreq set from the registration,
	 * and its Priority set from the registration unless the object already
	 * carries a Priority value of its own.
	 *
	 * @return ArrayList
	 */
	protected function getDataObjects() {
		$output = new ArrayList();

		foreach(self::$dataobjects as $class => $config) {
			$instances = new DataList($class);

			foreach($instances as $obj) {
				if(!$obj->canView()) {
					continue;
				}

				$obj->ChangeFreq = $config['frequency'];

				// A per-object priority wins over the class-wide default.
				if(!isset($obj->Priority)) {
					$obj->Priority = $config['priority'];
				}

				$output->push($obj);
			}
		}

		return $output;
	}

	/**
	 * Returns a list containing each viewable {@link SiteTree} instance. If
	 * you wish to exclude a particular class from the sitemap, simply set
	 * the priority of the class to -1.
	 *
	 * Pages are read from the 'Live' stage. A page is skipped when its
	 * absolute link points at a host other than the current request host,
	 * when it is an {@link ErrorPage}, when it cannot be viewed, or when it
	 * has a Priority that is not greater than zero.
	 *
	 * @return ArrayList
	 */
	protected function getPages() {
		if(!class_exists('SiteTree')) return new ArrayList();

		$filter = (self::$use_show_in_search) ? "\"ShowInSearch\" = 1" : "";
		$pages = Versioned::get_by_stage('SiteTree', 'Live', $filter);
		$output = new ArrayList();

		if($pages) {
			foreach($pages as $page) {
				$pageHttp = parse_url($page->AbsoluteLink(), PHP_URL_HOST);
				$hostHttp = parse_url('http://' . $_SERVER['HTTP_HOST'], PHP_URL_HOST);

				if(($pageHttp == $hostHttp) && !($page instanceof ErrorPage)) {
					if($page->canView() && (!isset($page->Priority) || $page->Priority > 0)) {
						$output->push($page);
					}
				}
			}
		}

		return $output;
	}

	/**
	 * Constructs the list of data to include in the rendered sitemap. Links
	 * can include pages from the website, dataobjects (such as forum posts)
	 * as well as custom registered paths.
	 *
	 * Extensions can adjust the final list through the 'updateItems' hook,
	 * which receives the list as its argument.
	 *
	 * @return ArrayList
	 */
	public function Items() {
		$output = new ArrayList();
		$output->merge($this->getPages());
		$output->merge($this->getDataObjects());

		$this->extend('updateItems', $output);

		return $output;
	}

	/**
	 * Notifies Google about changes to your sitemap. This behavior is disabled
	 * by default, enable with:
	 *
	 * <code>
	 * GoogleSitemap::enable_google_notification();
	 * </code>
	 *
	 * After notifications have been enabled, every publish / unpublish of a
	 * page will notify Google of the update.
	 *
	 * If the site is in development mode no ping will be sent regardless whether
	 * the Google notification is enabled.
	 *
	 * @return string|false Response text (or the socket error message when
	 *                      the connection failed); false when no ping was
	 *                      attempted
	 */
	public static function ping() {
		if(!self::$enabled) return false;

		// Don't ping if the site has disabled it, or if the site is in dev mode
		if(!self::$google_notification_enabled || Director::isDev()) {
			return false;
		}

		$location = urlencode(Controller::join_links(
			Director::absoluteBaseURL(),
			'sitemap.xml'
		));

		$response = self::send_ping(
			"www.google.com", "/webmasters/sitemaps/ping", sprintf("sitemap=%s", $location)
		);

		return $response;
	}

	/**
	 * Send an HTTP GET request to the host on port 80 and read the reply.
	 *
	 * @param string $host  Host name to connect to
	 * @param string $path  Request path, starting with a slash
	 * @param string $query Query string without the leading '?'; may be empty
	 *
	 * @return String Raw response text (status line, headers and body), or
	 *                the socket error message when the connection could not
	 *                be opened
	 */
	protected static function send_ping($host, $path, $query) {
		// Plain HTTP. The port used to be read from an undefined variable,
		// which made every connection attempt fail.
		$port = 80;

		$socket = fsockopen($host, $port, $errno, $error);

		if (!$socket) {
			return $error;
		}

		if ($query) {
			$query = '?' . $query;
		}

		$request = "GET {$path}{$query} HTTP/1.1\r\nHost: $host\r\nConnection: Close\r\n\r\n";
		fwrite($socket, $request);

		// "Connection: Close" makes the server end the stream after the
		// response, so reading until EOF terminates.
		$response = stream_get_contents($socket);

		// Release the connection instead of leaving it to request shutdown.
		fclose($socket);

		return $response;
	}

	/**
	 * Enable pings to google.com whenever sitemap changes.
	 *
	 * @return void
	 */
	public static function enable_google_notification() {
		self::$google_notification_enabled = true;
	}

	/**
	 * Disables pings to google when the sitemap changes.
	 *
	 * @return void
	 */
	public static function disable_google_notification() {
		self::$google_notification_enabled = false;
	}

	/**
	 * Default controller handler for the sitemap.xml file.
	 *
	 * @param mixed $url Request argument handed in by the router; not used here
	 *
	 * @return array|SS_HTTPResponse Empty array so the controller carries on
	 *                               and renders its template, or a 404
	 *                               response when sitemaps are disabled
	 */
	public function index($url) {
		if(self::$enabled) {
			// Template source comments would corrupt the XML output.
			SSViewer::set_source_file_comments(false);

			$this->getResponse()->addHeader('Content-Type', 'application/xml; charset="utf-8"');

			// But we want to still render.
			return array();
		} else {
			return new SS_HTTPResponse('Page not found', 404);
		}
	}

	/**
	 * Enable Google Sitemap support. Requests to the sitemap.xml route will
	 * result in an XML sitemap being provided.
	 *
	 * @return void
	 */
	public static function enable() {
		self::$enabled = true;
	}

	/**
	 * Disable Google Sitemap support. Any requests to the sitemap.xml route
	 * will produce a 404 response.
	 *
	 * @return void
	 */
	public static function disable() {
		self::$enabled = false;
	}
}