2008-11-19 00:10:33 +01:00
|
|
|
<?php
|
|
|
|
/**
|
|
|
|
* Initial implementation of Sitemap support.
|
|
|
|
* GoogleSitemap should handle requests to 'sitemap.xml';
|
|
|
|
* the other two classes are used to render the sitemap.
|
|
|
|
*
|
|
|
|
* You can notify ("ping") Google about a changed sitemap
|
|
|
|
* automatically whenever a new page is published or unpublished.
|
|
|
|
* By default, Google is not notified, and will pick up your new
|
|
|
|
* sitemap whenever the GoogleBot visits your website.
|
|
|
|
*
|
|
|
|
* Enabling notification of Google after every publish (in your _config.php):
|
2011-06-11 11:49:35 +02:00
|
|
|
|
2011-04-26 11:17:49 +02:00
|
|
|
* <example>
|
2008-11-19 00:10:33 +01:00
|
|
|
* GoogleSitemap::enable_google_notification();
|
|
|
|
* </example>
|
|
|
|
*
|
|
|
|
* @see http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=34609
|
|
|
|
*
|
2009-08-12 05:55:21 +02:00
|
|
|
* @package googlesitemaps
|
2008-11-19 00:10:33 +01:00
|
|
|
*/
|
|
|
|
class GoogleSitemap extends Controller {

	/**
	 * Whether the sitemap is served at all. Toggled through {@link enable()}
	 * and {@link disable()}; when false, {@link index()} answers with a 405
	 * response and {@link ping()} does nothing.
	 *
	 * @var boolean
	 */
	protected static $enabled = true;

	/**
	 * Live SiteTree pages fetched by the most recent call to {@link Items()}.
	 *
	 * @var DataObjectSet
	 */
	protected $Pages;

	/**
	 * Whether {@link ping()} is allowed to contact Google.
	 *
	 * @var boolean
	 */
	protected static $google_notification_enabled = false;

	/**
	 * When true, {@link Items()} only fetches pages whose ShowInSearch
	 * field equals 1.
	 *
	 * @var boolean
	 */
	protected static $use_show_in_search = true;

	/**
	 * List of DataObjects to show in sitemap.xml
	 *
	 * Entries share their index with
	 * {@link $google_sitemap_dataobjects_changefreq} and
	 * {@link $google_sitemap_dataobjects_priority}.
	 *
	 * @var array
	 */
	public static $google_sitemap_dataobjects = array();

	/**
	 * List of DataObjects change frequency
	 *
	 * @var array
	 */
	public static $google_sitemap_dataobjects_changefreq = array();

	/**
	 * List of DataObjects priority
	 *
	 * @var array
	 */
	public static $google_sitemap_dataobjects_priority = array();

	/**
	 * Decorates the given DataObject with {@link GoogleSitemapDecorator}
	 * and pushes the class name to the registered DataObjects.
	 * Note that all registered DataObjects need the method AbsoluteLink().
	 *
	 * Registering a class name that is already registered is a no-op.
	 *
	 * @param string $className name of DataObject to register
	 * @param string $changeFreq how often is this DataObject updated?
	 *                           Possible values:
	 *                           always, hourly, daily, weekly, monthly, yearly, never.
	 *                           Passing null falls back to "monthly".
	 * @param string $priority How important is this DataObject in comparison to other urls?
	 *                         Possible values: 0.1, 0.2 ... , 0.9, 1.0
	 *
	 * @return void
	 */
	public static function register_dataobject($className, $changeFreq = 'monthly', $priority = '0.6') {
		if(self::is_registered($className)) {
			return;
		}

		Object::add_extension($className, 'GoogleSitemapDecorator');

		// The three lists are parallel: one append to each keeps the indexes aligned.
		self::$google_sitemap_dataobjects[] = $className;
		self::$google_sitemap_dataobjects_changefreq[] = ($changeFreq === null) ? "monthly" : $changeFreq;
		self::$google_sitemap_dataobjects_priority[] = $priority;
	}

	/**
	 * Checks whether the given class name is already registered or not.
	 *
	 * @param string $className Name of DataObject to check
	 *
	 * @return bool
	 */
	public static function is_registered($className) {
		return in_array($className, self::$google_sitemap_dataobjects, true);
	}

	/**
	 * Collects the viewable records of every registered DataObject class so
	 * they can be added to the pages from the site tree.
	 *
	 * Each record receives the change frequency registered for its class, and
	 * the registered priority unless the record already has a Priority set.
	 *
	 * @return DataObjectSet
	 */
	protected function addRegisteredDataObjects() {
		$output = new DataObjectSet();

		foreach(self::$google_sitemap_dataobjects as $index => $className) {
			$dataObjectSet = DataObject::get($className);

			if(!$dataObjectSet) {
				continue;
			}

			foreach($dataObjectSet as $dataObject) {
				if(!$dataObject->canView()) {
					continue;
				}

				$dataObject->ChangeFreq = self::$google_sitemap_dataobjects_changefreq[$index];

				if(!isset($dataObject->Priority)) {
					$dataObject->Priority = self::$google_sitemap_dataobjects_priority[$index];
				}

				$output->push($dataObject);
			}
		}

		return $output;
	}

	/**
	 * Maps the average number of seconds between two revisions of a page to
	 * a sitemap change frequency keyword.
	 *
	 * @param int|float $period Average seconds between revisions
	 *
	 * @return string One of: yearly, monthly, weekly, daily, hourly, always
	 */
	protected static function estimate_change_frequency($period) {
		if($period > 60*60*24*365) {
			// > 1 year
			return 'yearly';
		}

		if($period > 60*60*24*30) {
			// > 30 days
			return 'monthly';
		}

		if($period > 60*60*24*7) {
			// > 1 week
			return 'weekly';
		}

		if($period > 60*60*24) {
			// > 1 day
			return 'daily';
		}

		if($period > 60*60) {
			// > 1 hour
			return 'hourly';
		}

		// < 1 hour
		return 'always';
	}

	/**
	 * Returns all the links to {@link SiteTree} pages and
	 * {@link DataObject} urls on the page
	 *
	 * Pages are taken from the Live stage. A page is skipped when it is
	 * served from a different host than the current request, is an
	 * ErrorPage, cannot be viewed, or has a Priority that is set but not
	 * greater than 0.
	 *
	 * @return DataObjectSet
	 */
	public function Items() {
		$filter = '';

		$bt = defined('DB::USE_ANSI_SQL') ? "\"" : "`";

		if(self::$use_show_in_search) {
			$filter = "{$bt}ShowInSearch{$bt} = 1";
		}

		$this->Pages = Versioned::get_by_stage('SiteTree', 'Live', $filter);

		$newPages = new DataObjectSet();

		if($this->Pages) {
			// We prefix $_SERVER['HTTP_HOST'] with 'http://' to help
			// parse_url identify the host name component; we could use
			// another protocol (like ftp) as the prefix and the code would
			// work the same. The value does not change between pages, so it
			// is resolved once; the isset() guard avoids an undefined index
			// notice when no host header is available.
			$hostHttp = isset($_SERVER['HTTP_HOST'])
				? parse_url('http://' . $_SERVER['HTTP_HOST'], PHP_URL_HOST)
				: null;

			// A single "now" is shared by every page of this run.
			$date = date('Y-m-d H:i:s');

			$now = new SS_Datetime();
			$now->value = $date;

			foreach($this->Pages as $page) {
				// Only include pages from this host and pages which are not
				// an instance of ErrorPage.
				$pageHttp = parse_url($page->AbsoluteLink(), PHP_URL_HOST);

				if(($pageHttp != $hostHttp) || ($page instanceof ErrorPage)) {
					continue;
				}

				// If the page has been set to 0 priority, it won't be included
				if($page->canView() && (!isset($page->Priority) || $page->Priority > 0)) {
					// The one field that isn't easy to deal with in the
					// template is change frequency, so we set that here.
					$prop = $page->toMap();

					$created = new SS_Datetime();
					$created->value = (isset($prop['Created'])) ? $prop['Created'] : $date;

					$versions = (isset($prop['Version'])) ? $prop['Version'] : 1;

					$timediff = $now->format('U') - $created->format('U');

					// Check how many revisions have been made over the
					// lifetime of the page for a rough estimate of its
					// changing frequency.
					$period = $timediff / ($versions + 1);

					$page->ChangeFreq = self::estimate_change_frequency($period);

					$newPages->push($page);
				}
			}
		}

		$newPages->merge($this->addRegisteredDataObjects());

		return $newPages;
	}

	/**
	 * Notifies Google about changes to your sitemap.
	 * Triggered automatically on every publish/unpublish of a page.
	 * This behaviour is disabled by default, enable with:
	 * GoogleSitemap::enable_google_notification();
	 *
	 * If the site is in "dev-mode", no ping will be sent regardless of
	 * whether the Google notification is enabled.
	 *
	 * @return string|false Response text, or false when no ping was sent
	 */
	public static function ping() {
		if(!self::$enabled) {
			return false;
		}

		// Don't ping if the site has disabled it, or if the site is in dev mode
		if(!self::$google_notification_enabled || Director::isDev()) {
			return false;
		}

		$location = urlencode(Controller::join_links(
			Director::absoluteBaseURL(),
			'sitemap.xml'
		));

		$response = HTTP::sendRequest(
			"www.google.com",
			"/webmasters/sitemaps/ping",
			sprintf("sitemap=%s", $location)
		);

		return $response;
	}

	/**
	 * Enable pings to google.com whenever sitemap changes.
	 *
	 * @return void
	 */
	public static function enable_google_notification() {
		self::$google_notification_enabled = true;
	}

	/**
	 * Disables pings to google when the sitemap changes.
	 *
	 * @return void
	 */
	public static function disable_google_notification() {
		self::$google_notification_enabled = false;
	}

	/**
	 * Default controller handler for the sitemap.xml file
	 *
	 * @param mixed $url Not used by this handler
	 *
	 * @return array|SS_HTTPResponse An empty array while the sitemap is
	 *                               enabled, otherwise a 405 response
	 */
	public function index($url) {
		if(!self::$enabled) {
			return new SS_HTTPResponse('Not allowed', 405);
		}

		SSViewer::set_source_file_comments(false);

		$this->getResponse()->addHeader('Content-Type', 'application/xml; charset="utf-8"');

		// But we want to still render.
		return array();
	}

	/**
	 * Enable the sitemap.xml file
	 *
	 * @return void
	 */
	public static function enable() {
		self::$enabled = true;
	}

	/**
	 * Disable the sitemap.xml file
	 *
	 * @return void
	 */
	public static function disable() {
		self::$enabled = false;
	}
}
|