API Implement sitemap.xml partitioning (Fixes #9)

Misc upgrade of module code so that sitemap.xml provides an index sitemap file based on the sitemaps.org standard. Moved configuration vars to the Config API.

Considering how large a change this is, I've branched a 1.0 release off on GitHub.
This commit is contained in:
Will Rossiter 2013-01-15 22:29:59 +13:00
parent 90f3c82660
commit 8bbc14ee23
16 changed files with 591 additions and 349 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.DS_Store

View File

@ -2,7 +2,7 @@
// add the extension to pages // add the extension to pages
if (class_exists('SiteTree')) { if (class_exists('SiteTree')) {
Object::add_extension('SiteTree', 'GoogleSitemapSiteTreeDecorator'); Object::add_extension('SiteTree', 'GoogleSitemapSiteTreeExtension');
} }
// if you need to add this to DataObjects include the following in // if you need to add this to DataObjects include the following in

View File

@ -0,0 +1,8 @@
---
Name: googlesitemaps
---
GoogleSitemap:
enabled: true
objects_per_sitemap: 1000
google_notification_enabled: false
use_show_in_search: true

View File

@ -3,4 +3,4 @@ Name: googlesitemaproutes
--- ---
Director: Director:
rules: rules:
'sitemap.xml': 'GoogleSitemap' 'sitemap.xml': 'GoogleSitemapController'

View File

@ -1,13 +1,14 @@
<?php <?php
/** /**
* Sitemaps are a way to tell Google about pages on your site that they might * Sitemaps are a way to tell Google about pages on your site that they might
* not otherwise discover. In its simplest terms, a XML Sitemap—usually called * not otherwise discover. In its simplest terms, a XML Sitemap usually called
* Sitemap, with a capital S—is a list of the pages on your website. Creating * a Sitemap, with a capital S—is a list of the pages on your website.
* and submitting a Sitemap helps make sure that Google knows about all the
* pages on your site, including URLs that may not be discoverable by Google's
* normal crawling process.
* *
* GoogleSitemap should handle requests to 'sitemap.xml' * Creating and submitting a Sitemap helps make sure that Google knows about
* all the pages on your site, including URLs that may not be discoverable by
* Google's normal crawling process.
*
* The GoogleSitemap handle requests to 'sitemap.xml'
* the other two classes are used to render the sitemap. * the other two classes are used to render the sitemap.
* *
* You can notify ("ping") Google about a changed sitemap * You can notify ("ping") Google about a changed sitemap
@ -25,29 +26,7 @@
* *
* @package googlesitemaps * @package googlesitemaps
*/ */
class GoogleSitemap extends Controller { class GoogleSitemap {
/**
* @var array
*/
public static $allowed_actions = array(
'index'
);
/**
* @var boolean
*/
protected static $enabled = true;
/**
* @var boolean
*/
protected static $google_notification_enabled = false;
/**
* @var boolean
*/
protected static $use_show_in_search = true;
/** /**
* List of DataObject class names to include. As well as the change * List of DataObject class names to include. As well as the change
@ -73,7 +52,7 @@ class GoogleSitemap extends Controller {
*/ */
public static function register_dataobject($className, $changeFreq = 'monthly', $priority = '0.6') { public static function register_dataobject($className, $changeFreq = 'monthly', $priority = '0.6') {
if (!self::is_registered($className)) { if (!self::is_registered($className)) {
Object::add_extension($className, 'GoogleSitemapDecorator'); Object::add_extension($className, 'GoogleSitemapExtension');
self::$dataobjects[$className] = array( self::$dataobjects[$className] = array(
'frequency' => ($changeFreq) ? $changeFreq : 'monthly', 'frequency' => ($changeFreq) ? $changeFreq : 'monthly',
@ -103,96 +82,150 @@ class GoogleSitemap extends Controller {
} }
/** /**
* Clears registered dataobjects. Useful for unit tests. * Clears registered {@link DataObjects}. Useful for unit tests.
*/ */
public static function clear_registered_dataobjects() { public static function clear_registered_dataobjects() {
self::$dataobjects = array(); self::$dataobjects = array();
} }
/**
* Returns a list containing each viewable {@link DataObject} instance of
* the registered class names.
*
* @return ArrayList
*/
protected function getDataObjects() {
$output = new ArrayList();
foreach(self::$dataobjects as $class => $config) {
$instances = new DataList($class);
if($instances) {
foreach($instances as $obj) {
if($obj->canView()) {
$obj->ChangeFreq = $config['frequency'];
if(!isset($obj->Priority)) {
$obj->Priority = $config['priority'];
}
$output->push($obj);
}
}
}
}
return $output;
}
/**
* Returns a list containing each viewable {@link SiteTree} instance. If
* you wish to exclude a particular class from the sitemap, simply set
* the priority of the class to -1.
*
* @return ArrayList
*/
protected function getPages() {
if(!class_exists('SiteTree')) return new ArrayList();
$filter = (self::$use_show_in_search) ? "\"ShowInSearch\" = 1" : "";
$pages = Versioned::get_by_stage('SiteTree', 'Live', $filter);
$output = new ArrayList();
if($pages) {
foreach($pages as $page) {
$pageHttp = parse_url($page->AbsoluteLink(), PHP_URL_HOST);
$hostHttp = parse_url('http://' . $_SERVER['HTTP_HOST'], PHP_URL_HOST);
if(($pageHttp == $hostHttp) && !($page instanceof ErrorPage)) {
if($page->canView() && (!isset($page->Priority) || $page->Priority > 0)) {
$output->push($page);
}
}
}
}
return $output;
}
/** /**
* Constructs the list of data to include in the rendered sitemap. Links * Constructs the list of data to include in the rendered sitemap. Links
* can include pages from the website, dataobjects (such as forum posts) * can include pages from the website, dataobjects (such as forum posts)
* as well as custom registered paths. * as well as custom registered paths.
* *
* @param string
* @param int
*
* @return ArrayList * @return ArrayList
*/ */
public function Items() { public static function get_items($class, $page = 1) {
$output = new ArrayList(); $output = new ArrayList();
$output->merge($this->getPages()); $count = Config::inst()->get('GoogleSitemap', 'objects_per_sitemap');
$output->merge($this->getDataObjects()); $filter = Config::inst()->get('GoogleSitemap', 'use_show_in_search');
$this->extend('updateItems', $output); if($class == "SiteTree") {
$filter = ($filter) ? "\"ShowInSearch\" = 1" : "";
$instances = Versioned::get_by_stage('SiteTree', 'Live', $filter);
}
else {
$instances = new DataList($class);
}
$instances = $instances->limit(
$count,
($page - 1) * $count
);
if($instances) {
foreach($instances as $obj) {
if($obj->canIncludeInGoogleSitemap()) {
$output->push($obj);
}
}
}
return $output; return $output;
} }
/** /**
* Notifies Google about changes to your sitemap. This behavior is disabled * Returns the string frequency of edits for a particular dataobject class.
* by default, enable with:
* *
* <code> * Frequency for {@link SiteTree} objects can be determined from the version
* GoogleSitemap::enable_google_notificaton(); * history.
* </code> *
* @param string
*
* @return string
*/
public static function get_frequency_for_class($class) {
    // Walk the registry of registered classes and hand back the configured
    // change frequency of the first entry whose name matches.
    foreach(self::$dataobjects as $registered => $settings) {
        if($registered != $class) {
            continue;
        }

        return $settings['frequency'];
    }

    // Nothing registered under that name: no frequency is known.
    return null;
}
/**
* Returns the default priority of edits for a particular dataobject class.
*
* @param string
*
* @return float
*/
public static function get_priority_for_class($class) {
    // Priority used when the class has not been registered.
    $fallback = 0.5;

    foreach(self::$dataobjects as $registered => $settings) {
        if($registered != $class) {
            continue;
        }

        return $settings['priority'];
    }

    return $fallback;
}
/**
* The google site map is broken down into multiple smaller files to
* prevent overbearing a server. By default separate {@link DataObject}
* records are keep in separate files and broken down into chunks.
*
* @return ArrayList
*/
public static function get_sitemaps() {
    $countPerFile = (int) Config::inst()->get('GoogleSitemap', 'objects_per_sitemap');

    // A zero, negative or missing setting would divide by zero when working
    // out the number of files. Fall back to 1000, the value shipped in the
    // module's own config file.
    // NOTE(review): confirm 1000 is still the desired fallback.
    if($countPerFile < 1) {
        $countPerFile = 1000;
    }

    $sitemaps = new ArrayList();

    if(class_exists('SiteTree')) {
        $showInSearch = Config::inst()->get('GoogleSitemap', 'use_show_in_search');
        $filter = ($showInSearch) ? "\"ShowInSearch\" = 1" : "";
        $pages = Versioned::get_by_stage('SiteTree', 'Live', $filter);

        self::push_sitemaps_for_list($sitemaps, 'SiteTree', $pages, $countPerFile);
    }

    if(self::$dataobjects) {
        foreach(array_keys(self::$dataobjects) as $class) {
            $list = new DataList($class);

            // Oldest first, so the last record of each slice is the most
            // recently edited one in that slice.
            $list = $list->sort('LastEdited ASC');

            self::push_sitemaps_for_list($sitemaps, $class, $list, $countPerFile);
        }
    }

    return $sitemaps;
}

/**
 * Appends one index entry per sitemap file needed to hold every record of
 * the given list. Each entry carries the class name, the 1-based page
 * number and the 'Y-m-d' LastEdited date of the last record in that slice
 * (today's date when the slice yields no record).
 *
 * @param ArrayList $sitemaps List the entries are pushed onto
 * @param string $class Class name recorded on each entry
 * @param SS_List $list Records to partition
 * @param int $countPerFile Maximum number of records per file (>= 1)
 *
 * @return void
 */
private static function push_sitemaps_for_list(ArrayList $sitemaps, $class, $list, $countPerFile) {
    $filesNeeded = ceil($list->count() / $countPerFile);

    for($i = 1; $i <= $filesNeeded; $i++) {
        $last = $list
            ->limit($countPerFile, ($i - 1) * $countPerFile)
            ->last();

        $lastModified = ($last) ? $last->dbObject('LastEdited')->Format('Y-m-d') : date('Y-m-d');

        $sitemaps->push(new ArrayData(array(
            'ClassName' => $class,
            'LastModified' => $lastModified,
            'Page' => $i
        )));
    }
}
/**
* Notifies Google about changes to your sitemap. This behavior is disabled
* by default, to enable, read the documentation provided in the docs folder.
* *
* After notifications have been enabled, every publish / unpublish of a page. * After notifications have been enabled, every publish / unpublish of a page.
* will notify Google of the update. * will notify Google of the update.
@ -203,10 +236,14 @@ class GoogleSitemap extends Controller {
* @return string Response text * @return string Response text
*/ */
public static function ping() { public static function ping() {
if(!self::$enabled) return false; if(!self::$enabled) {
return false;
}
// Don't ping if the site has disabled it, or if the site is in dev mode // Don't ping if the site has disabled it, or if the site is in dev mode
if(!GoogleSitemap::$google_notification_enabled || Director::isDev()) { $active = Config::inst()->get('GoogleSitemap', 'google_notification_enabled');
if(!$active || Director::isDev()) {
return; return;
} }
@ -248,51 +285,60 @@ class GoogleSitemap extends Controller {
* @return void * @return void
*/ */
public static function enable_google_notification() { public static function enable_google_notification() {
self::$google_notification_enabled = true; Deprecation::notice('1.1', 'GoogleSitemap::enable() is deprecated. Please use Config API instead. See documentation.');
Config::inst()->remove('GoogleSitemap', 'google_notification_enabled');
Config::inst()->update('GoogleSitemap', 'google_notification_enabled', true);
} }
/** /**
* Disables pings to google when the sitemap changes. * Disables pings to google when the sitemap changes.
* *
* @deprecated 1.1
* @return void * @return void
*/ */
public static function disable_google_notification() { public static function disable_google_notification() {
self::$google_notification_enabled = false; Deprecation::notice('1.1', 'GoogleSitemap::enable() is deprecated. Please use Config API instead. See documentation.');
Config::inst()->remove('GoogleSitemap', 'google_notification_enabled');
Config::inst()->update('GoogleSitemap', 'google_notification_enabled', false);
} }
/**
* Default controller handler for the sitemap.xml file
*/
public function index($url) {
if(self::$enabled) {
SSViewer::set_source_file_comments(false);
$this->getResponse()->addHeader('Content-Type', 'application/xml; charset="utf-8"');
// But we want to still render.
return array();
} else {
return new SS_HTTPResponse('Page not found', 404);
}
}
/** /**
* Enable Google Sitemap support. Requests to the sitemap.xml route will * Enable Google Sitemap support. Requests to the sitemap.xml route will
* result in an XML sitemap being provided. * result in an XML sitemap being provided.
* *
* @deprecated 1.1
* @return void * @return void
*/ */
public static function enable() { public static function enable() {
self::$enabled = true; Deprecation::notice('1.1', 'GoogleSitemap::enable() is deprecated. Please use Config API instead. See documentation.');
Config::inst()->remove('GoogleSitemap', 'enabled');
Config::inst()->update('GoogleSitemap', 'enabled', true);
}
/**
* Is GoogleSitemap enabled?
*
* @return boolean
*/
public static function enabled() {
return (Config::inst()->get('GoogleSitemap', 'enabled', Config::INHERITED));
} }
/** /**
* Disable Google Sitemap support. Any requests to the sitemap.xml route * Disable Google Sitemap support. Any requests to the sitemap.xml route
* will produce a 404 response. * will produce a 404 response.
* *
* @deprecated 1,1
* @return void * @return void
*/ */
public static function disable() { public static function disable() {
self::$enabled = false; Deprecation::notice('1.1', 'GoogleSitemap::disable() is deprecated. Please use Config API instead. See documentation.');
Config::inst()->remove('GoogleSitemap', 'enabled');
Config::inst()->update('GoogleSitemap', 'enabled', false);
} }
} }

View File

@ -1,143 +0,0 @@
<?php
/**
* Decorate the page object to provide google sitemaps with
* additionally options and configuration.
*
* @package googlesitemaps
*/
class GoogleSitemapDecorator extends DataExtension {
}
/**
* @package googlesitemaps
*/
class GoogleSitemapSiteTreeDecorator extends DataExtension {
/**
* @var array
*/
public static $db = array(
"Priority" => "Varchar(5)"
);
/**
* @param FieldList
*/
public function updateSettingsFields(&$fields) {
$prorities = array(
'' => _t('SiteTree.PRIORITYAUTOSET', 'Auto-set based on page depth'),
'-1' => _t('SiteTree.PRIORITYNOTINDEXED', "Not indexed"), // We set this to -ve one because a blank value implies auto-generation of Priority
'1.0' => '1 - ' . _t('SiteTree.PRIORITYMOSTIMPORTANT', "Most important"),
'0.9' => '2',
'0.8' => '3',
'0.7' => '4',
'0.6' => '5',
'0.5' => '6',
'0.4' => '7',
'0.3' => '8',
'0.2' => '9',
'0.1' => '10 - ' . _t('SiteTree.PRIORITYLEASTIMPORTANT', "Least important")
);
$tabset = $fields->findOrMakeTab('Root.Settings');
$message = "<p>";
$message .= sprintf(_t('SiteTree.METANOTEPRIORITY', "Manually specify a Google Sitemaps priority for this page (%s)"),
'<a href="http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=71936#prioritize" target="_blank">?</a>'
);
$message .= "</p>";
$tabset->push(new Tab('GoogleSitemap', _t('SiteTree.TABGOOGLESITEMAP', 'Google Sitemap'),
new LiteralField("GoogleSitemapIntro", $message),
new DropdownField("Priority", $this->owner->fieldLabel('Priority'), $prorities)
));
}
public function updateFieldLabels(&$labels) {
parent::updateFieldLabels($labels);
$labels['Priority'] = _t('SiteTree.METAPAGEPRIO', "Page Priority");
}
/**
* @return void
*/
public function onAfterPublish() {
GoogleSitemap::ping();
}
/**
* @return void
*/
public function onAfterUnpublish() {
GoogleSitemap::ping();
}
/**
* The default value of the priority field depends on the depth of the page in
* the site tree, so it must be calculated dynamically.
*
* @return float
*/
public function getPriority() {
if(!$this->owner->getField('Priority')) {
$parentStack = $this->owner->parentStack();
$numParents = is_array($parentStack) ? count($parentStack) - 1 : 0;
return max(0.1, 1.0 - ($numParents / 10));
}
elseif ($this->owner->getField('Priority') == -1) {
return -1;
}
else {
$priority = abs($this->owner->getField('Priority'));
return (is_float($priority) && $priority <= 1.0) ? $priority : 0.5;
}
}
/**
* Returns a pages change frequency calculated by pages age and number of
* versions. Google expects always, hourly, daily, weekly, monthly, yearly
* or never as values.
*
* @see http://support.google.com/webmasters/bin/answer.py?hl=en&answer=183668&topic=8476&ctx=topic
*
* @return SS_Datetime
*/
public function getChangeFrequency() {
$date = date('Y-m-d H:i:s');
$prop = $this->owner->toMap();
$created = new SS_Datetime();
$created->value = (isset($prop['Created'])) ? $prop['Created'] : $date;
$now = new SS_Datetime();
$now->value = $date;
$versions = (isset($prop['Version'])) ? $prop['Version'] : 1;
$timediff = $now->format('U') - $created->format('U');
// Check how many revisions have been made over the lifetime of the
// Page for a rough estimate of it's changing frequency.
$period = $timediff / ($versions + 1);
if ($period > 60 * 60 * 24 * 365) {
$freq = 'yearly';
} elseif ($period > 60 * 60 * 24 * 30) {
$freq = 'monthly';
} elseif ($period > 60 * 60 * 24 * 7) {
$freq = 'weekly';
} elseif ($period > 60 * 60 * 24) {
$freq = 'daily';
} elseif ($period > 60 * 60) {
$freq = 'hourly';
} else {
$freq = 'always';
}
return $freq;
}
}

View File

@ -0,0 +1,68 @@
<?php
/**
* Controller for displaying the sitemap.xml. The module displays an index
* sitemap at the sitemap.xml level, then outputs the individual objects
* at a second level.
*
* <code>
* http://site.com/sitemap.xml/
* http://site.com/sitemap.xml/sitemap/$ClassName-$Page.xml
* </code>
*
* @package googlesitemaps
*/
class GoogleSitemapController extends Controller {

    /**
     * Actions that may be reached through the URL.
     *
     * @var array
     */
    public static $allowed_actions = array(
        'index',
        'sitemap'
    );

    /**
     * Default controller action for the sitemap.xml file. Renders an index
     * file containing a list of links to the sub sitemaps holding the data.
     *
     * @param mixed $url Not used by this action
     *
     * @return mixed Template data, or a 404 response when sitemaps are disabled
     */
    public function index($url) {
        if(!GoogleSitemap::enabled()) {
            return new SS_HTTPResponse('Page not found', 404);
        }

        $this->prepareXmlResponse();

        return array(
            'Sitemaps' => GoogleSitemap::get_sitemaps()
        );
    }

    /**
     * Specific controller action for displaying a particular list of links
     * for a class. The class name is read from the 'ID' URL parameter and
     * the page number from 'OtherID'.
     *
     * Both values come straight from the URL, so they are validated before
     * being used to build a query: the class must be SiteTree or a class
     * registered with GoogleSitemap, and the page must be a positive integer.
     * Anything else results in a 404.
     *
     * @return mixed Template data, or a 404 response
     */
    public function sitemap() {
        $class = $this->request->param('ID');
        $page = (int) $this->request->param('OtherID');

        // NOTE(review): relies on GoogleSitemap::is_registered() being
        // publicly callable - confirm its visibility.
        $isKnownClass = $class && (
            ($class == 'SiteTree' && class_exists('SiteTree'))
            || GoogleSitemap::is_registered($class)
        );

        if(!GoogleSitemap::enabled() || !$isKnownClass || $page < 1) {
            return new SS_HTTPResponse('Page not found', 404);
        }

        $this->prepareXmlResponse();

        // Returning an array lets the request continue on to rendering.
        return array(
            'Items' => GoogleSitemap::get_items($class, $page)
        );
    }

    /**
     * Switches off template source comments and marks the response as XML.
     *
     * @return void
     */
    protected function prepareXmlResponse() {
        SSViewer::set_source_file_comments(false);

        $this->getResponse()->addHeader('Content-Type', 'application/xml; charset="utf-8"');
    }
}

View File

@ -0,0 +1,116 @@
<?php
/**
* Decorate the page object to provide google sitemaps with
* additionally options and configuration.
*
* @package googlesitemaps
*/
class GoogleSitemapExtension extends DataExtension {

    /**
     * Decides whether the extended object should be listed in the sitemap.
     *
     * An object is left out when it exposes an AbsoluteLink() pointing at a
     * different host than the current request, when it cannot be viewed, or
     * when its Google priority is falsy. Other extensions can change the
     * outcome through the 'alterCanIncludeInGoogleSitemap' hook.
     *
     * @return boolean
     */
    public function canIncludeInGoogleSitemap() {
        $can = true;

        // The link is provided by the extended object, not by this
        // extension, so both the check and the call go through the owner.
        if($this->owner->hasMethod('AbsoluteLink')) {
            $objHttp = parse_url($this->owner->AbsoluteLink(), PHP_URL_HOST);
            $hostHttp = parse_url('http://' . $_SERVER['HTTP_HOST'], PHP_URL_HOST);

            if($objHttp != $hostHttp) {
                $can = false;
            }
        }

        if($can) {
            $can = $this->owner->canView();
        }

        if($can) {
            $can = $this->owner->getGooglePriority();
        }

        $this->owner->extend('alterCanIncludeInGoogleSitemap', $can);

        // getGooglePriority() hands back a priority value rather than a
        // flag; normalise so callers always receive a boolean.
        return (bool) $can;
    }

    /**
     * Notifies Google after the owner has been published.
     *
     * @return void
     */
    public function onAfterPublish() {
        GoogleSitemap::ping();
    }

    /**
     * Notifies Google after the owner has been unpublished.
     *
     * @return void
     */
    public function onAfterUnpublish() {
        GoogleSitemap::ping();
    }

    /**
     * Returns the sitemap priority of the owner. A 'Priority' field on the
     * owner wins when it holds a truthy value; a negative value means the
     * object should not be listed and yields false. Otherwise the default
     * priority registered for the owner's class is used.
     *
     * @return mixed Priority value, or false when the object is excluded
     */
    public function getGooglePriority() {
        $field = $this->owner->hasField('Priority');

        // The assignment inside the condition is intentional: the owner's
        // field value is copied onto this extension and tested in one step.
        if(isset($this->Priority) || ($field && $this->Priority = $this->owner->getField('Priority'))) {
            return ($this->Priority < 0) ? false : $this->Priority;
        }

        return GoogleSitemap::get_priority_for_class($this->owner->class);
    }

    /**
     * Returns the change frequency of the owner. A frequency registered for
     * the owner's class is used when there is one; otherwise it is estimated
     * from the age of the record and its number of versions. Google expects
     * always, hourly, daily, weekly, monthly, yearly or never as values.
     *
     * @see http://support.google.com/webmasters/bin/answer.py?hl=en&answer=183668&topic=8476&ctx=topic
     *
     * @return string
     */
    public function getChangeFrequency() {
        if($freq = GoogleSitemap::get_frequency_for_class($this->owner->class)) {
            return $freq;
        }

        $date = date('Y-m-d H:i:s');

        $created = new SS_Datetime();
        $created->value = ($this->owner->Created) ? $this->owner->Created : $date;

        $now = new SS_Datetime();
        $now->value = $date;

        $versions = ($this->owner->Version) ? $this->owner->Version : 1;
        $timediff = $now->format('U') - $created->format('U');

        // Average number of seconds between revisions over the lifetime of
        // the record, used as a rough estimate of how often it changes.
        $period = $timediff / ($versions + 1);

        if($period > 60 * 60 * 24 * 365) {
            $freq = 'yearly';
        } elseif($period > 60 * 60 * 24 * 30) {
            $freq = 'monthly';
        } elseif($period > 60 * 60 * 24 * 7) {
            $freq = 'weekly';
        } elseif($period > 60 * 60 * 24) {
            $freq = 'daily';
        } elseif($period > 60 * 60) {
            $freq = 'hourly';
        } else {
            $freq = 'always';
        }

        return $freq;
    }
}

View File

@ -0,0 +1,90 @@
<?php
/**
* @package googlesitemaps
*/
class GoogleSitemapSiteTreeExtension extends GoogleSitemapExtension {

    /**
     * Adds a manually selectable sitemap priority to pages.
     *
     * @var array
     */
    public static $db = array(
        "Priority" => "Varchar(5)"
    );

    /**
     * Adds a 'Google Sitemap' tab holding the priority dropdown to the
     * page settings.
     *
     * @param FieldList
     */
    public function updateSettingsFields(&$fields) {
        $priorities = array(
            '-1' => _t('GoogleSitemaps.PRIORITYNOTINDEXED', "Not indexed"),
            '1.0' => '1 - ' . _t('GoogleSitemaps.PRIORITYMOSTIMPORTANT', "Most important"),
            '0.9' => '2',
            '0.8' => '3',
            '0.7' => '4',
            '0.6' => '5',
            '0.5' => '6',
            '0.4' => '7',
            '0.3' => '8',
            '0.2' => '9',
            '0.1' => '10 - ' . _t('GoogleSitemaps.PRIORITYLEASTIMPORTANT', "Least important")
        );

        $tabset = $fields->findOrMakeTab('Root.Settings');

        $message = "<p>";
        $message .= sprintf(_t('GoogleSitemaps.METANOTEPRIORITY', "Manually specify a Google Sitemaps priority for this page (%s)"),
            '<a href="http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=71936#prioritize" target="_blank">?</a>'
        );
        $message .= "</p>";

        $priority = new DropdownField("Priority", $this->owner->fieldLabel('Priority'), $priorities, $this->owner->Priority);

        // An empty selection means the priority is derived from page depth.
        $priority->setEmptyString(_t('GoogleSitemaps.PRIORITYAUTOSET', 'Auto-set based on page depth'));

        $tabset->push(new Tab('GoogleSitemap', _t('GoogleSitemaps.TABGOOGLESITEMAP', 'Google Sitemap'),
            new LiteralField("GoogleSitemapIntro", $message),
            $priority
        ));
    }

    /**
     * Adds the label for the priority field.
     *
     * @param array
     *
     * @return void
     */
    public function updateFieldLabels(&$labels) {
        parent::updateFieldLabels($labels);

        $labels['Priority'] = _t('GoogleSitemaps.METAPAGEPRIO', "Page Priority");
    }

    /**
     * Applies the generic checks and additionally keeps error pages out of
     * the sitemap.
     *
     * @return boolean
     */
    public function canIncludeInGoogleSitemap() {
        // The page is the owner of this extension; the extension itself can
        // never be an ErrorPage.
        if($this->owner instanceof ErrorPage) {
            return false;
        }

        return parent::canIncludeInGoogleSitemap();
    }

    /**
     * Returns the sitemap priority of the page.
     *
     * Without a stored value the priority is derived from the depth of the
     * page in the site tree: 1.0 at the top, dropping by 0.1 per level down
     * to a minimum of 0.1. A stored value of -1 excludes the page. Any other
     * stored value is returned when it is a number in the range (0, 1],
     * otherwise 0.5 is used.
     *
     * @return mixed Priority value, or false when the page is excluded
     */
    public function getGooglePriority() {
        $priority = $this->owner->getField('Priority');

        if(!$priority) {
            $parentStack = $this->owner->parentStack();
            $numParents = is_array($parentStack) ? count($parentStack) - 1 : 0;

            $num = max(0.1, 1.0 - ($numParents / 10));

            // Replace a comma decimal separator, which can appear when the
            // float is converted to a string, with the dot sitemaps need.
            return str_replace(",", ".", $num);
        }

        if($priority == -1) {
            return false;
        }

        // The field is a Varchar, so the stored value is a numeric string
        // and has to be tested with is_numeric() rather than is_float().
        $isValid = is_numeric($priority) && $priority > 0 && $priority <= 1.0;

        return $isValid ? $priority : 0.5;
    }
}

View File

@ -25,10 +25,35 @@ Importance drops from 1.0, to 0.9, to 0.8, and so on, until 0.1 is reached).
In the CMS, in the Settings tab for each page, you can set the importance In the CMS, in the Settings tab for each page, you can set the importance
manually, including requesting to have the page excluded from the sitemap. manually, including requesting to have the page excluded from the sitemap.
## Configuration
## Setup automatic pinging Most module configuration is done via the SilverStripe Config API. Create a new
config file `mysite/_config/googlesitemaps.yml` with the following outline:
GoogleSitemap::enable_google_notification(); ---
Name: customgooglesitemaps
After: googlesitemaps
---
GoogleSitemap:
enabled: true
objects_per_sitemap: 1000
google_notification_enabled: false
use_show_in_search: true
You can now alter any of those properties to suit your needs. A popular option
is to turn on automatic pinging so that Google is notified of any updates to
your pages. You can enable this in the file we created above by setting the
`google_notification_enabled` option to `true`:
---
Name: customgooglesitemaps
After: googlesitemaps
---
GoogleSitemap:
enabled: true
objects_per_sitemap: 1000
google_notification_enabled: true
use_show_in_search: true
### Including DataObjects ### Including DataObjects
@ -79,38 +104,3 @@ instead of the previous code you would write:
See the following blog post for more information: See the following blog post for more information:
http://www.silvercart.org/blog/dataobjects-and-googlesitemaps/ http://www.silvercart.org/blog/dataobjects-and-googlesitemaps/
### Including other routes
If your project has routes that are not stored in the database such as custom
controllers and actions, the module provides an extension hook called
*updateItems* which allows anyone to write extensions to alter the provided
items.
Here's an example of registering the MyController/about URL which is defined as
an action. First we create our new extension and define the links we wish to
add to the $items list.
<?php
class GoogleSitemapExtension extends Extension {
public function updateItems($items) {
$base = Director::absoluteBaseUrl();
$routes = array(
'/MyController/',
'/MyController/about/'
);
foreach($routes as $route) {
$items->push(new ArrayData(array(
'AbsoluteLink' => Controller::join_links($base, $route)
)));
}
}
}
Before we can see the updates we first must add this extension to our built in
class. Inside your mysite/_config.php file add the following:
Object::add_extension('GoogleSitemap', 'GoogleSitemapExtension');

View File

@ -1,5 +1,5 @@
en: en:
SiteTree: GoogleSitemaps:
METANOTEPRIORITY: 'Manually specify a Google Sitemaps priority for this page (%s)' METANOTEPRIORITY: 'Manually specify a Google Sitemaps priority for this page (%s)'
METAPAGEPRIO: 'Page Priority' METAPAGEPRIO: 'Page Priority'
PRIORITYAUTOSET: 'Auto-set based on page depth' PRIORITYAUTOSET: 'Auto-set based on page depth'

View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"><% loop Sitemaps %>
<sitemap>
<loc>{$BaseHref}sitemap.xml/sitemap/$ClassName/$Page.xml</loc>
<% if LastModified %><lastmod>$LastModified</lastmod><% end_if %>
</sitemap><% end_loop %>
</sitemapindex>

View File

@ -6,7 +6,7 @@
<loc>$AbsoluteLink</loc> <loc>$AbsoluteLink</loc>
<% if $LastEdited %><lastmod>$LastEdited.Format(c)</lastmod><% end_if %> <% if $LastEdited %><lastmod>$LastEdited.Format(c)</lastmod><% end_if %>
<% if $ChangeFrequency %><changefreq>$ChangeFrequency</changefreq><% end_if %> <% if $ChangeFrequency %><changefreq>$ChangeFrequency</changefreq><% end_if %>
<% if $Priority %><priority>$Priority</priority><% end_if %> <% if $GooglePriority %><priority>$GooglePriority</priority><% end_if %>
</url> </url>
<% end_loop %> <% end_loop %>
</urlset> </urlset>

View File

@ -6,9 +6,9 @@
<head> <head>
<title>XML Sitemap</title> <title>XML Sitemap</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<link rel="stylesheet" href="googlesitemaps/css/style.css" /> <link rel="stylesheet" href="../../../googlesitemaps/css/style.css" />
<script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js"></script> <script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js"></script>
<script type="text/javascript" src="googlesitemaps/javascript/jquery.tablesorter.min.js"></script> <script type="text/javascript" src="../../../googlesitemaps/javascript/jquery.tablesorter.min.js"></script>
<script type="text/javascript"><![CDATA[ <script type="text/javascript"><![CDATA[
$(document).ready(function() { $(document).ready(function() {
$("#sitemap").tablesorter( { sortList: [[0,0]],widgets: ['zebra'] } ); $("#sitemap").tablesorter( { sortList: [[0,0]],widgets: ['zebra'] } );
@ -18,8 +18,8 @@
<body> <body>
<div id="content"> <div id="content">
<h1> <h1>
<a href="http://www.silverstripe.org" target="_blank">XML Sitemap <a href="http://silverstripe.org" target="_blank">XML Sitemap
<span class="ss_link">&#8594; www.silverstripe.org</span> <span class="ss_link">&#8594; silverstripe.org</span>
</a> </a>
</h1> </h1>
@ -61,7 +61,7 @@
</xsl:for-each> </xsl:for-each>
</tbody> </tbody>
</table> </table>
<p id="Footer" class="expl">Generated by SilverStripe <p id="Footer" class="expl">Generated by the SilverStripe
<a href="https://github.com/silverstripe-labs/silverstripe-googlesitemaps" target="_blank" title="SilverStripe Google Sitemaps module on Github">Google Sitemaps Module</a> <a href="https://github.com/silverstripe-labs/silverstripe-googlesitemaps" target="_blank" title="SilverStripe Google Sitemaps module on Github">Google Sitemaps Module</a>
<br />More information about XML sitemaps on <a href="http://sitemaps.org" target="_blank">sitemaps.org</a>. <br />More information about XML sitemaps on <a href="http://sitemaps.org" target="_blank">sitemaps.org</a>.
</p> </p>

View File

@ -1,7 +1,6 @@
<?php <?php
/** /**
* @todo improve tests to be more robust when adding custom behaviours
* @package googlesitemaps * @package googlesitemaps
* @subpackage tests * @subpackage tests
*/ */
@ -22,9 +21,7 @@ class GoogleSitemapTest extends FunctionalTest {
$this->loadFixture('googlesitemaps/tests/GoogleSitemapPageTest.yml'); $this->loadFixture('googlesitemaps/tests/GoogleSitemapPageTest.yml');
} }
GoogleSitemap::unregister_dataobject('GoogleSitemapTest_DataObject'); GoogleSitemap::clear_registered_dataobjects();
GoogleSitemap::unregister_dataobject('GoogleSitemapTest_OtherDataObject');
GoogleSitemap::unregister_dataobject('GoogleSitemapTest_UnviewableDataObject');
} }
public function tearDown() { public function tearDown() {
@ -33,26 +30,92 @@ class GoogleSitemapTest extends FunctionalTest {
GoogleSitemap::clear_registered_dataobjects(); GoogleSitemap::clear_registered_dataobjects();
} }
public function testItems() {
$sitemap = new GoogleSitemap();
// register a DataObject and see if its aded to the sitemap public function testGetItems() {
GoogleSitemap::register_dataobject("GoogleSitemapTest_DataObject", ''); GoogleSitemap::register_dataobject("GoogleSitemapTest_DataObject", '');
$this->assertEquals(2, $sitemap->Items()->Count());
$items = GoogleSitemap::get_items('GoogleSitemapTest_DataObject', 1);
$this->assertEquals(2, $items->count());
$this->assertDOSEquals(array(
array("Priority" => "0.2"),
array("Priority" => "0.4")
), $items);
GoogleSitemap::register_dataobject("GoogleSitemapTest_OtherDataObject"); GoogleSitemap::register_dataobject("GoogleSitemapTest_OtherDataObject");
$this->assertEquals(3, $sitemap->Items()->Count()); $this->assertEquals(1, GoogleSitemap::get_items('GoogleSitemapTest_OtherDataObject', 1)->count());
GoogleSitemap::register_dataobject("GoogleSitemapTest_UnviewableDataObject"); GoogleSitemap::register_dataobject("GoogleSitemapTest_UnviewableDataObject");
$this->assertEquals(3, $sitemap->Items()->Count()); $this->assertEquals(0, GoogleSitemap::get_items('GoogleSitemapTest_UnviewableDataObject', 1)->count());
} }
public function testItemsWithPages() { public function testAccessingSitemapRootXMLFile() {
GoogleSitemap::register_dataobject("GoogleSitemapTest_DataObject");
GoogleSitemap::register_dataobject("GoogleSitemapTest_OtherDataObject");
$response = $this->get('sitemap.xml');
$body = $response->getBody();
// the sitemap should contain <loc> to both those files and not the other
// dataobject as it hasn't been registered
$expected = "<loc>". Director::absoluteURL("sitemap.xml/sitemap/GoogleSitemapTest_DataObject/1") ."</loc>";
$this->assertEquals(1, substr_count($body, $expected) , 'A link to GoogleSitemapTest_DataObject exists');
$expected = "<loc>". Director::absoluteURL("sitemap.xml/sitemap/GoogleSitemapTest_OtherDataObject/1") ."</loc>";
$this->assertEquals(1, substr_count($body, $expected) , 'A link to GoogleSitemapTest_OtherDataObject exists');
$expected = "<loc>". Director::absoluteURL("sitemap.xml/sitemap/GoogleSitemapTest_UnviewableDataObject/2") ."</loc>";
$this->assertEquals(0, substr_count($body, $expected) , 'A link to a GoogleSitemapTest_UnviewableDataObject does not exist');
}
/**
 * Checks that the root sitemap.xml response exposes a <lastmod> entry
 * matching the LastEdited value of the registered DataObject records.
 */
public function testLastModifiedDateOnRootXML() {
GoogleSitemap::register_dataobject("GoogleSitemapTest_DataObject");
// Pin LastEdited on every GoogleSitemapTest_DataObject row to a known date
// so the expected <lastmod> value is deterministic.
DB::query("
UPDATE GoogleSitemapTest_DataObject SET LastEdited = '2012-01-14'"
);
$response = $this->get('sitemap.xml');
$body = $response->getBody();
// Exactly one <lastmod> element with the pinned date is expected in the body.
$expected = "<lastmod>2012-01-14</lastmod>";
$this->assertEquals(1, substr_count($body, $expected));
}
/**
 * With `objects_per_sitemap` lowered to 1, the sitemap index is expected to
 * link to page 1 and page 2 of the GoogleSitemapTest_DataObject sitemap,
 * each exactly once.
 */
public function testIndexFilePaginatedSitemapFiles() {
	$original = Config::inst()->get('GoogleSitemap', 'objects_per_sitemap');
	Config::inst()->update('GoogleSitemap', 'objects_per_sitemap', 1);

	GoogleSitemap::register_dataobject("GoogleSitemapTest_DataObject");

	$response = $this->get('sitemap.xml');
	$body = $response->getBody();

	// Restore the configured limit as soon as the response has been captured.
	// A failing assertion aborts the method, so restoring afterwards would
	// leave the limit at 1 for every test that runs later.
	Config::inst()->update('GoogleSitemap', 'objects_per_sitemap', $original);

	$expected = "<loc>". Director::absoluteURL("sitemap.xml/sitemap/GoogleSitemapTest_DataObject/1") ."</loc>";
	$this->assertEquals(1, substr_count($body, $expected) , 'A link to the first page of GoogleSitemapTest_DataObject exists');

	$expected = "<loc>". Director::absoluteURL("sitemap.xml/sitemap/GoogleSitemapTest_DataObject/2") ."</loc>";
	$this->assertEquals(1, substr_count($body, $expected) , 'A link to the second page GoogleSitemapTest_DataObject exists');
}
/**
 * Requests the first nested sitemap file for GoogleSitemapTest_DataObject
 * (with `objects_per_sitemap` lowered to 1) and expects a 200 response.
 */
public function testAccessingNestedSiteMap() {
	$original = Config::inst()->get('GoogleSitemap', 'objects_per_sitemap');
	Config::inst()->update('GoogleSitemap', 'objects_per_sitemap', 1);

	GoogleSitemap::register_dataobject("GoogleSitemapTest_DataObject");

	$response = $this->get('sitemap.xml/sitemap/GoogleSitemapTest_DataObject/1');

	// Restore the configured limit before asserting so a failed assertion
	// cannot leave the lowered limit in place for subsequent tests.
	Config::inst()->update('GoogleSitemap', 'objects_per_sitemap', $original);

	$this->assertEquals(200, $response->getStatusCode(), 'successful loaded nested sitemap');
}
public function testGetItemsWithPages() {
if(!class_exists('Page')) { if(!class_exists('Page')) {
$this->markTestIncomplete('No cms module installed, page related test skipped'); $this->markTestIncomplete('No cms module installed, page related test skipped');
} }
$sitemap = new GoogleSitemap();
$page = $this->objFromFixture('Page', 'Page1'); $page = $this->objFromFixture('Page', 'Page1');
$page->publish('Stage', 'Live'); $page->publish('Stage', 'Live');
$page->flushCache(); $page->flushCache();
@ -64,7 +127,7 @@ class GoogleSitemapTest extends FunctionalTest {
$this->assertDOSContains(array( $this->assertDOSContains(array(
array('Title' => 'Testpage1'), array('Title' => 'Testpage1'),
array('Title' => 'Testpage2') array('Title' => 'Testpage2')
), $sitemap->Items(), "There should be 2 pages in the sitemap after publishing"); ), GoogleSitemap::get_items('SiteTree'), "There should be 2 pages in the sitemap after publishing");
// check if we make a page readonly that it is hidden // check if we make a page readonly that it is hidden
$page2->CanViewType = 'LoggedInUsers'; $page2->CanViewType = 'LoggedInUsers';
@ -75,36 +138,30 @@ class GoogleSitemapTest extends FunctionalTest {
$this->assertDOSEquals(array( $this->assertDOSEquals(array(
array('Title' => 'Testpage1') array('Title' => 'Testpage1')
), $sitemap->Items(), "There should be only 1 page, other is logged in only"); ), GoogleSitemap::get_items('SiteTree'), "There should be only 1 page, other is logged in only");
// register a DataObject and see if its aded to the sitemap
GoogleSitemap::register_dataobject("GoogleSitemapTest_DataObject", '');
// check to see if we have the GoogleSitemapTest_DataObject objects
$this->assertEquals(3, $sitemap->Items()->Count());
// register another dataobject
GoogleSitemap::register_dataobject("GoogleSitemapTest_OtherDataObject");
$this->assertEquals(4, $sitemap->Items()->Count());
// check if we register objects that are unreadable they don't end up
// in the sitemap
GoogleSitemap::register_dataobject("GoogleSitemapTest_UnviewableDataObject");
$this->assertEquals(4, $sitemap->Items()->Count());
} }
public function testAccess() { public function testAccess() {
GoogleSitemap::enable(); Config::inst()->update('GoogleSitemap', 'enabled', true);
$response = $this->get('sitemap.xml'); $response = $this->get('sitemap.xml');
$this->assertEquals(200, $response->getStatusCode(), 'Sitemap returns a 200 success when enabled'); $this->assertEquals(200, $response->getStatusCode(), 'Sitemap returns a 200 success when enabled');
$this->assertEquals('application/xml; charset="utf-8"', $response->getHeader('Content-Type')); $this->assertEquals('application/xml; charset="utf-8"', $response->getHeader('Content-Type'));
GoogleSitemap::disable(); GoogleSitemap::register_dataobject("GoogleSitemapTest_DataObject");
$response = $this->get('sitemap.xml/sitemap/GoogleSitemapTest_DataObject/1');
$this->assertEquals(200, $response->getStatusCode(), 'Sitemap returns a 200 success when enabled');
$this->assertEquals('application/xml; charset="utf-8"', $response->getHeader('Content-Type'));
Config::inst()->remove('GoogleSitemap', 'enabled');
Config::inst()->update('GoogleSitemap', 'enabled', false);
$response = $this->get('sitemap.xml'); $response = $this->get('sitemap.xml');
$this->assertEquals(404, $response->getStatusCode(), 'Sitemap returns a 404 when disabled'); $this->assertEquals(404, $response->getStatusCode(), 'Sitemap index returns a 404 when disabled');
$response = $this->get('sitemap.xml/sitemap/GoogleSitemapTest_DataObject/1');
$this->assertEquals(404, $response->getStatusCode(), 'Sitemap file returns a 404 when disabled');
} }
public function testDecoratorAddsFields() { public function testDecoratorAddsFields() {
@ -131,11 +188,11 @@ class GoogleSitemapTest extends FunctionalTest {
// invalid field doesn't break google // invalid field doesn't break google
$page->Priority = 'foo'; $page->Priority = 'foo';
$this->assertEquals(0.5, $page->getPriority()); $this->assertEquals(0.5, $page->getGooglePriority());
// google doesn't like -1 but we use it to indicate the minimum // -1 indicates that we should not index this
$page->Priority = -1; $page->Priority = -1;
$this->assertEquals(0, $page->getPriority()); $this->assertFalse($page->getGooglePriority());
} }
} }

View File

@ -3,6 +3,8 @@ GoogleSitemapTest_DataObject:
Priority: 0.4 Priority: 0.4
DataObjectTest2: DataObjectTest2:
Priority: 0.2 Priority: 0.2
UnindexedDataObject:
Priority: -1
GoogleSitemapTest_OtherDataObject: GoogleSitemapTest_OtherDataObject:
OtherDataObjectTest2: OtherDataObjectTest2: