2008-11-18 23:10:33 +00:00
|
|
|
<?php
|
|
|
|
/**
|
2012-07-06 16:44:26 +12:00
|
|
|
* Sitemaps are a way to tell Google about pages on your site that they might
|
2013-01-15 22:29:59 +13:00
|
|
|
* not otherwise discover. In its simplest terms, an XML Sitemap—usually called
|
|
|
|
* a Sitemap, with a capital S—is a list of the pages on your website.
|
2012-07-06 16:44:26 +12:00
|
|
|
*
|
2013-01-15 22:29:59 +13:00
|
|
|
* Creating and submitting a Sitemap helps make sure that Google knows about
|
|
|
|
* all the pages on your site, including URLs that may not be discoverable by
|
|
|
|
* Google's normal crawling process.
|
|
|
|
*
|
|
|
|
* The GoogleSitemap class handles requests to 'sitemap.xml';
|
2008-11-18 23:10:33 +00:00
|
|
|
* the other two classes are used to render the sitemap.
|
|
|
|
*
|
|
|
|
* You can notify ("ping") Google about a changed sitemap
|
|
|
|
* automatically whenever a new page is published or unpublished.
|
|
|
|
* By default, Google is not notified, and will pick up your new
|
|
|
|
* sitemap whenever the GoogleBot visits your website.
|
|
|
|
*
|
|
|
|
* Enabling notification of Google after every publish (in your _config.php):
|
2011-06-13 11:16:07 +12:00
|
|
|
*
|
2011-04-26 21:17:49 +12:00
|
|
|
* <example>
|
2008-11-18 23:10:33 +00:00
|
|
|
* GoogleSitemap::enable_google_notification();
|
|
|
|
* </example>
|
|
|
|
*
|
|
|
|
* @see http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=34609
|
|
|
|
*
|
2009-08-12 03:55:21 +00:00
|
|
|
* @package googlesitemaps
|
2008-11-18 23:10:33 +00:00
|
|
|
*/
|
2013-01-15 22:29:59 +13:00
|
|
|
class GoogleSitemap {
|
2011-06-07 11:37:38 -07:00
|
|
|
|
|
|
|
/**
|
2012-07-06 16:44:26 +12:00
|
|
|
* List of DataObject class names to include, as well as the change
|
|
|
|
* frequency and priority of each class.
|
2011-06-07 11:37:38 -07:00
|
|
|
*
|
|
|
|
* @var array
|
|
|
|
*/
|
2012-07-06 16:44:26 +12:00
|
|
|
private static $dataobjects = array();
|
2011-06-07 11:37:38 -07:00
|
|
|
|
2011-06-13 11:16:07 +12:00
|
|
|
/**
 * Registers a DataObject class for inclusion in the sitemap.
 *
 * Unless the class is already registered, the 'GoogleSitemapExtension'
 * extension is added to it and its change frequency and priority are
 * stored in the list of registered DataObjects.
 * Note that all registered DataObjects need the method AbsoluteLink().
 *
 * @param string $className name of the DataObject class to register
 * @param string $changeFreq how often is this DataObject updated?
 *                           Possible values:
 *                           always, hourly, daily, weekly, monthly, yearly, never.
 *                           A falsy value falls back to 'monthly'.
 * @param string $priority How important is this DataObject in comparison to other urls?
 *                         Possible values: 0.1, 0.2 ... , 0.9, 1.0.
 *                         A falsy value falls back to '0.6'.
 *
 * @return void
 */
public static function register_dataobject($className, $changeFreq = 'monthly', $priority = '0.6') {
    // A class that is already registered keeps its first registration.
    if (self::is_registered($className)) {
        return;
    }

    Object::add_extension($className, 'GoogleSitemapExtension');

    $frequencySetting = 'monthly';
    if ($changeFreq) {
        $frequencySetting = $changeFreq;
    }

    $prioritySetting = '0.6';
    if ($priority) {
        $prioritySetting = $priority;
    }

    self::$dataobjects[$className] = array(
        'frequency' => $frequencySetting,
        'priority' => $prioritySetting
    );
}
|
2012-09-06 22:20:44 +12:00
|
|
|
|
2011-06-07 11:37:38 -07:00
|
|
|
/**
 * Tells whether a class name is present in the list of registered
 * DataObjects.
 *
 * @param string $className Name of the DataObject class to look up
 *
 * @return bool true when the class has been registered
 */
public static function is_registered($className) {
    $registered = isset(self::$dataobjects[$className]);

    return $registered;
}
|
2012-09-06 22:20:44 +12:00
|
|
|
|
|
|
|
/**
 * Removes a class from the list of registered DataObjects. Mostly used
 * for the test suite. Removing a class that was never registered has no
 * effect.
 *
 * @param string $className Name of the DataObject class to remove
 *
 * @return void
 */
public static function unregister_dataobject($className) {
    unset(self::$dataobjects[$className]);
}
|
2011-06-07 11:37:38 -07:00
|
|
|
|
2012-07-23 15:01:33 +12:00
|
|
|
/**
 * Empties the list of registered DataObject classes. Useful for unit
 * tests.
 *
 * @return void
 */
public static function clear_registered_dataobjects() {
    self::$dataobjects = array();
}
|
|
|
|
|
2011-06-07 11:37:38 -07:00
|
|
|
/**
 * Builds the list of records to render on one page of a sitemap.
 *
 * For 'SiteTree' the records are read from the 'Live' stage, restricted
 * to pages with ShowInSearch set when the 'use_show_in_search' config
 * value is truthy. Any other class is read through a plain DataList.
 * The list is then cut down to the requested page, using the
 * 'objects_per_sitemap' config value as the page size, and only records
 * whose canIncludeInGoogleSitemap() returns a truthy value are kept.
 *
 * @param string $class name of the class to list
 * @param int $page 1-based number of the page to return
 *
 * @return ArrayList the records to include on that page
 */
public static function get_items($class, $page = 1) {
    $output = new ArrayList();
    $perPage = Config::inst()->get('GoogleSitemap', 'objects_per_sitemap');
    $useShowInSearch = Config::inst()->get('GoogleSitemap', 'use_show_in_search');

    if ($class == "SiteTree") {
        $where = "";
        if ($useShowInSearch) {
            $where = "\"ShowInSearch\" = 1";
        }

        $records = Versioned::get_by_stage('SiteTree', 'Live', $where);
    } else {
        $records = new DataList($class);
    }

    // Page numbers start at 1, so page 1 maps to offset 0.
    $offset = ($page - 1) * $perPage;
    $records = $records->limit($perPage, $offset);

    if (!$records) {
        return $output;
    }

    foreach ($records as $record) {
        if (!$record->canIncludeInGoogleSitemap()) {
            continue;
        }

        $output->push($record);
    }

    return $output;
}
|
2013-01-15 22:29:59 +13:00
|
|
|
|
2011-06-07 11:37:38 -07:00
|
|
|
/**
 * Returns the change frequency registered for a particular dataobject
 * class.
 *
 * Only classes registered through register_dataobject() have a stored
 * frequency; for any other class null is returned, leaving it to the
 * caller to determine a frequency by other means.
 *
 * @param string $class name of the class to look up
 *
 * @return string|null the registered frequency, or null when the class
 *                     is not registered
 */
public static function get_frequency_for_class($class) {
    // The registry is keyed by class name, so look the entry up directly
    // instead of scanning every registered class.
    if (isset(self::$dataobjects[$class])) {
        return self::$dataobjects[$class]['frequency'];
    }

    return null;
}
|
2011-06-07 11:37:38 -07:00
|
|
|
|
2013-01-15 22:29:59 +13:00
|
|
|
/**
 * Returns the priority registered for a particular dataobject class.
 *
 * For a registered class this is the value stored by
 * register_dataobject() (by default the string '0.6'). Classes that are
 * not registered get the default priority 0.5.
 *
 * @param string $class name of the class to look up
 *
 * @return string|float the registered priority, or 0.5 when the class
 *                      is not registered
 */
public static function get_priority_for_class($class) {
    // The registry is keyed by class name, so look the entry up directly
    // instead of scanning every registered class.
    if (isset(self::$dataobjects[$class])) {
        return self::$dataobjects[$class]['priority'];
    }

    return 0.5;
}
|
2012-06-30 21:42:50 +12:00
|
|
|
|
2012-07-06 16:44:26 +12:00
|
|
|
/**
 * Builds the index of sitemap files.
 *
 * The google site map is broken down into multiple smaller files to
 * prevent overburdening a server. Each class gets its own files, and
 * the records of a class are split into chunks of 'objects_per_sitemap'
 * (config value) records per file.
 *
 * Live 'SiteTree' pages are listed first (only when the SiteTree class
 * exists, and restricted to ShowInSearch pages when the
 * 'use_show_in_search' config value is truthy), followed by every
 * registered DataObject class sorted by LastEdited ascending.
 *
 * Each entry is an ArrayData with the keys 'ClassName', 'Page' (1-based
 * file number) and 'LastModified' (Y-m-d). LastModified is taken from
 * the LastEdited field of the record that last() returns for the file's
 * slice, or is today's date when no record is returned.
 *
 * @return ArrayList
 */
public static function get_sitemaps() {
    $perFile = Config::inst()->get('GoogleSitemap', 'objects_per_sitemap');
    $sitemaps = new ArrayList();
    $useShowInSearch = Config::inst()->get('GoogleSitemap', 'use_show_in_search');

    if (class_exists('SiteTree')) {
        $where = "";
        if ($useShowInSearch) {
            $where = "\"ShowInSearch\" = 1";
        }

        $pages = Versioned::get_by_stage('SiteTree', 'Live', $where);
        $total = $pages->count();
        $fileCount = ceil($total / $perFile);

        for ($fileNo = 1; $fileNo <= $fileCount; $fileNo++) {
            $offset = ($fileNo - 1) * $perFile;
            $tail = $pages->limit($perFile, $offset)->last();

            if ($tail) {
                $lastModified = $tail->dbObject('LastEdited')->Format('Y-m-d');
            } else {
                $lastModified = date('Y-m-d');
            }

            $sitemaps->push(new ArrayData(array(
                'ClassName' => 'SiteTree',
                'LastModified' => $lastModified,
                'Page' => $fileNo
            )));
        }
    }

    foreach (self::$dataobjects as $class => $config) {
        $list = new DataList($class);
        $list = $list->sort('LastEdited ASC');

        $fileCount = ceil($list->count() / $perFile);

        for ($fileNo = 1; $fileNo <= $fileCount; $fileNo++) {
            // determine the last modified date for this slice
            $offset = ($fileNo - 1) * $perFile;
            $tail = $list->limit($perFile, $offset)->last();

            if ($tail) {
                $lastModified = $tail->dbObject('LastEdited')->Format('Y-m-d');
            } else {
                $lastModified = date('Y-m-d');
            }

            $sitemaps->push(new ArrayData(array(
                'ClassName' => $class,
                'Page' => $fileNo,
                'LastModified' => $lastModified
            )));
        }
    }

    return $sitemaps;
}
|
2013-01-15 22:29:59 +13:00
|
|
|
|
2008-11-18 23:10:33 +00:00
|
|
|
/**
 * Notifies Google about changes to your sitemap. This behavior is
 * disabled by default; to enable it, read the documentation provided in
 * the docs folder.
 *
 * After notifications have been enabled, every publish / unpublish of a
 * page will notify Google of the update.
 *
 * No ping is sent when the sitemap itself is disabled, when the
 * 'google_notification_enabled' config value is falsy, or when the site
 * is in development mode.
 *
 * @return string|false|null false when the sitemap is disabled, null
 *                           when notifications are off or the site is in
 *                           dev mode, otherwise the result of the ping
 *                           request
 */
public static function ping() {
    if (!self::enabled()) {
        return false;
    }

    // Don't ping if the site has disabled it, or if the site is in dev mode
    $notify = Config::inst()->get('GoogleSitemap', 'google_notification_enabled');

    if (!$notify || Director::isDev()) {
        return;
    }

    $sitemapUrl = Controller::join_links(
        Director::absoluteBaseURL(),
        'sitemap.xml'
    );

    // The sitemap URL travels as a query string value, so it is URL-encoded.
    $query = sprintf("sitemap=%s", urlencode($sitemapUrl));

    return self::send_ping("www.google.com", "/webmasters/sitemaps/ping", $query);
}
|
2012-09-18 09:45:23 -04:00
|
|
|
|
|
|
|
/**
 * Send an HTTP GET request to the host on port 80 and return whatever
 * the host sends back.
 *
 * @param string $host host name to connect to, also sent as Host header
 * @param string $path request path
 * @param string $query query string without the leading '?'; when empty
 *                      no query string is appended to the path
 *
 * @return String Response text as read from the socket, or the error
 *                message reported by fsockopen() when the connection
 *                could not be opened
 */
protected static function send_ping($host, $path, $query) {
    $socket = fsockopen($host, 80, $errno, $error);
    if (!$socket) {
        return $error;
    }

    if ($query) {
        $query = '?' . $query;
    }

    // "Connection: Close" asks the host to end the connection after the
    // response, so reading until end of stream yields the full response.
    $request = "GET {$path}{$query} HTTP/1.1\r\nHost: $host\r\nConnection: Close\r\n\r\n";
    fwrite($socket, $request);
    $response = stream_get_contents($socket);

    // Release the socket explicitly instead of leaving it open until the
    // end of the request.
    fclose($socket);

    return $response;
}
|
|
|
|
|
2008-11-18 23:10:33 +00:00
|
|
|
/**
 * Enable pings to google.com whenever sitemap changes.
 *
 * Sets the 'google_notification_enabled' config value of GoogleSitemap
 * to true, after removing any previously set value.
 *
 * @deprecated 1.1
 * @return void
 */
public static function enable_google_notification() {
    // The notice names this method, so the log points at the call that
    // actually needs to be migrated.
    Deprecation::notice('1.1', 'GoogleSitemap::enable_google_notification() is deprecated. Please use Config API instead. See documentation.');

    Config::inst()->remove('GoogleSitemap', 'google_notification_enabled');
    Config::inst()->update('GoogleSitemap', 'google_notification_enabled', true);
}
|
|
|
|
|
|
|
|
/**
 * Disables pings to google when the sitemap changes.
 *
 * Sets the 'google_notification_enabled' config value of GoogleSitemap
 * to false, after removing any previously set value.
 *
 * @deprecated 1.1
 * @return void
 */
public static function disable_google_notification() {
    // The notice names this method, so the log points at the call that
    // actually needs to be migrated.
    Deprecation::notice('1.1', 'GoogleSitemap::disable_google_notification() is deprecated. Please use Config API instead. See documentation.');

    Config::inst()->remove('GoogleSitemap', 'google_notification_enabled');
    Config::inst()->update('GoogleSitemap', 'google_notification_enabled', false);
}
|
|
|
|
|
2013-01-15 22:29:59 +13:00
|
|
|
|
2011-06-07 11:37:38 -07:00
|
|
|
/**
 * Enable Google Sitemap support. Requests to the sitemap.xml route will
 * result in an XML sitemap being provided.
 *
 * Sets the 'enabled' config value of GoogleSitemap to true, after
 * removing any previously set value.
 *
 * @deprecated 1.1
 * @return void
 */
public static function enable() {
    Deprecation::notice('1.1', 'GoogleSitemap::enable() is deprecated. Please use Config API instead. See documentation.');

    $configClass = 'GoogleSitemap';
    $setting = 'enabled';

    Config::inst()->remove($configClass, $setting);
    Config::inst()->update($configClass, $setting, true);
}
|
|
|
|
|
|
|
|
/**
 * Is GoogleSitemap enabled?
 *
 * Reads the 'enabled' config value of GoogleSitemap (using the
 * Config::INHERITED source option) and returns it as stored.
 *
 * @return boolean
 */
public static function enabled() {
    $enabled = Config::inst()->get('GoogleSitemap', 'enabled', Config::INHERITED);

    return $enabled;
}
|
|
|
|
|
2011-06-07 11:37:38 -07:00
|
|
|
/**
 * Disable Google Sitemap support. Any requests to the sitemap.xml route
 * will produce a 404 response.
 *
 * Sets the 'enabled' config value of GoogleSitemap to false, after
 * removing any previously set value.
 *
 * @deprecated 1.1
 * @return void
 */
public static function disable() {
    Deprecation::notice('1.1', 'GoogleSitemap::disable() is deprecated. Please use Config API instead. See documentation.');

    $configClass = 'GoogleSitemap';
    $setting = 'enabled';

    Config::inst()->remove($configClass, $setting);
    Config::inst()->update($configClass, $setting, false);
}
|
2013-01-15 22:29:59 +13:00
|
|
|
}
|