2010-06-24 16:22:41 +02:00
|
|
|
<?php
|
|
|
|
/**
 * Parser wrapping the Markdown Extra parser (see http://michelf.com/projects/php-markdown/extra/).
 * It parses files in the context of a DocumentationPage: image links, relative links,
 * "api:" links and heading anchors are rewritten before the markdown is rendered.
 *
 * @todo Abstract code that doesn't require DocumentationPage context into standalone parser
 * extending the Markdown_Extra class.
 *
 * @package sapphiredocs
 */
class DocumentationParser {

	/**
	 * @var String Rewriting of api links in the format "[api:MyClass]" or "[api:MyClass::$my_property]".
	 * Used as a sprintf() format receiving the link subject, the page version and the module folder.
	 */
	static $api_link_base = 'http://api.silverstripe.org/search/lookup/?q=%s&version=%s&module=%s';

	/**
	 * @var array Map of heading text to the number of times that heading has been seen by
	 * {@link rewrite_heading_anchors()}. Used to suffix duplicate headings so their ids stay unique.
	 * Note: this class never resets the map, so counts carry over between parse() calls.
	 */
	static $heading_counts = array();

	/**
	 * Converts the markdown of a documentation page into HTML.
	 *
	 * Before the markdown is handed to the Markdown parser, image links, relative links,
	 * "api:" links and heading anchors are rewritten (see the rewrite_* methods).
	 *
	 * Example of the terms used when rewriting links:
	 *
	 * Filepath: /var/www/myproject/src/cms/en/folder/subfolder/page.md
	 * URL: http://myhost/mywebroot/dev/docs/2.4/cms/en/folder/subfolder/page
	 * Webroot: http://myhost/mywebroot/
	 * Baselink: dev/docs/2.4/cms/en/
	 * Pathparts: folder/subfolder/page
	 *
	 * @param DocumentationPage $page
	 * @param String $baselink Link relative to webroot, up until the "root" of the module.
	 *  Necessary to rewrite relative links
	 *
	 * @return String HTML
	 */
	public static function parse(DocumentationPage $page, $baselink = null) {
		$md = $page->getMarkdown();

		// Pre-processing
		$md = self::rewrite_image_links($md, $page);
		$md = self::rewrite_relative_links($md, $page, $baselink);
		$md = self::rewrite_api_links($md, $page);
		$md = self::rewrite_heading_anchors($md, $page);
		// $md = self::rewrite_code_blocks($md, $page);

		// NOTE(review): this relative path is not anchored to this file's directory,
		// so it depends on the include path / working directory of the request.
		require_once('../sapphiredocs/thirdparty/markdown.php');
		$html = Markdown($md);

		return $html;
	}

	/*
	function rewrite_code_blocks($md) {
		$tabwidth = (defined('MARKDOWN_TAB_WIDTH')) ? MARKDOWN_TAB_WIDTH : 4;
		$md = preg_replace_callback('{
				(?:\n\n|\A\n?)
				[ ]*(\{[a-zA-Z]*\})? # lang
				[ ]* \n # Whitespace and newline following marker.
				(	            # $1 = the code block -- one or more lines, starting with a space/tab
				  (?>
					[ ]{'.$tabwidth.'}  # Lines must start with a tab or a tab-width of spaces
					.*\n+
				  )+
				)
				((?=^[ ]{0,'.$tabwidth.'}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
			}xm',
			array('DocumentationParser', '_do_code_blocks'), $md);

		return $md;
	}
	static function _do_code_blocks($matches) {
		$tabwidth = (defined('MARKDOWN_TAB_WIDTH')) ? MARKDOWN_TAB_WIDTH : 4;
		$codeblock = $matches[2];

		// outdent
		$codeblock = preg_replace('/^(\t|[ ]{1,'.$tabwidth.'})/m', '', $codeblock);
		$codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);

		# trim leading newlines and trailing newlines
		$codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);

		$codeblock = "<pre><code>$codeblock\n</code></pre>";
		return "\n\n".$this->hashBlock($codeblock)."\n\n";
	}
	*/

	/**
	 * Rewrites relative image links ("![title](url)") into absolute URLs.
	 * The image url is resolved against the folder of the page and prefixed with
	 * Director::absoluteBaseURL(). Links which already carry a protocol are left untouched.
	 *
	 * @param String $md Markdown content
	 * @param DocumentationPage $page
	 * @return String Markdown
	 */
	static function rewrite_image_links($md, $page) {
		$re = '/
			!
			\[
				(.*?) # image title (non greedy)
			\]
			\(
				(.*?) # image url (non greedy)
			\)
		/x';
		preg_match_all($re, $md, $images);

		if($images) foreach($images[0] as $i => $match) {
			$title = $images[1][$i];
			$url = $images[2][$i];

			// Don't process absolute links (based on protocol detection)
			$urlParts = parse_url($url);
			if($urlParts && isset($urlParts['scheme'])) continue;

			// Rewrite URL (relative or absolute)
			$baselink = Director::makeRelative(dirname($page->getPath()));
			$relativeUrl = rtrim($baselink, '/') . '/' . ltrim($url, '/');

			// Collapse double slashes and resolve "folder/../" parts
			$relativeUrl = self::normalise_relative_url($relativeUrl);

			// Make it absolute again
			$absoluteUrl = Director::absoluteBaseURL() . $relativeUrl;

			// Replace in original content
			$md = str_replace(
				$match,
				sprintf('![%s](%s)', $title, $absoluteUrl),
				$md
			);
		}

		return $md;
	}

	/**
	 * Rewrite links with special "api:" prefix, from two possible formats:
	 * 1. [api:DataObject]
	 * 2. [My Title](api:DataObject)
	 *
	 * Hack: Replaces any backticks with "<code>" blocks,
	 * as the currently used markdown parser doesn't resolve links in backticks,
	 * but does resolve in "<code>" blocks.
	 *
	 * @param String $md
	 * @param DocumentationPage $page
	 * @return String
	 */
	static function rewrite_api_links($md, $page) {
		// Links with titles
		$re = '/
			`?
			\[
				(.*?) # link title (non greedy)
			\]
			\(
				api:(.*?) # link url (non greedy)
			\)
			`?
		/x';
		preg_match_all($re, $md, $linksWithTitles);
		if($linksWithTitles) foreach($linksWithTitles[0] as $i => $match) {
			$title = $linksWithTitles[1][$i];
			$subject = $linksWithTitles[2][$i];
			$url = sprintf(self::$api_link_base, $subject, $page->getVersion(), $page->getEntity()->getModuleFolder());
			$md = str_replace(
				$match,
				sprintf('<code>[%s](%s)</code>', $title, $url),
				$md
			);
		}

		// Bare links
		$re = '/
			`?
			\[
				api:(.*?)
			\]
			`?
		/x';
		preg_match_all($re, $md, $links);
		if($links) foreach($links[0] as $i => $match) {
			$subject = $links[1][$i];
			$url = sprintf(self::$api_link_base, $subject, $page->getVersion(), $page->getEntity()->getModuleFolder());
			$md = str_replace(
				$match,
				sprintf('<code>[%s](%s)</code>', $subject, $url),
				$md
			);
		}

		return $md;
	}

	/**
	 * Appends a Markdown Extra id attribute ("{#some-id}") to every "#"-style heading
	 * which doesn't define one already.
	 *
	 * Only lines starting with "#" are treated as headings, so a "#" inside a line
	 * (e.g. the fragment of a link url) is left alone.
	 *
	 * @param String $md Markdown content
	 * @param DocumentationPage $page Not used by this method
	 * @return String Markdown
	 */
	static function rewrite_heading_anchors($md, $page) {
		$re = '/
			^\#+(.*)
		/xm';
		$md = preg_replace_callback($re, array('DocumentationParser', '_rewrite_heading_anchors_callback'), $md);

		return $md;
	}

	/**
	 * Callback for {@link rewrite_heading_anchors()}.
	 * Repeated heading texts get a numeric suffix ("-2", "-3", ...) based on
	 * {@link $heading_counts}.
	 *
	 * @param array $matches Index 0 holds the whole heading line, index 1 the text after the "#" markers
	 * @return String The heading line followed by its id attribute
	 */
	static function _rewrite_heading_anchors_callback($matches) {
		$heading = $matches[0];
		$headingText = $matches[1];

		// The author already provided an explicit id
		if(preg_match('/\{\#.*\}/', $headingText)) return $heading;

		if(!isset(self::$heading_counts[$headingText])) {
			self::$heading_counts[$headingText] = 1;
		}
		else {
			self::$heading_counts[$headingText]++;
			$headingText .= "-" . self::$heading_counts[$headingText];
		}

		// Strip trailing whitespace (e.g. the "\r" of a Windows line ending),
		// otherwise the id attribute would not directly follow the heading.
		return sprintf("%s {#%s}", rtrim($heading), self::generate_html_id($headingText));
	}

	/**
	 * Generate an html element id from a string.
	 * Ampersands become "-and-", every other run of non-alphanumeric characters
	 * becomes a single dash, and leading/trailing dashes are removed.
	 *
	 * @param String $title
	 * @return String
	 */
	static function generate_html_id($title) {
		$t = $title;
		$t = str_replace('&amp;','-and-',$t);
		$t = str_replace('&','-and-',$t);
		// preg_replace() instead of ereg_replace(), which was removed in PHP 7.
		// Dashes are non-alphanumeric as well, so runs of dashes collapse here too.
		$t = preg_replace('/[^A-Za-z0-9]+/','-',$t);
		$t = trim($t, '-');

		return $t;
	}

	/**
	 * Resolves all relative links within markdown.
	 *
	 * Links starting with "/" are appended to $baselink, all other links are resolved
	 * against the folder of the page inside $baselink. Image links, "api:" links and
	 * links carrying a protocol are not touched.
	 *
	 * @param String $md Markdown content
	 * @param DocumentationPage $page
	 * @param String $baselink
	 * @return String Markdown
	 */
	static function rewrite_relative_links($md, $page, $baselink) {
		$re = '/
			(?<!\!) # exclude image format
			\[
				(.*?) # link title (non greedy)
			\]
			\(
				(.*?) # link url (non greedy)
			\)
		/x';
		preg_match_all($re, $md, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);

		// relative path (to module base folder), without the filename
		$relativePath = dirname($page->getRelativePath());
		if($relativePath == '.') $relativePath = '';

		// The result is rebuilt from the match offsets, so only the matched links
		// are replaced (and not equal looking text elsewhere in the document).
		$rewritten = '';
		$cursor = 0;

		if($matches) foreach($matches as $match) {
			list($link, $offset) = $match[0];
			$title = $match[1][0];
			$url = $match[2][0];

			// Don't process API links
			if(preg_match('/^api:/', $url)) continue;

			// Don't process absolute links (based on protocol detection)
			$urlParts = parse_url($url);
			if($urlParts && isset($urlParts['scheme'])) continue;

			// Rewrite URL (relative or absolute)
			if(preg_match('/^\//', $url)) {
				$relativeUrl = $baselink . $url;
			} else {
				$relativeUrl = $baselink . '/' . $relativePath . '/' . $url;
			}

			// Collapse double slashes and resolve "folder/../" parts
			$relativeUrl = self::normalise_relative_url($relativeUrl);

			// Copy everything since the last rewritten link, then the rewritten link
			$rewritten .= substr($md, $cursor, $offset - $cursor);
			$rewritten .= sprintf('[%s](%s)', $title, $relativeUrl);
			$cursor = $offset + strlen($link);
		}

		return $rewritten . substr($md, $cursor);
	}

	/**
	 * Collapses repeated slashes (apart from the ones following a protocol) and
	 * resolves "folder/../" sequences of a url.
	 *
	 * Works on whole path segments, so folder names containing dots or dashes
	 * ("2.4", "my-module") are handled, and ".." parts which can't be resolved
	 * (nothing left to go up from) are kept as they are.
	 *
	 * @param String $url
	 * @return String
	 */
	private static function normalise_relative_url($url) {
		// Replace any double slashes (apart from protocol)
		$url = preg_replace('/([^:])\/{2,}/', '$1/', $url);

		$resolved = array();
		foreach(explode('/', $url) as $segment) {
			$previous = end($resolved);
			if($segment === '..' && $resolved && $previous !== '..' && $previous !== '') {
				// step out of the previous folder
				array_pop($resolved);
			}
			else {
				$resolved[] = $segment;
			}
		}

		return implode('/', $resolved);
	}

	/**
	 * Find a documentation page given a path and a file name. It ignores the extensions
	 * and simply compares the title.
	 *
	 * Name may also be a path /install/foo/bar.
	 *
	 * @param String $modulePath Absolute path to the entity
	 * @param Array $path path to the file in the entity
	 *
	 * @return String|false - File path
	 */
	static function find_page($modulePath, $path) {
		return self::find_page_recursive($modulePath, $path);
	}

	/**
	 * Recursive function for finding the goal.
	 *
	 * Takes the first part of $goal and looks (case insensitive, ignoring extensions)
	 * for a matching entry in $base. A matching folder is searched for the remaining
	 * parts, or for its "index" file if no parts are left.
	 *
	 * @param String $base Folder to search in
	 * @param Array $goal Remaining path parts to resolve
	 *
	 * @return String|false Path of the matching file, false if nothing matches
	 */
	private static function find_page_recursive($base, $goal) {
		$name = strtolower((string) array_shift($goal));
		if(!$name) $name = 'index';

		// ignore the extension of the requested name, same as for the files below
		if($dot = strrpos($name, '.')) $name = substr($name, 0, $dot);

		if(!is_dir($base)) return false;

		$handle = opendir($base);
		if(!$handle) return false;

		$base = rtrim($base, '/');
		$extensions = DocumentationService::get_valid_extensions();
		$found = false;

		while (false !== ($file = readdir($handle))) {
			if(in_array($file, $extensions)) continue;

			// if the name has a . then take the substr
			$formatted = strtolower($file);
			$formatted = ($pos = strrpos($formatted, '.')) ? substr($formatted, 0, $pos) : $formatted;

			if($name === $formatted) {
				$found = $base . '/' . $file;
				break;
			}
		}

		// close the handle before recursing / returning, so it never leaks
		closedir($handle);

		if($found === false) return false;

		if(is_dir($found)) {
			// if this is a directory check that there is any more states to get
			// to in the goal. If none then what we want is the 'index.md' file
			return self::find_page_recursive($found, (count($goal) > 0) ? $goal : array('index'));
		}

		// goal state. End of recursion
		return $found;
	}

	/**
	 * String helper for cleaning a file name to a readable version.
	 * Dashes and underscores become spaces, everything from the first "." on is
	 * dropped (unless the name starts with it) and the first letter is uppercased.
	 *
	 * @param String $name to convert
	 *
	 * @return String $name output
	 */
	public static function clean_page_name($name) {
		// remove dashs and _
		$name = str_ireplace(array('-', '_'), ' ', $name);

		// remove extension
		$hasExtension = strpos($name, '.');
		if($hasExtension !== false && $hasExtension > 0) {
			$name = substr($name, 0, $hasExtension);
		}

		// convert first letter
		return ucfirst($name);
	}

	/**
	 * Return the children from a given folder sorted by Title using natural ordering.
	 * It is used for building the tree of the page.
	 *
	 * Each page is an ArrayData with a 'Title', a 'Filename' (lowercased name without
	 * extension) and a 'Path' (using the name as found on the file system).
	 *
	 * @param String $folder folder to read
	 * @param bool $recursive Recursive search
	 * @param DataObjectSet $pages set of pages matched so far
	 *
	 * @return DataObjectSet
	 */
	public static function get_pages_from_folder($folder, $recursive = false, &$pages = false) {
		if(!$pages) $pages = new DataObjectSet();

		$handle = (is_dir($folder)) ? opendir($folder) : false;
		if(!$handle) return $pages;

		$ignore = DocumentationService::get_ignored_files();
		$base = rtrim($folder, '/');

		// real file name => lowercased name (used for sorting and titles)
		$files = array();

		while (false !== ($file = readdir($handle))) {
			if(in_array($file, $ignore)) continue;

			// the path keeps the original case, as file systems may be case sensitive
			$path = $base . '/' . $file;

			// never descend into "." / "..", even if they are not on the ignore list
			if($recursive && is_dir($path) && $file != '.' && $file != '..') {
				self::get_pages_from_folder($path, true, $pages);
			}
			else {
				$files[$file] = strtolower($file);
			}
		}

		closedir($handle);

		natsort($files);

		foreach($files as $file => $lowercased) {
			$clean = ($pos = strrpos($lowercased, '.')) ? substr($lowercased, 0, $pos) : $lowercased;

			$pages->push(new ArrayData(array(
				'Title' => self::clean_page_name($lowercased),
				'Filename' => $clean,
				'Path' => $base . '/' . $file
			)));
		}

		return $pages;
	}
}
|