mirror of
https://github.com/silverstripe/silverstripe-docsviewer
synced 2024-10-22 11:05:56 +02:00
26c66f494b
typo another typo ANOTHER typo update tests fixed regexs resolving test failures resolving test failures tidy up test tidy up refined regexs, output formatting, and tests further refined regexs typo in comments whitespace comment
540 lines
18 KiB
PHP
Executable File
540 lines
18 KiB
PHP
Executable File
<?php
|
|
|
|
/**
 * Pre-processes the markdown of a documentation page (image links, relative
 * links, "api:" links, heading anchors, code blocks) and renders it to HTML
 * with ParsedownExtra.
 *
 * @see http://michelf.com/projects/php-markdown/extra/
 *
 * @package docsviewer
 */
class DocumentationParser
{
    const CODE_BLOCK_BACKTICK = 1;
    const CODE_BLOCK_COLON = 2;

    /**
     * How often each heading text has been seen by
     * {@link _rewrite_heading_anchors_callback()}, keyed by the heading text.
     * Used to append "-2", "-3", ... to repeated headings. The counter is
     * static and never reset by this class, so it accumulates across calls.
     *
     * @var array
     */
    public static $heading_counts = array();

    /**
     * Parse a given path to the documentation for a file. Performs a case
     * insensitive lookup on the file system. Automatically appends the file
     * extension to one of the markdown extensions as well so /install/ in a
     * web browser will match /install.md or /INSTALL.md.
     *
     * Filepath: /var/www/myproject/src/cms/en/folder/subfolder/page.md
     * URL: http://myhost/mywebroot/dev/docs/2.4/cms/en/folder/subfolder/page
     * Webroot: http://myhost/mywebroot/
     * Baselink: dev/docs/2.4/cms/en/
     * Pathparts: folder/subfolder/page
     *
     * @param DocumentationPage $page
     * @param String $baselink Kept for backwards compatibility. It is not
     *                         used: relative links are rewritten against the
     *                         link of the page's entity.
     *
     * @return String|false Rendered HTML, or false when $page is not a
     *                      DocumentationPage.
     */
    public static function parse(DocumentationPage $page, $baselink = null)
    {
        if (!($page instanceof DocumentationPage)) {
            return false;
        }

        $md = $page->getMarkdown(true);

        // Pre-processing. Image links run first so that they already carry a
        // scheme by the time the generic link rewriting sees them.
        $md = self::rewrite_image_links($md, $page);
        $md = self::rewrite_relative_links($md, $page);

        $md = self::rewrite_api_links($md, $page);
        $md = self::rewrite_heading_anchors($md, $page);

        $md = self::rewrite_code_blocks($md);

        $parser = new ParsedownExtra();
        $parser->setBreaksEnabled(false);

        return $parser->text($md);
    }

    /**
     * Normalises the supported code block notations into backtick fenced
     * blocks:
     *
     *  - blocks opened with ":::lang" followed by tab-indented lines,
     *  - blocks fenced with three backticks (optionally tab-indented),
     *  - plain tab-indented blocks without any fence.
     *
     * @param String $md Markdown content
     *
     * @return String Markdown with fenced code blocks
     */
    public static function rewrite_code_blocks($md)
    {
        $started = false;
        $inner = false;
        $mode = false;
        $end = false;

        $lines = explode("\n", $md);
        $output = array();

        foreach ($lines as $i => $line) {
            // Outside of a block, drop lines consisting of a single whitespace
            // character. Prevents code blocks with leading tabs adding an
            // extra line.
            if (!$started && preg_match('/^\s$/', $line)) {
                continue;
            }

            if (!$started && preg_match('/^[\t]*:::\s*(.*)/', $line, $matches)) {
                // first line with custom ":::" formatting
                $started = true;
                $mode = self::CODE_BLOCK_COLON;

                $output[$i] = '```' . (isset($matches[1]) ? trim($matches[1]) : '');
            } elseif (!$started && preg_match('/^\t*```\s*(.*)/', $line, $matches)) {
                // opening backtick fence
                $started = true;
                $mode = self::CODE_BLOCK_BACKTICK;

                $output[$i] = '```' . (isset($matches[1]) ? trim($matches[1]) : '');
            } elseif ($started && $mode === self::CODE_BLOCK_BACKTICK) {
                // inside a backtick fenced block
                if (preg_match('/^\t*```\s*/', $line)) {
                    // Closing fence. Nothing is stored for this line here; the
                    // finalize step below writes the fence into this slot.
                    $end = true;
                } else {
                    $output[$i] = $line;
                    $inner = true;
                }
            } elseif (preg_match('/^[\ ]{0,3}?[\t](.*)/', $line, $matches)) {
                // Inner line of a block, or first line of a standard tab
                // indented markdown code block. The regex removes the first
                // tab (any following tabs are part of the code).
                if (!$started) {
                    // No fence seen yet: open one on the preceding line. A
                    // preceding line that holds real content is kept and the
                    // fence appended to it, rather than being overwritten.
                    $previous = $i - 1;

                    if (isset($output[$previous]) && trim($output[$previous]) !== '') {
                        $output[$previous] .= "\n```";
                    } else {
                        $output[$previous] = '```';
                    }
                }

                $output[$i] = $matches[1];
                $inner = true;
                $started = true;
            } elseif ($started && $inner && trim($line) === '') {
                // Whitespace only line inside a block: keep it for now, it is
                // tidied up when the block gets finalized.
                $output[$i] = $line;
            } elseif ($started && $inner) {
                // The line is neither blank nor tab indented, e.g
                // > code
                // > \n
                // > some message
                //
                // The block ended on the previous line, so keep this line and
                // step $i back so the closing fence lands before it.
                $end = true;
                $output[$i] = $line;
                $i = $i - 1;
            } else {
                $output[$i] = $line;
            }

            if ($end) {
                $output = self::finalize_code_output($i, $output);

                // reset state
                $started = $inner = $mode = $end = false;
            }
        }

        // input ended while a block was still open
        if ($started) {
            $output = self::finalize_code_output($i + 1, $output);
        }

        return implode("\n", $output);
    }

    /**
     * Adds the closing code backticks at position $i. A slot that is missing
     * or only holds whitespace is replaced by the fence; a slot holding
     * content keeps it and gets the fence appended on a new line.
     *
     * @param int $i Index into $output where the block ends
     * @param array $output Output lines collected so far
     *
     * @return array
     */
    private static function finalize_code_output($i, $output)
    {
        // Compare against '' explicitly: a code line consisting of "0" is
        // content, although it is falsy in a boolean context.
        if (isset($output[$i]) && trim($output[$i]) !== '') {
            $output[$i] .= "\n```\n";
        } else {
            $output[$i] = "```";
        }

        return $output;
    }

    /**
     * Collapses "segment/../" pairs in a URL or path.
     *
     * Only whole path segments are removed ("my-dir/../" and "2.4/../" go as
     * a unit) and "." / ".." are never treated as the segment to drop. The
     * loop stops as soon as a pass changes nothing, so input containing ".."
     * that cannot be resolved is returned as far as it could be resolved.
     *
     * @param String $url
     *
     * @return String
     */
    private static function resolve_parent_segments($url)
    {
        do {
            $before = $url;
            $url = preg_replace('#(?<![^/])(?!\.{1,2}/)[^/]+/\.\./#', '', $before);
        } while ($url !== null && $url !== $before);

        // preg_replace() yields null on a PCRE error; fall back to the last
        // good value in that case.
        return ($url === null) ? $before : $url;
    }

    /**
     * Rewrites the URLs of markdown images (![title](url)) to absolute URLs.
     * URLs that already carry a scheme are left untouched.
     *
     * @param String $md Markdown content
     * @param DocumentationPage $page
     *
     * @return String Markdown
     */
    public static function rewrite_image_links($md, $page)
    {
        // Links with titles
        $re = '/
            !
            \[
                (.*?) # image title (non greedy)
            \]
            \(
                (.*?) # image url (non greedy)
            \)
        /x';
        preg_match_all($re, $md, $images);

        foreach ($images[0] as $i => $match) {
            $title = $images[1][$i];
            $url = $images[2][$i];

            // Don't process absolute links (based on protocol detection)
            $urlParts = parse_url($url);

            if ($urlParts && isset($urlParts['scheme'])) {
                continue;
            }

            if (substr($url, 0, 1) == '/') {
                // a leading slash means the path is relative to the entity
                $relativeUrl = str_replace(BASE_PATH, '', Controller::join_links(
                    $page->getEntity()->getPath(),
                    $url
                ));
            } else {
                // otherwise it is relative to the folder of the page
                $baselink = Director::makeRelative(
                    dirname($page->getPath())
                );

                $relativeUrl = rtrim($baselink, '/') . '/' . ltrim($url, '/');
            }

            // Resolve relative paths
            $relativeUrl = self::resolve_parent_segments($relativeUrl);

            // Make it absolute again
            $absoluteUrl = Controller::join_links(
                Director::absoluteBaseURL(),
                $relativeUrl
            );

            // Replace in original content
            $md = str_replace(
                $match,
                sprintf('![%s](%s)', $title, $absoluteUrl),
                $md
            );
        }

        return $md;
    }

    /**
     * Rewrite links with special "api:" prefix to html as in the following example:
     *
     * (1) [api:DataObject] gets re-written to
     *     <a href="http://api.silverstripe.org/search/lookup/?q=DataObject&version=2.4&module=framework">DataObject</a>
     * (2) [api:DataObject::$defaults] gets re-written to
     *     <a href="http://api.silverstripe.org/search/lookup/?q=DataObject::$defaults&version=2.4&module=framework">DataObject::$defaults</a>
     * (3) [api:DataObject::populateDefaults()] gets re-written to
     *     <a href="http://api.silverstripe.org/search/lookup/?q=DataObject::populateDefaults()&version=2.4&module=framework">DataObject::populateDefaults()</a>
     * (4) [Title](api:DataObject) gets re-written to
     *     <a href="http://api.silverstripe.org/search/lookup/?q=DataObject&version=2.4&module=framework">Title</a>
     * (5) [Title](api:DataObject::$defaults) gets re-written to
     *     <a href="http://api.silverstripe.org/search/lookup/?q=DataObject::$defaults&version=2.4&module=framework">Title</a>
     * (6) [Title](api:DataObject->populateDefaults()) gets re-written to
     *     <a href="http://api.silverstripe.org/search/lookup/?q=DataObject->populateDefaults()&version=2.4&module=framework">Title</a>
     *
     * The link target is copied into the query string verbatim. Version and
     * module are taken from the page and its entity.
     *
     * A link that is directly enclosed in a pair of backticks is left
     * untouched so that it renders as inline code.
     *
     * The markdown parser gets confused by the extra pair of parentheses in links of the form [DataObject](api:DataObject->populateDefaults()) so
     * all links are re-written as html markup instead of markdown [Title](url). This also prevents other markdown parsing problems.
     *
     * @param String $markdown
     * @param DocumentationPage $doc_page
     * @return String
     */
    public static function rewrite_api_links($markdown, $doc_page)
    {
        $version = $doc_page->getVersion();
        $module = $doc_page->getEntity()->getKey();

        // output format for api links that are not enclosed in backticks
        $html_format = '<a href="http://api.silverstripe.org/search/lookup/?q=%s&version=%s&module=%s">%s</a>';

        // One pattern for all link forms, each with an optional backtick on
        // either side. Within the titled form the "method()" target is tried
        // first, otherwise the target would stop at the first ")" and leave
        // a stray ")" behind (form 6).
        $pattern = '#
            (?<open>`?)
            (?:
                (?<titled>
                    \[ (?<title>[^\]]*) \]
                    \( api: (?<target> [^\)]*\(\) | [^\)]* ) \)
                )
                |
                \[ api: (?<bare>[^\]]*) \]
            )
            (?<close>`?)
        #x';

        $rewritten = preg_replace_callback(
            $pattern,
            function ($found) use ($html_format, $version, $module) {
                $open = $found['open'];
                $close = isset($found['close']) ? $found['close'] : '';

                // enclosed in backticks on both sides: keep as inline code
                if ($open === '`' && $close === '`') {
                    return $found[0];
                }

                if ($found['titled'] !== '') {
                    $title = $found['title'];
                    $link = $found['target'];
                } else {
                    $title = $found['bare'];
                    $link = $found['bare'];
                }

                // a backtick on one side only is not part of the link
                return $open . sprintf($html_format, $link, $version, $module, $title) . $close;
            },
            $markdown
        );

        // preg_replace_callback() yields null on a PCRE error; hand back the
        // input unchanged in that case.
        return ($rewritten === null) ? $markdown : $rewritten;
    }

    /**
     * Appends an explicit "{#id}" anchor to every line starting with "#"
     * that does not already carry one.
     *
     * @param String $md Markdown content
     * @param DocumentationPage $page Not used
     *
     * @return String Markdown
     */
    public static function rewrite_heading_anchors($md, $page)
    {
        $re = '/^\#+(.*)/m';
        $md = preg_replace_callback($re, array('DocumentationParser', '_rewrite_heading_anchors_callback'), $md);

        return $md;
    }

    /**
     * Callback for {@link rewrite_heading_anchors()}.
     *
     * Returns the heading unchanged when it already contains a "{#...}"
     * anchor. Otherwise appends an anchor generated from the heading text;
     * from the second occurrence of the same text onwards the occurrence
     * count from {@link $heading_counts} is appended to keep the id unique.
     *
     * @param array $matches Index 0 is the full heading line, index 1 the
     *                       text following the leading "#" characters.
     *
     * @return String
     */
    public static function _rewrite_heading_anchors_callback($matches)
    {
        $heading = $matches[0];
        $headingText = $matches[1];

        if (preg_match('/\{\#.*\}/', $headingText)) {
            return $heading;
        }

        if (!isset(self::$heading_counts[$headingText])) {
            self::$heading_counts[$headingText] = 1;
        } else {
            self::$heading_counts[$headingText]++;
            $headingText .= "-" . self::$heading_counts[$headingText];
        }

        return sprintf("%s {#%s}", preg_replace('/\n/', '', $heading), self::generate_html_id($headingText));
    }

    /**
     * Generate an html element id from a string: ampersands (plain or as the
     * "&amp;" entity) become "-and-", every other run of characters outside
     * A-Z, a-z and 0-9 becomes a single "-", leading and trailing "-" are
     * removed and the result is lower-cased.
     *
     * @param String $title
     *
     * @return String
     */
    public static function generate_html_id($title)
    {
        $t = $title;
        // the entity has to go first, otherwise its "amp;" part is left over
        $t = str_replace('&amp;', '-and-', $t);
        $t = str_replace('&', '-and-', $t);
        $t = preg_replace('/[^A-Za-z0-9]+/', '-', $t);
        $t = preg_replace('/-+/', '-', $t);
        $t = trim($t, '-');
        $t = strtolower($t);

        return $t;
    }

    /**
     * Resolves all relative links within markdown.
     *
     * Links with an "api:" prefix and links carrying a scheme are skipped.
     * Links containing "_images" are resolved against the folder of the
     * page's file; all other links against the link of the page's entity.
     *
     * @param String $md Markdown content
     * @param DocumentationPage $page
     *
     * @return String Markdown
     */
    public static function rewrite_relative_links($md, $page)
    {
        $baselink = $page->getEntity()->Link();

        $re = '/
            ([^\!]?) # exclude image format
            \[
                (.*?) # link title (non greedy)
            \]
            \(
                (.*?) # link url (non greedy)
            \)
        /x';
        preg_match_all($re, $md, $matches);

        // Link of the folder the page lives in. An index page stands for its
        // folder, so its own link is used. strpos() returns 0 for a path that
        // starts with "index.md", hence the explicit comparison with false.
        if (strpos($page->getRelativePath(), 'index.md') !== false) {
            $relativeLink = $page->getRelativeLink();
        } else {
            $relativeLink = dirname($page->getRelativeLink());
        }

        if ($relativeLink == ".") {
            $relativeLink = '';
        }

        // file base link
        $fileBaseLink = Director::makeRelative(dirname($page->getPath()));

        foreach ($matches[0] as $i => $match) {
            $title = $matches[2][$i];
            $url = $matches[3][$i];

            // Don't process API links
            if (preg_match('/^api:/', $url)) {
                continue;
            }

            // Don't process absolute links (based on protocol detection)
            $urlParts = parse_url($url);
            if ($urlParts && isset($urlParts['scheme'])) {
                continue;
            }

            if (preg_match('/_images/', $url)) {
                // for images we need to use the file base path
                $relativeUrl = Controller::join_links(
                    Director::absoluteBaseURL(),
                    $fileBaseLink,
                    $url
                );
            } elseif (preg_match('/^\//', $url)) {
                // Absolute: Only path to module base
                $relativeUrl = Controller::join_links($baselink, $url, '/');
            } else {
                // Relative: Include path to module base and any folders
                $relativeUrl = Controller::join_links($baselink, $relativeLink, $url, '/');
            }

            // Resolve relative paths
            $relativeUrl = self::resolve_parent_segments($relativeUrl);

            // Replace any double slashes (apart from protocol)
            $relativeUrl = preg_replace('/([^:])\/{2,}/', '$1/', $relativeUrl);

            // Replace in original content. $matches[1] is the character in
            // front of the link that the pattern consumed, so put it back.
            $md = str_replace(
                $match,
                sprintf('%s[%s](%s)', $matches[1][$i], $title, $relativeUrl),
                $md
            );
        }

        return $md;
    }

    /**
     * Reads "key: value" pairs from the markdown of a page and stores each
     * of them on the page through setMetaData(). The pattern is applied to
     * the whole markdown text; the markdown itself is not modified here.
     *
     * @param DocumentationPage $page
     */
    public static function retrieve_meta_data(DocumentationPage &$page)
    {
        $md = $page->getMarkdown();

        if (!$md) {
            return;
        }

        $found = preg_match_all('/
            (?<key>[A-Za-z0-9_-]+):
            \s*
            (?<value>.*)
        /x', $md, $meta);

        if (!$found) {
            return;
        }

        foreach ($meta['key'] as $index => $key) {
            if (isset($meta['value'][$index])) {
                $page->setMetaData($key, $meta['value'][$index]);
            }
        }
    }
}
|