From 3d90271b31c25d99d6178f967a6ca3b8ee77eeda Mon Sep 17 00:00:00 2001 From: Will Rossiter Date: Tue, 21 Dec 2010 09:42:44 +0000 Subject: [PATCH] FEATURE: initial cut of Zend_Lucene_Search backend for documentation search. --- code/DocumentationEntity.php | 22 +- code/DocumentationPage.php | 93 +++++- code/DocumentationParser.php | 279 ++++------------- code/DocumentationSearch.php | 261 +++++++++------- code/DocumentationService.php | 293 +++++++++++++++--- code/DocumentationViewer.php | 61 ++-- .../Layout/DocumentationViewer_results.ss | 16 + tests/DocumentationParserTest.php | 32 +- tests/DocumentationSearchTest.php | 30 ++ tests/DocumentationServiceTest.php | 68 ++++ ...rTests.php => DocumentationViewerTest.php} | 8 +- tests/docs-search/en/index.md | 0 tests/docs-search/en/query-in-content.md | 1 + tests/docs-search/en/query-search.md | 0 .../en/subfolder/foo-folder/third-foo.md | 0 tests/docs-search/en/subfolder/foo.md | 1 + 16 files changed, 724 insertions(+), 441 deletions(-) create mode 100644 templates/Layout/DocumentationViewer_results.ss create mode 100644 tests/DocumentationSearchTest.php create mode 100644 tests/DocumentationServiceTest.php rename tests/{DocumentationViewerTests.php => DocumentationViewerTest.php} (97%) create mode 100644 tests/docs-search/en/index.md create mode 100644 tests/docs-search/en/query-in-content.md create mode 100644 tests/docs-search/en/query-search.md create mode 100644 tests/docs-search/en/subfolder/foo-folder/third-foo.md create mode 100644 tests/docs-search/en/subfolder/foo.md diff --git a/code/DocumentationEntity.php b/code/DocumentationEntity.php index 8ffccd5..9511523 100644 --- a/code/DocumentationEntity.php +++ b/code/DocumentationEntity.php @@ -23,22 +23,22 @@ class DocumentationEntity extends ViewableData { ); /** - * @var String $module folder name + * @var string $module folder name */ private $moduleFolder; /** - * @var String $title nice title + * @var string $title nice title */ private $title; /** - * @var Array $version version numbers and the paths to each + * @var array $version version numbers and the paths to each */ private $versions = array(); /** - * @var Array + * @var array */ private $currentVersion; @@ -51,13 +51,14 @@ class DocumentationEntity extends ViewableData { * Constructor. You do not need to pass the langs to this as * it will work out the languages from the filesystem * - * @param String $module name of module - * @param String $version version of this module - * @param String $path Absolute path to this module (excluding language folders) + * @param string $module name of module + * @param string $version version of this module + * @param string $path Absolute path to this module (excluding language folders) + * @param string $title */ - function __construct($module, $version = '', $path, $title = false) { + function __construct($module, $version, $path, $title = false) { $this->addVersion($version, $path); - $this->title = (!$title) ? $this->module : $title; + $this->title = (!$title) ? $module : $title; $this->moduleFolder = $module; } @@ -224,6 +225,9 @@ class DocumentationEntity extends ViewableData { return rtrim($path, '/') . '/' . rtrim($lang, '/') .'/'; } + /** + * @return string + */ function __toString() { return sprintf('DocumentationEntity: %s)', $this->getPath()); } diff --git a/code/DocumentationPage.php b/code/DocumentationPage.php index cf4e705..284986f 100644 --- a/code/DocumentationPage.php +++ b/code/DocumentationPage.php @@ -1,9 +1,8 @@ entity; } + /** + * @param DocumentationEntity + */ function setEntity($entity) { $this->entity = $entity; } /** - * @return String Relative path to file or folder within the entity (including file extension), - * but excluding version or language folders. + * @return string */ function getRelativePath() { return $this->relativePath; } + /** + * @param string + */ function setRelativePath($path) { $this->relativePath = $path; } @@ -55,21 +66,60 @@ class DocumentationPage extends ViewableData { /** * Absolute path including version and lang folder. * - * @return String + * @throws InvalidArgumentException + * + * @return string */ function getPath() { - $path = realpath(rtrim($this->entity->getPath($this->version, $this->lang), '/') . '/' . trim($this->getRelativePath(), '/')); + if($this->fullPath) { + return $this->fullPath; + } + elseif($this->entity) { + $path = realpath(rtrim($this->entity->getPath($this->version, $this->lang), '/') . '/' . trim($this->getRelativePath(), '/')); - if(!file_exists($path)) { - throw new InvalidArgumentException(sprintf( - 'Path could not be found. Module path: %s, file path: %s', - $this->entity->getPath(), - $this->relativePath - )); + if(!file_exists($path)) { + throw new InvalidArgumentException(sprintf( + 'Path could not be found. Module path: %s, file path: %s', + $this->entity->getPath(), + $this->relativePath + )); + } + } + else { + $path = $this->relativePath; } return $path; } + + /** + * Absolute path including version and lang to the file to read + * off the file system. In the case of a folder this is the index.md file + * + * @return string + */ + function getFilePath() { + $path = $this->getPath(); + + if(!is_dir($path)) return $path; + + if($entity = $this->getEntity()) { + if($relative = $this->getRelativePath()) { + return DocumentationService::find_page($entity, explode($relative, '/')); + } + else { + $parts = str_replace($entity->getPath($this->version, $this->lang), '', $this->fullPath); + + return DocumentationService::find_page($entity, explode($parts, '/')); + } + } + + return rtrim($path, '/') . '/index.md'; + } + + function setFullPath($path) { + $this->fullPath = $path; + } function getLang() { return $this->lang; @@ -86,14 +136,22 @@ class DocumentationPage extends ViewableData { function setVersion($version) { $this->version = $version; } - + + function setTitle($title) { + $this->title = $title; + } + + function getTitle() { + return $this->title; + } + /** * @return String */ function getMarkdown() { try { - $path = $this->getPath(); - + $path = $this->getFilePath(); + return file_get_contents($path); } catch(InvalidArgumentException $e) {} @@ -106,6 +164,7 @@ class DocumentationPage extends ViewableData { * @return String */ function getHTML($baselink = null) { - return DocumentationParser::parse($this, $baselink); + // if this is not a directory then we can to parse the file + return DocumentationParser::parse($this->getFilePath(), $baselink); } } \ No newline at end of file diff --git a/code/DocumentationParser.php b/code/DocumentationParser.php index e915123..eebb443 100644 --- a/code/DocumentationParser.php +++ b/code/DocumentationParser.php @@ -1,15 +1,11 @@ getMarkdown(); - - // Pre-processing - $md = self::rewrite_image_links($md, $page); - $md = self::rewrite_relative_links($md, $page, $baselink); - $md = self::rewrite_api_links($md, $page); - $md = self::rewrite_heading_anchors($md, $page); - // $md = self::rewrite_code_blocks($md, $page); - - require_once('../sapphiredocs/thirdparty/markdown.php'); - $html = Markdown($md); + if(!$page || (!$page instanceof DocumentationPage)) return false; - return $html; + $md = $page->getMarkdown(); + + // Pre-processing + $md = self::rewrite_image_links($md, $page); + $md = self::rewrite_relative_links($md, $page, $baselink); + $md = self::rewrite_api_links($md, $page); + $md = self::rewrite_heading_anchors($md, $page); + // $md = self::rewrite_code_blocks($md, $page); + + require_once('../sapphiredocs/thirdparty/markdown.php'); + $html = Markdown($md); + + return $html; } /* @@ -157,15 +155,17 @@ class DocumentationParser { `? /x'; preg_match_all($re, $md, $linksWithTitles); - if($linksWithTitles) foreach($linksWithTitles[0] as $i => $match) { - $title = $linksWithTitles[1][$i]; - $subject = $linksWithTitles[2][$i]; - $url = sprintf(self::$api_link_base, $subject, $page->getVersion(), $page->getEntity()->getModuleFolder()); - $md = str_replace( - $match, - sprintf('[%s](%s)', $title, $url), - $md - ); + if($linksWithTitles) { + foreach($linksWithTitles[0] as $i => $match) { + $title = $linksWithTitles[1][$i]; + $subject = $linksWithTitles[2][$i]; + $url = sprintf(self::$api_link_base, $subject, $page->getVersion(), $page->getEntity()->getModuleFolder()); + $md = str_replace( + $match, + sprintf('[%s](%s)', $title, $url), + $md + ); + } } // Bare links @@ -177,14 +177,16 @@ class DocumentationParser { `? /x'; preg_match_all($re, $md, $links); - if($links) foreach($links[0] as $i => $match) { - $subject = $links[1][$i]; - $url = sprintf(self::$api_link_base, $subject, $page->getVersion(), $page->getEntity()->getModuleFolder()); - $md = str_replace( - $match, - sprintf('[%s](%s)', $subject, $url), - $md - ); + if($links) { + foreach($links[0] as $i => $match) { + $subject = $links[1][$i]; + $url = sprintf(self::$api_link_base, $subject, $page->getVersion(), $page->getEntity()->getModuleFolder()); + $md = str_replace( + $match, + sprintf('[%s](%s)', $subject, $url), + $md + ); + } } return $md; @@ -259,187 +261,42 @@ class DocumentationParser { $relativePath = dirname($page->getRelativePath()); if($relativePath == '.') $relativePath = ''; - if($matches) foreach($matches[0] as $i => $match) { - $title = $matches[2][$i]; - $url = $matches[3][$i]; + if($matches) { + foreach($matches[0] as $i => $match) { + $title = $matches[2][$i]; + $url = $matches[3][$i]; - // Don't process API links - if(preg_match('/^api:/', $url)) continue; + // Don't process API links + if(preg_match('/^api:/', $url)) continue; - // Don't process absolute links (based on protocol detection) - $urlParts = parse_url($url); - if($urlParts && isset($urlParts['scheme'])) continue; + // Don't process absolute links (based on protocol detection) + $urlParts = parse_url($url); + if($urlParts && isset($urlParts['scheme'])) continue; - // Rewrite URL (relative or absolute) - if(preg_match('/^\//', $url)) { - $relativeUrl = $baselink . $url; - } else { - $relativeUrl = $baselink . '/' . $relativePath . '/' . $url; + // Rewrite URL (relative or absolute) + if(preg_match('/^\//', $url)) { + $relativeUrl = $baselink . $url; + } else { + $relativeUrl = $baselink . '/' . $relativePath . '/' . $url; + } + + // Resolve relative paths + while(strpos($relativeUrl, '..') !== FALSE) { + $relativeUrl = preg_replace('/\w+\/\.\.\//', '', $relativeUrl); + } + + // Replace any double slashes (apart from protocol) + $relativeUrl = preg_replace('/([^:])\/{2,}/', '$1/', $relativeUrl); + + // Replace in original content + $md = str_replace( + $match, + sprintf('%s[%s](%s)', $matches[1][$i], $title, $relativeUrl), + $md + ); } - - // Resolve relative paths - while(strpos($relativeUrl, '..') !== FALSE) { - $relativeUrl = preg_replace('/\w+\/\.\.\//', '', $relativeUrl); - } - - // Replace any double slashes (apart from protocol) - $relativeUrl = preg_replace('/([^:])\/{2,}/', '$1/', $relativeUrl); - - // Replace in original content - $md = str_replace( - $match, - sprintf('%s[%s](%s)', $matches[1][$i], $title, $relativeUrl), - $md - ); } return $md; } - - /** - * Find a documentation page given a path and a file name. It ignores the extensions - * and simply compares the title. - * - * Name may also be a path /install/foo/bar. - * - * @param String $modulePath Absolute path to the entity - * @param Array $path path to the file in the entity - * - * @return String|false - File path - */ - static function find_page($modulePath, $path) { - return self::find_page_recursive($modulePath, $path); - } - - /** - * Recursive function for finding the goal - */ - private static function find_page_recursive($base, $goal) { - $handle = opendir($base); - - $name = strtolower(array_shift($goal)); - - if(!$name) $name = 'index'; - - if($handle) { - $extensions = DocumentationService::get_valid_extensions(); - - while (false !== ($file = readdir($handle))) { - if(in_array($file, DocumentationService::get_valid_extensions())) continue; - - $formatted = strtolower($file); - - // if the name has a . then take the substr - $formatted = ($pos = strrpos($formatted, '.')) ? substr($formatted, 0, $pos) : $formatted; - $name = ($dot = strrpos($name, '.')) ? substr($name, 0, $dot) : $name; - - // the folder is the one that we are looking for. - if(strtolower($name) == strtolower($formatted)) { - if(is_dir($base . $file)) { - // if this is a directory check that there is any more states to get - // to in the goal. If none then what we want is the 'index.md' file - if(count($goal) > 0) { - return self::find_page_recursive($base . $file, $goal); - } - else { - // recurse but check for an index.md file next time around - return self::find_page_recursive($base . $file, array('index')); - } - } - else { - // goal state. End of recursion - $result = $base .'/'. $file; - - return $result; - } - } - } - } - - closedir($handle); - } - - /** - * String helper for cleaning a file name to a readable version. - * - * @param String $name to convert - * - * @return String $name output - */ - public static function clean_page_name($name) { - // remove dashs and _ - $name = str_replace(array('-', '_'), ' ', $name); - - // remove extension - $hasExtension = strrpos($name, '.'); - - if($hasExtension !== false && $hasExtension > 0) { - $name = substr($name, 0, $hasExtension); - } - - // convert first letter - return ucfirst(trim($name)); - } - - - /** - * Return the children from a given module sorted by Title using natural ordering. - * It is used for building the tree of the page. - * - * @param String module name - * @param bool Recursive search - * @param DataObjectSet set of pages matched so far - * - * @throws Exception - * @return DataObjectSet - */ - public static function get_pages_from_folder($folder, $recursive = false, &$pages = false) { - $output = new DataObjectSet(); - - if(!$pages) $pages = new DataObjectSet(); - - if(!is_dir($folder)) throw new Exception(sprintf('%s is not a folder', $folder)); - - $handle = opendir($folder); - - if($handle) { - $extensions = DocumentationService::get_valid_extensions(); - $ignore = DocumentationService::get_ignored_files(); - $files = array(); - - while (false !== ($file = readdir($handle))) { - if(!in_array($file, $ignore)) { - $file = trim(strtolower($file), '/'); - $path = rtrim($folder, '/') . '/'. $file; - - if($recursive && is_dir($path)) { - self::get_pages_from_folder($path, true, $pages); - } - else { - $files[] = $file; - } - } - } - - natsort($files); - - if($files) { - foreach($files as $file) { - $clean = ($pos = strrpos($file, '.')) ? substr($file, 0, $pos) : $file; - $path = rtrim($folder, '/') . '/'. $file; - - $pages->push(new ArrayData(array( - 'Title' => self::clean_page_name($file), - 'Filename' => $clean, - 'Path' => $path - ))); - } - } - - } - - closedir($handle); - - return $pages; - } } \ No newline at end of file diff --git a/code/DocumentationSearch.php b/code/DocumentationSearch.php index d1a066d..ae35f15 100644 --- a/code/DocumentationSearch.php +++ b/code/DocumentationSearch.php @@ -1,149 +1,198 @@ 'Text' - ); + private $totalResults; - static $allowed_actions = array('xml', 'search'); /** - * @var array Cached search results - */ - private $searchCache = array(); - - /** - * @var Int Page Length - */ - private $pageLength = 10; - - /** - * Generates the XML tree for {@link Sphinx} XML Pipes + * Folder name for indexes (in the temp folder). You can override it using + * {@link DocumentationSearch::set_index_location($)} * - * @uses DomDocument + * @var string */ - function xml() { - DocumentationService::load_automatic_registration(); - - $dom = new DomDocument('1.0'); - $dom->encoding = "utf-8"; - $dom->formatOutput = true; - $root = $dom->appendChild($dom->createElementNS('http://sphinxsearch.com', 'sphinx:docset')); - - $schema = $dom->createElement('sphinx:schema'); - - $field = $dom->createElement('sphinx:field'); - $attr = $dom->createElement('sphinx:attr'); - - foreach(array('Title','Content', 'Language', 'Module', 'Path') as $field) { - $node = $dom->createElement('sphinx:field'); - $node->setAttribute('name', strtolower($field)); - - $schema->appendChild($node); - } - - $root->appendChild($schema); - - // go through each documentation page and add it to index - $pages = $this->getAllDocumentationPages(); - - if($pages) { - foreach($pages as $doc) { - $node = $dom->createElement('sphinx:document'); - - $node->setAttribute('id', $doc->ID); - - foreach($doc->getArray() as $key => $value) { - $key = strtolower($key); - if($key == 'id') continue; - - $tmp = $dom->createElement($key); - $tmp->appendChild($dom->createTextNode($value)); - - $node->appendChild($tmp); - } - - $root->appendChild($node); - } - } - - return $dom->saveXML(); - } + private static $index_location = 'sapphiredocs'; + + static $allowed_actions = array( + 'buildindex' + ); /** * Generate an array of every single documentation page installed on the system. * - * @todo Add version support - * - * @return array + * @return DataObjectSet */ - private function getAllDocumentationPages() { + static function get_all_documentation_pages() { + DocumentationService::load_automatic_registration(); + $modules = DocumentationService::get_registered_modules(); $output = new DataObjectSet(); - if($modules) { foreach($modules as $module) { + foreach($module->getLanguages() as $language) { try { - $pages = DocumentationParser::get_pages_from_folder($module->getPath(false, $language)); - + $pages = DocumentationService::get_pages_from_folder($module); + if($pages) { foreach($pages as $page) { - $output->push(new ArrayData(array( - 'Title' => $page->Title, - 'Content' => file_get_contents($page->Path), - 'Path' => $page->Path, - 'Language' => $language, - 'ID' => base_convert(substr(md5($page->Path), -8), 16, 10) - ))); + $output->push($page); } } } - catch(Exception $e) {} + catch(Exception $e) { + user_error($e, E_USER_WARNING); + } } } } - + return $output; } + + /** + * Enable searching documentation + */ + public static function enable() { + if(!class_exists('ZendSearchLuceneSearchable')) { + return user_error('DocumentationSearch requires the ZendSearchLucene library', E_ERROR); + } + + self::$enabled = true; + + ZendSearchLuceneSearchable::enable(array()); + } + + /** + * @return bool + */ + public static function enabled() { + return self::$enabled; + } + + /** + * @param string + */ + public function set_index($index) { + self::$index_location = $index; + } /** - * Takes a search from the URL, performs a sphinx search and displays a search results - * template. - * - * @todo Add additional language / version filtering + * @return string */ - function search() { - $query = (isset($this->urlParams['ID'])) ? $this->urlParams['ID'] : false; - $results = false; - $keywords = ""; + public function get_index_location() { + return TEMP_FOLDER . '/'. trim(self::$index_location, '/'); + } + + /** + * Perform a search query on the index + * + * Rebuilds the index if it out of date + */ + public function performSearch($query) { + $this->buildindex(); + $index = Zend_Search_Lucene::open(self::get_index_location()); - if($query) { - $keywords = urldecode($query); + Zend_Search_Lucene::setResultSetLimit(200); + + $results = $index->find($query); - $start = isset($_GET['start']) ? (int)$_GET['start'] : 0; - - $cachekey = $query.':'.$start; + $this->results = new DataObjectSet(); + $this->totalResults = $index->numDocs(); + + foreach($results as $result) { + $data = $result->getDocument(); - if(!isset($this->searchCache[$cachekey])) { - $this->searchCache[$cachekey] = SphinxSearch::search('DocumentationPage', $keywords, array_merge_recursive(array( - 'start' => $start, - 'pagesize' => $this->pageLength - ))); - } - - $results = $this->searchCache[$cachekey]; + $this->results->push(new ArrayData(array( + 'Title' => DBField::create('Varchar', $data->Title), + 'Link' => DBField::create('Varchar',$data->Path), + 'Language' => DBField::create('Varchar',$data->Language), + 'Version' => DBField::create('Varchar',$data->Version) + ))); } + } + + /** + * @return DataObjectSet + */ + public function getResults($start) { + return $this->results; + } + + /** + * @return int + */ + public function getTotalResults() { + return (int) $this->totalResults; + } + + /** + * Builds the document index + */ + public function buildIndex() { + ini_set("memory_limit", -1); + ini_set('max_execution_time', 0); - return array( - 'Query' => DBField::create('Text', $keywords), - 'Results' => $results - ); + // only rebuild the index if we have to. Check for either flush or the time write.lock.file + // was last altered + $lock = self::get_index_location() .'/write.lock.file'; + $lockFileFresh = (file_exists($lock) && filemtime($lock) > (time() - (60 * 60 * 24))); + + if($lockFileFresh && !isset($_REQUEST['flush'])) return true; + + try { + $index = Zend_Search_Lucene::open(self::get_index_location()); + $index->removeReference(); + } + catch (Zend_Search_Lucene_Exception $e) { + + } + + try { + $index = Zend_Search_Lucene::create(self::get_index_location()); + } + catch(Zend_Search_Lucene_Exception $c) { + user_error($c); + } + + // includes registration + $pages = self::get_all_documentation_pages(); + + if($pages) { + $count = 0; + foreach($pages as $page) { + $count++; + + if(!is_dir($page->getPath())) { + var_dump("Indexing ". $page->getPath()); + $doc = Zend_Search_Lucene_Document_Html::loadHTML($page->getHtml()); + $doc->addField(Zend_Search_Lucene_Field::Text('Title', $page->getTitle())); + $doc->addField(Zend_Search_Lucene_Field::Keyword('Version', $page->getVersion())); + $doc->addField(Zend_Search_Lucene_Field::Keyword('Language', $page->getLang())); + $doc->addField(Zend_Search_Lucene_Field::Keyword('Path', $page->getPath())); + $index->addDocument($doc); + } + else { + var_dump("Not Indexing ". $page->getPath()); + } + } + } + + $index->commit(); + } + + public function optimizeIndex() { + $index = Zend_Search_Lucene::open(self::get_index_location()); + + if($index) $index->optimize(); } } \ No newline at end of file diff --git a/code/DocumentationService.php b/code/DocumentationService.php index c8ca249..4cc0ec0 100644 --- a/code/DocumentationService.php +++ b/code/DocumentationService.php @@ -25,47 +25,19 @@ class DocumentationService { 'de' => 'German' ); - /** * Files to ignore from any documentation listing. * - * @var Array + * @var array */ private static $ignored_files = array('.', '..', '.DS_Store', '.svn', '.git', 'assets', 'themes', '_images'); - - /** - * Set the ignored files list - * - * @param Array - */ - public function set_ignored_files($files) { - self::$ignored_files = $files; - } - - /** - * Return the list of files which are ignored - * - * @return Array - */ - public function get_ignored_files() { - return self::$ignored_files; - } - + /** * Case insenstive values to use as extensions on markdown pages. * - * @var Array + * @var array */ public static $valid_markdown_extensions = array('.md', '.txt', '.markdown'); - - /** - * Return the allowed extensions - * - * @return Array - */ - public static function get_valid_extensions() { - return self::$valid_markdown_extensions; - } /** * Registered modules to include in the documentation. Either pre-filled by the @@ -74,7 +46,7 @@ class DocumentationService { * * You can remove registered modules using {@link DocumentationService::unregister()} * - * @var Array + * @var array */ private static $registered_modules = array(); @@ -83,19 +55,46 @@ class DocumentationService { * the documentation but for sapphire/cms and overall we need to register major * versions via {@link DocumentationService::register} * - * @var Array + * @var array */ private static $major_versions = array(); /** * Return the major versions * - * @return Array + * @return array */ public static function get_major_versions() { return self::$major_versions; } + /** + * Return the allowed extensions + * + * @return array + */ + public static function get_valid_extensions() { + return self::$valid_markdown_extensions; + } + + /** + * Set the ignored files list + * + * @param array + */ + public function set_ignored_files($files) { + self::$ignored_files = $files; + } + + /** + * Return the list of files which are ignored + * + * @return array + */ + public function get_ignored_files() { + return self::$ignored_files; + } + /** * Check to see if a given language is registered in the system * @@ -181,6 +180,12 @@ class DocumentationService { */ public static function set_automatic_registration($bool = true) { self::$automatic_registration = $bool; + + if(!$bool) { + // remove current registed modules when disabling automatic registration + // needed to avoid caching issues when running all the tests + self::$registered_modules = array(); + } } /** @@ -227,8 +232,11 @@ class DocumentationService { * @return DocumentationEntity $module the registered module */ public static function is_registered_module($module, $version = false, $lang = false) { - if(isset(self::$registered_modules[$module])) { - $module = self::$registered_modules[$module]; + + $check = ($module instanceof DocumentationEntity) ? $module->getModuleFolder() : (string) $module; + + if(isset(self::$registered_modules[$check])) { + $module = self::$registered_modules[$check]; if($lang && !$module->hasLanguage($lang)) return false; if($version && !$module->hasVersion($version)) return false; @@ -250,7 +258,7 @@ class DocumentationService { */ public static function register($module, $path, $version = '', $title = false, $major = false) { if(!file_exists($path)) throw new InvalidArgumentException(sprintf('Path "%s" doesn\'t exist', $path)); - + // add the module to the registered array if(!isset(self::$registered_modules[$module])) { // module is completely new @@ -315,7 +323,7 @@ class DocumentationService { if($modules) { foreach($modules as $key => $module) { - if(is_dir(BASE_PATH .'/'. $module) && !in_array($module, self::$ignored_files, true)) { + if(is_dir(BASE_PATH .'/'. $module) && !in_array($module, self::get_ignored_files(), true)) { // check to see if it has docs $docs = BASE_PATH .'/'. $module .'/docs/'; @@ -337,4 +345,213 @@ class DocumentationService { public static function get_language_title($lang) { return (isset(self::$language_mapping[$lang])) ? _t("DOCUMENTATIONSERVICE.LANG-$lang", self::$language_mapping[$lang]) : $lang; } + + + /** + * Find a documentation page given a path and a file name. It ignores the extensions + * and simply compares the title. + * + * Name may also be a path /install/foo/bar. + * + * @param DocumentationEntity + * @param array exploded url string + * + * @return String|false - File path + */ + static function find_page($module, $path) { + if($module = self::is_registered_module($module)) { + return self::find_page_recursive($module->getPath(), $path); + } + + return false; + } + + /** + * Recursive function for finding the goal of a path to a documentation + * page + * + * @return string + */ + private static function find_page_recursive($base, $goal) { + $handle = opendir($base); + + $name = strtolower(array_shift($goal)); + if(!$name || $name == '/') $name = 'index'; + + if($handle) { + $extensions = DocumentationService::get_valid_extensions(); + $firstFile = false; + + // ensure we end with a slash + $base = rtrim($base, '/') .'/'; + + while (false !== ($file = readdir($handle))) { + + if(in_array($file, DocumentationService::get_valid_extensions())) continue; + + if(!$firstFile && !is_dir($base . $file)) $firstFile = $file; + + $formatted = strtolower($file); + + // if the name has a . then take the substr + $formatted = ($pos = strrpos($formatted, '.')) ? substr($formatted, 0, $pos) : $formatted; + $name = ($dot = strrpos($name, '.')) ? substr($name, 0, $dot) : $name; + + // the folder is the one that we are looking for. + if(strtolower($name) == strtolower($formatted)) { + + // if this file is a directory we could be displaying that + // or simply moving towards the goal. + if(is_dir($base . $file)) { + + $base = $base . trim($file, '/') .'/'; + + // if this is a directory check that there is any more states to get + // to in the goal. If none then what we want is the 'index.md' file + if(count($goal) > 0) { + return self::find_page_recursive($base, $goal); + } + else { + // recurse but check for an index.md file next time around + return self::find_page_recursive($base, array('index')); + } + } + else { + // goal state. End of recursion. + // tidy up the URLs with single trailing slashes + $result = $base . ltrim($file, '/'); + + if(is_dir($result)) $result = (rtrim($result, '/') . '/'); + + return $result; + } + } + } + } + + closedir($handle); + + // if goal has not been found and the index.md file does not exist then the next + // option is to pick the first file in the folder + return $base . ltrim($file, '/'); + } + + /** + * String helper for cleaning a file name to a readable version. + * + * @param String $name to convert + * + * @return String $name output + */ + public static function clean_page_name($name) { + // remove dashs and _ + $name = str_replace(array('-', '_'), ' ', $name); + + // remove extension + $name = self::trim_extension_off($name); + + // convert first letter + return ucfirst(trim($name)); + } + + /** + * Helper function to strip the extension off + * + * @param string + * + * @return string + */ + public static function trim_extension_off($name) { + $hasExtension = strrpos($name, '.'); + + if($hasExtension !== false && $hasExtension > 0) { + $name = substr($name, 0, $hasExtension); + } + + return $name; + } + + + /** + * Return the children from a given module sorted by Title using natural ordering. + * It is used for building the tree of the page. + * + * @param string|DocumentationEntity path + * @param bool enable several recursive calls (more than 1 level) + * @throws Exception + * @return DataObjectSet + */ + public static function get_pages_from_folder($module, $recursive = true) { + $output = new DataObjectSet(); + + $pages = array(); + if($module instanceof DocumentationEntity) { + if(self::is_registered_module($module)) { + self::get_pages_from_folder_recursive($module->getPath(), $module, $recursive, $pages); + } + else { + return user_error("$module is not registered", E_USER_WARNING); + } + } + else { + self::get_pages_from_folder_recursive($module, false, $recursive, $pages); + } + + if(count($pages) > 0) { + natsort($pages); + + foreach($pages as $key => $path) { + // get file name from the path + $file = ($pos = strrpos($path, '/')) ? substr($path, $pos + 1) : $path; + + // trim off the extension + + $page = new DocumentationPage(); + $page->setTitle(self::clean_page_name($file)); + $page->setFullPath($path); + $page->Filename = self::trim_extension_off($file); + + if($module instanceof DocumentationEntity) { + $page->setEntity($module); + } + + $output->push($page); + } + } + + return $output; + } + + /** + * Recursively search through $folder + */ + private static function get_pages_from_folder_recursive($folder, $module = false, $recusive, &$pages) { + if(!is_dir($folder)) throw new Exception(sprintf('%s is not a folder', $folder)); + + $handle = opendir($folder); + + if($handle) { + $extensions = self::get_valid_extensions(); + $ignore = self::get_ignored_files(); + $files = array(); + + while (false !== ($file = readdir($handle))) { + if(!in_array($file, $ignore)) { + $file = trim(strtolower($file), '/'); + $path = rtrim($folder, '/') . '/'. $file; + + if(is_dir($path)) { + $pages[] = $path; + + if($recusive) self::get_pages_from_folder_recursive($path, $module, $recusive, $pages); + } + else if(in_array(substr($file, (strrpos($file, '.'))), $extensions)) { + $pages[] = $path; + } + } + } + } + + closedir($handle); + } } \ No newline at end of file diff --git a/code/DocumentationViewer.php b/code/DocumentationViewer.php index 0eaf3a7..b773814 100755 --- a/code/DocumentationViewer.php +++ b/code/DocumentationViewer.php @@ -8,26 +8,18 @@ * * For more documentation on how to use this class see the documentation in /sapphiredocs/docs folder * - * To view the documentation in the browser use: - * - * http://yoursite.com/dev/docs/ Which is locked to ADMIN only - * - * @todo - Add ability to have docs on the front end as the main site. - * - Fix Language Selector (enabling it troubles the handleRequest when submitting) - * - SS_HTTPRequest when we ask for 10 params it gives us 10. Could be 10 blank ones. - * It would mean I could save alot of code if it only gave back an array of size X - * up to a maximum of 10... - * * @package sapphiredocs */ class DocumentationViewer extends Controller { static $allowed_actions = array( + 'home', 'LanguageForm', 'doLanguageForm', 'handleRequest', - 'DocumentationSearchForm' + 'DocumentationSearchForm', + 'results' ); /** @@ -104,7 +96,7 @@ class DocumentationViewer extends Controller { public function handleRequest(SS_HTTPRequest $request) { // if we submitted a form, let that pass if(!$request->isGET()) return parent::handleRequest($request); - + $firstParam = ($request->param('Action')) ? $request->param('Action') : $request->shift(); $secondParam = $request->shift(); $thirdParam = $request->shift(); @@ -351,10 +343,11 @@ class DocumentationViewer extends Controller { */ function getPage() { $module = $this->getModule(); + if(!$module) return false; - $absFilepath = DocumentationParser::find_page($module->getPath(), $this->Remaining); - + $absFilepath = DocumentationService::find_page($module, $this->Remaining); + if($absFilepath) { $relativeFilePath = str_replace($module->getPath(), '', $absFilepath); @@ -365,9 +358,9 @@ class DocumentationViewer extends Controller { $page->setVersion($this->Version); return $page; - } else { - return false; } + + return false; } /** @@ -379,7 +372,7 @@ class DocumentationViewer extends Controller { */ function getModulePages() { if($module = $this->getModule()) { - $pages = DocumentationParser::get_pages_from_folder($module->getPath()); + $pages = DocumentationService::get_pages_from_folder($module, false); if($pages) { foreach($pages as $page) { @@ -425,7 +418,7 @@ class DocumentationViewer extends Controller { $page->LinkingMode = (isset($this->Remaining[$level + 1])) ? 'section' : 'current'; if(is_dir($page->Path)) { - $children = DocumentationParser::get_pages_from_folder($page->Path); + $children = DocumentationService::get_pages_from_folder($page->Path, false); $segments = array(); for($x = 0; $x <= $level; $x++) { @@ -495,7 +488,7 @@ class DocumentationViewer extends Controller { if($i > 0) $path[] = $title; $output->push(new ArrayData(array( - 'Title' => DocumentationParser::clean_page_name($title), + 'Title' => DocumentationService::clean_page_name($title), 'Link' => $this->Link($path) ))); } @@ -605,24 +598,44 @@ class DocumentationViewer extends Controller { * @return Form */ function DocumentationSearchForm() { + if(!DocumentationSearch::enabled()) return false; + + $query = (isset($_REQUEST['Search'])) ? Convert::raw2xml($_REQUEST['Search']) : ""; $fields = new FieldSet( - new TextField('Search') + new TextField('Search', _t('DocumentationViewer.SEARCH', 'Search'), $query) ); $actions = new FieldSet( - new FormAction('doDocumentationSearchForm', 'Search') + new FormAction('results', 'Search') ); - return new Form($this, 'DocumentationSearchForm', $fields, $actions); + $form = new Form($this, 'DocumentationSearchForm', $fields, $actions); + $form->disableSecurityToken(); + + $form->setFormAction('home/DocumentationSearchForm'); + + return $form; } /** * Past straight to results, display and encode the query */ - function doDocumentationSearchForm($data, $form) { + function results($data, $form) { $query = (isset($data['Search'])) ? urlencode($data['Search']) : ""; + $start = (isset($_GET['start'])) ? (int) $_GET['start'] : 0; - $this->redirect('DocumentationSearch/search/'. $query); + $search = new DocumentationSearch(); + $search->performSearch($query); + + $results = $search->getResults($start); + $total = $search->getTotalResults(); + + echo $this->customise(array( + 'Results' => $results, + 'Query' => DBField::create('HTMLVarchar', $query), + 'Start' => DBField::create('HTMLVarchar', $start), + 'TotalResults' + ))->renderWith(array('DocumentationViewer_results', 'DocumentationViewer')); } } \ No newline at end of file diff --git a/templates/Layout/DocumentationViewer_results.ss b/templates/Layout/DocumentationViewer_results.ss new file mode 100644 index 0000000..c123142 --- /dev/null +++ b/templates/Layout/DocumentationViewer_results.ss @@ -0,0 +1,16 @@ +
+
+ <% if Results %> + <% control Results %> +

$Title

+ $Content.Summary + <% end_control %> + <% else %> +

No Results

+ <% end_if %> +
+ +
+ +
+
\ No newline at end of file diff --git a/tests/DocumentationParserTest.php b/tests/DocumentationParserTest.php index c792c60..bb5dd6c 100644 --- a/tests/DocumentationParserTest.php +++ b/tests/DocumentationParserTest.php @@ -204,37 +204,7 @@ class DocumentationParserTest extends SapphireTest { ); foreach($names as $key => $value) { - $this->assertEquals(DocumentationParser::clean_page_name($value), $should[$key]); + $this->assertEquals(DocumentationService::clean_page_name($value), $should[$key]); } } - - function testGetPagesFromFolder() { - $pages = DocumentationParser::get_pages_from_folder(BASE_PATH . '/sapphiredocs/tests/docs/en/'); - $this->assertContains('index', $pages->column('Filename'), 'Index'); - $this->assertContains('subfolder', $pages->column('Filename'), 'Foldername'); - $this->assertContains('test', $pages->column('Filename'), 'Filename'); - $this->assertNotContains('_images', $pages->column('Filename'), 'Ignored files'); - - // test the order of pages - $pages = DocumentationParser::get_pages_from_folder(BASE_PATH . '/sapphiredocs/tests/docs/en/sort'); - - $this->assertEquals( - array('1 basic', '2 intermediate', '3 advanced', '10 some page', '21 another page'), - $pages->column('Title') - ); - - } - - function testGetPagesFromFolderRecursive() { - $pages = DocumentationParser::get_pages_from_folder(BASE_PATH . '/sapphiredocs/tests/docs-recursive/en/', true); - // check to see all the pages are found, we don't care about order - $this->assertEquals($pages->Count(), 6); - - $pages = $pages->column('Title'); - - foreach(array('Index', 'Subfolder testfile', 'Subsubfolder testfile', 'Testfile') as $expected) { - $this->assertContains($expected, $pages); - } - } - } \ No newline at end of file diff --git a/tests/DocumentationSearchTest.php b/tests/DocumentationSearchTest.php new file mode 100644 index 0000000..09e781f --- /dev/null +++ b/tests/DocumentationSearchTest.php @@ -0,0 +1,30 @@ +assertEquals(7, $search->Count(), '5 pages. 5 pages in entire folder'); + } +} \ No newline at end of file diff --git a/tests/DocumentationServiceTest.php b/tests/DocumentationServiceTest.php new file mode 100644 index 0000000..8780306 --- /dev/null +++ b/tests/DocumentationServiceTest.php @@ -0,0 +1,68 @@ +assertEquals(BASE_PATH . "/sapphiredocs/tests/docs/en/test.md", $path); + + // the home page. The path finder should go to the index.md file in the default language + $path = DocumentationService::find_page('DocumentationViewerTests', array('')); + $this->assertEquals(BASE_PATH . "/sapphiredocs/tests/docs/en/index.md", $path); + + // second level + $path = DocumentationService::find_page('DocumentationViewerTests', array('subfolder', 'subpage')); + $this->assertEquals(BASE_PATH . "/sapphiredocs/tests/docs/en/subfolder/subpage.md", $path); + + $path = DocumentationService::find_page('DocumentationViewerTests', array('subfolder', 'subsubfolder')); + $this->assertEquals(BASE_PATH . "/sapphiredocs/tests/docs/en/subfolder/subsubfolder/", $path); + + // third level + $path = DocumentationService::find_page('DocumentationViewerTests', array('subfolder', 'subsubfolder', 'subsubpage')); + $this->assertEquals(BASE_PATH . "/sapphiredocs/tests/docs/en/subfolder/subsubfolder/subsubpage.md", $path); + + // with trailing slash + $path = DocumentationService::find_page('DocumentationViewerTests', array('subfolder', 'subsubfolder', 'subsubpage')); + $this->assertEquals(BASE_PATH . "/sapphiredocs/tests/docs/en/subfolder/subsubfolder/subsubpage.md", $path); + } + + function testGetPagesFromFolder() { + $pages = DocumentationService::get_pages_from_folder(BASE_PATH . '/sapphiredocs/tests/docs/en/'); + + $this->assertContains('index', $pages->column('Filename'), 'The tests/docs/en folder should contain a index file'); + $this->assertContains('subfolder', $pages->column('Filename'), 'The tests/docs/en folder should contain a subfolder called subfolder'); + $this->assertContains('test', $pages->column('Filename'), 'The tests/docs/en folder should contain a test file'); + $this->assertNotContains('_images', $pages->column('Filename'), 'It should not include hidden files'); + $this->assertNotContains('.svn', $pages->column('Filename'), 'It should not include hidden files'); + + // test the order of pages + $pages = DocumentationService::get_pages_from_folder(BASE_PATH . '/sapphiredocs/tests/docs/en/sort'); + + $this->assertEquals( + array('1 basic', '2 intermediate', '3 advanced', '10 some page', '21 another page'), + $pages->column('Title') + ); + } + + + function testGetPagesFromFolderRecursive() { + $pages = DocumentationService::get_pages_from_folder(BASE_PATH . '/sapphiredocs/tests/docs-recursive/en/'); + + // check to see all the pages are found, we don't care about order + $this->assertEquals($pages->Count(), 9); + + $pages = $pages->column('Title'); + + foreach(array('Index', 'Subfolder testfile', 'Subsubfolder testfile', 'Testfile') as $expected) { + $this->assertContains($expected, $pages); + } + } +} \ No newline at end of file diff --git a/tests/DocumentationViewerTests.php b/tests/DocumentationViewerTest.php similarity index 97% rename from tests/DocumentationViewerTests.php rename to tests/DocumentationViewerTest.php index f51112b..ab24ec1 100644 --- a/tests/DocumentationViewerTests.php +++ b/tests/DocumentationViewerTest.php @@ -37,10 +37,6 @@ class DocumentationViewerTests extends FunctionalTest { DocumentationService::unregister("DocumentationViewerTests"); DocumentationService::set_automatic_registration($this->origEnabled); DocumentationViewer::set_link_base($this->origLinkBase); - // $this->origModules = Documentation::get_registered_modules(); - // foreach($this->origModules as $name => $module) { - // DocumentationService::register($name); - // } } function testCurrentRedirection() { @@ -148,12 +144,14 @@ class DocumentationViewerTests extends FunctionalTest { $this->assertFalse($child1->Children); $child2 = $pagesArr[2]; - + $this->assertType('DataObjectSet', $child2->Children); + $this->assertEquals( array('subpage', 'subsubfolder'), $child2->Children->column('Filename') ); + $child2Links = $child2->Children->column('Link'); $this->assertStringEndsWith('2.4/en/DocumentationViewerTests/subfolder/subpage/', $child2Links[0]); $this->assertStringEndsWith('2.4/en/DocumentationViewerTests/subfolder/subsubfolder/', $child2Links[1]); diff --git a/tests/docs-search/en/index.md b/tests/docs-search/en/index.md new file mode 100644 index 0000000..e69de29 diff --git a/tests/docs-search/en/query-in-content.md b/tests/docs-search/en/query-in-content.md new file mode 100644 index 0000000..9f26b63 --- /dev/null +++ b/tests/docs-search/en/query-in-content.md @@ -0,0 +1 @@ +Foo \ No newline at end of file diff --git a/tests/docs-search/en/query-search.md b/tests/docs-search/en/query-search.md new file mode 100644 index 0000000..e69de29 diff --git a/tests/docs-search/en/subfolder/foo-folder/third-foo.md b/tests/docs-search/en/subfolder/foo-folder/third-foo.md new file mode 100644 index 0000000..e69de29 diff --git a/tests/docs-search/en/subfolder/foo.md b/tests/docs-search/en/subfolder/foo.md new file mode 100644 index 0000000..d23bbd3 --- /dev/null +++ b/tests/docs-search/en/subfolder/foo.md @@ -0,0 +1 @@ +Query is foo \ No newline at end of file