From 2956cc482fff4b99dbf6f9f1ad81e36a1dce4a57 Mon Sep 17 00:00:00 2001 From: Ingo Schommer Date: Mon, 19 Jan 2009 02:49:42 +0000 Subject: [PATCH] FEATURE Supporting search for special characters like umlauts in SearchForm. These characters are encoded to HTML entities by TinyMCE for SiteTree->Content, hence we need a special case in the search logic (incl. unit tests) ENHANCEMENT Added MySQL FULLTEXT index for $Content property on SiteTree, Needs separate indexing to be searchable by SearchForm - $Content is the only field which has special characters encoded as HTML entities (through TinyMCE) git-svn-id: svn://svn.silverstripe.com/silverstripe/open/modules/sapphire/trunk@70328 467b73ca-7a2a-4603-9d3b-597d59a354a9 --- core/model/SiteTree.php | 1 + search/SearchForm.php | 25 +++++++++++++++++-------- tests/search/SearchFormTest.php | 23 ++++++++++++++++++++++- tests/search/SearchFormTest.yml | 5 ++++- 4 files changed, 44 insertions(+), 10 deletions(-) diff --git a/core/model/SiteTree.php b/core/model/SiteTree.php index 132ce462a..efdb81ea3 100644 --- a/core/model/SiteTree.php +++ b/core/model/SiteTree.php @@ -88,6 +88,7 @@ class SiteTree extends DataObject implements PermissionProvider,i18nEntityProvid static $indexes = array( "SearchFields" => Array('type'=>'fulltext', 'value'=>'Title, MenuTitle, Content, MetaTitle, MetaDescription, MetaKeywords'), "TitleSearchFields" => Array('type'=>'fulltext', 'value'=>'Title'), + "ContentSearchFields" => Array('type'=>'fulltext', 'value'=>'Content'), "URLSegment" => true, ); diff --git a/search/SearchForm.php b/search/SearchForm.php index 39655647e..c2c33f246 100755 --- a/search/SearchForm.php +++ b/search/SearchForm.php @@ -150,14 +150,15 @@ class SearchForm extends Form { public function searchEngine($keywords, $pageLength = null, $sortBy = "Relevance DESC", $extraFilter = "", $booleanSearch = false, $alternativeFileFilter = "", $invertedMatch = false) { if(!$pageLength) $pageLength = $this->pageLength; $fileFilter = ''; - $keywords = addslashes($keywords); - + + $keywords = Convert::raw2sql($keywords); + $htmlEntityKeywords = htmlentities($keywords); + if($booleanSearch) $boolean = "IN BOOLEAN MODE"; + if($extraFilter) { - $extraFilter = " AND $extraFilter"; - - if($alternativeFileFilter) $fileFilter = " AND $alternativeFileFilter"; - else $fileFilter = $extraFilter; + $extraFilter = " AND $extraFilter"; + $fileFilter = ($alternativeFileFilter) ? " AND $alternativeFileFilter" : $extraFilter; } if($this->showInSearchTurnOn) $extraFilter .= " AND showInSearch <> 0"; @@ -167,12 +168,20 @@ class SearchForm extends Form { $notMatch = $invertedMatch ? "NOT " : ""; if($keywords) { - $matchContent = "MATCH (Title, MenuTitle, Content, MetaTitle, MetaDescription, MetaKeywords) AGAINST ('$keywords' $boolean)"; + $matchContent = " + MATCH (Title, MenuTitle, MetaTitle, MetaDescription, MetaKeywords) AGAINST ('$keywords' $boolean) + + MATCH (Content) AGAINST ('$htmlEntityKeywords' $boolean) + "; $matchFile = "MATCH (Filename, Title, Content) AGAINST ('$keywords' $boolean) AND ClassName = 'File'"; // We make the relevance search by converting a boolean mode search into a normal one $relevanceKeywords = str_replace(array('*','+','-'),'',$keywords); - $relevanceContent = "MATCH (Title) AGAINST ('$relevanceKeywords') + MATCH (Title, MenuTitle, Content, MetaTitle, MetaDescription, MetaKeywords) AGAINST ('$relevanceKeywords')"; + $htmlEntityRelevanceKeywords = str_replace(array('*','+','-'),'',$htmlEntityKeywords); + $relevanceContent = " + MATCH (Title) AGAINST ('$relevanceKeywords') + + MATCH(Content) AGAINST ('$htmlEntityRelevanceKeywords') + + MATCH (Title, MenuTitle, Content, MetaTitle, MetaDescription, MetaKeywords) AGAINST ('$relevanceKeywords') + "; $relevanceFile = "MATCH (Filename, Title, Content) AGAINST ('$relevanceKeywords')"; } else { $relevanceContent = $relevanceFile = 1; diff --git a/tests/search/SearchFormTest.php b/tests/search/SearchFormTest.php index 65638e2ca..7ea611aa9 100644 --- a/tests/search/SearchFormTest.php +++ b/tests/search/SearchFormTest.php @@ -18,7 +18,7 @@ class SearchFormTest extends FunctionalTest { $holderPage = $this->objFromFixture('SiteTree', 'searchformholder'); $this->mockController = new ContentController($holderPage); } - + function testPublishedPagesMatchedByTitle() { $sf = new SearchForm($this->mockController, 'SearchForm'); @@ -134,5 +134,26 @@ class SearchFormTest extends FunctionalTest { 'Page with "Show in Search" disabled doesnt show' ); } + + function testSearchTitleAndContentWithSpecialCharacters() { + $sf = new SearchForm($this->mockController, 'SearchForm'); + + $pageWithSpecialChars = $this->objFromFixture('SiteTree', 'pageWithSpecialChars'); + $pageWithSpecialChars->publish('Stage', 'Live'); + + $results = $sf->getResults(null, array('Search'=>'Brötchen')); + $this->assertContains( + $pageWithSpecialChars->ID, + $results->column('ID'), + 'Published pages with umlauts in title are found' + ); + + $results = $sf->getResults(null, array('Search'=>'Bäcker')); + $this->assertContains( + $pageWithSpecialChars->ID, + $results->column('ID'), + 'Published pages with htmlencoded umlauts in content are found' + ); + } } ?> \ No newline at end of file diff --git a/tests/search/SearchFormTest.yml b/tests/search/SearchFormTest.yml index b1391e556..91c18cfb0 100644 --- a/tests/search/SearchFormTest.yml +++ b/tests/search/SearchFormTest.yml @@ -30,4 +30,7 @@ SiteTree: Title: inheritRestrictedView dontShowInSearchPage: Title: dontShowInSearchPage - ShowInSearch: 0 \ No newline at end of file + ShowInSearch: 0 + pageWithSpecialChars: + Title: Brötchen + Content: Frisch vom Bäcker \ No newline at end of file