silverstripe-textextraction/code/extractors/HTMLTextExtractor.php
Ingo Schommer 977c4e49c9 API Using paths instead of File objects in extractors
Makes coupling to File objects optional, by choosing
to use the FileTextExtractable extension.
2012-08-22 18:25:12 +02:00

25 lines
569 B
PHP

<?php
/**
 * Text extractor for HTML files based on PHP's strip_tags().
 *
 * The result is suitable for search indexing, but is not meant to be
 * nicely readable text: HTML entities are left encoded and whitespace is
 * not normalised.
 *
 * @author mstephens
 */
class HTMLTextExtractor extends FileTextExtractor {

	/**
	 * Lower priority because it is not the most clever HTML extraction.
	 * If there is something better, use it.
	 *
	 * @var int
	 */
	public static $priority = 10;

	/**
	 * File extensions handled by this extractor.
	 *
	 * @return array List of lowercase extensions, without a leading dot.
	 */
	public function supportedExtensions() {
		return array("html", "htm", "xhtml");
	}

	/**
	 * Reads the file at the given path and returns its content with all
	 * markup removed.
	 *
	 * @param string $path Path of the HTML file to read.
	 * @return string Extracted text; an empty string if the file could not be read.
	 */
	public function getContent($path) {
		$content = file_get_contents($path);
		if ($content === false) {
			// Unreadable file: nothing to extract.
			return '';
		}

		// Regular expressions are good enough here: well-formedness does not
		// matter, the markup only has to be prepared for strip_tags().
		// The "u" modifier is deliberately not used, since it would make
		// preg_replace() fail on files that are not valid UTF-8.
		$prepared = preg_replace(
			array(
				// strip_tags() keeps the text inside these elements, which
				// would put JavaScript / CSS source into the extracted text.
				'@<script\b[^>]*>.*?</script\s*>@si',
				'@<style\b[^>]*>.*?</style\s*>@si',
				// strip_tags() removes tags without leaving whitespace behind,
				// so "<p>foo</p><p>bar</p>" would collapse into "foobar".
				// Put a space in front of block-level opening and closing tags
				// to keep the words of adjacent blocks apart.
				'@<(?=/?(?:address|article|aside|blockquote|br|dd|div|dl|dt|fieldset'
					. '|figcaption|figure|footer|form|h[1-6]|header|hr|li|main|nav|ol'
					. '|option|p|pre|section|table|tbody|td|tfoot|th|thead|title|tr|ul)\b)@i',
			),
			array(' ', ' ', ' <'),
			$content
		);

		// preg_replace() returns null on a PCRE error (e.g. backtrack limit
		// reached); fall back to the unprocessed markup in that case.
		if ($prepared === null) {
			$prepared = $content;
		}

		return strip_tags($prepared);
	}
}
?>