mirror of
https://github.com/silverstripe/silverstripe-textextraction
synced 2024-10-22 11:06:00 +02:00
30 lines
617 B
PHP
30 lines
617 B
PHP
<?php
|
|
|
|
/**
 * Text extractor that uses the PHP function strip_tags() to reduce an HTML
 * document to just its text. OK for indexing, not the best for producing
 * readable text.
 *
 * @author mstephens
 */
class HTMLTextExtractor extends FileTextExtractor {

    /**
     * Lower priority because this is not the most clever HTML extraction.
     * If there is something better available, use that instead.
     *
     * @var int
     */
    public static $priority = 10;

    /**
     * Report whether this extractor can be used.
     *
     * The class needs nothing beyond PHP's built-in file_get_contents() and
     * strip_tags(), so it always answers true.
     *
     * @return bool Always true.
     */
    public function isAvailable() {
        return true;
    }

    /**
     * List the file extensions this extractor handles.
     *
     * @return array Extensions without a leading dot: "html", "htm", "xhtml".
     */
    public function supportedExtensions() {
        return array("html", "htm", "xhtml");
    }

    /**
     * Read the file at $path and return its content with all tags removed.
     *
     * @param string $path Location passed as-is to file_get_contents().
     * @return string The tag-stripped content, or an empty string when the
     *                file could not be read.
     */
    public function getContent($path) {
        $content = file_get_contents($path);

        // file_get_contents() returns false when the read fails. Handle that
        // explicitly instead of relying on false being coerced to "" when it
        // is handed to strip_tags(); the returned value is the same.
        if ($content === false) {
            return '';
        }

        return strip_tags($content);
    }
}
|
|
|
|
?>
|