<?php

/**
 * Tests the {@see TikaTextExtractor} and {@see TikaServerTextExtractor} classes
 */
class TikaTextExtractorTest extends SapphireTest
{
    /**
     * Runs the shared extraction and MIME assertions against the Tika CLI extractor.
     *
     * The test is marked as skipped when the extractor reports that it is not available.
     */
    public function testExtraction()
    {
        $extractor = new TikaTextExtractor();
        if (!$extractor->isAvailable()) {
            $this->markTestSkipped('tika cli not available');
        }

        $this->assertExtractorBehaviour($extractor);
    }

    /**
     * Runs the shared extraction and MIME assertions against the Tika server extractor.
     *
     * The test is marked as skipped when the extractor reports that it is not available.
     */
    public function testServerExtraction()
    {
        $extractor = new TikaServerTextExtractor();
        if (!$extractor->isAvailable()) {
            $this->markTestSkipped('tika server not available');
        }

        $this->assertExtractorBehaviour($extractor);
    }

    /**
     * Checks normaliseVersion() against every input/expected pair from versionProvider().
     */
    public function testNormaliseVersion()
    {
        $extractor = new TikaServerTextExtractor();

        // The method is invoked through reflection with setAccessible(true),
        // presumably because it is not public on the extractor.
        $reflection = new ReflectionClass($extractor);
        $method = $reflection->getMethod('normaliseVersion');
        $method->setAccessible(true);

        foreach ($this->versionProvider() as $data) {
            list($input, $expected) = $data;
            $actual = $method->invoke($extractor, $input);

            // Name the failing case: every pair is asserted inside one test method,
            // so without a message a failure would not say which input broke.
            $this->assertEquals(
                $expected,
                $actual,
                sprintf('normaliseVersion(%s) should return "%s"', var_export($input, true), $expected)
            );
        }
    }

    /**
     * Input/expected pairs consumed by testNormaliseVersion().
     *
     * This is called directly from the test rather than being registered as a
     * PHPUnit data provider.
     *
     * @return array List of array(input, expected normalised version) pairs
     */
    protected function versionProvider()
    {
        return array(
            array('1.7.1', '1.7.1'),
            array('1.7', '1.7.0'),
            array('1', '1.0.0'),
            array(null, '0.0.0'),
            array('v1.5', 'v1.5.0'),
            array('carrot', 'carrot.0.0')
        );
    }

    /**
     * Assertions shared by the CLI and server extractor tests.
     *
     * Extracts the text of the test1.pdf fixture and checks for a known sentence,
     * then checks which MIME types the extractor claims to support.
     *
     * @param object $extractor Extractor under test; must provide getContent() and supportsMime()
     */
    protected function assertExtractorBehaviour($extractor)
    {
        // Check file
        $file = Director::baseFolder() . '/textextraction/tests/fixtures/test1.pdf';
        $content = $extractor->getContent($file);
        $this->assertContains('This is a test file with a link', $content);

        // Check mime validation
        $this->assertTrue($extractor->supportsMime('application/pdf'));
        $this->assertTrue($extractor->supportsMime('text/html'));
        $this->assertFalse($extractor->supportsMime('application/not-supported'));
    }
}