PDFExtractor.inc.php
2.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
<?php
/**
* $Id:$
*
* KnowledgeTree Open Source Edition
* Document Management Made Simple
* Copyright (C) 2004 - 2008 The Jam Warehouse Software (Pty) Limited
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License version 3 as published by the
* Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can contact The Jam Warehouse Software (Pty) Limited, Unit 1, Tramber Place,
* Blake Street, Observatory, 7925 South Africa. or email info@knowledgetree.com.
*
* The interactive user interfaces in modified source and object code versions
* of this program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU General Public License version 3.
*
* In accordance with Section 7(b) of the GNU General Public License version 3,
* these Appropriate Legal Notices must retain the display of the "Powered by
* KnowledgeTree" logo and retain the original copyright notice. If the display of the
* logo is not reasonably feasible for technical reasons, the Appropriate Legal Notices
* must display the words "Powered by KnowledgeTree" and retain the original
* copyright notice.
* Contributor( s): ______________________________________
*
*/
class PDFExtractor extends ApplicationExtractor
{
public function __construct()
{
$config = KTConfig::getSingleton();
$params = $config->get('extractorParameters/pdftotext', '-nopgbrk -enc UTF-8 "{source}" "{target}"');
parent::__construct('externalBinary','pdftotext','pdftotext',_kt('PDF Text Extractor'),$params);
}
public function getSupportedMimeTypes()
{
return array('application/pdf');
}
protected function filter($text)
{
return $text;
}
protected function exec($cmd)
{
$res = parent::exec($cmd);
if (false === $res && (strpos($this->output, 'Copying of text from this document is not allowed') !== false))
{
$this->output = '';
file_put_contents($this->targetfile, _kt('Security properties on the PDF document prevent text from being extracted.'));
return true;
}
return $res;
}
}
?>