OOPDFTextExtractor.inc.php
1.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
<?php
require_once('PDFExtractor.inc.php');
require_once('OOTextExtractor.inc.php');
/**
 * Extractor that pairs an OOTextExtractor with a PDFExtractor through
 * CompositeExtractor.
 *
 * NOTE(review): presumably the source document is first converted to PDF by
 * the OOTextExtractor and the text is then pulled out of that PDF by the
 * PDFExtractor -- confirm against CompositeExtractor.
 */
class OOPDFTextExtractor extends CompositeExtractor
{
    /**
     * Wires the two extractors into the parent CompositeExtractor.
     *
     * NOTE(review): 'pdf' and 'application/pdf' look like the extension and
     * mime type of the intermediate file; the meaning of the trailing `true`
     * is not visible from this file -- confirm against CompositeExtractor.
     */
    public function __construct()
    {
        $toPdf = new OOTextExtractor('application/pdf');
        $fromPdf = new PDFExtractor();

        parent::__construct($toPdf, 'pdf', 'application/pdf', $fromPdf, true);
    }

    /**
     * Reports no mime types for this base class.
     *
     * The method exists only so that diagnose does not fail.
     *
     * @return array always an empty array
     */
    public function getSupportedMimeTypes()
    {
        return array();
    }

    /**
     * Placeholder display name; per its message, this is meant to be overridden.
     *
     * The original author notes the method is provided so that diagnose does
     * not fail.
     *
     * @throws Exception always
     */
    public function getDisplayName()
    {
        $message = _kt('This should be overriden');

        throw new Exception($message);
    }
}
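/*
 * Earlier hand-written version of OOPDFTextExtractor, left commented out for reference.
 * NOTE(review): extractTextContent() below calls $this->extractTextContent() (itself)
 * rather than $this->text2pdf->extractTextContent(), and returns false when that call
 * yields a truthy value, which looks inverted. Both need fixing before re-enabling.
 *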
class OOPDFTextExtractor extends DocumentExtractor
{
private $pdf2txt;
private $text2pdf;
public function __construct()
{
$this->pdf2txt = new PDFExtractor();
$this->text2pdf = new OOTextExtractor();
}
public function needsIntermediateSourceFile()
{
// we need the intermediate file because it
// has the correct extension. jodconverter uses the extension to determine mimetype
return true;
}
public function getDisplayName()
{
throw new Exception('This should be overriden');
}
public function getSupportedMimeTypes()
{
return array();
}
public function extractTextContent()
{
$pdffile = $this->targetfile . '.pdf';
$this->text2pdf->setSourceFile($this->sourcefile);
$this->text2pdf->setTargetFile($pdffile);
$this->text2pdf->setMimeType($this->mimetype);
$this->text2pdf->setExtension($this->extension);
if ($this->extractTextContent())
{
return false;
}
$this->pdf2txt->setSourceFile($pdffile);
$this->pdf2txt->setTargetFile($this->targetfile);
$this->pdf2txt->setMimeType('application/pdf');
$this->pdf2txt->setExtension('pdf');
$result = $this->pdf2txt->extractTextContent();
unlink(@$pdffile);
return $result;
}
public function diagnose()
{
$diagnosis = $this->pdf2txt->diagnose();
if (!empty($diagnosis))
{
return $diagnosis;
}
$diagnosis = $this->text2pdf->diagnose();
if (!empty($diagnosis))
{
return $diagnosis;
}
return null;
}
} */
?>