BaseIndexer.php
2.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
<?php
/**
 * Base trigger that extracts plain text from a document by running an
 * external command, then stores that text in the 'document_text' table.
 *
 * The properties below ($mimetypes, $command, $args, $use_pipes) configure
 * which documents are handled and how the command is invoked.
 */
class KTBaseIndexerTrigger {
    // Mime type names this trigger handles. Only the keys are consulted
    // (via array_key_exists); the values are not read by this class.
    var $mimetypes = array(
        'text/plain' => true,
    );
    // External program used to extract the text.
    var $command = 'catdoc'; // could be any application.
    // Arguments placed between the command and the input filename.
    var $args = array("-w");
    // true:  the command's stdout is redirected into the temporary file.
    // false: the temporary filename is passed to the command as its last argument.
    var $use_pipes = true;
    // Document being indexed; assigned by setDocument(). Declared explicitly
    // because creating properties dynamically is deprecated as of PHP 8.2.
    var $oDocument = null;

    /**
     * Sets the document that transform() will operate on.
     *
     * @param object $oDocument document object; transform() calls
     *                          getMimeTypeId() and getId() on it.
     */
    function setDocument($oDocument) {
        $this->oDocument = $oDocument;
    }

    /**
     * Extracts the text of the current document and replaces its row in the
     * 'document_text' table.
     *
     * Does nothing when the document's mime type is not a key of $mimetypes,
     * when no temporary file can be created, or when no text was extracted.
     *
     * @return void
     */
    function transform() {
        $iMimeTypeId = $this->oDocument->getMimeTypeId();
        $sMimeType = KTMime::getMimeTypeName($iMimeTypeId);
        if (!array_key_exists($sMimeType, $this->mimetypes)) {
            return;
        }

        $oStorage = KTStorageManagerUtil::getSingleton();
        $sFile = $oStorage->temporaryFile($this->oDocument);

        // Name the scratch file after the command so leftovers are identifiable.
        $tempstub = 'transform';
        if ($this->command != null) {
            $tempstub = $this->command;
        }
        $myfilename = tempnam("/tmp", 'kt.' . $tempstub);
        if ($myfilename === false) {
            // tempnam() returns false on failure; without a scratch file
            // there is nowhere for the command to write its output.
            return;
        }

        $contents = $this->extract_contents($sFile, $myfilename);
        // The command may have removed the file itself; avoid an unlink() warning.
        if (file_exists($myfilename)) {
            unlink($myfilename);
        }

        // empty() alone would also discard a document whose entire text is "0".
        if (empty($contents) && $contents !== '0') {
            return;
        }

        $aInsertValues = array(
            'document_id' => $this->oDocument->getId(),
            'document_text' => $contents,
        );
        $sTable = KTUtil::getTableName('document_text');

        // clean up the document query "stuff".
        // FIXME this suggests that we should move the _old_ document_searchable_text across to the old-document's id if it's a checkin.
        DBUtil::runQuery(array('DELETE FROM ' . $sTable . ' WHERE document_id = ?', array($this->oDocument->getId())));
        DBUtil::autoInsert($sTable, $aInsertValues, array('noid' => true));
    }

    /**
     * Runs the configured command against $sFilename and returns whatever it
     * left in $sTempFilename. Handles certain, _very_ simple reader types.
     *
     * The command line is: $command, then $args, then $sFilename. With
     * $use_pipes the command's stdout is appended to $sTempFilename by a
     * shell redirect; otherwise $sTempFilename is passed as the final argument.
     *
     * @param string $sFilename     path of the file to read.
     * @param string $sTempFilename path of the file the output is collected in.
     * @return string|false the collected output, or false if it could not be read.
     */
    function extract_contents($sFilename, $sTempFilename) {
        $cmdline = array($this->command);
        $cmdline = array_merge($cmdline, $this->args);
        $cmdline[] = $sFilename;

        if ($this->use_pipes) {
            // The redirect target is not part of $cmdline, so it does not pass
            // through safeShellString(); escape it here.
            $command = KTUtil::safeShellString($cmdline) . " >> " . escapeshellarg($sTempFilename);
        } else {
            $cmdline[] = $sTempFilename;
            $command = KTUtil::safeShellString($cmdline);
        }

        system($command);

        // Nothing to read if the command never produced (or removed) the file.
        if (!is_file($sTempFilename)) {
            return false;
        }
        $contents = file_get_contents($sTempFilename);
        return $contents;
    }
}
?>