BaseIndexer.php
3.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
<?php
/**
 * Base class for indexer triggers that extract text from a document by
 * running an external command, and store the result in the table named by
 * KTUtil::getTableName('document_text').
 *
 * Subclasses are expected to override $mimetypes, $commandconfig, $command
 * and, where needed, $args and $use_pipes.
 */
class KTBaseIndexerTrigger {
    /**
     * The MIME types that this indexer acts upon, as array keys.
     */
    var $mimetypes = array(
        // 'text/plain' => true,
    );

    /**
     * commandconfig is where to find the command to use in the
     * KnowledgeTree configuration.  For example, it may be
     * "indexing/catdoc", which would correspond to the "indexing"
     * section of config.ini, item "catdoc".
     */
    var $commandconfig = '';    // Something like "indexing/catdoc"

    /**
     * In the absence of the command in the configuration, what command
     * to use directly.
     */
    var $command = '';          // Something like "catdoc"

    /**
     * Any options to send to the command before the input file.
     */
    var $args = array();

    /**
     * Setting use_pipes to true will cause the output of the command to
     * be sent to a temporary file created and chosen by the system (the
     * file name is handed to KTUtil::pexec as its "append" option).
     *
     * If it is false, the temporary file will be sent as the last
     * parameter.
     */
    var $use_pipes = true;

    /**
     * The document to be indexed, as supplied through setDocument().
     * Declared explicitly so that it is not created as a dynamic property.
     */
    var $oDocument = null;

    /**
     * Sets the document that transform() will work on.
     *
     * @param object $oDocument The document to index.
     */
    function setDocument($oDocument) {
        $this->oDocument = $oDocument;
    }

    /**
     * Extracts the text of the current document and replaces the
     * document's row in the document_text table with it.
     *
     * Returns without doing anything when no document has been set, when
     * the document's MIME type is not a key of $mimetypes, when no
     * temporary file can be created, or when no text was extracted.
     */
    function transform() {
        // Nothing to do until setDocument() has been called.
        if (empty($this->oDocument)) {
            return;
        }

        $iMimeTypeId = $this->oDocument->getMimeTypeId();
        $sMimeType = KTMime::getMimeTypeName($iMimeTypeId);
        if (!array_key_exists($sMimeType, $this->mimetypes)) {
            return;
        }

        $oStorage = KTStorageManagerUtil::getSingleton();
        $sFile = $oStorage->temporaryFile($this->oDocument);

        // Name the scratch file after the command when one is configured.
        $tempstub = 'transform';
        if ($this->command != null) {
            $tempstub = $this->command;
        }
        $myfilename = tempnam("/tmp", 'kt.' . $tempstub);
        if ($myfilename === false) {
            // Without a scratch file there is nowhere to collect the output.
            return;
        }

        $contents = $this->extract_contents($sFile, $myfilename);

        // The command may have removed or never produced the file.
        if (file_exists($myfilename)) {
            unlink($myfilename);
        }

        if (empty($contents)) {
            return;
        }

        $aInsertValues = array(
            'document_id' => $this->oDocument->getId(),
            'document_text' => $contents,
        );
        $sTable = KTUtil::getTableName('document_text');

        // clean up the document query "stuff".
        // FIXME this suggests that we should move the _old_ document_searchable_text across to the old-document's id if its a checkin.
        DBUtil::runQuery(array('DELETE FROM ' . $sTable . ' WHERE document_id = ?', array($this->oDocument->getId())));
        DBUtil::autoInsert($sTable, $aInsertValues, array('noid' => true));
    }

    /**
     * Runs the configured command against a file and returns what it
     * wrote to the temporary file.  Handles certain, _very_ simple
     * reader types.
     *
     * The command line is: the command, then $args, then the input file,
     * and - only when $use_pipes is false - the temporary file.
     *
     * @param string $sFilename     Path of the file to extract text from.
     * @param string $sTempFilename Path of the file that receives the output.
     * @return string|false The contents of the temporary file, or false
     *                      when the command could not be found or the
     *                      temporary file could not be read.
     */
    function extract_contents($sFilename, $sTempFilename) {
        $sCommand = KTUtil::findCommand($this->commandconfig, $this->command);
        if (empty($sCommand)) {
            // No usable command: do not try to execute an empty command line.
            return false;
        }

        $cmdline = array($sCommand);
        $cmdline = array_merge($cmdline, $this->args);
        $cmdline[] = $sFilename;

        $aOptions = array();
        if ($this->use_pipes) {
            $aOptions["append"] = $sTempFilename;
        } else {
            $cmdline[] = $sTempFilename;
        }

        KTUtil::pexec($cmdline, $aOptions);

        // Avoid a warning from file_get_contents() if nothing was produced.
        if (!is_readable($sTempFilename)) {
            return false;
        }
        return file_get_contents($sTempFilename);
    }
}
?>