BaseIndexer.php
5.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
<?php
/**
* $Id$
*
* The contents of this file are subject to the KnowledgeTree Public
* License Version 1.1.2 ("License"); You may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.knowledgetree.com/KPL
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.
* See the License for the specific language governing rights and
* limitations under the License.
*
* All copies of the Covered Code must include on each user interface screen:
* (i) the "Powered by KnowledgeTree" logo and
* (ii) the KnowledgeTree copyright notice
* in the same form as they appear in the distribution. See the License for
* requirements.
*
* The Original Code is: KnowledgeTree Open Source
*
* The Initial Developer of the Original Code is The Jam Warehouse Software
* (Pty) Ltd, trading as KnowledgeTree.
* Portions created by The Jam Warehouse Software (Pty) Ltd are Copyright
* (C) 2007 The Jam Warehouse Software (Pty) Ltd;
* All Rights Reserved.
* Contributor( s): ______________________________________
*
*/
/**
 * Base class for document indexer triggers that extract text from a
 * document by running an external command, then store the extracted text
 * in the "document_text" table.
 *
 * Subclasses are expected to configure $mimetypes, $commandconfig,
 * $command and (optionally) $args / $use_pipes.
 */
class KTBaseIndexerTrigger {
    /**
     * Which MIME types that this indexer acts upon.
     * Only the array keys are consulted (see transform()).
     */
    var $mimetypes = array(
        // 'text/plain' => true,
    );

    /**
     * commandconfig is where to find the command to use in the
     * KnowledgeTree configuration.  For example, it may be
     * "indexing/catdoc", which would correspond to the "indexing"
     * section of config.ini, item "catdoc".
     */
    var $commandconfig = ''; // Something like "indexing/catdoc"

    /**
     * In the absence of the command in the configuration, what command
     * to use directly.  Also used as the suffix of the temporary file
     * prefix in transform().
     */
    var $command = '';       // Something like "catdoc"

    /**
     * Output of the command (the 'out' element returned by
     * KTUtil::pexec() on the most recent extract_contents() call).
     */
    var $aCommandOutput = array();

    /**
     * Any options to send to the command before the input file.
     */
    var $args = array();

    /**
     * Where users can find help about document indexers.
     */
    var $support_url = 'http://wiki.knowledgetree.com/Document_Indexers';

    /**
     * If use_pipes is true, the temporary output file name is handed to
     * KTUtil::pexec() as its "append" option (presumably so that the
     * command's output is redirected into that file -- confirm against
     * KTUtil::pexec()).
     *
     * If it is false, the temporary output file name is passed to the
     * command as its last parameter.
     */
    var $use_pipes = true;

    /**
     * The document currently being indexed; assigned by setDocument().
     * Declared explicitly so that setDocument() does not create a
     * dynamic property.
     */
    var $oDocument = null;

    /**
     * Return a diagnostic string _if_ there is something wrong,
     * null otherwise.  The base implementation never reports a problem.
     *
     * @return string|null
     */
    function getDiagnostic() {
        return null;
    }

    /**
     * Set the document that the next transform() call will index.
     *
     * @param object $oDocument document object; transform() calls its
     *                          getMimeTypeId() and getId() methods.
     */
    function setDocument($oDocument) {
        $this->oDocument = $oDocument;
    }

    /**
     * Extract the text of the current document and replace its row in
     * the "document_text" table.
     *
     * Does nothing when the document's MIME type is not a key of
     * $mimetypes, when a temporary file cannot be prepared, or when the
     * extraction yields no content.  Temporary files created here are
     * removed again on every exit path.
     *
     * @return void
     */
    function transform() {
        $iMimeTypeId = $this->oDocument->getMimeTypeId();
        $sMimeType = KTMime::getMimeTypeName($iMimeTypeId);
        if (!array_key_exists($sMimeType, $this->mimetypes)) {
            return;
        }

        $oStorage = KTStorageManagerUtil::getSingleton();
        $sFile = $oStorage->temporaryFile($this->oDocument);

        // Temporary file names are prefixed with the command name when
        // one is configured, to make leftovers easier to identify.
        $tempstub = 'transform';
        if ($this->command != null) {
            $tempstub = $this->command;
        }

        $oKTConfig =& KTConfig::getSingleton();
        $sBasedir = $oKTConfig->get("urls/tmpDirectory");

        // tempnam() creates the file on disk and returns false on failure.
        $myfilename = tempnam($sBasedir, 'kt.' . $tempstub);
        if ($myfilename === false) {
            return;
        }

        if (OS_WINDOWS) {
            // On Windows the command is run against a copy of the file.
            $intermediate = tempnam($sBasedir, 'kt.' . $tempstub);
            if ($intermediate === false || !@copy($sFile, $intermediate)) {
                // Do not leave the files tempnam() already created behind.
                if ($intermediate !== false) {
                    @unlink($intermediate);
                }
                @unlink($myfilename);
                return;
            }
        } else {
            $intermediate = $sFile;
        }

        $contents = $this->extract_contents($intermediate, $myfilename);

        @unlink($myfilename);
        if (OS_WINDOWS) {
            @unlink($intermediate);
        }

        if (empty($contents)) {
            return;
        }

        $aInsertValues = array(
            'document_id' => $this->oDocument->getId(),
            'document_text' => $contents,
        );
        $sTable = KTUtil::getTableName('document_text');

        // clean up the document query "stuff".
        // FIXME this suggests that we should move the _old_ document_searchable_text across to the old-document's id if its a checkin.
        DBUtil::runQuery(array('DELETE FROM ' . $sTable . ' WHERE document_id = ?', array($this->oDocument->getId())));
        DBUtil::autoInsert($sTable, $aInsertValues, array('noid' => true));
    }

    /**
     * Run the configured command against $sFilename and return whatever
     * ends up in $sTempFilename.  Handles certain, _very_ simple reader
     * types.
     *
     * The command line is: command, $args, $sFilename and -- only when
     * $use_pipes is false -- $sTempFilename.
     *
     * @param string $sFilename     file to extract text from
     * @param string $sTempFilename file the extracted text is read from
     *                              after the command has run
     * @return string|false the contents of $sTempFilename, or false when
     *                      no command could be found or the file could
     *                      not be read
     */
    function extract_contents($sFilename, $sTempFilename) {
        $sCommand = KTUtil::findCommand($this->commandconfig, $this->command);
        if (empty($sCommand)) {
            return false;
        }

        $cmdline = array($sCommand);
        $cmdline = kt_array_merge($cmdline, $this->args);
        $cmdline[] = $sFilename;

        $aOptions = array();
        $aOptions['exec_wait'] = 'true';
        if ($this->use_pipes) {
            $aOptions["append"] = $sTempFilename;
        } else {
            $cmdline[] = $sTempFilename;
        }

        $aRet = KTUtil::pexec($cmdline, $aOptions);
        $this->aCommandOutput = $aRet['out'];

        $contents = file_get_contents($sTempFilename);
        return $contents;
    }
}
?>