Commit b2d8d8b7e6cebdaba386313dcd69cfaf9ae119e7

Authored by conradverm
1 parent 56866914

KTS-2601

"Create indexer administration page"
Updated. Added better text descriptions for admin section and implemented status_msg update mechanism.

Committed By: Conrad Vermeulen
Reviewed By: Jonathan Byrne

git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/trunk@7943 c91229c3-7414-0410-bfa2-8a42b809f60b
plugins/ktcore/KTCorePlugin.php
@@ -294,23 +294,23 @@ class KTCorePlugin extends KTPlugin { @@ -294,23 +294,23 @@ class KTCorePlugin extends KTPlugin {
294 294
295 //Search and Indexing 295 //Search and Indexing
296 $this->registerAdminPage('managemimetypes', 'ManageMimeTypesDispatcher', 'search', 296 $this->registerAdminPage('managemimetypes', 'ManageMimeTypesDispatcher', 'search',
297 - _kt('Mime Types'), _kt('Mime type information.'), 297 + _kt('Mime Types'), _kt('This report lists all mime types and extensions that can be identified by KnowledgeTree.'),
298 '../search2/reporting/ManageMimeTypes.php', null); 298 '../search2/reporting/ManageMimeTypes.php', null);
299 299
300 $this->registerAdminPage('extractorinfo', 'ExtractorInfoDispatcher', 'search', 300 $this->registerAdminPage('extractorinfo', 'ExtractorInfoDispatcher', 'search',
301 - _kt('Extractor Information'), _kt('Extractor information.'), 301 + _kt('Extractor Information'), _kt('This report lists the text extractors and their supported mime types.'),
302 '../search2/reporting/ExtractorInfo.php', null); 302 '../search2/reporting/ExtractorInfo.php', null);
303 303
304 $this->registerAdminPage('indexerrors', 'IndexErrorsDispatcher', 'search', 304 $this->registerAdminPage('indexerrors', 'IndexErrorsDispatcher', 'search',
305 - _kt('Document Indexing Diagnostics'), _kt('Document Indexing Diagnostics'), 305 + _kt('Document Indexing Diagnostics'), _kt('This report will help to diagnose problems with document indexing.'),
306 '../search2/reporting/IndexErrors.php', null); 306 '../search2/reporting/IndexErrors.php', null);
307 307
308 $this->registerAdminPage('pendingdocuments', 'PendingDocumentsDispatcher', 'search', 308 $this->registerAdminPage('pendingdocuments', 'PendingDocumentsDispatcher', 'search',
309 - _kt('Pending Documents Indexing Queue'), _kt('Pending Documents Indexing Information'), 309 + _kt('Pending Documents Indexing Queue'), _kt('This report lists documents that are waiting to be indexed.'),
310 '../search2/reporting/PendingDocuments.php', null); 310 '../search2/reporting/PendingDocuments.php', null);
311 311
312 $this->registerAdminPage('reschedulealldocuments', 'RescheduleDocumentsDispatcher', 'search', 312 $this->registerAdminPage('reschedulealldocuments', 'RescheduleDocumentsDispatcher', 'search',
313 - _kt('Reschedule all documents'), _kt('Reschedule all documents'), 313 + _kt('Reschedule all documents'), _kt('This function allows you to re-index your entire repository.'),
314 '../search2/reporting/RescheduleDocuments.php', null); 314 '../search2/reporting/RescheduleDocuments.php', null);
315 315
316 // misc 316 // misc
plugins/search2/reporting/templates/extractorinfo.smarty
1 -<h2>{i18n}Extractor Information{/i18n}</h2>  
2 -<p class="descriptiveText">{i18n}Important information about the current document extractors{/i18n}</p>  
3 -  
4 -{if $extractor_info}  
5 -  
6 -{foreach key=key from=$extractor_info item=extractor}  
7 -<br>  
8 -<dl class='panel_menu'><h3>{$extractor.name}{if $extractor.active == 1} ({i18n}Active{/i18n}){else} ({i18n}Inactive{/i18n}){/if}</h3><dl/>  
9 -  
10 -<table class=kt_collection>  
11 -  
12 -<thead>  
13 -<tr>  
14 -<th width="300">{i18n}Description{/i18n}</th>  
15 -<th width="400">{i18n}Mime Types{/i18n}</th>  
16 -<th>{i18n}Extensions{/i18n}</th>  
17 -</tr>  
18 -</thead>  
19 -  
20 -<tbody>  
21 -  
22 -{foreach from=$extractor.mimeTypes key=key item=mimeType}  
23 -<tr>  
24 - <td>{$mimeType.description}</td>  
25 - <td>{$key}</td>  
26 - <td>{$extractor.mimeTypes.$key.extensions}</td>  
27 -</tr>  
28 -{/foreach}  
29 -  
30 -</tbody>  
31 -  
32 -</table>  
33 -  
34 -{/foreach}  
35 -  
36 -{else}  
37 -<br>  
38 -<h3>{i18n}There are no extractors registered.{/i18n}</h3>  
39 -{/if} 1 +<h2>{i18n}Extractor Information{/i18n}</h2>
  2 +<p class="descriptiveText">{i18n}This report lists the text extractors and their supported mime types.{/i18n}</p>
  3 +
  4 +{if $extractor_info}
  5 +
  6 +{foreach key=key from=$extractor_info item=extractor}
  7 +<br>
  8 +<dl class='panel_menu'><h3>{$extractor.name}{if $extractor.active == 1} ({i18n}Active{/i18n}){else} ({i18n}Inactive{/i18n}){/if}</h3></dl>
  9 +
  10 +<table class=kt_collection>
  11 +
  12 +<thead>
  13 +<tr>
  14 +<th width="300">{i18n}Description{/i18n}</th>
  15 +<th width="400">{i18n}Mime Types{/i18n}</th>
  16 +<th>{i18n}Extensions{/i18n}</th>
  17 +</tr>
  18 +</thead>
  19 +
  20 +<tbody>
  21 +
  22 +{foreach from=$extractor.mimeTypes key=key item=mimeType}
  23 +<tr>
  24 + <td>{$mimeType.description}</td>
  25 + <td>{$key}</td>
  26 + <td>{$extractor.mimeTypes.$key.extensions}</td>
  27 +</tr>
  28 +{/foreach}
  29 +
  30 +</tbody>
  31 +
  32 +</table>
  33 +
  34 +{/foreach}
  35 +
  36 +{else}
  37 +<br>
  38 +<h3>{i18n}There are no extractors registered.{/i18n}</h3>
  39 +{/if}
plugins/search2/reporting/templates/indexerrors.smarty
1 -{literal}  
2 -<script type="text/javascript">  
3 - function doReschedule(rescheduleAction)  
4 - {  
5 - var hiddenVal = document.getElementById("rescheduleValue");  
6 - hiddenVal.value = rescheduleAction;  
7 - document.scheduleForm.submit();  
8 - }  
9 -  
10 -</script>  
11 -{/literal}  
12 -  
13 -<h2>{i18n}Document Indexing Diagnostics{/i18n}</h2>  
14 -<p class="descriptiveText">{i18n}This report will help to diagnose problems with document indexing.{/i18n}</p>  
15 -  
16 -<br>  
17 -{if $index_errors}  
18 -  
19 -<form name="scheduleForm" action="{$smarty.server.PHP_SELF}" method="POST">  
20 -<input type="hidden" name="rescheduleValue" id="rescheduleValue" value="" />  
21 -  
22 -<table class=kt_collection>  
23 -<thead>  
24 - <tr>  
25 - <th width="10"></th>  
26 - <th width="80"><nobr>{i18n}Document ID{/i18n}</th>  
27 - <th ><nobr>{i18n}Filename{/i18n}</th>  
28 - <th width="100"><nobr>{i18n}Extension{/i18n}</th>  
29 - <th width="100"><nobr>{i18n}Mime Type{/i18n}</th>  
30 - <th width="100"><nobr>{i18n}Extractor{/i18n}</th>  
31 - <th width="100"><nobr>{i18n}Index Date{/i18n}</th>  
32 - </tr>  
33 -</thead>  
34 -  
35 -<tbody>  
36 -{foreach key=key from=$index_errors item=indexError}  
37 -<tr>  
38 - <td class="centered"><input type="checkbox" name="index_error[{$indexError.document_id}]" value="1"/></td>  
39 - <td>{$indexError.document_id}</td>  
40 - <td>{$indexError.filename|truncate:40:'...'}</td>  
41 - <td>{$indexError.filetypes}</td>  
42 - <td>{$indexError.mimetypes}</td>  
43 - <td>{if $pendingDocs.extractor}{$indexError.extractor}{else}<p><font color="#FF9933">{i18n}n/a{/i18n}</font></p>{/if}</td>  
44 - <td>{$indexError.indexdate}</td>  
45 -  
46 -</tr>  
47 -<tr>  
48 - <td>&nbsp;</td>  
49 - <td colspan=4><pre>{$indexError.status_msg}</pre></td>  
50 -</tr>  
51 -  
52 -  
53 -{/foreach}  
54 -  
55 -</tbody>  
56 -  
57 -</table>  
58 -  
59 -<br>  
60 -<input type="button" name="Reschedule" value="{i18n}Reschedule{/i18n}" onclick="doReschedule('reschedule');"/>  
61 -<input type="button" name="Reschedule All" value="{i18n}Reschedule All{/i18n}" onclick="doReschedule('rescheduleall');"/>  
62 -{else}  
63 -<h3>{i18n}There are no indexing issues.{/i18n}</h3>  
64 -{/if} 1 +{literal}
  2 +<script type="text/javascript">
  3 + function doReschedule(rescheduleAction)
  4 + {
  5 + var hiddenVal = document.getElementById("rescheduleValue");
  6 + hiddenVal.value = rescheduleAction;
  7 + document.scheduleForm.submit();
  8 + }
  9 +
  10 +</script>
  11 +{/literal}
  12 +
  13 +<h2>{i18n}Document Indexing Diagnostics{/i18n}</h2>
  14 +<p class="descriptiveText">{i18n}This report will help to diagnose problems with document indexing.{/i18n}</p>
  15 +
  16 +<br>
  17 +{if $index_errors}
  18 +
  19 +<form name="scheduleForm" action="{$smarty.server.PHP_SELF}" method="POST">
  20 +<input type="hidden" name="rescheduleValue" id="rescheduleValue" value="" />
  21 +
  22 +<table class=kt_collection>
  23 +<thead>
  24 + <tr>
  25 + <th width="10"></th>
  26 + <th width="80"><nobr>{i18n}Document ID{/i18n}</th>
  27 + <th ><nobr>{i18n}Filename{/i18n}</th>
  28 + <th width="100"><nobr>{i18n}Extension{/i18n}</th>
  29 + <th width="100"><nobr>{i18n}Mime Type{/i18n}</th>
  30 + <th width="100"><nobr>{i18n}Extractor{/i18n}</th>
  31 + <th width="100"><nobr>{i18n}Index Date{/i18n}</th>
  32 + </tr>
  33 +</thead>
  34 +
  35 +<tbody>
  36 +{foreach key=key from=$index_errors item=indexError}
  37 +<tr>
  38 + <td class="centered"><input type="checkbox" name="index_error[{$indexError.document_id}]" value="1"/></td>
  39 + <td>{$indexError.document_id}</td>
  40 + <td>{$indexError.filename|truncate:40:'...'}</td>
  41 + <td>{$indexError.filetypes}</td>
  42 + <td>{$indexError.mimetypes}</td>
  43 + <td>{if $indexError.extractor}{$indexError.extractor}{else}<p><font color="#FF9933">{i18n}n/a{/i18n}</font></p>{/if}</td>
  44 + <td>{$indexError.indexdate}</td>
  45 +
  46 +</tr>
  47 +<tr>
  48 + <td>&nbsp;</td>
  49 + <td colspan=4><pre>{$indexError.status_msg}</pre></td>
  50 +</tr>
  51 +
  52 +
  53 +{/foreach}
  54 +
  55 +</tbody>
  56 +
  57 +</table>
  58 +
  59 +<br>
  60 +<input type="button" name="Reschedule" value="{i18n}Reschedule{/i18n}" onclick="doReschedule('reschedule');"/>
  61 +<input type="button" name="Reschedule All" value="{i18n}Reschedule All{/i18n}" onclick="doReschedule('rescheduleall');"/>
  62 +{else}
  63 +<h3>{i18n}There are no indexing issues.{/i18n}</h3>
  64 +{/if}
65 </form> 65 </form>
66 \ No newline at end of file 66 \ No newline at end of file
plugins/search2/reporting/templates/managemimetypes.smarty
1 -<h2>{i18n}Manage Mime Types{/i18n}</h2>  
2 -<p class="descriptiveText">{i18n}Mime type information{/i18n}</p>  
3 -  
4 -  
5 -{if $mime_types}  
6 -  
7 -<table class=kt_collection>  
8 -  
9 -<thead>  
10 -<tr>  
11 -<th>Icon </th>  
12 -<th> Extension </th>  
13 -<th> Mime Type </th>  
14 -<th> Description </th>  
15 -<th> Extractor </th>  
16 -</tr>  
17 -</thead>  
18 -  
19 -<tbody>  
20 -{foreach from=$mime_types item=mimetype}  
21 -<tr>  
22 -<td><span class="contenttype {$mimetype.icon_path}"></span></td>  
23 -<td>{$mimetype.filetypes}</td>  
24 -<td>{$mimetype.mimetypes}</td>  
25 -<td>{if $mimetype.friendly_name}  
26 -{$mimetype.friendly_name}  
27 -{else}  
28 -<p><font color="#FF9933">no description</font></p>  
29 -{/if}</td>  
30 -<td>{if $mimetype.extractor}  
31 -{$mimetype.extractor}  
32 -{else}  
33 -<p><font color="#FF9933">n/a</font></p>  
34 -{/if}</td>  
35 -</tr>  
36 -{/foreach}  
37 -</tbody>  
38 -  
39 -</table>  
40 -  
41 -<table>  
42 -<tr><td>Number of Extensions<td>{$numExtensions}  
43 -<tr><td>Number of indexed extensions<td>{$numIndexedExtensions} ( {$indexedPercentage}% )  
44 -</table>  
45 -  
46 -  
47 -{/if}  
48 - 1 +<h2>{i18n}Manage Mime Types{/i18n}</h2>
  2 +<p class="descriptiveText">{i18n}This report lists all mime types and extensions that can be identified by KnowledgeTree.{/i18n}</p>
  3 +
  4 +
  5 +{if $mime_types}
  6 +
  7 +<table class=kt_collection>
  8 +
  9 +<thead>
  10 +<tr>
  11 +<th>Icon </th>
  12 +<th> Extension </th>
  13 +<th> Mime Type </th>
  14 +<th> Description </th>
  15 +<th> Extractor </th>
  16 +</tr>
  17 +</thead>
  18 +
  19 +<tbody>
  20 +{foreach from=$mime_types item=mimetype}
  21 +<tr>
  22 +<td><span class="contenttype {$mimetype.icon_path}"></span></td>
  23 +<td>{$mimetype.filetypes}</td>
  24 +<td>{$mimetype.mimetypes}</td>
  25 +<td>{if $mimetype.friendly_name}
  26 +{$mimetype.friendly_name}
  27 +{else}
  28 +<p><font color="#FF9933">no description</font></p>
  29 +{/if}</td>
  30 +<td>{if $mimetype.extractor}
  31 +{$mimetype.extractor}
  32 +{else}
  33 +<p><font color="#FF9933">n/a</font></p>
  34 +{/if}</td>
  35 +</tr>
  36 +{/foreach}
  37 +</tbody>
  38 +
  39 +</table>
  40 +
  41 +<table>
  42 +<tr><td>Number of Extensions<td>{$numExtensions}
  43 +<tr><td>Number of indexed extensions<td>{$numIndexedExtensions} ( {$indexedPercentage}% )
  44 +</table>
  45 +
  46 +
  47 +{/if}
  48 +
plugins/search2/reporting/templates/pendingdocuments.smarty
1 -<h2>{i18n}Pending Documents Indexing Queue{/i18n}</h2>  
2 -{i18n}This report lists documents that are waiting to be indexed.  
3 -<br><br>  
4 -If a document is not associated with an extractor, no content will be added to the index. These documents can be identified in the list by the extractor column reflecting n/a.{/i18n}  
5 -<br><br>  
6 -  
7 -{if empty($pending_docs)}  
8 -  
9 -<b>{i18n}There are no documents in the indexing queue.{/i18n}</b>  
10 -  
11 -{else}  
12 -  
13 -  
14 -<table class=kt_collection>  
15 -  
16 -<thead>  
17 -<tr>  
18 - <th width="10"><nobr>{i18n}Document ID{/i18n}</th>  
19 - <th><nobr>{i18n}Filename{/i18n}</th>  
20 - <th width="100"><nobr>{i18n}Extension{/i18n}</th>  
21 - <th width="150"><nobr>{i18n}Mime Type{/i18n}</th>  
22 - <th width="100"><nobr>{i18n}Extractor{/i18n}</th>  
23 - <th width="100"><nobr>{i18n}Index Date{/i18n}</th>  
24 -</tr>  
25 -</thead>  
26 -  
27 -<tbody>  
28 -  
29 -{foreach key=key from=$pending_docs item=pendingDocs}  
30 -<tr>  
31 - <td><a href="/view.php?fDocumentId={$pendingDocs.document_id}">{$pendingDocs.document_id}</a></td>  
32 - <td>{$pendingDocs.filename|truncate:40:'...'}</td>  
33 - <td>{$pendingDocs.filetypes}</td>  
34 - <td>{$pendingDocs.mimetypes}</td>  
35 - <td>{if $pendingDocs.extractor}{$pendingDocs.extractor}{else}<p><font color="#FF9933">{i18n}n/a{/i18n}</font></p>{/if}</td>  
36 - <td>{$pendingDocs.indexdate}</td>  
37 -</tr>  
38 -{/foreach}  
39 -  
40 -</tbody>  
41 -  
42 -</table>  
43 - 1 +<h2>{i18n}Pending Documents Indexing Queue{/i18n}</h2>
  2 +{i18n}This report lists documents that are waiting to be indexed.{/i18n}
  3 +<br><br>
  4 +{i18n}If a document is not associated with an extractor, no content will be added to the index. These documents can be identified in the list by the extractor column reflecting n/a.{/i18n}
  5 +<br><br>
  6 +
  7 +{if empty($pending_docs)}
  8 +
  9 +<b>{i18n}There are no documents in the indexing queue.{/i18n}</b>
  10 +
  11 +{else}
  12 +
  13 +
  14 +<table class=kt_collection>
  15 +
  16 +<thead>
  17 +<tr>
  18 + <th width="10"><nobr>{i18n}Document ID{/i18n}</th>
  19 + <th><nobr>{i18n}Filename{/i18n}</th>
  20 + <th width="100"><nobr>{i18n}Extension{/i18n}</th>
  21 + <th width="150"><nobr>{i18n}Mime Type{/i18n}</th>
  22 + <th width="100"><nobr>{i18n}Extractor{/i18n}</th>
  23 + <th width="100"><nobr>{i18n}Index Date{/i18n}</th>
  24 +</tr>
  25 +</thead>
  26 +
  27 +<tbody>
  28 +
  29 +{foreach key=key from=$pending_docs item=pendingDocs}
  30 +<tr>
  31 + <td><a href="/view.php?fDocumentId={$pendingDocs.document_id}">{$pendingDocs.document_id}</a></td>
  32 + <td>{$pendingDocs.filename|truncate:40:'...'}</td>
  33 + <td>{$pendingDocs.filetypes}</td>
  34 + <td>{$pendingDocs.mimetypes}</td>
  35 + <td>{if $pendingDocs.extractor}{$pendingDocs.extractor}{else}<p><font color="#FF9933">{i18n}n/a{/i18n}</font></p>{/if}</td>
  36 + <td>{$pendingDocs.indexdate}</td>
  37 +</tr>
  38 +{/foreach}
  39 +
  40 +</tbody>
  41 +
  42 +</table>
  43 +
44 {/if} 44 {/if}
45 \ No newline at end of file 45 \ No newline at end of file
plugins/search2/reporting/templates/rescheduledocuments.smarty
1 -<h2>{i18n}Reschedule All Documents{/i18n}</h2>  
2 -<p class="descriptiveText">{i18n}Reschedule all documents for indexing{/i18n}</p>  
3 -<p class="descriptiveText">{i18n}Please note that rescheduling all documents may take a long time, depending on the size if the repository{/i18n}</h3>  
4 -  
5 -<form name="rescheduleAllDocs" action="{$smarty.server.PHP_SELF}" method="POST">  
6 -<br>  
7 -{if $rescheduleDone == true}  
8 -<p>Documents Rescheduled</p>  
9 -<p>You can view the schedule queue <a href=admin.php?kt_path_info=search/pendingdocuments>here</a></p>  
10 -<br>  
11 -{/if}  
12 -<input type="submit" name="Reschedule" value="{i18n}Reschedule All{/i18n}" />  
13 -<input type="hidden" name="rescheduleValue" id="rescheduleValue" value="reschedule" />  
14 - 1 +<h2>{i18n}Reschedule All Documents{/i18n}</h2>
  2 +<p class="descriptiveText">{i18n}This function allows you to re-index your entire repository.{/i18n}</p>
  3 +<p class="descriptiveText">{i18n}Please note that rescheduling all documents may take a long time, depending on the size of the repository.{/i18n}</p>
  4 +
  5 +<form name="rescheduleAllDocs" action="{$smarty.server.PHP_SELF}" method="POST">
  6 +<br>
  7 +{if $rescheduleDone == true}
  8 +<p>Documents Rescheduled</p>
  9 +<p>You can view the schedule queue <a href=admin.php?kt_path_info=search/pendingdocuments>here</a></p>
  10 +<br>
  11 +{/if}
  12 +<input type="submit" name="Reschedule" value="{i18n}Reschedule All{/i18n}" />
  13 +<input type="hidden" name="rescheduleValue" id="rescheduleValue" value="reschedule" />
  14 +
15 </form> 15 </form>
16 \ No newline at end of file 16 \ No newline at end of file
search2/indexing/indexerCore.inc.php
@@ -755,34 +755,68 @@ abstract class Indexer @@ -755,34 +755,68 @@ abstract class Indexer
755 KTUtil::setSystemSetting('mimeTypesRegistered', true); 755 KTUtil::setSystemSetting('mimeTypesRegistered', true);
756 } 756 }
757 757
  758 + private function updatePendingDocumentStatus($documentId, $message, $level)
  759 + {
  760 + $this->indexingHistory .= "\n" . $level . ': ' . $message;
  761 + $message = sanitizeForSQL($this->indexingHistory);
  762 + $sql = "UPDATE index_files SET status_msg='$message' WHERE document_id=$documentId";
  763 + DBUtil::runQuery($sql);
  764 + }
  765 +
  766 + /**
  767 + *
  768 + * @param int $documentId
  769 + * @param string $message
  770 + * @param string $level This may be info, error, debug
  771 + */
  772 + private function logPendingDocumentInfoStatus($documentId, $message, $level)
  773 + {
  774 + $this->updatePendingDocumentStatus($documentId, $message, $level);
  775 + global $default;
  776 +
  777 + switch ($level)
  778 + {
  779 + case 'debug':
  780 + if ($this->debug)
  781 + {
  782 + $default->log->debug($message);
  783 + }
  784 + break;
  785 + default:
  786 + $default->log->$level($message);
  787 + }
  788 + }
  789 +
  790 +
  791 +
758 public function getExtractor($extractorClass) 792 public function getExtractor($extractorClass)
759 { 793 {
760 $includeFile = SEARCH2_INDEXER_DIR . 'extractors/' . $extractorClass . '.inc.php'; 794 $includeFile = SEARCH2_INDEXER_DIR . 'extractors/' . $extractorClass . '.inc.php';
761 if (!file_exists($includeFile)) 795 if (!file_exists($includeFile))
762 { 796 {
763 throw new Exception("Extractor file does not exist: $includeFile"); 797 throw new Exception("Extractor file does not exist: $includeFile");
764 - }  
765 - 798 + }
  799 +
766 require_once($includeFile); 800 require_once($includeFile);
767 801
768 if (!class_exists($extractorClass)) 802 if (!class_exists($extractorClass))
769 { 803 {
770 - throw new Exception("Extractor '$classname' not defined in file: $includeFile"); 804 + throw new Exception("Extractor '$classname' not defined in file: $includeFile");
771 } 805 }
772 - 806 +
773 $extractor = new $extractorClass(); 807 $extractor = new $extractorClass();
774 - 808 +
775 if (!($extractor instanceof DocumentExtractor)) 809 if (!($extractor instanceof DocumentExtractor))
776 { 810 {
777 throw new Exception("Class $classname was expected to be of type DocumentExtractor"); 811 throw new Exception("Class $classname was expected to be of type DocumentExtractor");
778 } 812 }
779 - 813 +
780 return $extractor; 814 return $extractor;
781 } 815 }
782 816
783 public static function getIndexingQueue($problemItemsOnly=true) 817 public static function getIndexingQueue($problemItemsOnly=true)
784 { 818 {
785 - 819 +
786 if ($problemItemsOnly) 820 if ($problemItemsOnly)
787 { 821 {
788 $sql = "SELECT 822 $sql = "SELECT
@@ -811,16 +845,16 @@ abstract class Indexer @@ -811,16 +845,16 @@ abstract class Indexer
811 LEFT JOIN mime_extractors me ON mt.extractor_id=me.id 845 LEFT JOIN mime_extractors me ON mt.extractor_id=me.id
812 WHERE 846 WHERE
813 (iff.status_msg IS NULL or iff.status_msg = '') AND dmv.status_id=1 847 (iff.status_msg IS NULL or iff.status_msg = '') AND dmv.status_id=1
814 - ORDER BY indexdate "; 848 + ORDER BY indexdate ";
815 } 849 }
816 $aResult = DBUtil::getResultArray($sql); 850 $aResult = DBUtil::getResultArray($sql);
817 - 851 +
818 return $aResult; 852 return $aResult;
819 } 853 }
820 - 854 +
821 public static function getPendingIndexingQueue() 855 public static function getPendingIndexingQueue()
822 { 856 {
823 - return Indexer::getIndexingQueue(false); 857 + return Indexer::getIndexingQueue(false);
824 } 858 }
825 859
826 /** 860 /**
@@ -922,11 +956,10 @@ abstract class Indexer @@ -922,11 +956,10 @@ abstract class Indexer
922 $extractorClass=$docinfo['extractor']; 956 $extractorClass=$docinfo['extractor'];
923 $indexDocument = in_array($docinfo['what'], array('A','C')); 957 $indexDocument = in_array($docinfo['what'], array('A','C'));
924 $indexDiscussion = in_array($docinfo['what'], array('A','D')); 958 $indexDiscussion = in_array($docinfo['what'], array('A','D'));
  959 + $this->indexingHistory = '';
  960 +
  961 + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Indexing docid: %d extension: '%s' mimetype: '%s' extractor: '%s'"), $docId, $extension,$mimeType,$extractorClass), 'debug');
925 962
926 - if ($this->debug)  
927 - {  
928 - if ($this->debug) $default->log->debug(sprintf(_kt("Indexing docid: %d extension: '%s' mimetype: '%s' extractor: '%s'"), $docId, $extension,$mimeType,$extractorClass));  
929 - }  
930 963
931 if (empty($extractorClass)) 964 if (empty($extractorClass))
932 { 965 {
@@ -936,13 +969,13 @@ abstract class Indexer @@ -936,13 +969,13 @@ abstract class Indexer
936 969
937 if (!$this->isExtractorEnabled($extractorClass)) 970 if (!$this->isExtractorEnabled($extractorClass))
938 { 971 {
939 - $default->log->info(sprintf(_kt("diagnose: Not indexing docid: %d because extractor '%s' is disabled."), $docId, $extractorClass)); 972 + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("diagnose: Not indexing docid: %d because extractor '%s' is disabled."), $docId, $extractorClass), 'info');
940 continue; 973 continue;
941 } 974 }
942 975
943 if ($this->debug) 976 if ($this->debug)
944 { 977 {
945 - $default->log->info(sprintf(_kt("Processing docid: %d.\n"),$docId)); 978 + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Processing docid: %d.\n"),$docId), 'info');
946 } 979 }
947 980
948 $removeFromQueue = true; 981 $removeFromQueue = true;
@@ -954,26 +987,12 @@ abstract class Indexer @@ -954,26 +987,12 @@ abstract class Indexer
954 } 987 }
955 else 988 else
956 { 989 {
957 - require_once('extractors/' . $extractorClass . '.inc.php');  
958 -  
959 - if (!class_exists($extractorClass))  
960 - {  
961 - $default->log->error(sprintf(_kt("indexDocuments: extractor '%s' does not exist."),$extractorClass));  
962 - continue;  
963 - }  
964 -  
965 - $extractor = $extractorCache[$extractorClass] = new $extractorClass();  
966 - }  
967 -  
968 - if (is_null($extractor))  
969 - {  
970 - $default->log->error(sprintf(_kt("indexDocuments: extractor '%s' not resolved - it is null."),$extractorClass));  
971 - continue; 990 + $extractor = $extractorCache[$extractorClass] = $this->getExtractor($extractorClass);
972 } 991 }
973 992
974 if (!($extractor instanceof DocumentExtractor)) 993 if (!($extractor instanceof DocumentExtractor))
975 { 994 {
976 - $default->log->error(sprintf(_kt("indexDocuments: extractor '%s' is not a document extractor class."),$extractorClass)); 995 + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("indexDocuments: extractor '%s' is not a document extractor class."),$extractorClass), 'error');
977 continue; 996 continue;
978 } 997 }
979 998
@@ -995,7 +1014,7 @@ abstract class Indexer @@ -995,7 +1014,7 @@ abstract class Indexer
995 $result = @copy($sourceFile, $intermediate); 1014 $result = @copy($sourceFile, $intermediate);
996 if ($result === false) 1015 if ($result === false)
997 { 1016 {
998 - $default->log->error(sprintf(_kt("Could not create intermediate file from document %d"),$docId)); 1017 + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Could not create intermediate file from document %d"),$docId), 'error');
999 // problem. lets try again later. probably permission related. log the issue. 1018 // problem. lets try again later. probably permission related. log the issue.
1000 continue; 1019 continue;
1001 } 1020 }
@@ -1012,7 +1031,7 @@ abstract class Indexer @@ -1012,7 +1031,7 @@ abstract class Indexer
1012 $extractor->setIndexingStatus(null); 1031 $extractor->setIndexingStatus(null);
1013 $extractor->setExtractionStatus(null); 1032 $extractor->setExtractionStatus(null);
1014 1033
1015 - if ($this->debug) $default->log->debug(sprintf(_kt("Extra Info docid: %d Source File: '%s' Target File: '%s'"),$docId,$sourceFile,$targetFile)); 1034 + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Extra Info docid: %d Source File: '%s' Target File: '%s'"),$docId,$sourceFile,$targetFile), 'debug');
1016 1035
1017 $this->executeHook($extractor, 'pre_extract'); 1036 $this->executeHook($extractor, 'pre_extract');
1018 $this->executeHook($extractor, 'pre_extract', $mimeType); 1037 $this->executeHook($extractor, 'pre_extract', $mimeType);
@@ -1033,7 +1052,8 @@ abstract class Indexer @@ -1033,7 +1052,8 @@ abstract class Indexer
1033 1052
1034 if (!$indexStatus) 1053 if (!$indexStatus)
1035 { 1054 {
1036 - $default->log->error(sprintf(_kt("Problem indexing document %d - indexDocumentAndDiscussion"),$docId)); 1055 + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem indexing document %d - indexDocumentAndDiscussion"),$docId), 'error');
  1056 +
1037 } 1057 }
1038 1058
1039 $extractor->setIndexingStatus($indexStatus); 1059 $extractor->setIndexingStatus($indexStatus);
@@ -1042,7 +1062,7 @@ abstract class Indexer @@ -1042,7 +1062,7 @@ abstract class Indexer
1042 { 1062 {
1043 if (!$this->filterText($targetFile)) 1063 if (!$this->filterText($targetFile))
1044 { 1064 {
1045 - $default->log->error(sprintf(_kt("Problem filtering document %d"),$docId)); 1065 + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem filtering document %d"),$docId), 'error');
1046 } 1066 }
1047 else 1067 else
1048 { 1068 {
@@ -1050,7 +1070,8 @@ abstract class Indexer @@ -1050,7 +1070,8 @@ abstract class Indexer
1050 1070
1051 if (!$indexStatus) 1071 if (!$indexStatus)
1052 { 1072 {
1053 - $default->log->error(sprintf(_kt("Problem indexing document %d - indexDocument"),$docId)); 1073 + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem indexing document %d - indexDocument"),$docId), 'error');
  1074 + $this->logPendingDocumentInfoStatus($docId, '<output>' . $extractor->output . '</output>', 'error');
1054 } 1075 }
1055 1076
1056 $extractor->setIndexingStatus($indexStatus); 1077 $extractor->setIndexingStatus($indexStatus);
@@ -1063,7 +1084,7 @@ abstract class Indexer @@ -1063,7 +1084,7 @@ abstract class Indexer
1063 else 1084 else
1064 { 1085 {
1065 $extractor->setExtractionStatus(false); 1086 $extractor->setExtractionStatus(false);
1066 - $default->log->error(sprintf(_kt("Could not extract contents from document %d"),$docId)); 1087 + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Could not extract contents from document %d"),$docId), 'error');
1067 } 1088 }
1068 1089
1069 $this->executeHook($extractor, 'post_extract', $mimeType); 1090 $this->executeHook($extractor, 'post_extract', $mimeType);