Commit b2d8d8b7e6cebdaba386313dcd69cfaf9ae119e7

Authored by conradverm
1 parent 56866914

KTS-2601

"Create indexer administration page"
Updated. Added better text descriptions for admin section and implemented status_msg update mechanism.

Committed By: Conrad Vermeulen
Reviewed By: Jonathan Byrne

git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/trunk@7943 c91229c3-7414-0410-bfa2-8a42b809f60b
plugins/ktcore/KTCorePlugin.php
... ... @@ -294,23 +294,23 @@ class KTCorePlugin extends KTPlugin {
294 294  
295 295 //Search and Indexing
296 296 $this->registerAdminPage('managemimetypes', 'ManageMimeTypesDispatcher', 'search',
297   - _kt('Mime Types'), _kt('Mime type information.'),
  297 + _kt('Mime Types'), _kt('This report lists all mime types and extensions that can be identified by KnowledgeTree.'),
298 298 '../search2/reporting/ManageMimeTypes.php', null);
299 299  
300 300 $this->registerAdminPage('extractorinfo', 'ExtractorInfoDispatcher', 'search',
301   - _kt('Extractor Information'), _kt('Extractor information.'),
  301 + _kt('Extractor Information'), _kt('This report lists the text extractors and their supported mime types.'),
302 302 '../search2/reporting/ExtractorInfo.php', null);
303 303  
304 304 $this->registerAdminPage('indexerrors', 'IndexErrorsDispatcher', 'search',
305   - _kt('Document Indexing Diagnostics'), _kt('Document Indexing Diagnostics'),
  305 + _kt('Document Indexing Diagnostics'), _kt('This report will help to diagnose problems with document indexing.'),
306 306 '../search2/reporting/IndexErrors.php', null);
307 307  
308 308 $this->registerAdminPage('pendingdocuments', 'PendingDocumentsDispatcher', 'search',
309   - _kt('Pending Documents Indexing Queue'), _kt('Pending Documents Indexing Information'),
  309 + _kt('Pending Documents Indexing Queue'), _kt('This report lists documents that are waiting to be indexed.'),
310 310 '../search2/reporting/PendingDocuments.php', null);
311 311  
312 312 $this->registerAdminPage('reschedulealldocuments', 'RescheduleDocumentsDispatcher', 'search',
313   - _kt('Reschedule all documents'), _kt('Reschedule all documents'),
  313 + _kt('Reschedule all documents'), _kt('This function allows you to re-index your entire repository.'),
314 314 '../search2/reporting/RescheduleDocuments.php', null);
315 315  
316 316 // misc
... ...
plugins/search2/reporting/templates/extractorinfo.smarty
1   -<h2>{i18n}Extractor Information{/i18n}</h2>
2   -<p class="descriptiveText">{i18n}Important information about the current document extractors{/i18n}</p>
3   -
4   -{if $extractor_info}
5   -
6   -{foreach key=key from=$extractor_info item=extractor}
7   -<br>
8   -<dl class='panel_menu'><h3>{$extractor.name}{if $extractor.active == 1} ({i18n}Active{/i18n}){else} ({i18n}Inactive{/i18n}){/if}</h3><dl/>
9   -
10   -<table class=kt_collection>
11   -
12   -<thead>
13   -<tr>
14   -<th width="300">{i18n}Description{/i18n}</th>
15   -<th width="400">{i18n}Mime Types{/i18n}</th>
16   -<th>{i18n}Extensions{/i18n}</th>
17   -</tr>
18   -</thead>
19   -
20   -<tbody>
21   -
22   -{foreach from=$extractor.mimeTypes key=key item=mimeType}
23   -<tr>
24   - <td>{$mimeType.description}</td>
25   - <td>{$key}</td>
26   - <td>{$extractor.mimeTypes.$key.extensions}</td>
27   -</tr>
28   -{/foreach}
29   -
30   -</tbody>
31   -
32   -</table>
33   -
34   -{/foreach}
35   -
36   -{else}
37   -<br>
38   -<h3>{i18n}There are no extractors registered.{/i18n}</h3>
39   -{/if}
  1 +<h2>{i18n}Extractor Information{/i18n}</h2>
  2 +<p class="descriptiveText">{i18n}This report lists the text extractors and their supported mime types.{/i18n}</p>
  3 +
  4 +{if $extractor_info}
  5 +
  6 +{foreach key=key from=$extractor_info item=extractor}
  7 +<br>
  8 +<dl class='panel_menu'><h3>{$extractor.name}{if $extractor.active == 1} ({i18n}Active{/i18n}){else} ({i18n}Inactive{/i18n}){/if}</h3></dl>
  9 +
  10 +<table class=kt_collection>
  11 +
  12 +<thead>
  13 +<tr>
  14 +<th width="300">{i18n}Description{/i18n}</th>
  15 +<th width="400">{i18n}Mime Types{/i18n}</th>
  16 +<th>{i18n}Extensions{/i18n}</th>
  17 +</tr>
  18 +</thead>
  19 +
  20 +<tbody>
  21 +
  22 +{foreach from=$extractor.mimeTypes key=key item=mimeType}
  23 +<tr>
  24 + <td>{$mimeType.description}</td>
  25 + <td>{$key}</td>
  26 + <td>{$extractor.mimeTypes.$key.extensions}</td>
  27 +</tr>
  28 +{/foreach}
  29 +
  30 +</tbody>
  31 +
  32 +</table>
  33 +
  34 +{/foreach}
  35 +
  36 +{else}
  37 +<br>
  38 +<h3>{i18n}There are no extractors registered.{/i18n}</h3>
  39 +{/if}
... ...
plugins/search2/reporting/templates/indexerrors.smarty
1   -{literal}
2   -<script type="text/javascript">
3   - function doReschedule(rescheduleAction)
4   - {
5   - var hiddenVal = document.getElementById("rescheduleValue");
6   - hiddenVal.value = rescheduleAction;
7   - document.scheduleForm.submit();
8   - }
9   -
10   -</script>
11   -{/literal}
12   -
13   -<h2>{i18n}Document Indexing Diagnostics{/i18n}</h2>
14   -<p class="descriptiveText">{i18n}This report will help to diagnose problems with document indexing.{/i18n}</p>
15   -
16   -<br>
17   -{if $index_errors}
18   -
19   -<form name="scheduleForm" action="{$smarty.server.PHP_SELF}" method="POST">
20   -<input type="hidden" name="rescheduleValue" id="rescheduleValue" value="" />
21   -
22   -<table class=kt_collection>
23   -<thead>
24   - <tr>
25   - <th width="10"></th>
26   - <th width="80"><nobr>{i18n}Document ID{/i18n}</th>
27   - <th ><nobr>{i18n}Filename{/i18n}</th>
28   - <th width="100"><nobr>{i18n}Extension{/i18n}</th>
29   - <th width="100"><nobr>{i18n}Mime Type{/i18n}</th>
30   - <th width="100"><nobr>{i18n}Extractor{/i18n}</th>
31   - <th width="100"><nobr>{i18n}Index Date{/i18n}</th>
32   - </tr>
33   -</thead>
34   -
35   -<tbody>
36   -{foreach key=key from=$index_errors item=indexError}
37   -<tr>
38   - <td class="centered"><input type="checkbox" name="index_error[{$indexError.document_id}]" value="1"/></td>
39   - <td>{$indexError.document_id}</td>
40   - <td>{$indexError.filename|truncate:40:'...'}</td>
41   - <td>{$indexError.filetypes}</td>
42   - <td>{$indexError.mimetypes}</td>
43   - <td>{if $pendingDocs.extractor}{$indexError.extractor}{else}<p><font color="#FF9933">{i18n}n/a{/i18n}</font></p>{/if}</td>
44   - <td>{$indexError.indexdate}</td>
45   -
46   -</tr>
47   -<tr>
48   - <td>&nbsp;</td>
49   - <td colspan=4><pre>{$indexError.status_msg}</pre></td>
50   -</tr>
51   -
52   -
53   -{/foreach}
54   -
55   -</tbody>
56   -
57   -</table>
58   -
59   -<br>
60   -<input type="button" name="Reschedule" value="{i18n}Reschedule{/i18n}" onclick="doReschedule('reschedule');"/>
61   -<input type="button" name="Reschedule All" value="{i18n}Reschedule All{/i18n}" onclick="doReschedule('rescheduleall');"/>
62   -{else}
63   -<h3>{i18n}There are no indexing issues.{/i18n}</h3>
64   -{/if}
  1 +{literal}
  2 +<script type="text/javascript">
  3 + function doReschedule(rescheduleAction)
  4 + {
  5 + var hiddenVal = document.getElementById("rescheduleValue");
  6 + hiddenVal.value = rescheduleAction;
  7 + document.scheduleForm.submit();
  8 + }
  9 +
  10 +</script>
  11 +{/literal}
  12 +
  13 +<h2>{i18n}Document Indexing Diagnostics{/i18n}</h2>
  14 +<p class="descriptiveText">{i18n}This report will help to diagnose problems with document indexing.{/i18n}</p>
  15 +
  16 +<br>
  17 +{if $index_errors}
  18 +
  19 +<form name="scheduleForm" action="{$smarty.server.PHP_SELF}" method="POST">
  20 +<input type="hidden" name="rescheduleValue" id="rescheduleValue" value="" />
  21 +
  22 +<table class=kt_collection>
  23 +<thead>
  24 + <tr>
  25 + <th width="10"></th>
  26 + <th width="80"><nobr>{i18n}Document ID{/i18n}</th>
  27 + <th ><nobr>{i18n}Filename{/i18n}</th>
  28 + <th width="100"><nobr>{i18n}Extension{/i18n}</th>
  29 + <th width="100"><nobr>{i18n}Mime Type{/i18n}</th>
  30 + <th width="100"><nobr>{i18n}Extractor{/i18n}</th>
  31 + <th width="100"><nobr>{i18n}Index Date{/i18n}</th>
  32 + </tr>
  33 +</thead>
  34 +
  35 +<tbody>
  36 +{foreach key=key from=$index_errors item=indexError}
  37 +<tr>
  38 + <td class="centered"><input type="checkbox" name="index_error[{$indexError.document_id}]" value="1"/></td>
  39 + <td>{$indexError.document_id}</td>
  40 + <td>{$indexError.filename|truncate:40:'...'}</td>
  41 + <td>{$indexError.filetypes}</td>
  42 + <td>{$indexError.mimetypes}</td>
  43 + <td>{if $pendingDocs.extractor}{$indexError.extractor}{else}<p><font color="#FF9933">{i18n}n/a{/i18n}</font></p>{/if}</td>
  44 + <td>{$indexError.indexdate}</td>
  45 +
  46 +</tr>
  47 +<tr>
  48 + <td>&nbsp;</td>
  49 + <td colspan=4><pre>{$indexError.status_msg}</pre></td>
  50 +</tr>
  51 +
  52 +
  53 +{/foreach}
  54 +
  55 +</tbody>
  56 +
  57 +</table>
  58 +
  59 +<br>
  60 +<input type="button" name="Reschedule" value="{i18n}Reschedule{/i18n}" onclick="doReschedule('reschedule');"/>
  61 +<input type="button" name="Reschedule All" value="{i18n}Reschedule All{/i18n}" onclick="doReschedule('rescheduleall');"/>
  62 +{else}
  63 +<h3>{i18n}There are no indexing issues.{/i18n}</h3>
  64 +{/if}
65 65 </form>
66 66 \ No newline at end of file
... ...
plugins/search2/reporting/templates/managemimetypes.smarty
1   -<h2>{i18n}Manage Mime Types{/i18n}</h2>
2   -<p class="descriptiveText">{i18n}Mime type information{/i18n}</p>
3   -
4   -
5   -{if $mime_types}
6   -
7   -<table class=kt_collection>
8   -
9   -<thead>
10   -<tr>
11   -<th>Icon </th>
12   -<th> Extension </th>
13   -<th> Mime Type </th>
14   -<th> Description </th>
15   -<th> Extractor </th>
16   -</tr>
17   -</thead>
18   -
19   -<tbody>
20   -{foreach from=$mime_types item=mimetype}
21   -<tr>
22   -<td><span class="contenttype {$mimetype.icon_path}"></span></td>
23   -<td>{$mimetype.filetypes}</td>
24   -<td>{$mimetype.mimetypes}</td>
25   -<td>{if $mimetype.friendly_name}
26   -{$mimetype.friendly_name}
27   -{else}
28   -<p><font color="#FF9933">no description</font></p>
29   -{/if}</td>
30   -<td>{if $mimetype.extractor}
31   -{$mimetype.extractor}
32   -{else}
33   -<p><font color="#FF9933">n/a</font></p>
34   -{/if}</td>
35   -</tr>
36   -{/foreach}
37   -</tbody>
38   -
39   -</table>
40   -
41   -<table>
42   -<tr><td>Number of Extensions<td>{$numExtensions}
43   -<tr><td>Number of indexed extensions<td>{$numIndexedExtensions} ( {$indexedPercentage}% )
44   -</table>
45   -
46   -
47   -{/if}
48   -
  1 +<h2>{i18n}Manage Mime Types{/i18n}</h2>
  2 +<p class="descriptiveText">{i18n}This report lists all mime types and extensions that can be identified by KnowledgeTree.{/i18n}</p>
  3 +
  4 +
  5 +{if $mime_types}
  6 +
  7 +<table class=kt_collection>
  8 +
  9 +<thead>
  10 +<tr>
  11 +<th>Icon </th>
  12 +<th> Extension </th>
  13 +<th> Mime Type </th>
  14 +<th> Description </th>
  15 +<th> Extractor </th>
  16 +</tr>
  17 +</thead>
  18 +
  19 +<tbody>
  20 +{foreach from=$mime_types item=mimetype}
  21 +<tr>
  22 +<td><span class="contenttype {$mimetype.icon_path}"></span></td>
  23 +<td>{$mimetype.filetypes}</td>
  24 +<td>{$mimetype.mimetypes}</td>
  25 +<td>{if $mimetype.friendly_name}
  26 +{$mimetype.friendly_name}
  27 +{else}
  28 +<p><font color="#FF9933">no description</font></p>
  29 +{/if}</td>
  30 +<td>{if $mimetype.extractor}
  31 +{$mimetype.extractor}
  32 +{else}
  33 +<p><font color="#FF9933">n/a</font></p>
  34 +{/if}</td>
  35 +</tr>
  36 +{/foreach}
  37 +</tbody>
  38 +
  39 +</table>
  40 +
  41 +<table>
  42 +<tr><td>Number of Extensions<td>{$numExtensions}
  43 +<tr><td>Number of indexed extensions<td>{$numIndexedExtensions} ( {$indexedPercentage}% )
  44 +</table>
  45 +
  46 +
  47 +{/if}
  48 +
... ...
plugins/search2/reporting/templates/pendingdocuments.smarty
1   -<h2>{i18n}Pending Documents Indexing Queue{/i18n}</h2>
2   -{i18n}This report lists documents that are waiting to be indexed.
3   -<br><br>
4   -If a document is not associated with an extractor, no content will be added to the index. These documents can be identified in the list by the extractor column reflecting n/a.{/i18n}
5   -<br><br>
6   -
7   -{if empty($pending_docs)}
8   -
9   -<b>{i18n}There are no documents in the indexing queue.{/i18n}</b>
10   -
11   -{else}
12   -
13   -
14   -<table class=kt_collection>
15   -
16   -<thead>
17   -<tr>
18   - <th width="10"><nobr>{i18n}Document ID{/i18n}</th>
19   - <th><nobr>{i18n}Filename{/i18n}</th>
20   - <th width="100"><nobr>{i18n}Extension{/i18n}</th>
21   - <th width="150"><nobr>{i18n}Mime Type{/i18n}</th>
22   - <th width="100"><nobr>{i18n}Extractor{/i18n}</th>
23   - <th width="100"><nobr>{i18n}Index Date{/i18n}</th>
24   -</tr>
25   -</thead>
26   -
27   -<tbody>
28   -
29   -{foreach key=key from=$pending_docs item=pendingDocs}
30   -<tr>
31   - <td><a href="/view.php?fDocumentId={$pendingDocs.document_id}">{$pendingDocs.document_id}</a></td>
32   - <td>{$pendingDocs.filename|truncate:40:'...'}</td>
33   - <td>{$pendingDocs.filetypes}</td>
34   - <td>{$pendingDocs.mimetypes}</td>
35   - <td>{if $pendingDocs.extractor}{$pendingDocs.extractor}{else}<p><font color="#FF9933">{i18n}n/a{/i18n}</font></p>{/if}</td>
36   - <td>{$pendingDocs.indexdate}</td>
37   -</tr>
38   -{/foreach}
39   -
40   -</tbody>
41   -
42   -</table>
43   -
  1 +<h2>{i18n}Pending Documents Indexing Queue{/i18n}</h2>
  2 +{i18n}This report lists documents that are waiting to be indexed.{/i18n}
  3 +<br><br>
  4 +{i18n}If a document is not associated with an extractor, no content will be added to the index. These documents can be identified in the list by the extractor column reflecting n/a.{/i18n}
  5 +<br><br>
  6 +
  7 +{if empty($pending_docs)}
  8 +
  9 +<b>{i18n}There are no documents in the indexing queue.{/i18n}</b>
  10 +
  11 +{else}
  12 +
  13 +
  14 +<table class=kt_collection>
  15 +
  16 +<thead>
  17 +<tr>
  18 + <th width="10"><nobr>{i18n}Document ID{/i18n}</th>
  19 + <th><nobr>{i18n}Filename{/i18n}</th>
  20 + <th width="100"><nobr>{i18n}Extension{/i18n}</th>
  21 + <th width="150"><nobr>{i18n}Mime Type{/i18n}</th>
  22 + <th width="100"><nobr>{i18n}Extractor{/i18n}</th>
  23 + <th width="100"><nobr>{i18n}Index Date{/i18n}</th>
  24 +</tr>
  25 +</thead>
  26 +
  27 +<tbody>
  28 +
  29 +{foreach key=key from=$pending_docs item=pendingDocs}
  30 +<tr>
  31 + <td><a href="/view.php?fDocumentId={$pendingDocs.document_id}">{$pendingDocs.document_id}</a></td>
  32 + <td>{$pendingDocs.filename|truncate:40:'...'}</td>
  33 + <td>{$pendingDocs.filetypes}</td>
  34 + <td>{$pendingDocs.mimetypes}</td>
  35 + <td>{if $pendingDocs.extractor}{$pendingDocs.extractor}{else}<p><font color="#FF9933">{i18n}n/a{/i18n}</font></p>{/if}</td>
  36 + <td>{$pendingDocs.indexdate}</td>
  37 +</tr>
  38 +{/foreach}
  39 +
  40 +</tbody>
  41 +
  42 +</table>
  43 +
44 44 {/if}
45 45 \ No newline at end of file
... ...
plugins/search2/reporting/templates/rescheduledocuments.smarty
1   -<h2>{i18n}Reschedule All Documents{/i18n}</h2>
2   -<p class="descriptiveText">{i18n}Reschedule all documents for indexing{/i18n}</p>
3   -<p class="descriptiveText">{i18n}Please note that rescheduling all documents may take a long time, depending on the size if the repository{/i18n}</h3>
4   -
5   -<form name="rescheduleAllDocs" action="{$smarty.server.PHP_SELF}" method="POST">
6   -<br>
7   -{if $rescheduleDone == true}
8   -<p>Documents Rescheduled</p>
9   -<p>You can view the schedule queue <a href=admin.php?kt_path_info=search/pendingdocuments>here</a></p>
10   -<br>
11   -{/if}
12   -<input type="submit" name="Reschedule" value="{i18n}Reschedule All{/i18n}" />
13   -<input type="hidden" name="rescheduleValue" id="rescheduleValue" value="reschedule" />
14   -
  1 +<h2>{i18n}Reschedule All Documents{/i18n}</h2>
  2 +<p class="descriptiveText">{i18n}This function allows you to re-index your entire repository.{/i18n}</p>
  3 +<p class="descriptiveText">{i18n}Please note that rescheduling all documents may take a long time, depending on the size of the repository.{/i18n}</p>
  4 +
  5 +<form name="rescheduleAllDocs" action="{$smarty.server.PHP_SELF}" method="POST">
  6 +<br>
  7 +{if $rescheduleDone == true}
  8 +<p>Documents Rescheduled</p>
  9 +<p>You can view the schedule queue <a href=admin.php?kt_path_info=search/pendingdocuments>here</a></p>
  10 +<br>
  11 +{/if}
  12 +<input type="submit" name="Reschedule" value="{i18n}Reschedule All{/i18n}" />
  13 +<input type="hidden" name="rescheduleValue" id="rescheduleValue" value="reschedule" />
  14 +
15 15 </form>
16 16 \ No newline at end of file
... ...
search2/indexing/indexerCore.inc.php
... ... @@ -755,34 +755,68 @@ abstract class Indexer
755 755 KTUtil::setSystemSetting('mimeTypesRegistered', true);
756 756 }
757 757  
  758 + private function updatePendingDocumentStatus($documentId, $message, $level)
  759 + {
  760 + $this->indexingHistory .= "\n" . $level . ': ' . $message;
  761 + $message = sanitizeForSQL($this->indexingHistory);
  762 + $sql = "UPDATE index_files SET status_msg='$message' WHERE document_id=$documentId";
  763 + DBUtil::runQuery($sql);
  764 + }
  765 +
  766 + /**
  767 + *
  768 + * @param int $documentId
  769 + * @param string $message
  770 + * @param string $level This may be info, error, debug
  771 + */
  772 + private function logPendingDocumentInfoStatus($documentId, $message, $level)
  773 + {
  774 + $this->updatePendingDocumentStatus($documentId, $message, $level);
  775 + global $default;
  776 +
  777 + switch ($level)
  778 + {
  779 + case 'debug':
  780 + if ($this->debug)
  781 + {
  782 + $default->log->debug($message);
  783 + }
  784 + break;
  785 + default:
  786 + $default->log->$level($message);
  787 + }
  788 + }
  789 +
  790 +
  791 +
/**
 * Loads the file defining an extractor class and returns a new instance of it.
 *
 * @param string $extractorClass Class name of the extractor; the class is expected
 *        in SEARCH2_INDEXER_DIR . 'extractors/' . $extractorClass . '.inc.php'.
 * @return DocumentExtractor A freshly constructed extractor.
 * @throws Exception If the file does not exist, the class is still undefined after
 *         the file is included, or the instance is not a DocumentExtractor.
 */
public function getExtractor($extractorClass)
{
    $includeFile = SEARCH2_INDEXER_DIR . 'extractors/' . $extractorClass . '.inc.php';
    if (!file_exists($includeFile))
    {
        throw new Exception("Extractor file does not exist: $includeFile");
    }

    require_once($includeFile);

    if (!class_exists($extractorClass))
    {
        // Report the requested class; the message previously used an undefined
        // variable and therefore printed an empty name.
        throw new Exception("Extractor '$extractorClass' not defined in file: $includeFile");
    }

    $extractor = new $extractorClass();

    if (!($extractor instanceof DocumentExtractor))
    {
        throw new Exception("Class $extractorClass was expected to be of type DocumentExtractor");
    }

    return $extractor;
}
782 816  
783 817 public static function getIndexingQueue($problemItemsOnly=true)
784 818 {
785   -
  819 +
786 820 if ($problemItemsOnly)
787 821 {
788 822 $sql = "SELECT
... ... @@ -811,16 +845,16 @@ abstract class Indexer
811 845 LEFT JOIN mime_extractors me ON mt.extractor_id=me.id
812 846 WHERE
813 847 (iff.status_msg IS NULL or iff.status_msg = '') AND dmv.status_id=1
814   - ORDER BY indexdate ";
  848 + ORDER BY indexdate ";
815 849 }
816 850 $aResult = DBUtil::getResultArray($sql);
817   -
  851 +
818 852 return $aResult;
819 853 }
820   -
  854 +
/**
 * Convenience wrapper: returns the indexing queue with the
 * "problem items only" filter switched off.
 *
 * @return mixed Whatever getIndexingQueue() returns for that mode.
 */
public static function getPendingIndexingQueue()
{
    $problemItemsOnly = false;

    return self::getIndexingQueue($problemItemsOnly);
}
825 859  
826 860 /**
... ... @@ -922,11 +956,10 @@ abstract class Indexer
922 956 $extractorClass=$docinfo['extractor'];
923 957 $indexDocument = in_array($docinfo['what'], array('A','C'));
924 958 $indexDiscussion = in_array($docinfo['what'], array('A','D'));
  959 + $this->indexingHistory = '';
  960 +
  961 + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Indexing docid: %d extension: '%s' mimetype: '%s' extractor: '%s'"), $docId, $extension,$mimeType,$extractorClass), 'debug');
925 962  
926   - if ($this->debug)
927   - {
928   - if ($this->debug) $default->log->debug(sprintf(_kt("Indexing docid: %d extension: '%s' mimetype: '%s' extractor: '%s'"), $docId, $extension,$mimeType,$extractorClass));
929   - }
930 963  
931 964 if (empty($extractorClass))
932 965 {
... ... @@ -936,13 +969,13 @@ abstract class Indexer
936 969  
937 970 if (!$this->isExtractorEnabled($extractorClass))
938 971 {
939   - $default->log->info(sprintf(_kt("diagnose: Not indexing docid: %d because extractor '%s' is disabled."), $docId, $extractorClass));
  972 + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("diagnose: Not indexing docid: %d because extractor '%s' is disabled."), $docId, $extractorClass), 'info');
940 973 continue;
941 974 }
942 975  
943 976 if ($this->debug)
944 977 {
945   - $default->log->info(sprintf(_kt("Processing docid: %d.\n"),$docId));
  978 + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Processing docid: %d.\n"),$docId), 'info');
946 979 }
947 980  
948 981 $removeFromQueue = true;
... ... @@ -954,26 +987,12 @@ abstract class Indexer
954 987 }
955 988 else
956 989 {
957   - require_once('extractors/' . $extractorClass . '.inc.php');
958   -
959   - if (!class_exists($extractorClass))
960   - {
961   - $default->log->error(sprintf(_kt("indexDocuments: extractor '%s' does not exist."),$extractorClass));
962   - continue;
963   - }
964   -
965   - $extractor = $extractorCache[$extractorClass] = new $extractorClass();
966   - }
967   -
968   - if (is_null($extractor))
969   - {
970   - $default->log->error(sprintf(_kt("indexDocuments: extractor '%s' not resolved - it is null."),$extractorClass));
971   - continue;
  990 + $extractor = $extractorCache[$extractorClass] = $this->getExtractor($extractorClass);
972 991 }
973 992  
974 993 if (!($extractor instanceof DocumentExtractor))
975 994 {
976   - $default->log->error(sprintf(_kt("indexDocuments: extractor '%s' is not a document extractor class."),$extractorClass));
  995 + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("indexDocuments: extractor '%s' is not a document extractor class."),$extractorClass), 'error');
977 996 continue;
978 997 }
979 998  
... ... @@ -995,7 +1014,7 @@ abstract class Indexer
995 1014 $result = @copy($sourceFile, $intermediate);
996 1015 if ($result === false)
997 1016 {
998   - $default->log->error(sprintf(_kt("Could not create intermediate file from document %d"),$docId));
  1017 + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Could not create intermediate file from document %d"),$docId), 'error');
999 1018 // problem. lets try again later. probably permission related. log the issue.
1000 1019 continue;
1001 1020 }
... ... @@ -1012,7 +1031,7 @@ abstract class Indexer
1012 1031 $extractor->setIndexingStatus(null);
1013 1032 $extractor->setExtractionStatus(null);
1014 1033  
1015   - if ($this->debug) $default->log->debug(sprintf(_kt("Extra Info docid: %d Source File: '%s' Target File: '%s'"),$docId,$sourceFile,$targetFile));
  1034 + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Extra Info docid: %d Source File: '%s' Target File: '%s'"),$docId,$sourceFile,$targetFile), 'debug');
1016 1035  
1017 1036 $this->executeHook($extractor, 'pre_extract');
1018 1037 $this->executeHook($extractor, 'pre_extract', $mimeType);
... ... @@ -1033,7 +1052,8 @@ abstract class Indexer
1033 1052  
1034 1053 if (!$indexStatus)
1035 1054 {
1036   - $default->log->error(sprintf(_kt("Problem indexing document %d - indexDocumentAndDiscussion"),$docId));
  1055 + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem indexing document %d - indexDocumentAndDiscussion"),$docId), 'error');
  1056 +
1037 1057 }
1038 1058  
1039 1059 $extractor->setIndexingStatus($indexStatus);
... ... @@ -1042,7 +1062,7 @@ abstract class Indexer
1042 1062 {
1043 1063 if (!$this->filterText($targetFile))
1044 1064 {
1045   - $default->log->error(sprintf(_kt("Problem filtering document %d"),$docId));
  1065 + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem filtering document %d"),$docId), 'error');
1046 1066 }
1047 1067 else
1048 1068 {
... ... @@ -1050,7 +1070,8 @@ abstract class Indexer
1050 1070  
1051 1071 if (!$indexStatus)
1052 1072 {
1053   - $default->log->error(sprintf(_kt("Problem indexing document %d - indexDocument"),$docId));
  1073 + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem indexing document %d - indexDocument"),$docId), 'error');
  1074 + $this->logPendingDocumentInfoStatus($docId, '<output>' . $extractor->output . '</output>', 'error');
1054 1075 }
1055 1076  
1056 1077 $extractor->setIndexingStatus($indexStatus);
... ... @@ -1063,7 +1084,7 @@ abstract class Indexer
1063 1084 else
1064 1085 {
1065 1086 $extractor->setExtractionStatus(false);
1066   - $default->log->error(sprintf(_kt("Could not extract contents from document %d"),$docId));
  1087 + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Could not extract contents from document %d"),$docId), 'error');
1067 1088 }
1068 1089  
1069 1090 $this->executeHook($extractor, 'post_extract', $mimeType);
... ...