diff --git a/plugins/ktcore/KTCorePlugin.php b/plugins/ktcore/KTCorePlugin.php index 1ee992b..e80bcf4 100644 --- a/plugins/ktcore/KTCorePlugin.php +++ b/plugins/ktcore/KTCorePlugin.php @@ -294,23 +294,23 @@ class KTCorePlugin extends KTPlugin { //Search and Indexing $this->registerAdminPage('managemimetypes', 'ManageMimeTypesDispatcher', 'search', - _kt('Mime Types'), _kt('Mime type information.'), + _kt('Mime Types'), _kt('This report lists all mime types and extensions that can be identified by KnowledgeTree.'), '../search2/reporting/ManageMimeTypes.php', null); $this->registerAdminPage('extractorinfo', 'ExtractorInfoDispatcher', 'search', - _kt('Extractor Information'), _kt('Extractor information.'), + _kt('Extractor Information'), _kt('This report lists the text extractors and their supported mime types.'), '../search2/reporting/ExtractorInfo.php', null); $this->registerAdminPage('indexerrors', 'IndexErrorsDispatcher', 'search', - _kt('Document Indexing Diagnostics'), _kt('Document Indexing Diagnostics'), + _kt('Document Indexing Diagnostics'), _kt('This report will help to diagnose problems with document indexing.'), '../search2/reporting/IndexErrors.php', null); $this->registerAdminPage('pendingdocuments', 'PendingDocumentsDispatcher', 'search', - _kt('Pending Documents Indexing Queue'), _kt('Pending Documents Indexing Information'), + _kt('Pending Documents Indexing Queue'), _kt('This report lists documents that are waiting to be indexed.'), '../search2/reporting/PendingDocuments.php', null); $this->registerAdminPage('reschedulealldocuments', 'RescheduleDocumentsDispatcher', 'search', - _kt('Reschedule all documents'), _kt('Reschedule all documents'), + _kt('Reschedule all documents'), _kt('This function allows you to re-index your entire repository.'), '../search2/reporting/RescheduleDocuments.php', null); // misc diff --git a/plugins/search2/reporting/templates/extractorinfo.smarty b/plugins/search2/reporting/templates/extractorinfo.smarty index ab27efa..220f6fe 100644 --- a/plugins/search2/reporting/templates/extractorinfo.smarty +++ b/plugins/search2/reporting/templates/extractorinfo.smarty @@ -1,39 +1,39 @@ -
{i18n}Important information about the current document extractors{/i18n}
- -{if $extractor_info} - -{foreach key=key from=$extractor_info item=extractor} -{i18n}Mime type information{/i18n}
- - -{if $mime_types} - -| Icon | -Extension | -Mime Type | -Description | -Extractor | -
|---|---|---|---|---|
| - | {$mimetype.filetypes} | -{$mimetype.mimetypes} | -{if $mimetype.friendly_name}
-{$mimetype.friendly_name}
-{else}
- no description -{/if} |
-{if $mimetype.extractor}
-{$mimetype.extractor}
-{else}
- n/a -{/if} |
-
| Number of Extensions | {$numExtensions} - |
| Number of indexed extensions | {$numIndexedExtensions} ( {$indexedPercentage}% ) - |
{i18n}This report lists all mime types and extensions that can be identified by KnowledgeTree.{/i18n}
+ + +{if $mime_types} + +| Icon | +Extension | +Mime Type | +Description | +Extractor | +
|---|---|---|---|---|
| + | {$mimetype.filetypes} | +{$mimetype.mimetypes} | +{if $mimetype.friendly_name}
+{$mimetype.friendly_name}
+{else}
+ no description +{/if} |
+{if $mimetype.extractor}
+{$mimetype.extractor}
+{else}
+ n/a +{/if} |
+
| Number of Extensions | {$numExtensions} + |
| Number of indexed extensions | {$numIndexedExtensions} ( {$indexedPercentage}% ) + |
| {$pendingDocs.document_id} | -{$pendingDocs.filename|truncate:40:'...'} | -{$pendingDocs.filetypes} | -{$pendingDocs.mimetypes} | -{if $pendingDocs.extractor}{$pendingDocs.extractor}{else} {i18n}n/a{/i18n} {/if} |
- {$pendingDocs.indexdate} | -
| {$pendingDocs.document_id} | +{$pendingDocs.filename|truncate:40:'...'} | +{$pendingDocs.filetypes} | +{$pendingDocs.mimetypes} | +{if $pendingDocs.extractor}{$pendingDocs.extractor}{else} {i18n}n/a{/i18n} {/if} |
+ {$pendingDocs.indexdate} | +
{i18n}Reschedule all documents for indexing{/i18n}
-{i18n}Please note that rescheduling all documents may take a long time, depending on the size if the repository{/i18n} - -
\ No newline at end of file diff --git a/search2/indexing/indexerCore.inc.php b/search2/indexing/indexerCore.inc.php index e025763..f458902 100755 --- a/search2/indexing/indexerCore.inc.php +++ b/search2/indexing/indexerCore.inc.php @@ -755,34 +755,68 @@ abstract class Indexer KTUtil::setSystemSetting('mimeTypesRegistered', true); } + private function updatePendingDocumentStatus($documentId, $message, $level) + { + $this->indexingHistory .= "\n" . $level . ': ' . $message; + $message = sanitizeForSQL($this->indexingHistory); + $sql = "UPDATE index_files SET status_msg='$message' WHERE document_id=$documentId"; + DBUtil::runQuery($sql); + } + + /** + * + * @param int $documentId + * @param string $message + * @param string $level This may be info, error, debug + */ + private function logPendingDocumentInfoStatus($documentId, $message, $level) + { + $this->updatePendingDocumentStatus($documentId, $message, $level); + global $default; + + switch ($level) + { + case 'debug': + if ($this->debug) + { + $default->log->debug($message); + } + break; + default: + $default->log->$level($message); + } + } + + + public function getExtractor($extractorClass) { $includeFile = SEARCH2_INDEXER_DIR . 'extractors/' . $extractorClass . '.inc.php'; if (!file_exists($includeFile)) { throw new Exception("Extractor file does not exist: $includeFile"); - } - + } + require_once($includeFile); if (!class_exists($extractorClass)) { - throw new Exception("Extractor '$classname' not defined in file: $includeFile"); + throw new Exception("Extractor '$classname' not defined in file: $includeFile"); } - + $extractor = new $extractorClass(); - + if (!($extractor instanceof DocumentExtractor)) { throw new Exception("Class $classname was expected to be of type DocumentExtractor"); } - + return $extractor; } public static function getIndexingQueue($problemItemsOnly=true) { - + if ($problemItemsOnly) { $sql = "SELECT @@ -811,16 +845,16 @@ abstract class Indexer LEFT JOIN mime_extractors me ON mt.extractor_id=me.id WHERE (iff.status_msg IS NULL or iff.status_msg = '') AND dmv.status_id=1 - ORDER BY indexdate "; + ORDER BY indexdate "; } $aResult = DBUtil::getResultArray($sql); - + return $aResult; } - + public static function getPendingIndexingQueue() { - return Indexer::getIndexingQueue(false); + return Indexer::getIndexingQueue(false); } /** @@ -922,11 +956,10 @@ abstract class Indexer $extractorClass=$docinfo['extractor']; $indexDocument = in_array($docinfo['what'], array('A','C')); $indexDiscussion = in_array($docinfo['what'], array('A','D')); + $this->indexingHistory = ''; + + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Indexing docid: %d extension: '%s' mimetype: '%s' extractor: '%s'"), $docId, $extension,$mimeType,$extractorClass), 'debug'); - if ($this->debug) - { - if ($this->debug) $default->log->debug(sprintf(_kt("Indexing docid: %d extension: '%s' mimetype: '%s' extractor: '%s'"), $docId, $extension,$mimeType,$extractorClass)); - } if (empty($extractorClass)) { @@ -936,13 +969,13 @@ abstract class Indexer if (!$this->isExtractorEnabled($extractorClass)) { - $default->log->info(sprintf(_kt("diagnose: Not indexing docid: %d because extractor '%s' is disabled."), $docId, $extractorClass)); + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("diagnose: Not indexing docid: %d because extractor '%s' is disabled."), $docId, $extractorClass), 'info'); continue; } if ($this->debug) { - $default->log->info(sprintf(_kt("Processing docid: %d.\n"),$docId)); + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Processing docid: %d.\n"),$docId), 'info'); } $removeFromQueue = true; @@ -954,26 +987,12 @@ abstract class Indexer } else { - require_once('extractors/' . $extractorClass . '.inc.php'); - - if (!class_exists($extractorClass)) - { - $default->log->error(sprintf(_kt("indexDocuments: extractor '%s' does not exist."),$extractorClass)); - continue; - } - - $extractor = $extractorCache[$extractorClass] = new $extractorClass(); - } - - if (is_null($extractor)) - { - $default->log->error(sprintf(_kt("indexDocuments: extractor '%s' not resolved - it is null."),$extractorClass)); - continue; + $extractor = $extractorCache[$extractorClass] = $this->getExtractor($extractorClass); } if (!($extractor instanceof DocumentExtractor)) { - $default->log->error(sprintf(_kt("indexDocuments: extractor '%s' is not a document extractor class."),$extractorClass)); + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("indexDocuments: extractor '%s' is not a document extractor class."),$extractorClass), 'error'); continue; } @@ -995,7 +1014,7 @@ abstract class Indexer $result = @copy($sourceFile, $intermediate); if ($result === false) { - $default->log->error(sprintf(_kt("Could not create intermediate file from document %d"),$docId)); + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Could not create intermediate file from document %d"),$docId), 'error'); // problem. lets try again later. probably permission related. log the issue. continue; } @@ -1012,7 +1031,7 @@ abstract class Indexer $extractor->setIndexingStatus(null); $extractor->setExtractionStatus(null); - if ($this->debug) $default->log->debug(sprintf(_kt("Extra Info docid: %d Source File: '%s' Target File: '%s'"),$docId,$sourceFile,$targetFile)); + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Extra Info docid: %d Source File: '%s' Target File: '%s'"),$docId,$sourceFile,$targetFile), 'debug'); $this->executeHook($extractor, 'pre_extract'); $this->executeHook($extractor, 'pre_extract', $mimeType); @@ -1033,7 +1052,8 @@ abstract class Indexer if (!$indexStatus) { - $default->log->error(sprintf(_kt("Problem indexing document %d - indexDocumentAndDiscussion"),$docId)); + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem indexing document %d - indexDocumentAndDiscussion"),$docId), 'error'); + } $extractor->setIndexingStatus($indexStatus); @@ -1042,7 +1062,7 @@ abstract class Indexer { if (!$this->filterText($targetFile)) { - $default->log->error(sprintf(_kt("Problem filtering document %d"),$docId)); + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem filtering document %d"),$docId), 'error'); } else { @@ -1050,7 +1070,8 @@ abstract class Indexer if (!$indexStatus) { - $default->log->error(sprintf(_kt("Problem indexing document %d - indexDocument"),$docId)); + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem indexing document %d - indexDocument"),$docId), 'error'); + $this->logPendingDocumentInfoStatus($docId, '', 'error'); } $extractor->setIndexingStatus($indexStatus); @@ -1063,7 +1084,7 @@ abstract class Indexer else { $extractor->setExtractionStatus(false); - $default->log->error(sprintf(_kt("Could not extract contents from document %d"),$docId)); + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Could not extract contents from document %d"),$docId), 'error'); } $this->executeHook($extractor, 'post_extract', $mimeType);