diff --git a/plugins/ktcore/KTCorePlugin.php b/plugins/ktcore/KTCorePlugin.php index 1ee992b..e80bcf4 100644 --- a/plugins/ktcore/KTCorePlugin.php +++ b/plugins/ktcore/KTCorePlugin.php @@ -294,23 +294,23 @@ class KTCorePlugin extends KTPlugin { //Search and Indexing $this->registerAdminPage('managemimetypes', 'ManageMimeTypesDispatcher', 'search', - _kt('Mime Types'), _kt('Mime type information.'), + _kt('Mime Types'), _kt('This report lists all mime types and extensions that can be identified by KnowledgeTree.'), '../search2/reporting/ManageMimeTypes.php', null); $this->registerAdminPage('extractorinfo', 'ExtractorInfoDispatcher', 'search', - _kt('Extractor Information'), _kt('Extractor information.'), + _kt('Extractor Information'), _kt('This report lists the text extractors and their supported mime types.'), '../search2/reporting/ExtractorInfo.php', null); $this->registerAdminPage('indexerrors', 'IndexErrorsDispatcher', 'search', - _kt('Document Indexing Diagnostics'), _kt('Document Indexing Diagnostics'), + _kt('Document Indexing Diagnostics'), _kt('This report will help to diagnose problems with document indexing.'), '../search2/reporting/IndexErrors.php', null); $this->registerAdminPage('pendingdocuments', 'PendingDocumentsDispatcher', 'search', - _kt('Pending Documents Indexing Queue'), _kt('Pending Documents Indexing Information'), + _kt('Pending Documents Indexing Queue'), _kt('This report lists documents that are waiting to be indexed.'), '../search2/reporting/PendingDocuments.php', null); $this->registerAdminPage('reschedulealldocuments', 'RescheduleDocumentsDispatcher', 'search', - _kt('Reschedule all documents'), _kt('Reschedule all documents'), + _kt('Reschedule all documents'), _kt('This function allows you to re-index your entire repository.'), '../search2/reporting/RescheduleDocuments.php', null); // misc diff --git a/plugins/search2/reporting/templates/extractorinfo.smarty b/plugins/search2/reporting/templates/extractorinfo.smarty index 
ab27efa..220f6fe 100644 --- a/plugins/search2/reporting/templates/extractorinfo.smarty +++ b/plugins/search2/reporting/templates/extractorinfo.smarty @@ -1,39 +1,39 @@ -

{i18n}Extractor Information{/i18n}

-

{i18n}Important information about the current document extractors{/i18n}

- -{if $extractor_info} - -{foreach key=key from=$extractor_info item=extractor} -
-

{$extractor.name}{if $extractor.active == 1} ({i18n}Active{/i18n}){else} ({i18n}Inactive{/i18n}){/if}

- - - - - - - - - - - - - -{foreach from=$extractor.mimeTypes key=key item=mimeType} - - - - - -{/foreach} - - - -
{i18n}Description{/i18n}{i18n}Mime Types{/i18n}{i18n}Extensions{/i18n}
{$mimeType.description}{$key}{$extractor.mimeTypes.$key.extensions}
- -{/foreach} - -{else} -
-

{i18n}There are no extractors registered.{/i18n}

-{/if} +

{i18n}Extractor Information{/i18n}

+

{i18n}This report lists the text extractors and their supported mime types.{/i18n}

+ +{if $extractor_info} + +{foreach key=key from=$extractor_info item=extractor} +
+

{$extractor.name}{if $extractor.active == 1} ({i18n}Active{/i18n}){else} ({i18n}Inactive{/i18n}){/if}

+ + + + + + + + + + + + + +{foreach from=$extractor.mimeTypes key=key item=mimeType} + + + + + +{/foreach} + + + +
{i18n}Description{/i18n}{i18n}Mime Types{/i18n}{i18n}Extensions{/i18n}
{$mimeType.description}{$key}{$extractor.mimeTypes.$key.extensions}
+ +{/foreach} + +{else} +
+

{i18n}There are no extractors registered.{/i18n}

+{/if} diff --git a/plugins/search2/reporting/templates/indexerrors.smarty b/plugins/search2/reporting/templates/indexerrors.smarty index cb51f5a..b120ea5 100644 --- a/plugins/search2/reporting/templates/indexerrors.smarty +++ b/plugins/search2/reporting/templates/indexerrors.smarty @@ -1,65 +1,65 @@ -{literal} - -{/literal} - -

{i18n}Document Indexing Diagnostics{/i18n}

-

{i18n}This report will help to diagnose problems with document indexing.{/i18n}

- -
-{if $index_errors} - -
- - - - - - - - - - - - - - - - -{foreach key=key from=$index_errors item=indexError} - - - - - - - - - - - - - - - - -{/foreach} - - - -
{i18n}Document ID{/i18n}{i18n}Filename{/i18n}{i18n}Extension{/i18n}{i18n}Mime Type{/i18n}{i18n}Extractor{/i18n}{i18n}Index Date{/i18n}
{$indexError.document_id}{$indexError.filename|truncate:40:'...'}{$indexError.filetypes}{$indexError.mimetypes}{if $pendingDocs.extractor}{$indexError.extractor}{else}

{i18n}n/a{/i18n}

{/if}
{$indexError.indexdate}
 
{$indexError.status_msg}
- -
- - -{else} -

{i18n}There are no indexing issues.{/i18n}

-{/if} +{literal} + +{/literal} + +

{i18n}Document Indexing Diagnostics{/i18n}

+

{i18n}This report will help to diagnose problems with document indexing.{/i18n}

+ +
+{if $index_errors} + + + + + + + + + + + + + + + + + + +{foreach key=key from=$index_errors item=indexError} + + + + + + + + + + + + + + + + +{/foreach} + + + +
{i18n}Document ID{/i18n}{i18n}Filename{/i18n}{i18n}Extension{/i18n}{i18n}Mime Type{/i18n}{i18n}Extractor{/i18n}{i18n}Index Date{/i18n}
{$indexError.document_id}{$indexError.filename|truncate:40:'...'}{$indexError.filetypes}{$indexError.mimetypes}{* test the extractor of the current row; the loop item in this template is $indexError *}{if $indexError.extractor}{$indexError.extractor}{else}

{i18n}n/a{/i18n}

{/if}
{$indexError.indexdate}
 
{$indexError.status_msg}
+ +
+ + +{else} +

{i18n}There are no indexing issues.{/i18n}

+{/if}
\ No newline at end of file diff --git a/plugins/search2/reporting/templates/managemimetypes.smarty b/plugins/search2/reporting/templates/managemimetypes.smarty index 1729a7e..862c72f 100644 --- a/plugins/search2/reporting/templates/managemimetypes.smarty +++ b/plugins/search2/reporting/templates/managemimetypes.smarty @@ -1,48 +1,48 @@ -

{i18n}Manage Mime Types{/i18n}

-

{i18n}Mime type information{/i18n}

- - -{if $mime_types} - - - - - - - - - - - - - - -{foreach from=$mime_types item=mimetype} - - - - - - - -{/foreach} - - -
Icon Extension Mime Type Description Extractor
{$mimetype.filetypes}{$mimetype.mimetypes}{if $mimetype.friendly_name} -{$mimetype.friendly_name} -{else} -

no description

-{/if}
{if $mimetype.extractor} -{$mimetype.extractor} -{else} -

n/a

-{/if}
- - -
Number of Extensions{$numExtensions} -
Number of indexed extensions{$numIndexedExtensions} ( {$indexedPercentage}% ) -
- - -{/if} - +

{i18n}Manage Mime Types{/i18n}

+

{i18n}This report lists all mime types and extensions that can be identified by KnowledgeTree.{/i18n}

+ + +{if $mime_types} + + + + + + + + + + + + + + +{foreach from=$mime_types item=mimetype} + + + + + + + +{/foreach} + + +
Icon Extension Mime Type Description Extractor
{$mimetype.filetypes}{$mimetype.mimetypes}{if $mimetype.friendly_name} +{$mimetype.friendly_name} +{else} +

no description

+{/if}
{if $mimetype.extractor} +{$mimetype.extractor} +{else} +

n/a

+{/if}
+ + +
Number of Extensions{$numExtensions} +
Number of indexed extensions{$numIndexedExtensions} ( {$indexedPercentage}% ) +
+ + +{/if} + diff --git a/plugins/search2/reporting/templates/pendingdocuments.smarty b/plugins/search2/reporting/templates/pendingdocuments.smarty index 3d5c2e4..6aa2b03 100644 --- a/plugins/search2/reporting/templates/pendingdocuments.smarty +++ b/plugins/search2/reporting/templates/pendingdocuments.smarty @@ -1,44 +1,44 @@ -

{i18n}Pending Documents Indexing Queue{/i18n}

-{i18n}This report lists documents that are waiting to be indexed. -

-If a document is not associated with an extractor, no content will be added to the index. These documents can be identified in the list by the extractor column reflecting n/a.{/i18n} -

- -{if empty($pending_docs)} - -{i18n}There are no documents in the indexing queue.{/i18n} - -{else} - - - - - - - - - - - - - - - - - -{foreach key=key from=$pending_docs item=pendingDocs} - - - - - - - - -{/foreach} - - - -
{i18n}Document ID{/i18n}{i18n}Filename{/i18n}{i18n}Extension{/i18n}{i18n}Mime Type{/i18n}{i18n}Extractor{/i18n}{i18n}Index Date{/i18n}
{$pendingDocs.document_id}{$pendingDocs.filename|truncate:40:'...'}{$pendingDocs.filetypes}{$pendingDocs.mimetypes}{if $pendingDocs.extractor}{$pendingDocs.extractor}{else}

{i18n}n/a{/i18n}

{/if}
{$pendingDocs.indexdate}
- +

{i18n}Pending Documents Indexing Queue{/i18n}

+{i18n}This report lists documents that are waiting to be indexed.{/i18n} +

+{i18n}If a document is not associated with an extractor, no content will be added to the index. These documents can be identified in the list by the extractor column reflecting n/a.{/i18n} +

+ +{if empty($pending_docs)} + +{i18n}There are no documents in the indexing queue.{/i18n} + +{else} + + + + + + + + + + + + + + + + + +{foreach key=key from=$pending_docs item=pendingDocs} + + + + + + + + +{/foreach} + + + +
{i18n}Document ID{/i18n}{i18n}Filename{/i18n}{i18n}Extension{/i18n}{i18n}Mime Type{/i18n}{i18n}Extractor{/i18n}{i18n}Index Date{/i18n}
{$pendingDocs.document_id}{$pendingDocs.filename|truncate:40:'...'}{$pendingDocs.filetypes}{$pendingDocs.mimetypes}{if $pendingDocs.extractor}{$pendingDocs.extractor}{else}

{i18n}n/a{/i18n}

{/if}
{$pendingDocs.indexdate}
+ {/if} \ No newline at end of file diff --git a/plugins/search2/reporting/templates/rescheduledocuments.smarty b/plugins/search2/reporting/templates/rescheduledocuments.smarty index 76fc3a9..9038956 100644 --- a/plugins/search2/reporting/templates/rescheduledocuments.smarty +++ b/plugins/search2/reporting/templates/rescheduledocuments.smarty @@ -1,15 +1,15 @@ -

{i18n}Reschedule All Documents{/i18n}

-

{i18n}Reschedule all documents for indexing{/i18n}

-

{i18n}Please note that rescheduling all documents may take a long time, depending on the size if the repository{/i18n} - -

-
-{if $rescheduleDone == true} -

Documents Rescheduled

-

You can view the schedule queue here

-
-{/if} - - - +

{i18n}Reschedule All Documents{/i18n}

+

{i18n}This function allows you to re-index your entire repository.{/i18n}

+

{i18n}Please note that rescheduling all documents may take a long time, depending on the size if the repository.{/i18n} + + +
+{if $rescheduleDone == true} +

Documents Rescheduled

+

You can view the schedule queue here

+
+{/if} + + +
\ No newline at end of file diff --git a/search2/indexing/indexerCore.inc.php b/search2/indexing/indexerCore.inc.php index e025763..f458902 100755 --- a/search2/indexing/indexerCore.inc.php +++ b/search2/indexing/indexerCore.inc.php @@ -755,34 +755,68 @@ abstract class Indexer KTUtil::setSystemSetting('mimeTypesRegistered', true); } + private function updatePendingDocumentStatus($documentId, $message, $level) + { + $this->indexingHistory .= "\n" . $level . ': ' . $message; + $message = sanitizeForSQL($this->indexingHistory); + $sql = "UPDATE index_files SET status_msg='$message' WHERE document_id=$documentId"; + DBUtil::runQuery($sql); + } + + /** + * + * @param int $documentId + * @param string $message + * @param string $level This may be info, error, debug + */ + private function logPendingDocumentInfoStatus($documentId, $message, $level) + { + $this->updatePendingDocumentStatus($documentId, $message, $level); + global $default; + + switch ($level) + { + case 'debug': + if ($this->debug) + { + $default->log->debug($message); + } + break; + default: + $default->log->$level($message); + } + } + + + public function getExtractor($extractorClass) { $includeFile = SEARCH2_INDEXER_DIR . 'extractors/' . $extractorClass . 
'.inc.php'; if (!file_exists($includeFile)) { throw new Exception("Extractor file does not exist: $includeFile"); - } - + } + require_once($includeFile); if (!class_exists($extractorClass)) { - throw new Exception("Extractor '$classname' not defined in file: $includeFile"); + throw new Exception("Extractor '$extractorClass' not defined in file: $includeFile"); /* $extractorClass is the class name passed in */ } - + $extractor = new $extractorClass(); - + if (!($extractor instanceof DocumentExtractor)) { - throw new Exception("Class $classname was expected to be of type DocumentExtractor"); + throw new Exception("Class $extractorClass was expected to be of type DocumentExtractor"); } - + return $extractor; } public static function getIndexingQueue($problemItemsOnly=true) { - + if ($problemItemsOnly) { $sql = "SELECT @@ -811,16 +845,16 @@ abstract class Indexer LEFT JOIN mime_extractors me ON mt.extractor_id=me.id WHERE (iff.status_msg IS NULL or iff.status_msg = '') AND dmv.status_id=1 - ORDER BY indexdate "; + ORDER BY indexdate "; } $aResult = DBUtil::getResultArray($sql); - + return $aResult; } - + public static function getPendingIndexingQueue() { - return Indexer::getIndexingQueue(false); + return Indexer::getIndexingQueue(false); } /** @@ -922,11 +956,10 @@ abstract class Indexer $extractorClass=$docinfo['extractor']; $indexDocument = in_array($docinfo['what'], array('A','C')); $indexDiscussion = in_array($docinfo['what'], array('A','D')); + $this->indexingHistory = ''; + + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Indexing docid: %d extension: '%s' mimetype: '%s' extractor: '%s'"), $docId, $extension,$mimeType,$extractorClass), 'debug'); - if ($this->debug) - { - if ($this->debug) $default->log->debug(sprintf(_kt("Indexing docid: %d extension: '%s' mimetype: '%s' extractor: '%s'"), $docId, $extension,$mimeType,$extractorClass)); - } if (empty($extractorClass)) { @@ -936,13 +969,13 @@ abstract class Indexer if (!$this->isExtractorEnabled($extractorClass)) { - $default->log->info(sprintf(_kt("diagnose: Not indexing docid: %d because extractor '%s' is disabled."), $docId, $extractorClass)); + 
$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("diagnose: Not indexing docid: %d because extractor '%s' is disabled."), $docId, $extractorClass), 'info'); continue; } if ($this->debug) { - $default->log->info(sprintf(_kt("Processing docid: %d.\n"),$docId)); + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Processing docid: %d.\n"),$docId), 'info'); } $removeFromQueue = true; @@ -954,26 +987,12 @@ abstract class Indexer } else { - require_once('extractors/' . $extractorClass . '.inc.php'); - - if (!class_exists($extractorClass)) - { - $default->log->error(sprintf(_kt("indexDocuments: extractor '%s' does not exist."),$extractorClass)); - continue; - } - - $extractor = $extractorCache[$extractorClass] = new $extractorClass(); - } - - if (is_null($extractor)) - { - $default->log->error(sprintf(_kt("indexDocuments: extractor '%s' not resolved - it is null."),$extractorClass)); - continue; + $extractor = $extractorCache[$extractorClass] = $this->getExtractor($extractorClass); } if (!($extractor instanceof DocumentExtractor)) { - $default->log->error(sprintf(_kt("indexDocuments: extractor '%s' is not a document extractor class."),$extractorClass)); + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("indexDocuments: extractor '%s' is not a document extractor class."),$extractorClass), 'error'); continue; } @@ -995,7 +1014,7 @@ abstract class Indexer $result = @copy($sourceFile, $intermediate); if ($result === false) { - $default->log->error(sprintf(_kt("Could not create intermediate file from document %d"),$docId)); + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Could not create intermediate file from document %d"),$docId), 'error'); // problem. lets try again later. probably permission related. log the issue. 
continue; } @@ -1012,7 +1031,7 @@ abstract class Indexer $extractor->setIndexingStatus(null); $extractor->setExtractionStatus(null); - if ($this->debug) $default->log->debug(sprintf(_kt("Extra Info docid: %d Source File: '%s' Target File: '%s'"),$docId,$sourceFile,$targetFile)); + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Extra Info docid: %d Source File: '%s' Target File: '%s'"),$docId,$sourceFile,$targetFile), 'debug'); $this->executeHook($extractor, 'pre_extract'); $this->executeHook($extractor, 'pre_extract', $mimeType); @@ -1033,7 +1052,8 @@ abstract class Indexer if (!$indexStatus) { - $default->log->error(sprintf(_kt("Problem indexing document %d - indexDocumentAndDiscussion"),$docId)); + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem indexing document %d - indexDocumentAndDiscussion"),$docId), 'error'); + } $extractor->setIndexingStatus($indexStatus); @@ -1042,7 +1062,7 @@ abstract class Indexer { if (!$this->filterText($targetFile)) { - $default->log->error(sprintf(_kt("Problem filtering document %d"),$docId)); + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem filtering document %d"),$docId), 'error'); } else { @@ -1050,7 +1070,8 @@ abstract class Indexer if (!$indexStatus) { - $default->log->error(sprintf(_kt("Problem indexing document %d - indexDocument"),$docId)); + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem indexing document %d - indexDocument"),$docId), 'error'); + $this->logPendingDocumentInfoStatus($docId, '' . $extractor->output . '', 'error'); } $extractor->setIndexingStatus($indexStatus); @@ -1063,7 +1084,7 @@ abstract class Indexer else { $extractor->setExtractionStatus(false); - $default->log->error(sprintf(_kt("Could not extract contents from document %d"),$docId)); + $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Could not extract contents from document %d"),$docId), 'error'); } $this->executeHook($extractor, 'post_extract', $mimeType);