diff --git a/lib/mime.inc.php b/lib/mime.inc.php
index 693d85e..dad1b44 100644
--- a/lib/mime.inc.php
+++ b/lib/mime.inc.php
@@ -207,6 +207,44 @@ class KTMime {
     function stripAllButExtension($sFileName) {
         return strtolower(substr($sFileName, strrpos($sFileName, ".")+1, strlen($sFileName) - strrpos($sFileName, ".")));
     }
+
+    /**
+     * Static helper: one row per mime type (id, filetypes, mimetypes, icon_path,
+     * friendly_name) plus the name of its extractor, ordered by filetypes.
+     * @return mixed the result of DBUtil::getResultArray() for that query
+     */
+    function getAllMimeTypesInformation()
+    {
+        $sTable = KTUtil::getTableName('mimetypes');
+        $aQuery = array('SELECT MT.id, MT.filetypes, MT.mimetypes, MT.icon_path, MT.friendly_name, ME.name as extractor FROM '
+            . $sTable .' MT LEFT JOIN mime_extractors ME ON(MT.extractor_id = ME.id) ORDER BY MT.filetypes', array());
+        $res = DBUtil::getResultArray($aQuery);
+        return $res;
+    }
+
+    /**
+     * Get id, name and active flag of every row in mime_extractors, ordered by name.
+     * @return mixed the result of DBUtil::getResultArray() for that query
+     */
+    function getMimeExtractorInformation()
+    {
+        $aQuery = array('SELECT id, name, active FROM mime_extractors ORDER BY name', array());
+        $res = DBUtil::getResultArray($aQuery);
+        return $res;
+    }
+
+    /**
+     * Given a mime type, get the friendly name and file type of every matching row.
+     * @param string $sMimeType value to match against the mimetypes column
+     */
+    function getFriendlyNameAndExtension($sMimeType)
+    {
+        $sTable = KTUtil::getTableName('mimetypes');
+        $sQuery = "SELECT friendly_name, filetypes FROM " . $sTable . " WHERE mimetypes = ?";
+        $aQuery = array($sQuery, array($sMimeType));
+        $res = DBUtil::getResultArray($aQuery);
+        return $res;
+    }
 }
 $_KT_icon_path_cache = array();
diff --git a/plugins/ktcore/KTCorePlugin.php b/plugins/ktcore/KTCorePlugin.php
index 5e59514..1570229 100644
--- a/plugins/ktcore/KTCorePlugin.php
+++ b/plugins/ktcore/KTCorePlugin.php
@@ -232,6 +232,8 @@ class KTCorePlugin extends KTPlugin {
             _kt('Manage checked-out, archived and deleted documents.'));
         $this->registerAdminCategory('documents', _kt('Document Metadata and Workflow Configuration'),
             _kt('Configure the document metadata: Document Types, Document Fieldsets, Link Types and Workflows.'));
+        $this->registerAdminCategory('search', _kt('Search and Indexing'),
+            _kt('Search and Indexing Settings'));
         $this->registerAdminCategory('misc', _kt('Miscellaneous'),
             _kt('Various settings which do not fit into the other categories, including managing help and saved searches.'));
@@ -289,7 +291,26 @@ class KTCorePlugin extends KTPlugin {
             _kt('Restore or Expunge Deleted Documents'), _kt('Restore previously deleted documents, or permanently expunge them.'),
             'admin/deletedDocuments.php', null);
-
+        //Search and Indexing
+        $this->registerAdminPage('managemimetypes', 'ManageMimeTypesDispatcher', 'search',
+            _kt('Mime Types'), _kt('Mime type information.'),
+            '../search2/reporting/ManageMimeTypes.php', null);
+
+        $this->registerAdminPage('extractorinfo', 'ExtractorInfoDispatcher', 'search',
+            _kt('Extractor Information'), _kt('Extractor information.'),
+            '../search2/reporting/ExtractorInfo.php', null);
+
+        $this->registerAdminPage('indexerrors', 'IndexErrorsDispatcher', 'search',
+            _kt('Document Indexing Diagnostics'), _kt('Document Indexing Diagnostics'),
+            '../search2/reporting/IndexErrors.php', null);
+
+        $this->registerAdminPage('pendingdocuments', 'PendingDocumentsDispatcher', 'search',
+            _kt('Pending Documents Indexing Queue'), _kt('Pending Documents Indexing Information'),
+            '../search2/reporting/PendingDocuments.php', null);
+
+        $this->registerAdminPage('reschedulealldocuments', 'RescheduleDocumentsDispatcher', 'search',
+            _kt('Reschedule all documents'), _kt('Reschedule all documents'),
+            '../search2/reporting/RescheduleDocuments.php', null);
         // misc
         $this->registerAdminPage('helpmanagement', 'ManageHelpDispatcher', 'misc',
diff --git a/plugins/search2/reporting/ExtractorInfo.php b/plugins/search2/reporting/ExtractorInfo.php
new file mode 100644
index 0000000..0e6fc1d
--- /dev/null
+++ b/plugins/search2/reporting/ExtractorInfo.php
@@ -0,0 +1,147 @@
+.
+ *
+ * You can contact The Jam Warehouse Software (Pty) Limited, Unit 1, Tramber Place,
+ * Blake Street, Observatory, 7925 South Africa. or email info@knowledgetree.com.
+ *
+ * The interactive user interfaces in modified source and object code versions
+ * of this program must display Appropriate Legal Notices, as required under
+ * Section 5 of the GNU General Public License version 3.
+ *
+ * In accordance with Section 7(b) of the GNU General Public License version 3,
+ * these Appropriate Legal Notices must retain the display of the "Powered by
+ * KnowledgeTree" logo and retain the original copyright notice. If the display of the
+ * logo is not reasonably feasible for technical reasons, the Appropriate Legal Notices
+ * must display the words "Powered by KnowledgeTree" and retain the original
+ * copyright notice.
+ * Contributor( s): ______________________________________
+ *
+ */
+
+require_once(KT_LIB_DIR . '/dispatcher.inc.php');
+require_once(KT_LIB_DIR . '/templating/templating.inc.php');
+require_once(KT_LIB_DIR . '/mime.inc.php');
+
+class ExtractorInfoDispatcher extends KTAdminDispatcher {
+
+
+    function check() {
+        $this->aBreadcrumbs[] = array(
+            'url' => $_SERVER['PHP_SELF'],
+            'name' => _kt('Extractor Information'),
+        );
+        return parent::check();
+    }
+
+    /**
+     * Render the extractor overview: every row of mime_extractors together with
+     * the mime types (description and extensions) mapped to it in the database.
+     *
+     * @return object the 'extractorinfo' template with its data set
+     */
+    function do_main() {
+
+        //registerTypes registers the mime types and populates the needed tables.
+        $indexer = Indexer::get();
+        $indexer->registerTypes();
+
+        $oTemplating =& KTTemplating::getSingleton();
+        $oTemplating->addLocation('Extractor Information', '/plugins/search2/reporting/templates');
+
+        $oTemplate =& $oTemplating->loadTemplate('extractorinfo');
+
+        $aExtractorInfo = KTMime::getMimeExtractorInformation();
+
+        // Anything but an array means the lookup failed; render an empty list.
+        if (!is_array($aExtractorInfo))
+        {
+            $aExtractorInfo = array();
+        }
+
+        foreach ($aExtractorInfo as $key => $info)
+        {
+            // The mime types come from the database, so the extractor class is not
+            // instantiated here: a stale mime_extractors row must not break the page.
+            $info['mimeTypes'] = array();
+
+            foreach ($this->getSupportedMimeTypesDB($info['name']) as $mimeType)
+            {
+                $aMimeInfo = KTMime::getFriendlyNameAndExtension($mimeType);
+                if (!is_array($aMimeInfo) || empty($aMimeInfo))
+                {
+                    continue;
+                }
+
+                // Collect the filetypes of every matching row; the description is the first row's.
+                $extensions = array();
+                foreach ($aMimeInfo as $item)
+                {
+                    $extensions[] = $item['filetypes'];
+                }
+
+                $info['mimeTypes'][$mimeType] = array(
+                    'description' => $aMimeInfo[0]['friendly_name'],
+                    'extensions' => implode(', ', $extensions)
+                );
+            }
+
+            $aExtractorInfo[$key] = $info;
+        }
+
+        $oTemplate->setData(array(
+            'context' => $this,
+            'extractor_info' => $aExtractorInfo
+        ));
+        return $oTemplate;
+    }
+
+    /**
+     * Distinct mime types mapped to the named extractor in the database.
+     *
+     * @param string $sExtractorName value matched against mime_extractors.name
+     * @return array list of mime type strings, possibly empty
+     */
+    function getSupportedMimeTypesDB($sExtractorName)
+    {
+        $sQuery = "SELECT MT.mimetypes FROM mime_extractors as ME LEFT JOIN mime_types as MT ON " .
+            "(ME.id = MT.extractor_id) WHERE ME.name = ?";
+        $aRows = DBUtil::getResultArray(array($sQuery, array($sExtractorName)));
+        if (!is_array($aRows))
+        {
+            return array();
+        }
+
+        $aSeen = array();
+        foreach ($aRows as $aRow)
+        {
+            // The LEFT JOIN returns a NULL mime type for an extractor without any.
+            if ($aRow['mimetypes'] === null || $aRow['mimetypes'] === '')
+            {
+                continue;
+            }
+            $aSeen[$aRow['mimetypes']] = true;
+        }
+        return array_keys($aSeen);
+    }
+
+}
+
+
+?>
diff --git a/plugins/search2/reporting/IndexErrors.php b/plugins/search2/reporting/IndexErrors.php
new file mode 100644
index 0000000..0f25265
--- /dev/null
+++ b/plugins/search2/reporting/IndexErrors.php
@@ -0,0 +1,112 @@
+.
+ *
+ * You can contact The Jam Warehouse Software (Pty) Limited, Unit 1, Tramber Place,
+ * Blake Street, Observatory, 7925 South Africa. or email info@knowledgetree.com.
+ *
+ * The interactive user interfaces in modified source and object code versions
+ * of this program must display Appropriate Legal Notices, as required under
+ * Section 5 of the GNU General Public License version 3.
+ *
+ * In accordance with Section 7(b) of the GNU General Public License version 3,
+ * these Appropriate Legal Notices must retain the display of the "Powered by
+ * KnowledgeTree" logo and retain the original copyright notice. If the display of the
+ * logo is not reasonably feasible for technical reasons, the Appropriate Legal Notices
+ * must display the words "Powered by KnowledgeTree" and retain the original
+ * copyright notice.
+ * Contributor( s): ______________________________________
+ *
+ */
+
+require_once(KT_LIB_DIR . '/dispatcher.inc.php');
+require_once(KT_LIB_DIR . '/templating/templating.inc.php');
+require_once(KT_LIB_DIR . '/mime.inc.php');
+
+class IndexErrorsDispatcher extends KTAdminDispatcher {
+
+    function check() {
+        $this->aBreadcrumbs[] = array(
+            'url' => $_SERVER['PHP_SELF'],
+            'name' => _kt('Document Indexing Diagnostics'),
+        );
+        return parent::check();
+    }
+
+    /**
+     * Optionally reschedule documents (the selected ones, or everything returned
+     * by Indexer::getIndexingQueue()) and then render that queue.
+     *
+     * @return object the 'indexerrors' template with its data set
+     */
+    function do_main() {
+
+        //registerTypes registers the mime types and populates the needed tables.
+        $indexer = Indexer::get();
+        $indexer->registerTypes();
+
+        // arrayGet avoids an undefined index notice when the page is simply viewed.
+        $sAction = KTUtil::arrayGet($_REQUEST, 'rescheduleValue', '');
+
+        if ($sAction == 'reschedule')
+        {
+            $aSelected = KTUtil::arrayGet($_REQUEST, 'index_error', array());
+            if (!is_array($aSelected))
+            {
+                $aSelected = array();
+            }
+
+            foreach (array_keys($aSelected) as $sDocId)
+            {
+                // The keys are request data and end up in an SQL statement:
+                // only plain numeric document ids are passed on.
+                if (ctype_digit((string) $sDocId))
+                {
+                    Indexer::reindexDocument((int) $sDocId);
+                }
+            }
+        }
+        else if ($sAction == 'rescheduleall')
+        {
+            foreach (Indexer::getIndexingQueue() as $aDocValues)
+            {
+                Indexer::reindexDocument($aDocValues['document_id']);
+            }
+        }
+
+        $oTemplating =& KTTemplating::getSingleton();
+        $oTemplating->addLocation('Index Errors', '/plugins/search2/reporting/templates');
+
+        $oTemplate =& $oTemplating->loadTemplate('indexerrors');
+
+        $aIndexerValues = Indexer::getIndexingQueue();
+
+        $oTemplate->setData(array(
+            'context' => $this,
+            'index_errors' => $aIndexerValues
+        ));
+        return $oTemplate;
+    }
+
+
+
+}
+
+
+?>
diff --git a/plugins/search2/reporting/ManageMimeTypes.php b/plugins/search2/reporting/ManageMimeTypes.php
new file mode 100644
index 0000000..cea5ff4
--- /dev/null
+++ b/plugins/search2/reporting/ManageMimeTypes.php
@@ -0,0 +1,98 @@
+.
+ *
+ * You can contact The Jam Warehouse Software (Pty) Limited, Unit 1, Tramber Place,
+ * Blake Street, Observatory, 7925 South Africa. or email info@knowledgetree.com.
+ *
+ * The interactive user interfaces in modified source and object code versions
+ * of this program must display Appropriate Legal Notices, as required under
+ * Section 5 of the GNU General Public License version 3.
+ *
+ * In accordance with Section 7(b) of the GNU General Public License version 3,
+ * these Appropriate Legal Notices must retain the display of the "Powered by
+ * KnowledgeTree" logo and retain the original copyright notice. If the display of the
+ * logo is not reasonably feasible for technical reasons, the Appropriate Legal Notices
+ * must display the words "Powered by KnowledgeTree" and retain the original
+ * copyright notice.
+ * Contributor( s): ______________________________________
+ *
+ */
+require_once(KT_LIB_DIR . '/dispatcher.inc.php');
+require_once(KT_LIB_DIR . '/templating/templating.inc.php');
+require_once(KT_LIB_DIR . '/mime.inc.php');
+
+class ManageMimeTypesDispatcher extends KTAdminDispatcher {
+
+    function check() {
+        $this->aBreadcrumbs[] = array(
+            'url' => $_SERVER['PHP_SELF'],
+            'name' => _kt('Manage Mime Types'),
+        );
+        return parent::check();
+    }
+
+    function do_main() {
+
+        //registerTypes registers the mime types and populates the needed tables.
+        $indexer = Indexer::get();
+        $indexer->registerTypes();
+
+        $oTemplating =& KTTemplating::getSingleton();
+        $oTemplating->addLocation('Manage Mime Type Plugin', '/plugins/search2/reporting/templates');
+
+        $oTemplate =& $oTemplating->loadTemplate('managemimetypes');
+
+        $aMimeTypes = KTMime::getAllMimeTypesInformation();
+
+        // $indexer from above is reused for the extractor lookups below.
+
+        $numExtensions = 0;
+        $numIndexedExtensions = 0;
+
+        foreach($aMimeTypes as $key=>$mimeType)
+        {
+            $extractorClass = $mimeType['extractor'];
+            $numExtensions++;
+            if (empty($extractorClass))
+            {
+                continue;
+            }
+            $extractor = $indexer->getExtractor($extractorClass);
+            $aMimeTypes[$key]['extractor'] = $extractor->getDisplayName();
+            $numIndexedExtensions++;
+        }
+
+        $indexedPercentage = 0;
+        if ($numExtensions > 0)
+        {
+            $indexedPercentage = number_format(($numIndexedExtensions * 100)/$numExtensions,2,'.',',');
+        }
+
+        $oTemplate->setData(array(
+            'context' => $this,
+            'mime_types' => $aMimeTypes,
+            'numExtensions'=>$numExtensions,
+            'numIndexedExtensions'=>$numIndexedExtensions,
+            'indexedPercentage'=>$indexedPercentage
+
+        ));
+        return $oTemplate;
+    }
+}
\ No newline at end of file
diff --git a/plugins/search2/reporting/PendingDocuments.php b/plugins/search2/reporting/PendingDocuments.php
new file mode 100644
index 0000000..d13cbe7
--- /dev/null
+++ b/plugins/search2/reporting/PendingDocuments.php
@@ -0,0 +1,79 @@
+.
+ *
+ * You can contact The Jam Warehouse Software (Pty) Limited, Unit 1, Tramber Place,
+ * Blake Street, Observatory, 7925 South Africa. or email info@knowledgetree.com.
+ *
+ * The interactive user interfaces in modified source and object code versions
+ * of this program must display Appropriate Legal Notices, as required under
+ * Section 5 of the GNU General Public License version 3.
+ *
+ * In accordance with Section 7(b) of the GNU General Public License version 3,
+ * these Appropriate Legal Notices must retain the display of the "Powered by
+ * KnowledgeTree" logo and retain the original copyright notice. If the display of the
+ * logo is not reasonably feasible for technical reasons, the Appropriate Legal Notices
+ * must display the words "Powered by KnowledgeTree" and retain the original
+ * copyright notice.
+ * Contributor( s): ______________________________________
+ *
+ */
+
+require_once(KT_LIB_DIR . '/dispatcher.inc.php');
+require_once(KT_LIB_DIR . '/templating/templating.inc.php');
+
+class PendingDocumentsDispatcher extends KTAdminDispatcher
+{
+    function check() {
+        $this->aBreadcrumbs[] = array(
+            'url' => $_SERVER['PHP_SELF'],
+            'name' => _kt('Pending Documents Indexing Queue'),
+        );
+        return parent::check();
+    }
+
+    /**
+     * Render the pending indexing queue as returned by
+     * Indexer::getPendingIndexingQueue().
+     *
+     * @return object the 'pendingdocuments' template with its data set
+     */
+    function do_main() {
+
+        //registerTypes registers the mime types and populates the needed tables.
+        $indexer = Indexer::get();
+        $indexer->registerTypes();
+
+        $aPendingDocs = Indexer::getPendingIndexingQueue();
+
+        $oTemplating =& KTTemplating::getSingleton();
+        $oTemplating->addLocation('Pending Documents', '/plugins/search2/reporting/templates');
+        $oTemplate =& $oTemplating->loadTemplate('pendingdocuments');
+
+        $oTemplate->setData(array(
+            'context' => $this,
+            'pending_docs' => $aPendingDocs
+        ));
+        return $oTemplate;
+    }
+
+}
+
+
+?>
diff --git a/plugins/search2/reporting/RescheduleDocuments.php b/plugins/search2/reporting/RescheduleDocuments.php
new file mode 100644
index 0000000..f9eb65f
--- /dev/null
+++ b/plugins/search2/reporting/RescheduleDocuments.php
@@ -0,0 +1,83 @@
+.
+ *
+ * You can contact The Jam Warehouse Software (Pty) Limited, Unit 1, Tramber Place,
+ * Blake Street, Observatory, 7925 South Africa. or email info@knowledgetree.com.
+ *
+ * The interactive user interfaces in modified source and object code versions
+ * of this program must display Appropriate Legal Notices, as required under
+ * Section 5 of the GNU General Public License version 3.
+ *
+ * In accordance with Section 7(b) of the GNU General Public License version 3,
+ * these Appropriate Legal Notices must retain the display of the "Powered by
+ * KnowledgeTree" logo and retain the original copyright notice. If the display of the
+ * logo is not reasonably feasible for technical reasons, the Appropriate Legal Notices
+ * must display the words "Powered by KnowledgeTree" and retain the original
+ * copyright notice.
+ * Contributor( s): ______________________________________
+ *
+ */
+
+require_once(KT_LIB_DIR . '/dispatcher.inc.php');
+require_once(KT_LIB_DIR . '/templating/templating.inc.php');
+
+class RescheduleDocumentsDispatcher extends KTAdminDispatcher
+{
+    function check() {
+        $this->aBreadcrumbs[] = array(
+            'url' => $_SERVER['PHP_SELF'],
+            'name' => _kt('Reschedule all documents'),
+        );
+        return parent::check();
+    }
+
+    /**
+     * Show the reschedule page; when the request carries
+     * rescheduleValue=reschedule, call Indexer::indexAll() first.
+     *
+     * @return object the 'rescheduledocuments' template with its data set
+     */
+    function do_main() {
+
+        //registerTypes registers the mime types and populates the needed tables.
+        $indexer = Indexer::get();
+        $indexer->registerTypes();
+
+        $oTemplating =& KTTemplating::getSingleton();
+        $oTemplating->addLocation('Reschedule Documents', '/plugins/search2/reporting/templates');
+
+        $oTemplate =& $oTemplating->loadTemplate('rescheduledocuments');
+
+        // arrayGet avoids an undefined index notice when the page is simply viewed.
+        $bReschedule = (KTUtil::arrayGet($_REQUEST, 'rescheduleValue', '') === 'reschedule');
+        if ($bReschedule)
+        {
+            Indexer::indexAll();
+        }
+
+        $oTemplate->setData(array(
+            'context' => $this,
+            'rescheduleDone' => $bReschedule
+        ));
+        return $oTemplate;
+    }
+}
+
+?>
diff --git a/plugins/search2/reporting/templates/extractorinfo.smarty b/plugins/search2/reporting/templates/extractorinfo.smarty
new file mode 100644
index 0000000..ab27efa
--- /dev/null
+++ b/plugins/search2/reporting/templates/extractorinfo.smarty
@@ -0,0 +1,39 @@
+
{i18n}Important information about the current document extractors{/i18n}
+ +{if $extractor_info} + +{foreach key=key from=$extractor_info item=extractor} +{i18n}Mime type information{/i18n}
+ + +{if $mime_types} + +| Icon | +Extension | +Mime Type | +Description | +Extractor | +
|---|---|---|---|---|
| + | {$mimetype.filetypes} | +{$mimetype.mimetypes} | +{if $mimetype.friendly_name}
+{$mimetype.friendly_name}
+{else}
+ no description +{/if} |
+{if $mimetype.extractor}
+{$mimetype.extractor}
+{else}
+ n/a +{/if} |
+
| Number of Extensions | {$numExtensions} + |
| Number of indexed extensions | {$numIndexedExtensions} ( {$indexedPercentage}% ) + |
| {$pendingDocs.document_id} | +{$pendingDocs.filename|truncate:40:'...'} | +{$pendingDocs.filetypes} | +{$pendingDocs.mimetypes} | +{if $pendingDocs.extractor}{$pendingDocs.extractor}{else} {i18n}n/a{/i18n} {/if} |
+ {$pendingDocs.indexdate} | +
{i18n}Reschedule all documents for indexing{/i18n}
+{i18n}Please note that rescheduling all documents may take a long time, depending on the size of the repository{/i18n} + +
\ No newline at end of file
diff --git a/search2/indexing/indexerCore.inc.php b/search2/indexing/indexerCore.inc.php
index 50af536..e025763 100755
--- a/search2/indexing/indexerCore.inc.php
+++ b/search2/indexing/indexerCore.inc.php
@@ -36,6 +36,7 @@
  *
  */
+define('SEARCH2_INDEXER_DIR',realpath(dirname(__FILE__)) . '/');
 require_once('indexing/extractorCore.inc.php');
 require_once(KT_DIR . '/plugins/ktcore/scheduler/schedulerUtil.php');
@@ -492,6 +493,28 @@ abstract class Indexer
         $default->log->debug("index: Queuing indexing of $document_id");
     }
+    /**
+     * Set processdate to NULL on every row of index_files.
+     */
+    public static function reindexQueue()
+    {
+        $sql = "UPDATE index_files SET processdate = null";
+        DBUtil::runQuery($sql);
+    }
+
+    /**
+     * Set processdate and status_msg to NULL for the index_files row(s) of one document.
+     *
+     * @param int $documentId id of the document to reschedule
+     */
+    public static function reindexDocument($documentId)
+    {
+        // The id is concatenated into the statement, so force it to an integer first.
+        $sql = "UPDATE index_files SET processdate=null, status_msg=null WHERE document_id=" . (int) $documentId;
+        DBUtil::runQuery($sql);
+    }
+
+
     public static function indexAll()
     {
@@ -741,6 +764,77 @@ abstract class Indexer
         KTUtil::setSystemSetting('mimeTypesRegistered', true);
     }
+    /**
+     * Load and instantiate an extractor class from the extractors directory.
+     *
+     * @param string $extractorClass class name; also the base name of its .inc.php file
+     * @return DocumentExtractor
+     * @throws Exception if the file or class is missing, or the class is not a DocumentExtractor
+     */
+    public function getExtractor($extractorClass)
+    {
+        $includeFile = SEARCH2_INDEXER_DIR . 'extractors/' . $extractorClass . '.inc.php';
+        if (!file_exists($includeFile))
+        {
+            throw new Exception("Extractor file does not exist: $includeFile");
+        }
+
+        require_once($includeFile);
+
+        if (!class_exists($extractorClass))
+        {
+            throw new Exception("Extractor '$extractorClass' not defined in file: $includeFile");
+        }
+
+        $extractor = new $extractorClass();
+
+        if (!($extractor instanceof DocumentExtractor))
+        {
+            throw new Exception("Class $extractorClass was expected to be of type DocumentExtractor");
+        }
+
+        return $extractor;
+    }
+
+    /**
+     * List index_files entries joined with their document, mime type and extractor.
+     *
+     * @param bool $problemItemsOnly true: rows that have a status_msg;
+     *                               false: rows whose status_msg is NULL or empty
+     * @return mixed whatever DBUtil::getResultArray() returns for the query
+     */
+    public static function getIndexingQueue($problemItemsOnly=true)
+    {
+        // Both variants share everything but the status_msg condition.
+        $statusCondition = $problemItemsOnly
+            ? "(iff.status_msg IS NOT NULL)"
+            : "(iff.status_msg IS NULL or iff.status_msg = '')";
+
+        $sql = "SELECT
+                    iff.document_id, iff.indexdate, mt.filetypes, mt.mimetypes, me.name as extractor, iff.what, iff.status_msg, dcv.filename
+                FROM
+                    index_files iff
+                    INNER JOIN documents d ON iff.document_id=d.id
+                    INNER JOIN document_metadata_version dmv ON d.metadata_version_id=dmv.id
+                    INNER JOIN document_content_version dcv ON dmv.content_version_id=dcv.id
+                    INNER JOIN mime_types mt ON dcv.mime_id=mt.id
+                    LEFT JOIN mime_extractors me ON mt.extractor_id=me.id
+                WHERE
+                    $statusCondition AND dmv.status_id=1
+                ORDER BY indexdate ";
+
+        return DBUtil::getResultArray($sql);
+    }
+
+    /**
+     * Shortcut for getIndexingQueue(false): entries without a status message.
+     *
+     * @return mixed see getIndexingQueue()
+     */
+    public static function getPendingIndexingQueue()
+    {
+        return Indexer::getIndexingQueue(false);
+    }
     /**
      * The main function that may be called repeatedly to index documents.