Commit ebf14f1e2be0bc59efd8d0adfeb915b855c31285
1 parent
7c1f0c28
Rebuilt Lucene with extra debug statements. Split the processors and indexing into 2 queues.
PT: 1731097 Committed by: Megan Watson
Showing
10 changed files
with
266 additions
and
39 deletions
bin/luceneserver/ktlucene.jar
No preview for this file type
call_home.php
0 → 100644
<?php
/*
 * Call-home endpoint: appends one line of installation statistics per request
 * to var/system_info.txt, located next to this script.
 *
 * Data incoming format: <installation guid>|<user count>|<document count>|<KT version>|<KT edition>|<OS info>
 */

// Accept only a plain string. A request such as system_info[]=x produces an
// array, which strip_tags() cannot process.
$raw = (isset($_REQUEST['system_info']) && is_string($_REQUEST['system_info'])) ? $_REQUEST['system_info'] : '';

// Strip markup, then collapse control characters (CR/LF included) into spaces
// so that a single request can never write more than one record.
$data = trim((string)preg_replace('/[\x00-\x1F\x7F]+/', ' ', strip_tags($raw)));

if($data === ''){
    exit(0);
}

// Resolve the log relative to this script instead of the current working directory.
$file = dirname(__FILE__) . DIRECTORY_SEPARATOR . 'var' . DIRECTORY_SEPARATOR . 'system_info.txt';

// Append under an exclusive lock so concurrent requests cannot interleave their
// records. The caller cannot act on a failure, so it is only written to the
// server error log.
if(file_put_contents($file, $data."\n", FILE_APPEND | LOCK_EX) === false){
    error_log('call_home: unable to write to '.$file);
}

exit(0);
?>
| 0 | \ No newline at end of file | 19 | \ No newline at end of file |
search2/documentProcessor/bin/documentProcessor.php
| @@ -6,7 +6,7 @@ | @@ -6,7 +6,7 @@ | ||
| 6 | * KnowledgeTree Community Edition | 6 | * KnowledgeTree Community Edition |
| 7 | * Document Management Made Simple | 7 | * Document Management Made Simple |
| 8 | * Copyright (C) 2008, 2009 KnowledgeTree Inc. | 8 | * Copyright (C) 2008, 2009 KnowledgeTree Inc. |
| 9 | - * | 9 | + * |
| 10 | * | 10 | * |
| 11 | * This program is free software; you can redistribute it and/or modify it under | 11 | * This program is free software; you can redistribute it and/or modify it under |
| 12 | * the terms of the GNU General Public License version 3 as published by the | 12 | * the terms of the GNU General Public License version 3 as published by the |
| @@ -47,6 +47,7 @@ chdir(dirname(__FILE__)); | @@ -47,6 +47,7 @@ chdir(dirname(__FILE__)); | ||
| 47 | require_once('../documentProcessor.inc.php'); | 47 | require_once('../documentProcessor.inc.php'); |
| 48 | 48 | ||
| 49 | $documentProcessor = DocumentProcessor::get(); | 49 | $documentProcessor = DocumentProcessor::get(); |
| 50 | +$documentProcessor->processIndexQueue(); | ||
| 50 | $documentProcessor->processQueue(); | 51 | $documentProcessor->processQueue(); |
| 51 | exit; | 52 | exit; |
| 52 | ?> | 53 | ?> |
search2/documentProcessor/documentProcessor.inc.php
| @@ -6,7 +6,7 @@ | @@ -6,7 +6,7 @@ | ||
| 6 | * KnowledgeTree Community Edition | 6 | * KnowledgeTree Community Edition |
| 7 | * Document Management Made Simple | 7 | * Document Management Made Simple |
| 8 | * Copyright (C) 2008, 2009 KnowledgeTree Inc. | 8 | * Copyright (C) 2008, 2009 KnowledgeTree Inc. |
| 9 | - * | 9 | + * |
| 10 | * | 10 | * |
| 11 | * This program is free software; you can redistribute it and/or modify it under | 11 | * This program is free software; you can redistribute it and/or modify it under |
| 12 | * the terms of the GNU General Public License version 3 as published by the | 12 | * the terms of the GNU General Public License version 3 as published by the |
| @@ -105,6 +105,11 @@ class DocumentProcessor | @@ -105,6 +105,11 @@ class DocumentProcessor | ||
| 105 | return $singleton; | 105 | return $singleton; |
| 106 | } | 106 | } |
| 107 | 107 | ||
| 108 | + /** | ||
| 109 | + * Load the processors that will get run on the documents, eg pdf generation | ||
| 110 | + * | ||
| 111 | + * @return array | ||
| 112 | + */ | ||
| 108 | private function loadProcessors() | 113 | private function loadProcessors() |
| 109 | { | 114 | { |
| 110 | // Get list of registered processors (plugins) | 115 | // Get list of registered processors (plugins) |
| @@ -116,7 +121,7 @@ class DocumentProcessor | @@ -116,7 +121,7 @@ class DocumentProcessor | ||
| 116 | 121 | ||
| 117 | if(PEAR::isError($results)){ | 122 | if(PEAR::isError($results)){ |
| 118 | global $default; | 123 | global $default; |
| 119 | - $default->log->debug('documentProcessor: error loading processors').' - '.$results->getMessage(); | 124 | + $default->log->error('documentProcessor: error loading processors').' - '.$results->getMessage(); |
| 120 | return false; | 125 | return false; |
| 121 | } | 126 | } |
| 122 | 127 | ||
| @@ -139,34 +144,65 @@ class DocumentProcessor | @@ -139,34 +144,65 @@ class DocumentProcessor | ||
| 139 | return $processors; | 144 | return $processors; |
| 140 | } | 145 | } |
| 141 | 146 | ||
| 142 | - public function processQueue() | 147 | + /** |
| 148 | + * Fetch the documents in the indexing queue and start the indexer | ||
| 149 | + * | ||
| 150 | + */ | ||
| 151 | + public function processIndexQueue() | ||
| 143 | { | 152 | { |
| 144 | global $default; | 153 | global $default; |
| 145 | - $default->log->debug('documentProcessor: starting'); | 154 | + |
| 155 | + if(!$default->enableIndexing){ | ||
| 156 | + $default->log->debug('documentProcessor: indexer disabled'); | ||
| 157 | + return ; | ||
| 158 | + } | ||
| 159 | + | ||
| 160 | + $default->log->debug('documentProcessor: starting indexer'); | ||
| 146 | 161 | ||
| 147 | // Check for lock file to ensure processor is not currently running | 162 | // Check for lock file to ensure processor is not currently running |
| 148 | $cacheDir = $default->cacheDirectory; | 163 | $cacheDir = $default->cacheDirectory; |
| 149 | $lockFile = $cacheDir . DIRECTORY_SEPARATOR . 'document_processor.lock'; | 164 | $lockFile = $cacheDir . DIRECTORY_SEPARATOR . 'document_processor.lock'; |
| 150 | 165 | ||
| 151 | if(file_exists($lockFile)){ | 166 | if(file_exists($lockFile)){ |
| 152 | - // lock file exists, exit | ||
| 153 | - $default->log->debug('documentProcessor: stopping, lock file in place '.$lockFile); | ||
| 154 | - return ; | 167 | + // If something causes the document processor to stop part way through processing, the lock |
| 168 | + // file will remain stopping the document processor from resuming. To workaround this problem | ||
| 169 | + // we check the creation date of the lockfile and remove it if it is older than 24 hours or | ||
| 170 | + // 48 hours if the batch size is greater than 1000 documents. | ||
| 171 | + $stat = stat($lockFile); | ||
| 172 | + $created = $stat['mtime']; | ||
| 173 | + | ||
| 174 | + $gap = 24; | ||
| 175 | + if($this->limit > 1000){ | ||
| 176 | + $gap = 48; | ||
| 177 | + $default->log->warn('documentProcessor: batch size of documents to index is set to '.$this->limit.', this could cause problems.'); | ||
| 178 | + } | ||
| 179 | + $check = time() - ($gap*60*60); | ||
| 180 | + | ||
| 181 | + if($check > $created){ | ||
| 182 | + $default->log->error('documentProcessor: lock file is older than '.$gap.' hours, deleting it to restart indexing - '.$lockFile); | ||
| 183 | + @unlink($lockFile); | ||
| 184 | + }else{ | ||
| 185 | + // lock file exists, exit | ||
| 186 | + // throw a warning if the lock file is older than half an hour | ||
| 187 | + $small_gap = time() - (30*60); | ||
| 188 | + if($small_gap > $created){ | ||
| 189 | + $default->log->warn('documentProcessor: stopping, lock file in place since '. date('Y-m-d H:i:s', $created) .' - '.$lockFile); | ||
| 190 | + } | ||
| 191 | + return ; | ||
| 192 | + } | ||
| 155 | } | 193 | } |
| 156 | 194 | ||
| 157 | - if($default->enableIndexing){ | ||
| 158 | - // Setup indexing - load extractors, run diagnostics | ||
| 159 | - if($this->indexer->preIndexingSetup() === false){ | ||
| 160 | - $default->log->debug('documentProcessor: stopping - indexer setup failed.'); | ||
| 161 | - return; | ||
| 162 | - } | 195 | + // Setup indexing - load extractors, run diagnostics |
| 196 | + if($this->indexer->preIndexingSetup() === false){ | ||
| 197 | + $default->log->error('documentProcessor: stopping - indexer setup failed.'); | ||
| 198 | + return; | ||
| 163 | } | 199 | } |
| 164 | 200 | ||
| 165 | // Get document queue | 201 | // Get document queue |
| 166 | $queue = $this->indexer->getDocumentsQueue($this->limit); | 202 | $queue = $this->indexer->getDocumentsQueue($this->limit); |
| 167 | 203 | ||
| 168 | if(empty($queue)){ | 204 | if(empty($queue)){ |
| 169 | - $default->log->debug('documentProcessor: stopping - no documents in processing queue'); | 205 | + $default->log->debug('documentProcessor: stopping - no documents in indexing queue'); |
| 170 | return ; | 206 | return ; |
| 171 | } | 207 | } |
| 172 | 208 | ||
| @@ -177,7 +213,8 @@ class DocumentProcessor | @@ -177,7 +213,8 @@ class DocumentProcessor | ||
| 177 | foreach($queue as $item){ | 213 | foreach($queue as $item){ |
| 178 | 214 | ||
| 179 | // Get the document object | 215 | // Get the document object |
| 180 | - $document = Document::get($item['document_id']); | 216 | + $docId = $item['document_id']; |
| 217 | + $document = Document::get($docId); | ||
| 181 | 218 | ||
| 182 | if (PEAR::isError($document)) | 219 | if (PEAR::isError($document)) |
| 183 | { | 220 | { |
| @@ -186,9 +223,54 @@ class DocumentProcessor | @@ -186,9 +223,54 @@ class DocumentProcessor | ||
| 186 | } | 223 | } |
| 187 | 224 | ||
| 188 | // index document | 225 | // index document |
| 189 | - if($default->enableIndexing){ | ||
| 190 | - $this->indexer->processDocument($document, $item); | ||
| 191 | - } | 226 | + $this->indexer->processDocument($document, $item); |
| 227 | + } | ||
| 228 | + | ||
| 229 | + // update the indexer statistics | ||
| 230 | + $this->indexer->updateIndexStats(); | ||
| 231 | + | ||
| 232 | + // Remove lock file to indicate processing has completed | ||
| 233 | + if(file_exists($lockFile)){ | ||
| 234 | + @unlink($lockFile); | ||
| 235 | + } | ||
| 236 | + | ||
| 237 | + $default->log->debug('documentProcessor: stopping indexer, batch completed'); | ||
| 238 | + } | ||
| 239 | + | ||
| 240 | + /** | ||
| 241 | + * Fetch the process queue for running the processors on | ||
| 242 | + * | ||
| 243 | + */ | ||
| 244 | + public function processQueue() | ||
| 245 | + { | ||
| 246 | + global $default; | ||
| 247 | + $default->log->debug('documentProcessor: starting processing'); | ||
| 248 | + | ||
| 249 | + // Get processing queue | ||
| 250 | + // Use the same batch size as the indexer (for now) | ||
| 251 | + // If the batch size is huge then reset it to a smaller number | ||
| 252 | + // Open office leaks memory, so we don't want to do too many documents at once | ||
| 253 | + $batch = ($this->limit > 500) ? 500 : $this->limit; | ||
| 254 | + | ||
| 255 | + $queue = $this->indexer->getDocumentProcessingQueue($batch); | ||
| 256 | + | ||
| 257 | + if(empty($queue)){ | ||
| 258 | + $default->log->debug('documentProcessor: stopping - no documents in processing queue'); | ||
| 259 | + return ; | ||
| 260 | + } | ||
| 261 | + | ||
| 262 | + // Process queue | ||
| 263 | + foreach($queue as $item){ | ||
| 264 | + | ||
| 265 | + // Get the document object | ||
| 266 | + $docId = $item['document_id']; | ||
| 267 | + $document = Document::get($docId); | ||
| 268 | + | ||
| 269 | + if (PEAR::isError($document)) | ||
| 270 | + { | ||
| 271 | + Indexer::unqueueDocFromProcessing($docId, "Cannot resolve document id: {$document->getMessage()}", 'error'); | ||
| 272 | + continue; | ||
| 273 | + } | ||
| 192 | 274 | ||
| 193 | // loop through processors | 275 | // loop through processors |
| 194 | if($this->processors !== false){ | 276 | if($this->processors !== false){ |
| @@ -204,19 +286,13 @@ class DocumentProcessor | @@ -204,19 +286,13 @@ class DocumentProcessor | ||
| 204 | // Process document | 286 | // Process document |
| 205 | $processor->setDocument($document); | 287 | $processor->setDocument($document); |
| 206 | $processor->processDocument(); | 288 | $processor->processDocument(); |
| 289 | + | ||
| 290 | + Indexer::unqueueDocFromProcessing($docId, "Document processed", 'debug'); | ||
| 207 | } | 291 | } |
| 208 | } | 292 | } |
| 209 | } | 293 | } |
| 210 | 294 | ||
| 211 | - // update the indexer statistics | ||
| 212 | - $this->indexer->updateIndexStats(); | ||
| 213 | - | ||
| 214 | - // Remove lock file to indicate processing has completed | ||
| 215 | - if(file_exists($lockFile)){ | ||
| 216 | - @unlink($lockFile); | ||
| 217 | - } | ||
| 218 | - | ||
| 219 | - $default->log->debug('documentProcessor: stopping'); | 295 | + $default->log->debug('documentProcessor: stopping processing, batch completed'); |
| 220 | } | 296 | } |
| 221 | 297 | ||
| 222 | /** | 298 | /** |
search2/indexing/extractors/OpenOfficeTextExtractor.inc.php
| 1 | <?php | 1 | <?php |
| 2 | - | ||
| 3 | -require_once(KT_DIR.'/thirdparty/peclzip/pclzip.lib.php'); | ||
| 4 | - | ||
| 5 | /** | 2 | /** |
| 6 | * $Id:$ | 3 | * $Id:$ |
| 7 | * | 4 | * |
| 8 | * KnowledgeTree Community Edition | 5 | * KnowledgeTree Community Edition |
| 9 | * Document Management Made Simple | 6 | * Document Management Made Simple |
| 10 | * Copyright (C) 2008, 2009 KnowledgeTree Inc. | 7 | * Copyright (C) 2008, 2009 KnowledgeTree Inc. |
| 11 | - * | 8 | + * |
| 12 | * | 9 | * |
| 13 | * This program is free software; you can redistribute it and/or modify it under | 10 | * This program is free software; you can redistribute it and/or modify it under |
| 14 | * the terms of the GNU General Public License version 3 as published by the | 11 | * the terms of the GNU General Public License version 3 as published by the |
| @@ -39,6 +36,8 @@ require_once(KT_DIR.'/thirdparty/peclzip/pclzip.lib.php'); | @@ -39,6 +36,8 @@ require_once(KT_DIR.'/thirdparty/peclzip/pclzip.lib.php'); | ||
| 39 | * | 36 | * |
| 40 | */ | 37 | */ |
| 41 | 38 | ||
| 39 | +require_once(KT_DIR.'/thirdparty/peclzip/pclzip.lib.php'); | ||
| 40 | + | ||
| 42 | class OpenOfficeTextExtractor extends ExternalDocumentExtractor | 41 | class OpenOfficeTextExtractor extends ExternalDocumentExtractor |
| 43 | { | 42 | { |
| 44 | public function __construct() | 43 | public function __construct() |
| @@ -138,6 +137,7 @@ class OpenOfficeTextExtractor extends ExternalDocumentExtractor | @@ -138,6 +137,7 @@ class OpenOfficeTextExtractor extends ExternalDocumentExtractor | ||
| 138 | */ | 137 | */ |
| 139 | public function diagnose() | 138 | public function diagnose() |
| 140 | { | 139 | { |
| 140 | + return null; | ||
| 141 | if (false === $this->unzip) | 141 | if (false === $this->unzip) |
| 142 | { | 142 | { |
| 143 | return sprintf(_kt("Cannot locate unzip: %s."), $this->unzip); | 143 | return sprintf(_kt("Cannot locate unzip: %s."), $this->unzip); |
search2/indexing/extractors/OpenXmlTextExtractor.inc.php
| @@ -6,7 +6,7 @@ | @@ -6,7 +6,7 @@ | ||
| 6 | * KnowledgeTree Community Edition | 6 | * KnowledgeTree Community Edition |
| 7 | * Document Management Made Simple | 7 | * Document Management Made Simple |
| 8 | * Copyright (C) 2008, 2009 KnowledgeTree Inc. | 8 | * Copyright (C) 2008, 2009 KnowledgeTree Inc. |
| 9 | - * | 9 | + * |
| 10 | * | 10 | * |
| 11 | * This program is free software; you can redistribute it and/or modify it under | 11 | * This program is free software; you can redistribute it and/or modify it under |
| 12 | * the terms of the GNU General Public License version 3 as published by the | 12 | * the terms of the GNU General Public License version 3 as published by the |
| @@ -37,6 +37,8 @@ | @@ -37,6 +37,8 @@ | ||
| 37 | * | 37 | * |
| 38 | */ | 38 | */ |
| 39 | 39 | ||
| 40 | +require_once(KT_DIR.'/thirdparty/peclzip/pclzip.lib.php'); | ||
| 41 | + | ||
| 40 | class OpenXmlTextExtractor extends ExternalDocumentExtractor | 42 | class OpenXmlTextExtractor extends ExternalDocumentExtractor |
| 41 | { | 43 | { |
| 42 | public function __construct() | 44 | public function __construct() |
| @@ -321,6 +323,7 @@ class OpenXmlTextExtractor extends ExternalDocumentExtractor | @@ -321,6 +323,7 @@ class OpenXmlTextExtractor extends ExternalDocumentExtractor | ||
| 321 | */ | 323 | */ |
| 322 | public function diagnose() | 324 | public function diagnose() |
| 323 | { | 325 | { |
| 326 | + return null; | ||
| 324 | if (false === $this->unzip) | 327 | if (false === $this->unzip) |
| 325 | { | 328 | { |
| 326 | return sprintf(_kt("Cannot locate unzip: %s."), $this->unzip); | 329 | return sprintf(_kt("Cannot locate unzip: %s."), $this->unzip); |
search2/indexing/indexerCore.inc.php
| @@ -6,7 +6,7 @@ | @@ -6,7 +6,7 @@ | ||
| 6 | * KnowledgeTree Community Edition | 6 | * KnowledgeTree Community Edition |
| 7 | * Document Management Made Simple | 7 | * Document Management Made Simple |
| 8 | * Copyright (C) 2008, 2009 KnowledgeTree Inc. | 8 | * Copyright (C) 2008, 2009 KnowledgeTree Inc. |
| 9 | - * | 9 | + * |
| 10 | * | 10 | * |
| 11 | * This program is free software; you can redistribute it and/or modify it under | 11 | * This program is free software; you can redistribute it and/or modify it under |
| 12 | * the terms of the GNU General Public License version 3 as published by the | 12 | * the terms of the GNU General Public License version 3 as published by the |
| @@ -643,6 +643,16 @@ abstract class Indexer | @@ -643,6 +643,16 @@ abstract class Indexer | ||
| 643 | 643 | ||
| 644 | $default->log->debug("index: Queuing indexing of $document_id"); | 644 | $default->log->debug("index: Queuing indexing of $document_id"); |
| 645 | 645 | ||
| 646 | + // Appending the process queue to the index for convenience | ||
| 647 | + // Don't want to complicate matters by creating too many new classes and files | ||
| 648 | + Indexer::unqueueDocFromProcessing($document_id); | ||
| 649 | + | ||
| 650 | + // enqueue item | ||
| 651 | + $date = date('Y-m-d H:i:s'); | ||
| 652 | + $sql = "INSERT INTO process_queue(document_id, date_added) VALUES($document_id, '$date')"; | ||
| 653 | + DBUtil::runQuery($sql); | ||
| 654 | + | ||
| 655 | + $default->log->debug("Processing queue: Queuing document for processing - $document_id"); | ||
| 646 | } | 656 | } |
| 647 | 657 | ||
| 648 | private static function incrementCount() | 658 | private static function incrementCount() |
| @@ -722,8 +732,37 @@ abstract class Indexer | @@ -722,8 +732,37 @@ abstract class Indexer | ||
| 722 | DBUtil::runQuery($sql); | 732 | DBUtil::runQuery($sql); |
| 723 | 733 | ||
| 724 | $default->log->debug("Indexer::clearoutDeleted: removed documents from indexing queue that have been deleted"); | 734 | $default->log->debug("Indexer::clearoutDeleted: removed documents from indexing queue that have been deleted"); |
| 735 | + | ||
| 736 | + // Multiple indexing processes cannot occur at the same time - the lock file prevents this. | ||
| 737 | + // However if the indexing is interrupted the documents can get stuck in the queue with the processdate set | ||
| 738 | + // but never having been indexed. To prevent this we will clear the processdate on all documents without errors. | ||
| 739 | + $sql = 'UPDATE index_files SET processdate = null where processdate is not null and status_msg is null'; | ||
| 740 | + $res = DBUtil::runQuery($sql); | ||
| 741 | + | ||
| 742 | + if(PEAR::isError($res)){ | ||
| 743 | + $default->log->error("Indexer::clearoutDeleted: something happened ".$res->getMessage); | ||
| 744 | + } | ||
| 745 | + | ||
| 746 | + $default->log->debug("Indexer::clearoutDeleted: resetting processdate for documents that may be stuck"); | ||
| 725 | } | 747 | } |
| 726 | 748 | ||
/**
 * Clear out of the processing queue any documents that no longer exist.
 *
 * Deletes process_queue rows whose document has status_id 3 (presumably the
 * "deleted" status - confirm against the status lookup) and rows whose
 * document_id has no matching row in the documents table.
 *
 * A failed query is logged as an error instead of being reported as a
 * successful clean-up.
 */
public static function clearoutDeletedFromProcessor()
{
    global $default;

    $sql = 'DELETE FROM
                process_queue
            WHERE
                document_id in (SELECT d.id FROM documents AS d WHERE d.status_id=3) OR
                NOT EXISTS(SELECT process_queue.document_id FROM documents WHERE process_queue.document_id=documents.id)';
    $result = DBUtil::runQuery($sql);

    if(PEAR::isError($result)){
        $default->log->error('Process queue: could not remove deleted documents from processing queue - '.$result->getMessage());
        return;
    }

    $default->log->debug("Process queue: removed documents from processing queue that have been deleted");
}
| 727 | 766 | ||
| 728 | /** | 767 | /** |
| 729 | * Check if a document is scheduled to be indexed | 768 | * Check if a document is scheduled to be indexed |
| @@ -1191,7 +1230,7 @@ abstract class Indexer | @@ -1191,7 +1230,7 @@ abstract class Indexer | ||
| 1191 | } | 1230 | } |
| 1192 | 1231 | ||
| 1193 | /** | 1232 | /** |
| 1194 | - * Get the queue of documents for processing | 1233 | + * Get the queue of documents for indexing |
| 1195 | * Refactored from indexDocuments() | 1234 | * Refactored from indexDocuments() |
| 1196 | */ | 1235 | */ |
| 1197 | public function getDocumentsQueue($max = null) | 1236 | public function getDocumentsQueue($max = null) |
| @@ -1222,7 +1261,7 @@ abstract class Indexer | @@ -1222,7 +1261,7 @@ abstract class Indexer | ||
| 1222 | if (PEAR::isError($result)) | 1261 | if (PEAR::isError($result)) |
| 1223 | { | 1262 | { |
| 1224 | //unlink($indexLockFile); | 1263 | //unlink($indexLockFile); |
| 1225 | - if ($this->debug) $default->log->debug('indexDocuments: stopping - db error'); | 1264 | + if ($this->debug) $default->log->error('indexDocuments: stopping - db error'); |
| 1226 | return; | 1265 | return; |
| 1227 | } | 1266 | } |
| 1228 | KTUtil::setSystemSetting('luceneIndexingDate', time()); | 1267 | KTUtil::setSystemSetting('luceneIndexingDate', time()); |
| @@ -1253,6 +1292,51 @@ abstract class Indexer | @@ -1253,6 +1292,51 @@ abstract class Indexer | ||
| 1253 | } | 1292 | } |
| 1254 | 1293 | ||
/**
 * Get the queue of documents for processing
 *
 * Cleans deleted documents out of the processing queue, then fetches up to
 * $max queued documents that have never been processed or were last processed
 * more than a day ago, ordered by the date they were added to the queue.
 *
 * @param int $max Maximum number of queue items to fetch; falls back to 20 when empty or not a positive number
 * @return array|null Rows holding document_id, filetypes and mimetypes; null on a db error or when nothing is queued
 */
public function getDocumentProcessingQueue($max = null)
{
    global $default;

    // $max is interpolated into the LIMIT clause below, so force it to a positive integer
    $max = (int)$max;
    if($max <= 0){
        $max = 20;
    }

    // Cleanup the queue
    Indexer::clearoutDeletedFromProcessor();

    $date = date('Y-m-d H:i:s');
    // select the queued documents that are due for processing
    // mysql specific limit!
    $sql = "SELECT
                pq.document_id, mt.filetypes, mt.mimetypes
            FROM
                process_queue pq
                INNER JOIN documents d ON pq.document_id=d.id
                INNER JOIN document_metadata_version dmv ON d.metadata_version_id=dmv.id
                INNER JOIN document_content_version dcv ON dmv.content_version_id=dcv.id
                INNER JOIN mime_types mt ON dcv.mime_id=mt.id
            WHERE
                (pq.date_processed IS NULL or pq.date_processed < date_sub('$date', interval 1 day)) AND dmv.status_id=1
            ORDER BY pq.date_added
            LIMIT $max";

    $result = DBUtil::getResultArray($sql);
    if (PEAR::isError($result))
    {
        $default->log->error('Processing queue: stopping - db error: '.$result->getMessage());
        return;
    }

    // bail if no work to do
    if (count($result) == 0)
    {
        $default->log->debug('Processing queue: stopping - no work to be done');
        return;
    }

    return $result;
}
| 1338 | + | ||
| 1339 | + /** | ||
| 1256 | * Process a document - extract text and index it | 1340 | * Process a document - extract text and index it |
| 1257 | * Refactored from indexDocuments() | 1341 | * Refactored from indexDocuments() |
| 1258 | * | 1342 | * |
| @@ -1813,7 +1897,7 @@ abstract class Indexer | @@ -1813,7 +1897,7 @@ abstract class Indexer | ||
| 1813 | } | 1897 | } |
| 1814 | 1898 | ||
| 1815 | /** | 1899 | /** |
| 1816 | - * Remove the document from the queue. This is normally called when it has been processed. | 1900 | + * Remove the document from the indexing queue. This is normally called when it has been processed. |
| 1817 | * | 1901 | * |
| 1818 | * @param int $docid | 1902 | * @param int $docid |
| 1819 | */ | 1903 | */ |
| @@ -1829,6 +1913,23 @@ abstract class Indexer | @@ -1829,6 +1913,23 @@ abstract class Indexer | ||
| 1829 | } | 1913 | } |
| 1830 | 1914 | ||
/**
 * Remove the document from the processing queue. This is normally called when it has been processed.
 *
 * @param int $docid Id of the document to remove from the queue
 * @param string|bool $reason Message to log with the removal, or false to log nothing
 * @param string $level Name of the logger method used for the message, eg 'debug' or 'error'
 */
public static function unqueueDocFromProcessing($docid, $reason=false, $level='debug')
{
    global $default;

    // The id is interpolated into the query, so make sure it really is an integer
    $docid = (int)$docid;

    $sql = "DELETE FROM process_queue WHERE document_id=$docid";
    $result = DBUtil::runQuery($sql);

    if (PEAR::isError($result))
    {
        // Report the failure rather than logging a removal that never happened
        $default->log->error("Processor queue: could not remove document $docid from the queue - ".$result->getMessage());
        return;
    }

    if ($reason !== false)
    {
        $default->log->$level("Processor queue: removing document $docid from the queue - $reason");
    }
}
| 1931 | + | ||
| 1932 | + /** | ||
| 1832 | * Run a query on the index. | 1933 | * Run a query on the index. |
| 1833 | * | 1934 | * |
| 1834 | * @param string $query | 1935 | * @param string $query |
sql/mysql/install/data.sql
| @@ -1774,7 +1774,8 @@ INSERT INTO `upgrades` VALUES | @@ -1774,7 +1774,8 @@ INSERT INTO `upgrades` VALUES | ||
| 1774 | (230,'sql*3.7.0-1*0*3.7.0-1/hide_zip_config.sql','Database upgrade to version 3.7.0-1: Hide zip config','2009-09-01 00:00:00',1,'upgrade*3.7.0-1*99*upgrade3.7.0-1'), | 1774 | (230,'sql*3.7.0-1*0*3.7.0-1/hide_zip_config.sql','Database upgrade to version 3.7.0-1: Hide zip config','2009-09-01 00:00:00',1,'upgrade*3.7.0-1*99*upgrade3.7.0-1'), |
| 1775 | (231,'sql*3.7.0-1*0*3.7.0-1/mime_extractors_reset.sql','Database upgrade to version 3.7.0-1: Mime extractors reset','2009-09-01 00:00:00',1,'upgrade*3.7.0-1*99*upgrade3.7.0-1'), | 1775 | (231,'sql*3.7.0-1*0*3.7.0-1/mime_extractors_reset.sql','Database upgrade to version 3.7.0-1: Mime extractors reset','2009-09-01 00:00:00',1,'upgrade*3.7.0-1*99*upgrade3.7.0-1'), |
| 1776 | (232,'upgrade*3.7.0-1*99*upgrade3.7.0-1','Upgrade from version 3.6.3 to 3.7.0-1','2009-11-13 00:00:00',1,'upgrade*3.7.0-1*99*upgrade3.7.0-1'), | 1776 | (232,'upgrade*3.7.0-1*99*upgrade3.7.0-1','Upgrade from version 3.6.3 to 3.7.0-1','2009-11-13 00:00:00',1,'upgrade*3.7.0-1*99*upgrade3.7.0-1'), |
| 1777 | -(233,'upgrade*3.7.0.2*99*upgrade3.7.0.2','Upgrade from version 3.7.0-1 to 3.7.0.2','2009-11-19 00:00:00',1,'upgrade*3.7.0.2*99*upgrade3.7.0.2'); | 1777 | +(233,'sql*3.7.0.2*0*3.7.0.2/processor_queue.sql','Database upgrade to version 3.7.0-1: Processor Queue','2009-09-01 00:00:00',1,'upgrade*3.7.0.2*99*upgrade3.7.0.2'), |
| 1778 | +(234,'upgrade*3.7.0.2*99*upgrade3.7.0.2','Upgrade from version 3.7.0-1 to 3.7.0.2','2009-11-19 00:00:00',1,'upgrade*3.7.0.2*99*upgrade3.7.0.2'); | ||
| 1778 | /*!40000 ALTER TABLE `upgrades` ENABLE KEYS */; | 1779 | /*!40000 ALTER TABLE `upgrades` ENABLE KEYS */; |
| 1779 | UNLOCK TABLES; | 1780 | UNLOCK TABLES; |
| 1780 | 1781 |
sql/mysql/install/structure.sql
| @@ -4,7 +4,7 @@ | @@ -4,7 +4,7 @@ | ||
| 4 | -- KnowledgeTree Community Edition | 4 | -- KnowledgeTree Community Edition |
| 5 | -- Document Management Made Simple | 5 | -- Document Management Made Simple |
| 6 | -- Copyright (C) 2008, 2009 KnowledgeTree Inc. | 6 | -- Copyright (C) 2008, 2009 KnowledgeTree Inc. |
| 7 | --- | 7 | +-- |
| 8 | -- | 8 | -- |
| 9 | -- This program is free software; you can redistribute it and/or modify it under | 9 | -- This program is free software; you can redistribute it and/or modify it under |
| 10 | -- the terms of the GNU General Public License version 3 as published by the | 10 | -- the terms of the GNU General Public License version 3 as published by the |
| @@ -1327,6 +1327,20 @@ CREATE TABLE `plugins` ( | @@ -1327,6 +1327,20 @@ CREATE TABLE `plugins` ( | ||
| 1327 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; | 1327 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; |
| 1328 | 1328 | ||
| 1329 | -- | 1329 | -- |
| 1330 | +-- Table structure for table `process_queue` | ||
| 1331 | +-- | ||
| 1332 | + | ||
-- date_added is set once on insert and carries no ON UPDATE clause: the queue
-- is read in date_added order, so updating a row must not move it in the queue.
-- date_processed is explicitly nullable because the queue query selects rows
-- with "date_processed IS NULL"; a TIMESTAMP column that is not declared NULL
-- can be created as NOT NULL with a zero default.
CREATE TABLE `process_queue` (
  `document_id` int(11) NOT NULL,
  `date_added` timestamp NOT NULL default CURRENT_TIMESTAMP,
  `date_processed` timestamp NULL default NULL,
  `status_msg` mediumtext,
  `process_type` varchar(20),
  PRIMARY KEY (`document_id`),
  CONSTRAINT `process_queue_ibfk_1` FOREIGN KEY (`document_id`) REFERENCES `documents` (`id`) ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
| 1342 | + | ||
| 1343 | +-- | ||
| 1330 | -- Table structure for table `role_allocations` | 1344 | -- Table structure for table `role_allocations` |
| 1331 | -- | 1345 | -- |
| 1332 | 1346 |
sql/mysql/upgrade/3.7.0.2/processor_queue.sql
0 → 100644
| 1 | +-- | ||
| 2 | +-- Table structure for table `process_queue` | ||
| 3 | +-- | ||
| 4 | + | ||
-- date_added is set once on insert and carries no ON UPDATE clause: the queue
-- is read in date_added order, so updating a row must not move it in the queue.
-- date_processed is explicitly nullable because the queue query selects rows
-- with "date_processed IS NULL"; a TIMESTAMP column that is not declared NULL
-- can be created as NOT NULL with a zero default.
CREATE TABLE `process_queue` (
  `document_id` int(11) NOT NULL,
  `date_added` timestamp NOT NULL default CURRENT_TIMESTAMP,
  `date_processed` timestamp NULL default NULL,
  `status_msg` mediumtext,
  `process_type` varchar(20),
  PRIMARY KEY (`document_id`),
  CONSTRAINT `process_queue_ibfk_1` FOREIGN KEY (`document_id`) REFERENCES `documents` (`id`) ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8;