Commit 53f5c06c4a8c14a64df80472daa6a7b792f27efa
1 parent
00eccba8
KTS-673 "The search algorithm needs some work" Updated.
KTS-2514 "Indexing requirements is harsh currently as it requires all indexing issues to be resolved" Fixed.
Committed By: Conrad Vermeulen
Reviewed By: Kevin Fourie
git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/trunk@7442 c91229c3-7414-0410-bfa2-8a42b809f60b
Showing 1 changed file with 41 additions and 5 deletions
search2/indexing/indexerCore.inc.php
| @@ -252,6 +252,8 @@ abstract class Indexer | @@ -252,6 +252,8 @@ abstract class Indexer | ||
| 252 | */ | 252 | */ |
| 253 | private $hookPath; | 253 | private $hookPath; |
| 254 | 254 | ||
| 255 | + private $enabledExtractors; | ||
| 256 | + | ||
| 255 | /** | 257 | /** |
| 256 | * Initialise the indexer | 258 | * Initialise the indexer |
| 257 | * | 259 | * |
| @@ -267,6 +269,29 @@ abstract class Indexer | @@ -267,6 +269,29 @@ abstract class Indexer | ||
| 267 | 269 | ||
| 268 | $this->extractorPath = $config->get('indexer/extractorPath', 'extractors'); | 270 | $this->extractorPath = $config->get('indexer/extractorPath', 'extractors'); |
| 269 | $this->hookPath = $config->get('indexer/extractorHookPath','extractorHooks'); | 271 | $this->hookPath = $config->get('indexer/extractorHookPath','extractorHooks'); |
| 272 | + | ||
| 273 | + | ||
| 274 | + $this->loadExtractorStatus(); | ||
| 275 | + } | ||
| 276 | + | ||
| 277 | + /** | ||
| 278 | + * Get the list of enabled extractors | ||
| 279 | + * | ||
| 280 | + */ | ||
| 281 | + private function loadExtractorStatus() | ||
| 282 | + { | ||
| 283 | + $sql = "SELECT id, name FROM mime_extractors WHERE active=1"; | ||
| 284 | + $rs = DBUtil::getResultArray($sql); | ||
| 285 | + $this->enabledExtractors = array(); | ||
| 286 | + foreach($rs as $item) | ||
| 287 | + { | ||
| 288 | + $this->enabledExtractors[] = $item['name']; | ||
| 289 | + } | ||
| 290 | + } | ||
| 291 | + | ||
| 292 | + private function isExtractorEnabled($extractor) | ||
| 293 | + { | ||
| 294 | + return in_array($extractor, $this->enabledExtractors); | ||
| 270 | } | 295 | } |
| 271 | 296 | ||
| 272 | /** | 297 | /** |
| @@ -599,6 +624,7 @@ abstract class Indexer | @@ -599,6 +624,7 @@ abstract class Indexer | ||
| 599 | { | 624 | { |
| 600 | global $default; | 625 | global $default; |
| 601 | 626 | ||
| 627 | + $default->log->info('indexDocuments: start'); | ||
| 602 | if (!$this->doesDiagnosticsPass()) | 628 | if (!$this->doesDiagnosticsPass()) |
| 603 | { | 629 | { |
| 604 | return; | 630 | return; |
| @@ -618,13 +644,14 @@ abstract class Indexer | @@ -618,13 +644,14 @@ abstract class Indexer | ||
| 618 | // identify the indexers that must run | 644 | // identify the indexers that must run |
| 619 | // mysql specific limit! | 645 | // mysql specific limit! |
| 620 | $sql = "SELECT | 646 | $sql = "SELECT |
| 621 | - iff.document_id, mt.filetypes, mt.mimetypes, mt.extractor, iff.what | 647 | + iff.document_id, mt.filetypes, mt.mimetypes, me.name as extractor, iff.what |
| 622 | FROM | 648 | FROM |
| 623 | index_files iff | 649 | index_files iff |
| 624 | INNER JOIN documents d ON iff.document_id=d.id | 650 | INNER JOIN documents d ON iff.document_id=d.id |
| 625 | INNER JOIN document_metadata_version dmv ON d.metadata_version_id=dmv.id | 651 | INNER JOIN document_metadata_version dmv ON d.metadata_version_id=dmv.id |
| 626 | INNER JOIN document_content_version dcv ON dmv.content_version_id=dcv.id | 652 | INNER JOIN document_content_version dcv ON dmv.content_version_id=dcv.id |
| 627 | INNER JOIN mime_types mt ON dcv.mime_id=mt.id | 653 | INNER JOIN mime_types mt ON dcv.mime_id=mt.id |
| 654 | + INNER JOIN mime_extractors me ON mt.extractor_id=me.id | ||
| 628 | WHERE | 655 | WHERE |
| 629 | (iff.processdate IS NULL or iff.processdate < cast(cast('$date' as date) -1 as date)) AND dmv.status_id=1 | 656 | (iff.processdate IS NULL or iff.processdate < cast(cast('$date' as date) -1 as date)) AND dmv.status_id=1 |
| 630 | ORDER BY indexdate | 657 | ORDER BY indexdate |
| @@ -674,6 +701,12 @@ abstract class Indexer | @@ -674,6 +701,12 @@ abstract class Indexer | ||
| 674 | $default->log->debug(sprintf(_kt("Indexing docid: %d extension: '%s' mimetype: '%s' extractor: '%s'"), $docId, $extension,$mimeType,$extractorClass)); | 701 | $default->log->debug(sprintf(_kt("Indexing docid: %d extension: '%s' mimetype: '%s' extractor: '%s'"), $docId, $extension,$mimeType,$extractorClass)); |
| 675 | } | 702 | } |
| 676 | 703 | ||
| 704 | + if (!$this->isExtractorEnabled($extractorClass)) | ||
| 705 | + { | ||
| 706 | + $default->log->info(sprintf(_kt("diagnose: Not indexing docid: %d because extractor '%s' is disabled."), $docId, $extractorClass)); | ||
| 707 | + continue; | ||
| 708 | + } | ||
| 709 | + | ||
| 677 | if (empty($extractorClass)) | 710 | if (empty($extractorClass)) |
| 678 | { | 711 | { |
| 679 | if ($this->debug) | 712 | if ($this->debug) |
| @@ -835,10 +868,7 @@ abstract class Indexer | @@ -835,10 +868,7 @@ abstract class Indexer | ||
| 835 | } | 868 | } |
| 836 | 869 | ||
| 837 | } | 870 | } |
| 838 | - if ($this->debug) | ||
| 839 | - { | ||
| 840 | - $default->log->debug(_kt("Done.")); | ||
| 841 | - } | 871 | + $default->log->info('indexDocuments: done'); |
| 842 | } | 872 | } |
| 843 | 873 | ||
| 844 | public function migrateDocuments($max=null) | 874 | public function migrateDocuments($max=null) |
| @@ -1032,6 +1062,12 @@ abstract class Indexer | @@ -1032,6 +1062,12 @@ abstract class Indexer | ||
| 1032 | continue; | 1062 | continue; |
| 1033 | } | 1063 | } |
| 1034 | 1064 | ||
| 1065 | + if (!$this->isExtractorEnabled($class)) | ||
| 1066 | + { | ||
| 1067 | + $default->log->info(sprintf(_kt("diagnose: extractor '%s' is disabled."), $class)); | ||
| 1068 | + continue; | ||
| 1069 | + } | ||
| 1070 | + | ||
| 1035 | $extractor = new $class(); | 1071 | $extractor = new $class(); |
| 1036 | if (!is_a($extractor, $baseclass)) | 1072 | if (!is_a($extractor, $baseclass)) |
| 1037 | { | 1073 | { |