Commit 53f5c06c4a8c14a64df80472daa6a7b792f27efa

Authored by Conrad Vermeulen
1 parent 00eccba8

KTS-673

"The search algorithm needs some work"
Updated. 

KTS-2514
"Indexing requirements is harsh currently as it requires all indexing issues to be resolved"
Fixed.

Committed By: Conrad Vermeulen
Reviewed By: Kevin Fourie

git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/trunk@7442 c91229c3-7414-0410-bfa2-8a42b809f60b
search2/indexing/indexerCore.inc.php
... ... @@ -252,6 +252,8 @@ abstract class Indexer
252 252 */
253 253 private $hookPath;
254 254  
  255 + private $enabledExtractors;
  256 +
255 257 /**
256 258 * Initialise the indexer
257 259 *
... ... @@ -267,6 +269,29 @@ abstract class Indexer
267 269  
268 270 $this->extractorPath = $config->get('indexer/extractorPath', 'extractors');
269 271 $this->hookPath = $config->get('indexer/extractorHookPath','extractorHooks');
  272 +
  273 +
  274 + $this->loadExtractorStatus();
  275 + }
  276 +
  277 + /**
  278 + * Get the list of enabled extractors
  279 + *
  280 + */
  281 + private function loadExtractorStatus()
  282 + {
  283 + $sql = "SELECT id, name FROM mime_extractors WHERE active=1";
  284 + $rs = DBUtil::getResultArray($sql);
  285 + $this->enabledExtractors = array();
  286 + foreach($rs as $item)
  287 + {
  288 + $this->enabledExtractors[] = $item['name'];
  289 + }
  290 + }
  291 +
  292 + private function isExtractorEnabled($extractor)
  293 + {
  294 + return in_array($extractor, $this->enabledExtractors);
270 295 }
271 296  
272 297 /**
... ... @@ -599,6 +624,7 @@ abstract class Indexer
599 624 {
600 625 global $default;
601 626  
  627 + $default->log->info('indexDocuments: start');
602 628 if (!$this->doesDiagnosticsPass())
603 629 {
604 630 return;
... ... @@ -618,13 +644,14 @@ abstract class Indexer
618 644 // identify the indexers that must run
619 645 // mysql specific limit!
620 646 $sql = "SELECT
621   - iff.document_id, mt.filetypes, mt.mimetypes, mt.extractor, iff.what
  647 + iff.document_id, mt.filetypes, mt.mimetypes, me.name as extractor, iff.what
622 648 FROM
623 649 index_files iff
624 650 INNER JOIN documents d ON iff.document_id=d.id
625 651 INNER JOIN document_metadata_version dmv ON d.metadata_version_id=dmv.id
626 652 INNER JOIN document_content_version dcv ON dmv.content_version_id=dcv.id
627 653 INNER JOIN mime_types mt ON dcv.mime_id=mt.id
  654 + INNER JOIN mime_extractors me ON mt.extractor_id=me.id
628 655 WHERE
629 656 (iff.processdate IS NULL or iff.processdate < cast(cast('$date' as date) -1 as date)) AND dmv.status_id=1
630 657 ORDER BY indexdate
... ... @@ -674,6 +701,12 @@ abstract class Indexer
674 701 $default->log->debug(sprintf(_kt("Indexing docid: %d extension: '%s' mimetype: '%s' extractor: '%s'"), $docId, $extension,$mimeType,$extractorClass));
675 702 }
676 703  
  704 + if (!$this->isExtractorEnabled($extractorClass))
  705 + {
  706 + $default->log->info(sprintf(_kt("diagnose: Not indexing docid: %d because extractor '%s' is disabled."), $docId, $extractorClass));
  707 + continue;
  708 + }
  709 +
677 710 if (empty($extractorClass))
678 711 {
679 712 if ($this->debug)
... ... @@ -835,10 +868,7 @@ abstract class Indexer
835 868 }
836 869  
837 870 }
838   - if ($this->debug)
839   - {
840   - $default->log->debug(_kt("Done."));
841   - }
  871 + $default->log->info('indexDocuments: done');
842 872 }
843 873  
844 874 public function migrateDocuments($max=null)
... ... @@ -1032,6 +1062,12 @@ abstract class Indexer
1032 1062 continue;
1033 1063 }
1034 1064  
  1065 + if (!$this->isExtractorEnabled($class))
  1066 + {
  1067 + $default->log->info(sprintf(_kt("diagnose: extractor '%s' is disabled."), $class));
  1068 + continue;
  1069 + }
  1070 +
1035 1071 $extractor = new $class();
1036 1072 if (!is_a($extractor, $baseclass))
1037 1073 {
... ...