Commit bc1bb8bfe21001edc84f4c275022848efa027693

Authored by conradverm
1 parent 03fc43f8

KTS-673

"The search algorithm needs some work"
Updated. Include the version of the document in the index.

Committed By: Conrad Vermeulen
Reviewed By: Kevin Fourie

git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/trunk@7239 c91229c3-7414-0410-bfa2-8a42b809f60b
search2/indexing/indexers/JavaXMLRPCLuceneIndexer.inc.php
1 <? 1 <?
2 -require_once('indexer.inc.php'); 2 +
  3 +require_once('indexing/lib/XmlRpcLucene.inc.php');
3 4
4 class JavaXMLRPCLuceneIndexer extends Indexer 5 class JavaXMLRPCLuceneIndexer extends Indexer
5 { 6 {
6 - protected function indexDocument($docid, $textfile) 7 + /**
  8 + * @var XmlRpcLucene
  9 + */
  10 + private $lucene;
  11 +
  12 + /**
  13 + * The constructor for the Java XML-RPC Lucene indexer.
  14 + *
  15 + * Takes no parameters. The server URL is read from the 'indexer/JavaLuceneURL' config setting (default 'http://localhost:8875').
  16 + */
  17 + public function __construct()
  18 + {
  19 + parent::__construct();
  20 +
  21 + $config =& KTConfig::getSingleton();
  22 + $javaServerUrl = $config->get('indexer/JavaLuceneURL', 'http://localhost:8875');
  23 + $this->lucene = new XmlRpcLucene($javaServerUrl);
  24 + }
  25 +
  26 + /**
  27 + * Creates an index to be used.
  28 + *
  29 + */
  30 + public static function createIndex()
  31 + {
  32 + // do nothing. The java lucene indexer will create the indexes if required
  33 + }
  34 +
  35 + /**
  36 + * Indexes a document based on a text file.
  37 + *
  38 + * @param int $docid
  39 + * @param string $textfile
  40 + * @return boolean
  41 + */
  42 + protected function indexDocument($docid, $textfile, $title, $version)
7 { 43 {
8 - throw new Exception('TODO'); 44 + try
  45 + {
  46 + return $this->lucene->addDocument($docid, $textfile, '', $title, $version);
  47 + }
  48 + catch(Exception $e)
  49 + {
  50 + return false;
  51 + }
9 } 52 }
  53 +
  54 + /**
  55 + * Indexes the content and discussions on a document.
  56 + *
  57 + * @param int $docid
  58 + * @param string $textfile
  59 + * @return boolean
  60 + */
  61 + protected function indexDocumentAndDiscussion($docid, $textfile, $title, $version)
  62 + {
  63 + try
  64 + {
  65 + $discussion = Indexer::getDiscussionText($docid);
  66 + return $this->lucene->addDocument($docid, $textfile, $discussion, $title, $version);
  67 + }
  68 + catch(Exception $e)
  69 + {
  70 + return false;
  71 + }
  72 + }
  73 +
  74 + /**
  75 + * Indexes a discussion on a document.
  76 + *
  77 + * @param int $docid
  78 + * @return boolean
  79 + */
  80 + protected function indexDiscussion($docid)
  81 + {
  82 + try
  83 + {
  84 + $discussion = Indexer::getDiscussionText($docid);
  85 + return $this->lucene->updateDiscussion($docid, $discussion);
  86 + }
  87 + catch(Exception $e)
  88 + {
  89 + return false;
  90 + }
  91 +
  92 + return true;
  93 + }
  94 +
  95 + /**
  96 + * Optimise the lucene index.
  97 + * This can be called periodically to optimise performance and size of the lucene index.
  98 + *
  99 + */
  100 + public function optimise()
  101 + {
  102 + $this->lucene->optimize();
  103 + }
  104 +
  105 + /**
  106 + * Removes a document from the index.
  107 + *
  108 + * @param int $docid
  109 + * @return array containing (content, discussion, title)
  110 + */
  111 + public function deleteDocument($docid)
  112 + {
  113 + return $this->lucene->deleteDocument($docid);
  114 + }
  115 +
  116 + /**
  117 + * Runs a query against the lucene server and returns one result item per document, keeping the highest-ranked hit.
  118 + *
  119 + * @param string $query
  120 + * @return array
  121 + */
  122 + public function query($query)
  123 + {
  124 + $results = array();
  125 + $hits = $this->lucene->query($query);
  126 + if (is_array($hits))
  127 + {
  128 + foreach ($hits as $hit)
  129 + {
  130 +
  131 +
  132 + $document_id = $hit->DocumentID;
  133 + $content = $hit->Text;
  134 + $discussion = $hit->Title; //TODO: fix to be discussion. lucene server is not returning discussion text as well..
  135 + $title = $hit->Title;
  136 + $score = $hit->Rank;
  137 +
  138 + // avoid adding duplicates. Keep only the highest-scoring hit for each document.
  139 + if (!array_key_exists($document_id, $results) || $score > $results[$document_id]->Score)
  140 + {
  141 + $results[$document_id] = new QueryResultItem($document_id, $score, $title, $content, $discussion);
  142 + }
  143 + }
  144 + }
  145 + else
  146 + {
  147 + $_SESSION['KTErrorMessage'][] = _kt('The XMLRPC Server did not respond correctly. Please notify the system administrator to investigate.');
  148 + }
  149 + return $results;
  150 + }
  151 +
10 } 152 }
11 ?> 153 ?>
12 \ No newline at end of file 154 \ No newline at end of file
search2/indexing/indexers/PHPLuceneIndexer.inc.php
@@ -30,7 +30,7 @@ class PHPLuceneIndexer extends Indexer @@ -30,7 +30,7 @@ class PHPLuceneIndexer extends Indexer
30 { 30 {
31 $config =& KTConfig::getSingleton(); 31 $config =& KTConfig::getSingleton();
32 $indexPath = $config->get('indexer/luceneDirectory'); 32 $indexPath = $config->get('indexer/luceneDirectory');
33 - $lucene = new Zend_Search_Lucene($indexPath, true); 33 + new Zend_Search_Lucene($indexPath, true);
34 } 34 }
35 35
36 36
@@ -41,13 +41,14 @@ class PHPLuceneIndexer extends Indexer @@ -41,13 +41,14 @@ class PHPLuceneIndexer extends Indexer
41 * @param string $content 41 * @param string $content
42 * @param string $discussion 42 * @param string $discussion
43 */ 43 */
44 - private function addDocument($docid, $content, $discussion, $title='') 44 + private function addDocument($docid, $content, $discussion, $title, $version)
45 { 45 {
46 $doc = new Zend_Search_Lucene_Document(); 46 $doc = new Zend_Search_Lucene_Document();
47 $doc->addField(Zend_Search_Lucene_Field::Text('DocumentID', PHPLuceneIndexer::longToString($docid))); 47 $doc->addField(Zend_Search_Lucene_Field::Text('DocumentID', PHPLuceneIndexer::longToString($docid)));
48 $doc->addField(Zend_Search_Lucene_Field::Text('Content', $content, 'UTF-8')); 48 $doc->addField(Zend_Search_Lucene_Field::Text('Content', $content, 'UTF-8'));
49 $doc->addField(Zend_Search_Lucene_Field::Text('Discussion', $discussion, 'UTF-8')); 49 $doc->addField(Zend_Search_Lucene_Field::Text('Discussion', $discussion, 'UTF-8'));
50 $doc->addField(Zend_Search_Lucene_Field::Text('Title', $title, 'UTF-8')); 50 $doc->addField(Zend_Search_Lucene_Field::Text('Title', $title, 'UTF-8'));
  51 + $doc->addField(Zend_Search_Lucene_Field::Text('Version', $version, 'UTF-8'));
51 $this->lucene->addDocument($doc); 52 $this->lucene->addDocument($doc);
52 } 53 }
53 54
@@ -58,7 +59,7 @@ class PHPLuceneIndexer extends Indexer @@ -58,7 +59,7 @@ class PHPLuceneIndexer extends Indexer
58 * @param string $textfile 59 * @param string $textfile
59 * @return boolean 60 * @return boolean
60 */ 61 */
61 - protected function indexDocument($docid, $textfile, $title='') 62 + protected function indexDocument($docid, $textfile, $title, $version)
62 { 63 {
63 global $default; 64 global $default;
64 65
@@ -68,9 +69,9 @@ class PHPLuceneIndexer extends Indexer @@ -68,9 +69,9 @@ class PHPLuceneIndexer extends Indexer
68 return false; 69 return false;
69 } 70 }
70 71
71 - list($content, $discussion) = $this->deleteDocument($docid); 72 + list($content, $discussion, $title2, $version2) = $this->deleteDocument($docid);
72 73
73 - $this->addDocument($docid, file_get_contents($textfile), $discussion, $title); 74 + $this->addDocument($docid, file_get_contents($textfile), $discussion, $title, $version);
74 75
75 return true; 76 return true;
76 } 77 }
@@ -82,7 +83,7 @@ class PHPLuceneIndexer extends Indexer @@ -82,7 +83,7 @@ class PHPLuceneIndexer extends Indexer
82 * @param string $textfile 83 * @param string $textfile
83 * @return boolean 84 * @return boolean
84 */ 85 */
85 - protected function indexDocumentAndDiscussion($docid, $textfile, $title='') 86 + protected function indexDocumentAndDiscussion($docid, $textfile, $title, $version)
86 { 87 {
87 global $default; 88 global $default;
88 89
@@ -94,7 +95,7 @@ class PHPLuceneIndexer extends Indexer @@ -94,7 +95,7 @@ class PHPLuceneIndexer extends Indexer
94 95
95 $this->deleteDocument($docid); 96 $this->deleteDocument($docid);
96 97
97 - $this->addDocument($docid, file_get_contents($textfile), Indexer::getDiscussionText($docid), $title); 98 + $this->addDocument($docid, file_get_contents($textfile), Indexer::getDiscussionText($docid), $title, $version);
98 99
99 return true; 100 return true;
100 } 101 }
@@ -107,9 +108,9 @@ class PHPLuceneIndexer extends Indexer @@ -107,9 +108,9 @@ class PHPLuceneIndexer extends Indexer
107 */ 108 */
108 protected function indexDiscussion($docid) 109 protected function indexDiscussion($docid)
109 { 110 {
110 - list($content, $discussion, $title) = $this->deleteDocument($docid); 111 + list($content, $discussion, $title, $version) = $this->deleteDocument($docid);
111 112
112 - $this->addDocument($docid, $content, Indexer::getDiscussionText($docid), $title); 113 + $this->addDocument($docid, $content, Indexer::getDiscussionText($docid), $title, $version);
113 114
114 return true; 115 return true;
115 } 116 }
@@ -142,11 +143,11 @@ class PHPLuceneIndexer extends Indexer @@ -142,11 +143,11 @@ class PHPLuceneIndexer extends Indexer
142 $content = $hit->Content; 143 $content = $hit->Content;
143 $discussion = $hit->Discussion; 144 $discussion = $hit->Discussion;
144 $title = $hit->Title; 145 $title = $hit->Title;
145 - $title=''; 146 + $version = $hit->Version;
146 147
147 $this->lucene->delete($hit); 148 $this->lucene->delete($hit);
148 } 149 }
149 - return array($content, $discussion, $title); 150 + return array($content, $discussion, $title, $version);
150 } 151 }
151 152
152 /** 153 /**