Commit bc1bb8bfe21001edc84f4c275022848efa027693
1 parent
03fc43f8
KTS-673
"The search algorithm needs some work" Updated. Include version of document to the index. Committed By: Conrad Vermeulen Reviewed By: Kevin Fourie git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/trunk@7239 c91229c3-7414-0410-bfa2-8a42b809f60b
Showing
2 changed files
with
157 additions
and
14 deletions
search2/indexing/indexers/JavaXMLRPCLuceneIndexer.inc.php
| 1 | <? | 1 | <? |
| 2 | -require_once('indexer.inc.php'); | 2 | + |
| 3 | +require_once('indexing/lib/XmlRpcLucene.inc.php'); | ||
| 3 | 4 | ||
| 4 | class JavaXMLRPCLuceneIndexer extends Indexer | 5 | class JavaXMLRPCLuceneIndexer extends Indexer |
| 5 | { | 6 | { |
| 6 | - protected function indexDocument($docid, $textfile) | 7 | + /** |
| 8 | + * @var XmlRpcLucene | ||
| 9 | + */ | ||
| 10 | + private $lucene; | ||
| 11 | + | ||
| 12 | + /** | ||
| 13 | + * The constructor for the Java XML-RPC Lucene indexer | ||
| 14 | + * | ||
| 15 | + * Takes no parameters; the Java Lucene server URL is read from the 'indexer/JavaLuceneURL' config setting. | ||
| 16 | + */ | ||
| 17 | + public function __construct() | ||
| 18 | + { | ||
| 19 | + parent::__construct(); | ||
| 20 | + | ||
| 21 | + $config =& KTConfig::getSingleton(); | ||
| 22 | + $javaServerUrl = $config->get('indexer/JavaLuceneURL', 'http://localhost:8875'); | ||
| 23 | + $this->lucene = new XmlRpcLucene($javaServerUrl); | ||
| 24 | + } | ||
| 25 | + | ||
| 26 | + /** | ||
| 27 | + * Creates an index to be used. | ||
| 28 | + * | ||
| 29 | + */ | ||
| 30 | + public static function createIndex() | ||
| 31 | + { | ||
| 32 | + // do nothing. The java lucene indexer will create the indexes if required | ||
| 33 | + } | ||
| 34 | + | ||
| 35 | + /** | ||
| 36 | + * Indexes a document based on a text file. | ||
| 37 | + * | ||
| 38 | + * @param int $docid | ||
| 39 | + * @param string $textfile | ||
| 40 | + * @return boolean | ||
| 41 | + */ | ||
| 42 | + protected function indexDocument($docid, $textfile, $title, $version) | ||
| 7 | { | 43 | { |
| 8 | - throw new Exception('TODO'); | 44 | + try |
| 45 | + { | ||
| 46 | + return $this->lucene->addDocument($docid, $textfile, '', $title, $version); | ||
| 47 | + } | ||
| 48 | + catch(Exception $e) | ||
| 49 | + { | ||
| 50 | + return false; | ||
| 51 | + } | ||
| 9 | } | 52 | } |
| 53 | + | ||
| 54 | + /** | ||
| 55 | + * Indexes the content and discussions on a document. | ||
| 56 | + * | ||
| 57 | + * @param int $docid | ||
| 58 | + * @param string $textfile | ||
| 59 | + * @return boolean | ||
| 60 | + */ | ||
| 61 | + protected function indexDocumentAndDiscussion($docid, $textfile, $title, $version) | ||
| 62 | + { | ||
| 63 | + try | ||
| 64 | + { | ||
| 65 | + $discussion = Indexer::getDiscussionText($docid); | ||
| 66 | + return $this->lucene->addDocument($docid, $textfile, $discussion, $title, $version); | ||
| 67 | + } | ||
| 68 | + catch(Exception $e) | ||
| 69 | + { | ||
| 70 | + return false; | ||
| 71 | + } | ||
| 72 | + } | ||
| 73 | + | ||
| 74 | + /** | ||
| 75 | + * Indexes a discussion on a document. | ||
| 76 | + * | ||
| 77 | + * @param int $docid | ||
| 78 | + * @return boolean | ||
| 79 | + */ | ||
| 80 | + protected function indexDiscussion($docid) | ||
| 81 | + { | ||
| 82 | + try | ||
| 83 | + { | ||
| 84 | + $discussion = Indexer::getDiscussionText($docid); | ||
| 85 | + return $this->lucene->updateDiscussion($docid, $discussion); | ||
| 86 | + } | ||
| 87 | + catch(Exception $e) | ||
| 88 | + { | ||
| 89 | + return false; | ||
| 90 | + } | ||
| 91 | + | ||
| 92 | + return true; | ||
| 93 | + } | ||
| 94 | + | ||
| 95 | + /** | ||
| 96 | + * Optimise the lucene index. | ||
| 97 | + * This can be called periodically to optimise performance and size of the lucene index. | ||
| 98 | + * | ||
| 99 | + */ | ||
| 100 | + public function optimise() | ||
| 101 | + { | ||
| 102 | + $this->lucene->optimize(); | ||
| 103 | + } | ||
| 104 | + | ||
| 105 | + /** | ||
| 106 | + * Removes a document from the index. | ||
| 107 | + * | ||
| 108 | + * @param int $docid | ||
| 109 | + * @return array containing (content, discussion, title) | ||
| 110 | + */ | ||
| 111 | + public function deleteDocument($docid) | ||
| 112 | + { | ||
| 113 | + return $this->lucene->deleteDocument($docid); | ||
| 114 | + } | ||
| 115 | + | ||
| 116 | + /** | ||
| 117 | + * Runs a query against the Lucene XML-RPC server and returns the highest-scoring hit for each document. | ||
| 118 | + * | ||
| 119 | + * @param string $query | ||
| 120 | + * @return array | ||
| 121 | + */ | ||
| 122 | + public function query($query) | ||
| 123 | + { | ||
| 124 | + $results = array(); | ||
| 125 | + $hits = $this->lucene->query($query); | ||
| 126 | + if (is_array($hits)) | ||
| 127 | + { | ||
| 128 | + foreach ($hits as $hit) | ||
| 129 | + { | ||
| 130 | + | ||
| 131 | + | ||
| 132 | + $document_id = $hit->DocumentID; | ||
| 133 | + $content = $hit->Text; | ||
| 134 | + $discussion = $hit->Title; //TODO: fix to be discussion. The Lucene server is not returning the discussion text yet. | ||
| 135 | + $title = $hit->Title; | ||
| 136 | + $score = $hit->Rank; | ||
| 137 | + | ||
| 138 | + // avoid adding duplicates. If the document is already in the results, keep whichever hit has the higher score. | ||
| 139 | + if (!array_key_exists($document_id, $results) || $score > $results[$document_id]->Score) | ||
| 140 | + { | ||
| 141 | + $results[$document_id] = new QueryResultItem($document_id, $score, $title, $content, $discussion); | ||
| 142 | + } | ||
| 143 | + } | ||
| 144 | + } | ||
| 145 | + else | ||
| 146 | + { | ||
| 147 | + $_SESSION['KTErrorMessage'][] = _kt('The XMLRPC Server did not respond correctly. Please notify the system administrator to investigate.'); | ||
| 148 | + } | ||
| 149 | + return $results; | ||
| 150 | + } | ||
| 151 | + | ||
| 10 | } | 152 | } |
| 11 | ?> | 153 | ?> |
| 12 | \ No newline at end of file | 154 | \ No newline at end of file |
search2/indexing/indexers/PHPLuceneIndexer.inc.php
| @@ -30,7 +30,7 @@ class PHPLuceneIndexer extends Indexer | @@ -30,7 +30,7 @@ class PHPLuceneIndexer extends Indexer | ||
| 30 | { | 30 | { |
| 31 | $config =& KTConfig::getSingleton(); | 31 | $config =& KTConfig::getSingleton(); |
| 32 | $indexPath = $config->get('indexer/luceneDirectory'); | 32 | $indexPath = $config->get('indexer/luceneDirectory'); |
| 33 | - $lucene = new Zend_Search_Lucene($indexPath, true); | 33 | + new Zend_Search_Lucene($indexPath, true); |
| 34 | } | 34 | } |
| 35 | 35 | ||
| 36 | 36 | ||
| @@ -41,13 +41,14 @@ class PHPLuceneIndexer extends Indexer | @@ -41,13 +41,14 @@ class PHPLuceneIndexer extends Indexer | ||
| 41 | * @param string $content | 41 | * @param string $content |
| 42 | * @param string $discussion | 42 | * @param string $discussion |
| 43 | */ | 43 | */ |
| 44 | - private function addDocument($docid, $content, $discussion, $title='') | 44 | + private function addDocument($docid, $content, $discussion, $title, $version) |
| 45 | { | 45 | { |
| 46 | $doc = new Zend_Search_Lucene_Document(); | 46 | $doc = new Zend_Search_Lucene_Document(); |
| 47 | $doc->addField(Zend_Search_Lucene_Field::Text('DocumentID', PHPLuceneIndexer::longToString($docid))); | 47 | $doc->addField(Zend_Search_Lucene_Field::Text('DocumentID', PHPLuceneIndexer::longToString($docid))); |
| 48 | $doc->addField(Zend_Search_Lucene_Field::Text('Content', $content, 'UTF-8')); | 48 | $doc->addField(Zend_Search_Lucene_Field::Text('Content', $content, 'UTF-8')); |
| 49 | $doc->addField(Zend_Search_Lucene_Field::Text('Discussion', $discussion, 'UTF-8')); | 49 | $doc->addField(Zend_Search_Lucene_Field::Text('Discussion', $discussion, 'UTF-8')); |
| 50 | $doc->addField(Zend_Search_Lucene_Field::Text('Title', $title, 'UTF-8')); | 50 | $doc->addField(Zend_Search_Lucene_Field::Text('Title', $title, 'UTF-8')); |
| 51 | + $doc->addField(Zend_Search_Lucene_Field::Text('Version', $version, 'UTF-8')); | ||
| 51 | $this->lucene->addDocument($doc); | 52 | $this->lucene->addDocument($doc); |
| 52 | } | 53 | } |
| 53 | 54 | ||
| @@ -58,7 +59,7 @@ class PHPLuceneIndexer extends Indexer | @@ -58,7 +59,7 @@ class PHPLuceneIndexer extends Indexer | ||
| 58 | * @param string $textfile | 59 | * @param string $textfile |
| 59 | * @return boolean | 60 | * @return boolean |
| 60 | */ | 61 | */ |
| 61 | - protected function indexDocument($docid, $textfile, $title='') | 62 | + protected function indexDocument($docid, $textfile, $title, $version) |
| 62 | { | 63 | { |
| 63 | global $default; | 64 | global $default; |
| 64 | 65 | ||
| @@ -68,9 +69,9 @@ class PHPLuceneIndexer extends Indexer | @@ -68,9 +69,9 @@ class PHPLuceneIndexer extends Indexer | ||
| 68 | return false; | 69 | return false; |
| 69 | } | 70 | } |
| 70 | 71 | ||
| 71 | - list($content, $discussion) = $this->deleteDocument($docid); | 72 | + list($content, $discussion, $title2, $version2) = $this->deleteDocument($docid); |
| 72 | 73 | ||
| 73 | - $this->addDocument($docid, file_get_contents($textfile), $discussion, $title); | 74 | + $this->addDocument($docid, file_get_contents($textfile), $discussion, $title, $version); |
| 74 | 75 | ||
| 75 | return true; | 76 | return true; |
| 76 | } | 77 | } |
| @@ -82,7 +83,7 @@ class PHPLuceneIndexer extends Indexer | @@ -82,7 +83,7 @@ class PHPLuceneIndexer extends Indexer | ||
| 82 | * @param string $textfile | 83 | * @param string $textfile |
| 83 | * @return boolean | 84 | * @return boolean |
| 84 | */ | 85 | */ |
| 85 | - protected function indexDocumentAndDiscussion($docid, $textfile, $title='') | 86 | + protected function indexDocumentAndDiscussion($docid, $textfile, $title, $version) |
| 86 | { | 87 | { |
| 87 | global $default; | 88 | global $default; |
| 88 | 89 | ||
| @@ -94,7 +95,7 @@ class PHPLuceneIndexer extends Indexer | @@ -94,7 +95,7 @@ class PHPLuceneIndexer extends Indexer | ||
| 94 | 95 | ||
| 95 | $this->deleteDocument($docid); | 96 | $this->deleteDocument($docid); |
| 96 | 97 | ||
| 97 | - $this->addDocument($docid, file_get_contents($textfile), Indexer::getDiscussionText($docid), $title); | 98 | + $this->addDocument($docid, file_get_contents($textfile), Indexer::getDiscussionText($docid), $title, $version); |
| 98 | 99 | ||
| 99 | return true; | 100 | return true; |
| 100 | } | 101 | } |
| @@ -107,9 +108,9 @@ class PHPLuceneIndexer extends Indexer | @@ -107,9 +108,9 @@ class PHPLuceneIndexer extends Indexer | ||
| 107 | */ | 108 | */ |
| 108 | protected function indexDiscussion($docid) | 109 | protected function indexDiscussion($docid) |
| 109 | { | 110 | { |
| 110 | - list($content, $discussion, $title) = $this->deleteDocument($docid); | 111 | + list($content, $discussion, $title, $version) = $this->deleteDocument($docid); |
| 111 | 112 | ||
| 112 | - $this->addDocument($docid, $content, Indexer::getDiscussionText($docid), $title); | 113 | + $this->addDocument($docid, $content, Indexer::getDiscussionText($docid), $title, $version); |
| 113 | 114 | ||
| 114 | return true; | 115 | return true; |
| 115 | } | 116 | } |
| @@ -142,11 +143,11 @@ class PHPLuceneIndexer extends Indexer | @@ -142,11 +143,11 @@ class PHPLuceneIndexer extends Indexer | ||
| 142 | $content = $hit->Content; | 143 | $content = $hit->Content; |
| 143 | $discussion = $hit->Discussion; | 144 | $discussion = $hit->Discussion; |
| 144 | $title = $hit->Title; | 145 | $title = $hit->Title; |
| 145 | - $title=''; | 146 | + $version = $hit->Version; |
| 146 | 147 | ||
| 147 | $this->lucene->delete($hit); | 148 | $this->lucene->delete($hit); |
| 148 | } | 149 | } |
| 149 | - return array($content, $discussion, $title); | 150 | + return array($content, $discussion, $title, $version); |
| 150 | } | 151 | } |
| 151 | 152 | ||
| 152 | /** | 153 | /** |