Commit bc1bb8bfe21001edc84f4c275022848efa027693
1 parent
03fc43f8
KTS-673
"The search algorithm needs some work" Updated. Include version of document to the index. Committed By: Conrad Vermeulen Reviewed By: Kevin Fourie git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/trunk@7239 c91229c3-7414-0410-bfa2-8a42b809f60b
Showing
2 changed files
with
157 additions
and
14 deletions
search2/indexing/indexers/JavaXMLRPCLuceneIndexer.inc.php
| 1 | 1 | <? |
| 2 | -require_once('indexer.inc.php'); | |
| 2 | + | |
| 3 | +require_once('indexing/lib/XmlRpcLucene.inc.php'); | |
| 3 | 4 | |
| 4 | 5 | class JavaXMLRPCLuceneIndexer extends Indexer |
| 5 | 6 | { |
| 6 | - protected function indexDocument($docid, $textfile) | |
| 7 | + /** | |
| 8 | + * @var XmlRpcLucene | |
| 9 | + */ | |
| 10 | + private $lucene; | |
| 11 | + | |
| 12 | + /** | |
| 13 | + * The constructor for the Java XML-RPC Lucene indexer. | |
| 14 | + * | |
| 15 | + * Takes no parameters. The lucene server URL is read from the 'indexer/JavaLuceneURL' config setting (default 'http://localhost:8875'). | |
| 16 | + */ | |
| 17 | + public function __construct() | |
| 18 | + { | |
| 19 | + parent::__construct(); | |
| 20 | + | |
| 21 | + $config =& KTConfig::getSingleton(); | |
| 22 | + $javaServerUrl = $config->get('indexer/JavaLuceneURL', 'http://localhost:8875'); | |
| 23 | + $this->lucene = new XmlRpcLucene($javaServerUrl); | |
| 24 | + } | |
| 25 | + | |
| 26 | + /** | |
| 27 | + * Creates an index to be used. | |
| 28 | + * | |
| 29 | + */ | |
| 30 | + public static function createIndex() | |
| 31 | + { | |
| 32 | + // do nothing. The java lucene indexer will create the indexes if required | |
| 33 | + } | |
| 34 | + | |
| 35 | + /** | |
| 36 | + * Indexes a document based on a text file. | |
| 37 | + * | |
| 38 | + * @param int $docid | |
| 39 | + * @param string $textfile | |
| 40 | + * @return boolean | |
| 41 | + */ | |
| 42 | + protected function indexDocument($docid, $textfile, $title, $version) | |
| 7 | 43 | { |
| 8 | - throw new Exception('TODO'); | |
| 44 | + try | |
| 45 | + { | |
| 46 | + return $this->lucene->addDocument($docid, $textfile, '', $title, $version); | |
| 47 | + } | |
| 48 | + catch(Exception $e) | |
| 49 | + { | |
| 50 | + return false; | |
| 51 | + } | |
| 9 | 52 | } |
| 53 | + | |
| 54 | + /** | |
| 55 | + * Indexes the content and discussions on a document. | |
| 56 | + * | |
| 57 | + * @param int $docid | |
| 58 | + * @param string $textfile | |
| 59 | + * @return boolean | |
| 60 | + */ | |
| 61 | + protected function indexDocumentAndDiscussion($docid, $textfile, $title, $version) | |
| 62 | + { | |
| 63 | + try | |
| 64 | + { | |
| 65 | + $discussion = Indexer::getDiscussionText($docid); | |
| 66 | + return $this->lucene->addDocument($docid, $textfile, $discussion, $title, $version); | |
| 67 | + } | |
| 68 | + catch(Exception $e) | |
| 69 | + { | |
| 70 | + return false; | |
| 71 | + } | |
| 72 | + } | |
| 73 | + | |
| 74 | + /** | |
| 75 | + * Indexes a discussion on a document. | |
| 76 | + * | |
| 77 | + * @param int $docid | |
| 78 | + * @return boolean | |
| 79 | + */ | |
| 80 | + protected function indexDiscussion($docid) | |
| 81 | + { | |
| 82 | + try | |
| 83 | + { | |
| 84 | + $discussion = Indexer::getDiscussionText($docid); | |
| 85 | + return $this->lucene->updateDiscussion($docid, $discussion); | |
| 86 | + } | |
| 87 | + catch(Exception $e) | |
| 88 | + { | |
| 89 | + return false; | |
| 90 | + } | |
| 91 | + | |
| 92 | + return true; | |
| 93 | + } | |
| 94 | + | |
| 95 | + /** | |
| 96 | + * Optimise the lucene index. | |
| 97 | + * This can be called periodically to optimise performance and size of the lucene index. | |
| 98 | + * | |
| 99 | + */ | |
| 100 | + public function optimise() | |
| 101 | + { | |
| 102 | + $this->lucene->optimize(); | |
| 103 | + } | |
| 104 | + | |
| 105 | + /** | |
| 106 | + * Removes a document from the index. | |
| 107 | + * | |
| 108 | + * @param int $docid | |
| 109 | + * @return array containing (content, discussion, title) | |
| 110 | + */ | |
| 111 | + public function deleteDocument($docid) | |
| 112 | + { | |
| 113 | + return $this->lucene->deleteDocument($docid); | |
| 114 | + } | |
| 115 | + | |
| 116 | + /** | |
| 117 | + * Runs a query against the lucene server and returns the results keyed by document id. | |
| 118 | + * | |
| 119 | + * @param string $query | |
| 120 | + * @return array | |
| 121 | + */ | |
| 122 | + public function query($query) | |
| 123 | + { | |
| 124 | + $results = array(); | |
| 125 | + $hits = $this->lucene->query($query); | |
| 126 | + if (is_array($hits)) | |
| 127 | + { | |
| 128 | + foreach ($hits as $hit) | |
| 129 | + { | |
| 130 | + | |
| 131 | + | |
| 132 | + $document_id = $hit->DocumentID; | |
| 133 | + $content = $hit->Text; | |
| 134 | + $discussion = $hit->Title; //TODO: fix to be discussion. lucen server is not returning discussion text as well.. | |
| 135 | + $title = $hit->Title; | |
| 136 | + $score = $hit->Rank; | |
| 137 | + | |
| 138 | + // avoid adding duplicates. If the document is in already, keep whichever hit has the higher score. | |
| 139 | + if (!array_key_exists($document_id, $results) || $score > $results[$document_id]->Score) | |
| 140 | + { | |
| 141 | + $results[$document_id] = new QueryResultItem($document_id, $score, $title, $content, $discussion); | |
| 142 | + } | |
| 143 | + } | |
| 144 | + } | |
| 145 | + else | |
| 146 | + { | |
| 147 | + $_SESSION['KTErrorMessage'][] = _kt('The XMLRPC Server did not respond correctly. Please notify the system administrator to investigate.'); | |
| 148 | + } | |
| 149 | + return $results; | |
| 150 | + } | |
| 151 | + | |
| 10 | 152 | } |
| 11 | 153 | ?> |
| 12 | 154 | \ No newline at end of file | ... | ... |
search2/indexing/indexers/PHPLuceneIndexer.inc.php
| ... | ... | @@ -30,7 +30,7 @@ class PHPLuceneIndexer extends Indexer |
| 30 | 30 | { |
| 31 | 31 | $config =& KTConfig::getSingleton(); |
| 32 | 32 | $indexPath = $config->get('indexer/luceneDirectory'); |
| 33 | - $lucene = new Zend_Search_Lucene($indexPath, true); | |
| 33 | + new Zend_Search_Lucene($indexPath, true); | |
| 34 | 34 | } |
| 35 | 35 | |
| 36 | 36 | |
| ... | ... | @@ -41,13 +41,14 @@ class PHPLuceneIndexer extends Indexer |
| 41 | 41 | * @param string $content |
| 42 | 42 | * @param string $discussion |
| 43 | 43 | */ |
| 44 | - private function addDocument($docid, $content, $discussion, $title='') | |
| 44 | + private function addDocument($docid, $content, $discussion, $title, $version) | |
| 45 | 45 | { |
| 46 | 46 | $doc = new Zend_Search_Lucene_Document(); |
| 47 | 47 | $doc->addField(Zend_Search_Lucene_Field::Text('DocumentID', PHPLuceneIndexer::longToString($docid))); |
| 48 | 48 | $doc->addField(Zend_Search_Lucene_Field::Text('Content', $content, 'UTF-8')); |
| 49 | 49 | $doc->addField(Zend_Search_Lucene_Field::Text('Discussion', $discussion, 'UTF-8')); |
| 50 | 50 | $doc->addField(Zend_Search_Lucene_Field::Text('Title', $title, 'UTF-8')); |
| 51 | + $doc->addField(Zend_Search_Lucene_Field::Text('Version', $version, 'UTF-8')); | |
| 51 | 52 | $this->lucene->addDocument($doc); |
| 52 | 53 | } |
| 53 | 54 | |
| ... | ... | @@ -58,7 +59,7 @@ class PHPLuceneIndexer extends Indexer |
| 58 | 59 | * @param string $textfile |
| 59 | 60 | * @return boolean |
| 60 | 61 | */ |
| 61 | - protected function indexDocument($docid, $textfile, $title='') | |
| 62 | + protected function indexDocument($docid, $textfile, $title, $version) | |
| 62 | 63 | { |
| 63 | 64 | global $default; |
| 64 | 65 | |
| ... | ... | @@ -68,9 +69,9 @@ class PHPLuceneIndexer extends Indexer |
| 68 | 69 | return false; |
| 69 | 70 | } |
| 70 | 71 | |
| 71 | - list($content, $discussion) = $this->deleteDocument($docid); | |
| 72 | + list($content, $discussion, $title2, $version2) = $this->deleteDocument($docid); | |
| 72 | 73 | |
| 73 | - $this->addDocument($docid, file_get_contents($textfile), $discussion, $title); | |
| 74 | + $this->addDocument($docid, file_get_contents($textfile), $discussion, $title, $version); | |
| 74 | 75 | |
| 75 | 76 | return true; |
| 76 | 77 | } |
| ... | ... | @@ -82,7 +83,7 @@ class PHPLuceneIndexer extends Indexer |
| 82 | 83 | * @param string $textfile |
| 83 | 84 | * @return boolean |
| 84 | 85 | */ |
| 85 | - protected function indexDocumentAndDiscussion($docid, $textfile, $title='') | |
| 86 | + protected function indexDocumentAndDiscussion($docid, $textfile, $title, $version) | |
| 86 | 87 | { |
| 87 | 88 | global $default; |
| 88 | 89 | |
| ... | ... | @@ -94,7 +95,7 @@ class PHPLuceneIndexer extends Indexer |
| 94 | 95 | |
| 95 | 96 | $this->deleteDocument($docid); |
| 96 | 97 | |
| 97 | - $this->addDocument($docid, file_get_contents($textfile), Indexer::getDiscussionText($docid), $title); | |
| 98 | + $this->addDocument($docid, file_get_contents($textfile), Indexer::getDiscussionText($docid), $title, $version); | |
| 98 | 99 | |
| 99 | 100 | return true; |
| 100 | 101 | } |
| ... | ... | @@ -107,9 +108,9 @@ class PHPLuceneIndexer extends Indexer |
| 107 | 108 | */ |
| 108 | 109 | protected function indexDiscussion($docid) |
| 109 | 110 | { |
| 110 | - list($content, $discussion, $title) = $this->deleteDocument($docid); | |
| 111 | + list($content, $discussion, $title, $version) = $this->deleteDocument($docid); | |
| 111 | 112 | |
| 112 | - $this->addDocument($docid, $content, Indexer::getDiscussionText($docid), $title); | |
| 113 | + $this->addDocument($docid, $content, Indexer::getDiscussionText($docid), $title, $version); | |
| 113 | 114 | |
| 114 | 115 | return true; |
| 115 | 116 | } |
| ... | ... | @@ -142,11 +143,11 @@ class PHPLuceneIndexer extends Indexer |
| 142 | 143 | $content = $hit->Content; |
| 143 | 144 | $discussion = $hit->Discussion; |
| 144 | 145 | $title = $hit->Title; |
| 145 | - $title=''; | |
| 146 | + $version = $hit->Version; | |
| 146 | 147 | |
| 147 | 148 | $this->lucene->delete($hit); |
| 148 | 149 | } |
| 149 | - return array($content, $discussion, $title); | |
| 150 | + return array($content, $discussion, $title, $version); | |
| 150 | 151 | } |
| 151 | 152 | |
| 152 | 153 | /** | ... | ... |