Commit bc1bb8bfe21001edc84f4c275022848efa027693

Authored by conradverm
1 parent 03fc43f8

KTS-673

"The search algorithm needs some work"
Updated. Include the version of the document in the index.

Committed By: Conrad Vermeulen
Reviewed By: Kevin Fourie

git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/trunk@7239 c91229c3-7414-0410-bfa2-8a42b809f60b
search2/indexing/indexers/JavaXMLRPCLuceneIndexer.inc.php
1 1 <?
2   -require_once('indexer.inc.php');
  2 +
  3 +require_once('indexing/lib/XmlRpcLucene.inc.php');
3 4  
4 5 class JavaXMLRPCLuceneIndexer extends Indexer
5 6 {
6   - protected function indexDocument($docid, $textfile)
  7 + /**
  8 + * @var XmlRpcLucene
  9 + */
  10 + private $lucene;
  11 +
  12 + /**
  13 + * The constructor for the Java Lucene XML-RPC indexer.
  14 + *
  15 + * Takes no arguments. The server URL is read from the 'indexer/JavaLuceneURL' config setting (default 'http://localhost:8875').
  16 + */
  17 + public function __construct()
  18 + {
  19 + parent::__construct();
  20 +
  21 + $config =& KTConfig::getSingleton();
  22 + $javaServerUrl = $config->get('indexer/JavaLuceneURL', 'http://localhost:8875');
  23 + $this->lucene = new XmlRpcLucene($javaServerUrl);
  24 + }
  25 +
  26 + /**
  27 + * Creates an index to be used.
  28 + *
  29 + */
  30 + public static function createIndex()
  31 + {
  32 + // do nothing. The java lucene indexer will create the indexes if required
  33 + }
  34 +
  35 + /**
  36 + * Indexes a document based on a text file.
  37 + *
  38 + * @param int $docid
  39 + * @param string $textfile
  40 + * @return boolean
  41 + */
  42 + protected function indexDocument($docid, $textfile, $title, $version)
7 43 {
8   - throw new Exception('TODO');
  44 + try
  45 + {
  46 + return $this->lucene->addDocument($docid, $textfile, '', $title, $version);
  47 + }
  48 + catch(Exception $e)
  49 + {
  50 + return false;
  51 + }
9 52 }
  53 +
  54 + /**
  55 + * Indexes the content and discussions on a document.
  56 + *
  57 + * @param int $docid
  58 + * @param string $textfile
  59 + * @return boolean
  60 + */
  61 + protected function indexDocumentAndDiscussion($docid, $textfile, $title, $version)
  62 + {
  63 + try
  64 + {
  65 + $discussion = Indexer::getDiscussionText($docid);
  66 + return $this->lucene->addDocument($docid, $textfile, $discussion, $title, $version);
  67 + }
  68 + catch(Exception $e)
  69 + {
  70 + return false;
  71 + }
  72 + }
  73 +
  74 + /**
  75 + * Indexes a discussion on a document.
  76 + *
  77 + * @param int $docid
  78 + * @return boolean
  79 + */
  80 + protected function indexDiscussion($docid)
  81 + {
  82 + try
  83 + {
  84 + $discussion = Indexer::getDiscussionText($docid);
  85 + return $this->lucene->updateDiscussion($docid, $discussion);
  86 + }
  87 + catch(Exception $e)
  88 + {
  89 + return false;
  90 + }
  91 +
  92 + return true;
  93 + }
  94 +
  95 + /**
  96 + * Optimise the lucene index.
  97 + * This can be called periodically to optimise performance and size of the lucene index.
  98 + *
  99 + */
  100 + public function optimise()
  101 + {
  102 + $this->lucene->optimize();
  103 + }
  104 +
  105 + /**
  106 + * Removes a document from the index.
  107 + *
  108 + * @param int $docid
  109 + * @return mixed the result of the XML-RPC deleteDocument call (TODO confirm type against XmlRpcLucene; may not be an array)
  110 + */
  111 + public function deleteDocument($docid)
  112 + {
  113 + return $this->lucene->deleteDocument($docid);
  114 + }
  115 +
  116 + /**
  117 + * Queries the lucene index and returns the results keyed by document id, keeping only the highest-scoring hit per document.
  118 + *
  119 + * @param string $query
  120 + * @return array
  121 + */
  122 + public function query($query)
  123 + {
  124 + $results = array();
  125 + $hits = $this->lucene->query($query);
  126 + if (is_array($hits))
  127 + {
  128 + foreach ($hits as $hit)
  129 + {
  130 +
  131 +
  132 + $document_id = $hit->DocumentID;
  133 + $content = $hit->Text;
  134 + $discussion = $hit->Title; //TODO: fix to be discussion. lucen server is not returning discussion text as well..
  135 + $title = $hit->Title;
  136 + $score = $hit->Rank;
  137 +
  138 + // avoid adding duplicates. If it is in already, it has higher priority.
  139 + if (!array_key_exists($document_id, $results) || $score > $results[$document_id]->Score)
  140 + {
  141 + $results[$document_id] = new QueryResultItem($document_id, $score, $title, $content, $discussion);
  142 + }
  143 + }
  144 + }
  145 + else
  146 + {
  147 + $_SESSION['KTErrorMessage'][] = _kt('The XMLRPC Server did not respond correctly. Please notify the system administrator to investigate.');
  148 + }
  149 + return $results;
  150 + }
  151 +
10 152 }
11 153 ?>
12 154 \ No newline at end of file
... ...
search2/indexing/indexers/PHPLuceneIndexer.inc.php
... ... @@ -30,7 +30,7 @@ class PHPLuceneIndexer extends Indexer
30 30 {
31 31 $config =& KTConfig::getSingleton();
32 32 $indexPath = $config->get('indexer/luceneDirectory');
33   - $lucene = new Zend_Search_Lucene($indexPath, true);
  33 + new Zend_Search_Lucene($indexPath, true);
34 34 }
35 35  
36 36  
... ... @@ -41,13 +41,14 @@ class PHPLuceneIndexer extends Indexer
41 41 * @param string $content
42 42 * @param string $discussion
43 43 */
44   - private function addDocument($docid, $content, $discussion, $title='')
  44 + private function addDocument($docid, $content, $discussion, $title, $version)
45 45 {
46 46 $doc = new Zend_Search_Lucene_Document();
47 47 $doc->addField(Zend_Search_Lucene_Field::Text('DocumentID', PHPLuceneIndexer::longToString($docid)));
48 48 $doc->addField(Zend_Search_Lucene_Field::Text('Content', $content, 'UTF-8'));
49 49 $doc->addField(Zend_Search_Lucene_Field::Text('Discussion', $discussion, 'UTF-8'));
50 50 $doc->addField(Zend_Search_Lucene_Field::Text('Title', $title, 'UTF-8'));
  51 + $doc->addField(Zend_Search_Lucene_Field::Text('Version', $version, 'UTF-8'));
51 52 $this->lucene->addDocument($doc);
52 53 }
53 54  
... ... @@ -58,7 +59,7 @@ class PHPLuceneIndexer extends Indexer
58 59 * @param string $textfile
59 60 * @return boolean
60 61 */
61   - protected function indexDocument($docid, $textfile, $title='')
  62 + protected function indexDocument($docid, $textfile, $title, $version)
62 63 {
63 64 global $default;
64 65  
... ... @@ -68,9 +69,9 @@ class PHPLuceneIndexer extends Indexer
68 69 return false;
69 70 }
70 71  
71   - list($content, $discussion) = $this->deleteDocument($docid);
  72 + list($content, $discussion, $title2, $version2) = $this->deleteDocument($docid);
72 73  
73   - $this->addDocument($docid, file_get_contents($textfile), $discussion, $title);
  74 + $this->addDocument($docid, file_get_contents($textfile), $discussion, $title, $version);
74 75  
75 76 return true;
76 77 }
... ... @@ -82,7 +83,7 @@ class PHPLuceneIndexer extends Indexer
82 83 * @param string $textfile
83 84 * @return boolean
84 85 */
85   - protected function indexDocumentAndDiscussion($docid, $textfile, $title='')
  86 + protected function indexDocumentAndDiscussion($docid, $textfile, $title, $version)
86 87 {
87 88 global $default;
88 89  
... ... @@ -94,7 +95,7 @@ class PHPLuceneIndexer extends Indexer
94 95  
95 96 $this->deleteDocument($docid);
96 97  
97   - $this->addDocument($docid, file_get_contents($textfile), Indexer::getDiscussionText($docid), $title);
  98 + $this->addDocument($docid, file_get_contents($textfile), Indexer::getDiscussionText($docid), $title, $version);
98 99  
99 100 return true;
100 101 }
... ... @@ -107,9 +108,9 @@ class PHPLuceneIndexer extends Indexer
107 108 */
108 109 protected function indexDiscussion($docid)
109 110 {
110   - list($content, $discussion, $title) = $this->deleteDocument($docid);
  111 + list($content, $discussion, $title, $version) = $this->deleteDocument($docid);
111 112  
112   - $this->addDocument($docid, $content, Indexer::getDiscussionText($docid), $title);
  113 + $this->addDocument($docid, $content, Indexer::getDiscussionText($docid), $title, $version);
113 114  
114 115 return true;
115 116 }
... ... @@ -142,11 +143,11 @@ class PHPLuceneIndexer extends Indexer
142 143 $content = $hit->Content;
143 144 $discussion = $hit->Discussion;
144 145 $title = $hit->Title;
145   - $title='';
  146 + $version = $hit->Version;
146 147  
147 148 $this->lucene->delete($hit);
148 149 }
149   - return array($content, $discussion, $title);
  150 + return array($content, $discussion, $title, $version);
150 151 }
151 152  
152 153 /**
... ...