Commit a1a973757e04c1f085fffc12809bd5c662530ac7

Authored by conradverm
1 parent b39270d9

KTS-673

"The search algorithm needs some work"
Updated.

Committed By: Conrad Vermeulen
Reviewed By: Kevin Fourie

git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/trunk@7403 c91229c3-7414-0410-bfa2-8a42b809f60b
search2/search/expr.inc.php
... ... @@ -847,16 +847,7 @@ class TextQueryBuilder implements QueryBuilder
847 847 $init = $result->Rank;
848 848 $score=0;
849 849 $ranker = RankManager::get();
850   - $discussion = $result->Discussion;
851   - if (!empty($discussion))
852   - {
853   - $score += $init *$ranker->scoreField('Discussion', 'S');
854   - }
855   - else
856   - {
857   - $score += $init *$ranker->scoreField('DocumentText', 'S');
858   -
859   - }
  850 + $score += $init *$ranker->scoreField('DocumentText', 'S');
860 851 return $score;
861 852 }
862 853  
... ... @@ -865,142 +856,11 @@ class TextQueryBuilder implements QueryBuilder
865 856 $this->query = $query;
866 857 }
867 858  
868   - private function extractText($word, $maxwords=40, $maxlen=512)
  859 + function getResultText($result)
869 860 {
870   - $offset=stripos($this->text, $word);
871   -
872   - if ($offset === false)
873   - {
874   - return array(false, false);
875   - }
876   -
877   - if ($offset == 0)
878   - {
879   - $startOffset = 0;
880   - }
881   - else
882   - {
883   - $text = substr($this->text, 0 , $offset);
884   -
885   - $lastsentence = strrpos($text, '.');
886   - if ($lastsentence === false) $lastsentence=0;
887   -
888   - if ($offset - $lastsentence > $maxlen)
889   - {
890   - $lastsentence = $offset - $maxlen;
891   - }
892   -
893   - $text = substr($this->text, $lastsentence, $offset - $lastsentence);
894   -
895   - $wordoffset= strlen($text)-1;
896   - $words = $maxwords;
897   - while ($words > 0)
898   - {
899   - $text = substr($text, 0, $wordoffset);
900   - $foundoffset = strrpos($text, ' ');
901   - if ($foundoffset === false)
902   - {
903   - break;
904   - }
905   - $wordoffset = $foundoffset;
906   - $words--;
907   - }
908   - $startOffset = $lastsentence + $wordoffset;
909   - }
910   -
911   -
912   -
913   - $nextsentence = strpos($this->text, '.', $offset);
914   -
915   - $words = $maxwords;
916   - $endOffset = $offset;
917   - while ($words > 0)
918   - {
919   - $foundoffset = strpos($this->text, ' ', $endOffset+1);
920   - if ($foundoffset === false)
921   - {
922   - break;
923   - }
924   - if ($endOffset > $offset + $maxlen)
925   - {
926   - break;
927   - }
928   - if ($endOffset > $nextsentence)
929   - {
930   - $endOffset = $nextsentence-1;
931   - break;
932   - }
933   - $endOffset = $foundoffset;
934   -
935   - $words--;
936   - }
937   -
938   - return array($startOffset, substr($this->text, $startOffset, $endOffset - $startOffset + 1));
  861 + // not require!
  862 + return '';
939 863 }
940   -
941   -
942   - public function getResultText($result)
943   - {
944   - $this->text = substr($result->Text,0,40960);
945   - $words = array();
946   - $sentences = array();
947   -
948   - preg_match_all('("[^"]*")',$this->query, $matches,PREG_OFFSET_CAPTURE);
949   -
950   - foreach($matches[0] as $word)
951   - {
952   - list($word,$offset) = $word;
953   - $word = substr($word,1,-1);
954   - $wordlen = strlen($word);
955   - $res = $this->extractText($word);
956   - list($sentenceOffset,$sentence) = $res;
957   -
958   - if ($sentenceOffset === false)
959   - {
960   - continue;
961   - }
962   -
963   - if (array_key_exists($sentenceOffset, $sentences))
964   - {
965   - $sentences[$sentenceOffset]['score']++;
966   - }
967   - else
968   - {
969   - $sentences[$sentenceOffset] = array(
970   - 'sentence'=>$sentence,
971   - 'score'=>1
972   - );
973   - }
974   -
975   - $sentence = $sentences[$sentenceOffset]['sentence'];
976   -
977   - preg_match_all("@$word@i",$sentence, $swords,PREG_OFFSET_CAPTURE);
978   - foreach($swords[0] as $wordx)
979   - {
980   - list($wordx,$offset) = $wordx;
981   -
982   - $sentence = substr($sentence,0, $offset) . '<b>' . substr($sentence, $offset, $wordlen) . '</b>' . substr($sentence, $offset + $wordlen);
983   - }
984   -
985   - $sentences[$sentenceOffset]['sentence'] = $sentence;
986   -
987   - $words[$word] = array(
988   - 'sentence'=>$sentenceOffset
989   - );
990   - }
991   -
992   - ksort($sentences);
993   - $result = '';
994   -
995   - foreach($sentences as $o=>$i)
996   - {
997   - if (!empty($result)) $result .= '&nbsp;&nbsp;&nbsp;...&nbsp;&nbsp;&nbsp;&nbsp;';
998   - $result .= $i['sentence'];
999   - }
1000   -
1001   - return $result;
1002   - }
1003   -
1004 864 }
1005 865  
1006 866 class SQLQueryBuilder implements QueryBuilder
... ... @@ -1226,7 +1086,10 @@ class SQLQueryBuilder implements QueryBuilder
1226 1086  
1227 1087 private function resolveMetadataOffset($expr)
1228 1088 {
1229   - assert($expr->left()->isMetadataField() );
  1089 + if (!$expr->left()->isMetadataField())
  1090 + {
  1091 + throw new Exception(_kt('Metadata field expected'));
  1092 + }
1230 1093  
1231 1094 $offset=0;
1232 1095 foreach($this->metadata as $item)
... ... @@ -2039,7 +1902,7 @@ class OpExpr extends Expr
2039 1902 $rank = $exprbuilder->getRanking($item);
2040 1903 if (!array_key_exists($document_id, $results) || $rank > $results[$document_id]->Rank)
2041 1904 {
2042   - $results[$document_id] = new MatchResult($document_id, $rank, $item['title'], $exprbuilder->getResultText($item));
  1905 + $results[$document_id] = new QueryResultItem($document_id, $rank, $item['title'], $exprbuilder->getResultText($item));
2043 1906 }
2044 1907 }
2045 1908  
... ... @@ -2069,7 +1932,7 @@ class OpExpr extends Expr
2069 1932 {
2070 1933 $item->Rank = $exprbuilder->getRanking($item);
2071 1934 $exprbuilder->setQuery($query);
2072   - $item->Text = $exprbuilder->getResultText($item);
  1935 + //$item->Text = $exprbuilder->getResultText($item); ?? wipe - done at indexer level
2073 1936 }
2074 1937  
2075 1938 return $results;
... ... @@ -2156,11 +2019,7 @@ class OpExpr extends Expr
2156 2019 $permResults = array();
2157 2020 foreach($result as $idx=>$item)
2158 2021 {
2159   - $doc = Document::get($item->DocumentID);
2160   - if (Permission::userHasDocumentReadPermission($doc))
2161   - {
2162   - $permResults[$idx] = $item;
2163   - }
  2022 + $permResults[$idx] = $item;
2164 2023 }
2165 2024  
2166 2025 return $permResults;
... ... @@ -2189,10 +2048,6 @@ class OpExpr extends Expr
2189 2048 $left->toViz($str, $phase);
2190 2049 $right->toViz($str, $phase);
2191 2050 }
2192   -
2193 2051 }
2194 2052  
2195   -
2196   -
2197   -
2198 2053 ?>
2199 2054 \ No newline at end of file
... ...
search2/search/search.inc.php
... ... @@ -229,11 +229,6 @@ class SearchHelper
229 229  
230 230 public static function getSavedSearches($userID)
231 231 {
232   - if (empty($default->db))
233   - {
234   - return array();
235   - }
236   -
237 232 $sql = "SELECT id, name FROM search_saved WHERE type='S'";
238 233  
239 234 // if we are not the system admin, then we get only ours or shared searches
... ...