Commit 96d0b2592f3d095f28e8836e33c27808566e8369

Authored by conradverm
1 parent 05eb195b

KTS-1012

"not indexed documents are not flagged to the admin"
Fixed. reportUnindexedDocuments.php iterates through the document IDs in the database and identifies documents that do not have a record in the index.

Committed By: Conrad Vermeulen
Reviewed By: Megan Watson

git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/trunk@8693 c91229c3-7414-0410-bfa2-8a42b809f60b
search2/indexing/bin/reportUnindexedDocuments.php 0 → 100644
  1 +<?php
  2 +
  3 +/**
  4 + * $Id:$
  5 + *
  6 + * KnowledgeTree Community Edition
  7 + * Document Management Made Simple
  8 + * Copyright (C) 2008 KnowledgeTree Inc.
  9 + * Portions copyright The Jam Warehouse Software (Pty) Limited
  10 + *
  11 + * This program is free software; you can redistribute it and/or modify it under
  12 + * the terms of the GNU General Public License version 3 as published by the
  13 + * Free Software Foundation.
  14 + *
  15 + * This program is distributed in the hope that it will be useful, but WITHOUT
  16 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  17 + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  18 + * details.
  19 + *
  20 + * You should have received a copy of the GNU General Public License
  21 + * along with this program. If not, see <http://www.gnu.org/licenses/>.
  22 + *
  23 + * You can contact KnowledgeTree Inc., PO Box 7775 #87847, San Francisco,
  24 + * California 94120-7775, or email info@knowledgetree.com.
  25 + *
  26 + * The interactive user interfaces in modified source and object code versions
  27 + * of this program must display Appropriate Legal Notices, as required under
  28 + * Section 5 of the GNU General Public License version 3.
  29 + *
  30 + * In accordance with Section 7(b) of the GNU General Public License version 3,
  31 + * these Appropriate Legal Notices must retain the display of the "Powered by
  32 + * KnowledgeTree" logo and retain the original copyright notice. If the display of the
  33 + * logo is not reasonably feasible for technical reasons, the Appropriate Legal Notices
  34 + * must display the words "Powered by KnowledgeTree" and retain the original
  35 + * copyright notice.
  36 + * Contributor( s): ______________________________________
  37 + *
  38 + */
  39 +
  40 +/**
  41 + * PURPOSE:
  42 + *
  43 + * The purpose of this script is to list documents that are not currently indexed.
  44 + *
  45 + * Usage: reportUnindexedDocuments.php [reindex]
  46 + *
  47 + * If 'reindex' is specified, documents will be re-indexed.
  48 + *
  49 + */
  50 +
  51 +session_start();
  52 +chdir(dirname(__FILE__));
  53 +require_once(realpath('../../../config/dmsDefaults.php'));
  54 +
  55 +$sql = 'select id from documents';
  56 +$rows = DBUtil::getResultArray($sql);
  57 +
  58 +$indexer = Indexer::get();
  59 +$diagnosis = $indexer->diagnose();
  60 +
  61 +if (!empty($diagnosis))
  62 +{
  63 + die($diagnosis);
  64 +}
  65 +
  66 +require_once('indexing/indexerCore.inc.php');
  67 +$reindex=false;
  68 +if ($argc > 0)
  69 +{
  70 + foreach($argv as $arg)
  71 + {
  72 + switch (strtolower($arg))
  73 + {
  74 + case 'reindex':
  75 + $reindex=true;
  76 + print "* " . _kt("Reindexing documents when they are encountered.") . "\n";
  77 + break;
  78 + case 'help':
  79 + print "Usage: registerTypes.php [clear]\n";
  80 + exit;
  81 + }
  82 + }
  83 +}
  84 +
  85 +print "Querying document index...\n\n";
  86 +print "Note that this is quite an expensive task....\n\n";
  87 +
  88 +$notIndexed = array();
  89 +$i = 0;
  90 +foreach($rows as $row)
  91 +{
  92 + $docId = $row['id'];
  93 + if (!$indexer->isDocumentIndexed($docId))
  94 + {
  95 + $notIndexed[] = $docId;
  96 + }
  97 + if ($i % 100 == 0) print '.';
  98 + if ($i++ % 4000 == 0) print "\n";
  99 +}
  100 +
  101 +print "\nReporting...\n";
  102 +
  103 +if (empty($notIndexed))
  104 +{
  105 + print "All documents are indexed\n";
  106 +}
  107 +else
  108 +{
  109 + print "\n-----START-----\n\"Document Id\",\"Title\",\"Full Path\"\n";
  110 +
  111 + $notIndexed = implode(',', $notIndexed);
  112 + $sql = "select d.id, dm.name as title, d.full_path from documents d inner join document_metadata_version dm on d.metadata_version_id = dm.id where d.id in ($notIndexed) ";
  113 + $rows = DBUtil::getResultArray($sql);
  114 +
  115 + foreach($rows as $row)
  116 + {
  117 + print '"' .$row['id'] . '","' .$row['title'] . '","' .$row['full_path'] . '"' . "\n";
  118 + if ($reindex)
  119 + {
  120 + Indexer::index($docId);
  121 + $GLOBALS["_OBJECTCACHE"] = array();
  122 + }
  123 + }
  124 +
  125 + print "-----END-----\n\nDone\n";
  126 +}
  127 +
  128 +?>
0 129 \ No newline at end of file
... ...
search2/indexing/indexerCore.inc.php
... ... @@ -1708,6 +1708,8 @@ abstract class Indexer
1708 1708 */
1709 1709 public abstract function getDocumentsInIndex();
1710 1710  
  /**
   * Reports whether the document with the given id has an entry in the index.
   *
   * Each concrete indexer supplies the lookup. reportUnindexedDocuments.php
   * treats a falsy result as "not indexed".
   *
   * @param mixed $documentId id of the document to look up
   * @return mixed truthy when the document is indexed
   *               NOTE(review): exact return type depends on the implementation - confirm.
   */
  1711 + public abstract function isDocumentIndexed($documentId);
  1712 +
1711 1713 /**
1712 1714 * Returns the path to the index directory
1713 1715 *
... ...
search2/indexing/indexers/JavaXMLRPCLuceneIndexer.inc.php
... ... @@ -7,31 +7,31 @@
7 7 * Document Management Made Simple
8 8 * Copyright (C) 2008 KnowledgeTree Inc.
9 9 * Portions copyright The Jam Warehouse Software (Pty) Limited
10   - *
  10 + *
11 11 * This program is free software; you can redistribute it and/or modify it under
12 12 * the terms of the GNU General Public License version 3 as published by the
13 13 * Free Software Foundation.
14   - *
  14 + *
15 15 * This program is distributed in the hope that it will be useful, but WITHOUT
16 16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 17 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
18 18 * details.
19   - *
  19 + *
20 20 * You should have received a copy of the GNU General Public License
21 21 * along with this program. If not, see <http://www.gnu.org/licenses/>.
22   - *
23   - * You can contact KnowledgeTree Inc., PO Box 7775 #87847, San Francisco,
  22 + *
  23 + * You can contact KnowledgeTree Inc., PO Box 7775 #87847, San Francisco,
24 24 * California 94120-7775, or email info@knowledgetree.com.
25   - *
  25 + *
26 26 * The interactive user interfaces in modified source and object code versions
27 27 * of this program must display Appropriate Legal Notices, as required under
28 28 * Section 5 of the GNU General Public License version 3.
29   - *
  29 + *
30 30 * In accordance with Section 7(b) of the GNU General Public License version 3,
31 31 * these Appropriate Legal Notices must retain the display of the "Powered by
32   - * KnowledgeTree" logo and retain the original copyright notice. If the display of the
  32 + * KnowledgeTree" logo and retain the original copyright notice. If the display of the
33 33 * logo is not reasonably feasible for technical reasons, the Appropriate Legal Notices
34   - * must display the words "Powered by KnowledgeTree" and retain the original
  34 + * must display the words "Powered by KnowledgeTree" and retain the original
35 35 * copyright notice.
36 36 * Contributor( s): ______________________________________
37 37 *
... ... @@ -279,5 +279,11 @@ class JavaXMLRPCLuceneIndexer extends Indexer
279 279 return $stats->indexDirectory;
280 280 }
281 281  
  282 + public function isDocumentIndexed($document_id)
  283 + {
  284 + return $this->lucene->documentExists($document_id);
  285 + }
  286 +
  287 +
282 288 }
283 289 ?>
... ...
search2/indexing/lib/XmlRpcLucene.inc.php
... ... @@ -7,31 +7,31 @@
7 7 * Document Management Made Simple
8 8 * Copyright (C) 2008 KnowledgeTree Inc.
9 9 * Portions copyright The Jam Warehouse Software (Pty) Limited
10   - *
  10 + *
11 11 * This program is free software; you can redistribute it and/or modify it under
12 12 * the terms of the GNU General Public License version 3 as published by the
13 13 * Free Software Foundation.
14   - *
  14 + *
15 15 * This program is distributed in the hope that it will be useful, but WITHOUT
16 16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 17 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
18 18 * details.
19   - *
  19 + *
20 20 * You should have received a copy of the GNU General Public License
21 21 * along with this program. If not, see <http://www.gnu.org/licenses/>.
22   - *
23   - * You can contact KnowledgeTree Inc., PO Box 7775 #87847, San Francisco,
  22 + *
  23 + * You can contact KnowledgeTree Inc., PO Box 7775 #87847, San Francisco,
24 24 * California 94120-7775, or email info@knowledgetree.com.
25   - *
  25 + *
26 26 * The interactive user interfaces in modified source and object code versions
27 27 * of this program must display Appropriate Legal Notices, as required under
28 28 * Section 5 of the GNU General Public License version 3.
29   - *
  29 + *
30 30 * In accordance with Section 7(b) of the GNU General Public License version 3,
31 31 * these Appropriate Legal Notices must retain the display of the "Powered by
32   - * KnowledgeTree" logo and retain the original copyright notice. If the display of the
  32 + * KnowledgeTree" logo and retain the original copyright notice. If the display of the
33 33 * logo is not reasonably feasible for technical reasons, the Appropriate Legal Notices
34   - * must display the words "Powered by KnowledgeTree" and retain the original
  34 + * must display the words "Powered by KnowledgeTree" and retain the original
35 35 * copyright notice.
36 36 * Contributor( s): ______________________________________
37 37 *
... ... @@ -194,7 +194,7 @@ class XmlRpcLucene
194 194 $result=&$this->client->send($function);
195 195 if($result->faultCode())
196 196 {
197   - $this->error($result, 'deleteDocument');
  197 + $this->error($result, 'documentExists');
198 198 return false;
199 199 }
200 200 return php_xmlrpc_decode($result->value());
... ...