diff --git a/search2/indexing/bin/reportUnindexedDocuments.php b/search2/indexing/bin/reportUnindexedDocuments.php
new file mode 100644
index 0000000..ba09c1c
--- /dev/null
+++ b/search2/indexing/bin/reportUnindexedDocuments.php
@@ -0,0 +1,128 @@
+.
+ *
+ * You can contact KnowledgeTree Inc., PO Box 7775 #87847, San Francisco,
+ * California 94120-7775, or email info@knowledgetree.com.
+ *
+ * The interactive user interfaces in modified source and object code versions
+ * of this program must display Appropriate Legal Notices, as required under
+ * Section 5 of the GNU General Public License version 3.
+ *
+ * In accordance with Section 7(b) of the GNU General Public License version 3,
+ * these Appropriate Legal Notices must retain the display of the "Powered by
+ * KnowledgeTree" logo and retain the original copyright notice. If the display of the
+ * logo is not reasonably feasible for technical reasons, the Appropriate Legal Notices
+ * must display the words "Powered by KnowledgeTree" and retain the original
+ * copyright notice.
+ * Contributor( s): ______________________________________
+ *
+ */
+
+/**
+ * PURPOSE:
+ *
+ * The purpose of this script is to list documents that are not currently indexed.
+ *
+ * Usage: reportUnindexedDocuments.php [reindex]
+ *
+ * If 'reindex' is specified, documents will be re-indexed.
+ *
+ */
+
+session_start();
+chdir(dirname(__FILE__));
+require_once(realpath('../../../config/dmsDefaults.php'));
+require_once('indexing/indexerCore.inc.php'); // load the Indexer class before its first use below
+
+$sql = 'select id from documents';
+$rows = DBUtil::getResultArray($sql);
+
+$indexer = Indexer::get();
+$diagnosis = $indexer->diagnose();
+
+if (!empty($diagnosis))
+{
+    die($diagnosis);
+}
+
+$reindex=false;
+if ($argc > 0)
+{
+    foreach($argv as $arg)
+    {
+        switch (strtolower($arg))
+        {
+            case 'reindex':
+                $reindex=true;
+                print "* " . _kt("Reindexing documents when they are encountered.") . "\n";
+                break;
+            case 'help':
+                print "Usage: reportUnindexedDocuments.php [reindex]\n"; // usage of THIS script, as documented above
+                exit;
+        }
+    }
+}
+
+print "Querying document index...\n\n";
+print "Note that this is quite an expensive task....\n\n";
+
+$notIndexed = array();
+$i = 0;
+foreach($rows as $row)
+{
+    $docId = $row['id'];
+    if (!$indexer->isDocumentIndexed($docId))
+    {
+        $notIndexed[] = $docId;
+    }
+    if ($i % 100 == 0) print '.'; // progress marker: one dot per 100 documents checked
+    if ($i++ % 4000 == 0) print "\n";
+}
+
+print "\nReporting...\n";
+
+if (empty($notIndexed))
+{
+    print "All documents are indexed\n";
+}
+else
+{
+    print "\n-----START-----\n\"Document Id\",\"Title\",\"Full Path\"\n";
+
+    $notIndexed = implode(',', $notIndexed); // ids come from the documents table above, not from user input
+    $sql = "select d.id, dm.name as title, d.full_path from documents d inner join document_metadata_version dm on d.metadata_version_id = dm.id where d.id in ($notIndexed) ";
+    $rows = DBUtil::getResultArray($sql);
+
+    foreach($rows as $row)
+    {
+        print '"' . $row['id'] . '","' . str_replace('"', '""', $row['title']) . '","' . str_replace('"', '""', $row['full_path']) . '"' . "\n"; // double embedded quotes so each CSV row stays well-formed
+        if ($reindex)
+        {
+            Indexer::index($row['id']); // index the row being reported; $docId still holds the last id from the scan loop
+            $GLOBALS["_OBJECTCACHE"] = array();
+        }
+    }
+
+    print "-----END-----\n\nDone\n";
+}
+
+?>
\ No newline at end of file
diff --git a/search2/indexing/indexerCore.inc.php b/search2/indexing/indexerCore.inc.php
index cd7881f..b2ad8fa 100755
--- a/search2/indexing/indexerCore.inc.php
+++ b/search2/indexing/indexerCore.inc.php
@@ -1708,6 +1708,14 @@ abstract class Indexer
      */
     public abstract function getDocumentsInIndex();
 
+    /**
+     * Indicates whether the document with the given id is present in the index.
+     *
+     * @param int $documentId Id of the document to look up.
+     * @return boolean Treated as true by callers when the document is already indexed.
+     */
+    public abstract function isDocumentIndexed($documentId);
+
     /**
      * Returns the path to the index directory
      *
diff --git a/search2/indexing/indexers/JavaXMLRPCLuceneIndexer.inc.php b/search2/indexing/indexers/JavaXMLRPCLuceneIndexer.inc.php
index 47c6295..d95a5cb 100755
--- a/search2/indexing/indexers/JavaXMLRPCLuceneIndexer.inc.php
+++ b/search2/indexing/indexers/JavaXMLRPCLuceneIndexer.inc.php
@@ -7,31 +7,31 @@
  * Document Management Made Simple
  * Copyright (C) 2008 KnowledgeTree Inc.
* Portions copyright The Jam Warehouse Software (Pty) Limited - * + * * This program is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License version 3 as published by the * Free Software Foundation. - * + * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. - * + * * You should have received a copy of the GNU General Public License * along with this program. If not, see . - * - * You can contact KnowledgeTree Inc., PO Box 7775 #87847, San Francisco, + * + * You can contact KnowledgeTree Inc., PO Box 7775 #87847, San Francisco, * California 94120-7775, or email info@knowledgetree.com. - * + * * The interactive user interfaces in modified source and object code versions * of this program must display Appropriate Legal Notices, as required under * Section 5 of the GNU General Public License version 3. - * + * * In accordance with Section 7(b) of the GNU General Public License version 3, * these Appropriate Legal Notices must retain the display of the "Powered by - * KnowledgeTree" logo and retain the original copyright notice. If the display of the + * KnowledgeTree" logo and retain the original copyright notice. If the display of the * logo is not reasonably feasible for technical reasons, the Appropriate Legal Notices - * must display the words "Powered by KnowledgeTree" and retain the original + * must display the words "Powered by KnowledgeTree" and retain the original * copyright notice. 
* Contributor( s): ______________________________________
  *
@@ -279,5 +279,18 @@ class JavaXMLRPCLuceneIndexer extends Indexer
         return $stats->indexDirectory;
     }
 
+    /**
+     * Checks whether a document is present in the index.
+     *
+     * @param int $documentId Id of the document to look up.
+     * @return mixed Whatever $this->lucene->documentExists() returns; presumably a boolean - confirm.
+     */
+    public function isDocumentIndexed($documentId)
+    {
+        // Parameter name matches the abstract declaration in Indexer.
+        return $this->lucene->documentExists($documentId);
+    }
+
+
 }
 ?>
diff --git a/search2/indexing/lib/XmlRpcLucene.inc.php b/search2/indexing/lib/XmlRpcLucene.inc.php
index 1dfb401..1080073 100755
--- a/search2/indexing/lib/XmlRpcLucene.inc.php
+++ b/search2/indexing/lib/XmlRpcLucene.inc.php
@@ -7,31 +7,31 @@
  * Document Management Made Simple
  * Copyright (C) 2008 KnowledgeTree Inc.
  * Portions copyright The Jam Warehouse Software (Pty) Limited
- * 
+ *
  * This program is free software; you can redistribute it and/or modify it under
  * the terms of the GNU General Public License version 3 as published by the
  * Free Software Foundation.
- * 
+ *
  * This program is distributed in the hope that it will be useful, but WITHOUT
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  * details.
- * 
+ *
  * You should have received a copy of the GNU General Public License
  * along with this program. If not, see .
- * 
- * You can contact KnowledgeTree Inc., PO Box 7775 #87847, San Francisco, 
+ *
+ * You can contact KnowledgeTree Inc., PO Box 7775 #87847, San Francisco,
  * California 94120-7775, or email info@knowledgetree.com.
- * 
+ *
  * The interactive user interfaces in modified source and object code versions
  * of this program must display Appropriate Legal Notices, as required under
  * Section 5 of the GNU General Public License version 3.
- * 
+ *
  * In accordance with Section 7(b) of the GNU General Public License version 3,
  * these Appropriate Legal Notices must retain the display of the "Powered by
- * KnowledgeTree" logo and retain the original copyright notice. If the display of the 
+ * KnowledgeTree" logo and retain the original copyright notice.
If the display of the * logo is not reasonably feasible for technical reasons, the Appropriate Legal Notices - * must display the words "Powered by KnowledgeTree" and retain the original + * must display the words "Powered by KnowledgeTree" and retain the original * copyright notice. * Contributor( s): ______________________________________ * @@ -194,7 +194,7 @@ class XmlRpcLucene $result=&$this->client->send($function); if($result->faultCode()) { - $this->error($result, 'deleteDocument'); + $this->error($result, 'documentExists'); return false; } return php_xmlrpc_decode($result->value());