From 0401ebdcd3d118fa4921a604fad6837b4fbb9479 Mon Sep 17 00:00:00 2001 From: Neil Blakey-Milner Date: Sun, 27 Nov 2005 21:23:28 +0000 Subject: [PATCH] Hook up an indexer for Microsoft Office (Word) Documents using the catdoc program. --- plugins/ktstandard/KTIndexer.php | 12 ++++++++++++ plugins/ktstandard/contents/WordIndexer.php | 26 ++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 0 deletions(-) create mode 100644 plugins/ktstandard/KTIndexer.php create mode 100644 plugins/ktstandard/contents/WordIndexer.php diff --git a/plugins/ktstandard/KTIndexer.php b/plugins/ktstandard/KTIndexer.php new file mode 100644 index 0000000..4f2a2ee --- /dev/null +++ b/plugins/ktstandard/KTIndexer.php @@ -0,0 +1,12 @@ +registerPlugin('KTIndexerPlugin', 'ktstandard.indexer.plugin', __FILE__); +$oPlugin =& $oPluginRegistry->getPlugin('ktstandard.indexer.plugin'); + +$oPlugin->registerTrigger('content', 'transform', 'KTWordIndexerTrigger', 'ktstandard.indexer.triggers.word', 'contents/WordIndexer.php'); +$oPlugin->register(); diff --git a/plugins/ktstandard/contents/WordIndexer.php b/plugins/ktstandard/contents/WordIndexer.php new file mode 100644 index 0000000..e869a56 --- /dev/null +++ b/plugins/ktstandard/contents/WordIndexer.php @@ -0,0 +1,26 @@ +oDocument = $oDocument; + } + + function transform() { + $oStorage = KTStorageManagerUtil::getSingleton(); + $sFile = $oStorage->temporaryFile($this->oDocument); + + $cmdline = array("catdoc", $sFile); + $myfilename = tempnam("/tmp", "kt.catdoc"); + $command = KTUtil::safeShellString($cmdline) . " >> " . $myfilename; + system($command); + $contents = file_get_contents($myfilename); + $aInsertValues = array( + 'document_id' => $this->oDocument->getId(), + 'document_text' => $contents, + ); + $sTable = KTUtil::getTableName('document_text'); + DBUtil::autoInsert($sTable, $aInsertValues, array('noid' => true)); + } +} + +?> -- libgit2 0.21.4