Commit 0401ebdcd3d118fa4921a604fad6837b4fbb9479

Authored by Neil Blakey-Milner
1 parent 495842dd

Hook up an indexer for Microsoft Office (Word) documents using the catdoc program.


git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/trunk@4207 c91229c3-7414-0410-bfa2-8a42b809f60b
plugins/ktstandard/KTIndexer.php 0 → 100644
  1 +<?php
  2 +
  3 +class KTIndexerPlugin extends KTPlugin {
  4 + var $sNamespace = "ktstandard.indexer.plugin";
  5 +}
  6 +
  7 +$oPluginRegistry =& KTPluginRegistry::getSingleton();
  8 +$oPluginRegistry->registerPlugin('KTIndexerPlugin', 'ktstandard.indexer.plugin', __FILE__);
  9 +$oPlugin =& $oPluginRegistry->getPlugin('ktstandard.indexer.plugin');
  10 +
  11 +$oPlugin->registerTrigger('content', 'transform', 'KTWordIndexerTrigger', 'ktstandard.indexer.triggers.word', 'contents/WordIndexer.php');
  12 +$oPlugin->register();
... ...
plugins/ktstandard/contents/WordIndexer.php 0 → 100644
  1 +<?php
  2 +
  3 +class KTWordIndexerTrigger {
  4 + function setDocument($oDocument) {
  5 + $this->oDocument = $oDocument;
  6 + }
  7 +
  8 + function transform() {
  9 + $oStorage = KTStorageManagerUtil::getSingleton();
  10 + $sFile = $oStorage->temporaryFile($this->oDocument);
  11 +
  12 + $cmdline = array("catdoc", $sFile);
  13 + $myfilename = tempnam("/tmp", "kt.catdoc");
  14 + $command = KTUtil::safeShellString($cmdline) . " >> " . $myfilename;
  15 + system($command);
  16 + $contents = file_get_contents($myfilename);
  17 + $aInsertValues = array(
  18 + 'document_id' => $this->oDocument->getId(),
  19 + 'document_text' => $contents,
  20 + );
  21 + $sTable = KTUtil::getTableName('document_text');
  22 + DBUtil::autoInsert($sTable, $aInsertValues, array('noid' => true));
  23 + }
  24 +}
  25 +
  26 +?>
... ...