Commit 8d61f978b0b5f7a29d10efcb338591175bd8d477

Authored by Conrad Vermeulen
1 parent dd929132

KTS-3456

"Allow indexing to not be dependant on open office"
Fixed. Unzip requires a file to have an extension on Windows. Renamed the temporary directories so that they are cleaned up by the cleanup script.

Committed By: Conrad Vermeulen
Reviewed By: Kevin Fourie

git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/trunk@9047 c91229c3-7414-0410-bfa2-8a42b809f60b
search2/indexing/extractors/OpenOfficeTextExtractor.inc.php
... ... @@ -72,6 +72,11 @@ class OpenOfficeTextExtractor extends ExternalDocumentExtractor
72 72 );
73 73 }
74 74  
  75 + public function needsIntermediateSourceFile()
  76 + {
  77 + return true;
  78 + }
  79 +
75 80 protected function filter($text)
76 81 {
77 82 return preg_replace ("@(</?[^>]*>)+@", " ", $text);
... ... @@ -83,7 +88,7 @@ class OpenOfficeTextExtractor extends ExternalDocumentExtractor
83 88 $temp_dir = $config->get('urls/tmpDirectory');
84 89  
85 90 $docid = $this->document->getId();
86   - $time = 'openoffice_'. time() . '-' . $docid;
  91 + $time = 'ktindexer_openoffice_'. time() . '-' . $docid;
87 92 $this->openxml_dir = $temp_dir . '/' . $time;
88 93  
89 94 $this->sourcefile = str_replace('\\','/',$this->sourcefile);
... ... @@ -108,14 +113,7 @@ class OpenOfficeTextExtractor extends ExternalDocumentExtractor
108 113 return false;
109 114 }
110 115  
111   - $result = @rename($filename, $this->targetfile);
112   -
113   - if ($result === false)
114   - {
115   - return false;
116   - }
117   -
118   - $result = file_put_contents($this->targetfile, $this->filter(file_get_contents($this->targetfile)));
  116 + $result = file_put_contents($this->targetfile, $this->filter(file_get_contents($filename)));
119 117  
120 118 return $result !== false;
121 119 }
... ...
search2/indexing/extractors/OpenXmlTextExtractor.inc.php
... ... @@ -7,31 +7,31 @@
7 7 * Document Management Made Simple
8 8 * Copyright (C) 2008 KnowledgeTree Inc.
9 9 * Portions copyright The Jam Warehouse Software (Pty) Limited
10   - *
  10 + *
11 11 * This program is free software; you can redistribute it and/or modify it under
12 12 * the terms of the GNU General Public License version 3 as published by the
13 13 * Free Software Foundation.
14   - *
  14 + *
15 15 * This program is distributed in the hope that it will be useful, but WITHOUT
16 16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 17 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
18 18 * details.
19   - *
  19 + *
20 20 * You should have received a copy of the GNU General Public License
21 21 * along with this program. If not, see <http://www.gnu.org/licenses/>.
22   - *
23   - * You can contact KnowledgeTree Inc., PO Box 7775 #87847, San Francisco,
  22 + *
  23 + * You can contact KnowledgeTree Inc., PO Box 7775 #87847, San Francisco,
24 24 * California 94120-7775, or email info@knowledgetree.com.
25   - *
  25 + *
26 26 * The interactive user interfaces in modified source and object code versions
27 27 * of this program must display Appropriate Legal Notices, as required under
28 28 * Section 5 of the GNU General Public License version 3.
29   - *
  29 + *
30 30 * In accordance with Section 7(b) of the GNU General Public License version 3,
31 31 * these Appropriate Legal Notices must retain the display of the "Powered by
32   - * KnowledgeTree" logo and retain the original copyright notice. If the display of the
  32 + * KnowledgeTree" logo and retain the original copyright notice. If the display of the
33 33 * logo is not reasonably feasible for technical reasons, the Appropriate Legal Notices
34   - * must display the words "Powered by KnowledgeTree" and retain the original
  34 + * must display the words "Powered by KnowledgeTree" and retain the original
35 35 * copyright notice.
36 36 * Contributor( s): ______________________________________
37 37 *
... ... @@ -60,6 +60,11 @@ class OpenXmlTextExtractor extends ExternalDocumentExtractor
60 60 return _kt('Open Xml Text Extractor');
61 61 }
62 62  
  63 + public function needsIntermediateSourceFile()
  64 + {
  65 + return true;
  66 + }
  67 +
63 68 /**
64 69 * Return a list of all Office 2007 document types that are supported
65 70 *
... ... @@ -147,7 +152,7 @@ class OpenXmlTextExtractor extends ExternalDocumentExtractor
147 152 $temp_dir = $config->get('urls/tmpDirectory');
148 153  
149 154 $docid = $this->document->getId();
150   - $time = 'openxml_'. time() . '-' . $docid;
  155 + $time = 'ktindexer_openxml_'. time() . '-' . $docid;
151 156 $this->openxml_dir = $temp_dir . '/' . $time;
152 157  
153 158 $this->sourcefile = str_replace('\\','/',$this->sourcefile);
... ...