Commit 1fb9c5e2f1eaae0dc0663cd65c01a2bdb31c5681

Authored by kevin_fourie
1 parent f1369866

Merged in from STABLE trunk...

KTS-3456
"Allow indexing to not be dependant on open office"
Fixed. Unzip requires a file to have an extension on Windows. Renamed tmp directories so that they are cleaned up by the cleanup script.

Committed By: Conrad Vermeulen
Reviewed By: Kevin Fourie


git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/STABLE/branches/3.5.3-Release-Branch@9049 c91229c3-7414-0410-bfa2-8a42b809f60b
search2/indexing/extractors/OpenOfficeTextExtractor.inc.php
@@ -72,6 +72,11 @@ class OpenOfficeTextExtractor extends ExternalDocumentExtractor @@ -72,6 +72,11 @@ class OpenOfficeTextExtractor extends ExternalDocumentExtractor
72 ); 72 );
73 } 73 }
74 74
  75 + public function needsIntermediateSourceFile()
  76 + {
  77 + return true;
  78 + }
  79 +
75 protected function filter($text) 80 protected function filter($text)
76 { 81 {
77 return preg_replace ("@(</?[^>]*>)+@", " ", $text); 82 return preg_replace ("@(</?[^>]*>)+@", " ", $text);
@@ -83,7 +88,7 @@ class OpenOfficeTextExtractor extends ExternalDocumentExtractor @@ -83,7 +88,7 @@ class OpenOfficeTextExtractor extends ExternalDocumentExtractor
83 $temp_dir = $config->get('urls/tmpDirectory'); 88 $temp_dir = $config->get('urls/tmpDirectory');
84 89
85 $docid = $this->document->getId(); 90 $docid = $this->document->getId();
86 - $time = 'openoffice_'. time() . '-' . $docid; 91 + $time = 'ktindexer_openoffice_'. time() . '-' . $docid;
87 $this->openxml_dir = $temp_dir . '/' . $time; 92 $this->openxml_dir = $temp_dir . '/' . $time;
88 93
89 $this->sourcefile = str_replace('\\','/',$this->sourcefile); 94 $this->sourcefile = str_replace('\\','/',$this->sourcefile);
@@ -108,14 +113,7 @@ class OpenOfficeTextExtractor extends ExternalDocumentExtractor @@ -108,14 +113,7 @@ class OpenOfficeTextExtractor extends ExternalDocumentExtractor
108 return false; 113 return false;
109 } 114 }
110 115
111 - $result = @rename($filename, $this->targetfile);  
112 -  
113 - if ($result === false)  
114 - {  
115 - return false;  
116 - }  
117 -  
118 - $result = file_put_contents($this->targetfile, $this->filter(file_get_contents($this->targetfile))); 116 + $result = file_put_contents($this->targetfile, $this->filter(file_get_contents($filename)));
119 117
120 return $result !== false; 118 return $result !== false;
121 } 119 }
search2/indexing/extractors/OpenXmlTextExtractor.inc.php
@@ -7,31 +7,31 @@ @@ -7,31 +7,31 @@
7 * Document Management Made Simple 7 * Document Management Made Simple
8 * Copyright (C) 2008 KnowledgeTree Inc. 8 * Copyright (C) 2008 KnowledgeTree Inc.
9 * Portions copyright The Jam Warehouse Software (Pty) Limited 9 * Portions copyright The Jam Warehouse Software (Pty) Limited
10 - * 10 + *
11 * This program is free software; you can redistribute it and/or modify it under 11 * This program is free software; you can redistribute it and/or modify it under
12 * the terms of the GNU General Public License version 3 as published by the 12 * the terms of the GNU General Public License version 3 as published by the
13 * Free Software Foundation. 13 * Free Software Foundation.
14 - * 14 + *
15 * This program is distributed in the hope that it will be useful, but WITHOUT 15 * This program is distributed in the hope that it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
18 * details. 18 * details.
19 - * 19 + *
20 * You should have received a copy of the GNU General Public License 20 * You should have received a copy of the GNU General Public License
21 * along with this program. If not, see <http://www.gnu.org/licenses/>. 21 * along with this program. If not, see <http://www.gnu.org/licenses/>.
22 - *  
23 - * You can contact KnowledgeTree Inc., PO Box 7775 #87847, San Francisco, 22 + *
  23 + * You can contact KnowledgeTree Inc., PO Box 7775 #87847, San Francisco,
24 * California 94120-7775, or email info@knowledgetree.com. 24 * California 94120-7775, or email info@knowledgetree.com.
25 - * 25 + *
26 * The interactive user interfaces in modified source and object code versions 26 * The interactive user interfaces in modified source and object code versions
27 * of this program must display Appropriate Legal Notices, as required under 27 * of this program must display Appropriate Legal Notices, as required under
28 * Section 5 of the GNU General Public License version 3. 28 * Section 5 of the GNU General Public License version 3.
29 - * 29 + *
30 * In accordance with Section 7(b) of the GNU General Public License version 3, 30 * In accordance with Section 7(b) of the GNU General Public License version 3,
31 * these Appropriate Legal Notices must retain the display of the "Powered by 31 * these Appropriate Legal Notices must retain the display of the "Powered by
32 - * KnowledgeTree" logo and retain the original copyright notice. If the display of the 32 + * KnowledgeTree" logo and retain the original copyright notice. If the display of the
33 * logo is not reasonably feasible for technical reasons, the Appropriate Legal Notices 33 * logo is not reasonably feasible for technical reasons, the Appropriate Legal Notices
34 - * must display the words "Powered by KnowledgeTree" and retain the original 34 + * must display the words "Powered by KnowledgeTree" and retain the original
35 * copyright notice. 35 * copyright notice.
36 * Contributor( s): ______________________________________ 36 * Contributor( s): ______________________________________
37 * 37 *
@@ -60,6 +60,11 @@ class OpenXmlTextExtractor extends ExternalDocumentExtractor @@ -60,6 +60,11 @@ class OpenXmlTextExtractor extends ExternalDocumentExtractor
60 return _kt('Open Xml Text Extractor'); 60 return _kt('Open Xml Text Extractor');
61 } 61 }
62 62
  63 + public function needsIntermediateSourceFile()
  64 + {
  65 + return true;
  66 + }
  67 +
63 /** 68 /**
64 * Return a list of all Office 2007 document types that are supported 69 * Return a list of all Office 2007 document types that are supported
65 * 70 *
@@ -147,7 +152,7 @@ class OpenXmlTextExtractor extends ExternalDocumentExtractor @@ -147,7 +152,7 @@ class OpenXmlTextExtractor extends ExternalDocumentExtractor
147 $temp_dir = $config->get('urls/tmpDirectory'); 152 $temp_dir = $config->get('urls/tmpDirectory');
148 153
149 $docid = $this->document->getId(); 154 $docid = $this->document->getId();
150 - $time = 'openxml_'. time() . '-' . $docid; 155 + $time = 'ktindexer_openxml_'. time() . '-' . $docid;
151 $this->openxml_dir = $temp_dir . '/' . $time; 156 $this->openxml_dir = $temp_dir . '/' . $time;
152 157
153 $this->sourcefile = str_replace('\\','/',$this->sourcefile); 158 $this->sourcefile = str_replace('\\','/',$this->sourcefile);