Commit 8d61f978b0b5f7a29d10efcb338591175bd8d477
1 parent
dd929132
KTS-3456
"Allow indexing to not be dependant on open office" Fixed. Unzip requires a file to have an extension in Windows. Renamed tmp directories to be cleaned up by cleanup script. Committed By: Conrad Vermeulen Reviewed By: Kevin Fourie git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/trunk@9047 c91229c3-7414-0410-bfa2-8a42b809f60b
Showing
2 changed files
with
22 additions
and
19 deletions
search2/indexing/extractors/OpenOfficeTextExtractor.inc.php
| @@ -72,6 +72,11 @@ class OpenOfficeTextExtractor extends ExternalDocumentExtractor | @@ -72,6 +72,11 @@ class OpenOfficeTextExtractor extends ExternalDocumentExtractor | ||
| 72 | ); | 72 | ); |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | + public function needsIntermediateSourceFile() | ||
| 76 | + { | ||
| 77 | + return true; | ||
| 78 | + } | ||
| 79 | + | ||
| 75 | protected function filter($text) | 80 | protected function filter($text) |
| 76 | { | 81 | { |
| 77 | return preg_replace ("@(</?[^>]*>)+@", " ", $text); | 82 | return preg_replace ("@(</?[^>]*>)+@", " ", $text); |
| @@ -83,7 +88,7 @@ class OpenOfficeTextExtractor extends ExternalDocumentExtractor | @@ -83,7 +88,7 @@ class OpenOfficeTextExtractor extends ExternalDocumentExtractor | ||
| 83 | $temp_dir = $config->get('urls/tmpDirectory'); | 88 | $temp_dir = $config->get('urls/tmpDirectory'); |
| 84 | 89 | ||
| 85 | $docid = $this->document->getId(); | 90 | $docid = $this->document->getId(); |
| 86 | - $time = 'openoffice_'. time() . '-' . $docid; | 91 | + $time = 'ktindexer_openoffice_'. time() . '-' . $docid; |
| 87 | $this->openxml_dir = $temp_dir . '/' . $time; | 92 | $this->openxml_dir = $temp_dir . '/' . $time; |
| 88 | 93 | ||
| 89 | $this->sourcefile = str_replace('\\','/',$this->sourcefile); | 94 | $this->sourcefile = str_replace('\\','/',$this->sourcefile); |
| @@ -108,14 +113,7 @@ class OpenOfficeTextExtractor extends ExternalDocumentExtractor | @@ -108,14 +113,7 @@ class OpenOfficeTextExtractor extends ExternalDocumentExtractor | ||
| 108 | return false; | 113 | return false; |
| 109 | } | 114 | } |
| 110 | 115 | ||
| 111 | - $result = @rename($filename, $this->targetfile); | ||
| 112 | - | ||
| 113 | - if ($result === false) | ||
| 114 | - { | ||
| 115 | - return false; | ||
| 116 | - } | ||
| 117 | - | ||
| 118 | - $result = file_put_contents($this->targetfile, $this->filter(file_get_contents($this->targetfile))); | 116 | + $result = file_put_contents($this->targetfile, $this->filter(file_get_contents($filename))); |
| 119 | 117 | ||
| 120 | return $result !== false; | 118 | return $result !== false; |
| 121 | } | 119 | } |
search2/indexing/extractors/OpenXmlTextExtractor.inc.php
| @@ -7,31 +7,31 @@ | @@ -7,31 +7,31 @@ | ||
| 7 | * Document Management Made Simple | 7 | * Document Management Made Simple |
| 8 | * Copyright (C) 2008 KnowledgeTree Inc. | 8 | * Copyright (C) 2008 KnowledgeTree Inc. |
| 9 | * Portions copyright The Jam Warehouse Software (Pty) Limited | 9 | * Portions copyright The Jam Warehouse Software (Pty) Limited |
| 10 | - * | 10 | + * |
| 11 | * This program is free software; you can redistribute it and/or modify it under | 11 | * This program is free software; you can redistribute it and/or modify it under |
| 12 | * the terms of the GNU General Public License version 3 as published by the | 12 | * the terms of the GNU General Public License version 3 as published by the |
| 13 | * Free Software Foundation. | 13 | * Free Software Foundation. |
| 14 | - * | 14 | + * |
| 15 | * This program is distributed in the hope that it will be useful, but WITHOUT | 15 | * This program is distributed in the hope that it will be useful, but WITHOUT |
| 16 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | 16 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 17 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | 17 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| 18 | * details. | 18 | * details. |
| 19 | - * | 19 | + * |
| 20 | * You should have received a copy of the GNU General Public License | 20 | * You should have received a copy of the GNU General Public License |
| 21 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | 21 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 22 | - * | ||
| 23 | - * You can contact KnowledgeTree Inc., PO Box 7775 #87847, San Francisco, | 22 | + * |
| 23 | + * You can contact KnowledgeTree Inc., PO Box 7775 #87847, San Francisco, | ||
| 24 | * California 94120-7775, or email info@knowledgetree.com. | 24 | * California 94120-7775, or email info@knowledgetree.com. |
| 25 | - * | 25 | + * |
| 26 | * The interactive user interfaces in modified source and object code versions | 26 | * The interactive user interfaces in modified source and object code versions |
| 27 | * of this program must display Appropriate Legal Notices, as required under | 27 | * of this program must display Appropriate Legal Notices, as required under |
| 28 | * Section 5 of the GNU General Public License version 3. | 28 | * Section 5 of the GNU General Public License version 3. |
| 29 | - * | 29 | + * |
| 30 | * In accordance with Section 7(b) of the GNU General Public License version 3, | 30 | * In accordance with Section 7(b) of the GNU General Public License version 3, |
| 31 | * these Appropriate Legal Notices must retain the display of the "Powered by | 31 | * these Appropriate Legal Notices must retain the display of the "Powered by |
| 32 | - * KnowledgeTree" logo and retain the original copyright notice. If the display of the | 32 | + * KnowledgeTree" logo and retain the original copyright notice. If the display of the |
| 33 | * logo is not reasonably feasible for technical reasons, the Appropriate Legal Notices | 33 | * logo is not reasonably feasible for technical reasons, the Appropriate Legal Notices |
| 34 | - * must display the words "Powered by KnowledgeTree" and retain the original | 34 | + * must display the words "Powered by KnowledgeTree" and retain the original |
| 35 | * copyright notice. | 35 | * copyright notice. |
| 36 | * Contributor( s): ______________________________________ | 36 | * Contributor( s): ______________________________________ |
| 37 | * | 37 | * |
| @@ -60,6 +60,11 @@ class OpenXmlTextExtractor extends ExternalDocumentExtractor | @@ -60,6 +60,11 @@ class OpenXmlTextExtractor extends ExternalDocumentExtractor | ||
| 60 | return _kt('Open Xml Text Extractor'); | 60 | return _kt('Open Xml Text Extractor'); |
| 61 | } | 61 | } |
| 62 | 62 | ||
| 63 | + public function needsIntermediateSourceFile() | ||
| 64 | + { | ||
| 65 | + return true; | ||
| 66 | + } | ||
| 67 | + | ||
| 63 | /** | 68 | /** |
| 64 | * Return a list of all Office 2007 document types that are supported | 69 | * Return a list of all Office 2007 document types that are supported |
| 65 | * | 70 | * |
| @@ -147,7 +152,7 @@ class OpenXmlTextExtractor extends ExternalDocumentExtractor | @@ -147,7 +152,7 @@ class OpenXmlTextExtractor extends ExternalDocumentExtractor | ||
| 147 | $temp_dir = $config->get('urls/tmpDirectory'); | 152 | $temp_dir = $config->get('urls/tmpDirectory'); |
| 148 | 153 | ||
| 149 | $docid = $this->document->getId(); | 154 | $docid = $this->document->getId(); |
| 150 | - $time = 'openxml_'. time() . '-' . $docid; | 155 | + $time = 'ktindexer_openxml_'. time() . '-' . $docid; |
| 151 | $this->openxml_dir = $temp_dir . '/' . $time; | 156 | $this->openxml_dir = $temp_dir . '/' . $time; |
| 152 | 157 | ||
| 153 | $this->sourcefile = str_replace('\\','/',$this->sourcefile); | 158 | $this->sourcefile = str_replace('\\','/',$this->sourcefile); |