diff --git a/search2/indexing/extractors/OpenOfficeTextExtractor.inc.php b/search2/indexing/extractors/OpenOfficeTextExtractor.inc.php index 548a4f3..2878e7c 100644 --- a/search2/indexing/extractors/OpenOfficeTextExtractor.inc.php +++ b/search2/indexing/extractors/OpenOfficeTextExtractor.inc.php @@ -1,5 +1,7 @@ unzip = KTUtil::findCommand("import/unzip", 'unzip'); $this->unzip = str_replace('\\','/',$this->unzip); $this->unzip_params = $config->get('extractorParameters/unzip', '"{source}" "{part}" -d "{target_dir}"'); + */ parent::__construct(); } @@ -94,6 +98,14 @@ class OpenOfficeTextExtractor extends ExternalDocumentExtractor $this->sourcefile = str_replace('\\','/',$this->sourcefile); $this->openxml_dir = str_replace('\\','/',$this->openxml_dir); + $archive = new PclZip($this->sourcefile); + + if ($archive->extract(PCLZIP_OPT_PATH, $this->openxml_dir) == 0){ + $this->output = _kt('Failed to extract content'); + return false; + } + + /* *** Original code using the unzip binary *** $cmd = '"' . $this->unzip . '"' . ' ' . str_replace( array('{source}','{part}', '{target_dir}'), array($this->sourcefile, 'content.xml',$this->openxml_dir), $this->unzip_params); @@ -105,6 +117,7 @@ class OpenOfficeTextExtractor extends ExternalDocumentExtractor $this->output = _kt('Failed to execute command: ') . $cmd; return false; } + *** End unzip code *** */ $filename = $this->openxml_dir . 
'/content.xml'; if (!file_exists($filename)) @@ -134,4 +147,4 @@ class OpenOfficeTextExtractor extends ExternalDocumentExtractor } -?> +?> \ No newline at end of file diff --git a/search2/indexing/extractors/OpenXmlTextExtractor.inc.php b/search2/indexing/extractors/OpenXmlTextExtractor.inc.php index 79ac182..c710a39 100644 --- a/search2/indexing/extractors/OpenXmlTextExtractor.inc.php +++ b/search2/indexing/extractors/OpenXmlTextExtractor.inc.php @@ -43,9 +43,11 @@ class OpenXmlTextExtractor extends ExternalDocumentExtractor { $config = KTConfig::getSingleton(); + /* ** Using PclZip instead of the unzip binary ** $this->unzip = KTUtil::findCommand("import/unzip", 'unzip'); $this->unzip = str_replace('\\','/',$this->unzip); $this->unzip_params = $config->get('extractorParameters/unzip', '"{source}" "{part}" -d "{target_dir}"'); + */ parent::__construct(); } @@ -158,6 +160,14 @@ class OpenXmlTextExtractor extends ExternalDocumentExtractor $this->sourcefile = str_replace('\\','/',$this->sourcefile); $this->openxml_dir = str_replace('\\','/',$this->openxml_dir); + $archive = new PclZip($this->sourcefile); + + if ($archive->extract(PCLZIP_OPT_PATH, $this->openxml_dir) == 0){ + $this->output = _kt('Failed to extract content'); + return false; + } + + /* *** Original code using the unzip binary *** $cmd = '"' . $this->unzip . '"' . ' ' . str_replace( array('{source}','{part}', '{target_dir}'), array($this->sourcefile, '*Content_Types*.xml',$this->openxml_dir), $this->unzip_params); @@ -169,6 +179,7 @@ class OpenXmlTextExtractor extends ExternalDocumentExtractor $this->output = _kt('Failed to execute command: ') . $cmd; return false; } + *** End unzip code *** */ $filename = $this->openxml_dir . '/[Content_Types].xml'; if (!file_exists($filename))