From 68f26be8ad91ab691729d466c6d01959231cfc9c Mon Sep 17 00:00:00 2001 From: Bryn Divey Date: Wed, 7 Jun 2006 11:23:59 +0000 Subject: [PATCH] Most indexers now index UTF-8 on Linux --- plugins/ktstandard/contents/BaseIndexer.php | 6 +++--- plugins/ktstandard/contents/ExcelIndexer.php | 3 ++- plugins/ktstandard/contents/PdfIndexer.php | 3 ++- plugins/ktstandard/contents/RtfIndexer.php | 3 ++- plugins/ktstandard/contents/WordIndexer.php | 3 ++- 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/plugins/ktstandard/contents/BaseIndexer.php b/plugins/ktstandard/contents/BaseIndexer.php index a3725a4..dd43482 100644 --- a/plugins/ktstandard/contents/BaseIndexer.php +++ b/plugins/ktstandard/contents/BaseIndexer.php @@ -99,8 +99,8 @@ class KTBaseIndexerTrigger { $contents = $this->extract_contents($intermediate, $myfilename); - unlink($myfilename); - if (OS_WINDOWS) { unlink($intermediate); } + @unlink($myfilename); + if (OS_WINDOWS) { @unlink($intermediate); } if (empty($contents)) { return; } @@ -136,7 +136,7 @@ class KTBaseIndexerTrigger { } KTUtil::pexec($cmdline, $aOptions); $contents = file_get_contents($sTempFilename); - + return $contents; } } diff --git a/plugins/ktstandard/contents/ExcelIndexer.php b/plugins/ktstandard/contents/ExcelIndexer.php index d0b285a..12bef16 100644 --- a/plugins/ktstandard/contents/ExcelIndexer.php +++ b/plugins/ktstandard/contents/ExcelIndexer.php @@ -36,13 +36,14 @@ class KTExcelIndexerTrigger extends KTBaseIndexerTrigger { ); var $command = 'xls2csv'; // could be any application. var $commandconfig = 'indexer/xls2csv'; // could be any application. - var $args = array("-q", "0", "-c", " "); + var $args = array("-d", "UTF-8", "-q", "0", "-c", " "); var $use_pipes = true; // see BaseIndexer for how the extraction works. // function extract_contents($sFilename, $sTempFilename) { if (!OS_WINDOWS) { + putenv('LANG=en_US.UTF-8'); $res = parent::extract_contents($sFilename, $sTempFilename); if (!empty($res)) { return $res; diff --git a/plugins/ktstandard/contents/PdfIndexer.php b/plugins/ktstandard/contents/PdfIndexer.php index 21dce49..6bde8a2 100644 --- a/plugins/ktstandard/contents/PdfIndexer.php +++ b/plugins/ktstandard/contents/PdfIndexer.php @@ -34,11 +34,12 @@ class KTPdfIndexerTrigger extends KTBaseIndexerTrigger { ); var $command = 'pdftotext'; // could be any application. var $commandconfig = 'indexer/pdftotext'; // could be any application. - var $args = array("-nopgbrk"); + var $args = array("-nopgbrk", "-enc", "UTF-8"); var $use_pipes = false; // see BaseIndexer for how the extraction works. function findLocalCommand() { + putenv('LANG=en_US.UTF-8'); $sCommand = KTUtil::findCommand($this->commandconfig, $this->command); return $sCommand; } diff --git a/plugins/ktstandard/contents/RtfIndexer.php b/plugins/ktstandard/contents/RtfIndexer.php index 65d16c2..79a8a30 100644 --- a/plugins/ktstandard/contents/RtfIndexer.php +++ b/plugins/ktstandard/contents/RtfIndexer.php @@ -34,11 +34,12 @@ class KTRtfIndexerTrigger extends KTBaseIndexerTrigger { ); var $command = 'catdoc'; // could be any application. var $commandconfig = 'indexer/catdoc'; // could be any application. - var $args = array("-w", "-d", "utf-8"); + var $args = array("-w", "-d", "UTF-8"); var $use_pipes = true; function findLocalCommand() { $sCommand = KTUtil::findCommand($this->commandconfig, $this->command); + putenv('LANG=en_US.UTF-8'); return $sCommand; } diff --git a/plugins/ktstandard/contents/WordIndexer.php b/plugins/ktstandard/contents/WordIndexer.php index 573c6f5..326ef6b 100644 --- a/plugins/ktstandard/contents/WordIndexer.php +++ b/plugins/ktstandard/contents/WordIndexer.php @@ -34,7 +34,7 @@ class KTWordIndexerTrigger extends KTBaseIndexerTrigger { ); var $command = 'catdoc'; // could be any application. var $commandconfig = 'indexer/catdoc'; // could be any application. - var $args = array("-w"); + var $args = array("-w", "-d", "UTF-8"); var $use_pipes = true; function extract_contents($sFilename, $sTempFilename) { @@ -47,6 +47,7 @@ class KTWordIndexerTrigger extends KTBaseIndexerTrigger { $sDir = dirname(dirname($sCommand)); putenv('HOME=' . $sDir); } + putenv('LANG=en_US.UTF-8'); return parent::extract_contents($sFilename, $sTempFilename); } -- libgit2 0.21.4