Commit 68f26be8ad91ab691729d466c6d01959231cfc9c
1 parent: 3b597345
Most indexers now index UTF-8 on Linux
git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/trunk@5525 c91229c3-7414-0410-bfa2-8a42b809f60b
Showing 5 changed files with 11 additions and 7 deletions
plugins/ktstandard/contents/BaseIndexer.php
| @@ -99,8 +99,8 @@ class KTBaseIndexerTrigger { | @@ -99,8 +99,8 @@ class KTBaseIndexerTrigger { | ||
| 99 | 99 | ||
| 100 | $contents = $this->extract_contents($intermediate, $myfilename); | 100 | $contents = $this->extract_contents($intermediate, $myfilename); |
| 101 | 101 | ||
| 102 | - unlink($myfilename); | ||
| 103 | - if (OS_WINDOWS) { unlink($intermediate); } | 102 | + @unlink($myfilename); |
| 103 | + if (OS_WINDOWS) { @unlink($intermediate); } | ||
| 104 | if (empty($contents)) { | 104 | if (empty($contents)) { |
| 105 | return; | 105 | return; |
| 106 | } | 106 | } |
| @@ -136,7 +136,7 @@ class KTBaseIndexerTrigger { | @@ -136,7 +136,7 @@ class KTBaseIndexerTrigger { | ||
| 136 | } | 136 | } |
| 137 | KTUtil::pexec($cmdline, $aOptions); | 137 | KTUtil::pexec($cmdline, $aOptions); |
| 138 | $contents = file_get_contents($sTempFilename); | 138 | $contents = file_get_contents($sTempFilename); |
| 139 | - | 139 | + |
| 140 | return $contents; | 140 | return $contents; |
| 141 | } | 141 | } |
| 142 | } | 142 | } |
plugins/ktstandard/contents/ExcelIndexer.php
| @@ -36,13 +36,14 @@ class KTExcelIndexerTrigger extends KTBaseIndexerTrigger { | @@ -36,13 +36,14 @@ class KTExcelIndexerTrigger extends KTBaseIndexerTrigger { | ||
| 36 | ); | 36 | ); |
| 37 | var $command = 'xls2csv'; // could be any application. | 37 | var $command = 'xls2csv'; // could be any application. |
| 38 | var $commandconfig = 'indexer/xls2csv'; // could be any application. | 38 | var $commandconfig = 'indexer/xls2csv'; // could be any application. |
| 39 | - var $args = array("-q", "0", "-c", " "); | 39 | + var $args = array("-d", "UTF-8", "-q", "0", "-c", " "); |
| 40 | var $use_pipes = true; | 40 | var $use_pipes = true; |
| 41 | 41 | ||
| 42 | // see BaseIndexer for how the extraction works. | 42 | // see BaseIndexer for how the extraction works. |
| 43 | // | 43 | // |
| 44 | function extract_contents($sFilename, $sTempFilename) { | 44 | function extract_contents($sFilename, $sTempFilename) { |
| 45 | if (!OS_WINDOWS) { | 45 | if (!OS_WINDOWS) { |
| 46 | + putenv('LANG=en_US.UTF-8'); | ||
| 46 | $res = parent::extract_contents($sFilename, $sTempFilename); | 47 | $res = parent::extract_contents($sFilename, $sTempFilename); |
| 47 | if (!empty($res)) { | 48 | if (!empty($res)) { |
| 48 | return $res; | 49 | return $res; |
plugins/ktstandard/contents/PdfIndexer.php
| @@ -34,11 +34,12 @@ class KTPdfIndexerTrigger extends KTBaseIndexerTrigger { | @@ -34,11 +34,12 @@ class KTPdfIndexerTrigger extends KTBaseIndexerTrigger { | ||
| 34 | ); | 34 | ); |
| 35 | var $command = 'pdftotext'; // could be any application. | 35 | var $command = 'pdftotext'; // could be any application. |
| 36 | var $commandconfig = 'indexer/pdftotext'; // could be any application. | 36 | var $commandconfig = 'indexer/pdftotext'; // could be any application. |
| 37 | - var $args = array("-nopgbrk"); | 37 | + var $args = array("-nopgbrk", "-enc", "UTF-8"); |
| 38 | var $use_pipes = false; | 38 | var $use_pipes = false; |
| 39 | 39 | ||
| 40 | // see BaseIndexer for how the extraction works. | 40 | // see BaseIndexer for how the extraction works. |
| 41 | function findLocalCommand() { | 41 | function findLocalCommand() { |
| 42 | + putenv('LANG=en_US.UTF-8'); | ||
| 42 | $sCommand = KTUtil::findCommand($this->commandconfig, $this->command); | 43 | $sCommand = KTUtil::findCommand($this->commandconfig, $this->command); |
| 43 | return $sCommand; | 44 | return $sCommand; |
| 44 | } | 45 | } |
plugins/ktstandard/contents/RtfIndexer.php
| @@ -34,11 +34,12 @@ class KTRtfIndexerTrigger extends KTBaseIndexerTrigger { | @@ -34,11 +34,12 @@ class KTRtfIndexerTrigger extends KTBaseIndexerTrigger { | ||
| 34 | ); | 34 | ); |
| 35 | var $command = 'catdoc'; // could be any application. | 35 | var $command = 'catdoc'; // could be any application. |
| 36 | var $commandconfig = 'indexer/catdoc'; // could be any application. | 36 | var $commandconfig = 'indexer/catdoc'; // could be any application. |
| 37 | - var $args = array("-w", "-d", "utf-8"); | 37 | + var $args = array("-w", "-d", "UTF-8"); |
| 38 | var $use_pipes = true; | 38 | var $use_pipes = true; |
| 39 | 39 | ||
| 40 | function findLocalCommand() { | 40 | function findLocalCommand() { |
| 41 | $sCommand = KTUtil::findCommand($this->commandconfig, $this->command); | 41 | $sCommand = KTUtil::findCommand($this->commandconfig, $this->command); |
| 42 | + putenv('LANG=en_US.UTF-8'); | ||
| 42 | return $sCommand; | 43 | return $sCommand; |
| 43 | } | 44 | } |
| 44 | 45 |
plugins/ktstandard/contents/WordIndexer.php
| @@ -34,7 +34,7 @@ class KTWordIndexerTrigger extends KTBaseIndexerTrigger { | @@ -34,7 +34,7 @@ class KTWordIndexerTrigger extends KTBaseIndexerTrigger { | ||
| 34 | ); | 34 | ); |
| 35 | var $command = 'catdoc'; // could be any application. | 35 | var $command = 'catdoc'; // could be any application. |
| 36 | var $commandconfig = 'indexer/catdoc'; // could be any application. | 36 | var $commandconfig = 'indexer/catdoc'; // could be any application. |
| 37 | - var $args = array("-w"); | 37 | + var $args = array("-w", "-d", "UTF-8"); |
| 38 | var $use_pipes = true; | 38 | var $use_pipes = true; |
| 39 | 39 | ||
| 40 | function extract_contents($sFilename, $sTempFilename) { | 40 | function extract_contents($sFilename, $sTempFilename) { |
| @@ -47,6 +47,7 @@ class KTWordIndexerTrigger extends KTBaseIndexerTrigger { | @@ -47,6 +47,7 @@ class KTWordIndexerTrigger extends KTBaseIndexerTrigger { | ||
| 47 | $sDir = dirname(dirname($sCommand)); | 47 | $sDir = dirname(dirname($sCommand)); |
| 48 | putenv('HOME=' . $sDir); | 48 | putenv('HOME=' . $sDir); |
| 49 | } | 49 | } |
| 50 | + putenv('LANG=en_US.UTF-8'); | ||
| 50 | return parent::extract_contents($sFilename, $sTempFilename); | 51 | return parent::extract_contents($sFilename, $sTempFilename); |
| 51 | } | 52 | } |
| 52 | 53 |