Commit 68f26be8ad91ab691729d466c6d01959231cfc9c
1 parent: 3b597345
Most indexers now index UTF-8 on Linux
git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/trunk@5525 c91229c3-7414-0410-bfa2-8a42b809f60b
Showing 5 changed files with 11 additions and 7 deletions.
plugins/ktstandard/contents/BaseIndexer.php
| ... | ... | @@ -99,8 +99,8 @@ class KTBaseIndexerTrigger { |
| 99 | 99 | |
| 100 | 100 | $contents = $this->extract_contents($intermediate, $myfilename); |
| 101 | 101 | |
| 102 | - unlink($myfilename); | |
| 103 | - if (OS_WINDOWS) { unlink($intermediate); } | |
| 102 | + @unlink($myfilename); | |
| 103 | + if (OS_WINDOWS) { @unlink($intermediate); } | |
| 104 | 104 | if (empty($contents)) { |
| 105 | 105 | return; |
| 106 | 106 | } |
| ... | ... | @@ -136,7 +136,7 @@ class KTBaseIndexerTrigger { |
| 136 | 136 | } |
| 137 | 137 | KTUtil::pexec($cmdline, $aOptions); |
| 138 | 138 | $contents = file_get_contents($sTempFilename); |
| 139 | - | |
| 139 | + | |
| 140 | 140 | return $contents; |
| 141 | 141 | } |
| 142 | 142 | } | ... | ... |
plugins/ktstandard/contents/ExcelIndexer.php
| ... | ... | @@ -36,13 +36,14 @@ class KTExcelIndexerTrigger extends KTBaseIndexerTrigger { |
| 36 | 36 | ); |
| 37 | 37 | var $command = 'xls2csv'; // could be any application. |
| 38 | 38 | var $commandconfig = 'indexer/xls2csv'; // could be any application. |
| 39 | - var $args = array("-q", "0", "-c", " "); | |
| 39 | + var $args = array("-d", "UTF-8", "-q", "0", "-c", " "); | |
| 40 | 40 | var $use_pipes = true; |
| 41 | 41 | |
| 42 | 42 | // see BaseIndexer for how the extraction works. |
| 43 | 43 | // |
| 44 | 44 | function extract_contents($sFilename, $sTempFilename) { |
| 45 | 45 | if (!OS_WINDOWS) { |
| 46 | + putenv('LANG=en_US.UTF-8'); | |
| 46 | 47 | $res = parent::extract_contents($sFilename, $sTempFilename); |
| 47 | 48 | if (!empty($res)) { |
| 48 | 49 | return $res; | ... | ... |
plugins/ktstandard/contents/PdfIndexer.php
| ... | ... | @@ -34,11 +34,12 @@ class KTPdfIndexerTrigger extends KTBaseIndexerTrigger { |
| 34 | 34 | ); |
| 35 | 35 | var $command = 'pdftotext'; // could be any application. |
| 36 | 36 | var $commandconfig = 'indexer/pdftotext'; // could be any application. |
| 37 | - var $args = array("-nopgbrk"); | |
| 37 | + var $args = array("-nopgbrk", "-enc", "UTF-8"); | |
| 38 | 38 | var $use_pipes = false; |
| 39 | 39 | |
| 40 | 40 | // see BaseIndexer for how the extraction works. |
| 41 | 41 | function findLocalCommand() { |
| 42 | + putenv('LANG=en_US.UTF-8'); | |
| 42 | 43 | $sCommand = KTUtil::findCommand($this->commandconfig, $this->command); |
| 43 | 44 | return $sCommand; |
| 44 | 45 | } | ... | ... |
plugins/ktstandard/contents/RtfIndexer.php
| ... | ... | @@ -34,11 +34,12 @@ class KTRtfIndexerTrigger extends KTBaseIndexerTrigger { |
| 34 | 34 | ); |
| 35 | 35 | var $command = 'catdoc'; // could be any application. |
| 36 | 36 | var $commandconfig = 'indexer/catdoc'; // could be any application. |
| 37 | - var $args = array("-w", "-d", "utf-8"); | |
| 37 | + var $args = array("-w", "-d", "UTF-8"); | |
| 38 | 38 | var $use_pipes = true; |
| 39 | 39 | |
| 40 | 40 | function findLocalCommand() { |
| 41 | 41 | $sCommand = KTUtil::findCommand($this->commandconfig, $this->command); |
| 42 | + putenv('LANG=en_US.UTF-8'); | |
| 42 | 43 | return $sCommand; |
| 43 | 44 | } |
| 44 | 45 | ... | ... |
plugins/ktstandard/contents/WordIndexer.php
| ... | ... | @@ -34,7 +34,7 @@ class KTWordIndexerTrigger extends KTBaseIndexerTrigger { |
| 34 | 34 | ); |
| 35 | 35 | var $command = 'catdoc'; // could be any application. |
| 36 | 36 | var $commandconfig = 'indexer/catdoc'; // could be any application. |
| 37 | - var $args = array("-w"); | |
| 37 | + var $args = array("-w", "-d", "UTF-8"); | |
| 38 | 38 | var $use_pipes = true; |
| 39 | 39 | |
| 40 | 40 | function extract_contents($sFilename, $sTempFilename) { |
| ... | ... | @@ -47,6 +47,7 @@ class KTWordIndexerTrigger extends KTBaseIndexerTrigger { |
| 47 | 47 | $sDir = dirname(dirname($sCommand)); |
| 48 | 48 | putenv('HOME=' . $sDir); |
| 49 | 49 | } |
| 50 | + putenv('LANG=en_US.UTF-8'); | |
| 50 | 51 | return parent::extract_contents($sFilename, $sTempFilename); |
| 51 | 52 | } |
| 52 | 53 | ... | ... |