Commit 68f26be8ad91ab691729d466c6d01959231cfc9c

Authored by Bryn Divey
1 parent 3b597345

Most indexers now index UTF-8 on Linux


git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/trunk@5525 c91229c3-7414-0410-bfa2-8a42b809f60b
plugins/ktstandard/contents/BaseIndexer.php
... ... @@ -99,8 +99,8 @@ class KTBaseIndexerTrigger {
99 99  
100 100 $contents = $this->extract_contents($intermediate, $myfilename);
101 101  
102   - unlink($myfilename);
103   - if (OS_WINDOWS) { unlink($intermediate); }
  102 + @unlink($myfilename);
  103 + if (OS_WINDOWS) { @unlink($intermediate); }
104 104 if (empty($contents)) {
105 105 return;
106 106 }
... ... @@ -136,7 +136,7 @@ class KTBaseIndexerTrigger {
136 136 }
137 137 KTUtil::pexec($cmdline, $aOptions);
138 138 $contents = file_get_contents($sTempFilename);
139   -
  139 +
140 140 return $contents;
141 141 }
142 142 }
... ...
plugins/ktstandard/contents/ExcelIndexer.php
... ... @@ -36,13 +36,14 @@ class KTExcelIndexerTrigger extends KTBaseIndexerTrigger {
36 36 );
37 37 var $command = 'xls2csv'; // could be any application.
38 38 var $commandconfig = 'indexer/xls2csv'; // could be any application.
39   - var $args = array("-q", "0", "-c", " ");
  39 + var $args = array("-d", "UTF-8", "-q", "0", "-c", " ");
40 40 var $use_pipes = true;
41 41  
42 42 // see BaseIndexer for how the extraction works.
43 43 //
44 44 function extract_contents($sFilename, $sTempFilename) {
45 45 if (!OS_WINDOWS) {
  46 + putenv('LANG=en_US.UTF-8');
46 47 $res = parent::extract_contents($sFilename, $sTempFilename);
47 48 if (!empty($res)) {
48 49 return $res;
... ...
plugins/ktstandard/contents/PdfIndexer.php
... ... @@ -34,11 +34,12 @@ class KTPdfIndexerTrigger extends KTBaseIndexerTrigger {
34 34 );
35 35 var $command = 'pdftotext'; // could be any application.
36 36 var $commandconfig = 'indexer/pdftotext'; // could be any application.
37   - var $args = array("-nopgbrk");
  37 + var $args = array("-nopgbrk", "-enc", "UTF-8");
38 38 var $use_pipes = false;
39 39  
40 40 // see BaseIndexer for how the extraction works.
41 41 function findLocalCommand() {
  42 + putenv('LANG=en_US.UTF-8');
42 43 $sCommand = KTUtil::findCommand($this->commandconfig, $this->command);
43 44 return $sCommand;
44 45 }
... ...
plugins/ktstandard/contents/RtfIndexer.php
... ... @@ -34,11 +34,12 @@ class KTRtfIndexerTrigger extends KTBaseIndexerTrigger {
34 34 );
35 35 var $command = 'catdoc'; // could be any application.
36 36 var $commandconfig = 'indexer/catdoc'; // could be any application.
37   - var $args = array("-w", "-d", "utf-8");
  37 + var $args = array("-w", "-d", "UTF-8");
38 38 var $use_pipes = true;
39 39  
40 40 function findLocalCommand() {
41 41 $sCommand = KTUtil::findCommand($this->commandconfig, $this->command);
  42 + putenv('LANG=en_US.UTF-8');
42 43 return $sCommand;
43 44 }
44 45  
... ...
plugins/ktstandard/contents/WordIndexer.php
... ... @@ -34,7 +34,7 @@ class KTWordIndexerTrigger extends KTBaseIndexerTrigger {
34 34 );
35 35 var $command = 'catdoc'; // could be any application.
36 36 var $commandconfig = 'indexer/catdoc'; // could be any application.
37   - var $args = array("-w");
  37 + var $args = array("-w", "-d", "UTF-8");
38 38 var $use_pipes = true;
39 39  
40 40 function extract_contents($sFilename, $sTempFilename) {
... ... @@ -47,6 +47,7 @@ class KTWordIndexerTrigger extends KTBaseIndexerTrigger {
47 47 $sDir = dirname(dirname($sCommand));
48 48 putenv('HOME=' . $sDir);
49 49 }
  50 + putenv('LANG=en_US.UTF-8');
50 51 return parent::extract_contents($sFilename, $sTempFilename);
51 52 }
52 53  
... ...