Commit 68f26be8ad91ab691729d466c6d01959231cfc9c

Authored by Bryn Divey
1 parent 3b597345

Most indexers now extract and index document contents as UTF-8 on Linux


git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/trunk@5525 c91229c3-7414-0410-bfa2-8a42b809f60b
plugins/ktstandard/contents/BaseIndexer.php
@@ -99,8 +99,8 @@ class KTBaseIndexerTrigger { @@ -99,8 +99,8 @@ class KTBaseIndexerTrigger {
99 99
100 $contents = $this->extract_contents($intermediate, $myfilename); 100 $contents = $this->extract_contents($intermediate, $myfilename);
101 101
102 - unlink($myfilename); 102 + @unlink($myfilename);
103 - if (OS_WINDOWS) { unlink($intermediate); } 103 + if (OS_WINDOWS) { @unlink($intermediate); }
104 if (empty($contents)) { 104 if (empty($contents)) {
105 return; 105 return;
106 } 106 }
@@ -136,7 +136,7 @@ class KTBaseIndexerTrigger { @@ -136,7 +136,7 @@ class KTBaseIndexerTrigger {
136 } 136 }
137 KTUtil::pexec($cmdline, $aOptions); 137 KTUtil::pexec($cmdline, $aOptions);
138 $contents = file_get_contents($sTempFilename); 138 $contents = file_get_contents($sTempFilename);
139 - 139 +
140 return $contents; 140 return $contents;
141 } 141 }
142 } 142 }
plugins/ktstandard/contents/ExcelIndexer.php
@@ -36,13 +36,14 @@ class KTExcelIndexerTrigger extends KTBaseIndexerTrigger { @@ -36,13 +36,14 @@ class KTExcelIndexerTrigger extends KTBaseIndexerTrigger {
36 ); 36 );
37 var $command = 'xls2csv'; // could be any application. 37 var $command = 'xls2csv'; // could be any application.
38 var $commandconfig = 'indexer/xls2csv'; // could be any application. 38 var $commandconfig = 'indexer/xls2csv'; // could be any application.
39 - var $args = array("-q", "0", "-c", " "); 39 + var $args = array("-d", "UTF-8", "-q", "0", "-c", " ");
40 var $use_pipes = true; 40 var $use_pipes = true;
41 41
42 // see BaseIndexer for how the extraction works. 42 // see BaseIndexer for how the extraction works.
43 // 43 //
44 function extract_contents($sFilename, $sTempFilename) { 44 function extract_contents($sFilename, $sTempFilename) {
45 if (!OS_WINDOWS) { 45 if (!OS_WINDOWS) {
  46 + putenv('LANG=en_US.UTF-8');
46 $res = parent::extract_contents($sFilename, $sTempFilename); 47 $res = parent::extract_contents($sFilename, $sTempFilename);
47 if (!empty($res)) { 48 if (!empty($res)) {
48 return $res; 49 return $res;
plugins/ktstandard/contents/PdfIndexer.php
@@ -34,11 +34,12 @@ class KTPdfIndexerTrigger extends KTBaseIndexerTrigger { @@ -34,11 +34,12 @@ class KTPdfIndexerTrigger extends KTBaseIndexerTrigger {
34 ); 34 );
35 var $command = 'pdftotext'; // could be any application. 35 var $command = 'pdftotext'; // could be any application.
36 var $commandconfig = 'indexer/pdftotext'; // could be any application. 36 var $commandconfig = 'indexer/pdftotext'; // could be any application.
37 - var $args = array("-nopgbrk"); 37 + var $args = array("-nopgbrk", "-enc", "UTF-8");
38 var $use_pipes = false; 38 var $use_pipes = false;
39 39
40 // see BaseIndexer for how the extraction works. 40 // see BaseIndexer for how the extraction works.
41 function findLocalCommand() { 41 function findLocalCommand() {
  42 + putenv('LANG=en_US.UTF-8');
42 $sCommand = KTUtil::findCommand($this->commandconfig, $this->command); 43 $sCommand = KTUtil::findCommand($this->commandconfig, $this->command);
43 return $sCommand; 44 return $sCommand;
44 } 45 }
plugins/ktstandard/contents/RtfIndexer.php
@@ -34,11 +34,12 @@ class KTRtfIndexerTrigger extends KTBaseIndexerTrigger { @@ -34,11 +34,12 @@ class KTRtfIndexerTrigger extends KTBaseIndexerTrigger {
34 ); 34 );
35 var $command = 'catdoc'; // could be any application. 35 var $command = 'catdoc'; // could be any application.
36 var $commandconfig = 'indexer/catdoc'; // could be any application. 36 var $commandconfig = 'indexer/catdoc'; // could be any application.
37 - var $args = array("-w", "-d", "utf-8"); 37 + var $args = array("-w", "-d", "UTF-8");
38 var $use_pipes = true; 38 var $use_pipes = true;
39 39
40 function findLocalCommand() { 40 function findLocalCommand() {
41 $sCommand = KTUtil::findCommand($this->commandconfig, $this->command); 41 $sCommand = KTUtil::findCommand($this->commandconfig, $this->command);
  42 + putenv('LANG=en_US.UTF-8');
42 return $sCommand; 43 return $sCommand;
43 } 44 }
44 45
plugins/ktstandard/contents/WordIndexer.php
@@ -34,7 +34,7 @@ class KTWordIndexerTrigger extends KTBaseIndexerTrigger { @@ -34,7 +34,7 @@ class KTWordIndexerTrigger extends KTBaseIndexerTrigger {
34 ); 34 );
35 var $command = 'catdoc'; // could be any application. 35 var $command = 'catdoc'; // could be any application.
36 var $commandconfig = 'indexer/catdoc'; // could be any application. 36 var $commandconfig = 'indexer/catdoc'; // could be any application.
37 - var $args = array("-w"); 37 + var $args = array("-w", "-d", "UTF-8");
38 var $use_pipes = true; 38 var $use_pipes = true;
39 39
40 function extract_contents($sFilename, $sTempFilename) { 40 function extract_contents($sFilename, $sTempFilename) {
@@ -47,6 +47,7 @@ class KTWordIndexerTrigger extends KTBaseIndexerTrigger { @@ -47,6 +47,7 @@ class KTWordIndexerTrigger extends KTBaseIndexerTrigger {
47 $sDir = dirname(dirname($sCommand)); 47 $sDir = dirname(dirname($sCommand));
48 putenv('HOME=' . $sDir); 48 putenv('HOME=' . $sDir);
49 } 49 }
  50 + putenv('LANG=en_US.UTF-8');
50 return parent::extract_contents($sFilename, $sTempFilename); 51 return parent::extract_contents($sFilename, $sTempFilename);
51 } 52 }
52 53