Commit 319ec2a8d18e3603ae908a49005d516b317eb431

Authored by kevin_fourie
1 parent 5df9da16

Merged in from DEV trunk...

KTS-673
"The search algorithm needs some work"
Updated. Removed internal metadata references

Committed By: Conrad Vermeulen
Reviewed By: Kevin Fourie

KTS-2529
"Test open office document extractor"
Fixed. Extractor script was not taking host and port correctly

Committed By: Conrad Vermeulen
Reviewed By: Kevin Fourie

KTS-673
"The search algorithm needs some work"
Updated. Removed experimental indexer

Committed By: Conrad Vermeulen
Reviewed By: Kevin Fourie

KTS-2525
"Create windows service to wrap around scheduler"
Updated. Changed bat file to dos filetype.

Committed By: Kevin Fourie
Reviewed By: Conrad Vermeulen

git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/STABLE/trunk@7462 c91229c3-7414-0410-bfa2-8a42b809f60b
bin/openoffice/DocumentConverter.py
@@ -80,7 +80,7 @@ def _unoProps(**args):
 
 class DocumentConverter:
 
-    def __init__(self, host=argv[3], port=argv[4]):
+    def __init__(self, host, port):
         localContext = uno.getComponentContext()
         resolver = localContext.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", localContext)
         try:
@@ -133,7 +133,7 @@ if __name__ == "__main__":
         exit(255)
 
     try:
-        converter = DocumentConverter()
+        converter = DocumentConverter(argv[3],argv[4])
         converter.convert(argv[1], argv[2])
     except DocumentConversionException, exception:
         print "ERROR! " + str(exception)
bin/win32/taskrunner.bat
1 -@echo off  
2 -"@@BITROCK_INSTALLDIR@@\php\php.exe" "@@BITROCK_INSTALLDIR@@\knowledgeTree\bin\win32\schedulerService.php"  
3 -
1 +@echo off
2 +"@@BITROCK_INSTALLDIR@@\php\php.exe" "@@BITROCK_INSTALLDIR@@\knowledgeTree\bin\win32\schedulerService.php"
3 +
search2/indexing/extractors/ExifExtractor.inc.php
@@ -27,6 +27,11 @@ class ExifExtractor extends DocumentExtractor
                     // no point indexing numeric content. it will be ignored anyways!
                     continue;
                 }
+                if ($key =='FILE' && in_array($name, array('MimeType', 'SectionsFound')))
+                {
+                    continue;
+                }
+
                 $content .= "$val\n";
             }
         }
search2/indexing/extractors/OOPDFTextExtractor.inc.php deleted
1 -<?php  
2 -  
3 -require_once('PDFExtractor.inc.php');  
4 -require_once('OOTextExtractor.inc.php');  
5 -  
6 -class OOPDFTextExtractor extends CompositeExtractor  
7 -{  
8 - public function __construct()  
9 - {  
10 - parent::__construct(new OOTextExtractor('application/pdf'),'pdf','application/pdf',new PDFExtractor(), true);  
11 - }  
12 -  
13 - public function getSupportedMimeTypes()  
14 - {  
15 - // we provide this so diagnose doesn't fail  
16 - return array();  
17 - }  
18 -  
19 - public function getDisplayName()  
20 - {  
21 - // we provide this so diagnose doesn't fail  
22 - throw new Exception(_kt('This should be overriden'));  
23 - }  
24 -  
25 -}  
26 -  
27 -/*  
28 -class OOPDFTextExtractor extends DocumentExtractor  
29 -{  
30 -  
31 - private $pdf2txt;  
32 -  
33 -  
34 - private $text2pdf;  
35 -  
36 - public function __construct()  
37 - {  
38 - $this->pdf2txt = new PDFExtractor();  
39 - $this->text2pdf = new OOTextExtractor();  
40 - }  
41 -  
42 - public function needsIntermediateSourceFile()  
43 - {  
44 - // we need the intermediate file because it  
45 - // has the correct extension. jodconverter uses the extension to determine mimetype  
46 - return true;  
47 - }  
48 -  
49 - public function getDisplayName()  
50 - {  
51 - throw new Exception('This should be overriden');  
52 - }  
53 -  
54 - public function getSupportedMimeTypes()  
55 - {  
56 - return array();  
57 - }  
58 -  
59 - public function extractTextContent()  
60 - {  
61 - $pdffile = $this->targetfile . '.pdf';  
62 -  
63 - $this->text2pdf->setSourceFile($this->sourcefile);  
64 - $this->text2pdf->setTargetFile($pdffile);  
65 - $this->text2pdf->setMimeType($this->mimetype);  
66 - $this->text2pdf->setExtension($this->extension);  
67 - if ($this->extractTextContent())  
68 - {  
69 - return false;  
70 - }  
71 -  
72 - $this->pdf2txt->setSourceFile($pdffile);  
73 - $this->pdf2txt->setTargetFile($this->targetfile);  
74 - $this->pdf2txt->setMimeType('application/pdf');  
75 - $this->pdf2txt->setExtension('pdf');  
76 - $result = $this->pdf2txt->extractTextContent();  
77 -  
78 - unlink(@$pdffile);  
79 -  
80 - return $result;  
81 - }  
82 -  
83 - public function diagnose()  
84 - {  
85 - $diagnosis = $this->pdf2txt->diagnose();  
86 - if (!empty($diagnosis))  
87 - {  
88 - return $diagnosis;  
89 - }  
90 -  
91 - $diagnosis = $this->text2pdf->diagnose();  
92 - if (!empty($diagnosis))  
93 - {  
94 - return $diagnosis;  
95 - }  
96 -  
97 - return null;  
98 - }  
99 -} */  
100 -  
101 -?>  
102 \ No newline at end of file 0 \ No newline at end of file