Commit 319ec2a8d18e3603ae908a49005d516b317eb431

Authored by kevin_fourie
1 parent 5df9da16

Merged in from DEV trunk...

KTS-673
"The search algorithm needs some work"
Updated. Removed internal metadata references

Committed By: Conrad Vermeulen
Reviewed By: Kevin Fourie

KTS-2529
"Test open office document extractor"
Fixed. The extractor script was not picking up the host and port command-line arguments correctly.

Committed By: Conrad Vermeulen
Reviewed By: Kevin Fourie

KTS-673
"The search algorithm needs some work"
Updated. Removed experimental indexer

Committed By: Conrad Vermeulen
Reviewed By: Kevin Fourie

KTS-2525
"Create windows service to wrap around scheduler"
Updated. Converted the bat file to DOS file format (file content is otherwise unchanged).

Committed By: Kevin Fourie
Reviewed By: Conrad Vermeulen

git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/STABLE/trunk@7462 c91229c3-7414-0410-bfa2-8a42b809f60b
bin/openoffice/DocumentConverter.py
... ... @@ -80,7 +80,7 @@ def _unoProps(**args):
80 80  
81 81 class DocumentConverter:
82 82  
83   - def __init__(self, host=argv[3], port=argv[4]):
  83 + def __init__(self, host, port):
84 84 localContext = uno.getComponentContext()
85 85 resolver = localContext.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", localContext)
86 86 try:
... ... @@ -133,7 +133,7 @@ if __name__ == "__main__":
133 133 exit(255)
134 134  
135 135 try:
136   - converter = DocumentConverter()
  136 + converter = DocumentConverter(argv[3],argv[4])
137 137 converter.convert(argv[1], argv[2])
138 138 except DocumentConversionException, exception:
139 139 print "ERROR! " + str(exception)
... ...
bin/win32/taskrunner.bat
1   -@echo off
2   -"@@BITROCK_INSTALLDIR@@\php\php.exe" "@@BITROCK_INSTALLDIR@@\knowledgeTree\bin\win32\schedulerService.php"
3   -
  1 +@echo off
  2 +"@@BITROCK_INSTALLDIR@@\php\php.exe" "@@BITROCK_INSTALLDIR@@\knowledgeTree\bin\win32\schedulerService.php"
  3 +
... ...
search2/indexing/extractors/ExifExtractor.inc.php
... ... @@ -27,6 +27,11 @@ class ExifExtractor extends DocumentExtractor
27 27 // no point indexing numeric content. it will be ignored anyways!
28 28 continue;
29 29 }
  30 + if ($key =='FILE' && in_array($name, array('MimeType', 'SectionsFound')))
  31 + {
  32 + continue;
  33 + }
  34 +
30 35 $content .= "$val\n";
31 36 }
32 37 }
... ...
search2/indexing/extractors/OOPDFTextExtractor.inc.php deleted
1   -<?php
2   -
3   -require_once('PDFExtractor.inc.php');
4   -require_once('OOTextExtractor.inc.php');
5   -
6   -class OOPDFTextExtractor extends CompositeExtractor
7   -{
8   - public function __construct()
9   - {
10   - parent::__construct(new OOTextExtractor('application/pdf'),'pdf','application/pdf',new PDFExtractor(), true);
11   - }
12   -
13   - public function getSupportedMimeTypes()
14   - {
15   - // we provide this so diagnose doesn't fail
16   - return array();
17   - }
18   -
19   - public function getDisplayName()
20   - {
21   - // we provide this so diagnose doesn't fail
22   - throw new Exception(_kt('This should be overriden'));
23   - }
24   -
25   -}
26   -
27   -/*
28   -class OOPDFTextExtractor extends DocumentExtractor
29   -{
30   -
31   - private $pdf2txt;
32   -
33   -
34   - private $text2pdf;
35   -
36   - public function __construct()
37   - {
38   - $this->pdf2txt = new PDFExtractor();
39   - $this->text2pdf = new OOTextExtractor();
40   - }
41   -
42   - public function needsIntermediateSourceFile()
43   - {
44   - // we need the intermediate file because it
45   - // has the correct extension. jodconverter uses the extension to determine mimetype
46   - return true;
47   - }
48   -
49   - public function getDisplayName()
50   - {
51   - throw new Exception('This should be overriden');
52   - }
53   -
54   - public function getSupportedMimeTypes()
55   - {
56   - return array();
57   - }
58   -
59   - public function extractTextContent()
60   - {
61   - $pdffile = $this->targetfile . '.pdf';
62   -
63   - $this->text2pdf->setSourceFile($this->sourcefile);
64   - $this->text2pdf->setTargetFile($pdffile);
65   - $this->text2pdf->setMimeType($this->mimetype);
66   - $this->text2pdf->setExtension($this->extension);
67   - if ($this->extractTextContent())
68   - {
69   - return false;
70   - }
71   -
72   - $this->pdf2txt->setSourceFile($pdffile);
73   - $this->pdf2txt->setTargetFile($this->targetfile);
74   - $this->pdf2txt->setMimeType('application/pdf');
75   - $this->pdf2txt->setExtension('pdf');
76   - $result = $this->pdf2txt->extractTextContent();
77   -
78   - unlink(@$pdffile);
79   -
80   - return $result;
81   - }
82   -
83   - public function diagnose()
84   - {
85   - $diagnosis = $this->pdf2txt->diagnose();
86   - if (!empty($diagnosis))
87   - {
88   - return $diagnosis;
89   - }
90   -
91   - $diagnosis = $this->text2pdf->diagnose();
92   - if (!empty($diagnosis))
93   - {
94   - return $diagnosis;
95   - }
96   -
97   - return null;
98   - }
99   -} */
100   -
101   -?>
102 0 \ No newline at end of file