diff --git a/bin/openoffice/DocumentConverter.py b/bin/openoffice/DocumentConverter.py index f03965f..cb18b65 100644 --- a/bin/openoffice/DocumentConverter.py +++ b/bin/openoffice/DocumentConverter.py @@ -80,7 +80,7 @@ def _unoProps(**args): class DocumentConverter: - def __init__(self, host=argv[3], port=argv[4]): + def __init__(self, host, port): localContext = uno.getComponentContext() resolver = localContext.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", localContext) try: @@ -133,7 +133,7 @@ if __name__ == "__main__": exit(255) try: - converter = DocumentConverter() + converter = DocumentConverter(argv[3],argv[4]) converter.convert(argv[1], argv[2]) except DocumentConversionException, exception: print "ERROR! " + str(exception) diff --git a/bin/win32/taskrunner.bat b/bin/win32/taskrunner.bat index dbed1f4..290e4c9 100644 --- a/bin/win32/taskrunner.bat +++ b/bin/win32/taskrunner.bat @@ -1,3 +1,3 @@ -@echo off -"@@BITROCK_INSTALLDIR@@\php\php.exe" "@@BITROCK_INSTALLDIR@@\knowledgeTree\bin\win32\schedulerService.php" - +@echo off +"@@BITROCK_INSTALLDIR@@\php\php.exe" "@@BITROCK_INSTALLDIR@@\knowledgeTree\bin\win32\schedulerService.php" + diff --git a/search2/indexing/extractors/ExifExtractor.inc.php b/search2/indexing/extractors/ExifExtractor.inc.php index 12311fa..1bee685 100644 --- a/search2/indexing/extractors/ExifExtractor.inc.php +++ b/search2/indexing/extractors/ExifExtractor.inc.php @@ -27,6 +27,11 @@ class ExifExtractor extends DocumentExtractor // no point indexing numeric content. it will be ignored anyways! continue; } + if ($key =='FILE' && in_array($name, array('MimeType', 'SectionsFound'))) + { + continue; + } + $content .= "$val\n"; } } diff --git a/search2/indexing/extractors/OOPDFTextExtractor.inc.php b/search2/indexing/extractors/OOPDFTextExtractor.inc.php deleted file mode 100644 index 1ce3ef3..0000000 --- a/search2/indexing/extractors/OOPDFTextExtractor.inc.php +++ /dev/null @@ -1,101 +0,0 @@ -pdf2txt = new PDFExtractor(); - $this->text2pdf = new OOTextExtractor(); - } - - public function needsIntermediateSourceFile() - { - // we need the intermediate file because it - // has the correct extension. jodconverter uses the extension to determine mimetype - return true; - } - - public function getDisplayName() - { - throw new Exception('This should be overriden'); - } - - public function getSupportedMimeTypes() - { - return array(); - } - - public function extractTextContent() - { - $pdffile = $this->targetfile . '.pdf'; - - $this->text2pdf->setSourceFile($this->sourcefile); - $this->text2pdf->setTargetFile($pdffile); - $this->text2pdf->setMimeType($this->mimetype); - $this->text2pdf->setExtension($this->extension); - if ($this->extractTextContent()) - { - return false; - } - - $this->pdf2txt->setSourceFile($pdffile); - $this->pdf2txt->setTargetFile($this->targetfile); - $this->pdf2txt->setMimeType('application/pdf'); - $this->pdf2txt->setExtension('pdf'); - $result = $this->pdf2txt->extractTextContent(); - - unlink(@$pdffile); - - return $result; - } - - public function diagnose() - { - $diagnosis = $this->pdf2txt->diagnose(); - if (!empty($diagnosis)) - { - return $diagnosis; - } - - $diagnosis = $this->text2pdf->diagnose(); - if (!empty($diagnosis)) - { - return $diagnosis; - } - - return null; - } -} */ - -?> \ No newline at end of file