DocumentConverter.py
5.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#
# PyODConverter (Python OpenDocument Converter) v0.9 - 2007-04-05
#
# This script converts a document from one office format to another by
# connecting to an OpenOffice.org instance via Python-UNO bridge.
#
# Copyright (C) 2007 Mirko Nasato <mirko@artofsolving.com>
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html
#
# Modified by Kevin Fourie <kevin@knowledgetree.com>
# Contributions by Xavier Duret, Conrad Vermeulen
#DEFAULT_OPENOFFICE_PORT = 8100
import os, sys, glob
# Locate the OpenOffice.org "program" directory that ships the Python-UNO
# bridge (the ``uno`` module) and make it importable.
#
# Candidate directories: common Linux install locations (globbed), the
# NeoOffice bundle on Mac OS X and the KnowledgeTree bundle on Windows.
extrapaths = (
    glob.glob('/usr/lib*/openoffice*/program/')
    + glob.glob('/usr/lib*/ooo*/program')
    + [
        '/Applications/NeoOffice.app/Contents/program',
        'c:/program files/ktdms/openoffice/program',
    ]
)

# An explicit location given through the ``ooProgramPath`` environment
# variable is tried before any of the built-in guesses.
ooProgramPath = os.environ.get('ooProgramPath')
if ooProgramPath is not None:
    extrapaths = [ooProgramPath] + extrapaths

for path in extrapaths:
    sys.path.append(path)
    try:
        import uno
    except ImportError:
        # Not a usable directory: undo the sys.path change and try the next.
        sys.path.remove(path)
        continue
    # Prepend the directory to PATH as well (presumably so the bridge can
    # find its native libraries -- confirm).  os.pathsep is used instead of
    # a hard-coded ':' so the Windows candidate produces a valid PATH, and a
    # missing PATH variable no longer raises KeyError.
    currentPath = os.environ.get('PATH')
    if currentPath:
        os.environ['PATH'] = path + os.pathsep + currentPath
    else:
        os.environ['PATH'] = path
    break
else:
    # No candidate directory allowed ``import uno`` to succeed.
    sys.stderr.write("PyODConverter: Cannot find the pyuno.so library in sys.path and known paths.\n")
    sys.exit(1)
from os.path import abspath, splitext
from com.sun.star.beans import PropertyValue
from com.sun.star.connection import NoConnectException
# Document families: the kind of office document a file belongs to.
FAMILY_TEXT = "Text"
FAMILY_SPREADSHEET = "Spreadsheet"
FAMILY_PRESENTATION = "Presentation"

# Input file extension (without the dot) -> document family.
FAMILY_BY_EXTENSION = dict(
    (extension, family)
    for family, extensions in (
        (FAMILY_TEXT, ("odt", "sxw", "doc", "rtf", "txt", "wpd", "html")),
        (FAMILY_SPREADSHEET, ("ods", "sxc", "xls")),
        (FAMILY_PRESENTATION, ("odp", "sxi", "ppt")),
    )
    for extension in extensions
)

# Output file extension -> {document family -> export filter name}.
# Formats that every family can be exported to are spelled out in full ...
FILTER_BY_EXTENSION = {
    "pdf": {
        FAMILY_TEXT: "writer_pdf_Export",
        FAMILY_SPREADSHEET: "calc_pdf_Export",
        FAMILY_PRESENTATION: "impress_pdf_Export",
    },
    "html": {
        FAMILY_TEXT: "HTML (StarWriter)",
        FAMILY_SPREADSHEET: "HTML (StarCalc)",
        FAMILY_PRESENTATION: "impress_html_Export",
    },
}
# ... while formats reachable from a single family are listed as
# (extension, family, filter name) rows.
FILTER_BY_EXTENSION.update(
    (extension, {family: filterName})
    for extension, family, filterName in (
        ("odt", FAMILY_TEXT, "writer8"),
        ("doc", FAMILY_TEXT, "MS Word 97"),
        ("rtf", FAMILY_TEXT, "Rich Text Format"),
        ("txt", FAMILY_TEXT, "Text"),
        ("ods", FAMILY_SPREADSHEET, "calc8"),
        ("xls", FAMILY_SPREADSHEET, "MS Excel 97"),
        ("odp", FAMILY_PRESENTATION, "impress8"),
        ("ppt", FAMILY_PRESENTATION, "MS PowerPoint 97"),
        ("swf", FAMILY_PRESENTATION, "impress_flash_Export"),
    )
)
class DocumentConversionException(Exception):
    """Error carrying a human-readable description of a failed conversion.

    Attributes are documented inline; ``str(exc)`` returns the message.
    """

    def __init__(self, message):
        """Store *message* as the description of the failure."""
        # Initialise the base class as well so that ``args`` holds the
        # message; otherwise repr() and pickling of the exception lose it.
        Exception.__init__(self, message)
        # Human-readable description, returned by __str__.
        self.message = message

    def __str__(self):
        """Return the message passed to the constructor."""
        return self.message
def _unoProps(**args):
props = []
for key in args:
prop = PropertyValue()
prop.Name = key
prop.Value = args[key]
props.append(prop)
return tuple(props)
class DocumentConverter:
    """Convert office documents through a running OpenOffice.org instance.

    The instance is reached over a UNO socket connection; the conversion
    itself is done by loading the input file in the remote desktop and
    storing it again with the export filter matching the output extension.
    """

    def __init__(self, host, port):
        """Connect to the OpenOffice.org instance listening on *host*:*port*.

        Raises:
            DocumentConversionException: if the connection attempt raises
                ``NoConnectException``.
        """
        localContext = uno.getComponentContext()
        resolver = localContext.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", localContext)
        try:
            context = resolver.resolve(
                "uno:socket,host=%s,port=%s;urp;StarOffice.ComponentContext" % (host, port))
        except NoConnectException:
            raise DocumentConversionException(
                "failed to connect to OpenOffice.org on port %s" % port)
        # Remote desktop service used to load documents in convert().
        self.desktop = context.ServiceManager.createInstanceWithContext(
            "com.sun.star.frame.Desktop", context)

    def convert(self, inputFile, outputFile):
        """Convert *inputFile* to *outputFile*.

        The export filter is chosen from the two file extensions.

        Raises:
            DocumentConversionException: if either extension is unknown, the
                combination is unsupported, or the input document cannot be
                loaded.
        """
        inputExt = self._fileExt(inputFile)
        outputExt = self._fileExt(outputFile)
        filterName = self._filterName(inputExt, outputExt)

        # Use the method's own arguments rather than the script-level
        # sys.argv, so the class also works when imported as a module.
        inputUrl = self._fileUrl(inputFile)
        outputUrl = self._fileUrl(outputFile)

        document = self.desktop.loadComponentFromURL(
            inputUrl, "_blank", 0, _unoProps(Hidden=True, ReadOnly=True))
        if document is None:
            # The loader hands back a null reference when loading fails;
            # report that instead of failing later with AttributeError.
            raise DocumentConversionException(
                "failed to load document: '%s'" % inputFile)
        try:
            document.storeToURL(outputUrl, _unoProps(FilterName=filterName))
        finally:
            # Always release the remote document, even if the export failed.
            document.close(True)

    def _filterName(self, inputExt, outputExt):
        """Return the export filter name for converting *inputExt* to *outputExt*.

        Raises:
            DocumentConversionException: for an unknown input extension, an
                unknown output extension, or an output format that is not
                available for the input's document family.
        """
        try:
            family = FAMILY_BY_EXTENSION[inputExt]
        except KeyError:
            raise DocumentConversionException(
                "unknown input format: '%s'" % inputExt)
        try:
            filterByFamily = FILTER_BY_EXTENSION[outputExt]
        except KeyError:
            raise DocumentConversionException(
                "unknown output format: '%s'" % outputExt)
        try:
            return filterByFamily[family]
        except KeyError:
            raise DocumentConversionException(
                "unsupported conversion: from '%s' to '%s'" % (inputExt, outputExt))

    def _fileExt(self, path):
        """Return the lower-cased extension of *path* without the leading dot.

        A path without an extension yields the empty string.
        """
        # splitext() always returns a string for the extension part, so no
        # None check is needed; slicing "" simply gives "".
        return splitext(path)[1][1:].lower()

    def _fileUrl(self, path):
        """Return the file URL for the absolute form of *path*."""
        return uno.systemPathToFileUrl(abspath(path))
if __name__ == "__main__":
    # Command-line entry point:
    #   <script> <input-file> <output-file> <host> <port>
    # Exit status: 255 on bad usage, 1 on a conversion error.
    from sys import argv, exit

    # All four arguments are read below (argv[1]..argv[4]), so require all
    # of them; the previous "< 3" check let 3- and 4-element argv through
    # and crashed with IndexError instead of printing the usage line.
    if len(argv) < 5:
        print("USAGE: " + argv[0] + " <input-file> <output-file> <host> <port>")
        exit(255)

    try:
        converter = DocumentConverter(argv[3], argv[4])
        converter.convert(argv[1], argv[2])
    except DocumentConversionException as exception:
        print("ERROR! " + str(exception))
        exit(1)