Commit 7bcabd9988bd55418227b06eb4274719a7756be3

Authored by Etienne Stalmans
1 parent ba871b6b

Fix a bug where the last instrText wasn't always appended to the final string.

Reset the level counter with each w:p; as far as I can see, field codes shouldn't cross w:p tags.
Showing 1 changed file with 6 additions and 4 deletions
oletools/msodde.py
@@ -112,13 +112,14 @@ def process_file(filepath): @@ -112,13 +112,14 @@ def process_file(filepath):
112 root = ET.fromstring(data) 112 root = ET.fromstring(data)
113 fields = [] 113 fields = []
114 ddetext = u'' 114 ddetext = u''
115 - level = 0 115 +
116 # find all the tags 'w:p': 116 # find all the tags 'w:p':
117 # parse each for begin and end tags, to group DDE strings 117 # parse each for begin and end tags, to group DDE strings
118 # fldChar can be in either a w:r element or floating alone in the w:p 118 # fldChar can be in either a w:r element or floating alone in the w:p
119 # escape DDE if quoted etc 119 # escape DDE if quoted etc
120 # (each is a chunk of a DDE link) 120 # (each is a chunk of a DDE link)
121 for subs in root.iter("{%s}p"%NS_WORD): 121 for subs in root.iter("{%s}p"%NS_WORD):
  122 + level = 0
122 for e in subs: 123 for e in subs:
123 #check if w:r and if it is parse children elements to pull out the first FLDCHAR or INSTRTEXT 124 #check if w:r and if it is parse children elements to pull out the first FLDCHAR or INSTRTEXT
124 if e.tag == "{%s}r"%NS_WORD: 125 if e.tag == "{%s}r"%NS_WORD:
@@ -131,22 +132,23 @@ def process_file(filepath): @@ -131,22 +132,23 @@ def process_file(filepath):
131 #this should be an error condition 132 #this should be an error condition
132 if elem is None: 133 if elem is None:
133 continue 134 continue
134 - 135 +
135 #check if FLDCHARTYPE and whether "begin" or "end" tag 136 #check if FLDCHARTYPE and whether "begin" or "end" tag
136 if elem.attrib.get(ATTR_W_FLDCHARTYPE) is not None: 137 if elem.attrib.get(ATTR_W_FLDCHARTYPE) is not None:
137 if elem.attrib[ATTR_W_FLDCHARTYPE] == "begin": 138 if elem.attrib[ATTR_W_FLDCHARTYPE] == "begin":
138 level += 1 139 level += 1
139 if elem.attrib[ATTR_W_FLDCHARTYPE] == "end": 140 if elem.attrib[ATTR_W_FLDCHARTYPE] == "end":
140 level -= 1 141 level -= 1
141 - if level == 0: 142 + if level == 0 :
142 fields.append(ddetext) 143 fields.append(ddetext)
143 ddetext = u'' 144 ddetext = u''
144 - 145 +
145 # concatenate the text of the field, if present: 146 # concatenate the text of the field, if present:
146 if elem.tag == TAG_W_INSTRTEXT and elem.text is not None: 147 if elem.tag == TAG_W_INSTRTEXT and elem.text is not None:
147 #expand field code if QUOTED 148 #expand field code if QUOTED
148 ddetext += unquote(elem.text) 149 ddetext += unquote(elem.text)
149 150
  151 +
150 for elem in root.iter(TAG_W_FLDSIMPLE): 152 for elem in root.iter(TAG_W_FLDSIMPLE):
151 # concatenate the attribute of the field, if present: 153 # concatenate the attribute of the field, if present:
152 if elem.attrib is not None: 154 if elem.attrib is not None: