Commit 7bcabd9988bd55418227b06eb4274719a7756be3

Authored by Etienne Stalmans
1 parent ba871b6b

Fix a bug where the last instrText wasn't always appended to the final string.

Reset the level counter with each w:p; as far as I can see, field codes shouldn't cross w:p tags.
Showing 1 changed file with 6 additions and 4 deletions
oletools/msodde.py
... ... @@ -112,13 +112,14 @@ def process_file(filepath):
112 112 root = ET.fromstring(data)
113 113 fields = []
114 114 ddetext = u''
115   - level = 0
  115 +
116 116 # find all the tags 'w:p':
117 117 # parse each for begin and end tags, to group DDE strings
118 118 # fldChar can be in either a w:r element or floating alone in the w:p
119 119 # escape DDE if quoted etc
120 120 # (each is a chunk of a DDE link)
121 121 for subs in root.iter("{%s}p"%NS_WORD):
  122 + level = 0
122 123 for e in subs:
123 124 #check if w:r and if it is parse children elements to pull out the first FLDCHAR or INSTRTEXT
124 125 if e.tag == "{%s}r"%NS_WORD:
... ... @@ -131,22 +132,23 @@ def process_file(filepath):
131 132 #this should be an error condition
132 133 if elem is None:
133 134 continue
134   -
  135 +
135 136 #check if FLDCHARTYPE and whether "begin" or "end" tag
136 137 if elem.attrib.get(ATTR_W_FLDCHARTYPE) is not None:
137 138 if elem.attrib[ATTR_W_FLDCHARTYPE] == "begin":
138 139 level += 1
139 140 if elem.attrib[ATTR_W_FLDCHARTYPE] == "end":
140 141 level -= 1
141   - if level == 0:
  142 + if level == 0 :
142 143 fields.append(ddetext)
143 144 ddetext = u''
144   -
  145 +
145 146 # concatenate the text of the field, if present:
146 147 if elem.tag == TAG_W_INSTRTEXT and elem.text is not None:
147 148 #expand field code if QUOTED
148 149 ddetext += unquote(elem.text)
149 150  
  151 +
150 152 for elem in root.iter(TAG_W_FLDSIMPLE):
151 153 # concatenate the attribute of the field, if present:
152 154 if elem.attrib is not None:
... ...