Commit 25c59f4347c83a597da23b0efa06418c7fe68eeb

Authored by Etienne Stalmans
1 parent 3a002837

Fix field-code parsing to account for field codes that start and end in different w:p tags

Showing 1 changed file with 6 additions and 6 deletions
oletools/msodde.py
@@ -114,14 +114,14 @@ def process_file(filepath): @@ -114,14 +114,14 @@ def process_file(filepath):
114 root = ET.fromstring(data) 114 root = ET.fromstring(data)
115 fields = [] 115 fields = []
116 ddetext = u'' 116 ddetext = u''
117 - 117 + level = 0
118 # find all the tags 'w:p': 118 # find all the tags 'w:p':
119 # parse each for begin and end tags, to group DDE strings 119 # parse each for begin and end tags, to group DDE strings
120 - # fldChar can be in either a w:r element or floating alone in the w:p 120 + # fldChar can be in either a w:r element, floating alone in the w:p, or spread across w:p tags
121 # escape DDE if quoted etc 121 # escape DDE if quoted etc
122 # (each is a chunk of a DDE link) 122 # (each is a chunk of a DDE link)
123 for subs in root.iter(TAG_W_P): 123 for subs in root.iter(TAG_W_P):
124 - level = 0 124 + elem = None
125 for e in subs: 125 for e in subs:
126 #check if w:r and if it is parse children elements to pull out the first FLDCHAR or INSTRTEXT 126 #check if w:r and if it is parse children elements to pull out the first FLDCHAR or INSTRTEXT
127 if e.tag == TAG_W_R: 127 if e.tag == TAG_W_R:
@@ -141,9 +141,10 @@ def process_file(filepath): @@ -141,9 +141,10 @@ def process_file(filepath):
141 level += 1 141 level += 1
142 if elem.attrib[ATTR_W_FLDCHARTYPE] == "end": 142 if elem.attrib[ATTR_W_FLDCHARTYPE] == "end":
143 level -= 1 143 level -= 1
144 - if level == 0 : 144 + if level == 0 or level == -1 : # edge-case where level becomes -1
145 fields.append(ddetext) 145 fields.append(ddetext)
146 ddetext = u'' 146 ddetext = u''
  147 + level = 0 # reset edge-case
147 148
148 # concatenate the text of the field, if present: 149 # concatenate the text of the field, if present:
149 if elem.tag == TAG_W_INSTRTEXT and elem.text is not None: 150 if elem.tag == TAG_W_INSTRTEXT and elem.text is not None:
@@ -159,8 +160,7 @@ def process_file(filepath): @@ -159,8 +160,7 @@ def process_file(filepath):
159 160
160 return fields 161 return fields
161 162
162 -def unquote(field):  
163 - 163 +def unquote(field):
164 if "QUOTE" not in field or NO_QUOTES: 164 if "QUOTE" not in field or NO_QUOTES:
165 return field 165 return field
166 #split into components 166 #split into components