Commit 25c59f4347c83a597da23b0efa06418c7fe68eeb

Authored by Etienne Stalmans
1 parent 3a002837

fix to account for field codes that start and end in different w:p tags

Showing 1 changed file with 6 additions and 6 deletions
oletools/msodde.py
... ... @@ -114,14 +114,14 @@ def process_file(filepath):
114 114 root = ET.fromstring(data)
115 115 fields = []
116 116 ddetext = u''
117   -
  117 + level = 0
118 118 # find all the tags 'w:p':
119 119 # parse each for begin and end tags, to group DDE strings
120   - # fldChar can be in either a w:r element or floating alone in the w:p
  120 + # fldChar can be in either a w:r element, floating alone in the w:p, or spread across w:p tags
121 121 # escape DDE if quoted etc
122 122 # (each is a chunk of a DDE link)
123 123 for subs in root.iter(TAG_W_P):
124   - level = 0
  124 + elem = None
125 125 for e in subs:
126 126 # check if w:r and, if it is, parse its child elements to pull out the first FLDCHAR or INSTRTEXT
127 127 if e.tag == TAG_W_R:
... ... @@ -141,9 +141,10 @@ def process_file(filepath):
141 141 level += 1
142 142 if elem.attrib[ATTR_W_FLDCHARTYPE] == "end":
143 143 level -= 1
144   - if level == 0 :
  144 + if level == 0 or level == -1 : # edge-case where level becomes -1
145 145 fields.append(ddetext)
146 146 ddetext = u''
  147 + level = 0 # reset edge-case
147 148  
148 149 # concatenate the text of the field, if present:
149 150 if elem.tag == TAG_W_INSTRTEXT and elem.text is not None:
... ... @@ -159,8 +160,7 @@ def process_file(filepath):
159 160  
160 161 return fields
161 162  
162   -def unquote(field):
163   -
  163 +def unquote(field):
164 164 if "QUOTE" not in field or NO_QUOTES:
165 165 return field
166 166 #split into components
... ...