Commit 25c59f4347c83a597da23b0efa06418c7fe68eeb
1 parent
3a002837
fix to account for field codes that start and end in different w:p tags
Showing
1 changed file
with
6 additions
and
6 deletions
oletools/msodde.py
| ... | ... | @@ -114,14 +114,14 @@ def process_file(filepath): |
| 114 | 114 | root = ET.fromstring(data) |
| 115 | 115 | fields = [] |
| 116 | 116 | ddetext = u'' |
| 117 | - | |
| 117 | + level = 0 | |
| 118 | 118 | # find all the tags 'w:p': |
| 119 | 119 | # parse each for begin and end tags, to group DDE strings |
| 120 | - # fldChar can be in either a w:r element or floating alone in the w:p | |
| 120 | + # fldChar can be in either a w:r element, floating alone in the w:p or spread across w:p tags | |
| 121 | 121 | # escape DDE if quoted etc |
| 122 | 122 | # (each is a chunk of a DDE link) |
| 123 | 123 | for subs in root.iter(TAG_W_P): |
| 124 | - level = 0 | |
| 124 | + elem = None | |
| 125 | 125 | for e in subs: |
| 126 | 126 | # check if the element is a w:r; if it is, parse its child elements to pull out the first FLDCHAR or INSTRTEXT |
| 127 | 127 | if e.tag == TAG_W_R: |
| ... | ... | @@ -141,9 +141,10 @@ def process_file(filepath): |
| 141 | 141 | level += 1 |
| 142 | 142 | if elem.attrib[ATTR_W_FLDCHARTYPE] == "end": |
| 143 | 143 | level -= 1 |
| 144 | - if level == 0 : | |
| 144 | + if level == 0 or level == -1 : # edge-case where level becomes -1 | |
| 145 | 145 | fields.append(ddetext) |
| 146 | 146 | ddetext = u'' |
| 147 | + level = 0 # reset edge-case | |
| 147 | 148 | |
| 148 | 149 | # concatenate the text of the field, if present: |
| 149 | 150 | if elem.tag == TAG_W_INSTRTEXT and elem.text is not None: |
| ... | ... | @@ -159,8 +160,7 @@ def process_file(filepath): |
| 159 | 160 | |
| 160 | 161 | return fields |
| 161 | 162 | |
| 162 | -def unquote(field): | |
| 163 | - | |
| 163 | +def unquote(field): | |
| 164 | 164 | if "QUOTE" not in field or NO_QUOTES: |
| 165 | 165 | return field |
| 166 | 166 | #split into components | ... | ... |