Commit 25c59f4347c83a597da23b0efa06418c7fe68eeb
1 parent
3a002837
fix to account for field codes that start and end in different w:p tags
Showing
1 changed file
with
6 additions
and
6 deletions
oletools/msodde.py
| @@ -114,14 +114,14 @@ def process_file(filepath): | @@ -114,14 +114,14 @@ def process_file(filepath): | ||
| 114 | root = ET.fromstring(data) | 114 | root = ET.fromstring(data) |
| 115 | fields = [] | 115 | fields = [] |
| 116 | ddetext = u'' | 116 | ddetext = u'' |
| 117 | - | 117 | + level = 0 |
| 118 | # find all the tags 'w:p': | 118 | # find all the tags 'w:p': |
| 119 | # parse each for begin and end tags, to group DDE strings | 119 | # parse each for begin and end tags, to group DDE strings |
| 120 | - # fldChar can be in either a w:r element or floating alone in the w:p | 120 | + # fldChar can be in either a w:r element, floating alone in the w:p or spread across w:p tags |
| 121 | # escape DDE if quoted etc | 121 | # escape DDE if quoted etc |
| 122 | # (each is a chunk of a DDE link) | 122 | # (each is a chunk of a DDE link) |
| 123 | for subs in root.iter(TAG_W_P): | 123 | for subs in root.iter(TAG_W_P): |
| 124 | - level = 0 | 124 | + elem = None |
| 125 | for e in subs: | 125 | for e in subs: |
| 126 | #check if w:r and if it is parse children elements to pull out the first FLDCHAR or INSTRTEXT | 126 | #check if w:r and if it is parse children elements to pull out the first FLDCHAR or INSTRTEXT |
| 127 | if e.tag == TAG_W_R: | 127 | if e.tag == TAG_W_R: |
| @@ -141,9 +141,10 @@ def process_file(filepath): | @@ -141,9 +141,10 @@ def process_file(filepath): | ||
| 141 | level += 1 | 141 | level += 1 |
| 142 | if elem.attrib[ATTR_W_FLDCHARTYPE] == "end": | 142 | if elem.attrib[ATTR_W_FLDCHARTYPE] == "end": |
| 143 | level -= 1 | 143 | level -= 1 |
| 144 | - if level == 0 : | 144 | + if level == 0 or level == -1 : # edge-case where level becomes -1 |
| 145 | fields.append(ddetext) | 145 | fields.append(ddetext) |
| 146 | ddetext = u'' | 146 | ddetext = u'' |
| 147 | + level = 0 # reset edge-case | ||
| 147 | 148 | ||
| 148 | # concatenate the text of the field, if present: | 149 | # concatenate the text of the field, if present: |
| 149 | if elem.tag == TAG_W_INSTRTEXT and elem.text is not None: | 150 | if elem.tag == TAG_W_INSTRTEXT and elem.text is not None: |
| @@ -159,8 +160,7 @@ def process_file(filepath): | @@ -159,8 +160,7 @@ def process_file(filepath): | ||
| 159 | 160 | ||
| 160 | return fields | 161 | return fields |
| 161 | 162 | ||
| 162 | -def unquote(field): | ||
| 163 | - | 163 | +def unquote(field): |
| 164 | if "QUOTE" not in field or NO_QUOTES: | 164 | if "QUOTE" not in field or NO_QUOTES: |
| 165 | return field | 165 | return field |
| 166 | #split into components | 166 | #split into components |