Commit 7bcabd9988bd55418227b06eb4274719a7756be3
1 parent
ba871b6b
Fix a bug where the last instrText wasn't always appended to the final string.
Reset the level counter with each w:p; as far as I can see, field codes shouldn't cross w:p tags.
Showing 1 changed file with 6 additions and 4 deletions
oletools/msodde.py
| ... | ... | @@ -112,13 +112,14 @@ def process_file(filepath): |
| 112 | 112 | root = ET.fromstring(data) |
| 113 | 113 | fields = [] |
| 114 | 114 | ddetext = u'' |
| 115 | - level = 0 | |
| 115 | + | |
| 116 | 116 | # find all the tags 'w:p': |
| 117 | 117 | # parse each for begin and end tags, to group DDE strings |
| 118 | 118 | # fldChar can be in either a w:r element or floating alone in the w:p |
| 119 | 119 | # escape DDE if quoted etc |
| 120 | 120 | # (each is a chunk of a DDE link) |
| 121 | 121 | for subs in root.iter("{%s}p"%NS_WORD): |
| 122 | + level = 0 | |
| 122 | 123 | for e in subs: |
| 123 | 124 | #check if w:r and if it is parse children elements to pull out the first FLDCHAR or INSTRTEXT |
| 124 | 125 | if e.tag == "{%s}r"%NS_WORD: |
| ... | ... | @@ -131,22 +132,23 @@ def process_file(filepath): |
| 131 | 132 | #this should be an error condition |
| 132 | 133 | if elem is None: |
| 133 | 134 | continue |
| 134 | - | |
| 135 | + | |
| 135 | 136 | #check if FLDCHARTYPE and whether "begin" or "end" tag |
| 136 | 137 | if elem.attrib.get(ATTR_W_FLDCHARTYPE) is not None: |
| 137 | 138 | if elem.attrib[ATTR_W_FLDCHARTYPE] == "begin": |
| 138 | 139 | level += 1 |
| 139 | 140 | if elem.attrib[ATTR_W_FLDCHARTYPE] == "end": |
| 140 | 141 | level -= 1 |
| 141 | - if level == 0: | |
| 142 | + if level == 0 : | |
| 142 | 143 | fields.append(ddetext) |
| 143 | 144 | ddetext = u'' |
| 144 | - | |
| 145 | + | |
| 145 | 146 | # concatenate the text of the field, if present: |
| 146 | 147 | if elem.tag == TAG_W_INSTRTEXT and elem.text is not None: |
| 147 | 148 | #expand field code if QUOTED |
| 148 | 149 | ddetext += unquote(elem.text) |
| 149 | 150 | |
| 151 | + | |
| 150 | 152 | for elem in root.iter(TAG_W_FLDSIMPLE): |
| 151 | 153 | # concatenate the attribute of the field, if present: |
| 152 | 154 | if elem.attrib is not None: | ... | ... |