Commit dbf9f15f245edd04342bafd38aab2a8c3dc55da7
Committed by
GitHub
Merge pull request #1455 from m-holger/fuzz
During xref recovery reject /Page objects with multiple errors
Showing
2 changed files
with
52 additions
and
16 deletions
include/qpdf/QPDF.hh
| @@ -772,8 +772,9 @@ class QPDF | @@ -772,8 +772,9 @@ class QPDF | ||
| 772 | bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); | 772 | bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); |
| 773 | bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); | 773 | bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); |
| 774 | qpdf_offset_t read_xrefTable(qpdf_offset_t offset); | 774 | qpdf_offset_t read_xrefTable(qpdf_offset_t offset); |
| 775 | - qpdf_offset_t read_xrefStream(qpdf_offset_t offset, bool in_stream_recovery=false); | ||
| 776 | - qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream, bool in_stream_recovery=false); | 775 | + qpdf_offset_t read_xrefStream(qpdf_offset_t offset, bool in_stream_recovery = false); |
| 776 | + qpdf_offset_t processXRefStream( | ||
| 777 | + qpdf_offset_t offset, QPDFObjectHandle& xref_stream, bool in_stream_recovery = false); | ||
| 777 | std::pair<int, std::array<int, 3>> | 778 | std::pair<int, std::array<int, 3>> |
| 778 | processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged); | 779 | processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged); |
| 779 | int processXRefSize( | 780 | int processXRefSize( |
| @@ -897,7 +898,8 @@ class QPDF | @@ -897,7 +898,8 @@ class QPDF | ||
| 897 | QPDFObjectHandle cur_pages, | 898 | QPDFObjectHandle cur_pages, |
| 898 | QPDFObjGen::set& visited, | 899 | QPDFObjGen::set& visited, |
| 899 | QPDFObjGen::set& seen, | 900 | QPDFObjGen::set& seen, |
| 900 | - bool media_box); | 901 | + bool media_box, |
| 902 | + bool resources); | ||
| 901 | void insertPage(QPDFObjectHandle newpage, int pos); | 903 | void insertPage(QPDFObjectHandle newpage, int pos); |
| 902 | void flattenPagesTree(); | 904 | void flattenPagesTree(); |
| 903 | void insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate); | 905 | void insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate); |
libqpdf/QPDF_pages.cc
| @@ -43,9 +43,10 @@ QPDF::getAllPages() | @@ -43,9 +43,10 @@ QPDF::getAllPages() | ||
| 43 | // Note that pushInheritedAttributesToPage may also be used to initialize m->all_pages. | 43 | // Note that pushInheritedAttributesToPage may also be used to initialize m->all_pages. |
| 44 | if (m->all_pages.empty() && !m->invalid_page_found) { | 44 | if (m->all_pages.empty() && !m->invalid_page_found) { |
| 45 | m->ever_called_get_all_pages = true; | 45 | m->ever_called_get_all_pages = true; |
| 46 | + auto root = getRoot(); | ||
| 46 | QPDFObjGen::set visited; | 47 | QPDFObjGen::set visited; |
| 47 | QPDFObjGen::set seen; | 48 | QPDFObjGen::set seen; |
| 48 | - QPDFObjectHandle pages = getRoot().getKey("/Pages"); | 49 | + QPDFObjectHandle pages = root.getKey("/Pages"); |
| 49 | bool warned = false; | 50 | bool warned = false; |
| 50 | bool changed_pages = false; | 51 | bool changed_pages = false; |
| 51 | while (pages.isDictionary() && pages.hasKey("/Parent")) { | 52 | while (pages.isDictionary() && pages.hasKey("/Parent")) { |
| @@ -56,7 +57,7 @@ QPDF::getAllPages() | @@ -56,7 +57,7 @@ QPDF::getAllPages() | ||
| 56 | // Files have been found in the wild where /Pages in the catalog points to the first | 57 | // Files have been found in the wild where /Pages in the catalog points to the first |
| 57 | // page. Try to work around this and similar cases with this heuristic. | 58 | // page. Try to work around this and similar cases with this heuristic. |
| 58 | if (!warned) { | 59 | if (!warned) { |
| 59 | - getRoot().warnIfPossible( | 60 | + root.warnIfPossible( |
| 60 | "document page tree root (root -> /Pages) doesn't point" | 61 | "document page tree root (root -> /Pages) doesn't point" |
| 61 | " to the root of the page tree; attempting to correct"); | 62 | " to the root of the page tree; attempting to correct"); |
| 62 | warned = true; | 63 | warned = true; |
| @@ -65,7 +66,7 @@ QPDF::getAllPages() | @@ -65,7 +66,7 @@ QPDF::getAllPages() | ||
| 65 | pages = pages.getKey("/Parent"); | 66 | pages = pages.getKey("/Parent"); |
| 66 | } | 67 | } |
| 67 | if (changed_pages) { | 68 | if (changed_pages) { |
| 68 | - getRoot().replaceKey("/Pages", pages); | 69 | + root.replaceKey("/Pages", pages); |
| 69 | } | 70 | } |
| 70 | seen.clear(); | 71 | seen.clear(); |
| 71 | if (!pages.hasKey("/Kids")) { | 72 | if (!pages.hasKey("/Kids")) { |
| @@ -74,7 +75,7 @@ QPDF::getAllPages() | @@ -74,7 +75,7 @@ QPDF::getAllPages() | ||
| 74 | qpdf_e_pages, m->file->getName(), "", 0, "root of pages tree has no /Kids array"); | 75 | qpdf_e_pages, m->file->getName(), "", 0, "root of pages tree has no /Kids array"); |
| 75 | } | 76 | } |
| 76 | try { | 77 | try { |
| 77 | - getAllPagesInternal(pages, visited, seen, false); | 78 | + getAllPagesInternal(pages, visited, seen, false, false); |
| 78 | } catch (...) { | 79 | } catch (...) { |
| 79 | m->all_pages.clear(); | 80 | m->all_pages.clear(); |
| 80 | m->invalid_page_found = false; | 81 | m->invalid_page_found = false; |
| @@ -90,7 +91,11 @@ QPDF::getAllPages() | @@ -90,7 +91,11 @@ QPDF::getAllPages() | ||
| 90 | 91 | ||
| 91 | void | 92 | void |
| 92 | QPDF::getAllPagesInternal( | 93 | QPDF::getAllPagesInternal( |
| 93 | - QPDFObjectHandle cur_node, QPDFObjGen::set& visited, QPDFObjGen::set& seen, bool media_box) | 94 | + QPDFObjectHandle cur_node, |
| 95 | + QPDFObjGen::set& visited, | ||
| 96 | + QPDFObjGen::set& seen, | ||
| 97 | + bool media_box, | ||
| 98 | + bool resources) | ||
| 94 | { | 99 | { |
| 95 | if (!visited.add(cur_node)) { | 100 | if (!visited.add(cur_node)) { |
| 96 | throw QPDFExc( | 101 | throw QPDFExc( |
| @@ -101,6 +106,9 @@ QPDF::getAllPagesInternal( | @@ -101,6 +106,9 @@ QPDF::getAllPagesInternal( | ||
| 101 | "Loop detected in /Pages structure (getAllPages)"); | 106 | "Loop detected in /Pages structure (getAllPages)"); |
| 102 | } | 107 | } |
| 103 | if (!cur_node.isDictionaryOfType("/Pages")) { | 108 | if (!cur_node.isDictionaryOfType("/Pages")) { |
| 109 | + // During fuzzing files were encountered where the root object appeared in the pages tree. | ||
| 110 | + // Unconditionally setting the /Type to /Pages could cause problems, but trying to | ||
| 111 | + // accommodate the possibility may be excessive. | ||
| 104 | cur_node.warnIfPossible("/Type key should be /Pages but is not; overriding"); | 112 | cur_node.warnIfPossible("/Type key should be /Pages but is not; overriding"); |
| 105 | cur_node.replaceKey("/Type", "/Pages"_qpdf); | 113 | cur_node.replaceKey("/Type", "/Pages"_qpdf); |
| 106 | } | 114 | } |
| @@ -108,6 +116,9 @@ QPDF::getAllPagesInternal( | @@ -108,6 +116,9 @@ QPDF::getAllPagesInternal( | ||
| 108 | media_box = cur_node.getKey("/MediaBox").isRectangle(); | 116 | media_box = cur_node.getKey("/MediaBox").isRectangle(); |
| 109 | QTC::TC("qpdf", "QPDF inherit mediabox", media_box ? 0 : 1); | 117 | QTC::TC("qpdf", "QPDF inherit mediabox", media_box ? 0 : 1); |
| 110 | } | 118 | } |
| 119 | + if (!resources) { | ||
| 120 | + resources = cur_node.getKey("/Resources").isDictionary(); | ||
| 121 | + } | ||
| 111 | auto kids = cur_node.getKey("/Kids"); | 122 | auto kids = cur_node.getKey("/Kids"); |
| 112 | if (!visited.add(kids)) { | 123 | if (!visited.add(kids)) { |
| 113 | throw QPDFExc( | 124 | throw QPDFExc( |
| @@ -120,13 +131,22 @@ QPDF::getAllPagesInternal( | @@ -120,13 +131,22 @@ QPDF::getAllPagesInternal( | ||
| 120 | int i = -1; | 131 | int i = -1; |
| 121 | for (auto& kid: kids.as_array()) { | 132 | for (auto& kid: kids.as_array()) { |
| 122 | ++i; | 133 | ++i; |
| 134 | + int errors = 0; | ||
| 135 | + | ||
| 123 | if (!kid.isDictionary()) { | 136 | if (!kid.isDictionary()) { |
| 124 | kid.warnIfPossible("Pages tree includes non-dictionary object; ignoring"); | 137 | kid.warnIfPossible("Pages tree includes non-dictionary object; ignoring"); |
| 125 | m->invalid_page_found = true; | 138 | m->invalid_page_found = true; |
| 126 | continue; | 139 | continue; |
| 127 | } | 140 | } |
| 141 | + if (!kid.isIndirect()) { | ||
| 142 | + QTC::TC("qpdf", "QPDF handle direct page object"); | ||
| 143 | + cur_node.warnIfPossible( | ||
| 144 | + "kid " + std::to_string(i) + " (from 0) is direct; converting to indirect"); | ||
| 145 | + kid = makeIndirectObject(kid); | ||
| 146 | + ++errors; | ||
| 147 | + } | ||
| 128 | if (kid.hasKey("/Kids")) { | 148 | if (kid.hasKey("/Kids")) { |
| 129 | - getAllPagesInternal(kid, visited, seen, media_box); | 149 | + getAllPagesInternal(kid, visited, seen, media_box, resources); |
| 130 | } else { | 150 | } else { |
| 131 | if (!media_box && !kid.getKey("/MediaBox").isRectangle()) { | 151 | if (!media_box && !kid.getKey("/MediaBox").isRectangle()) { |
| 132 | QTC::TC("qpdf", "QPDF missing mediabox"); | 152 | QTC::TC("qpdf", "QPDF missing mediabox"); |
| @@ -136,13 +156,13 @@ QPDF::getAllPagesInternal( | @@ -136,13 +156,13 @@ QPDF::getAllPagesInternal( | ||
| 136 | kid.replaceKey( | 156 | kid.replaceKey( |
| 137 | "/MediaBox", | 157 | "/MediaBox", |
| 138 | QPDFObjectHandle::newArray(QPDFObjectHandle::Rectangle(0, 0, 612, 792))); | 158 | QPDFObjectHandle::newArray(QPDFObjectHandle::Rectangle(0, 0, 612, 792))); |
| 159 | + ++errors; | ||
| 139 | } | 160 | } |
| 140 | - if (!kid.isIndirect()) { | ||
| 141 | - QTC::TC("qpdf", "QPDF handle direct page object"); | ||
| 142 | - cur_node.warnIfPossible( | ||
| 143 | - "kid " + std::to_string(i) + " (from 0) is direct; converting to indirect"); | ||
| 144 | - kid = makeIndirectObject(kid); | ||
| 145 | - } else if (!seen.add(kid)) { | 161 | + if (!resources && !kid.getKey("/Resources").isDictionary()) { |
| 162 | + // Consider adding an information message | ||
| 163 | + ++errors; | ||
| 164 | + } | ||
| 165 | + if (!seen.add(kid)) { | ||
| 146 | // Make a copy of the page. This does the same as shallowCopyPage in | 166 | // Make a copy of the page. This does the same as shallowCopyPage in |
| 147 | // QPDFPageObjectHelper. | 167 | // QPDFPageObjectHelper. |
| 148 | QTC::TC("qpdf", "QPDF resolve duplicated page object"); | 168 | QTC::TC("qpdf", "QPDF resolve duplicated page object"); |
| @@ -151,6 +171,8 @@ QPDF::getAllPagesInternal( | @@ -151,6 +171,8 @@ QPDF::getAllPagesInternal( | ||
| 151 | "kid " + std::to_string(i) + | 171 | "kid " + std::to_string(i) + |
| 152 | " (from 0) appears more than once in the pages tree;" | 172 | " (from 0) appears more than once in the pages tree;" |
| 153 | " creating a new page object as a copy"); | 173 | " creating a new page object as a copy"); |
| 174 | + // This needs to be fixed. shallowCopy does not necessarily produce a valid | ||
| 175 | + // page. | ||
| 154 | kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy()); | 176 | kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy()); |
| 155 | seen.add(kid); | 177 | seen.add(kid); |
| 156 | } else { | 178 | } else { |
| @@ -161,12 +183,24 @@ QPDF::getAllPagesInternal( | @@ -161,12 +183,24 @@ QPDF::getAllPagesInternal( | ||
| 161 | kid = QPDFObjectHandle::newNull(); | 183 | kid = QPDFObjectHandle::newNull(); |
| 162 | continue; | 184 | continue; |
| 163 | } | 185 | } |
| 186 | + if (!kid.getKey("/Parent").isSameObjectAs(cur_node)) { | ||
| 187 | + // Consider fixing and adding an information message. | ||
| 188 | + ++errors; | ||
| 189 | + } | ||
| 164 | } | 190 | } |
| 165 | if (!kid.isDictionaryOfType("/Page")) { | 191 | if (!kid.isDictionaryOfType("/Page")) { |
| 166 | kid.warnIfPossible("/Type key should be /Page but is not; overriding"); | 192 | kid.warnIfPossible("/Type key should be /Page but is not; overriding"); |
| 167 | kid.replaceKey("/Type", "/Page"_qpdf); | 193 | kid.replaceKey("/Type", "/Page"_qpdf); |
| 194 | + ++errors; | ||
| 195 | + } | ||
| 196 | + if (m->in_xref_reconstruction && errors > 2) { | ||
| 197 | + cur_node.warnIfPossible( | ||
| 198 | + "kid " + std::to_string(i) + " (from 0) has too many errors; ignoring page"); | ||
| 199 | + m->invalid_page_found = true; | ||
| 200 | + kid = QPDFObjectHandle::newNull(); | ||
| 201 | + continue; | ||
| 168 | } | 202 | } |
| 169 | - m->all_pages.push_back(kid); | 203 | + m->all_pages.emplace_back(kid); |
| 170 | } | 204 | } |
| 171 | } | 205 | } |
| 172 | } | 206 | } |