Commit dbf9f15f245edd04342bafd38aab2a8c3dc55da7

Authored by m-holger
Committed by GitHub
2 parents 3427df9b e4be0d58

Merge pull request #1455 from m-holger/fuzz

During xref recovery reject /Page objects with multiple errors
include/qpdf/QPDF.hh
@@ -772,8 +772,9 @@ class QPDF @@ -772,8 +772,9 @@ class QPDF
772 bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); 772 bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
773 bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); 773 bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
774 qpdf_offset_t read_xrefTable(qpdf_offset_t offset); 774 qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
775 - qpdf_offset_t read_xrefStream(qpdf_offset_t offset, bool in_stream_recovery=false);  
776 - qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream, bool in_stream_recovery=false); 775 + qpdf_offset_t read_xrefStream(qpdf_offset_t offset, bool in_stream_recovery = false);
  776 + qpdf_offset_t processXRefStream(
  777 + qpdf_offset_t offset, QPDFObjectHandle& xref_stream, bool in_stream_recovery = false);
777 std::pair<int, std::array<int, 3>> 778 std::pair<int, std::array<int, 3>>
778 processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged); 779 processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);
779 int processXRefSize( 780 int processXRefSize(
@@ -897,7 +898,8 @@ class QPDF @@ -897,7 +898,8 @@ class QPDF
897 QPDFObjectHandle cur_pages, 898 QPDFObjectHandle cur_pages,
898 QPDFObjGen::set& visited, 899 QPDFObjGen::set& visited,
899 QPDFObjGen::set& seen, 900 QPDFObjGen::set& seen,
900 - bool media_box); 901 + bool media_box,
  902 + bool resources);
901 void insertPage(QPDFObjectHandle newpage, int pos); 903 void insertPage(QPDFObjectHandle newpage, int pos);
902 void flattenPagesTree(); 904 void flattenPagesTree();
903 void insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate); 905 void insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate);
libqpdf/QPDF_pages.cc
@@ -43,9 +43,10 @@ QPDF::getAllPages() @@ -43,9 +43,10 @@ QPDF::getAllPages()
43 // Note that pushInheritedAttributesToPage may also be used to initialize m->all_pages. 43 // Note that pushInheritedAttributesToPage may also be used to initialize m->all_pages.
44 if (m->all_pages.empty() && !m->invalid_page_found) { 44 if (m->all_pages.empty() && !m->invalid_page_found) {
45 m->ever_called_get_all_pages = true; 45 m->ever_called_get_all_pages = true;
  46 + auto root = getRoot();
46 QPDFObjGen::set visited; 47 QPDFObjGen::set visited;
47 QPDFObjGen::set seen; 48 QPDFObjGen::set seen;
48 - QPDFObjectHandle pages = getRoot().getKey("/Pages"); 49 + QPDFObjectHandle pages = root.getKey("/Pages");
49 bool warned = false; 50 bool warned = false;
50 bool changed_pages = false; 51 bool changed_pages = false;
51 while (pages.isDictionary() && pages.hasKey("/Parent")) { 52 while (pages.isDictionary() && pages.hasKey("/Parent")) {
@@ -56,7 +57,7 @@ QPDF::getAllPages() @@ -56,7 +57,7 @@ QPDF::getAllPages()
56 // Files have been found in the wild where /Pages in the catalog points to the first 57 // Files have been found in the wild where /Pages in the catalog points to the first
57 // page. Try to work around this and similar cases with this heuristic. 58 // page. Try to work around this and similar cases with this heuristic.
58 if (!warned) { 59 if (!warned) {
59 - getRoot().warnIfPossible( 60 + root.warnIfPossible(
60 "document page tree root (root -> /Pages) doesn't point" 61 "document page tree root (root -> /Pages) doesn't point"
61 " to the root of the page tree; attempting to correct"); 62 " to the root of the page tree; attempting to correct");
62 warned = true; 63 warned = true;
@@ -65,7 +66,7 @@ QPDF::getAllPages() @@ -65,7 +66,7 @@ QPDF::getAllPages()
65 pages = pages.getKey("/Parent"); 66 pages = pages.getKey("/Parent");
66 } 67 }
67 if (changed_pages) { 68 if (changed_pages) {
68 - getRoot().replaceKey("/Pages", pages); 69 + root.replaceKey("/Pages", pages);
69 } 70 }
70 seen.clear(); 71 seen.clear();
71 if (!pages.hasKey("/Kids")) { 72 if (!pages.hasKey("/Kids")) {
@@ -74,7 +75,7 @@ QPDF::getAllPages() @@ -74,7 +75,7 @@ QPDF::getAllPages()
74 qpdf_e_pages, m->file->getName(), "", 0, "root of pages tree has no /Kids array"); 75 qpdf_e_pages, m->file->getName(), "", 0, "root of pages tree has no /Kids array");
75 } 76 }
76 try { 77 try {
77 - getAllPagesInternal(pages, visited, seen, false); 78 + getAllPagesInternal(pages, visited, seen, false, false);
78 } catch (...) { 79 } catch (...) {
79 m->all_pages.clear(); 80 m->all_pages.clear();
80 m->invalid_page_found = false; 81 m->invalid_page_found = false;
@@ -90,7 +91,11 @@ QPDF::getAllPages() @@ -90,7 +91,11 @@ QPDF::getAllPages()
90 91
91 void 92 void
92 QPDF::getAllPagesInternal( 93 QPDF::getAllPagesInternal(
93 - QPDFObjectHandle cur_node, QPDFObjGen::set& visited, QPDFObjGen::set& seen, bool media_box) 94 + QPDFObjectHandle cur_node,
  95 + QPDFObjGen::set& visited,
  96 + QPDFObjGen::set& seen,
  97 + bool media_box,
  98 + bool resources)
94 { 99 {
95 if (!visited.add(cur_node)) { 100 if (!visited.add(cur_node)) {
96 throw QPDFExc( 101 throw QPDFExc(
@@ -101,6 +106,9 @@ QPDF::getAllPagesInternal( @@ -101,6 +106,9 @@ QPDF::getAllPagesInternal(
101 "Loop detected in /Pages structure (getAllPages)"); 106 "Loop detected in /Pages structure (getAllPages)");
102 } 107 }
103 if (!cur_node.isDictionaryOfType("/Pages")) { 108 if (!cur_node.isDictionaryOfType("/Pages")) {
  109 + // During fuzzing files were encountered where the root object appeared in the pages tree.
  110 + // Unconditionally setting the /Type to /Pages could cause problems, but trying to
  111 + // accommodate the possibility may be excessive.
104 cur_node.warnIfPossible("/Type key should be /Pages but is not; overriding"); 112 cur_node.warnIfPossible("/Type key should be /Pages but is not; overriding");
105 cur_node.replaceKey("/Type", "/Pages"_qpdf); 113 cur_node.replaceKey("/Type", "/Pages"_qpdf);
106 } 114 }
@@ -108,6 +116,9 @@ QPDF::getAllPagesInternal( @@ -108,6 +116,9 @@ QPDF::getAllPagesInternal(
108 media_box = cur_node.getKey("/MediaBox").isRectangle(); 116 media_box = cur_node.getKey("/MediaBox").isRectangle();
109 QTC::TC("qpdf", "QPDF inherit mediabox", media_box ? 0 : 1); 117 QTC::TC("qpdf", "QPDF inherit mediabox", media_box ? 0 : 1);
110 } 118 }
  119 + if (!resources) {
  120 + resources = cur_node.getKey("/Resources").isDictionary();
  121 + }
111 auto kids = cur_node.getKey("/Kids"); 122 auto kids = cur_node.getKey("/Kids");
112 if (!visited.add(kids)) { 123 if (!visited.add(kids)) {
113 throw QPDFExc( 124 throw QPDFExc(
@@ -120,13 +131,22 @@ QPDF::getAllPagesInternal( @@ -120,13 +131,22 @@ QPDF::getAllPagesInternal(
120 int i = -1; 131 int i = -1;
121 for (auto& kid: kids.as_array()) { 132 for (auto& kid: kids.as_array()) {
122 ++i; 133 ++i;
  134 + int errors = 0;
  135 +
123 if (!kid.isDictionary()) { 136 if (!kid.isDictionary()) {
124 kid.warnIfPossible("Pages tree includes non-dictionary object; ignoring"); 137 kid.warnIfPossible("Pages tree includes non-dictionary object; ignoring");
125 m->invalid_page_found = true; 138 m->invalid_page_found = true;
126 continue; 139 continue;
127 } 140 }
  141 + if (!kid.isIndirect()) {
  142 + QTC::TC("qpdf", "QPDF handle direct page object");
  143 + cur_node.warnIfPossible(
  144 + "kid " + std::to_string(i) + " (from 0) is direct; converting to indirect");
  145 + kid = makeIndirectObject(kid);
  146 + ++errors;
  147 + }
128 if (kid.hasKey("/Kids")) { 148 if (kid.hasKey("/Kids")) {
129 - getAllPagesInternal(kid, visited, seen, media_box); 149 + getAllPagesInternal(kid, visited, seen, media_box, resources);
130 } else { 150 } else {
131 if (!media_box && !kid.getKey("/MediaBox").isRectangle()) { 151 if (!media_box && !kid.getKey("/MediaBox").isRectangle()) {
132 QTC::TC("qpdf", "QPDF missing mediabox"); 152 QTC::TC("qpdf", "QPDF missing mediabox");
@@ -136,13 +156,13 @@ QPDF::getAllPagesInternal( @@ -136,13 +156,13 @@ QPDF::getAllPagesInternal(
136 kid.replaceKey( 156 kid.replaceKey(
137 "/MediaBox", 157 "/MediaBox",
138 QPDFObjectHandle::newArray(QPDFObjectHandle::Rectangle(0, 0, 612, 792))); 158 QPDFObjectHandle::newArray(QPDFObjectHandle::Rectangle(0, 0, 612, 792)));
  159 + ++errors;
139 } 160 }
140 - if (!kid.isIndirect()) {  
141 - QTC::TC("qpdf", "QPDF handle direct page object");  
142 - cur_node.warnIfPossible(  
143 - "kid " + std::to_string(i) + " (from 0) is direct; converting to indirect");  
144 - kid = makeIndirectObject(kid);  
145 - } else if (!seen.add(kid)) { 161 + if (!resources && !kid.getKey("/Resources").isDictionary()) {
  162 + // Consider adding an information message
  163 + ++errors;
  164 + }
  165 + if (!seen.add(kid)) {
146 // Make a copy of the page. This does the same as shallowCopyPage in 166 // Make a copy of the page. This does the same as shallowCopyPage in
147 // QPDFPageObjectHelper. 167 // QPDFPageObjectHelper.
148 QTC::TC("qpdf", "QPDF resolve duplicated page object"); 168 QTC::TC("qpdf", "QPDF resolve duplicated page object");
@@ -151,6 +171,8 @@ QPDF::getAllPagesInternal( @@ -151,6 +171,8 @@ QPDF::getAllPagesInternal(
151 "kid " + std::to_string(i) + 171 "kid " + std::to_string(i) +
152 " (from 0) appears more than once in the pages tree;" 172 " (from 0) appears more than once in the pages tree;"
153 " creating a new page object as a copy"); 173 " creating a new page object as a copy");
  174 + // This needs to be fixed. shallowCopy does not necessarily produce a valid
  175 + // page.
154 kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy()); 176 kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy());
155 seen.add(kid); 177 seen.add(kid);
156 } else { 178 } else {
@@ -161,12 +183,24 @@ QPDF::getAllPagesInternal( @@ -161,12 +183,24 @@ QPDF::getAllPagesInternal(
161 kid = QPDFObjectHandle::newNull(); 183 kid = QPDFObjectHandle::newNull();
162 continue; 184 continue;
163 } 185 }
  186 + if (!kid.getKey("/Parent").isSameObjectAs(cur_node)) {
  187 + // Consider fixing and adding an information message.
  188 + ++errors;
  189 + }
164 } 190 }
165 if (!kid.isDictionaryOfType("/Page")) { 191 if (!kid.isDictionaryOfType("/Page")) {
166 kid.warnIfPossible("/Type key should be /Page but is not; overriding"); 192 kid.warnIfPossible("/Type key should be /Page but is not; overriding");
167 kid.replaceKey("/Type", "/Page"_qpdf); 193 kid.replaceKey("/Type", "/Page"_qpdf);
  194 + ++errors;
  195 + }
  196 + if (m->in_xref_reconstruction && errors > 2) {
  197 + cur_node.warnIfPossible(
  198 + "kid " + std::to_string(i) + " (from 0) has too many errors; ignoring page");
  199 + m->invalid_page_found = true;
  200 + kid = QPDFObjectHandle::newNull();
  201 + continue;
168 } 202 }
169 - m->all_pages.push_back(kid); 203 + m->all_pages.emplace_back(kid);
170 } 204 }
171 } 205 }
172 } 206 }