Commit bb3137296d4070e268690e8233e9d3eb2d64c652
1 parent
6d2b4d8f
Handle root /Pages pointing to other than page tree root (fixes #398)
Showing 6 changed files with 126 additions and 3 deletions
ChangeLog
| 1 | +2020-02-22 Jay Berkenbilt <ejb@ql.org> | ||
| 2 | + | ||
| 3 | + * Detect, warn, and correct the case of /Pages in the document | ||
| 4 | + catalog incorrectly pointing to a page or intermediate node | ||
| 5 | + instead of the root of the pages tree. Fixes #398. | ||
| 6 | + | ||
| 1 | 2020-01-26 Jay Berkenbilt <ejb@ql.org> | 7 | 2020-01-26 Jay Berkenbilt <ejb@ql.org> |
| 2 | 8 | ||
| 3 | * 9.1.1: release | 9 | * 9.1.1: release |
libqpdf/QPDF_pages.cc
| @@ -49,8 +49,36 @@ QPDF::getAllPages() | @@ -49,8 +49,36 @@ QPDF::getAllPages() | ||
| 49 | { | 49 | { |
| 50 | std::set<QPDFObjGen> visited; | 50 | std::set<QPDFObjGen> visited; |
| 51 | std::set<QPDFObjGen> seen; | 51 | std::set<QPDFObjGen> seen; |
| 52 | - getAllPagesInternal(getRoot().getKey("/Pages"), this->m->all_pages, | ||
| 53 | - visited, seen); | 52 | + QPDFObjectHandle pages = getRoot().getKey("/Pages"); |
| 53 | + bool warned = false; | ||
| 54 | + bool changed_pages = false; | ||
| 55 | + while (pages.isDictionary() && pages.hasKey("/Parent")) | ||
| 56 | + { | ||
| 57 | + if (seen.count(pages.getObjGen())) | ||
| 58 | + { | ||
| 59 | + // loop -- will be detected again and reported later | ||
| 60 | + break; | ||
| 61 | + } | ||
| 62 | + // Files have been found in the wild where /Pages in the | ||
| 63 | + // catalog points to the first page. Try to work around | ||
| 64 | + // this and similar cases with this heuristic. | ||
| 65 | + if (! warned) | ||
| 66 | + { | ||
| 67 | + getRoot().warnIfPossible( | ||
| 68 | + "document page tree root (root -> /Pages) doesn't point" | ||
| 69 | + " to the root of the page tree; attempting to correct"); | ||
| 70 | + warned = true; | ||
| 71 | + } | ||
| 72 | + seen.insert(pages.getObjGen()); | ||
| 73 | + changed_pages = true; | ||
| 74 | + pages = pages.getKey("/Parent"); | ||
| 75 | + } | ||
| 76 | + if (changed_pages) | ||
| 77 | + { | ||
| 78 | + getRoot().replaceKey("/Pages", pages); | ||
| 79 | + } | ||
| 80 | + seen.clear(); | ||
| 81 | + getAllPagesInternal(pages, this->m->all_pages, visited, seen); | ||
| 54 | } | 82 | } |
| 55 | return this->m->all_pages; | 83 | return this->m->all_pages; |
| 56 | } | 84 | } |
qpdf/qtest/qpdf.test
| @@ -2779,7 +2779,7 @@ for (my $n = 16; $n <= 19; ++$n) | @@ -2779,7 +2779,7 @@ for (my $n = 16; $n <= 19; ++$n) | ||
| 2779 | show_ntests(); | 2779 | show_ntests(); |
| 2780 | # ---------- | 2780 | # ---------- |
| 2781 | $td->notify("--- Specific File Tests ---"); | 2781 | $td->notify("--- Specific File Tests ---"); |
| 2782 | -$n_tests += 7; | 2782 | +$n_tests += 9; |
| 2783 | 2783 | ||
| 2784 | # Special PDF files that caused problems at some point | 2784 | # Special PDF files that caused problems at some point |
| 2785 | 2785 | ||
| @@ -2810,6 +2810,14 @@ $td->runtest("check output", | @@ -2810,6 +2810,14 @@ $td->runtest("check output", | ||
| 2810 | $td->runtest("check fix-qdf idempotency", | 2810 | $td->runtest("check fix-qdf idempotency", |
| 2811 | {$td->COMMAND => "fix-qdf a.pdf"}, | 2811 | {$td->COMMAND => "fix-qdf a.pdf"}, |
| 2812 | {$td->FILE => "a.pdf", $td->EXIT_STATUS => 0}); | 2812 | {$td->FILE => "a.pdf", $td->EXIT_STATUS => 0}); |
| 2813 | +$td->runtest("pages points to page", | ||
| 2814 | + {$td->COMMAND => | ||
| 2815 | + "qpdf --static-id --linearize pages-is-page.pdf a.pdf"}, | ||
| 2816 | + {$td->FILE => "pages-is-page.out", $td->EXIT_STATUS => 3}, | ||
| 2817 | + $td->NORMALIZE_NEWLINES); | ||
| 2818 | +$td->runtest("check output", | ||
| 2819 | + {$td->FILE => "a.pdf"}, | ||
| 2820 | + {$td->FILE => "pages-is-page-out.pdf"}); | ||
| 2813 | 2821 | ||
| 2814 | show_ntests(); | 2822 | show_ntests(); |
| 2815 | # ---------- | 2823 | # ---------- |
qpdf/qtest/qpdf/pages-is-page-out.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/pages-is-page.out
0 → 100644
qpdf/qtest/qpdf/pages-is-page.pdf
0 → 100644
| 1 | +%PDF-1.3 | ||
| 2 | +1 0 obj | ||
| 3 | +<< | ||
| 4 | + /Type /Catalog | ||
| 5 | + /Pages 3 0 R | ||
| 6 | +>> | ||
| 7 | +endobj | ||
| 8 | + | ||
| 9 | +2 0 obj | ||
| 10 | +<< | ||
| 11 | + /Type /Pages | ||
| 12 | + /Kids [ | ||
| 13 | + 3 0 R | ||
| 14 | + ] | ||
| 15 | + /Count 1 | ||
| 16 | +>> | ||
| 17 | +endobj | ||
| 18 | + | ||
| 19 | +3 0 obj | ||
| 20 | +<< | ||
| 21 | + /Type /Page | ||
| 22 | + /Parent 2 0 R | ||
| 23 | + /MediaBox [0 0 612 792] | ||
| 24 | + /Contents 4 0 R | ||
| 25 | + /Resources << | ||
| 26 | + /ProcSet 5 0 R | ||
| 27 | + /Font << | ||
| 28 | + /F1 6 0 R | ||
| 29 | + >> | ||
| 30 | + >> | ||
| 31 | +>> | ||
| 32 | +endobj | ||
| 33 | + | ||
| 34 | +4 0 obj | ||
| 35 | +<< | ||
| 36 | + /Length 44 | ||
| 37 | +>> | ||
| 38 | +stream | ||
| 39 | +BT | ||
| 40 | + /F1 24 Tf | ||
| 41 | + 72 720 Td | ||
| 42 | + (Potato) Tj | ||
| 43 | +ET | ||
| 44 | +endstream | ||
| 45 | +endobj | ||
| 46 | + | ||
| 47 | +5 0 obj | ||
| 48 | +[ | ||
| 49 | + /PDF | ||
| 50 | + /Text | ||
| 51 | +] | ||
| 52 | +endobj | ||
| 53 | + | ||
| 54 | +6 0 obj | ||
| 55 | +<< | ||
| 56 | + /Type /Font | ||
| 57 | + /Subtype /Type1 | ||
| 58 | + /Name /F1 | ||
| 59 | + /BaseFont /Helvetica | ||
| 60 | + /Encoding /WinAnsiEncoding | ||
| 61 | +>> | ||
| 62 | +endobj | ||
| 63 | + | ||
| 64 | +xref | ||
| 65 | +0 7 | ||
| 66 | +0000000000 65535 f | ||
| 67 | +0000000009 00000 n | ||
| 68 | +0000000063 00000 n | ||
| 69 | +0000000135 00000 n | ||
| 70 | +0000000307 00000 n | ||
| 71 | +0000000403 00000 n | ||
| 72 | +0000000438 00000 n | ||
| 73 | +trailer << | ||
| 74 | + /Size 7 | ||
| 75 | + /Root 1 0 R | ||
| 76 | +>> | ||
| 77 | +startxref | ||
| 78 | +556 | ||
| 79 | +%%EOF |