Commit bb3137296d4070e268690e8233e9d3eb2d64c652

Authored by Jay Berkenbilt
1 parent 6d2b4d8f

Handle root /Pages pointing to other than page tree root (fixes #398)

ChangeLog
  1 +2020-02-22 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Detect, warn, and correct the case of /Pages in the document
  4 + catalog incorrectly pointing to a page or intermediate node
  5 + instead of the root of the pages tree. Fixes #398.
  6 +
1 7 2020-01-26 Jay Berkenbilt <ejb@ql.org>
2 8  
3 9 * 9.1.1: release
... ...
libqpdf/QPDF_pages.cc
... ... @@ -49,8 +49,36 @@ QPDF::getAllPages()
49 49 {
50 50 std::set<QPDFObjGen> visited;
51 51 std::set<QPDFObjGen> seen;
52   - getAllPagesInternal(getRoot().getKey("/Pages"), this->m->all_pages,
53   - visited, seen);
  52 + QPDFObjectHandle pages = getRoot().getKey("/Pages");
  53 + bool warned = false;
  54 + bool changed_pages = false;
  55 + while (pages.isDictionary() && pages.hasKey("/Parent"))
  56 + {
  57 + if (seen.count(pages.getObjGen()))
  58 + {
  59 + // loop -- will be detected again and reported later
  60 + break;
  61 + }
  62 + // Files have been found in the wild where /Pages in the
  63 + // catalog points to the first page. Try to work around
  64 + // this and similar cases with this heuristic.
  65 + if (! warned)
  66 + {
  67 + getRoot().warnIfPossible(
  68 + "document page tree root (root -> /Pages) doesn't point"
  69 + " to the root of the page tree; attempting to correct");
  70 + warned = true;
  71 + }
  72 + seen.insert(pages.getObjGen());
  73 + changed_pages = true;
  74 + pages = pages.getKey("/Parent");
  75 + }
  76 + if (changed_pages)
  77 + {
  78 + getRoot().replaceKey("/Pages", pages);
  79 + }
  80 + seen.clear();
  81 + getAllPagesInternal(pages, this->m->all_pages, visited, seen);
54 82 }
55 83 return this->m->all_pages;
56 84 }
... ...
qpdf/qtest/qpdf.test
... ... @@ -2779,7 +2779,7 @@ for (my $n = 16; $n <= 19; ++$n)
2779 2779 show_ntests();
2780 2780 # ----------
2781 2781 $td->notify("--- Specific File Tests ---");
2782   -$n_tests += 7;
  2782 +$n_tests += 9;
2783 2783  
2784 2784 # Special PDF files that caused problems at some point
2785 2785  
... ... @@ -2810,6 +2810,14 @@ $td->runtest("check output",
2810 2810 $td->runtest("check fix-qdf idempotency",
2811 2811 {$td->COMMAND => "fix-qdf a.pdf"},
2812 2812 {$td->FILE => "a.pdf", $td->EXIT_STATUS => 0});
  2813 +$td->runtest("pages points to page",
  2814 + {$td->COMMAND =>
  2815 + "qpdf --static-id --linearize pages-is-page.pdf a.pdf"},
  2816 + {$td->FILE => "pages-is-page.out", $td->EXIT_STATUS => 3},
  2817 + $td->NORMALIZE_NEWLINES);
  2818 +$td->runtest("check output",
  2819 + {$td->FILE => "a.pdf"},
  2820 + {$td->FILE => "pages-is-page-out.pdf"});
2813 2821  
2814 2822 show_ntests();
2815 2823 # ----------
... ...
qpdf/qtest/qpdf/pages-is-page-out.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/pages-is-page.out 0 → 100644
  1 +WARNING: pages-is-page.pdf, object 1 0 at offset 19: document page tree root (root -> /Pages) doesn't point to the root of the page tree; attempting to correct
  2 +qpdf: operation succeeded with warnings; resulting file may have some problems
... ...
qpdf/qtest/qpdf/pages-is-page.pdf 0 → 100644
  1 +%PDF-1.3
  2 +1 0 obj
  3 +<<
  4 + /Type /Catalog
  5 + /Pages 3 0 R
  6 +>>
  7 +endobj
  8 +
  9 +2 0 obj
  10 +<<
  11 + /Type /Pages
  12 + /Kids [
  13 + 3 0 R
  14 + ]
  15 + /Count 1
  16 +>>
  17 +endobj
  18 +
  19 +3 0 obj
  20 +<<
  21 + /Type /Page
  22 + /Parent 2 0 R
  23 + /MediaBox [0 0 612 792]
  24 + /Contents 4 0 R
  25 + /Resources <<
  26 + /ProcSet 5 0 R
  27 + /Font <<
  28 + /F1 6 0 R
  29 + >>
  30 + >>
  31 +>>
  32 +endobj
  33 +
  34 +4 0 obj
  35 +<<
  36 + /Length 44
  37 +>>
  38 +stream
  39 +BT
  40 + /F1 24 Tf
  41 + 72 720 Td
  42 + (Potato) Tj
  43 +ET
  44 +endstream
  45 +endobj
  46 +
  47 +5 0 obj
  48 +[
  49 + /PDF
  50 + /Text
  51 +]
  52 +endobj
  53 +
  54 +6 0 obj
  55 +<<
  56 + /Type /Font
  57 + /Subtype /Type1
  58 + /Name /F1
  59 + /BaseFont /Helvetica
  60 + /Encoding /WinAnsiEncoding
  61 +>>
  62 +endobj
  63 +
  64 +xref
  65 +0 7
  66 +0000000000 65535 f
  67 +0000000009 00000 n
  68 +0000000063 00000 n
  69 +0000000135 00000 n
  70 +0000000307 00000 n
  71 +0000000403 00000 n
  72 +0000000438 00000 n
  73 +trailer <<
  74 + /Size 7
  75 + /Root 1 0 R
  76 +>>
  77 +startxref
  78 +556
  79 +%%EOF
... ...