Commit 5cfcd4f361063df8e216489915758ce40a15f15b
1 parent
e09ae710
Additional checks for unreferenced resources
Explicitly abandon removal of unreferenced resources if there are any lexical errors in the page's contents. This case always generated a warning, but it now also prevents removal of unreferenced resources, thus strongly decreasing the likelihood of data loss.
Showing
5 changed files
with
272 additions
and
3 deletions
libqpdf/QPDFPageObjectHelper.cc
| ... | ... | @@ -99,11 +99,16 @@ QPDFPageObjectHelper::addContentTokenFilter( |
| 99 | 99 | class NameWatcher: public QPDFObjectHandle::TokenFilter |
| 100 | 100 | { |
| 101 | 101 | public: |
| 102 | + NameWatcher() : | |
| 103 | + saw_bad(false) | |
| 104 | + { | |
| 105 | + } | |
| 102 | 106 | virtual ~NameWatcher() |
| 103 | 107 | { |
| 104 | 108 | } |
| 105 | 109 | virtual void handleToken(QPDFTokenizer::Token const&); |
| 106 | 110 | std::set<std::string> names; |
| 111 | + bool saw_bad; | |
| 107 | 112 | }; |
| 108 | 113 | |
| 109 | 114 | void |
| ... | ... | @@ -116,6 +121,10 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token) |
| 116 | 121 | this->names.insert( |
| 117 | 122 | QPDFObjectHandle::newName(token.getValue()).getName()); |
| 118 | 123 | } |
| 124 | + else if (token.getType() == QPDFTokenizer::tt_bad) | |
| 125 | + { | |
| 126 | + saw_bad = true; | |
| 127 | + } | |
| 119 | 128 | writeToken(token); |
| 120 | 129 | } |
| 121 | 130 | |
| ... | ... | @@ -134,6 +143,14 @@ QPDFPageObjectHelper::removeUnreferencedResources() |
| 134 | 143 | "; not attempting to remove unreferenced objects from this page"); |
| 135 | 144 | return; |
| 136 | 145 | } |
| 146 | + if (nw.saw_bad) | |
| 147 | + { | |
| 148 | + QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names"); | |
| 149 | + this->oh.warnIfPossible( | |
| 150 | + "Bad token found while scanning content stream; " | |
| 151 | + "not attempting to remove unreferenced objects from this page"); | |
| 152 | + return; | |
| 153 | + } | |
| 137 | 154 | // Walk through /Font and /XObject dictionaries, removing any |
| 138 | 155 | // resources that are not referenced. We must make copies of |
| 139 | 156 | // resource dictionaries down into the dictionaries are mutating | ... | ... |
qpdf/qpdf.testcov
qpdf/qtest/qpdf.test
| ... | ... | @@ -1384,7 +1384,7 @@ my @sp_cases = ( |
| 1384 | 1384 | [11, 'pdf extension', '', 'split-out.Pdf'], |
| 1385 | 1385 | [4, 'fallback', '--pages 11-pages.pdf 1-3 minimal.pdf --', 'split-out'], |
| 1386 | 1386 | ); |
| 1387 | -$n_tests += 21; | |
| 1387 | +$n_tests += 23; | |
| 1388 | 1388 | for (@sp_cases) |
| 1389 | 1389 | { |
| 1390 | 1390 | $n_tests += 1 + $_->[0]; |
| ... | ... | @@ -1482,10 +1482,20 @@ $td->runtest("split shared font, xobject", |
| 1482 | 1482 | foreach my $i (qw(1 2 3 4)) |
| 1483 | 1483 | { |
| 1484 | 1484 | $td->runtest("check output ($i)", |
| 1485 | - {$td->FILE => "shared-font-xobject-split-$i.pdf"}, | |
| 1486 | - {$td->FILE => "split-out-shared-font-xobject-$i.pdf"}); | |
| 1485 | + {$td->FILE => "split-out-shared-font-xobject-$i.pdf"}, | |
| 1486 | + {$td->FILE => "shared-font-xobject-split-$i.pdf"}); | |
| 1487 | 1487 | } |
| 1488 | 1488 | |
| 1489 | +$td->runtest("unreferenced resources with bad token", | |
| 1490 | + {$td->COMMAND => | |
| 1491 | + "qpdf --qdf --static-id --split-pages=2" . | |
| 1492 | + " coalesce.pdf split-out-bad-token.pdf"}, | |
| 1493 | + {$td->FILE => "coalesce-split.out", $td->EXIT_STATUS => 3}, | |
| 1494 | + $td->NORMALIZE_NEWLINES); | |
| 1495 | +$td->runtest("check output", | |
| 1496 | + {$td->FILE => "split-out-bad-token-1-2.pdf"}, | |
| 1497 | + {$td->FILE => "coalesce-split-1-2.pdf"}); | |
| 1498 | + | |
| 1489 | 1499 | show_ntests(); |
| 1490 | 1500 | # ---------- |
| 1491 | 1501 | $td->notify("--- Keep Files Open ---"); | ... | ... |
qpdf/qtest/qpdf/coalesce-split-1-2.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/coalesce-split.out
0 → 100644
| 1 | +WARNING: coalesce.pdf, object 3 0 at offset 181: Bad token found while scanning content stream; not attempting to remove unreferenced objects from this page | |
| 2 | +WARNING: empty PDF: content normalization encountered bad tokens | |
| 3 | +WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents | |
| 4 | +WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. | |
| 5 | +WARNING: empty PDF: content normalization encountered bad tokens | |
| 6 | +WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. | |
| 7 | +WARNING: empty PDF: content normalization encountered bad tokens | |
| 8 | +WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents | |
| 9 | +WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. | |
| 10 | +qpdf: operation succeeded with warnings; resulting file may have some problems | ... | ... |