Commit dac65a21fb4fa5f871e31c314280b75adde89a6c
1 parent
278710fb
Look in form XObjects when removing unreferenced resources (fixes #373)
If a page contains a form XObject, also filter the form XObject and remove its unreferenced resources.
Showing 12 changed files with 49 additions and 1 deletion
ChangeLog
| 1 | 2020-03-31 Jay Berkenbilt <ejb@ql.org> | 1 | 2020-03-31 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | ||
| 3 | + * When detecting unreferenced images during page splitting, if any | ||
| 4 | + XObjects are form XObjects, recursively descend into them and | ||
| 5 | + remove any unreferenced objects from them too. Fixes #373. | ||
| 6 | + | ||
| 3 | * Add QPDFObjectHandle::filterAsContents, which filters a stream's | 7 | * Add QPDFObjectHandle::filterAsContents, which filters a stream's |
| 4 | data as if it were page contents. This can be useful to filter | 8 | data as if it were page contents. This can be useful to filter |
| 5 | form XObjects the same way we would filter page contents. | 9 | form XObjects the same way we would filter page contents. |
libqpdf/QPDFPageObjectHelper.cc
| @@ -568,6 +568,29 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | @@ -568,6 +568,29 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | ||
| 568 | { | 568 | { |
| 569 | dict.removeKey(*k_iter); | 569 | dict.removeKey(*k_iter); |
| 570 | } | 570 | } |
| 571 | + QPDFObjectHandle resource = dict.getKey(*k_iter); | ||
| 572 | + if (resource.isStream() && | ||
| 573 | + resource.getDict().getKey("/Type").isName() && | ||
| 574 | + ("/XObject" == resource.getDict().getKey("/Type").getName()) && | ||
| 575 | + resource.getDict().getKey("/Subtype").isName() && | ||
| 576 | + ("/Form" == resource.getDict().getKey("/Subtype").getName())) | ||
| 577 | + { | ||
| 578 | + QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject"); | ||
| 579 | + removeUnreferencedResourcesHelper( | ||
| 580 | + resource.getDict(), seen, | ||
| 581 | + [&resource]() { | ||
| 582 | + auto result = resource.getDict().getKey("/Resources"); | ||
| 583 | + if (result.isDictionary()) | ||
| 584 | + { | ||
| 585 | + result = result.shallowCopy(); | ||
| 586 | + resource.getDict().replaceKey("/Resources", result); | ||
| 587 | + } | ||
| 588 | + return result; | ||
| 589 | + }, | ||
| 590 | + [&resource](QPDFObjectHandle::TokenFilter* f) { | ||
| 591 | + resource.filterAsContents(f); | ||
| 592 | + }); | ||
| 593 | + } | ||
| 571 | } | 594 | } |
| 572 | } | 595 | } |
| 573 | } | 596 | } |
qpdf/qpdf.testcov
| @@ -449,3 +449,4 @@ QPDFObjectHandle duplicate dict key 0 | @@ -449,3 +449,4 @@ QPDFObjectHandle duplicate dict key 0 | ||
| 449 | QPDFWriter no encryption sig contents 0 | 449 | QPDFWriter no encryption sig contents 0 |
| 450 | QPDFPageObjectHelper colorspace lookup 0 | 450 | QPDFPageObjectHelper colorspace lookup 0 |
| 451 | QPDFWriter ignore XRef in qdf mode 0 | 451 | QPDFWriter ignore XRef in qdf mode 0 |
| 452 | +QPDFPageObjectHelper filter form xobject 0 |
qpdf/qtest/qpdf.test
| @@ -1686,7 +1686,8 @@ my @sp_cases = ( | @@ -1686,7 +1686,8 @@ my @sp_cases = ( | ||
| 1686 | [11, 'pdf extension', '', 'split-out.Pdf'], | 1686 | [11, 'pdf extension', '', 'split-out.Pdf'], |
| 1687 | [4, 'fallback', '--pages 11-pages.pdf 1-3 minimal.pdf --', 'split-out'], | 1687 | [4, 'fallback', '--pages 11-pages.pdf 1-3 minimal.pdf --', 'split-out'], |
| 1688 | ); | 1688 | ); |
| 1689 | -$n_tests += 23; | 1689 | +$n_tests += 32; |
| 1690 | +$n_compare_pdfs += 1; | ||
| 1690 | for (@sp_cases) | 1691 | for (@sp_cases) |
| 1691 | { | 1692 | { |
| 1692 | $n_tests += 1 + $_->[0]; | 1693 | $n_tests += 1 + $_->[0]; |
| @@ -1801,6 +1802,25 @@ $td->runtest("check output", | @@ -1801,6 +1802,25 @@ $td->runtest("check output", | ||
| 1801 | {$td->FILE => "split-out-bad-token-1-2.pdf"}, | 1802 | {$td->FILE => "split-out-bad-token-1-2.pdf"}, |
| 1802 | {$td->FILE => "coalesce-split-1-2.pdf"}); | 1803 | {$td->FILE => "coalesce-split-1-2.pdf"}); |
| 1803 | 1804 | ||
| 1805 | +$td->runtest("shared images in form xobject", | ||
| 1806 | + {$td->COMMAND => "qpdf --qdf --static-id --split-pages". | ||
| 1807 | + " shared-form-images.pdf split-out-shared-form.pdf"}, | ||
| 1808 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | ||
| 1809 | +foreach my $i (qw(1 2 3 4 5 6)) | ||
| 1810 | +{ | ||
| 1811 | + $td->runtest("check output ($i)", | ||
| 1812 | + {$td->FILE => "split-out-shared-form-$i.pdf"}, | ||
| 1813 | + {$td->FILE => "shared-form-split-$i.pdf"}); | ||
| 1814 | +} | ||
| 1815 | +$td->runtest("merge for compare", | ||
| 1816 | + {$td->COMMAND => "qpdf --static-id --empty --pages" . | ||
| 1817 | + " split-out-shared-form*.pdf -- a.pdf"}, | ||
| 1818 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | ||
| 1819 | +$td->runtest("check output", | ||
| 1820 | + {$td->FILE => "a.pdf"}, | ||
| 1821 | + {$td->FILE => "shared-form-images-merged.pdf"}); | ||
| 1822 | +compare_pdfs("shared-form-images.pdf", "a.pdf"); | ||
| 1823 | + | ||
| 1804 | show_ntests(); | 1824 | show_ntests(); |
| 1805 | # ---------- | 1825 | # ---------- |
| 1806 | $td->notify("--- Keep Files Open ---"); | 1826 | $td->notify("--- Keep Files Open ---"); |
qpdf/qtest/qpdf/shared-form-images-merged.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/shared-form-images.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/shared-form-split-1.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/shared-form-split-2.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/shared-form-split-3.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/shared-form-split-4.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/shared-form-split-5.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/shared-form-split-6.pdf
0 → 100644
No preview for this file type