Commit a5a016cdd26a8e5c99e5f019bc30d1bdf6c050a2
1 parent
8ce3b53c
Revert preservations of outlines with --split-pages
The preservation of outlines didn't provide very useful behavior anyway as it copied all outlines but most didn't work. This implementation also caused a very significant performance hit and so is being reverted until a proper solution can be coded. The eventual solution will not be compatible with the reverted solution anyway, so it's best not to leave this in.
Showing
4 changed files
with
23 additions
and
25 deletions
ChangeLog
| 1 | +2019-04-20 Jay Berkenbilt <ejb@ql.org> | ||
| 2 | + | ||
| 3 | + * Revert change that included preservation of outlines (bookmarks) | ||
| 4 | + in --split-pages. The way it was implemented caused a very | ||
| 5 | + significant performance penalty when splitting pages with | ||
| 6 | + outlines. We need a better solution that only copies the relevant | ||
| 7 | + items, not the whole tree. | ||
| 8 | + | ||
| 1 | 2019-03-11 Jay Berkenbilt <ejb@ql.org> | 9 | 2019-03-11 Jay Berkenbilt <ejb@ql.org> |
| 2 | 10 | ||
| 3 | * JSON serialization: add missing leading 0 to decimal values | 11 | * JSON serialization: add missing leading 0 to decimal values |
TODO
| @@ -77,6 +77,17 @@ Page splitting/merging | @@ -77,6 +77,17 @@ Page splitting/merging | ||
| 77 | * make sure conflicting named destinations work possibly test by | 77 | * make sure conflicting named destinations work possibly test by |
| 78 | including the same file by two paths in a merge | 78 | including the same file by two paths in a merge |
| 79 | 79 | ||
| 80 | + Note: original implementation of bookmark preservation for split | ||
| 81 | + pages caused a very high performance hit. The problem was | ||
| 82 | + introduced in 313ba081265f69ac9a0324f9fe87087c72918191 and reverted | ||
| 83 | + in the commit that adds this paragraph. The revert includes marking | ||
| 84 | + a few tests cases as $td->EXPECT_FAILURE. When properly coded, the | ||
| 85 | + test cases will need to be adjusted to only include the parts of | ||
| 86 | + the outlines that are actually copied. The tests in question are | ||
| 87 | + "split page with outlines". When implementing properly, ensure that | ||
| 88 | + the performance is not adversely affected by timing split-pages on | ||
| 89 | + a large file with complex outlines such as the PDF specification. | ||
| 90 | + | ||
| 80 | When pruning outlines, keep all outlines in the hierarchy that are | 91 | When pruning outlines, keep all outlines in the hierarchy that are |
| 81 | above an outline for a page we care about. If one of the ancestor | 92 | above an outline for a page we care about. If one of the ancestor |
| 82 | outlines points to a non-existent page, clear its dest. If an | 93 | outlines points to a non-existent page, clear its dest. If an |
qpdf/qpdf.cc
| @@ -4966,30 +4966,6 @@ static void write_outfile(QPDF& pdf, Options& o) | @@ -4966,30 +4966,6 @@ static void write_outfile(QPDF& pdf, Options& o) | ||
| 4966 | "/Nums", QPDFObjectHandle::newArray(labels)); | 4966 | "/Nums", QPDFObjectHandle::newArray(labels)); |
| 4967 | outpdf.getRoot().replaceKey("/PageLabels", page_labels); | 4967 | outpdf.getRoot().replaceKey("/PageLabels", page_labels); |
| 4968 | } | 4968 | } |
| 4969 | - // Copying the outlines tree, names table, and any | ||
| 4970 | - // outdated Dests key from the original file will make | ||
| 4971 | - // some things work in the split files. It is not a | ||
| 4972 | - // complete solution, but at least outlines whose | ||
| 4973 | - // destinations are on pages that have been preserved will | ||
| 4974 | - // work normally. There are other top-level structures | ||
| 4975 | - // that should be copied as well. This will be improved in | ||
| 4976 | - // the future. | ||
| 4977 | - std::list<std::string> to_copy; | ||
| 4978 | - to_copy.push_back("/Names"); | ||
| 4979 | - to_copy.push_back("/Dests"); | ||
| 4980 | - to_copy.push_back("/Outlines"); | ||
| 4981 | - for (std::list<std::string>::iterator iter = to_copy.begin(); | ||
| 4982 | - iter != to_copy.end(); ++iter) | ||
| 4983 | - { | ||
| 4984 | - QPDFObjectHandle orig = pdf.getRoot().getKey(*iter); | ||
| 4985 | - if (! orig.isIndirect()) | ||
| 4986 | - { | ||
| 4987 | - orig = pdf.makeIndirectObject(orig); | ||
| 4988 | - } | ||
| 4989 | - outpdf.getRoot().replaceKey( | ||
| 4990 | - *iter, | ||
| 4991 | - outpdf.copyForeignObject(orig)); | ||
| 4992 | - } | ||
| 4993 | std::string page_range = QUtil::int_to_string(first, pageno_len); | 4969 | std::string page_range = QUtil::int_to_string(first, pageno_len); |
| 4994 | if (o.split_pages > 1) | 4970 | if (o.split_pages > 1) |
| 4995 | { | 4971 | { |
qpdf/qtest/qpdf.test
| @@ -1594,6 +1594,8 @@ foreach my $i (qw(01-06 07-11)) | @@ -1594,6 +1594,8 @@ foreach my $i (qw(01-06 07-11)) | ||
| 1594 | {$td->FILE => "labels-split-$i.pdf"}); | 1594 | {$td->FILE => "labels-split-$i.pdf"}); |
| 1595 | } | 1595 | } |
| 1596 | 1596 | ||
| 1597 | +# See comments in TODO about these expected failures. Search for | ||
| 1598 | +# "split page with outlines". | ||
| 1597 | $td->runtest("split page with outlines", | 1599 | $td->runtest("split page with outlines", |
| 1598 | {$td->COMMAND => "qpdf --qdf --static-id --split-pages=10". | 1600 | {$td->COMMAND => "qpdf --qdf --static-id --split-pages=10". |
| 1599 | " outlines-with-actions.pdf split-out-outlines.pdf"}, | 1601 | " outlines-with-actions.pdf split-out-outlines.pdf"}, |
| @@ -1602,7 +1604,8 @@ foreach my $i (qw(01-10 11-20 21-30)) | @@ -1602,7 +1604,8 @@ foreach my $i (qw(01-10 11-20 21-30)) | ||
| 1602 | { | 1604 | { |
| 1603 | $td->runtest("check output ($i)", | 1605 | $td->runtest("check output ($i)", |
| 1604 | {$td->FILE => "split-out-outlines-$i.pdf"}, | 1606 | {$td->FILE => "split-out-outlines-$i.pdf"}, |
| 1605 | - {$td->FILE => "outlines-split-$i.pdf"}); | 1607 | + {$td->FILE => "outlines-split-$i.pdf"}, |
| 1608 | + $td->EXPECT_FAILURE) | ||
| 1606 | } | 1609 | } |
| 1607 | 1610 | ||
| 1608 | foreach my $d (@sp_cases) | 1611 | foreach my $d (@sp_cases) |