Commit 0c7c7e4ba485fd39f5b6d41fa2924c607d2eeda0

Authored by Jay Berkenbilt
1 parent 25aff0bd

Track whether certain page modifying methods have been called

We need to know whether pushInheritedAttributesToPage or getAllPages
have been called when generating JSON output. When reading the JSON
back in, we have to call the same methods so that object numbers will
line up properly.
... ... @@ -9,10 +9,6 @@ Before Release:
9 9 * Release qtest with updates to qtest-driver and copy back into qpdf
10 10  
11 11 Next:
12   -* QPDF -- track whether the pages tree was modified (whether
13   - getAllPages was ever called. If so, consider generating a non-flat
14   - pages tree before creating output to better handle files with lots
15   - of pages.
16 12 * JSON v2 fixes
17 13  
18 14 Pending changes:
... ... @@ -44,6 +40,12 @@ Pending changes:
44 40 drivers from the qpdf directory into the latter category as long
45 41 as doing so isn't too troublesome from a coverage standpoint.
46 42 * Consider adding fuzzer code for JSON
  43 +* Consider generating a non-flat pages tree before creating output to
  44 + better handle files with lots of pages. If there are more than 256
  45 + pages, add a second layer with the second layer nodes having no more
  46 + than 256 nodes and being as evenly sized as possible. Don't worry
  47 + about the case of more than 65,536 pages. If the top node has more
  48 + than 256 children, we'll live with it.
47 49  
48 50 Soon: Break ground on "Document-level work"
49 51  
... ... @@ -81,7 +83,8 @@ JSON v2 fixes
81 83 "qpdf": [
82 84 {
83 85 "jsonversion": 2,
84   - "repairpagestree": false,
  86 + "pushedinheritedpageresources": false,
  87 + "calledgetallpages": false,
85 88 "maxobjectid": 10
86 89 },
87 90 {
... ... @@ -110,6 +113,10 @@ JSON v2 fixes
110 113 the same number of elements whose individual elements are
111 114 validated according to the regular rules.
112 115  
  116 + * When reading back in, we'll have to call
  117 + pushInheritedAttributesToPage or getAllPages based on the values
  118 + of the metadata.
  119 +
113 120 * Support json v2 in the C API. At a minimum, write_json,
114 121 create_from_json, and update_from_json need to be there and should
115 122 take the same kinds of functions as the C API for logger.
... ...
include/qpdf/QPDF.hh
... ... @@ -709,6 +709,11 @@ class QPDF
709 709 QPDF_DLL
710 710 std::vector<QPDFObjectHandle> const& getAllPages();
711 711  
  712 + QPDF_DLL
  713 + bool everCalledGetAllPages() const;
  714 + QPDF_DLL
  715 + bool everPushedInheritedAttributesToPages() const;
  716 +
712 717 // These methods, given a page object or its object/generation
713 718 // number, return the 0-based index into the array returned by
714 719 // getAllPages() for that page. An exception is thrown if the page
... ... @@ -1690,6 +1695,8 @@ class QPDF
1690 1695 std::vector<QPDFObjectHandle> all_pages;
1691 1696 std::map<QPDFObjGen, int> pageobj_to_pages_pos;
1692 1697 bool pushed_inherited_attributes_to_pages;
  1698 + bool ever_pushed_inherited_attributes_to_pages;
  1699 + bool ever_called_get_all_pages;
1693 1700 std::vector<QPDFExc> warnings;
1694 1701 std::map<unsigned long long, ObjCopier> object_copiers;
1695 1702 std::shared_ptr<QPDFObjectHandle::StreamDataProvider> copied_streams;
... ...
libqpdf/QPDF.cc
... ... @@ -222,6 +222,8 @@ QPDF::Members::Members() :
222 222 attempt_recovery(true),
223 223 encp(new EncryptionParameters),
224 224 pushed_inherited_attributes_to_pages(false),
  225 + ever_pushed_inherited_attributes_to_pages(false),
  226 + ever_called_get_all_pages(false),
225 227 copied_stream_data_provider(0),
226 228 reconstructed_xref(false),
227 229 fixed_dangling_refs(false),
... ... @@ -2879,3 +2881,15 @@ QPDF::stopOnError(std::string const& message)
2879 2881 this->m->file->getLastOffset(),
2880 2882 message);
2881 2883 }
  2884 +
  2885 +bool
  2886 +QPDF::everCalledGetAllPages() const
  2887 +{
  2888 + return this->m->ever_called_get_all_pages;
  2889 +}
  2890 +
  2891 +bool
  2892 +QPDF::everPushedInheritedAttributesToPages() const
  2893 +{
  2894 + return this->m->ever_pushed_inherited_attributes_to_pages;
  2895 +}
... ...
libqpdf/QPDF_optimization.cc
... ... @@ -162,6 +162,7 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
162 162 " pushing inherited attributes to pages");
163 163 }
164 164 this->m->pushed_inherited_attributes_to_pages = true;
  165 + this->m->ever_pushed_inherited_attributes_to_pages = true;
165 166 }
166 167  
167 168 void
... ...
libqpdf/QPDF_pages.cc
... ... @@ -54,6 +54,7 @@ QPDF::getAllPages()
54 54 // Note that pushInheritedAttributesToPage may also be used to
55 55 // initialize this->m->all_pages.
56 56 if (this->m->all_pages.empty()) {
  57 + this->m->ever_called_get_all_pages = true;
57 58 std::set<QPDFObjGen> visited;
58 59 std::set<QPDFObjGen> seen;
59 60 QPDFObjectHandle pages = getRoot().getKey("/Pages");
... ...
qpdf/test_driver.cc
... ... @@ -691,7 +691,9 @@ test_15(QPDF& pdf, char const* arg2)
691 691 // Remove pages from various places, checking to make sure
692 692 // that our pages reference is getting updated.
693 693 assert(pages.size() == 10);
  694 + assert(!pdf.everPushedInheritedAttributesToPages());
694 695 pdf.removePage(pages.back()); // original page 9
  696 + assert(pdf.everPushedInheritedAttributesToPages());
695 697 assert(pages.size() == 9);
696 698 pdf.removePage(*pages.begin()); // original page 0
697 699 assert(pages.size() == 8);
... ... @@ -767,7 +769,9 @@ static void
767 769 test_16(QPDF& pdf, char const* arg2)
768 770 {
769 771 // Insert a page manually and then update the cache.
  772 + assert(!pdf.everCalledGetAllPages());
770 773 std::vector<QPDFObjectHandle> const& all_pages = pdf.getAllPages();
  774 + assert(pdf.everCalledGetAllPages());
771 775  
772 776 QPDFObjectHandle contents = createPageContents(pdf, "New page 10");
773 777 QPDFObjectHandle page =
... ... @@ -785,6 +789,7 @@ test_16(QPDF& pdf, char const* arg2)
785 789 kids.appendItem(page);
786 790 assert(all_pages.size() == 10);
787 791 pdf.updateAllPagesCache();
  792 + assert(pdf.everCalledGetAllPages());
788 793 assert(all_pages.size() == 11);
789 794 assert(all_pages.back().getObjGen() == page.getObjGen());
790 795  
... ...