Commit 0c7c7e4ba485fd39f5b6d41fa2924c607d2eeda0

Authored by Jay Berkenbilt
1 parent 25aff0bd

Track whether certain page modifying methods have been called

We need to know whether pushInheritedAttributesToPage or getAllPages
have been called when generating JSON output. When reading the JSON
back in, we have to call the same methods so that object numbers will
line up properly.
@@ -9,10 +9,6 @@ Before Release: @@ -9,10 +9,6 @@ Before Release:
9 * Release qtest with updates to qtest-driver and copy back into qpdf 9 * Release qtest with updates to qtest-driver and copy back into qpdf
10 10
11 Next: 11 Next:
12 -* QPDF -- track whether the pages tree was modified (whether  
13 - getAllPages was ever called. If so, consider generating a non-flat  
14 - pages tree before creating output to better handle files with lots  
15 - of pages.  
16 * JSON v2 fixes 12 * JSON v2 fixes
17 13
18 Pending changes: 14 Pending changes:
@@ -44,6 +40,12 @@ Pending changes: @@ -44,6 +40,12 @@ Pending changes:
44 drivers from the qpdf directory into the latter category as long 40 drivers from the qpdf directory into the latter category as long
45 as doing so isn't too troublesome from a coverage standpoint. 41 as doing so isn't too troublesome from a coverage standpoint.
46 * Consider adding fuzzer code for JSON 42 * Consider adding fuzzer code for JSON
  43 +* Consider generating a non-flat pages tree before creating output to
  44 + better handle files with lots of pages. If there are more than 256
  45 + pages, add a second layer with the second layer nodes having no more
  46 + than 256 nodes and being as evenly sized as possible. Don't worry
  47 + about the case of more than 65,536 pages. If the top node has more
  48 + than 256 children, we'll live with it.
47 49
48 Soon: Break ground on "Document-level work" 50 Soon: Break ground on "Document-level work"
49 51
@@ -81,7 +83,8 @@ JSON v2 fixes @@ -81,7 +83,8 @@ JSON v2 fixes
81 "qpdf": [ 83 "qpdf": [
82 { 84 {
83 "jsonversion": 2, 85 "jsonversion": 2,
84 - "repairpagestree": false, 86 + "pushedinheritedpageresources": false,
  87 + "calledgetallpages": false,
85 "maxobjectid": 10 88 "maxobjectid": 10
86 }, 89 },
87 { 90 {
@@ -110,6 +113,10 @@ JSON v2 fixes @@ -110,6 +113,10 @@ JSON v2 fixes
110 the same number of elements whose individual elements are 113 the same number of elements whose individual elements are
111 validated according to the regular rules. 114 validated according to the regular rules.
112 115
  116 + * When reading back in, we'll have to call
  117 + pushInheritedAttributesToPage or getAllPages based on the values
  118 + of the metadata.
  119 +
113 * Support json v2 in the C API. At a minimum, write_json, 120 * Support json v2 in the C API. At a minimum, write_json,
114 create_from_json, and update_from_json need to be there and should 121 create_from_json, and update_from_json need to be there and should
115 take the same kinds of functions as the C API for logger. 122 take the same kinds of functions as the C API for logger.
include/qpdf/QPDF.hh
@@ -709,6 +709,11 @@ class QPDF @@ -709,6 +709,11 @@ class QPDF
709 QPDF_DLL 709 QPDF_DLL
710 std::vector<QPDFObjectHandle> const& getAllPages(); 710 std::vector<QPDFObjectHandle> const& getAllPages();
711 711
  712 + QPDF_DLL
  713 + bool everCalledGetAllPages() const;
  714 + QPDF_DLL
  715 + bool everPushedInheritedAttributesToPages() const;
  716 +
712 // These methods, given a page object or its object/generation 717 // These methods, given a page object or its object/generation
713 // number, returns the 0-based index into the array returned by 718 // number, returns the 0-based index into the array returned by
714 // getAllPages() for that page. An exception is thrown if the page 719 // getAllPages() for that page. An exception is thrown if the page
@@ -1690,6 +1695,8 @@ class QPDF @@ -1690,6 +1695,8 @@ class QPDF
1690 std::vector<QPDFObjectHandle> all_pages; 1695 std::vector<QPDFObjectHandle> all_pages;
1691 std::map<QPDFObjGen, int> pageobj_to_pages_pos; 1696 std::map<QPDFObjGen, int> pageobj_to_pages_pos;
1692 bool pushed_inherited_attributes_to_pages; 1697 bool pushed_inherited_attributes_to_pages;
  1698 + bool ever_pushed_inherited_attributes_to_pages;
  1699 + bool ever_called_get_all_pages;
1693 std::vector<QPDFExc> warnings; 1700 std::vector<QPDFExc> warnings;
1694 std::map<unsigned long long, ObjCopier> object_copiers; 1701 std::map<unsigned long long, ObjCopier> object_copiers;
1695 std::shared_ptr<QPDFObjectHandle::StreamDataProvider> copied_streams; 1702 std::shared_ptr<QPDFObjectHandle::StreamDataProvider> copied_streams;
libqpdf/QPDF.cc
@@ -222,6 +222,8 @@ QPDF::Members::Members() : @@ -222,6 +222,8 @@ QPDF::Members::Members() :
222 attempt_recovery(true), 222 attempt_recovery(true),
223 encp(new EncryptionParameters), 223 encp(new EncryptionParameters),
224 pushed_inherited_attributes_to_pages(false), 224 pushed_inherited_attributes_to_pages(false),
  225 + ever_pushed_inherited_attributes_to_pages(false),
  226 + ever_called_get_all_pages(false),
225 copied_stream_data_provider(0), 227 copied_stream_data_provider(0),
226 reconstructed_xref(false), 228 reconstructed_xref(false),
227 fixed_dangling_refs(false), 229 fixed_dangling_refs(false),
@@ -2879,3 +2881,15 @@ QPDF::stopOnError(std::string const& message) @@ -2879,3 +2881,15 @@ QPDF::stopOnError(std::string const& message)
2879 this->m->file->getLastOffset(), 2881 this->m->file->getLastOffset(),
2880 message); 2882 message);
2881 } 2883 }
  2884 +
  2885 +bool
  2886 +QPDF::everCalledGetAllPages() const
  2887 +{
  2888 + return this->m->ever_called_get_all_pages;
  2889 +}
  2890 +
  2891 +bool
  2892 +QPDF::everPushedInheritedAttributesToPages() const
  2893 +{
  2894 + return this->m->ever_pushed_inherited_attributes_to_pages;
  2895 +}
libqpdf/QPDF_optimization.cc
@@ -162,6 +162,7 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) @@ -162,6 +162,7 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
162 " pushing inherited attributes to pages"); 162 " pushing inherited attributes to pages");
163 } 163 }
164 this->m->pushed_inherited_attributes_to_pages = true; 164 this->m->pushed_inherited_attributes_to_pages = true;
  165 + this->m->ever_pushed_inherited_attributes_to_pages = true;
165 } 166 }
166 167
167 void 168 void
libqpdf/QPDF_pages.cc
@@ -54,6 +54,7 @@ QPDF::getAllPages() @@ -54,6 +54,7 @@ QPDF::getAllPages()
54 // Note that pushInheritedAttributesToPage may also be used to 54 // Note that pushInheritedAttributesToPage may also be used to
55 // initialize this->m->all_pages. 55 // initialize this->m->all_pages.
56 if (this->m->all_pages.empty()) { 56 if (this->m->all_pages.empty()) {
  57 + this->m->ever_called_get_all_pages = true;
57 std::set<QPDFObjGen> visited; 58 std::set<QPDFObjGen> visited;
58 std::set<QPDFObjGen> seen; 59 std::set<QPDFObjGen> seen;
59 QPDFObjectHandle pages = getRoot().getKey("/Pages"); 60 QPDFObjectHandle pages = getRoot().getKey("/Pages");
qpdf/test_driver.cc
@@ -691,7 +691,9 @@ test_15(QPDF& pdf, char const* arg2) @@ -691,7 +691,9 @@ test_15(QPDF& pdf, char const* arg2)
691 // Remove pages from various places, checking to make sure 691 // Remove pages from various places, checking to make sure
692 // that our pages reference is getting updated. 692 // that our pages reference is getting updated.
693 assert(pages.size() == 10); 693 assert(pages.size() == 10);
  694 + assert(!pdf.everPushedInheritedAttributesToPages());
694 pdf.removePage(pages.back()); // original page 9 695 pdf.removePage(pages.back()); // original page 9
  696 + assert(pdf.everPushedInheritedAttributesToPages());
695 assert(pages.size() == 9); 697 assert(pages.size() == 9);
696 pdf.removePage(*pages.begin()); // original page 0 698 pdf.removePage(*pages.begin()); // original page 0
697 assert(pages.size() == 8); 699 assert(pages.size() == 8);
@@ -767,7 +769,9 @@ static void @@ -767,7 +769,9 @@ static void
767 test_16(QPDF& pdf, char const* arg2) 769 test_16(QPDF& pdf, char const* arg2)
768 { 770 {
769 // Insert a page manually and then update the cache. 771 // Insert a page manually and then update the cache.
  772 + assert(!pdf.everCalledGetAllPages());
770 std::vector<QPDFObjectHandle> const& all_pages = pdf.getAllPages(); 773 std::vector<QPDFObjectHandle> const& all_pages = pdf.getAllPages();
  774 + assert(pdf.everCalledGetAllPages());
771 775
772 QPDFObjectHandle contents = createPageContents(pdf, "New page 10"); 776 QPDFObjectHandle contents = createPageContents(pdf, "New page 10");
773 QPDFObjectHandle page = 777 QPDFObjectHandle page =
@@ -785,6 +789,7 @@ test_16(QPDF& pdf, char const* arg2) @@ -785,6 +789,7 @@ test_16(QPDF& pdf, char const* arg2)
785 kids.appendItem(page); 789 kids.appendItem(page);
786 assert(all_pages.size() == 10); 790 assert(all_pages.size() == 10);
787 pdf.updateAllPagesCache(); 791 pdf.updateAllPagesCache();
  792 + assert(pdf.everCalledGetAllPages());
788 assert(all_pages.size() == 11); 793 assert(all_pages.size() == 11);
789 assert(all_pages.back().getObjGen() == page.getObjGen()); 794 assert(all_pages.back().getObjGen() == page.getObjGen());
790 795