Commit 1a62cce94012e05e25e81dd7016766d9d039281d

Authored by Jay Berkenbilt
1 parent 09027344

Restructure optimize to allow skipping parameters of filtered streams

include/qpdf/QPDF.hh
@@ -564,16 +564,28 @@ class QPDF @@ -564,16 +564,28 @@ class QPDF
564 // QPDF_optimization.cc 564 // QPDF_optimization.cc
565 565
566 // The object_stream_data map maps from a "compressed" object to 566 // The object_stream_data map maps from a "compressed" object to
567 - // the object stream that contains it. This enables optimize to 567 + // the object stream that contains it. This enables optimize to
568 // populate the object <-> user maps with only uncompressed 568 // populate the object <-> user maps with only uncompressed
569 - // objects. If allow_changes is false, an exception will be  
570 - // thrown if any changes are made during the optimization process.  
571 - // This is available so that the test suite can make sure that a  
572 - // linearized file is already optimized. When called in this way,  
573 - // optimize() still populates the object <-> user maps 569 + // objects. If allow_changes is false, an exception will be thrown
  570 + // if any changes are made during the optimization process. This
  571 + // is available so that the test suite can make sure that a
  572 + // linearized file is already optimized. When called in this way,
  573 + // optimize() still populates the object <-> user maps. The
  574 + // optional skip_stream_parameters parameter, if present, is
  575 + // called for each stream object. The function should return 2 if
  576 + // optimization should discard /Length, /Filter, and /DecodeParms;
  577 + // 1 if it should discard /Length, and 0 if it should preserve all
  578 + // keys. This is used by QPDFWriter to avoid creation of dangling
  579 + // objects for stream dictionary keys it will be regenerating.
574 QPDF_DLL 580 QPDF_DLL
575 void optimize(std::map<int, int> const& object_stream_data, 581 void optimize(std::map<int, int> const& object_stream_data,
576 bool allow_changes = true); 582 bool allow_changes = true);
  583 + // ABI: make function optional and merge overloaded versions
  584 + QPDF_DLL
  585 + void optimize(
  586 + std::map<int, int> const& object_stream_data,
  587 + bool allow_changes,
  588 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
577 589
578 // Traverse page tree return all /Page objects. It also detects 590 // Traverse page tree return all /Page objects. It also detects
579 // and resolves cases in which the same /Page object is 591 // and resolves cases in which the same /Page object is
@@ -1356,10 +1368,14 @@ class QPDF @@ -1356,10 +1368,14 @@ class QPDF
1356 std::vector<QPDFObjectHandle>& all_pages, 1368 std::vector<QPDFObjectHandle>& all_pages,
1357 bool allow_changes, bool warn_skipped_keys, 1369 bool allow_changes, bool warn_skipped_keys,
1358 std::set<QPDFObjGen>& visited); 1370 std::set<QPDFObjGen>& visited);
1359 - void updateObjectMaps(ObjUser const& ou, QPDFObjectHandle oh);  
1360 - void updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh,  
1361 - std::set<QPDFObjGen>& visited, bool top,  
1362 - int depth); 1371 + void updateObjectMaps(
  1372 + ObjUser const& ou, QPDFObjectHandle oh,
  1373 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
  1374 + void updateObjectMapsInternal(
  1375 + ObjUser const& ou, QPDFObjectHandle oh,
  1376 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters,
  1377 + std::set<QPDFObjGen>& visited, bool top,
  1378 + int depth);
1363 void filterCompressedObjects(std::map<int, int> const& object_stream_data); 1379 void filterCompressedObjects(std::map<int, int> const& object_stream_data);
1364 1380
1365 // Type conversion helper methods 1381 // Type conversion helper methods
libqpdf/QPDF_optimization.cc
@@ -62,6 +62,14 @@ void @@ -62,6 +62,14 @@ void
62 QPDF::optimize(std::map<int, int> const& object_stream_data, 62 QPDF::optimize(std::map<int, int> const& object_stream_data,
63 bool allow_changes) 63 bool allow_changes)
64 { 64 {
  65 + optimize(object_stream_data, allow_changes, nullptr);
  66 +}
  67 +
  68 +void
  69 +QPDF::optimize(std::map<int, int> const& object_stream_data,
  70 + bool allow_changes,
  71 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
  72 +{
65 if (! this->m->obj_user_to_objects.empty()) 73 if (! this->m->obj_user_to_objects.empty())
66 { 74 {
67 // already optimized 75 // already optimized
@@ -91,7 +99,8 @@ QPDF::optimize(std::map<int, int> const& object_stream_data, @@ -91,7 +99,8 @@ QPDF::optimize(std::map<int, int> const& object_stream_data,
91 for (int pageno = 0; pageno < n; ++pageno) 99 for (int pageno = 0; pageno < n; ++pageno)
92 { 100 {
93 updateObjectMaps(ObjUser(ObjUser::ou_page, pageno), 101 updateObjectMaps(ObjUser(ObjUser::ou_page, pageno),
94 - this->m->all_pages.at(toS(pageno))); 102 + this->m->all_pages.at(toS(pageno)),
  103 + skip_stream_parameters);
95 } 104 }
96 105
97 // Traverse document-level items 106 // Traverse document-level items
@@ -107,7 +116,8 @@ QPDF::optimize(std::map<int, int> const& object_stream_data, @@ -107,7 +116,8 @@ QPDF::optimize(std::map<int, int> const& object_stream_data,
107 else 116 else
108 { 117 {
109 updateObjectMaps(ObjUser(ObjUser::ou_trailer_key, key), 118 updateObjectMaps(ObjUser(ObjUser::ou_trailer_key, key),
110 - this->m->trailer.getKey(key)); 119 + this->m->trailer.getKey(key),
  120 + skip_stream_parameters);
111 } 121 }
112 } 122 }
113 123
@@ -124,7 +134,8 @@ QPDF::optimize(std::map<int, int> const& object_stream_data, @@ -124,7 +134,8 @@ QPDF::optimize(std::map<int, int> const& object_stream_data,
124 134
125 std::string const& key = *iter; 135 std::string const& key = *iter;
126 updateObjectMaps(ObjUser(ObjUser::ou_root_key, key), 136 updateObjectMaps(ObjUser(ObjUser::ou_root_key, key),
127 - root.getKey(key)); 137 + root.getKey(key),
  138 + skip_stream_parameters);
128 } 139 }
129 140
130 ObjUser root_ou = ObjUser(ObjUser::ou_root); 141 ObjUser root_ou = ObjUser(ObjUser::ou_root);
@@ -351,16 +362,20 @@ QPDF::pushInheritedAttributesToPageInternal( @@ -351,16 +362,20 @@ QPDF::pushInheritedAttributesToPageInternal(
351 } 362 }
352 363
353 void 364 void
354 -QPDF::updateObjectMaps(ObjUser const& ou, QPDFObjectHandle oh) 365 +QPDF::updateObjectMaps(
  366 + ObjUser const& ou, QPDFObjectHandle oh,
  367 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
355 { 368 {
356 std::set<QPDFObjGen> visited; 369 std::set<QPDFObjGen> visited;
357 - updateObjectMapsInternal(ou, oh, visited, true, 0); 370 + updateObjectMapsInternal(ou, oh, skip_stream_parameters, visited, true, 0);
358 } 371 }
359 372
360 void 373 void
361 -QPDF::updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh,  
362 - std::set<QPDFObjGen>& visited, bool top,  
363 - int depth) 374 +QPDF::updateObjectMapsInternal(
  375 + ObjUser const& ou, QPDFObjectHandle oh,
  376 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters,
  377 + std::set<QPDFObjGen>& visited, bool top,
  378 + int depth)
364 { 379 {
365 // Traverse the object tree from this point taking care to avoid 380 // Traverse the object tree from this point taking care to avoid
366 // crossing page boundaries. 381 // crossing page boundaries.
@@ -399,15 +414,22 @@ QPDF::updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh, @@ -399,15 +414,22 @@ QPDF::updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh,
399 for (int i = 0; i < n; ++i) 414 for (int i = 0; i < n; ++i)
400 { 415 {
401 updateObjectMapsInternal( 416 updateObjectMapsInternal(
402 - ou, oh.getArrayItem(i), visited, false, 1 + depth); 417 + ou, oh.getArrayItem(i), skip_stream_parameters,
  418 + visited, false, 1 + depth);
403 } 419 }
404 } 420 }
405 else if (oh.isDictionary() || oh.isStream()) 421 else if (oh.isDictionary() || oh.isStream())
406 { 422 {
407 QPDFObjectHandle dict = oh; 423 QPDFObjectHandle dict = oh;
408 - if (oh.isStream()) 424 + bool is_stream = oh.isStream();
  425 + int ssp = 0;
  426 + if (is_stream)
409 { 427 {
410 dict = oh.getDict(); 428 dict = oh.getDict();
  429 + if (skip_stream_parameters)
  430 + {
  431 + ssp = skip_stream_parameters(oh);
  432 + }
411 } 433 }
412 434
413 std::set<std::string> keys = dict.getKeys(); 435 std::set<std::string> keys = dict.getKeys();
@@ -421,16 +443,25 @@ QPDF::updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh, @@ -421,16 +443,25 @@ QPDF::updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh,
421 // case. 443 // case.
422 updateObjectMapsInternal( 444 updateObjectMapsInternal(
423 ObjUser(ObjUser::ou_thumb, ou.pageno), 445 ObjUser(ObjUser::ou_thumb, ou.pageno),
424 - dict.getKey(key), visited, false, 1 + depth); 446 + dict.getKey(key), skip_stream_parameters,
  447 + visited, false, 1 + depth);
425 } 448 }
426 else if (is_page_node && (key == "/Parent")) 449 else if (is_page_node && (key == "/Parent"))
427 { 450 {
428 // Don't traverse back up the page tree 451 // Don't traverse back up the page tree
429 } 452 }
  453 + else if (((ssp >= 1) && (key == "/Length")) ||
  454 + ((ssp >= 2) && ((key == "/Filter") ||
  455 + (key == "/DecodeParms"))))
  456 + {
  457 + // Don't traverse into stream parameters that we are
  458 + // not going to write.
  459 + }
430 else 460 else
431 { 461 {
432 updateObjectMapsInternal( 462 updateObjectMapsInternal(
433 - ou, dict.getKey(key), visited, false, 1 + depth); 463 + ou, dict.getKey(key), skip_stream_parameters,
  464 + visited, false, 1 + depth);
434 } 465 }
435 } 466 }
436 } 467 }