Commit 1a62cce94012e05e25e81dd7016766d9d039281d

Authored by Jay Berkenbilt
1 parent 09027344

Restructure optimize to allow skipping parameters of filtered streams

include/qpdf/QPDF.hh
... ... @@ -564,16 +564,28 @@ class QPDF
564 564 // QPDF_optimization.cc
565 565  
566 566 // The object_stream_data map maps from a "compressed" object to
567   - // the object stream that contains it. This enables optimize to
  567 + // the object stream that contains it. This enables optimize to
568 568 // populate the object <-> user maps with only uncompressed
569   - // objects. If allow_changes is false, an exception will be
570   - // thrown if any changes are made during the optimization process.
571   - // This is available so that the test suite can make sure that a
572   - // linearized file is already optimized. When called in this way,
573   - // optimize() still populates the object <-> user maps
  569 + // objects. If allow_changes is false, an exception will be thrown
  570 + // if any changes are made during the optimization process. This
  571 + // is available so that the test suite can make sure that a
  572 + // linearized file is already optimized. When called in this way,
  573 + // optimize() still populates the object <-> user maps. The
  574 + // optional skip_stream_parameters parameter, if present, is
  575 + // called for each stream object. The function should return 2 if
  576 + // optimization should discard /Length, /Filter, and /DecodeParms;
  577 + // 1 if it should discard /Length, and 0 if it should preserve all
  578 + // keys. This is used by QPDFWriter to avoid creation of dangling
  579 + // objects for stream dictionary keys it will be regenerating.
574 580 QPDF_DLL
575 581 void optimize(std::map<int, int> const& object_stream_data,
576 582 bool allow_changes = true);
  583 + // ABI: make function optional and merge overloaded versions
  584 + QPDF_DLL
  585 + void optimize(
  586 + std::map<int, int> const& object_stream_data,
  587 + bool allow_changes,
  588 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
577 589  
578 590 // Traverse page tree return all /Page objects. It also detects
579 591 // and resolves cases in which the same /Page object is
... ... @@ -1356,10 +1368,14 @@ class QPDF
1356 1368 std::vector<QPDFObjectHandle>& all_pages,
1357 1369 bool allow_changes, bool warn_skipped_keys,
1358 1370 std::set<QPDFObjGen>& visited);
1359   - void updateObjectMaps(ObjUser const& ou, QPDFObjectHandle oh);
1360   - void updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh,
1361   - std::set<QPDFObjGen>& visited, bool top,
1362   - int depth);
  1371 + void updateObjectMaps(
  1372 + ObjUser const& ou, QPDFObjectHandle oh,
  1373 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
  1374 + void updateObjectMapsInternal(
  1375 + ObjUser const& ou, QPDFObjectHandle oh,
  1376 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters,
  1377 + std::set<QPDFObjGen>& visited, bool top,
  1378 + int depth);
1363 1379 void filterCompressedObjects(std::map<int, int> const& object_stream_data);
1364 1380  
1365 1381 // Type conversion helper methods
... ...
libqpdf/QPDF_optimization.cc
... ... @@ -62,6 +62,14 @@ void
62 62 QPDF::optimize(std::map<int, int> const& object_stream_data,
63 63 bool allow_changes)
64 64 {
  65 + optimize(object_stream_data, allow_changes, nullptr);
  66 +}
  67 +
  68 +void
  69 +QPDF::optimize(std::map<int, int> const& object_stream_data,
  70 + bool allow_changes,
  71 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
  72 +{
65 73 if (! this->m->obj_user_to_objects.empty())
66 74 {
67 75 // already optimized
... ... @@ -91,7 +99,8 @@ QPDF::optimize(std::map<int, int> const& object_stream_data,
91 99 for (int pageno = 0; pageno < n; ++pageno)
92 100 {
93 101 updateObjectMaps(ObjUser(ObjUser::ou_page, pageno),
94   - this->m->all_pages.at(toS(pageno)));
  102 + this->m->all_pages.at(toS(pageno)),
  103 + skip_stream_parameters);
95 104 }
96 105  
97 106 // Traverse document-level items
... ... @@ -107,7 +116,8 @@ QPDF::optimize(std::map<int, int> const& object_stream_data,
107 116 else
108 117 {
109 118 updateObjectMaps(ObjUser(ObjUser::ou_trailer_key, key),
110   - this->m->trailer.getKey(key));
  119 + this->m->trailer.getKey(key),
  120 + skip_stream_parameters);
111 121 }
112 122 }
113 123  
... ... @@ -124,7 +134,8 @@ QPDF::optimize(std::map<int, int> const& object_stream_data,
124 134  
125 135 std::string const& key = *iter;
126 136 updateObjectMaps(ObjUser(ObjUser::ou_root_key, key),
127   - root.getKey(key));
  137 + root.getKey(key),
  138 + skip_stream_parameters);
128 139 }
129 140  
130 141 ObjUser root_ou = ObjUser(ObjUser::ou_root);
... ... @@ -351,16 +362,20 @@ QPDF::pushInheritedAttributesToPageInternal(
351 362 }
352 363  
353 364 void
354   -QPDF::updateObjectMaps(ObjUser const& ou, QPDFObjectHandle oh)
  365 +QPDF::updateObjectMaps(
  366 + ObjUser const& ou, QPDFObjectHandle oh,
  367 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
355 368 {
356 369 std::set<QPDFObjGen> visited;
357   - updateObjectMapsInternal(ou, oh, visited, true, 0);
  370 + updateObjectMapsInternal(ou, oh, skip_stream_parameters, visited, true, 0);
358 371 }
359 372  
360 373 void
361   -QPDF::updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh,
362   - std::set<QPDFObjGen>& visited, bool top,
363   - int depth)
  374 +QPDF::updateObjectMapsInternal(
  375 + ObjUser const& ou, QPDFObjectHandle oh,
  376 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters,
  377 + std::set<QPDFObjGen>& visited, bool top,
  378 + int depth)
364 379 {
365 380 // Traverse the object tree from this point taking care to avoid
366 381 // crossing page boundaries.
... ... @@ -399,15 +414,22 @@ QPDF::updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh,
399 414 for (int i = 0; i < n; ++i)
400 415 {
401 416 updateObjectMapsInternal(
402   - ou, oh.getArrayItem(i), visited, false, 1 + depth);
  417 + ou, oh.getArrayItem(i), skip_stream_parameters,
  418 + visited, false, 1 + depth);
403 419 }
404 420 }
405 421 else if (oh.isDictionary() || oh.isStream())
406 422 {
407 423 QPDFObjectHandle dict = oh;
408   - if (oh.isStream())
  424 + bool is_stream = oh.isStream();
  425 + int ssp = 0;
  426 + if (is_stream)
409 427 {
410 428 dict = oh.getDict();
  429 + if (skip_stream_parameters)
  430 + {
  431 + ssp = skip_stream_parameters(oh);
  432 + }
411 433 }
412 434  
413 435 std::set<std::string> keys = dict.getKeys();
... ... @@ -421,16 +443,25 @@ QPDF::updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh,
421 443 // case.
422 444 updateObjectMapsInternal(
423 445 ObjUser(ObjUser::ou_thumb, ou.pageno),
424   - dict.getKey(key), visited, false, 1 + depth);
  446 + dict.getKey(key), skip_stream_parameters,
  447 + visited, false, 1 + depth);
425 448 }
426 449 else if (is_page_node && (key == "/Parent"))
427 450 {
428 451 // Don't traverse back up the page tree
429 452 }
  453 + else if (((ssp >= 1) && (key == "/Length")) ||
  454 + ((ssp >= 2) && ((key == "/Filter") ||
  455 + (key == "/DecodeParms"))))
  456 + {
  457 + // Don't traverse into stream parameters that we are
  458 + // not going to write.
  459 + }
430 460 else
431 461 {
432 462 updateObjectMapsInternal(
433   - ou, dict.getKey(key), visited, false, 1 + depth);
  463 + ou, dict.getKey(key), skip_stream_parameters,
  464 + visited, false, 1 + depth);
434 465 }
435 466 }
436 467 }
... ...