Commit 7775aec33e55db7a9440b6bcf402c062df1ee967 (1 parent: a1b646fc)
Refactor QPDFWriter::preserveObjectStreams
Showing 4 changed files with 118 additions and 104 deletions
libqpdf/QPDF.cc
| @@ -1365,6 +1365,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) | @@ -1365,6 +1365,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) | ||
| 1365 | 1365 | ||
| 1366 | case 2: | 1366 | case 2: |
| 1367 | entry = {0, Compressed(toI(f1), f2)}; | 1367 | entry = {0, Compressed(toI(f1), f2)}; |
| 1368 | + object_streams_ = true; | ||
| 1368 | break; | 1369 | break; |
| 1369 | 1370 | ||
| 1370 | default: | 1371 | default: |
libqpdf/QPDFWriter.cc
| @@ -1936,47 +1936,26 @@ void | @@ -1936,47 +1936,26 @@ void | ||
| 1936 | QPDFWriter::preserveObjectStreams() | 1936 | QPDFWriter::preserveObjectStreams() |
| 1937 | { | 1937 | { |
| 1938 | auto const& xref = QPDF::Writer::getXRefTable(m->pdf); | 1938 | auto const& xref = QPDF::Writer::getXRefTable(m->pdf); |
| 1939 | - // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object | ||
| 1940 | - // streams out of old objects that have generation numbers greater than zero. However in an | ||
| 1941 | - // existing PDF, all object stream objects and all objects in them must have generation 0 | ||
| 1942 | - // because the PDF spec does not provide any way to do otherwise. This code filters out objects | ||
| 1943 | - // that are not allowed to be in object streams. In addition to removing objects that were | ||
| 1944 | - // erroneously included in object streams in the source PDF, it also prevents unreferenced | ||
| 1945 | - // objects from being included. | ||
| 1946 | - auto end = xref.cend(); | ||
| 1947 | - m->obj.streams_empty = true; | 1939 | + m->obj.streams_empty = !xref.object_streams(); |
| 1940 | + if (m->obj.streams_empty) { | ||
| 1941 | + return; | ||
| 1942 | + } | ||
| 1943 | + // This code filters out objects that are not allowed to be in object streams. In addition to | ||
| 1944 | + // removing objects that were erroneously included in object streams in the source PDF, it also | ||
| 1945 | + // prevents unreferenced objects from being included. | ||
| 1948 | if (m->preserve_unreferenced_objects) { | 1946 | if (m->preserve_unreferenced_objects) { |
| 1949 | - for (auto iter = xref.cbegin(); iter != end; ++iter) { | ||
| 1950 | - if (iter->second.getType() == 2) { | ||
| 1951 | - // Pdf contains object streams. | ||
| 1952 | - QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced"); | ||
| 1953 | - m->obj.streams_empty = false; | ||
| 1954 | - m->obj[iter->first].object_stream = iter->second.getObjStreamNumber(); | ||
| 1955 | - } | 1947 | + QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced"); |
| 1948 | + for (auto [id, stream]: xref.compressed_objects()) { | ||
| 1949 | + m->obj[id].object_stream = stream; | ||
| 1956 | } | 1950 | } |
| 1957 | } else { | 1951 | } else { |
| 1958 | - // Start by scanning for first compressed object in case we don't have any object streams to | ||
| 1959 | - // process. | ||
| 1960 | - for (auto iter = xref.cbegin(); iter != end; ++iter) { | ||
| 1961 | - if (iter->second.getType() == 2) { | ||
| 1962 | - // Pdf contains object streams. | ||
| 1963 | - QTC::TC("qpdf", "QPDFWriter preserve object streams"); | ||
| 1964 | - m->obj.streams_empty = false; | ||
| 1965 | - auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf); | ||
| 1966 | - // The object pointed to by iter may be a previous generation, in which case it is | ||
| 1967 | - // removed by getCompressibleObjSet. We need to restart the loop (while the object | ||
| 1968 | - // table may contain multiple generations of an object). | ||
| 1969 | - for (iter = xref.cbegin(); iter != end; ++iter) { | ||
| 1970 | - if (iter->second.getType() == 2) { | ||
| 1971 | - auto id = static_cast<size_t>(iter->first.getObj()); | ||
| 1972 | - if (id < eligible.size() && eligible[id]) { | ||
| 1973 | - m->obj[iter->first].object_stream = iter->second.getObjStreamNumber(); | ||
| 1974 | - } else { | ||
| 1975 | - QTC::TC("qpdf", "QPDFWriter exclude from object stream"); | ||
| 1976 | - } | ||
| 1977 | - } | ||
| 1978 | - } | ||
| 1979 | - return; | 1952 | + QTC::TC("qpdf", "QPDFWriter preserve object streams"); |
| 1953 | + auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf); | ||
| 1954 | + for (auto [id, stream]: xref.compressed_objects()) { | ||
| 1955 | + if (eligible[id]) { | ||
| 1956 | + m->obj[id].object_stream = stream; | ||
| 1957 | + } else { | ||
| 1958 | + QTC::TC("qpdf", "QPDFWriter exclude from object stream"); | ||
| 1980 | } | 1959 | } |
| 1981 | } | 1960 | } |
| 1982 | } | 1961 | } |
libqpdf/qpdf/ObjTable.hh
| @@ -46,6 +46,12 @@ class ObjTable: public std::vector<T> | @@ -46,6 +46,12 @@ class ObjTable: public std::vector<T> | ||
| 46 | } | 46 | } |
| 47 | 47 | ||
| 48 | inline T const& | 48 | inline T const& |
| 49 | + operator[](unsigned int idx) const | ||
| 50 | + { | ||
| 51 | + return element(idx); | ||
| 52 | + } | ||
| 53 | + | ||
| 54 | + inline T const& | ||
| 49 | operator[](QPDFObjGen og) const | 55 | operator[](QPDFObjGen og) const |
| 50 | { | 56 | { |
| 51 | return element(static_cast<size_t>(og.getObj())); | 57 | return element(static_cast<size_t>(og.getObj())); |
libqpdf/qpdf/QPDF_private.hh
| @@ -112,6 +112,33 @@ class QPDF::Xref_table | @@ -112,6 +112,33 @@ class QPDF::Xref_table | ||
| 112 | return result; | 112 | return result; |
| 113 | } | 113 | } |
| 114 | 114 | ||
| 115 | + bool | ||
| 116 | + object_streams() const noexcept | ||
| 117 | + { | ||
| 118 | + return object_streams_; | ||
| 119 | + } | ||
| 120 | + | ||
| 121 | + // Return a vector of object id and stream number for each compressed object. | ||
| 122 | + std::vector<std::pair<unsigned int, int>> | ||
| 123 | + compressed_objects() const | ||
| 124 | + { | ||
| 125 | + if (!initialized()) { | ||
| 126 | + throw std::logic_error("Xref_table::compressed_objects called before parsing."); | ||
| 127 | + } | ||
| 128 | + | ||
| 129 | + std::vector<std::pair<unsigned int, int>> result; | ||
| 130 | + result.reserve(table.size()); | ||
| 131 | + | ||
| 132 | + unsigned int i{0}; | ||
| 133 | + for (auto const& item: table) { | ||
| 134 | + if (item.type() == 2) { | ||
| 135 | + result.emplace_back(i, item.stream_number()); | ||
| 136 | + } | ||
| 137 | + ++i; | ||
| 138 | + } | ||
| 139 | + return result; | ||
| 140 | + } | ||
| 141 | + | ||
| 115 | // Temporary access to underlying table size | 142 | // Temporary access to underlying table size |
| 116 | size_t | 143 | size_t |
| 117 | size() const noexcept | 144 | size() const noexcept |
| @@ -282,6 +309,7 @@ class QPDF::Xref_table | @@ -282,6 +309,7 @@ class QPDF::Xref_table | ||
| 282 | bool initialized_{false}; | 309 | bool initialized_{false}; |
| 283 | bool ignore_streams_{false}; | 310 | bool ignore_streams_{false}; |
| 284 | bool reconstructed_{false}; | 311 | bool reconstructed_{false}; |
| 312 | + bool object_streams_{false}; | ||
| 285 | // Before the xref table is initialized, max_id_ is an upper bound on the possible object ids | 313 | // Before the xref table is initialized, max_id_ is an upper bound on the possible object ids |
| 286 | // that could be present in the PDF file. Once the trailer has been read, max_id_ is set to the | 314 | // that could be present in the PDF file. Once the trailer has been read, max_id_ is set to the |
| 287 | // value of /Size. If the file is damaged, max_id_ becomes the maximum object id in the xref | 315 | // value of /Size. If the file is damaged, max_id_ becomes the maximum object id in the xref |
| @@ -293,72 +321,6 @@ class QPDF::Xref_table | @@ -293,72 +321,6 @@ class QPDF::Xref_table | ||
| 293 | qpdf_offset_t first_item_offset_{0}; // actual value from file | 321 | qpdf_offset_t first_item_offset_{0}; // actual value from file |
| 294 | }; | 322 | }; |
| 295 | 323 | ||
| 296 | -// Writer class is restricted to QPDFWriter so that only it can call certain methods. | ||
| 297 | -class QPDF::Writer | ||
| 298 | -{ | ||
| 299 | - friend class QPDFWriter; | ||
| 300 | - | ||
| 301 | - private: | ||
| 302 | - static void | ||
| 303 | - optimize( | ||
| 304 | - QPDF& qpdf, | ||
| 305 | - QPDFWriter::ObjTable const& obj, | ||
| 306 | - std::function<int(QPDFObjectHandle&)> skip_stream_parameters) | ||
| 307 | - { | ||
| 308 | - return qpdf.optimize(obj, skip_stream_parameters); | ||
| 309 | - } | ||
| 310 | - | ||
| 311 | - static void | ||
| 312 | - getLinearizedParts( | ||
| 313 | - QPDF& qpdf, | ||
| 314 | - QPDFWriter::ObjTable const& obj, | ||
| 315 | - std::vector<QPDFObjectHandle>& part4, | ||
| 316 | - std::vector<QPDFObjectHandle>& part6, | ||
| 317 | - std::vector<QPDFObjectHandle>& part7, | ||
| 318 | - std::vector<QPDFObjectHandle>& part8, | ||
| 319 | - std::vector<QPDFObjectHandle>& part9) | ||
| 320 | - { | ||
| 321 | - qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9); | ||
| 322 | - } | ||
| 323 | - | ||
| 324 | - static void | ||
| 325 | - generateHintStream( | ||
| 326 | - QPDF& qpdf, | ||
| 327 | - QPDFWriter::NewObjTable const& new_obj, | ||
| 328 | - QPDFWriter::ObjTable const& obj, | ||
| 329 | - std::shared_ptr<Buffer>& hint_stream, | ||
| 330 | - int& S, | ||
| 331 | - int& O, | ||
| 332 | - bool compressed) | ||
| 333 | - { | ||
| 334 | - return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed); | ||
| 335 | - } | ||
| 336 | - | ||
| 337 | - static std::vector<QPDFObjGen> | ||
| 338 | - getCompressibleObjGens(QPDF& qpdf) | ||
| 339 | - { | ||
| 340 | - return qpdf.getCompressibleObjVector(); | ||
| 341 | - } | ||
| 342 | - | ||
| 343 | - static std::vector<bool> | ||
| 344 | - getCompressibleObjSet(QPDF& qpdf) | ||
| 345 | - { | ||
| 346 | - return qpdf.getCompressibleObjSet(); | ||
| 347 | - } | ||
| 348 | - | ||
| 349 | - static std::map<QPDFObjGen, QPDFXRefEntry> | ||
| 350 | - getXRefTable(QPDF& qpdf) | ||
| 351 | - { | ||
| 352 | - return qpdf.getXRefTableInternal(); | ||
| 353 | - } | ||
| 354 | - | ||
| 355 | - static size_t | ||
| 356 | - tableSize(QPDF& qpdf) | ||
| 357 | - { | ||
| 358 | - return qpdf.tableSize(); | ||
| 359 | - } | ||
| 360 | -}; | ||
| 361 | - | ||
| 362 | // The Resolver class is restricted to QPDFObject so that only it can resolve indirect | 324 | // The Resolver class is restricted to QPDFObject so that only it can resolve indirect |
| 363 | // references. | 325 | // references. |
| 364 | class QPDF::Resolver | 326 | class QPDF::Resolver |
| @@ -841,4 +803,70 @@ class QPDF::ResolveRecorder | @@ -841,4 +803,70 @@ class QPDF::ResolveRecorder | ||
| 841 | std::set<QPDFObjGen>::const_iterator iter; | 803 | std::set<QPDFObjGen>::const_iterator iter; |
| 842 | }; | 804 | }; |
| 843 | 805 | ||
| 806 | +// Writer class is restricted to QPDFWriter so that only it can call certain methods. | ||
| 807 | +class QPDF::Writer | ||
| 808 | +{ | ||
| 809 | + friend class QPDFWriter; | ||
| 810 | + | ||
| 811 | + private: | ||
| 812 | + static void | ||
| 813 | + optimize( | ||
| 814 | + QPDF& qpdf, | ||
| 815 | + QPDFWriter::ObjTable const& obj, | ||
| 816 | + std::function<int(QPDFObjectHandle&)> skip_stream_parameters) | ||
| 817 | + { | ||
| 818 | + return qpdf.optimize(obj, skip_stream_parameters); | ||
| 819 | + } | ||
| 820 | + | ||
| 821 | + static void | ||
| 822 | + getLinearizedParts( | ||
| 823 | + QPDF& qpdf, | ||
| 824 | + QPDFWriter::ObjTable const& obj, | ||
| 825 | + std::vector<QPDFObjectHandle>& part4, | ||
| 826 | + std::vector<QPDFObjectHandle>& part6, | ||
| 827 | + std::vector<QPDFObjectHandle>& part7, | ||
| 828 | + std::vector<QPDFObjectHandle>& part8, | ||
| 829 | + std::vector<QPDFObjectHandle>& part9) | ||
| 830 | + { | ||
| 831 | + qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9); | ||
| 832 | + } | ||
| 833 | + | ||
| 834 | + static void | ||
| 835 | + generateHintStream( | ||
| 836 | + QPDF& qpdf, | ||
| 837 | + QPDFWriter::NewObjTable const& new_obj, | ||
| 838 | + QPDFWriter::ObjTable const& obj, | ||
| 839 | + std::shared_ptr<Buffer>& hint_stream, | ||
| 840 | + int& S, | ||
| 841 | + int& O, | ||
| 842 | + bool compressed) | ||
| 843 | + { | ||
| 844 | + return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed); | ||
| 845 | + } | ||
| 846 | + | ||
| 847 | + static std::vector<QPDFObjGen> | ||
| 848 | + getCompressibleObjGens(QPDF& qpdf) | ||
| 849 | + { | ||
| 850 | + return qpdf.getCompressibleObjVector(); | ||
| 851 | + } | ||
| 852 | + | ||
| 853 | + static std::vector<bool> | ||
| 854 | + getCompressibleObjSet(QPDF& qpdf) | ||
| 855 | + { | ||
| 856 | + return qpdf.getCompressibleObjSet(); | ||
| 857 | + } | ||
| 858 | + | ||
| 859 | + static Xref_table const& | ||
| 860 | + getXRefTable(QPDF& qpdf) | ||
| 861 | + { | ||
| 862 | + return qpdf.m->xref_table; | ||
| 863 | + } | ||
| 864 | + | ||
| 865 | + static size_t | ||
| 866 | + tableSize(QPDF& qpdf) | ||
| 867 | + { | ||
| 868 | + return qpdf.tableSize(); | ||
| 869 | + } | ||
| 870 | +}; | ||
| 871 | + | ||
| 844 | #endif // QPDF_PRIVATE_HH | 872 | #endif // QPDF_PRIVATE_HH |