Commit 7775aec33e55db7a9440b6bcf402c062df1ee967
1 parent: a1b646fc
Refactor QPDFWriter::preserveObjectStreams
Showing 4 changed files with 118 additions and 104 deletions
libqpdf/QPDF.cc
libqpdf/QPDFWriter.cc
| ... | ... | @@ -1936,47 +1936,26 @@ void |
| 1936 | 1936 | QPDFWriter::preserveObjectStreams() |
| 1937 | 1937 | { |
| 1938 | 1938 | auto const& xref = QPDF::Writer::getXRefTable(m->pdf); |
| 1939 | - // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object | |
| 1940 | - // streams out of old objects that have generation numbers greater than zero. However in an | |
| 1941 | - // existing PDF, all object stream objects and all objects in them must have generation 0 | |
| 1942 | - // because the PDF spec does not provide any way to do otherwise. This code filters out objects | |
| 1943 | - // that are not allowed to be in object streams. In addition to removing objects that were | |
| 1944 | - // erroneously included in object streams in the source PDF, it also prevents unreferenced | |
| 1945 | - // objects from being included. | |
| 1946 | - auto end = xref.cend(); | |
| 1947 | - m->obj.streams_empty = true; | |
| 1939 | + m->obj.streams_empty = !xref.object_streams(); | |
| 1940 | + if (m->obj.streams_empty) { | |
| 1941 | + return; | |
| 1942 | + } | |
| 1943 | + // This code filters out objects that are not allowed to be in object streams. In addition to | |
| 1944 | + // removing objects that were erroneously included in object streams in the source PDF, it also | |
| 1945 | + // prevents unreferenced objects from being included. | |
| 1948 | 1946 | if (m->preserve_unreferenced_objects) { |
| 1949 | - for (auto iter = xref.cbegin(); iter != end; ++iter) { | |
| 1950 | - if (iter->second.getType() == 2) { | |
| 1951 | - // Pdf contains object streams. | |
| 1952 | - QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced"); | |
| 1953 | - m->obj.streams_empty = false; | |
| 1954 | - m->obj[iter->first].object_stream = iter->second.getObjStreamNumber(); | |
| 1955 | - } | |
| 1947 | + QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced"); | |
| 1948 | + for (auto [id, stream]: xref.compressed_objects()) { | |
| 1949 | + m->obj[id].object_stream = stream; | |
| 1956 | 1950 | } |
| 1957 | 1951 | } else { |
| 1958 | - // Start by scanning for first compressed object in case we don't have any object streams to | |
| 1959 | - // process. | |
| 1960 | - for (auto iter = xref.cbegin(); iter != end; ++iter) { | |
| 1961 | - if (iter->second.getType() == 2) { | |
| 1962 | - // Pdf contains object streams. | |
| 1963 | - QTC::TC("qpdf", "QPDFWriter preserve object streams"); | |
| 1964 | - m->obj.streams_empty = false; | |
| 1965 | - auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf); | |
| 1966 | - // The object pointed to by iter may be a previous generation, in which case it is | |
| 1967 | - // removed by getCompressibleObjSet. We need to restart the loop (while the object | |
| 1968 | - // table may contain multiple generations of an object). | |
| 1969 | - for (iter = xref.cbegin(); iter != end; ++iter) { | |
| 1970 | - if (iter->second.getType() == 2) { | |
| 1971 | - auto id = static_cast<size_t>(iter->first.getObj()); | |
| 1972 | - if (id < eligible.size() && eligible[id]) { | |
| 1973 | - m->obj[iter->first].object_stream = iter->second.getObjStreamNumber(); | |
| 1974 | - } else { | |
| 1975 | - QTC::TC("qpdf", "QPDFWriter exclude from object stream"); | |
| 1976 | - } | |
| 1977 | - } | |
| 1978 | - } | |
| 1979 | - return; | |
| 1952 | + QTC::TC("qpdf", "QPDFWriter preserve object streams"); | |
| 1953 | + auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf); | |
| 1954 | + for (auto [id, stream]: xref.compressed_objects()) { | |
| 1955 | + if (eligible[id]) { | |
| 1956 | + m->obj[id].object_stream = stream; | |
| 1957 | + } else { | |
| 1958 | + QTC::TC("qpdf", "QPDFWriter exclude from object stream"); | |
| 1980 | 1959 | } |
| 1981 | 1960 | } |
| 1982 | 1961 | } | ... | ... |
libqpdf/qpdf/ObjTable.hh
| ... | ... | @@ -46,6 +46,12 @@ class ObjTable: public std::vector<T> |
| 46 | 46 | } |
| 47 | 47 | |
| 48 | 48 | inline T const& |
| 49 | + operator[](unsigned int idx) const | |
| 50 | + { | |
| 51 | + return element(idx); | |
| 52 | + } | |
| 53 | + | |
| 54 | + inline T const& | |
| 49 | 55 | operator[](QPDFObjGen og) const |
| 50 | 56 | { |
| 51 | 57 | return element(static_cast<size_t>(og.getObj())); | ... | ... |
libqpdf/qpdf/QPDF_private.hh
| ... | ... | @@ -112,6 +112,33 @@ class QPDF::Xref_table |
| 112 | 112 | return result; |
| 113 | 113 | } |
| 114 | 114 | |
| 115 | + bool | |
| 116 | + object_streams() const noexcept | |
| 117 | + { | |
| 118 | + return object_streams_; | |
| 119 | + } | |
| 120 | + | |
| 121 | + // Return a vector of object id and stream number for each compressed object. | |
| 122 | + std::vector<std::pair<unsigned int, int>> | |
| 123 | + compressed_objects() const | |
| 124 | + { | |
| 125 | + if (!initialized()) { | |
| 126 | + throw std::logic_error("Xref_table::compressed_objects called before parsing."); | |
| 127 | + } | |
| 128 | + | |
| 129 | + std::vector<std::pair<unsigned int, int>> result; | |
| 130 | + result.reserve(table.size()); | |
| 131 | + | |
| 132 | + unsigned int i{0}; | |
| 133 | + for (auto const& item: table) { | |
| 134 | + if (item.type() == 2) { | |
| 135 | + result.emplace_back(i, item.stream_number()); | |
| 136 | + } | |
| 137 | + ++i; | |
| 138 | + } | |
| 139 | + return result; | |
| 140 | + } | |
| 141 | + | |
| 115 | 142 | // Temporary access to underlying table size |
| 116 | 143 | size_t |
| 117 | 144 | size() const noexcept |
| ... | ... | @@ -282,6 +309,7 @@ class QPDF::Xref_table |
| 282 | 309 | bool initialized_{false}; |
| 283 | 310 | bool ignore_streams_{false}; |
| 284 | 311 | bool reconstructed_{false}; |
| 312 | + bool object_streams_{false}; | |
| 285 | 313 | // Before the xref table is initialized, max_id_ is an upper bound on the possible object ids |
| 286 | 314 | // that could be present in the PDF file. Once the trailer has been read, max_id_ is set to the |
| 287 | 315 | // value of /Size. If the file is damaged, max_id_ becomes the maximum object id in the xref |
| ... | ... | @@ -293,72 +321,6 @@ class QPDF::Xref_table |
| 293 | 321 | qpdf_offset_t first_item_offset_{0}; // actual value from file |
| 294 | 322 | }; |
| 295 | 323 | |
| 296 | -// Writer class is restricted to QPDFWriter so that only it can call certain methods. | |
| 297 | -class QPDF::Writer | |
| 298 | -{ | |
| 299 | - friend class QPDFWriter; | |
| 300 | - | |
| 301 | - private: | |
| 302 | - static void | |
| 303 | - optimize( | |
| 304 | - QPDF& qpdf, | |
| 305 | - QPDFWriter::ObjTable const& obj, | |
| 306 | - std::function<int(QPDFObjectHandle&)> skip_stream_parameters) | |
| 307 | - { | |
| 308 | - return qpdf.optimize(obj, skip_stream_parameters); | |
| 309 | - } | |
| 310 | - | |
| 311 | - static void | |
| 312 | - getLinearizedParts( | |
| 313 | - QPDF& qpdf, | |
| 314 | - QPDFWriter::ObjTable const& obj, | |
| 315 | - std::vector<QPDFObjectHandle>& part4, | |
| 316 | - std::vector<QPDFObjectHandle>& part6, | |
| 317 | - std::vector<QPDFObjectHandle>& part7, | |
| 318 | - std::vector<QPDFObjectHandle>& part8, | |
| 319 | - std::vector<QPDFObjectHandle>& part9) | |
| 320 | - { | |
| 321 | - qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9); | |
| 322 | - } | |
| 323 | - | |
| 324 | - static void | |
| 325 | - generateHintStream( | |
| 326 | - QPDF& qpdf, | |
| 327 | - QPDFWriter::NewObjTable const& new_obj, | |
| 328 | - QPDFWriter::ObjTable const& obj, | |
| 329 | - std::shared_ptr<Buffer>& hint_stream, | |
| 330 | - int& S, | |
| 331 | - int& O, | |
| 332 | - bool compressed) | |
| 333 | - { | |
| 334 | - return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed); | |
| 335 | - } | |
| 336 | - | |
| 337 | - static std::vector<QPDFObjGen> | |
| 338 | - getCompressibleObjGens(QPDF& qpdf) | |
| 339 | - { | |
| 340 | - return qpdf.getCompressibleObjVector(); | |
| 341 | - } | |
| 342 | - | |
| 343 | - static std::vector<bool> | |
| 344 | - getCompressibleObjSet(QPDF& qpdf) | |
| 345 | - { | |
| 346 | - return qpdf.getCompressibleObjSet(); | |
| 347 | - } | |
| 348 | - | |
| 349 | - static std::map<QPDFObjGen, QPDFXRefEntry> | |
| 350 | - getXRefTable(QPDF& qpdf) | |
| 351 | - { | |
| 352 | - return qpdf.getXRefTableInternal(); | |
| 353 | - } | |
| 354 | - | |
| 355 | - static size_t | |
| 356 | - tableSize(QPDF& qpdf) | |
| 357 | - { | |
| 358 | - return qpdf.tableSize(); | |
| 359 | - } | |
| 360 | -}; | |
| 361 | - | |
| 362 | 324 | // The Resolver class is restricted to QPDFObject so that only it can resolve indirect |
| 363 | 325 | // references. |
| 364 | 326 | class QPDF::Resolver |
| ... | ... | @@ -841,4 +803,70 @@ class QPDF::ResolveRecorder |
| 841 | 803 | std::set<QPDFObjGen>::const_iterator iter; |
| 842 | 804 | }; |
| 843 | 805 | |
| 806 | +// Writer class is restricted to QPDFWriter so that only it can call certain methods. | |
| 807 | +class QPDF::Writer | |
| 808 | +{ | |
| 809 | + friend class QPDFWriter; | |
| 810 | + | |
| 811 | + private: | |
| 812 | + static void | |
| 813 | + optimize( | |
| 814 | + QPDF& qpdf, | |
| 815 | + QPDFWriter::ObjTable const& obj, | |
| 816 | + std::function<int(QPDFObjectHandle&)> skip_stream_parameters) | |
| 817 | + { | |
| 818 | + return qpdf.optimize(obj, skip_stream_parameters); | |
| 819 | + } | |
| 820 | + | |
| 821 | + static void | |
| 822 | + getLinearizedParts( | |
| 823 | + QPDF& qpdf, | |
| 824 | + QPDFWriter::ObjTable const& obj, | |
| 825 | + std::vector<QPDFObjectHandle>& part4, | |
| 826 | + std::vector<QPDFObjectHandle>& part6, | |
| 827 | + std::vector<QPDFObjectHandle>& part7, | |
| 828 | + std::vector<QPDFObjectHandle>& part8, | |
| 829 | + std::vector<QPDFObjectHandle>& part9) | |
| 830 | + { | |
| 831 | + qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9); | |
| 832 | + } | |
| 833 | + | |
| 834 | + static void | |
| 835 | + generateHintStream( | |
| 836 | + QPDF& qpdf, | |
| 837 | + QPDFWriter::NewObjTable const& new_obj, | |
| 838 | + QPDFWriter::ObjTable const& obj, | |
| 839 | + std::shared_ptr<Buffer>& hint_stream, | |
| 840 | + int& S, | |
| 841 | + int& O, | |
| 842 | + bool compressed) | |
| 843 | + { | |
| 844 | + return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed); | |
| 845 | + } | |
| 846 | + | |
| 847 | + static std::vector<QPDFObjGen> | |
| 848 | + getCompressibleObjGens(QPDF& qpdf) | |
| 849 | + { | |
| 850 | + return qpdf.getCompressibleObjVector(); | |
| 851 | + } | |
| 852 | + | |
| 853 | + static std::vector<bool> | |
| 854 | + getCompressibleObjSet(QPDF& qpdf) | |
| 855 | + { | |
| 856 | + return qpdf.getCompressibleObjSet(); | |
| 857 | + } | |
| 858 | + | |
| 859 | + static Xref_table const& | |
| 860 | + getXRefTable(QPDF& qpdf) | |
| 861 | + { | |
| 862 | + return qpdf.m->xref_table; | |
| 863 | + } | |
| 864 | + | |
| 865 | + static size_t | |
| 866 | + tableSize(QPDF& qpdf) | |
| 867 | + { | |
| 868 | + return qpdf.tableSize(); | |
| 869 | + } | |
| 870 | +}; | |
| 871 | + | |
| 844 | 872 | #endif // QPDF_PRIVATE_HH | ... | ... |