Commit 753b86f916a1bf1293cb3c5381d341ba871656e0

Authored by m-holger
1 parent b84375c4

Remove `QPDF_optimization.cc`: merge functionality into `QPDF_linearization.cc` for improved structure and better code locality.
libqpdf/CMakeLists.txt
... ... @@ -93,7 +93,6 @@ set(libqpdf_SOURCES
93 93 QPDF_json.cc
94 94 QPDF_linearization.cc
95 95 QPDF_objects.cc
96   - QPDF_optimization.cc
97 96 QPDF_pages.cc
98 97 QTC.cc
99 98 QUtil.cc
... ...
libqpdf/QPDF_linearization.cc
... ... @@ -69,6 +69,293 @@ load_vector_vector(
69 69 bit_stream.skipToNextByte();
70 70 }
71 71  
  72 +QPDF::ObjUser::ObjUser(user_e type) :
  73 + ou_type(type)
  74 +{
  75 + qpdf_expect(type == ou_root);
  76 +}
  77 +
  78 +QPDF::ObjUser::ObjUser(user_e type, size_t pageno) :
  79 + ou_type(type),
  80 + pageno(pageno)
  81 +{
  82 + qpdf_expect(type == ou_page || type == ou_thumb);
  83 +}
  84 +
  85 +QPDF::ObjUser::ObjUser(user_e type, std::string const& key) :
  86 + ou_type(type),
  87 + key(key)
  88 +{
  89 + qpdf_expect(type == ou_trailer_key || type == ou_root_key);
  90 +}
  91 +
  92 +bool
  93 +QPDF::ObjUser::operator<(ObjUser const& rhs) const
  94 +{
  95 + if (ou_type < rhs.ou_type) {
  96 + return true;
  97 + }
  98 + if (ou_type == rhs.ou_type) {
  99 + if (pageno < rhs.pageno) {
  100 + return true;
  101 + }
  102 + if (pageno == rhs.pageno) {
  103 + return key < rhs.key;
  104 + }
  105 + }
  106 + return false;
  107 +}
  108 +
  109 +QPDF::UpdateObjectMapsFrame::UpdateObjectMapsFrame(
  110 + QPDF::ObjUser const& ou, QPDFObjectHandle oh, bool top) :
  111 + ou(ou),
  112 + oh(oh),
  113 + top(top)
  114 +{
  115 +}
  116 +
  117 +void
  118 +QPDF::optimize(
  119 + std::map<int, int> const& object_stream_data,
  120 + bool allow_changes,
  121 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
  122 +{
  123 + m->lin.optimize_internal(object_stream_data, allow_changes, skip_stream_parameters);
  124 +}
  125 +
  126 +void
  127 +Lin::optimize(
  128 + QPDFWriter::ObjTable const& obj, std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
  129 +{
  130 + optimize_internal(obj, true, skip_stream_parameters);
  131 +}
  132 +
  133 +template <typename T>
  134 +void
  135 +Lin::optimize_internal(
  136 + T const& object_stream_data,
  137 + bool allow_changes,
  138 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
  139 +{
  140 + if (!m->obj_user_to_objects.empty()) {
  141 + // already optimized
  142 + return;
  143 + }
  144 +
  145 + // The PDF specification indicates that /Outlines is supposed to be an indirect reference. Force
  146 + // it to be so if it exists and is direct. (This has been seen in the wild.)
  147 + QPDFObjectHandle root = qpdf.getRoot();
  148 + if (root.getKey("/Outlines").isDictionary()) {
  149 + QPDFObjectHandle outlines = root.getKey("/Outlines");
  150 + if (!outlines.isIndirect()) {
  151 + root.replaceKey("/Outlines", qpdf.makeIndirectObject(outlines));
  152 + }
  153 + }
  154 +
  155 + // Traverse pages tree pushing all inherited resources down to the page level. This also
  156 + // initializes m->all_pages.
  157 + m->pages.pushInheritedAttributesToPage(allow_changes, false);
  158 + // Traverse pages
  159 +
  160 + size_t n = 0;
  161 + for (auto const& page: m->pages) {
  162 + updateObjectMaps(ObjUser(ObjUser::ou_page, n), page, skip_stream_parameters);
  163 + ++n;
  164 + }
  165 +
  166 + // Traverse document-level items
  167 + for (auto const& [key, value]: m->trailer.as_dictionary()) {
  168 + if (key == "/Root") {
  169 + // handled separately
  170 + } else {
  171 + if (!value.null()) {
  172 + updateObjectMaps(
  173 + ObjUser(ObjUser::ou_trailer_key, key), value, skip_stream_parameters);
  174 + }
  175 + }
  176 + }
  177 +
  178 + for (auto const& [key, value]: root.as_dictionary()) {
  179 + // Technically, /I keys from /Thread dictionaries are supposed to be handled separately, but
  180 + // we are going to disregard that specification for now. There is loads of evidence that
  181 + // pdlin and Acrobat both disregard things like this from time to time, so this is almost
  182 + // certain not to cause any problems.
  183 + if (!value.null()) {
  184 + updateObjectMaps(ObjUser(ObjUser::ou_root_key, key), value, skip_stream_parameters);
  185 + }
  186 + }
  187 +
  188 + ObjUser root_ou = ObjUser(ObjUser::ou_root);
  189 + auto root_og = QPDFObjGen(root.getObjGen());
  190 + m->obj_user_to_objects[root_ou].insert(root_og);
  191 + m->object_to_obj_users[root_og].insert(root_ou);
  192 +
  193 + filterCompressedObjects(object_stream_data);
  194 +}
  195 +
  196 +void
  197 +Lin::updateObjectMaps(
  198 + ObjUser const& first_ou,
  199 + QPDFObjectHandle first_oh,
  200 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
  201 +{
  202 + QPDFObjGen::set visited;
  203 + std::vector<UpdateObjectMapsFrame> pending;
  204 + pending.emplace_back(first_ou, first_oh, true);
  205 + // Traverse the object tree from this point taking care to avoid crossing page boundaries.
  206 + std::unique_ptr<ObjUser> thumb_ou;
  207 + while (!pending.empty()) {
  208 + auto cur = pending.back();
  209 + pending.pop_back();
  210 +
  211 + bool is_page_node = false;
  212 +
  213 + if (cur.oh.isDictionaryOfType("/Page")) {
  214 + is_page_node = true;
  215 + if (!cur.top) {
  216 + continue;
  217 + }
  218 + }
  219 +
  220 + if (cur.oh.isIndirect()) {
  221 + QPDFObjGen og(cur.oh.getObjGen());
  222 + if (!visited.add(og)) {
  223 + QTC::TC("qpdf", "QPDF opt loop detected");
  224 + continue;
  225 + }
  226 + m->obj_user_to_objects[cur.ou].insert(og);
  227 + m->object_to_obj_users[og].insert(cur.ou);
  228 + }
  229 +
  230 + if (cur.oh.isArray()) {
  231 + for (auto const& item: cur.oh.as_array()) {
  232 + pending.emplace_back(cur.ou, item, false);
  233 + }
  234 + } else if (cur.oh.isDictionary() || cur.oh.isStream()) {
  235 + QPDFObjectHandle dict = cur.oh;
  236 + bool is_stream = cur.oh.isStream();
  237 + int ssp = 0;
  238 + if (is_stream) {
  239 + dict = cur.oh.getDict();
  240 + if (skip_stream_parameters) {
  241 + ssp = skip_stream_parameters(cur.oh);
  242 + }
  243 + }
  244 +
  245 + for (auto& [key, value]: dict.as_dictionary()) {
  246 + if (value.null()) {
  247 + continue;
  248 + }
  249 +
  250 + if (is_page_node && (key == "/Thumb")) {
  251 + // Traverse page thumbnail dictionaries as a special case. There can only ever
  252 + // be one /Thumb key on a page, and we see at most one page node per call.
  253 + thumb_ou = std::make_unique<ObjUser>(ObjUser::ou_thumb, cur.ou.pageno);
  254 + pending.emplace_back(*thumb_ou, dict.getKey(key), false);
  255 + } else if (is_page_node && (key == "/Parent")) {
  256 + // Don't traverse back up the page tree
  257 + } else if (
  258 + ((ssp >= 1) && (key == "/Length")) ||
  259 + ((ssp >= 2) && ((key == "/Filter") || (key == "/DecodeParms")))) {
  260 + // Don't traverse into stream parameters that we are not going to write.
  261 + } else {
  262 + pending.emplace_back(cur.ou, value, false);
  263 + }
  264 + }
  265 + }
  266 + }
  267 +}
  268 +
  269 +void
  270 +Lin::filterCompressedObjects(std::map<int, int> const& object_stream_data)
  271 +{
  272 + if (object_stream_data.empty()) {
  273 + return;
  274 + }
  275 +
  276 + // Transform object_to_obj_users and obj_user_to_objects so that they refer only to uncompressed
  277 + // objects. If something is a user of a compressed object, then it is really a user of the
  278 + // object stream that contains it.
  279 +
  280 + std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects;
  281 + std::map<QPDFObjGen, std::set<ObjUser>> t_object_to_obj_users;
  282 +
  283 + for (auto const& i1: m->obj_user_to_objects) {
  284 + ObjUser const& ou = i1.first;
  285 + // Loop over objects.
  286 + for (auto const& og: i1.second) {
  287 + auto i2 = object_stream_data.find(og.getObj());
  288 + if (i2 == object_stream_data.end()) {
  289 + t_obj_user_to_objects[ou].insert(og);
  290 + } else {
  291 + t_obj_user_to_objects[ou].insert(QPDFObjGen(i2->second, 0));
  292 + }
  293 + }
  294 + }
  295 +
  296 + for (auto const& i1: m->object_to_obj_users) {
  297 + QPDFObjGen const& og = i1.first;
  298 + // Loop over obj_users.
  299 + for (auto const& ou: i1.second) {
  300 + auto i2 = object_stream_data.find(og.getObj());
  301 + if (i2 == object_stream_data.end()) {
  302 + t_object_to_obj_users[og].insert(ou);
  303 + } else {
  304 + t_object_to_obj_users[QPDFObjGen(i2->second, 0)].insert(ou);
  305 + }
  306 + }
  307 + }
  308 +
  309 + m->obj_user_to_objects = t_obj_user_to_objects;
  310 + m->object_to_obj_users = t_object_to_obj_users;
  311 +}
  312 +
  313 +void
  314 +Lin::filterCompressedObjects(QPDFWriter::ObjTable const& obj)
  315 +{
  316 + if (obj.getStreamsEmpty()) {
  317 + return;
  318 + }
  319 +
  320 + // Transform object_to_obj_users and obj_user_to_objects so that they refer only to uncompressed
  321 + // objects. If something is a user of a compressed object, then it is really a user of the
  322 + // object stream that contains it.
  323 +
  324 + std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects;
  325 + std::map<QPDFObjGen, std::set<ObjUser>> t_object_to_obj_users;
  326 +
  327 + for (auto const& i1: m->obj_user_to_objects) {
  328 + ObjUser const& ou = i1.first;
  329 + // Loop over objects.
  330 + for (auto const& og: i1.second) {
  331 + if (obj.contains(og)) {
  332 + if (auto const& i2 = obj[og].object_stream; i2 <= 0) {
  333 + t_obj_user_to_objects[ou].insert(og);
  334 + } else {
  335 + t_obj_user_to_objects[ou].insert(QPDFObjGen(i2, 0));
  336 + }
  337 + }
  338 + }
  339 + }
  340 +
  341 + for (auto const& i1: m->object_to_obj_users) {
  342 + QPDFObjGen const& og = i1.first;
  343 + if (obj.contains(og)) {
  344 + // Loop over obj_users.
  345 + for (auto const& ou: i1.second) {
  346 + if (auto i2 = obj[og].object_stream; i2 <= 0) {
  347 + t_object_to_obj_users[og].insert(ou);
  348 + } else {
  349 + t_object_to_obj_users[QPDFObjGen(i2, 0)].insert(ou);
  350 + }
  351 + }
  352 + }
  353 + }
  354 +
  355 + m->obj_user_to_objects = t_obj_user_to_objects;
  356 + m->object_to_obj_users = t_object_to_obj_users;
  357 +}
  358 +
72 359 void
73 360 Lin::linearizationWarning(std::string_view msg)
74 361 {
... ...
libqpdf/QPDF_optimization.cc deleted
1   -// See the "Optimization" section of the manual.
2   -
3   -#include <qpdf/QPDF_private.hh>
4   -
5   -#include <qpdf/QPDFExc.hh>
6   -#include <qpdf/QPDFObjectHandle_private.hh>
7   -#include <qpdf/QPDFWriter_private.hh>
8   -#include <qpdf/QTC.hh>
9   -
10   -using Lin = QPDF::Doc::Linearization;
11   -using Pages = QPDF::Doc::Pages;
12   -
13   -QPDF::ObjUser::ObjUser(user_e type) :
14   - ou_type(type)
15   -{
16   - qpdf_assert_debug(type == ou_root);
17   -}
18   -
19   -QPDF::ObjUser::ObjUser(user_e type, size_t pageno) :
20   - ou_type(type),
21   - pageno(pageno)
22   -{
23   - qpdf_assert_debug((type == ou_page) || (type == ou_thumb));
24   -}
25   -
26   -QPDF::ObjUser::ObjUser(user_e type, std::string const& key) :
27   - ou_type(type),
28   - key(key)
29   -{
30   - qpdf_assert_debug((type == ou_trailer_key) || (type == ou_root_key));
31   -}
32   -
33   -bool
34   -QPDF::ObjUser::operator<(ObjUser const& rhs) const
35   -{
36   - if (ou_type < rhs.ou_type) {
37   - return true;
38   - }
39   - if (ou_type == rhs.ou_type) {
40   - if (pageno < rhs.pageno) {
41   - return true;
42   - }
43   - if (pageno == rhs.pageno) {
44   - return key < rhs.key;
45   - }
46   - }
47   - return false;
48   -}
49   -
50   -QPDF::UpdateObjectMapsFrame::UpdateObjectMapsFrame(
51   - QPDF::ObjUser const& ou, QPDFObjectHandle oh, bool top) :
52   - ou(ou),
53   - oh(oh),
54   - top(top)
55   -{
56   -}
57   -
58   -void
59   -QPDF::optimize(
60   - std::map<int, int> const& object_stream_data,
61   - bool allow_changes,
62   - std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
63   -{
64   - m->lin.optimize_internal(object_stream_data, allow_changes, skip_stream_parameters);
65   -}
66   -
67   -void
68   -Lin::optimize(
69   - QPDFWriter::ObjTable const& obj, std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
70   -{
71   - optimize_internal(obj, true, skip_stream_parameters);
72   -}
73   -
74   -template <typename T>
75   -void
76   -Lin::optimize_internal(
77   - T const& object_stream_data,
78   - bool allow_changes,
79   - std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
80   -{
81   - if (!m->obj_user_to_objects.empty()) {
82   - // already optimized
83   - return;
84   - }
85   -
86   - // The PDF specification indicates that /Outlines is supposed to be an indirect reference. Force
87   - // it to be so if it exists and is direct. (This has been seen in the wild.)
88   - QPDFObjectHandle root = qpdf.getRoot();
89   - if (root.getKey("/Outlines").isDictionary()) {
90   - QPDFObjectHandle outlines = root.getKey("/Outlines");
91   - if (!outlines.isIndirect()) {
92   - root.replaceKey("/Outlines", qpdf.makeIndirectObject(outlines));
93   - }
94   - }
95   -
96   - // Traverse pages tree pushing all inherited resources down to the page level. This also
97   - // initializes m->all_pages.
98   - m->pages.pushInheritedAttributesToPage(allow_changes, false);
99   - // Traverse pages
100   -
101   - size_t n = 0;
102   - for (auto const& page: m->pages) {
103   - updateObjectMaps(ObjUser(ObjUser::ou_page, n), page, skip_stream_parameters);
104   - ++n;
105   - }
106   -
107   - // Traverse document-level items
108   - for (auto const& [key, value]: m->trailer.as_dictionary()) {
109   - if (key == "/Root") {
110   - // handled separately
111   - } else {
112   - if (!value.null()) {
113   - updateObjectMaps(
114   - ObjUser(ObjUser::ou_trailer_key, key), value, skip_stream_parameters);
115   - }
116   - }
117   - }
118   -
119   - for (auto const& [key, value]: root.as_dictionary()) {
120   - // Technically, /I keys from /Thread dictionaries are supposed to be handled separately, but
121   - // we are going to disregard that specification for now. There is loads of evidence that
122   - // pdlin and Acrobat both disregard things like this from time to time, so this is almost
123   - // certain not to cause any problems.
124   - if (!value.null()) {
125   - updateObjectMaps(ObjUser(ObjUser::ou_root_key, key), value, skip_stream_parameters);
126   - }
127   - }
128   -
129   - ObjUser root_ou = ObjUser(ObjUser::ou_root);
130   - auto root_og = QPDFObjGen(root.getObjGen());
131   - m->obj_user_to_objects[root_ou].insert(root_og);
132   - m->object_to_obj_users[root_og].insert(root_ou);
133   -
134   - filterCompressedObjects(object_stream_data);
135   -}
136   -
137   -void
138   -Lin::updateObjectMaps(
139   - ObjUser const& first_ou,
140   - QPDFObjectHandle first_oh,
141   - std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
142   -{
143   - QPDFObjGen::set visited;
144   - std::vector<UpdateObjectMapsFrame> pending;
145   - pending.emplace_back(first_ou, first_oh, true);
146   - // Traverse the object tree from this point taking care to avoid crossing page boundaries.
147   - std::unique_ptr<ObjUser> thumb_ou;
148   - while (!pending.empty()) {
149   - auto cur = pending.back();
150   - pending.pop_back();
151   -
152   - bool is_page_node = false;
153   -
154   - if (cur.oh.isDictionaryOfType("/Page")) {
155   - is_page_node = true;
156   - if (!cur.top) {
157   - continue;
158   - }
159   - }
160   -
161   - if (cur.oh.isIndirect()) {
162   - QPDFObjGen og(cur.oh.getObjGen());
163   - if (!visited.add(og)) {
164   - QTC::TC("qpdf", "QPDF opt loop detected");
165   - continue;
166   - }
167   - m->obj_user_to_objects[cur.ou].insert(og);
168   - m->object_to_obj_users[og].insert(cur.ou);
169   - }
170   -
171   - if (cur.oh.isArray()) {
172   - for (auto const& item: cur.oh.as_array()) {
173   - pending.emplace_back(cur.ou, item, false);
174   - }
175   - } else if (cur.oh.isDictionary() || cur.oh.isStream()) {
176   - QPDFObjectHandle dict = cur.oh;
177   - bool is_stream = cur.oh.isStream();
178   - int ssp = 0;
179   - if (is_stream) {
180   - dict = cur.oh.getDict();
181   - if (skip_stream_parameters) {
182   - ssp = skip_stream_parameters(cur.oh);
183   - }
184   - }
185   -
186   - for (auto& [key, value]: dict.as_dictionary()) {
187   - if (value.null()) {
188   - continue;
189   - }
190   -
191   - if (is_page_node && (key == "/Thumb")) {
192   - // Traverse page thumbnail dictionaries as a special case. There can only ever
193   - // be one /Thumb key on a page, and we see at most one page node per call.
194   - thumb_ou = std::make_unique<ObjUser>(ObjUser::ou_thumb, cur.ou.pageno);
195   - pending.emplace_back(*thumb_ou, dict.getKey(key), false);
196   - } else if (is_page_node && (key == "/Parent")) {
197   - // Don't traverse back up the page tree
198   - } else if (
199   - ((ssp >= 1) && (key == "/Length")) ||
200   - ((ssp >= 2) && ((key == "/Filter") || (key == "/DecodeParms")))) {
201   - // Don't traverse into stream parameters that we are not going to write.
202   - } else {
203   - pending.emplace_back(cur.ou, value, false);
204   - }
205   - }
206   - }
207   - }
208   -}
209   -
210   -void
211   -Lin::filterCompressedObjects(std::map<int, int> const& object_stream_data)
212   -{
213   - if (object_stream_data.empty()) {
214   - return;
215   - }
216   -
217   - // Transform object_to_obj_users and obj_user_to_objects so that they refer only to uncompressed
218   - // objects. If something is a user of a compressed object, then it is really a user of the
219   - // object stream that contains it.
220   -
221   - std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects;
222   - std::map<QPDFObjGen, std::set<ObjUser>> t_object_to_obj_users;
223   -
224   - for (auto const& i1: m->obj_user_to_objects) {
225   - ObjUser const& ou = i1.first;
226   - // Loop over objects.
227   - for (auto const& og: i1.second) {
228   - auto i2 = object_stream_data.find(og.getObj());
229   - if (i2 == object_stream_data.end()) {
230   - t_obj_user_to_objects[ou].insert(og);
231   - } else {
232   - t_obj_user_to_objects[ou].insert(QPDFObjGen(i2->second, 0));
233   - }
234   - }
235   - }
236   -
237   - for (auto const& i1: m->object_to_obj_users) {
238   - QPDFObjGen const& og = i1.first;
239   - // Loop over obj_users.
240   - for (auto const& ou: i1.second) {
241   - auto i2 = object_stream_data.find(og.getObj());
242   - if (i2 == object_stream_data.end()) {
243   - t_object_to_obj_users[og].insert(ou);
244   - } else {
245   - t_object_to_obj_users[QPDFObjGen(i2->second, 0)].insert(ou);
246   - }
247   - }
248   - }
249   -
250   - m->obj_user_to_objects = t_obj_user_to_objects;
251   - m->object_to_obj_users = t_object_to_obj_users;
252   -}
253   -
254   -void
255   -Lin::filterCompressedObjects(QPDFWriter::ObjTable const& obj)
256   -{
257   - if (obj.getStreamsEmpty()) {
258   - return;
259   - }
260   -
261   - // Transform object_to_obj_users and obj_user_to_objects so that they refer only to uncompressed
262   - // objects. If something is a user of a compressed object, then it is really a user of the
263   - // object stream that contains it.
264   -
265   - std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects;
266   - std::map<QPDFObjGen, std::set<ObjUser>> t_object_to_obj_users;
267   -
268   - for (auto const& i1: m->obj_user_to_objects) {
269   - ObjUser const& ou = i1.first;
270   - // Loop over objects.
271   - for (auto const& og: i1.second) {
272   - if (obj.contains(og)) {
273   - if (auto const& i2 = obj[og].object_stream; i2 <= 0) {
274   - t_obj_user_to_objects[ou].insert(og);
275   - } else {
276   - t_obj_user_to_objects[ou].insert(QPDFObjGen(i2, 0));
277   - }
278   - }
279   - }
280   - }
281   -
282   - for (auto const& i1: m->object_to_obj_users) {
283   - QPDFObjGen const& og = i1.first;
284   - if (obj.contains(og)) {
285   - // Loop over obj_users.
286   - for (auto const& ou: i1.second) {
287   - if (auto i2 = obj[og].object_stream; i2 <= 0) {
288   - t_object_to_obj_users[og].insert(ou);
289   - } else {
290   - t_object_to_obj_users[QPDFObjGen(i2, 0)].insert(ou);
291   - }
292   - }
293   - }
294   - }
295   -
296   - m->obj_user_to_objects = t_obj_user_to_objects;
297   - m->object_to_obj_users = t_object_to_obj_users;
298   -}