Commit 0c591c103960ebfbd77eebd0567e5529d2ded70a

Authored by m-holger
1 parent 0c8af4a3

Refactor `Doc` class: reorganize structure, move implementations, add inline functions and improve encapsulation.
libqpdf/QPDF.cc
... ... @@ -126,10 +126,10 @@ QPDF::QPDFVersion()
126 126 }
127 127  
128 128 QPDF::Members::Members(QPDF& qpdf) :
129   - doc(qpdf, *this),
130   - lin(doc.linearization()),
131   - objects(doc.objects()),
132   - pages(doc.pages()),
  129 + Doc(qpdf, this),
  130 + lin(qpdf, this),
  131 + objects(qpdf, this),
  132 + pages(qpdf, this),
133 133 log(QPDFLogger::defaultLogger()),
134 134 file(std::make_shared<InvalidInputSource>()),
135 135 encp(std::make_shared<EncryptionParameters>())
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -295,590 +295,570 @@ class QPDF::PatternFinder final: public InputSource::Finder
295 295 class QPDF::Doc
296 296 {
297 297 public:
  298 + class Encryption;
298 299 class JobSetter;
  300 + class Linearization;
  301 + class Objects;
  302 + class Pages;
299 303 class ParseGuard;
300 304 class Resolver;
301 305 class Writer;
302 306  
303   - class Encryption
  307 + Doc() = delete;
  308 + Doc(Doc const&) = delete;
  309 + Doc(Doc&&) = delete;
  310 + Doc& operator=(Doc const&) = delete;
  311 + Doc& operator=(Doc&&) = delete;
  312 + ~Doc() = default;
  313 +
  314 + Doc(QPDF& qpdf, QPDF::Members* m) :
  315 + qpdf(qpdf),
  316 + m(m)
304 317 {
305   - public:
306   - // This class holds data read from the encryption dictionary.
307   - Encryption(
308   - int V,
309   - int R,
310   - int Length_bytes,
311   - int P,
312   - std::string const& O,
313   - std::string const& U,
314   - std::string const& OE,
315   - std::string const& UE,
316   - std::string const& Perms,
317   - std::string const& id1,
318   - bool encrypt_metadata) :
319   - V(V),
320   - R(R),
321   - Length_bytes(Length_bytes),
322   - P(static_cast<unsigned long long>(P)),
323   - O(O),
324   - U(U),
325   - OE(OE),
326   - UE(UE),
327   - Perms(Perms),
328   - id1(id1),
329   - encrypt_metadata(encrypt_metadata)
330   - {
  318 + }
  319 +
  320 + inline Linearization& linearization();
  321 +
  322 + inline Objects& objects();
  323 +
  324 + inline Pages& pages();
  325 +
  326 + bool reconstructed_xref() const;
  327 +
  328 + QPDFAcroFormDocumentHelper&
  329 + acroform()
  330 + {
  331 + if (!acroform_) {
  332 + acroform_ = std::make_unique<QPDFAcroFormDocumentHelper>(qpdf);
331 333 }
332   - Encryption(int V, int R, int Length_bytes, bool encrypt_metadata) :
333   - V(V),
334   - R(R),
335   - Length_bytes(Length_bytes),
336   - encrypt_metadata(encrypt_metadata)
337   - {
  334 + return *acroform_;
  335 + }
  336 +
  337 + QPDFEmbeddedFileDocumentHelper&
  338 + embedded_files()
  339 + {
  340 + if (!embedded_files_) {
  341 + embedded_files_ = std::make_unique<QPDFEmbeddedFileDocumentHelper>(qpdf);
338 342 }
  343 + return *embedded_files_;
  344 + }
339 345  
340   - int getV() const;
341   - int getR() const;
342   - int getLengthBytes() const;
343   - int getP() const;
344   - // Bits in P are numbered from 1 as in the PDF spec.
345   - bool getP(size_t bit) const;
346   - std::string const& getO() const;
347   - std::string const& getU() const;
348   - std::string const& getOE() const;
349   - std::string const& getUE() const;
350   - std::string const& getPerms() const;
351   - std::string const& getId1() const;
352   - bool getEncryptMetadata() const;
353   - // Bits in P are numbered from 1 as in the PDF spec.
354   - void setP(size_t bit, bool val);
355   - void setP(unsigned long val);
356   - void setO(std::string const&);
357   - void setU(std::string const&);
358   - void setId1(std::string const& val);
359   - void setV5EncryptionParameters(
360   - std::string const& O,
361   - std::string const& OE,
362   - std::string const& U,
363   - std::string const& UE,
364   - std::string const& Perms);
365   -
366   - std::string compute_encryption_key(std::string const& password) const;
367   -
368   - bool
369   - check_owner_password(std::string& user_password, std::string const& owner_password) const;
370   -
371   - bool check_user_password(std::string const& user_password) const;
372   -
373   - std::string
374   - recover_encryption_key_with_password(std::string const& password, bool& perms_valid) const;
375   -
376   - void compute_encryption_O_U(char const* user_password, char const* owner_password);
377   -
378   - std::string
379   - compute_encryption_parameters_V5(char const* user_password, char const* owner_password);
380   -
381   - std::string compute_parameters(char const* user_password, char const* owner_password);
  346 + QPDFOutlineDocumentHelper&
  347 + outlines()
  348 + {
  349 + if (!outlines_) {
  350 + outlines_ = std::make_unique<QPDFOutlineDocumentHelper>(qpdf);
  351 + }
  352 + return *outlines_;
  353 + }
382 354  
383   - private:
384   - static constexpr unsigned int OU_key_bytes_V4 = 16; // ( == sizeof(MD5::Digest)
385   -
386   - Encryption(Encryption const&) = delete;
387   - Encryption& operator=(Encryption const&) = delete;
388   -
389   - std::string hash_V5(
390   - std::string const& password, std::string const& salt, std::string const& udata) const;
391   - std::string
392   - compute_O_value(std::string const& user_password, std::string const& owner_password) const;
393   - std::string compute_U_value(std::string const& user_password) const;
394   - std::string compute_encryption_key_from_password(std::string const& password) const;
395   - std::string recover_encryption_key_with_password(std::string const& password) const;
396   - bool check_owner_password_V4(
397   - std::string& user_password, std::string const& owner_password) const;
398   - bool check_owner_password_V5(std::string const& owner_passworda) const;
399   - std::string compute_Perms_value_V5_clear() const;
400   - std::string compute_O_rc4_key(
401   - std::string const& user_password, std::string const& owner_password) const;
402   - std::string compute_U_value_R2(std::string const& user_password) const;
403   - std::string compute_U_value_R3(std::string const& user_password) const;
404   - bool check_user_password_V4(std::string const& user_password) const;
405   - bool check_user_password_V5(std::string const& user_password) const;
406   -
407   - int V;
408   - int R;
409   - int Length_bytes;
410   - std::bitset<32> P{0xfffffffc}; // Specification always requires bits 1 and 2 to be cleared.
411   - std::string O;
412   - std::string U;
413   - std::string OE;
414   - std::string UE;
415   - std::string Perms;
416   - std::string id1;
417   - bool encrypt_metadata;
418   - }; // class QPDF::Doc::Encryption
419   -
420   - class Linearization
  355 + QPDFPageDocumentHelper&
  356 + page_dh()
421 357 {
422   - public:
423   - Linearization() = delete;
424   - Linearization(Linearization const&) = delete;
425   - Linearization(Linearization&&) = delete;
426   - Linearization& operator=(Linearization const&) = delete;
427   - Linearization& operator=(Linearization&&) = delete;
428   - ~Linearization() = default;
429   -
430   - Linearization(QPDF& qpdf, QPDF::Members* m) :
431   - qpdf(qpdf),
432   - m(m)
433   - {
  358 + if (!page_dh_) {
  359 + page_dh_ = std::make_unique<QPDFPageDocumentHelper>(qpdf);
434 360 }
  361 + return *page_dh_;
  362 + }
435 363  
436   - // For QPDFWriter:
437   -
438   - template <typename T>
439   - void optimize_internal(
440   - T const& object_stream_data,
441   - bool allow_changes = true,
442   - std::function<int(QPDFObjectHandle&)> skip_stream_parameters = nullptr);
443   - void optimize(
444   - QPDFWriter::ObjTable const& obj,
445   - std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
446   -
447   - // Get lists of all objects in order according to the part of a linearized file that they
448   - // belong to.
449   - void getLinearizedParts(
450   - QPDFWriter::ObjTable const& obj,
451   - std::vector<QPDFObjectHandle>& part4,
452   - std::vector<QPDFObjectHandle>& part6,
453   - std::vector<QPDFObjectHandle>& part7,
454   - std::vector<QPDFObjectHandle>& part8,
455   - std::vector<QPDFObjectHandle>& part9);
456   -
457   - void generateHintStream(
458   - QPDFWriter::NewObjTable const& new_obj,
459   - QPDFWriter::ObjTable const& obj,
460   - std::string& hint_stream,
461   - int& S,
462   - int& O,
463   - bool compressed);
464   -
465   - // methods to support linearization checking -- implemented in QPDF_linearization.cc
466   -
467   - void readLinearizationData();
468   - void checkLinearizationInternal();
469   - void dumpLinearizationDataInternal();
470   - void linearizationWarning(std::string_view);
471   - qpdf::Dictionary readHintStream(Pipeline&, qpdf_offset_t offset, size_t length);
472   - void readHPageOffset(BitStream);
473   - void readHSharedObject(BitStream);
474   - void readHGeneric(BitStream, HGeneric&);
475   - qpdf_offset_t maxEnd(ObjUser const& ou);
476   - qpdf_offset_t getLinearizationOffset(QPDFObjGen);
477   - QPDFObjectHandle
478   - getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data);
479   - QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj);
480   - int lengthNextN(int first_object, int n);
481   - void checkHPageOffset(
482   - std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj);
483   - void checkHSharedObject(
484   - std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj);
485   - void checkHOutlines();
486   - void dumpHPageOffset();
487   - void dumpHSharedObject();
488   - void dumpHGeneric(HGeneric&);
489   - qpdf_offset_t adjusted_offset(qpdf_offset_t offset);
490   - template <typename T>
491   - void calculateLinearizationData(T const& object_stream_data);
492   - template <typename T>
493   - void pushOutlinesToPart(
494   - std::vector<QPDFObjectHandle>& part,
495   - std::set<QPDFObjGen>& lc_outlines,
496   - T const& object_stream_data);
497   - int outputLengthNextN(
498   - int in_object,
499   - int n,
500   - QPDFWriter::NewObjTable const& new_obj,
501   - QPDFWriter::ObjTable const& obj);
502   - void calculateHPageOffset(
503   - QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj);
504   - void calculateHSharedObject(
505   - QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj);
506   - void
507   - calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj);
508   - void writeHPageOffset(BitWriter&);
509   - void writeHSharedObject(BitWriter&);
510   - void writeHGeneric(BitWriter&, HGeneric&);
511   -
512   - // Methods to support optimization
513   -
514   - void updateObjectMaps(
515   - ObjUser const& ou,
516   - QPDFObjectHandle oh,
517   - std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
518   - void filterCompressedObjects(std::map<int, int> const& object_stream_data);
519   - void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data);
  364 + QPDFPageLabelDocumentHelper&
  365 + page_labels()
  366 + {
  367 + if (!page_labels_) {
  368 + page_labels_ = std::make_unique<QPDFPageLabelDocumentHelper>(qpdf);
  369 + }
  370 + return *page_labels_;
  371 + }
520 372  
521   - private:
522   - QPDF& qpdf;
523   - QPDF::Members* m;
524   - };
  373 + private:
  374 + QPDF& qpdf;
  375 + QPDF::Members* m;
525 376  
526   - class Objects
  377 + // Document Helpers;
  378 + std::unique_ptr<QPDFAcroFormDocumentHelper> acroform_;
  379 + std::unique_ptr<QPDFEmbeddedFileDocumentHelper> embedded_files_;
  380 + std::unique_ptr<QPDFOutlineDocumentHelper> outlines_;
  381 + std::unique_ptr<QPDFPageDocumentHelper> page_dh_;
  382 + std::unique_ptr<QPDFPageLabelDocumentHelper> page_labels_;
  383 +};
  384 +
  385 +class QPDF::Doc::Encryption
  386 +{
  387 + public:
  388 + // This class holds data read from the encryption dictionary.
  389 + Encryption(
  390 + int V,
  391 + int R,
  392 + int Length_bytes,
  393 + int P,
  394 + std::string const& O,
  395 + std::string const& U,
  396 + std::string const& OE,
  397 + std::string const& UE,
  398 + std::string const& Perms,
  399 + std::string const& id1,
  400 + bool encrypt_metadata) :
  401 + V(V),
  402 + R(R),
  403 + Length_bytes(Length_bytes),
  404 + P(static_cast<unsigned long long>(P)),
  405 + O(O),
  406 + U(U),
  407 + OE(OE),
  408 + UE(UE),
  409 + Perms(Perms),
  410 + id1(id1),
  411 + encrypt_metadata(encrypt_metadata)
527 412 {
528   - public:
529   - class Foreign
530   - {
531   - class Copier
532   - {
533   - public:
534   - Copier(QPDF& qpdf) :
535   - qpdf(qpdf)
536   - {
537   - }
  413 + }
  414 + Encryption(int V, int R, int Length_bytes, bool encrypt_metadata) :
  415 + V(V),
  416 + R(R),
  417 + Length_bytes(Length_bytes),
  418 + encrypt_metadata(encrypt_metadata)
  419 + {
  420 + }
538 421  
539   - QPDFObjectHandle copied(QPDFObjectHandle const& foreign);
  422 + int getV() const;
  423 + int getR() const;
  424 + int getLengthBytes() const;
  425 + int getP() const;
  426 + // Bits in P are numbered from 1 as in the PDF spec.
  427 + bool getP(size_t bit) const;
  428 + std::string const& getO() const;
  429 + std::string const& getU() const;
  430 + std::string const& getOE() const;
  431 + std::string const& getUE() const;
  432 + std::string const& getPerms() const;
  433 + std::string const& getId1() const;
  434 + bool getEncryptMetadata() const;
  435 + // Bits in P are numbered from 1 as in the PDF spec.
  436 + void setP(size_t bit, bool val);
  437 + void setP(unsigned long val);
  438 + void setO(std::string const&);
  439 + void setU(std::string const&);
  440 + void setId1(std::string const& val);
  441 + void setV5EncryptionParameters(
  442 + std::string const& O,
  443 + std::string const& OE,
  444 + std::string const& U,
  445 + std::string const& UE,
  446 + std::string const& Perms);
540 447  
541   - private:
542   - QPDFObjectHandle
543   - replace_indirect_object(QPDFObjectHandle const& foreign, bool top = false);
544   - void reserve_objects(QPDFObjectHandle const& foreign, bool top = false);
  448 + std::string compute_encryption_key(std::string const& password) const;
545 449  
546   - QPDF& qpdf;
547   - std::map<QPDFObjGen, QPDFObjectHandle> object_map;
548   - std::vector<QPDFObjectHandle> to_copy;
549   - QPDFObjGen::set visiting;
550   - };
  450 + bool check_owner_password(std::string& user_password, std::string const& owner_password) const;
551 451  
552   - public:
553   - Foreign(QPDF& qpdf) :
554   - qpdf(qpdf)
555   - {
556   - }
  452 + bool check_user_password(std::string const& user_password) const;
557 453  
558   - Foreign() = delete;
559   - Foreign(Foreign const&) = delete;
560   - Foreign(Foreign&&) = delete;
561   - Foreign& operator=(Foreign const&) = delete;
562   - Foreign& operator=(Foreign&&) = delete;
563   - ~Foreign() = default;
  454 + std::string
  455 + recover_encryption_key_with_password(std::string const& password, bool& perms_valid) const;
564 456  
565   - // Return a local handle to the foreign object. Copy the foreign object if necessary.
566   - QPDFObjectHandle
567   - copied(QPDFObjectHandle const& foreign)
568   - {
569   - return copier(foreign).copied(foreign);
570   - }
  457 + void compute_encryption_O_U(char const* user_password, char const* owner_password);
571 458  
572   - private:
573   - Copier& copier(QPDFObjectHandle const& foreign);
  459 + std::string
  460 + compute_encryption_parameters_V5(char const* user_password, char const* owner_password);
574 461  
575   - QPDF& qpdf;
576   - std::map<unsigned long long, Copier> copiers;
577   - }; // class QPDF::Doc::Objects::Foreign
  462 + std::string compute_parameters(char const* user_password, char const* owner_password);
578 463  
579   - class Streams
580   - {
581   - // Copier manages the copying of streams into this PDF. It is used both for copying
582   - // local and foreign streams.
583   - class Copier;
  464 + private:
  465 + static constexpr unsigned int OU_key_bytes_V4 = 16; // ( == sizeof(MD5::Digest)
  466 +
  467 + Encryption(Encryption const&) = delete;
  468 + Encryption& operator=(Encryption const&) = delete;
  469 +
  470 + std::string
  471 + hash_V5(std::string const& password, std::string const& salt, std::string const& udata) const;
  472 + std::string
  473 + compute_O_value(std::string const& user_password, std::string const& owner_password) const;
  474 + std::string compute_U_value(std::string const& user_password) const;
  475 + std::string compute_encryption_key_from_password(std::string const& password) const;
  476 + std::string recover_encryption_key_with_password(std::string const& password) const;
  477 + bool
  478 + check_owner_password_V4(std::string& user_password, std::string const& owner_password) const;
  479 + bool check_owner_password_V5(std::string const& owner_passworda) const;
  480 + std::string compute_Perms_value_V5_clear() const;
  481 + std::string
  482 + compute_O_rc4_key(std::string const& user_password, std::string const& owner_password) const;
  483 + std::string compute_U_value_R2(std::string const& user_password) const;
  484 + std::string compute_U_value_R3(std::string const& user_password) const;
  485 + bool check_user_password_V4(std::string const& user_password) const;
  486 + bool check_user_password_V5(std::string const& user_password) const;
  487 +
  488 + int V;
  489 + int R;
  490 + int Length_bytes;
  491 + std::bitset<32> P{0xfffffffc}; // Specification always requires bits 1 and 2 to be cleared.
  492 + std::string O;
  493 + std::string U;
  494 + std::string OE;
  495 + std::string UE;
  496 + std::string Perms;
  497 + std::string id1;
  498 + bool encrypt_metadata;
  499 +}; // class QPDF::Doc::Encryption
  500 +
  501 +class QPDF::Doc::Linearization
  502 +{
  503 + public:
  504 + Linearization() = delete;
  505 + Linearization(Linearization const&) = delete;
  506 + Linearization(Linearization&&) = delete;
  507 + Linearization& operator=(Linearization const&) = delete;
  508 + Linearization& operator=(Linearization&&) = delete;
  509 + ~Linearization() = default;
  510 +
  511 + Linearization(QPDF& qpdf, QPDF::Members* m) :
  512 + qpdf(qpdf),
  513 + m(m)
  514 + {
  515 + }
584 516  
585   - public:
586   - Streams(QPDF& qpdf);
  517 + // For QPDFWriter:
  518 +
  519 + template <typename T>
  520 + void optimize_internal(
  521 + T const& object_stream_data,
  522 + bool allow_changes = true,
  523 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters = nullptr);
  524 + void optimize(
  525 + QPDFWriter::ObjTable const& obj,
  526 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
  527 +
  528 + // Get lists of all objects in order according to the part of a linearized file that they
  529 + // belong to.
  530 + void getLinearizedParts(
  531 + QPDFWriter::ObjTable const& obj,
  532 + std::vector<QPDFObjectHandle>& part4,
  533 + std::vector<QPDFObjectHandle>& part6,
  534 + std::vector<QPDFObjectHandle>& part7,
  535 + std::vector<QPDFObjectHandle>& part8,
  536 + std::vector<QPDFObjectHandle>& part9);
  537 +
  538 + void generateHintStream(
  539 + QPDFWriter::NewObjTable const& new_obj,
  540 + QPDFWriter::ObjTable const& obj,
  541 + std::string& hint_stream,
  542 + int& S,
  543 + int& O,
  544 + bool compressed);
  545 +
  546 + // methods to support linearization checking -- implemented in QPDF_linearization.cc
  547 +
  548 + void readLinearizationData();
  549 + void checkLinearizationInternal();
  550 + void dumpLinearizationDataInternal();
  551 + void linearizationWarning(std::string_view);
  552 + qpdf::Dictionary readHintStream(Pipeline&, qpdf_offset_t offset, size_t length);
  553 + void readHPageOffset(BitStream);
  554 + void readHSharedObject(BitStream);
  555 + void readHGeneric(BitStream, HGeneric&);
  556 + qpdf_offset_t maxEnd(ObjUser const& ou);
  557 + qpdf_offset_t getLinearizationOffset(QPDFObjGen);
  558 + QPDFObjectHandle
  559 + getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data);
  560 + QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj);
  561 + int lengthNextN(int first_object, int n);
  562 + void
  563 + checkHPageOffset(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj);
  564 + void
  565 + checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj);
  566 + void checkHOutlines();
  567 + void dumpHPageOffset();
  568 + void dumpHSharedObject();
  569 + void dumpHGeneric(HGeneric&);
  570 + qpdf_offset_t adjusted_offset(qpdf_offset_t offset);
  571 + template <typename T>
  572 + void calculateLinearizationData(T const& object_stream_data);
  573 + template <typename T>
  574 + void pushOutlinesToPart(
  575 + std::vector<QPDFObjectHandle>& part,
  576 + std::set<QPDFObjGen>& lc_outlines,
  577 + T const& object_stream_data);
  578 + int outputLengthNextN(
  579 + int in_object,
  580 + int n,
  581 + QPDFWriter::NewObjTable const& new_obj,
  582 + QPDFWriter::ObjTable const& obj);
  583 + void
  584 + calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj);
  585 + void
  586 + calculateHSharedObject(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj);
  587 + void calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj);
  588 + void writeHPageOffset(BitWriter&);
  589 + void writeHSharedObject(BitWriter&);
  590 + void writeHGeneric(BitWriter&, HGeneric&);
587 591  
588   - Streams() = delete;
589   - Streams(Streams const&) = delete;
590   - Streams(Streams&&) = delete;
591   - Streams& operator=(Streams const&) = delete;
592   - Streams& operator=(Streams&&) = delete;
593   - ~Streams() = default;
  592 + // Methods to support optimization
594 593  
595   - public:
596   - static bool
597   - pipeStreamData(
598   - QPDF* qpdf,
599   - QPDFObjGen og,
600   - qpdf_offset_t offset,
601   - size_t length,
602   - QPDFObjectHandle dict,
603   - bool is_root_metadata,
604   - Pipeline* pipeline,
605   - bool suppress_warnings,
606   - bool will_retry)
607   - {
608   - return qpdf->pipeStreamData(
609   - og,
610   - offset,
611   - length,
612   - dict,
613   - is_root_metadata,
614   - pipeline,
615   - suppress_warnings,
616   - will_retry);
617   - }
  594 + void updateObjectMaps(
  595 + ObjUser const& ou,
  596 + QPDFObjectHandle oh,
  597 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
  598 + void filterCompressedObjects(std::map<int, int> const& object_stream_data);
  599 + void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data);
618 600  
619   - QPDF&
620   - qpdf() const
621   - {
622   - return qpdf_;
623   - }
  601 + private:
  602 + QPDF& qpdf;
  603 + QPDF::Members* m;
  604 +};
624 605  
625   - std::shared_ptr<Copier>&
626   - copier()
  606 +class QPDF::Doc::Objects
  607 +{
  608 + public:
  609 + class Foreign
  610 + {
  611 + class Copier
  612 + {
  613 + public:
  614 + Copier(QPDF& qpdf) :
  615 + qpdf(qpdf)
627 616 {
628   - return copier_;
629 617 }
630 618  
631   - bool immediate_copy_from() const;
  619 + QPDFObjectHandle copied(QPDFObjectHandle const& foreign);
632 620  
633 621 private:
634   - QPDF& qpdf_;
  622 + QPDFObjectHandle
  623 + replace_indirect_object(QPDFObjectHandle const& foreign, bool top = false);
  624 + void reserve_objects(QPDFObjectHandle const& foreign, bool top = false);
635 625  
636   - std::shared_ptr<Copier> copier_;
637   - }; // class QPDF::Doc::Objects::Streams
  626 + QPDF& qpdf;
  627 + std::map<QPDFObjGen, QPDFObjectHandle> object_map;
  628 + std::vector<QPDFObjectHandle> to_copy;
  629 + QPDFObjGen::set visiting;
  630 + };
638 631  
639 632 public:
640   - Objects() = delete;
641   - Objects(Objects const&) = delete;
642   - Objects(Objects&&) = delete;
643   - Objects& operator=(Objects const&) = delete;
644   - Objects& operator=(Objects&&) = delete;
645   - ~Objects() = default;
646   -
647   - Objects(QPDF& qpdf, QPDF::Members* m) :
648   - qpdf(qpdf),
649   - m(m),
650   - foreign_(qpdf),
651   - streams_(qpdf)
  633 + Foreign(QPDF& qpdf) :
  634 + qpdf(qpdf)
652 635 {
653 636 }
654 637  
655   - Foreign&
656   - foreign()
657   - {
658   - return foreign_;
659   - }
  638 + Foreign() = delete;
  639 + Foreign(Foreign const&) = delete;
  640 + Foreign(Foreign&&) = delete;
  641 + Foreign& operator=(Foreign const&) = delete;
  642 + Foreign& operator=(Foreign&&) = delete;
  643 + ~Foreign() = default;
660 644  
661   - Streams&
662   - streams()
  645 + // Return a local handle to the foreign object. Copy the foreign object if necessary.
  646 + QPDFObjectHandle
  647 + copied(QPDFObjectHandle const& foreign)
663 648 {
664   - return streams_;
  649 + return copier(foreign).copied(foreign);
665 650 }
666 651  
667   - void parse(char const* password);
668   - std::shared_ptr<QPDFObject> const& resolve(QPDFObjGen og);
669   - void inParse(bool);
670   - QPDFObjGen nextObjGen();
671   - QPDFObjectHandle newIndirect(QPDFObjGen, std::shared_ptr<QPDFObject> const&);
672   - void updateCache(
673   - QPDFObjGen og,
674   - std::shared_ptr<QPDFObject> const& object,
675   - qpdf_offset_t end_before_space,
676   - qpdf_offset_t end_after_space,
677   - bool destroy = true);
678   - bool resolveXRefTable();
679   - QPDFObjectHandle readObjectAtOffset(
680   - qpdf_offset_t offset, std::string const& description, bool skip_cache_if_in_xref);
681   - QPDFTokenizer::Token readToken(InputSource& input, size_t max_len = 0);
682   - QPDFObjectHandle makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj);
683   - std::shared_ptr<QPDFObject> getObjectForParser(int id, int gen, bool parse_pdf);
684   - std::shared_ptr<QPDFObject> getObjectForJSON(int id, int gen);
685   - size_t tableSize();
686   -
687   - // For QPDFWriter:
688   -
689   - std::map<QPDFObjGen, QPDFXRefEntry> const& getXRefTableInternal();
690   - // Get a list of objects that would be permitted in an object stream.
691   - template <typename T>
692   - std::vector<T> getCompressibleObjGens();
693   - std::vector<QPDFObjGen> getCompressibleObjVector();
694   - std::vector<bool> getCompressibleObjSet();
695   -
696 652 private:
697   - void setTrailer(QPDFObjectHandle obj);
698   - void reconstruct_xref(QPDFExc& e, bool found_startxref = true);
699   - void read_xref(qpdf_offset_t offset, bool in_stream_recovery = false);
700   - bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes);
701   - bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
702   - bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
703   - qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
704   - qpdf_offset_t read_xrefStream(qpdf_offset_t offset, bool in_stream_recovery = false);
705   - qpdf_offset_t processXRefStream(
706   - qpdf_offset_t offset, QPDFObjectHandle& xref_stream, bool in_stream_recovery = false);
707   - std::pair<int, std::array<int, 3>>
708   - processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);
709   - int processXRefSize(
710   - QPDFObjectHandle& dict,
711   - int entry_size,
712   - std::function<QPDFExc(std::string_view)> damaged);
713   - std::pair<int, std::vector<std::pair<int, int>>> processXRefIndex(
714   - QPDFObjectHandle& dict,
715   - int max_num_entries,
716   - std::function<QPDFExc(std::string_view)> damaged);
717   - void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2);
718   - void insertFreeXrefEntry(QPDFObjGen);
719   - QPDFObjectHandle readTrailer();
720   - QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og);
721   - void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
722   - void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
723   - QPDFObjectHandle
724   - readObjectInStream(qpdf::is::OffsetBuffer& input, int stream_id, int obj_id);
725   - size_t recoverStreamLength(
726   - std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset);
727   -
728   - QPDFObjGen read_object_start(qpdf_offset_t offset);
729   - void readObjectAtOffset(
730   - bool attempt_recovery,
731   - qpdf_offset_t offset,
732   - std::string const& description,
733   - QPDFObjGen exp_og);
734   - void resolveObjectsInStream(int obj_stream_number);
735   - bool isCached(QPDFObjGen og);
736   - bool isUnresolved(QPDFObjGen og);
737   - void setLastObjectDescription(std::string const& description, QPDFObjGen og);
  653 + Copier& copier(QPDFObjectHandle const& foreign);
738 654  
739 655 QPDF& qpdf;
740   - QPDF::Members* m;
741   -
742   - Foreign foreign_;
743   - Streams streams_;
744   - }; // class QPDF::Doc::Objects
  656 + std::map<unsigned long long, Copier> copiers;
  657 + }; // class QPDF::Doc::Objects::Foreign
745 658  
746   - // This class is used to represent a PDF Pages tree.
747   - class Pages
  659 + class Streams
748 660 {
  661 + // Copier manages the copying of streams into this PDF. It is used both for copying
  662 + // local and foreign streams.
  663 + class Copier;
  664 +
  665 + public:
  666 + Streams(QPDF& qpdf);
  667 +
  668 + Streams() = delete;
  669 + Streams(Streams const&) = delete;
  670 + Streams(Streams&&) = delete;
  671 + Streams& operator=(Streams const&) = delete;
  672 + Streams& operator=(Streams&&) = delete;
  673 + ~Streams() = default;
  674 +
749 675 public:
750   - Pages() = delete;
751   - Pages(Pages const&) = delete;
752   - Pages(Pages&&) = delete;
753   - Pages& operator=(Pages const&) = delete;
754   - Pages& operator=(Pages&&) = delete;
755   - ~Pages() = default;
756   -
757   - Pages(QPDF& qpdf, QPDF::Members* m) :
758   - qpdf(qpdf),
759   - m(m)
  676 + static bool
  677 + pipeStreamData(
  678 + QPDF* qpdf,
  679 + QPDFObjGen og,
  680 + qpdf_offset_t offset,
  681 + size_t length,
  682 + QPDFObjectHandle dict,
  683 + bool is_root_metadata,
  684 + Pipeline* pipeline,
  685 + bool suppress_warnings,
  686 + bool will_retry)
760 687 {
  688 + return qpdf->pipeStreamData(
  689 + og,
  690 + offset,
  691 + length,
  692 + dict,
  693 + is_root_metadata,
  694 + pipeline,
  695 + suppress_warnings,
  696 + will_retry);
761 697 }
762 698  
763   - void getAllPagesInternal(
764   - QPDFObjectHandle cur_pages,
765   - QPDFObjGen::set& visited,
766   - QPDFObjGen::set& seen,
767   - bool media_box,
768   - bool resources);
769   - void insertPage(QPDFObjectHandle newpage, int pos);
770   - void flattenPagesTree();
771   - void insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate);
772   - void pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys);
773   - void pushInheritedAttributesToPageInternal(
774   - QPDFObjectHandle,
775   - std::map<std::string, std::vector<QPDFObjectHandle>>&,
776   - bool allow_changes,
777   - bool warn_skipped_keys);
  699 + QPDF&
  700 + qpdf() const
  701 + {
  702 + return qpdf_;
  703 + }
  704 +
  705 + std::shared_ptr<Copier>&
  706 + copier()
  707 + {
  708 + return copier_;
  709 + }
  710 +
  711 + bool immediate_copy_from() const;
778 712  
779 713 private:
780   - QPDF& qpdf;
781   - QPDF::Members* m;
782   - }; // class QPDF::Doc::Pages
  714 + QPDF& qpdf_;
783 715  
784   - Doc() = delete;
785   - Doc(Doc const&) = delete;
786   - Doc(Doc&&) = delete;
787   - Doc& operator=(Doc const&) = delete;
788   - Doc& operator=(Doc&&) = delete;
789   - ~Doc() = default;
  716 + std::shared_ptr<Copier> copier_;
  717 + }; // class QPDF::Doc::Objects::Streams
790 718  
791   - Doc(QPDF& qpdf, QPDF::Members& m) :
  719 + public:
  720 + Objects() = delete;
  721 + Objects(Objects const&) = delete;
  722 + Objects(Objects&&) = delete;
  723 + Objects& operator=(Objects const&) = delete;
  724 + Objects& operator=(Objects&&) = delete;
  725 + ~Objects() = default;
  726 +
  727 + Objects(QPDF& qpdf, QPDF::Members* m) :
792 728 qpdf(qpdf),
793 729 m(m),
794   - lin_(qpdf, &m),
795   - objects_(qpdf, &m),
796   - pages_(qpdf, &m)
  730 + foreign_(qpdf),
  731 + streams_(qpdf)
797 732 {
798 733 }
799 734  
800   - Linearization&
801   - linearization()
802   - {
803   - return lin_;
804   - };
805   -
806   - Objects&
807   - objects()
  735 + Foreign&
  736 + foreign()
808 737 {
809   - return objects_;
810   - };
  738 + return foreign_;
  739 + }
811 740  
812   - Pages&
813   - pages()
  741 + Streams&
  742 + streams()
814 743 {
815   - return pages_;
  744 + return streams_;
816 745 }
817 746  
818   - bool reconstructed_xref() const;
  747 + void parse(char const* password);
  748 + std::shared_ptr<QPDFObject> const& resolve(QPDFObjGen og);
  749 + void inParse(bool);
  750 + QPDFObjGen nextObjGen();
  751 + QPDFObjectHandle newIndirect(QPDFObjGen, std::shared_ptr<QPDFObject> const&);
  752 + void updateCache(
  753 + QPDFObjGen og,
  754 + std::shared_ptr<QPDFObject> const& object,
  755 + qpdf_offset_t end_before_space,
  756 + qpdf_offset_t end_after_space,
  757 + bool destroy = true);
  758 + bool resolveXRefTable();
  759 + QPDFObjectHandle readObjectAtOffset(
  760 + qpdf_offset_t offset, std::string const& description, bool skip_cache_if_in_xref);
  761 + QPDFTokenizer::Token readToken(InputSource& input, size_t max_len = 0);
  762 + QPDFObjectHandle makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj);
  763 + std::shared_ptr<QPDFObject> getObjectForParser(int id, int gen, bool parse_pdf);
  764 + std::shared_ptr<QPDFObject> getObjectForJSON(int id, int gen);
  765 + size_t tableSize();
  766 +
  767 + // For QPDFWriter:
  768 +
  769 + std::map<QPDFObjGen, QPDFXRefEntry> const& getXRefTableInternal();
  770 + // Get a list of objects that would be permitted in an object stream.
  771 + template <typename T>
  772 + std::vector<T> getCompressibleObjGens();
  773 + std::vector<QPDFObjGen> getCompressibleObjVector();
  774 + std::vector<bool> getCompressibleObjSet();
819 775  
820   - QPDFAcroFormDocumentHelper&
821   - acroform()
822   - {
823   - if (!acroform_) {
824   - acroform_ = std::make_unique<QPDFAcroFormDocumentHelper>(qpdf);
825   - }
826   - return *acroform_;
827   - }
  776 + private:
  777 + void setTrailer(QPDFObjectHandle obj);
  778 + void reconstruct_xref(QPDFExc& e, bool found_startxref = true);
  779 + void read_xref(qpdf_offset_t offset, bool in_stream_recovery = false);
  780 + bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes);
  781 + bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
  782 + bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
  783 + qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
  784 + qpdf_offset_t read_xrefStream(qpdf_offset_t offset, bool in_stream_recovery = false);
  785 + qpdf_offset_t processXRefStream(
  786 + qpdf_offset_t offset, QPDFObjectHandle& xref_stream, bool in_stream_recovery = false);
  787 + std::pair<int, std::array<int, 3>>
  788 + processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);
  789 + int processXRefSize(
  790 + QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged);
  791 + std::pair<int, std::vector<std::pair<int, int>>> processXRefIndex(
  792 + QPDFObjectHandle& dict,
  793 + int max_num_entries,
  794 + std::function<QPDFExc(std::string_view)> damaged);
  795 + void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2);
  796 + void insertFreeXrefEntry(QPDFObjGen);
  797 + QPDFObjectHandle readTrailer();
  798 + QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og);
  799 + void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
  800 + void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
  801 + QPDFObjectHandle readObjectInStream(qpdf::is::OffsetBuffer& input, int stream_id, int obj_id);
  802 + size_t recoverStreamLength(
  803 + std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset);
  804 +
  805 + QPDFObjGen read_object_start(qpdf_offset_t offset);
  806 + void readObjectAtOffset(
  807 + bool attempt_recovery,
  808 + qpdf_offset_t offset,
  809 + std::string const& description,
  810 + QPDFObjGen exp_og);
  811 + void resolveObjectsInStream(int obj_stream_number);
  812 + bool isCached(QPDFObjGen og);
  813 + bool isUnresolved(QPDFObjGen og);
  814 + void setLastObjectDescription(std::string const& description, QPDFObjGen og);
828 815  
829   - QPDFEmbeddedFileDocumentHelper&
830   - embedded_files()
831   - {
832   - if (!embedded_files_) {
833   - embedded_files_ = std::make_unique<QPDFEmbeddedFileDocumentHelper>(qpdf);
834   - }
835   - return *embedded_files_;
836   - }
  816 + QPDF& qpdf;
  817 + QPDF::Members* m;
837 818  
838   - QPDFOutlineDocumentHelper&
839   - outlines()
840   - {
841   - if (!outlines_) {
842   - outlines_ = std::make_unique<QPDFOutlineDocumentHelper>(qpdf);
843   - }
844   - return *outlines_;
845   - }
  819 + Foreign foreign_;
  820 + Streams streams_;
  821 +}; // class QPDF::Doc::Objects
846 822  
847   - QPDFPageDocumentHelper&
848   - page_dh()
  823 +// This class is used to represent a PDF Pages tree.
  824 +class QPDF::Doc::Pages
  825 +{
  826 + public:
  827 + Pages() = delete;
  828 + Pages(Pages const&) = delete;
  829 + Pages(Pages&&) = delete;
  830 + Pages& operator=(Pages const&) = delete;
  831 + Pages& operator=(Pages&&) = delete;
  832 + ~Pages() = default;
  833 +
  834 + Pages(QPDF& qpdf, QPDF::Members* m) :
  835 + qpdf(qpdf),
  836 + m(m)
849 837 {
850   - if (!page_dh_) {
851   - page_dh_ = std::make_unique<QPDFPageDocumentHelper>(qpdf);
852   - }
853   - return *page_dh_;
854 838 }
855 839  
856   - QPDFPageLabelDocumentHelper&
857   - page_labels()
858   - {
859   - if (!page_labels_) {
860   - page_labels_ = std::make_unique<QPDFPageLabelDocumentHelper>(qpdf);
861   - }
862   - return *page_labels_;
863   - }
  840 + void getAllPagesInternal(
  841 + QPDFObjectHandle cur_pages,
  842 + QPDFObjGen::set& visited,
  843 + QPDFObjGen::set& seen,
  844 + bool media_box,
  845 + bool resources);
  846 + void insertPage(QPDFObjectHandle newpage, int pos);
  847 + void flattenPagesTree();
  848 + void insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate);
  849 + void pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys);
  850 + void pushInheritedAttributesToPageInternal(
  851 + QPDFObjectHandle,
  852 + std::map<std::string, std::vector<QPDFObjectHandle>>&,
  853 + bool allow_changes,
  854 + bool warn_skipped_keys);
864 855  
865 856 private:
866 857 QPDF& qpdf;
867   - QPDF::Members& m;
868   -
869   - Linearization lin_;
870   - Objects objects_;
871   - Pages pages_;
872   -
873   - // Document Helpers;
874   - std::unique_ptr<QPDFAcroFormDocumentHelper> acroform_;
875   - std::unique_ptr<QPDFEmbeddedFileDocumentHelper> embedded_files_;
876   - std::unique_ptr<QPDFOutlineDocumentHelper> outlines_;
877   - std::unique_ptr<QPDFPageDocumentHelper> page_dh_;
878   - std::unique_ptr<QPDFPageLabelDocumentHelper> page_labels_;
879   -};
  858 + QPDF::Members* m;
  859 +}; // class QPDF::Doc::Pages
880 860  
881   -class QPDF::Members
  861 +class QPDF::Members: QPDF::Doc
882 862 {
883 863 friend class QPDF;
884 864 friend class ResolveRecorder;
... ... @@ -889,10 +869,9 @@ class QPDF::Members
889 869 ~Members() = default;
890 870  
891 871 private:
892   - Doc doc;
893   - Doc::Linearization& lin;
894   - Doc::Objects& objects;
895   - Doc::Pages& pages;
  872 + Doc::Linearization lin;
  873 + Doc::Objects objects;
  874 + Doc::Pages pages;
896 875 std::shared_ptr<QPDFLogger> log;
897 876 unsigned long long unique_id{0};
898 877 qpdf::Tokenizer tokenizer;
... ... @@ -978,16 +957,34 @@ class QPDF::Doc::Resolver
978 957 }
979 958 };
980 959  
  960 +inline QPDF::Doc::Linearization&
  961 +QPDF::Doc::linearization()
  962 +{
  963 + return m->lin;
  964 +};
  965 +
  966 +inline QPDF::Doc::Objects&
  967 +QPDF::Doc::objects()
  968 +{
  969 + return m->objects;
  970 +};
  971 +
  972 +inline QPDF::Doc::Pages&
  973 +QPDF::Doc::pages()
  974 +{
  975 + return m->pages;
  976 +}
  977 +
981 978 inline bool
982 979 QPDF::Doc::reconstructed_xref() const
983 980 {
984   - return m.reconstructed_xref;
  981 + return m->reconstructed_xref;
985 982 }
986 983  
987 984 inline QPDF::Doc&
988 985 QPDF::doc()
989 986 {
990   - return m->doc;
  987 + return *m;
991 988 }
992 989  
993 990 // Throw a generic exception for unusual error conditions that are not covered during CI testing.
... ...