Commit 0c591c103960ebfbd77eebd0567e5529d2ded70a

Authored by m-holger
1 parent 0c8af4a3

Refactor `Doc` class: reorganize structure, move implementations, add inline functions and improve encapsulation.
libqpdf/QPDF.cc
@@ -126,10 +126,10 @@ QPDF::QPDFVersion() @@ -126,10 +126,10 @@ QPDF::QPDFVersion()
126 } 126 }
127 127
128 QPDF::Members::Members(QPDF& qpdf) : 128 QPDF::Members::Members(QPDF& qpdf) :
129 - doc(qpdf, *this),  
130 - lin(doc.linearization()),  
131 - objects(doc.objects()),  
132 - pages(doc.pages()), 129 + Doc(qpdf, this),
  130 + lin(qpdf, this),
  131 + objects(qpdf, this),
  132 + pages(qpdf, this),
133 log(QPDFLogger::defaultLogger()), 133 log(QPDFLogger::defaultLogger()),
134 file(std::make_shared<InvalidInputSource>()), 134 file(std::make_shared<InvalidInputSource>()),
135 encp(std::make_shared<EncryptionParameters>()) 135 encp(std::make_shared<EncryptionParameters>())
libqpdf/qpdf/QPDF_private.hh
@@ -295,590 +295,570 @@ class QPDF::PatternFinder final: public InputSource::Finder @@ -295,590 +295,570 @@ class QPDF::PatternFinder final: public InputSource::Finder
295 class QPDF::Doc 295 class QPDF::Doc
296 { 296 {
297 public: 297 public:
  298 + class Encryption;
298 class JobSetter; 299 class JobSetter;
  300 + class Linearization;
  301 + class Objects;
  302 + class Pages;
299 class ParseGuard; 303 class ParseGuard;
300 class Resolver; 304 class Resolver;
301 class Writer; 305 class Writer;
302 306
303 - class Encryption 307 + Doc() = delete;
  308 + Doc(Doc const&) = delete;
  309 + Doc(Doc&&) = delete;
  310 + Doc& operator=(Doc const&) = delete;
  311 + Doc& operator=(Doc&&) = delete;
  312 + ~Doc() = default;
  313 +
  314 + Doc(QPDF& qpdf, QPDF::Members* m) :
  315 + qpdf(qpdf),
  316 + m(m)
304 { 317 {
305 - public:  
306 - // This class holds data read from the encryption dictionary.  
307 - Encryption(  
308 - int V,  
309 - int R,  
310 - int Length_bytes,  
311 - int P,  
312 - std::string const& O,  
313 - std::string const& U,  
314 - std::string const& OE,  
315 - std::string const& UE,  
316 - std::string const& Perms,  
317 - std::string const& id1,  
318 - bool encrypt_metadata) :  
319 - V(V),  
320 - R(R),  
321 - Length_bytes(Length_bytes),  
322 - P(static_cast<unsigned long long>(P)),  
323 - O(O),  
324 - U(U),  
325 - OE(OE),  
326 - UE(UE),  
327 - Perms(Perms),  
328 - id1(id1),  
329 - encrypt_metadata(encrypt_metadata)  
330 - { 318 + }
  319 +
  320 + inline Linearization& linearization();
  321 +
  322 + inline Objects& objects();
  323 +
  324 + inline Pages& pages();
  325 +
  326 + bool reconstructed_xref() const;
  327 +
  328 + QPDFAcroFormDocumentHelper&
  329 + acroform()
  330 + {
  331 + if (!acroform_) {
  332 + acroform_ = std::make_unique<QPDFAcroFormDocumentHelper>(qpdf);
331 } 333 }
332 - Encryption(int V, int R, int Length_bytes, bool encrypt_metadata) :  
333 - V(V),  
334 - R(R),  
335 - Length_bytes(Length_bytes),  
336 - encrypt_metadata(encrypt_metadata)  
337 - { 334 + return *acroform_;
  335 + }
  336 +
  337 + QPDFEmbeddedFileDocumentHelper&
  338 + embedded_files()
  339 + {
  340 + if (!embedded_files_) {
  341 + embedded_files_ = std::make_unique<QPDFEmbeddedFileDocumentHelper>(qpdf);
338 } 342 }
  343 + return *embedded_files_;
  344 + }
339 345
340 - int getV() const;  
341 - int getR() const;  
342 - int getLengthBytes() const;  
343 - int getP() const;  
344 - // Bits in P are numbered from 1 as in the PDF spec.  
345 - bool getP(size_t bit) const;  
346 - std::string const& getO() const;  
347 - std::string const& getU() const;  
348 - std::string const& getOE() const;  
349 - std::string const& getUE() const;  
350 - std::string const& getPerms() const;  
351 - std::string const& getId1() const;  
352 - bool getEncryptMetadata() const;  
353 - // Bits in P are numbered from 1 as in the PDF spec.  
354 - void setP(size_t bit, bool val);  
355 - void setP(unsigned long val);  
356 - void setO(std::string const&);  
357 - void setU(std::string const&);  
358 - void setId1(std::string const& val);  
359 - void setV5EncryptionParameters(  
360 - std::string const& O,  
361 - std::string const& OE,  
362 - std::string const& U,  
363 - std::string const& UE,  
364 - std::string const& Perms);  
365 -  
366 - std::string compute_encryption_key(std::string const& password) const;  
367 -  
368 - bool  
369 - check_owner_password(std::string& user_password, std::string const& owner_password) const;  
370 -  
371 - bool check_user_password(std::string const& user_password) const;  
372 -  
373 - std::string  
374 - recover_encryption_key_with_password(std::string const& password, bool& perms_valid) const;  
375 -  
376 - void compute_encryption_O_U(char const* user_password, char const* owner_password);  
377 -  
378 - std::string  
379 - compute_encryption_parameters_V5(char const* user_password, char const* owner_password);  
380 -  
381 - std::string compute_parameters(char const* user_password, char const* owner_password); 346 + QPDFOutlineDocumentHelper&
  347 + outlines()
  348 + {
  349 + if (!outlines_) {
  350 + outlines_ = std::make_unique<QPDFOutlineDocumentHelper>(qpdf);
  351 + }
  352 + return *outlines_;
  353 + }
382 354
383 - private:  
384 - static constexpr unsigned int OU_key_bytes_V4 = 16; // ( == sizeof(MD5::Digest)  
385 -  
386 - Encryption(Encryption const&) = delete;  
387 - Encryption& operator=(Encryption const&) = delete;  
388 -  
389 - std::string hash_V5(  
390 - std::string const& password, std::string const& salt, std::string const& udata) const;  
391 - std::string  
392 - compute_O_value(std::string const& user_password, std::string const& owner_password) const;  
393 - std::string compute_U_value(std::string const& user_password) const;  
394 - std::string compute_encryption_key_from_password(std::string const& password) const;  
395 - std::string recover_encryption_key_with_password(std::string const& password) const;  
396 - bool check_owner_password_V4(  
397 - std::string& user_password, std::string const& owner_password) const;  
398 - bool check_owner_password_V5(std::string const& owner_passworda) const;  
399 - std::string compute_Perms_value_V5_clear() const;  
400 - std::string compute_O_rc4_key(  
401 - std::string const& user_password, std::string const& owner_password) const;  
402 - std::string compute_U_value_R2(std::string const& user_password) const;  
403 - std::string compute_U_value_R3(std::string const& user_password) const;  
404 - bool check_user_password_V4(std::string const& user_password) const;  
405 - bool check_user_password_V5(std::string const& user_password) const;  
406 -  
407 - int V;  
408 - int R;  
409 - int Length_bytes;  
410 - std::bitset<32> P{0xfffffffc}; // Specification always requires bits 1 and 2 to be cleared.  
411 - std::string O;  
412 - std::string U;  
413 - std::string OE;  
414 - std::string UE;  
415 - std::string Perms;  
416 - std::string id1;  
417 - bool encrypt_metadata;  
418 - }; // class QPDF::Doc::Encryption  
419 -  
420 - class Linearization 355 + QPDFPageDocumentHelper&
  356 + page_dh()
421 { 357 {
422 - public:  
423 - Linearization() = delete;  
424 - Linearization(Linearization const&) = delete;  
425 - Linearization(Linearization&&) = delete;  
426 - Linearization& operator=(Linearization const&) = delete;  
427 - Linearization& operator=(Linearization&&) = delete;  
428 - ~Linearization() = default;  
429 -  
430 - Linearization(QPDF& qpdf, QPDF::Members* m) :  
431 - qpdf(qpdf),  
432 - m(m)  
433 - { 358 + if (!page_dh_) {
  359 + page_dh_ = std::make_unique<QPDFPageDocumentHelper>(qpdf);
434 } 360 }
  361 + return *page_dh_;
  362 + }
435 363
436 - // For QPDFWriter:  
437 -  
438 - template <typename T>  
439 - void optimize_internal(  
440 - T const& object_stream_data,  
441 - bool allow_changes = true,  
442 - std::function<int(QPDFObjectHandle&)> skip_stream_parameters = nullptr);  
443 - void optimize(  
444 - QPDFWriter::ObjTable const& obj,  
445 - std::function<int(QPDFObjectHandle&)> skip_stream_parameters);  
446 -  
447 - // Get lists of all objects in order according to the part of a linearized file that they  
448 - // belong to.  
449 - void getLinearizedParts(  
450 - QPDFWriter::ObjTable const& obj,  
451 - std::vector<QPDFObjectHandle>& part4,  
452 - std::vector<QPDFObjectHandle>& part6,  
453 - std::vector<QPDFObjectHandle>& part7,  
454 - std::vector<QPDFObjectHandle>& part8,  
455 - std::vector<QPDFObjectHandle>& part9);  
456 -  
457 - void generateHintStream(  
458 - QPDFWriter::NewObjTable const& new_obj,  
459 - QPDFWriter::ObjTable const& obj,  
460 - std::string& hint_stream,  
461 - int& S,  
462 - int& O,  
463 - bool compressed);  
464 -  
465 - // methods to support linearization checking -- implemented in QPDF_linearization.cc  
466 -  
467 - void readLinearizationData();  
468 - void checkLinearizationInternal();  
469 - void dumpLinearizationDataInternal();  
470 - void linearizationWarning(std::string_view);  
471 - qpdf::Dictionary readHintStream(Pipeline&, qpdf_offset_t offset, size_t length);  
472 - void readHPageOffset(BitStream);  
473 - void readHSharedObject(BitStream);  
474 - void readHGeneric(BitStream, HGeneric&);  
475 - qpdf_offset_t maxEnd(ObjUser const& ou);  
476 - qpdf_offset_t getLinearizationOffset(QPDFObjGen);  
477 - QPDFObjectHandle  
478 - getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data);  
479 - QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj);  
480 - int lengthNextN(int first_object, int n);  
481 - void checkHPageOffset(  
482 - std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj);  
483 - void checkHSharedObject(  
484 - std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj);  
485 - void checkHOutlines();  
486 - void dumpHPageOffset();  
487 - void dumpHSharedObject();  
488 - void dumpHGeneric(HGeneric&);  
489 - qpdf_offset_t adjusted_offset(qpdf_offset_t offset);  
490 - template <typename T>  
491 - void calculateLinearizationData(T const& object_stream_data);  
492 - template <typename T>  
493 - void pushOutlinesToPart(  
494 - std::vector<QPDFObjectHandle>& part,  
495 - std::set<QPDFObjGen>& lc_outlines,  
496 - T const& object_stream_data);  
497 - int outputLengthNextN(  
498 - int in_object,  
499 - int n,  
500 - QPDFWriter::NewObjTable const& new_obj,  
501 - QPDFWriter::ObjTable const& obj);  
502 - void calculateHPageOffset(  
503 - QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj);  
504 - void calculateHSharedObject(  
505 - QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj);  
506 - void  
507 - calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj);  
508 - void writeHPageOffset(BitWriter&);  
509 - void writeHSharedObject(BitWriter&);  
510 - void writeHGeneric(BitWriter&, HGeneric&);  
511 -  
512 - // Methods to support optimization  
513 -  
514 - void updateObjectMaps(  
515 - ObjUser const& ou,  
516 - QPDFObjectHandle oh,  
517 - std::function<int(QPDFObjectHandle&)> skip_stream_parameters);  
518 - void filterCompressedObjects(std::map<int, int> const& object_stream_data);  
519 - void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data); 364 + QPDFPageLabelDocumentHelper&
  365 + page_labels()
  366 + {
  367 + if (!page_labels_) {
  368 + page_labels_ = std::make_unique<QPDFPageLabelDocumentHelper>(qpdf);
  369 + }
  370 + return *page_labels_;
  371 + }
520 372
521 - private:  
522 - QPDF& qpdf;  
523 - QPDF::Members* m;  
524 - }; 373 + private:
  374 + QPDF& qpdf;
  375 + QPDF::Members* m;
525 376
526 - class Objects 377 + // Document Helpers;
  378 + std::unique_ptr<QPDFAcroFormDocumentHelper> acroform_;
  379 + std::unique_ptr<QPDFEmbeddedFileDocumentHelper> embedded_files_;
  380 + std::unique_ptr<QPDFOutlineDocumentHelper> outlines_;
  381 + std::unique_ptr<QPDFPageDocumentHelper> page_dh_;
  382 + std::unique_ptr<QPDFPageLabelDocumentHelper> page_labels_;
  383 +};
  384 +
  385 +class QPDF::Doc::Encryption
  386 +{
  387 + public:
  388 + // This class holds data read from the encryption dictionary.
  389 + Encryption(
  390 + int V,
  391 + int R,
  392 + int Length_bytes,
  393 + int P,
  394 + std::string const& O,
  395 + std::string const& U,
  396 + std::string const& OE,
  397 + std::string const& UE,
  398 + std::string const& Perms,
  399 + std::string const& id1,
  400 + bool encrypt_metadata) :
  401 + V(V),
  402 + R(R),
  403 + Length_bytes(Length_bytes),
  404 + P(static_cast<unsigned long long>(P)),
  405 + O(O),
  406 + U(U),
  407 + OE(OE),
  408 + UE(UE),
  409 + Perms(Perms),
  410 + id1(id1),
  411 + encrypt_metadata(encrypt_metadata)
527 { 412 {
528 - public:  
529 - class Foreign  
530 - {  
531 - class Copier  
532 - {  
533 - public:  
534 - Copier(QPDF& qpdf) :  
535 - qpdf(qpdf)  
536 - {  
537 - } 413 + }
  414 + Encryption(int V, int R, int Length_bytes, bool encrypt_metadata) :
  415 + V(V),
  416 + R(R),
  417 + Length_bytes(Length_bytes),
  418 + encrypt_metadata(encrypt_metadata)
  419 + {
  420 + }
538 421
539 - QPDFObjectHandle copied(QPDFObjectHandle const& foreign); 422 + int getV() const;
  423 + int getR() const;
  424 + int getLengthBytes() const;
  425 + int getP() const;
  426 + // Bits in P are numbered from 1 as in the PDF spec.
  427 + bool getP(size_t bit) const;
  428 + std::string const& getO() const;
  429 + std::string const& getU() const;
  430 + std::string const& getOE() const;
  431 + std::string const& getUE() const;
  432 + std::string const& getPerms() const;
  433 + std::string const& getId1() const;
  434 + bool getEncryptMetadata() const;
  435 + // Bits in P are numbered from 1 as in the PDF spec.
  436 + void setP(size_t bit, bool val);
  437 + void setP(unsigned long val);
  438 + void setO(std::string const&);
  439 + void setU(std::string const&);
  440 + void setId1(std::string const& val);
  441 + void setV5EncryptionParameters(
  442 + std::string const& O,
  443 + std::string const& OE,
  444 + std::string const& U,
  445 + std::string const& UE,
  446 + std::string const& Perms);
540 447
541 - private:  
542 - QPDFObjectHandle  
543 - replace_indirect_object(QPDFObjectHandle const& foreign, bool top = false);  
544 - void reserve_objects(QPDFObjectHandle const& foreign, bool top = false); 448 + std::string compute_encryption_key(std::string const& password) const;
545 449
546 - QPDF& qpdf;  
547 - std::map<QPDFObjGen, QPDFObjectHandle> object_map;  
548 - std::vector<QPDFObjectHandle> to_copy;  
549 - QPDFObjGen::set visiting;  
550 - }; 450 + bool check_owner_password(std::string& user_password, std::string const& owner_password) const;
551 451
552 - public:  
553 - Foreign(QPDF& qpdf) :  
554 - qpdf(qpdf)  
555 - {  
556 - } 452 + bool check_user_password(std::string const& user_password) const;
557 453
558 - Foreign() = delete;  
559 - Foreign(Foreign const&) = delete;  
560 - Foreign(Foreign&&) = delete;  
561 - Foreign& operator=(Foreign const&) = delete;  
562 - Foreign& operator=(Foreign&&) = delete;  
563 - ~Foreign() = default; 454 + std::string
  455 + recover_encryption_key_with_password(std::string const& password, bool& perms_valid) const;
564 456
565 - // Return a local handle to the foreign object. Copy the foreign object if necessary.  
566 - QPDFObjectHandle  
567 - copied(QPDFObjectHandle const& foreign)  
568 - {  
569 - return copier(foreign).copied(foreign);  
570 - } 457 + void compute_encryption_O_U(char const* user_password, char const* owner_password);
571 458
572 - private:  
573 - Copier& copier(QPDFObjectHandle const& foreign); 459 + std::string
  460 + compute_encryption_parameters_V5(char const* user_password, char const* owner_password);
574 461
575 - QPDF& qpdf;  
576 - std::map<unsigned long long, Copier> copiers;  
577 - }; // class QPDF::Doc::Objects::Foreign 462 + std::string compute_parameters(char const* user_password, char const* owner_password);
578 463
579 - class Streams  
580 - {  
581 - // Copier manages the copying of streams into this PDF. It is used both for copying  
582 - // local and foreign streams.  
583 - class Copier; 464 + private:
  465 + static constexpr unsigned int OU_key_bytes_V4 = 16; // ( == sizeof(MD5::Digest)
  466 +
  467 + Encryption(Encryption const&) = delete;
  468 + Encryption& operator=(Encryption const&) = delete;
  469 +
  470 + std::string
  471 + hash_V5(std::string const& password, std::string const& salt, std::string const& udata) const;
  472 + std::string
  473 + compute_O_value(std::string const& user_password, std::string const& owner_password) const;
  474 + std::string compute_U_value(std::string const& user_password) const;
  475 + std::string compute_encryption_key_from_password(std::string const& password) const;
  476 + std::string recover_encryption_key_with_password(std::string const& password) const;
  477 + bool
  478 + check_owner_password_V4(std::string& user_password, std::string const& owner_password) const;
  479 + bool check_owner_password_V5(std::string const& owner_passworda) const;
  480 + std::string compute_Perms_value_V5_clear() const;
  481 + std::string
  482 + compute_O_rc4_key(std::string const& user_password, std::string const& owner_password) const;
  483 + std::string compute_U_value_R2(std::string const& user_password) const;
  484 + std::string compute_U_value_R3(std::string const& user_password) const;
  485 + bool check_user_password_V4(std::string const& user_password) const;
  486 + bool check_user_password_V5(std::string const& user_password) const;
  487 +
  488 + int V;
  489 + int R;
  490 + int Length_bytes;
  491 + std::bitset<32> P{0xfffffffc}; // Specification always requires bits 1 and 2 to be cleared.
  492 + std::string O;
  493 + std::string U;
  494 + std::string OE;
  495 + std::string UE;
  496 + std::string Perms;
  497 + std::string id1;
  498 + bool encrypt_metadata;
  499 +}; // class QPDF::Doc::Encryption
  500 +
  501 +class QPDF::Doc::Linearization
  502 +{
  503 + public:
  504 + Linearization() = delete;
  505 + Linearization(Linearization const&) = delete;
  506 + Linearization(Linearization&&) = delete;
  507 + Linearization& operator=(Linearization const&) = delete;
  508 + Linearization& operator=(Linearization&&) = delete;
  509 + ~Linearization() = default;
  510 +
  511 + Linearization(QPDF& qpdf, QPDF::Members* m) :
  512 + qpdf(qpdf),
  513 + m(m)
  514 + {
  515 + }
584 516
585 - public:  
586 - Streams(QPDF& qpdf); 517 + // For QPDFWriter:
  518 +
  519 + template <typename T>
  520 + void optimize_internal(
  521 + T const& object_stream_data,
  522 + bool allow_changes = true,
  523 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters = nullptr);
  524 + void optimize(
  525 + QPDFWriter::ObjTable const& obj,
  526 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
  527 +
  528 + // Get lists of all objects in order according to the part of a linearized file that they
  529 + // belong to.
  530 + void getLinearizedParts(
  531 + QPDFWriter::ObjTable const& obj,
  532 + std::vector<QPDFObjectHandle>& part4,
  533 + std::vector<QPDFObjectHandle>& part6,
  534 + std::vector<QPDFObjectHandle>& part7,
  535 + std::vector<QPDFObjectHandle>& part8,
  536 + std::vector<QPDFObjectHandle>& part9);
  537 +
  538 + void generateHintStream(
  539 + QPDFWriter::NewObjTable const& new_obj,
  540 + QPDFWriter::ObjTable const& obj,
  541 + std::string& hint_stream,
  542 + int& S,
  543 + int& O,
  544 + bool compressed);
  545 +
  546 + // methods to support linearization checking -- implemented in QPDF_linearization.cc
  547 +
  548 + void readLinearizationData();
  549 + void checkLinearizationInternal();
  550 + void dumpLinearizationDataInternal();
  551 + void linearizationWarning(std::string_view);
  552 + qpdf::Dictionary readHintStream(Pipeline&, qpdf_offset_t offset, size_t length);
  553 + void readHPageOffset(BitStream);
  554 + void readHSharedObject(BitStream);
  555 + void readHGeneric(BitStream, HGeneric&);
  556 + qpdf_offset_t maxEnd(ObjUser const& ou);
  557 + qpdf_offset_t getLinearizationOffset(QPDFObjGen);
  558 + QPDFObjectHandle
  559 + getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data);
  560 + QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj);
  561 + int lengthNextN(int first_object, int n);
  562 + void
  563 + checkHPageOffset(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj);
  564 + void
  565 + checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj);
  566 + void checkHOutlines();
  567 + void dumpHPageOffset();
  568 + void dumpHSharedObject();
  569 + void dumpHGeneric(HGeneric&);
  570 + qpdf_offset_t adjusted_offset(qpdf_offset_t offset);
  571 + template <typename T>
  572 + void calculateLinearizationData(T const& object_stream_data);
  573 + template <typename T>
  574 + void pushOutlinesToPart(
  575 + std::vector<QPDFObjectHandle>& part,
  576 + std::set<QPDFObjGen>& lc_outlines,
  577 + T const& object_stream_data);
  578 + int outputLengthNextN(
  579 + int in_object,
  580 + int n,
  581 + QPDFWriter::NewObjTable const& new_obj,
  582 + QPDFWriter::ObjTable const& obj);
  583 + void
  584 + calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj);
  585 + void
  586 + calculateHSharedObject(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj);
  587 + void calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj);
  588 + void writeHPageOffset(BitWriter&);
  589 + void writeHSharedObject(BitWriter&);
  590 + void writeHGeneric(BitWriter&, HGeneric&);
587 591
588 - Streams() = delete;  
589 - Streams(Streams const&) = delete;  
590 - Streams(Streams&&) = delete;  
591 - Streams& operator=(Streams const&) = delete;  
592 - Streams& operator=(Streams&&) = delete;  
593 - ~Streams() = default; 592 + // Methods to support optimization
594 593
595 - public:  
596 - static bool  
597 - pipeStreamData(  
598 - QPDF* qpdf,  
599 - QPDFObjGen og,  
600 - qpdf_offset_t offset,  
601 - size_t length,  
602 - QPDFObjectHandle dict,  
603 - bool is_root_metadata,  
604 - Pipeline* pipeline,  
605 - bool suppress_warnings,  
606 - bool will_retry)  
607 - {  
608 - return qpdf->pipeStreamData(  
609 - og,  
610 - offset,  
611 - length,  
612 - dict,  
613 - is_root_metadata,  
614 - pipeline,  
615 - suppress_warnings,  
616 - will_retry);  
617 - } 594 + void updateObjectMaps(
  595 + ObjUser const& ou,
  596 + QPDFObjectHandle oh,
  597 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
  598 + void filterCompressedObjects(std::map<int, int> const& object_stream_data);
  599 + void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data);
618 600
619 - QPDF&  
620 - qpdf() const  
621 - {  
622 - return qpdf_;  
623 - } 601 + private:
  602 + QPDF& qpdf;
  603 + QPDF::Members* m;
  604 +};
624 605
625 - std::shared_ptr<Copier>&  
626 - copier() 606 +class QPDF::Doc::Objects
  607 +{
  608 + public:
  609 + class Foreign
  610 + {
  611 + class Copier
  612 + {
  613 + public:
  614 + Copier(QPDF& qpdf) :
  615 + qpdf(qpdf)
627 { 616 {
628 - return copier_;  
629 } 617 }
630 618
631 - bool immediate_copy_from() const; 619 + QPDFObjectHandle copied(QPDFObjectHandle const& foreign);
632 620
633 private: 621 private:
634 - QPDF& qpdf_; 622 + QPDFObjectHandle
  623 + replace_indirect_object(QPDFObjectHandle const& foreign, bool top = false);
  624 + void reserve_objects(QPDFObjectHandle const& foreign, bool top = false);
635 625
636 - std::shared_ptr<Copier> copier_;  
637 - }; // class QPDF::Doc::Objects::Streams 626 + QPDF& qpdf;
  627 + std::map<QPDFObjGen, QPDFObjectHandle> object_map;
  628 + std::vector<QPDFObjectHandle> to_copy;
  629 + QPDFObjGen::set visiting;
  630 + };
638 631
639 public: 632 public:
640 - Objects() = delete;  
641 - Objects(Objects const&) = delete;  
642 - Objects(Objects&&) = delete;  
643 - Objects& operator=(Objects const&) = delete;  
644 - Objects& operator=(Objects&&) = delete;  
645 - ~Objects() = default;  
646 -  
647 - Objects(QPDF& qpdf, QPDF::Members* m) :  
648 - qpdf(qpdf),  
649 - m(m),  
650 - foreign_(qpdf),  
651 - streams_(qpdf) 633 + Foreign(QPDF& qpdf) :
  634 + qpdf(qpdf)
652 { 635 {
653 } 636 }
654 637
655 - Foreign&  
656 - foreign()  
657 - {  
658 - return foreign_;  
659 - } 638 + Foreign() = delete;
  639 + Foreign(Foreign const&) = delete;
  640 + Foreign(Foreign&&) = delete;
  641 + Foreign& operator=(Foreign const&) = delete;
  642 + Foreign& operator=(Foreign&&) = delete;
  643 + ~Foreign() = default;
660 644
661 - Streams&  
662 - streams() 645 + // Return a local handle to the foreign object. Copy the foreign object if necessary.
  646 + QPDFObjectHandle
  647 + copied(QPDFObjectHandle const& foreign)
663 { 648 {
664 - return streams_; 649 + return copier(foreign).copied(foreign);
665 } 650 }
666 651
667 - void parse(char const* password);  
668 - std::shared_ptr<QPDFObject> const& resolve(QPDFObjGen og);  
669 - void inParse(bool);  
670 - QPDFObjGen nextObjGen();  
671 - QPDFObjectHandle newIndirect(QPDFObjGen, std::shared_ptr<QPDFObject> const&);  
672 - void updateCache(  
673 - QPDFObjGen og,  
674 - std::shared_ptr<QPDFObject> const& object,  
675 - qpdf_offset_t end_before_space,  
676 - qpdf_offset_t end_after_space,  
677 - bool destroy = true);  
678 - bool resolveXRefTable();  
679 - QPDFObjectHandle readObjectAtOffset(  
680 - qpdf_offset_t offset, std::string const& description, bool skip_cache_if_in_xref);  
681 - QPDFTokenizer::Token readToken(InputSource& input, size_t max_len = 0);  
682 - QPDFObjectHandle makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj);  
683 - std::shared_ptr<QPDFObject> getObjectForParser(int id, int gen, bool parse_pdf);  
684 - std::shared_ptr<QPDFObject> getObjectForJSON(int id, int gen);  
685 - size_t tableSize();  
686 -  
687 - // For QPDFWriter:  
688 -  
689 - std::map<QPDFObjGen, QPDFXRefEntry> const& getXRefTableInternal();  
690 - // Get a list of objects that would be permitted in an object stream.  
691 - template <typename T>  
692 - std::vector<T> getCompressibleObjGens();  
693 - std::vector<QPDFObjGen> getCompressibleObjVector();  
694 - std::vector<bool> getCompressibleObjSet();  
695 -  
696 private: 652 private:
697 - void setTrailer(QPDFObjectHandle obj);  
698 - void reconstruct_xref(QPDFExc& e, bool found_startxref = true);  
699 - void read_xref(qpdf_offset_t offset, bool in_stream_recovery = false);  
700 - bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes);  
701 - bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);  
702 - bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);  
703 - qpdf_offset_t read_xrefTable(qpdf_offset_t offset);  
704 - qpdf_offset_t read_xrefStream(qpdf_offset_t offset, bool in_stream_recovery = false);  
705 - qpdf_offset_t processXRefStream(  
706 - qpdf_offset_t offset, QPDFObjectHandle& xref_stream, bool in_stream_recovery = false);  
707 - std::pair<int, std::array<int, 3>>  
708 - processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);  
709 - int processXRefSize(  
710 - QPDFObjectHandle& dict,  
711 - int entry_size,  
712 - std::function<QPDFExc(std::string_view)> damaged);  
713 - std::pair<int, std::vector<std::pair<int, int>>> processXRefIndex(  
714 - QPDFObjectHandle& dict,  
715 - int max_num_entries,  
716 - std::function<QPDFExc(std::string_view)> damaged);  
717 - void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2);  
718 - void insertFreeXrefEntry(QPDFObjGen);  
719 - QPDFObjectHandle readTrailer();  
720 - QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og);  
721 - void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);  
722 - void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);  
723 - QPDFObjectHandle  
724 - readObjectInStream(qpdf::is::OffsetBuffer& input, int stream_id, int obj_id);  
725 - size_t recoverStreamLength(  
726 - std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset);  
727 -  
728 - QPDFObjGen read_object_start(qpdf_offset_t offset);  
729 - void readObjectAtOffset(  
730 - bool attempt_recovery,  
731 - qpdf_offset_t offset,  
732 - std::string const& description,  
733 - QPDFObjGen exp_og);  
734 - void resolveObjectsInStream(int obj_stream_number);  
735 - bool isCached(QPDFObjGen og);  
736 - bool isUnresolved(QPDFObjGen og);  
737 - void setLastObjectDescription(std::string const& description, QPDFObjGen og); 653 + Copier& copier(QPDFObjectHandle const& foreign);
738 654
739 QPDF& qpdf; 655 QPDF& qpdf;
740 - QPDF::Members* m;  
741 -  
742 - Foreign foreign_;  
743 - Streams streams_;  
744 - }; // class QPDF::Doc::Objects 656 + std::map<unsigned long long, Copier> copiers;
  657 + }; // class QPDF::Doc::Objects::Foreign
745 658
746 - // This class is used to represent a PDF Pages tree.  
747 - class Pages 659 + class Streams
748 { 660 {
  661 + // Copier manages the copying of streams into this PDF. It is used both for copying
  662 + // local and foreign streams.
  663 + class Copier;
  664 +
  665 + public:
  666 + Streams(QPDF& qpdf);
  667 +
  668 + Streams() = delete;
  669 + Streams(Streams const&) = delete;
  670 + Streams(Streams&&) = delete;
  671 + Streams& operator=(Streams const&) = delete;
  672 + Streams& operator=(Streams&&) = delete;
  673 + ~Streams() = default;
  674 +
749 public: 675 public:
750 - Pages() = delete;  
751 - Pages(Pages const&) = delete;  
752 - Pages(Pages&&) = delete;  
753 - Pages& operator=(Pages const&) = delete;  
754 - Pages& operator=(Pages&&) = delete;  
755 - ~Pages() = default;  
756 -  
757 - Pages(QPDF& qpdf, QPDF::Members* m) :  
758 - qpdf(qpdf),  
759 - m(m) 676 + static bool
  677 + pipeStreamData(
  678 + QPDF* qpdf,
  679 + QPDFObjGen og,
  680 + qpdf_offset_t offset,
  681 + size_t length,
  682 + QPDFObjectHandle dict,
  683 + bool is_root_metadata,
  684 + Pipeline* pipeline,
  685 + bool suppress_warnings,
  686 + bool will_retry)
760 { 687 {
  688 + return qpdf->pipeStreamData(
  689 + og,
  690 + offset,
  691 + length,
  692 + dict,
  693 + is_root_metadata,
  694 + pipeline,
  695 + suppress_warnings,
  696 + will_retry);
761 } 697 }
762 698
763 - void getAllPagesInternal(  
764 - QPDFObjectHandle cur_pages,  
765 - QPDFObjGen::set& visited,  
766 - QPDFObjGen::set& seen,  
767 - bool media_box,  
768 - bool resources);  
769 - void insertPage(QPDFObjectHandle newpage, int pos);  
770 - void flattenPagesTree();  
771 - void insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate);  
772 - void pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys);  
773 - void pushInheritedAttributesToPageInternal(  
774 - QPDFObjectHandle,  
775 - std::map<std::string, std::vector<QPDFObjectHandle>>&,  
776 - bool allow_changes,  
777 - bool warn_skipped_keys); 699 + QPDF&
  700 + qpdf() const
  701 + {
  702 + return qpdf_;
  703 + }
  704 +
  705 + std::shared_ptr<Copier>&
  706 + copier()
  707 + {
  708 + return copier_;
  709 + }
  710 +
  711 + bool immediate_copy_from() const;
778 712
779 private: 713 private:
780 - QPDF& qpdf;  
781 - QPDF::Members* m;  
782 - }; // class QPDF::Doc::Pages 714 + QPDF& qpdf_;
783 715
784 - Doc() = delete;  
785 - Doc(Doc const&) = delete;  
786 - Doc(Doc&&) = delete;  
787 - Doc& operator=(Doc const&) = delete;  
788 - Doc& operator=(Doc&&) = delete;  
789 - ~Doc() = default; 716 + std::shared_ptr<Copier> copier_;
  717 + }; // class QPDF::Doc::Objects::Streams
790 718
791 - Doc(QPDF& qpdf, QPDF::Members& m) : 719 + public:
  720 + Objects() = delete;
  721 + Objects(Objects const&) = delete;
  722 + Objects(Objects&&) = delete;
  723 + Objects& operator=(Objects const&) = delete;
  724 + Objects& operator=(Objects&&) = delete;
  725 + ~Objects() = default;
  726 +
  727 + Objects(QPDF& qpdf, QPDF::Members* m) :
792 qpdf(qpdf), 728 qpdf(qpdf),
793 m(m), 729 m(m),
794 - lin_(qpdf, &m),  
795 - objects_(qpdf, &m),  
796 - pages_(qpdf, &m) 730 + foreign_(qpdf),
  731 + streams_(qpdf)
797 { 732 {
798 } 733 }
799 734
800 - Linearization&  
801 - linearization()  
802 - {  
803 - return lin_;  
804 - };  
805 -  
806 - Objects&  
807 - objects() 735 + Foreign&
  736 + foreign()
808 { 737 {
809 - return objects_;  
810 - }; 738 + return foreign_;
  739 + }
811 740
812 - Pages&  
813 - pages() 741 + Streams&
  742 + streams()
814 { 743 {
815 - return pages_; 744 + return streams_;
816 } 745 }
817 746
818 - bool reconstructed_xref() const; 747 + void parse(char const* password);
  748 + std::shared_ptr<QPDFObject> const& resolve(QPDFObjGen og);
  749 + void inParse(bool);
  750 + QPDFObjGen nextObjGen();
  751 + QPDFObjectHandle newIndirect(QPDFObjGen, std::shared_ptr<QPDFObject> const&);
  752 + void updateCache(
  753 + QPDFObjGen og,
  754 + std::shared_ptr<QPDFObject> const& object,
  755 + qpdf_offset_t end_before_space,
  756 + qpdf_offset_t end_after_space,
  757 + bool destroy = true);
  758 + bool resolveXRefTable();
  759 + QPDFObjectHandle readObjectAtOffset(
  760 + qpdf_offset_t offset, std::string const& description, bool skip_cache_if_in_xref);
  761 + QPDFTokenizer::Token readToken(InputSource& input, size_t max_len = 0);
  762 + QPDFObjectHandle makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj);
  763 + std::shared_ptr<QPDFObject> getObjectForParser(int id, int gen, bool parse_pdf);
  764 + std::shared_ptr<QPDFObject> getObjectForJSON(int id, int gen);
  765 + size_t tableSize();
  766 +
  767 + // For QPDFWriter:
  768 +
  769 + std::map<QPDFObjGen, QPDFXRefEntry> const& getXRefTableInternal();
  770 + // Get a list of objects that would be permitted in an object stream.
  771 + template <typename T>
  772 + std::vector<T> getCompressibleObjGens();
  773 + std::vector<QPDFObjGen> getCompressibleObjVector();
  774 + std::vector<bool> getCompressibleObjSet();
819 775
820 - QPDFAcroFormDocumentHelper&  
821 - acroform()  
822 - {  
823 - if (!acroform_) {  
824 - acroform_ = std::make_unique<QPDFAcroFormDocumentHelper>(qpdf);  
825 - }  
826 - return *acroform_;  
827 - } 776 + private:
  777 + void setTrailer(QPDFObjectHandle obj);
  778 + void reconstruct_xref(QPDFExc& e, bool found_startxref = true);
  779 + void read_xref(qpdf_offset_t offset, bool in_stream_recovery = false);
  780 + bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes);
  781 + bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
  782 + bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
  783 + qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
  784 + qpdf_offset_t read_xrefStream(qpdf_offset_t offset, bool in_stream_recovery = false);
  785 + qpdf_offset_t processXRefStream(
  786 + qpdf_offset_t offset, QPDFObjectHandle& xref_stream, bool in_stream_recovery = false);
  787 + std::pair<int, std::array<int, 3>>
  788 + processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);
  789 + int processXRefSize(
  790 + QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged);
  791 + std::pair<int, std::vector<std::pair<int, int>>> processXRefIndex(
  792 + QPDFObjectHandle& dict,
  793 + int max_num_entries,
  794 + std::function<QPDFExc(std::string_view)> damaged);
  795 + void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2);
  796 + void insertFreeXrefEntry(QPDFObjGen);
  797 + QPDFObjectHandle readTrailer();
  798 + QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og);
  799 + void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
  800 + void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
  801 + QPDFObjectHandle readObjectInStream(qpdf::is::OffsetBuffer& input, int stream_id, int obj_id);
  802 + size_t recoverStreamLength(
  803 + std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset);
  804 +
  805 + QPDFObjGen read_object_start(qpdf_offset_t offset);
  806 + void readObjectAtOffset(
  807 + bool attempt_recovery,
  808 + qpdf_offset_t offset,
  809 + std::string const& description,
  810 + QPDFObjGen exp_og);
  811 + void resolveObjectsInStream(int obj_stream_number);
  812 + bool isCached(QPDFObjGen og);
  813 + bool isUnresolved(QPDFObjGen og);
  814 + void setLastObjectDescription(std::string const& description, QPDFObjGen og);
828 815
829 - QPDFEmbeddedFileDocumentHelper&  
830 - embedded_files()  
831 - {  
832 - if (!embedded_files_) {  
833 - embedded_files_ = std::make_unique<QPDFEmbeddedFileDocumentHelper>(qpdf);  
834 - }  
835 - return *embedded_files_;  
836 - } 816 + QPDF& qpdf;
  817 + QPDF::Members* m;
837 818
838 - QPDFOutlineDocumentHelper&  
839 - outlines()  
840 - {  
841 - if (!outlines_) {  
842 - outlines_ = std::make_unique<QPDFOutlineDocumentHelper>(qpdf);  
843 - }  
844 - return *outlines_;  
845 - } 819 + Foreign foreign_;
  820 + Streams streams_;
  821 +}; // class QPDF::Doc::Objects
846 822
847 - QPDFPageDocumentHelper&  
848 - page_dh() 823 +// This class is used to represent a PDF Pages tree.
  824 +class QPDF::Doc::Pages
  825 +{
  826 + public:
  827 + Pages() = delete;
  828 + Pages(Pages const&) = delete;
  829 + Pages(Pages&&) = delete;
  830 + Pages& operator=(Pages const&) = delete;
  831 + Pages& operator=(Pages&&) = delete;
  832 + ~Pages() = default;
  833 +
  834 + Pages(QPDF& qpdf, QPDF::Members* m) :
  835 + qpdf(qpdf),
  836 + m(m)
849 { 837 {
850 - if (!page_dh_) {  
851 - page_dh_ = std::make_unique<QPDFPageDocumentHelper>(qpdf);  
852 - }  
853 - return *page_dh_;  
854 } 838 }
855 839
856 - QPDFPageLabelDocumentHelper&  
857 - page_labels()  
858 - {  
859 - if (!page_labels_) {  
860 - page_labels_ = std::make_unique<QPDFPageLabelDocumentHelper>(qpdf);  
861 - }  
862 - return *page_labels_;  
863 - } 840 + void getAllPagesInternal(
  841 + QPDFObjectHandle cur_pages,
  842 + QPDFObjGen::set& visited,
  843 + QPDFObjGen::set& seen,
  844 + bool media_box,
  845 + bool resources);
  846 + void insertPage(QPDFObjectHandle newpage, int pos);
  847 + void flattenPagesTree();
  848 + void insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate);
  849 + void pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys);
  850 + void pushInheritedAttributesToPageInternal(
  851 + QPDFObjectHandle,
  852 + std::map<std::string, std::vector<QPDFObjectHandle>>&,
  853 + bool allow_changes,
  854 + bool warn_skipped_keys);
864 855
865 private: 856 private:
866 QPDF& qpdf; 857 QPDF& qpdf;
867 - QPDF::Members& m;  
868 -  
869 - Linearization lin_;  
870 - Objects objects_;  
871 - Pages pages_;  
872 -  
873 - // Document Helpers;  
874 - std::unique_ptr<QPDFAcroFormDocumentHelper> acroform_;  
875 - std::unique_ptr<QPDFEmbeddedFileDocumentHelper> embedded_files_;  
876 - std::unique_ptr<QPDFOutlineDocumentHelper> outlines_;  
877 - std::unique_ptr<QPDFPageDocumentHelper> page_dh_;  
878 - std::unique_ptr<QPDFPageLabelDocumentHelper> page_labels_;  
879 -}; 858 + QPDF::Members* m;
  859 +}; // class QPDF::Doc::Pages
880 860
881 -class QPDF::Members 861 +class QPDF::Members: QPDF::Doc
882 { 862 {
883 friend class QPDF; 863 friend class QPDF;
884 friend class ResolveRecorder; 864 friend class ResolveRecorder;
@@ -889,10 +869,9 @@ class QPDF::Members @@ -889,10 +869,9 @@ class QPDF::Members
889 ~Members() = default; 869 ~Members() = default;
890 870
891 private: 871 private:
892 - Doc doc;  
893 - Doc::Linearization& lin;  
894 - Doc::Objects& objects;  
895 - Doc::Pages& pages; 872 + Doc::Linearization lin;
  873 + Doc::Objects objects;
  874 + Doc::Pages pages;
896 std::shared_ptr<QPDFLogger> log; 875 std::shared_ptr<QPDFLogger> log;
897 unsigned long long unique_id{0}; 876 unsigned long long unique_id{0};
898 qpdf::Tokenizer tokenizer; 877 qpdf::Tokenizer tokenizer;
@@ -978,16 +957,34 @@ class QPDF::Doc::Resolver @@ -978,16 +957,34 @@ class QPDF::Doc::Resolver
978 } 957 }
979 }; 958 };
980 959
  960 +inline QPDF::Doc::Linearization&
  961 +QPDF::Doc::linearization()
  962 +{
  963 + return m->lin;
  964 +};
  965 +
  966 +inline QPDF::Doc::Objects&
  967 +QPDF::Doc::objects()
  968 +{
  969 + return m->objects;
  970 +};
  971 +
  972 +inline QPDF::Doc::Pages&
  973 +QPDF::Doc::pages()
  974 +{
  975 + return m->pages;
  976 +}
  977 +
981 inline bool 978 inline bool
982 QPDF::Doc::reconstructed_xref() const 979 QPDF::Doc::reconstructed_xref() const
983 { 980 {
984 - return m.reconstructed_xref; 981 + return m->reconstructed_xref;
985 } 982 }
986 983
987 inline QPDF::Doc& 984 inline QPDF::Doc&
988 QPDF::doc() 985 QPDF::doc()
989 { 986 {
990 - return m->doc; 987 + return *m;
991 } 988 }
992 989
993 // Throw a generic exception for unusual error conditions that are not covered during CI testing. 990 // Throw a generic exception for unusual error conditions that are not covered during CI testing.