diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 010e101..6c24ac0 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -126,10 +126,10 @@ QPDF::QPDFVersion() } QPDF::Members::Members(QPDF& qpdf) : - doc(qpdf, *this), - lin(doc.linearization()), - objects(doc.objects()), - pages(doc.pages()), + Doc(qpdf, this), + lin(qpdf, this), + objects(qpdf, this), + pages(qpdf, this), log(QPDFLogger::defaultLogger()), file(std::make_shared()), encp(std::make_shared()) diff --git a/libqpdf/qpdf/QPDF_private.hh b/libqpdf/qpdf/QPDF_private.hh index 5c15132..c249c8e 100644 --- a/libqpdf/qpdf/QPDF_private.hh +++ b/libqpdf/qpdf/QPDF_private.hh @@ -295,590 +295,570 @@ class QPDF::PatternFinder final: public InputSource::Finder class QPDF::Doc { public: + class Encryption; class JobSetter; + class Linearization; + class Objects; + class Pages; class ParseGuard; class Resolver; class Writer; - class Encryption + Doc() = delete; + Doc(Doc const&) = delete; + Doc(Doc&&) = delete; + Doc& operator=(Doc const&) = delete; + Doc& operator=(Doc&&) = delete; + ~Doc() = default; + + Doc(QPDF& qpdf, QPDF::Members* m) : + qpdf(qpdf), + m(m) { - public: - // This class holds data read from the encryption dictionary. - Encryption( - int V, - int R, - int Length_bytes, - int P, - std::string const& O, - std::string const& U, - std::string const& OE, - std::string const& UE, - std::string const& Perms, - std::string const& id1, - bool encrypt_metadata) : - V(V), - R(R), - Length_bytes(Length_bytes), - P(static_cast(P)), - O(O), - U(U), - OE(OE), - UE(UE), - Perms(Perms), - id1(id1), - encrypt_metadata(encrypt_metadata) - { + } + + inline Linearization& linearization(); + + inline Objects& objects(); + + inline Pages& pages(); + + bool reconstructed_xref() const; + + QPDFAcroFormDocumentHelper& + acroform() + { + if (!acroform_) { + acroform_ = std::make_unique(qpdf); } - Encryption(int V, int R, int Length_bytes, bool encrypt_metadata) : - V(V), - R(R), - Length_bytes(Length_bytes), - encrypt_metadata(encrypt_metadata) - { + return *acroform_; + } + + QPDFEmbeddedFileDocumentHelper& + embedded_files() + { + if (!embedded_files_) { + embedded_files_ = std::make_unique(qpdf); } + return *embedded_files_; + } - int getV() const; - int getR() const; - int getLengthBytes() const; - int getP() const; - // Bits in P are numbered from 1 as in the PDF spec. - bool getP(size_t bit) const; - std::string const& getO() const; - std::string const& getU() const; - std::string const& getOE() const; - std::string const& getUE() const; - std::string const& getPerms() const; - std::string const& getId1() const; - bool getEncryptMetadata() const; - // Bits in P are numbered from 1 as in the PDF spec. - void setP(size_t bit, bool val); - void setP(unsigned long val); - void setO(std::string const&); - void setU(std::string const&); - void setId1(std::string const& val); - void setV5EncryptionParameters( - std::string const& O, - std::string const& OE, - std::string const& U, - std::string const& UE, - std::string const& Perms); - - std::string compute_encryption_key(std::string const& password) const; - - bool - check_owner_password(std::string& user_password, std::string const& owner_password) const; - - bool check_user_password(std::string const& user_password) const; - - std::string - recover_encryption_key_with_password(std::string const& password, bool& perms_valid) const; - - void compute_encryption_O_U(char const* user_password, char const* owner_password); - - std::string - compute_encryption_parameters_V5(char const* user_password, char const* owner_password); - - std::string compute_parameters(char const* user_password, char const* owner_password); + QPDFOutlineDocumentHelper& + outlines() + { + if (!outlines_) { + outlines_ = std::make_unique(qpdf); + } + return *outlines_; + } - private: - static constexpr unsigned int OU_key_bytes_V4 = 16; // ( == sizeof(MD5::Digest) - - Encryption(Encryption const&) = delete; - Encryption& operator=(Encryption const&) = delete; - - std::string hash_V5( - std::string const& password, std::string const& salt, std::string const& udata) const; - std::string - compute_O_value(std::string const& user_password, std::string const& owner_password) const; - std::string compute_U_value(std::string const& user_password) const; - std::string compute_encryption_key_from_password(std::string const& password) const; - std::string recover_encryption_key_with_password(std::string const& password) const; - bool check_owner_password_V4( - std::string& user_password, std::string const& owner_password) const; - bool check_owner_password_V5(std::string const& owner_passworda) const; - std::string compute_Perms_value_V5_clear() const; - std::string compute_O_rc4_key( - std::string const& user_password, std::string const& owner_password) const; - std::string compute_U_value_R2(std::string const& user_password) const; - std::string compute_U_value_R3(std::string const& user_password) const; - bool check_user_password_V4(std::string const& user_password) const; - bool check_user_password_V5(std::string const& user_password) const; - - int V; - int R; - int Length_bytes; - std::bitset<32> P{0xfffffffc}; // Specification always requires bits 1 and 2 to be cleared. - std::string O; - std::string U; - std::string OE; - std::string UE; - std::string Perms; - std::string id1; - bool encrypt_metadata; - }; // class QPDF::Doc::Encryption - - class Linearization + QPDFPageDocumentHelper& + page_dh() { - public: - Linearization() = delete; - Linearization(Linearization const&) = delete; - Linearization(Linearization&&) = delete; - Linearization& operator=(Linearization const&) = delete; - Linearization& operator=(Linearization&&) = delete; - ~Linearization() = default; - - Linearization(QPDF& qpdf, QPDF::Members* m) : - qpdf(qpdf), - m(m) - { + if (!page_dh_) { + page_dh_ = std::make_unique(qpdf); } + return *page_dh_; + } - // For QPDFWriter: - - template - void optimize_internal( - T const& object_stream_data, - bool allow_changes = true, - std::function skip_stream_parameters = nullptr); - void optimize( - QPDFWriter::ObjTable const& obj, - std::function skip_stream_parameters); - - // Get lists of all objects in order according to the part of a linearized file that they - // belong to. - void getLinearizedParts( - QPDFWriter::ObjTable const& obj, - std::vector& part4, - std::vector& part6, - std::vector& part7, - std::vector& part8, - std::vector& part9); - - void generateHintStream( - QPDFWriter::NewObjTable const& new_obj, - QPDFWriter::ObjTable const& obj, - std::string& hint_stream, - int& S, - int& O, - bool compressed); - - // methods to support linearization checking -- implemented in QPDF_linearization.cc - - void readLinearizationData(); - void checkLinearizationInternal(); - void dumpLinearizationDataInternal(); - void linearizationWarning(std::string_view); - qpdf::Dictionary readHintStream(Pipeline&, qpdf_offset_t offset, size_t length); - void readHPageOffset(BitStream); - void readHSharedObject(BitStream); - void readHGeneric(BitStream, HGeneric&); - qpdf_offset_t maxEnd(ObjUser const& ou); - qpdf_offset_t getLinearizationOffset(QPDFObjGen); - QPDFObjectHandle - getUncompressedObject(QPDFObjectHandle&, std::map const& object_stream_data); - QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj); - int lengthNextN(int first_object, int n); - void checkHPageOffset( - std::vector const& pages, std::map& idx_to_obj); - void checkHSharedObject( - std::vector const& pages, std::map& idx_to_obj); - void checkHOutlines(); - void dumpHPageOffset(); - void dumpHSharedObject(); - void dumpHGeneric(HGeneric&); - qpdf_offset_t adjusted_offset(qpdf_offset_t offset); - template - void calculateLinearizationData(T const& object_stream_data); - template - void pushOutlinesToPart( - std::vector& part, - std::set& lc_outlines, - T const& object_stream_data); - int outputLengthNextN( - int in_object, - int n, - QPDFWriter::NewObjTable const& new_obj, - QPDFWriter::ObjTable const& obj); - void calculateHPageOffset( - QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); - void calculateHSharedObject( - QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); - void - calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); - void writeHPageOffset(BitWriter&); - void writeHSharedObject(BitWriter&); - void writeHGeneric(BitWriter&, HGeneric&); - - // Methods to support optimization - - void updateObjectMaps( - ObjUser const& ou, - QPDFObjectHandle oh, - std::function skip_stream_parameters); - void filterCompressedObjects(std::map const& object_stream_data); - void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data); + QPDFPageLabelDocumentHelper& + page_labels() + { + if (!page_labels_) { + page_labels_ = std::make_unique(qpdf); + } + return *page_labels_; + } - private: - QPDF& qpdf; - QPDF::Members* m; - }; + private: + QPDF& qpdf; + QPDF::Members* m; - class Objects + // Document Helpers; + std::unique_ptr acroform_; + std::unique_ptr embedded_files_; + std::unique_ptr outlines_; + std::unique_ptr page_dh_; + std::unique_ptr page_labels_; +}; + +class QPDF::Doc::Encryption +{ + public: + // This class holds data read from the encryption dictionary. + Encryption( + int V, + int R, + int Length_bytes, + int P, + std::string const& O, + std::string const& U, + std::string const& OE, + std::string const& UE, + std::string const& Perms, + std::string const& id1, + bool encrypt_metadata) : + V(V), + R(R), + Length_bytes(Length_bytes), + P(static_cast(P)), + O(O), + U(U), + OE(OE), + UE(UE), + Perms(Perms), + id1(id1), + encrypt_metadata(encrypt_metadata) { - public: - class Foreign - { - class Copier - { - public: - Copier(QPDF& qpdf) : - qpdf(qpdf) - { - } + } + Encryption(int V, int R, int Length_bytes, bool encrypt_metadata) : + V(V), + R(R), + Length_bytes(Length_bytes), + encrypt_metadata(encrypt_metadata) + { + } - QPDFObjectHandle copied(QPDFObjectHandle const& foreign); + int getV() const; + int getR() const; + int getLengthBytes() const; + int getP() const; + // Bits in P are numbered from 1 as in the PDF spec. + bool getP(size_t bit) const; + std::string const& getO() const; + std::string const& getU() const; + std::string const& getOE() const; + std::string const& getUE() const; + std::string const& getPerms() const; + std::string const& getId1() const; + bool getEncryptMetadata() const; + // Bits in P are numbered from 1 as in the PDF spec. + void setP(size_t bit, bool val); + void setP(unsigned long val); + void setO(std::string const&); + void setU(std::string const&); + void setId1(std::string const& val); + void setV5EncryptionParameters( + std::string const& O, + std::string const& OE, + std::string const& U, + std::string const& UE, + std::string const& Perms); - private: - QPDFObjectHandle - replace_indirect_object(QPDFObjectHandle const& foreign, bool top = false); - void reserve_objects(QPDFObjectHandle const& foreign, bool top = false); + std::string compute_encryption_key(std::string const& password) const; - QPDF& qpdf; - std::map object_map; - std::vector to_copy; - QPDFObjGen::set visiting; - }; + bool check_owner_password(std::string& user_password, std::string const& owner_password) const; - public: - Foreign(QPDF& qpdf) : - qpdf(qpdf) - { - } + bool check_user_password(std::string const& user_password) const; - Foreign() = delete; - Foreign(Foreign const&) = delete; - Foreign(Foreign&&) = delete; - Foreign& operator=(Foreign const&) = delete; - Foreign& operator=(Foreign&&) = delete; - ~Foreign() = default; + std::string + recover_encryption_key_with_password(std::string const& password, bool& perms_valid) const; - // Return a local handle to the foreign object. Copy the foreign object if necessary. - QPDFObjectHandle - copied(QPDFObjectHandle const& foreign) - { - return copier(foreign).copied(foreign); - } + void compute_encryption_O_U(char const* user_password, char const* owner_password); - private: - Copier& copier(QPDFObjectHandle const& foreign); + std::string + compute_encryption_parameters_V5(char const* user_password, char const* owner_password); - QPDF& qpdf; - std::map copiers; - }; // class QPDF::Doc::Objects::Foreign + std::string compute_parameters(char const* user_password, char const* owner_password); - class Streams - { - // Copier manages the copying of streams into this PDF. It is used both for copying - // local and foreign streams. - class Copier; + private: + static constexpr unsigned int OU_key_bytes_V4 = 16; // ( == sizeof(MD5::Digest) + + Encryption(Encryption const&) = delete; + Encryption& operator=(Encryption const&) = delete; + + std::string + hash_V5(std::string const& password, std::string const& salt, std::string const& udata) const; + std::string + compute_O_value(std::string const& user_password, std::string const& owner_password) const; + std::string compute_U_value(std::string const& user_password) const; + std::string compute_encryption_key_from_password(std::string const& password) const; + std::string recover_encryption_key_with_password(std::string const& password) const; + bool + check_owner_password_V4(std::string& user_password, std::string const& owner_password) const; + bool check_owner_password_V5(std::string const& owner_passworda) const; + std::string compute_Perms_value_V5_clear() const; + std::string + compute_O_rc4_key(std::string const& user_password, std::string const& owner_password) const; + std::string compute_U_value_R2(std::string const& user_password) const; + std::string compute_U_value_R3(std::string const& user_password) const; + bool check_user_password_V4(std::string const& user_password) const; + bool check_user_password_V5(std::string const& user_password) const; + + int V; + int R; + int Length_bytes; + std::bitset<32> P{0xfffffffc}; // Specification always requires bits 1 and 2 to be cleared. + std::string O; + std::string U; + std::string OE; + std::string UE; + std::string Perms; + std::string id1; + bool encrypt_metadata; +}; // class QPDF::Doc::Encryption + +class QPDF::Doc::Linearization +{ + public: + Linearization() = delete; + Linearization(Linearization const&) = delete; + Linearization(Linearization&&) = delete; + Linearization& operator=(Linearization const&) = delete; + Linearization& operator=(Linearization&&) = delete; + ~Linearization() = default; + + Linearization(QPDF& qpdf, QPDF::Members* m) : + qpdf(qpdf), + m(m) + { + } - public: - Streams(QPDF& qpdf); + // For QPDFWriter: + + template + void optimize_internal( + T const& object_stream_data, + bool allow_changes = true, + std::function skip_stream_parameters = nullptr); + void optimize( + QPDFWriter::ObjTable const& obj, + std::function skip_stream_parameters); + + // Get lists of all objects in order according to the part of a linearized file that they + // belong to. + void getLinearizedParts( + QPDFWriter::ObjTable const& obj, + std::vector& part4, + std::vector& part6, + std::vector& part7, + std::vector& part8, + std::vector& part9); + + void generateHintStream( + QPDFWriter::NewObjTable const& new_obj, + QPDFWriter::ObjTable const& obj, + std::string& hint_stream, + int& S, + int& O, + bool compressed); + + // methods to support linearization checking -- implemented in QPDF_linearization.cc + + void readLinearizationData(); + void checkLinearizationInternal(); + void dumpLinearizationDataInternal(); + void linearizationWarning(std::string_view); + qpdf::Dictionary readHintStream(Pipeline&, qpdf_offset_t offset, size_t length); + void readHPageOffset(BitStream); + void readHSharedObject(BitStream); + void readHGeneric(BitStream, HGeneric&); + qpdf_offset_t maxEnd(ObjUser const& ou); + qpdf_offset_t getLinearizationOffset(QPDFObjGen); + QPDFObjectHandle + getUncompressedObject(QPDFObjectHandle&, std::map const& object_stream_data); + QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj); + int lengthNextN(int first_object, int n); + void + checkHPageOffset(std::vector const& pages, std::map& idx_to_obj); + void + checkHSharedObject(std::vector const& pages, std::map& idx_to_obj); + void checkHOutlines(); + void dumpHPageOffset(); + void dumpHSharedObject(); + void dumpHGeneric(HGeneric&); + qpdf_offset_t adjusted_offset(qpdf_offset_t offset); + template + void calculateLinearizationData(T const& object_stream_data); + template + void pushOutlinesToPart( + std::vector& part, + std::set& lc_outlines, + T const& object_stream_data); + int outputLengthNextN( + int in_object, + int n, + QPDFWriter::NewObjTable const& new_obj, + QPDFWriter::ObjTable const& obj); + void + calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); + void + calculateHSharedObject(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); + void calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); + void writeHPageOffset(BitWriter&); + void writeHSharedObject(BitWriter&); + void writeHGeneric(BitWriter&, HGeneric&); - Streams() = delete; - Streams(Streams const&) = delete; - Streams(Streams&&) = delete; - Streams& operator=(Streams const&) = delete; - Streams& operator=(Streams&&) = delete; - ~Streams() = default; + // Methods to support optimization - public: - static bool - pipeStreamData( - QPDF* qpdf, - QPDFObjGen og, - qpdf_offset_t offset, - size_t length, - QPDFObjectHandle dict, - bool is_root_metadata, - Pipeline* pipeline, - bool suppress_warnings, - bool will_retry) - { - return qpdf->pipeStreamData( - og, - offset, - length, - dict, - is_root_metadata, - pipeline, - suppress_warnings, - will_retry); - } + void updateObjectMaps( + ObjUser const& ou, + QPDFObjectHandle oh, + std::function skip_stream_parameters); + void filterCompressedObjects(std::map const& object_stream_data); + void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data); - QPDF& - qpdf() const - { - return qpdf_; - } + private: + QPDF& qpdf; + QPDF::Members* m; +}; - std::shared_ptr& - copier() +class QPDF::Doc::Objects +{ + public: + class Foreign + { + class Copier + { + public: + Copier(QPDF& qpdf) : + qpdf(qpdf) { - return copier_; } - bool immediate_copy_from() const; + QPDFObjectHandle copied(QPDFObjectHandle const& foreign); private: - QPDF& qpdf_; + QPDFObjectHandle + replace_indirect_object(QPDFObjectHandle const& foreign, bool top = false); + void reserve_objects(QPDFObjectHandle const& foreign, bool top = false); - std::shared_ptr copier_; - }; // class QPDF::Doc::Objects::Streams + QPDF& qpdf; + std::map object_map; + std::vector to_copy; + QPDFObjGen::set visiting; + }; public: - Objects() = delete; - Objects(Objects const&) = delete; - Objects(Objects&&) = delete; - Objects& operator=(Objects const&) = delete; - Objects& operator=(Objects&&) = delete; - ~Objects() = default; - - Objects(QPDF& qpdf, QPDF::Members* m) : - qpdf(qpdf), - m(m), - foreign_(qpdf), - streams_(qpdf) + Foreign(QPDF& qpdf) : + qpdf(qpdf) { } - Foreign& - foreign() - { - return foreign_; - } + Foreign() = delete; + Foreign(Foreign const&) = delete; + Foreign(Foreign&&) = delete; + Foreign& operator=(Foreign const&) = delete; + Foreign& operator=(Foreign&&) = delete; + ~Foreign() = default; - Streams& - streams() + // Return a local handle to the foreign object. Copy the foreign object if necessary. + QPDFObjectHandle + copied(QPDFObjectHandle const& foreign) { - return streams_; + return copier(foreign).copied(foreign); } - void parse(char const* password); - std::shared_ptr const& resolve(QPDFObjGen og); - void inParse(bool); - QPDFObjGen nextObjGen(); - QPDFObjectHandle newIndirect(QPDFObjGen, std::shared_ptr const&); - void updateCache( - QPDFObjGen og, - std::shared_ptr const& object, - qpdf_offset_t end_before_space, - qpdf_offset_t end_after_space, - bool destroy = true); - bool resolveXRefTable(); - QPDFObjectHandle readObjectAtOffset( - qpdf_offset_t offset, std::string const& description, bool skip_cache_if_in_xref); - QPDFTokenizer::Token readToken(InputSource& input, size_t max_len = 0); - QPDFObjectHandle makeIndirectFromQPDFObject(std::shared_ptr const& obj); - std::shared_ptr getObjectForParser(int id, int gen, bool parse_pdf); - std::shared_ptr getObjectForJSON(int id, int gen); - size_t tableSize(); - - // For QPDFWriter: - - std::map const& getXRefTableInternal(); - // Get a list of objects that would be permitted in an object stream. - template - std::vector getCompressibleObjGens(); - std::vector getCompressibleObjVector(); - std::vector getCompressibleObjSet(); - private: - void setTrailer(QPDFObjectHandle obj); - void reconstruct_xref(QPDFExc& e, bool found_startxref = true); - void read_xref(qpdf_offset_t offset, bool in_stream_recovery = false); - bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes); - bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); - bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); - qpdf_offset_t read_xrefTable(qpdf_offset_t offset); - qpdf_offset_t read_xrefStream(qpdf_offset_t offset, bool in_stream_recovery = false); - qpdf_offset_t processXRefStream( - qpdf_offset_t offset, QPDFObjectHandle& xref_stream, bool in_stream_recovery = false); - std::pair> - processXRefW(QPDFObjectHandle& dict, std::function damaged); - int processXRefSize( - QPDFObjectHandle& dict, - int entry_size, - std::function damaged); - std::pair>> processXRefIndex( - QPDFObjectHandle& dict, - int max_num_entries, - std::function damaged); - void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2); - void insertFreeXrefEntry(QPDFObjGen); - QPDFObjectHandle readTrailer(); - QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og); - void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); - void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); - QPDFObjectHandle - readObjectInStream(qpdf::is::OffsetBuffer& input, int stream_id, int obj_id); - size_t recoverStreamLength( - std::shared_ptr input, QPDFObjGen og, qpdf_offset_t stream_offset); - - QPDFObjGen read_object_start(qpdf_offset_t offset); - void readObjectAtOffset( - bool attempt_recovery, - qpdf_offset_t offset, - std::string const& description, - QPDFObjGen exp_og); - void resolveObjectsInStream(int obj_stream_number); - bool isCached(QPDFObjGen og); - bool isUnresolved(QPDFObjGen og); - void setLastObjectDescription(std::string const& description, QPDFObjGen og); + Copier& copier(QPDFObjectHandle const& foreign); QPDF& qpdf; - QPDF::Members* m; - - Foreign foreign_; - Streams streams_; - }; // class QPDF::Doc::Objects + std::map copiers; + }; // class QPDF::Doc::Objects::Foreign - // This class is used to represent a PDF Pages tree. - class Pages + class Streams { + // Copier manages the copying of streams into this PDF. It is used both for copying + // local and foreign streams. + class Copier; + + public: + Streams(QPDF& qpdf); + + Streams() = delete; + Streams(Streams const&) = delete; + Streams(Streams&&) = delete; + Streams& operator=(Streams const&) = delete; + Streams& operator=(Streams&&) = delete; + ~Streams() = default; + public: - Pages() = delete; - Pages(Pages const&) = delete; - Pages(Pages&&) = delete; - Pages& operator=(Pages const&) = delete; - Pages& operator=(Pages&&) = delete; - ~Pages() = default; - - Pages(QPDF& qpdf, QPDF::Members* m) : - qpdf(qpdf), - m(m) + static bool + pipeStreamData( + QPDF* qpdf, + QPDFObjGen og, + qpdf_offset_t offset, + size_t length, + QPDFObjectHandle dict, + bool is_root_metadata, + Pipeline* pipeline, + bool suppress_warnings, + bool will_retry) { + return qpdf->pipeStreamData( + og, + offset, + length, + dict, + is_root_metadata, + pipeline, + suppress_warnings, + will_retry); } - void getAllPagesInternal( - QPDFObjectHandle cur_pages, - QPDFObjGen::set& visited, - QPDFObjGen::set& seen, - bool media_box, - bool resources); - void insertPage(QPDFObjectHandle newpage, int pos); - void flattenPagesTree(); - void insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate); - void pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys); - void pushInheritedAttributesToPageInternal( - QPDFObjectHandle, - std::map>&, - bool allow_changes, - bool warn_skipped_keys); + QPDF& + qpdf() const + { + return qpdf_; + } + + std::shared_ptr& + copier() + { + return copier_; + } + + bool immediate_copy_from() const; private: - QPDF& qpdf; - QPDF::Members* m; - }; // class QPDF::Doc::Pages + QPDF& qpdf_; - Doc() = delete; - Doc(Doc const&) = delete; - Doc(Doc&&) = delete; - Doc& operator=(Doc const&) = delete; - Doc& operator=(Doc&&) = delete; - ~Doc() = default; + std::shared_ptr copier_; + }; // class QPDF::Doc::Objects::Streams - Doc(QPDF& qpdf, QPDF::Members& m) : + public: + Objects() = delete; + Objects(Objects const&) = delete; + Objects(Objects&&) = delete; + Objects& operator=(Objects const&) = delete; + Objects& operator=(Objects&&) = delete; + ~Objects() = default; + + Objects(QPDF& qpdf, QPDF::Members* m) : qpdf(qpdf), m(m), - lin_(qpdf, &m), - objects_(qpdf, &m), - pages_(qpdf, &m) + foreign_(qpdf), + streams_(qpdf) { } - Linearization& - linearization() - { - return lin_; - }; - - Objects& - objects() + Foreign& + foreign() { - return objects_; - }; + return foreign_; + } - Pages& - pages() + Streams& + streams() { - return pages_; + return streams_; } - bool reconstructed_xref() const; + void parse(char const* password); + std::shared_ptr const& resolve(QPDFObjGen og); + void inParse(bool); + QPDFObjGen nextObjGen(); + QPDFObjectHandle newIndirect(QPDFObjGen, std::shared_ptr const&); + void updateCache( + QPDFObjGen og, + std::shared_ptr const& object, + qpdf_offset_t end_before_space, + qpdf_offset_t end_after_space, + bool destroy = true); + bool resolveXRefTable(); + QPDFObjectHandle readObjectAtOffset( + qpdf_offset_t offset, std::string const& description, bool skip_cache_if_in_xref); + QPDFTokenizer::Token readToken(InputSource& input, size_t max_len = 0); + QPDFObjectHandle makeIndirectFromQPDFObject(std::shared_ptr const& obj); + std::shared_ptr getObjectForParser(int id, int gen, bool parse_pdf); + std::shared_ptr getObjectForJSON(int id, int gen); + size_t tableSize(); + + // For QPDFWriter: + + std::map const& getXRefTableInternal(); + // Get a list of objects that would be permitted in an object stream. + template + std::vector getCompressibleObjGens(); + std::vector getCompressibleObjVector(); + std::vector getCompressibleObjSet(); - QPDFAcroFormDocumentHelper& - acroform() - { - if (!acroform_) { - acroform_ = std::make_unique(qpdf); - } - return *acroform_; - } + private: + void setTrailer(QPDFObjectHandle obj); + void reconstruct_xref(QPDFExc& e, bool found_startxref = true); + void read_xref(qpdf_offset_t offset, bool in_stream_recovery = false); + bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes); + bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); + bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); + qpdf_offset_t read_xrefTable(qpdf_offset_t offset); + qpdf_offset_t read_xrefStream(qpdf_offset_t offset, bool in_stream_recovery = false); + qpdf_offset_t processXRefStream( + qpdf_offset_t offset, QPDFObjectHandle& xref_stream, bool in_stream_recovery = false); + std::pair> + processXRefW(QPDFObjectHandle& dict, std::function damaged); + int processXRefSize( + QPDFObjectHandle& dict, int entry_size, std::function damaged); + std::pair>> processXRefIndex( + QPDFObjectHandle& dict, + int max_num_entries, + std::function damaged); + void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2); + void insertFreeXrefEntry(QPDFObjGen); + QPDFObjectHandle readTrailer(); + QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og); + void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); + void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); + QPDFObjectHandle readObjectInStream(qpdf::is::OffsetBuffer& input, int stream_id, int obj_id); + size_t recoverStreamLength( + std::shared_ptr input, QPDFObjGen og, qpdf_offset_t stream_offset); + + QPDFObjGen read_object_start(qpdf_offset_t offset); + void readObjectAtOffset( + bool attempt_recovery, + qpdf_offset_t offset, + std::string const& description, + QPDFObjGen exp_og); + void resolveObjectsInStream(int obj_stream_number); + bool isCached(QPDFObjGen og); + bool isUnresolved(QPDFObjGen og); + void setLastObjectDescription(std::string const& description, QPDFObjGen og); - QPDFEmbeddedFileDocumentHelper& - embedded_files() - { - if (!embedded_files_) { - embedded_files_ = std::make_unique(qpdf); - } - return *embedded_files_; - } + QPDF& qpdf; + QPDF::Members* m; - QPDFOutlineDocumentHelper& - outlines() - { - if (!outlines_) { - outlines_ = std::make_unique(qpdf); - } - return *outlines_; - } + Foreign foreign_; + Streams streams_; +}; // class QPDF::Doc::Objects - QPDFPageDocumentHelper& - page_dh() +// This class is used to represent a PDF Pages tree. +class QPDF::Doc::Pages +{ + public: + Pages() = delete; + Pages(Pages const&) = delete; + Pages(Pages&&) = delete; + Pages& operator=(Pages const&) = delete; + Pages& operator=(Pages&&) = delete; + ~Pages() = default; + + Pages(QPDF& qpdf, QPDF::Members* m) : + qpdf(qpdf), + m(m) { - if (!page_dh_) { - page_dh_ = std::make_unique(qpdf); - } - return *page_dh_; } - QPDFPageLabelDocumentHelper& - page_labels() - { - if (!page_labels_) { - page_labels_ = std::make_unique(qpdf); - } - return *page_labels_; - } + void getAllPagesInternal( + QPDFObjectHandle cur_pages, + QPDFObjGen::set& visited, + QPDFObjGen::set& seen, + bool media_box, + bool resources); + void insertPage(QPDFObjectHandle newpage, int pos); + void flattenPagesTree(); + void insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate); + void pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys); + void pushInheritedAttributesToPageInternal( + QPDFObjectHandle, + std::map>&, + bool allow_changes, + bool warn_skipped_keys); private: QPDF& qpdf; - QPDF::Members& m; - - Linearization lin_; - Objects objects_; - Pages pages_; - - // Document Helpers; - std::unique_ptr acroform_; - std::unique_ptr embedded_files_; - std::unique_ptr outlines_; - std::unique_ptr page_dh_; - std::unique_ptr page_labels_; -}; + QPDF::Members* m; +}; // class QPDF::Doc::Pages -class QPDF::Members +class QPDF::Members: QPDF::Doc { friend class QPDF; friend class ResolveRecorder; @@ -889,10 +869,9 @@ class QPDF::Members ~Members() = default; private: - Doc doc; - Doc::Linearization& lin; - Doc::Objects& objects; - Doc::Pages& pages; + Doc::Linearization lin; + Doc::Objects objects; + Doc::Pages pages; std::shared_ptr log; unsigned long long unique_id{0}; qpdf::Tokenizer tokenizer; @@ -978,16 +957,34 @@ class QPDF::Doc::Resolver } }; +inline QPDF::Doc::Linearization& +QPDF::Doc::linearization() +{ + return m->lin; +}; + +inline QPDF::Doc::Objects& +QPDF::Doc::objects() +{ + return m->objects; +}; + +inline QPDF::Doc::Pages& +QPDF::Doc::pages() +{ + return m->pages; +} + inline bool QPDF::Doc::reconstructed_xref() const { - return m.reconstructed_xref; + return m->reconstructed_xref; } inline QPDF::Doc& QPDF::doc() { - return m->doc; + return *m; } // Throw a generic exception for unusual error conditions that do not be covered during CI testing.