diff --git a/include/qpdf/BufferInputSource.hh b/include/qpdf/BufferInputSource.hh index 53b221c..33adc5a 100644 --- a/include/qpdf/BufferInputSource.hh +++ b/include/qpdf/BufferInputSource.hh @@ -30,6 +30,8 @@ class QPDF_DLL_CLASS BufferInputSource: public InputSource // Otherwise, the caller owns the memory. QPDF_DLL BufferInputSource(std::string const& description, Buffer* buf, bool own_memory = false); + + // NB This overload copies the string contents. QPDF_DLL BufferInputSource(std::string const& description, std::string const& contents); QPDF_DLL diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 934a8e7..b02e13d 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -726,167 +726,14 @@ class QPDF void removePage(QPDFObjectHandle page); // End legacy page helpers - // Writer class is restricted to QPDFWriter so that only it can call certain methods. - class Writer - { - friend class QPDFWriter; - - private: - static void - optimize( - QPDF& qpdf, - QPDFWriter::ObjTable const& obj, - std::function skip_stream_parameters) - { - return qpdf.optimize(obj, skip_stream_parameters); - } - - static void - getLinearizedParts( - QPDF& qpdf, - QPDFWriter::ObjTable const& obj, - std::vector& part4, - std::vector& part6, - std::vector& part7, - std::vector& part8, - std::vector& part9) - { - qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9); - } - - static void - generateHintStream( - QPDF& qpdf, - QPDFWriter::NewObjTable const& new_obj, - QPDFWriter::ObjTable const& obj, - std::shared_ptr& hint_stream, - int& S, - int& O, - bool compressed) - { - return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed); - } - - static std::vector - getCompressibleObjGens(QPDF& qpdf) - { - return qpdf.getCompressibleObjVector(); - } - - static std::vector - getCompressibleObjSet(QPDF& qpdf) - { - return qpdf.getCompressibleObjSet(); - } - - static std::map const& - getXRefTable(QPDF& qpdf) - { - return qpdf.getXRefTableInternal(); - } 
+ // End of the public API. The following classes and methods are for qpdf internal use only. - static size_t - tableSize(QPDF& qpdf) - { - return qpdf.tableSize(); - } - }; - - // The Resolver class is restricted to QPDFObject so that only it can resolve indirect - // references. - class Resolver - { - friend class QPDFObject; - friend class QPDF_Unresolved; - friend class qpdf::BaseHandle; - - private: - static std::shared_ptr const& - resolved(QPDF* qpdf, QPDFObjGen og) - { - return qpdf->resolve(og); - } - }; - - // StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data. - class StreamCopier - { - friend class QPDFObjectHandle; - - private: - static void - copyStreamData(QPDF* qpdf, QPDFObjectHandle const& dest, QPDFObjectHandle const& src) - { - qpdf->copyStreamData(dest, src); - } - }; - - // The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides - // special access to allow the parser to create unresolved objects and dangling references. - class ParseGuard - { - friend class QPDFParser; - - private: - ParseGuard(QPDF* qpdf) : - qpdf(qpdf) - { - if (qpdf) { - qpdf->inParse(true); - } - } - - static std::shared_ptr - getObject(QPDF* qpdf, int id, int gen, bool parse_pdf) - { - return qpdf->getObjectForParser(id, gen, parse_pdf); - } - - ~ParseGuard() - { - if (qpdf) { - qpdf->inParse(false); - } - } - QPDF* qpdf; - }; - - // Pipe class is restricted to QPDF_Stream. - class Pipe - { - friend class QPDF_Stream; - friend class qpdf::Stream; - - private: - static bool - pipeStreamData( - QPDF* qpdf, - QPDFObjGen og, - qpdf_offset_t offset, - size_t length, - QPDFObjectHandle dict, - Pipeline* pipeline, - bool suppress_warnings, - bool will_retry) - { - return qpdf->pipeStreamData( - og, offset, length, dict, pipeline, suppress_warnings, will_retry); - } - }; - - // JobSetter class is restricted to QPDFJob. 
- class JobSetter - { - friend class QPDFJob; - - private: - // Enable enhanced warnings for pdf file checking. - static void - setCheckMode(QPDF& qpdf, bool val) - { - qpdf.m->check_mode = val; - } - }; + class Writer; + class Resolver; + class StreamCopier; + class ParseGuard; + class Pipe; + class JobSetter; // For testing only -- do not add to DLL static bool test_json_validators(); @@ -901,136 +748,13 @@ class QPDF static std::string const qpdf_version; - class ObjCache - { - public: - ObjCache() : - end_before_space(0), - end_after_space(0) - { - } - ObjCache( - std::shared_ptr object, - qpdf_offset_t end_before_space = 0, - qpdf_offset_t end_after_space = 0) : - object(object), - end_before_space(end_before_space), - end_after_space(end_after_space) - { - } - - std::shared_ptr object; - qpdf_offset_t end_before_space; - qpdf_offset_t end_after_space; - }; - - class ObjCopier - { - public: - std::map object_map; - std::vector to_copy; - QPDFObjGen::set visiting; - }; - - class EncryptionParameters - { - friend class QPDF; - - public: - EncryptionParameters(); - - private: - bool encrypted; - bool encryption_initialized; - int encryption_V; - int encryption_R; - bool encrypt_metadata; - std::map crypt_filters; - encryption_method_e cf_stream; - encryption_method_e cf_string; - encryption_method_e cf_file; - std::string provided_password; - std::string user_password; - std::string encryption_key; - std::string cached_object_encryption_key; - QPDFObjGen cached_key_og; - bool user_password_matched; - bool owner_password_matched; - }; - - class ForeignStreamData - { - friend class QPDF; - - public: - ForeignStreamData( - std::shared_ptr encp, - std::shared_ptr file, - QPDFObjGen foreign_og, - qpdf_offset_t offset, - size_t length, - QPDFObjectHandle local_dict); - - private: - std::shared_ptr encp; - std::shared_ptr file; - QPDFObjGen foreign_og; - qpdf_offset_t offset; - size_t length; - QPDFObjectHandle local_dict; - }; - - class CopiedStreamDataProvider: public 
QPDFObjectHandle::StreamDataProvider - { - public: - CopiedStreamDataProvider(QPDF& destination_qpdf); - ~CopiedStreamDataProvider() override = default; - bool provideStreamData( - QPDFObjGen const& og, - Pipeline* pipeline, - bool suppress_warnings, - bool will_retry) override; - void registerForeignStream(QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream); - void registerForeignStream(QPDFObjGen const& local_og, std::shared_ptr); - - private: - QPDF& destination_qpdf; - std::map foreign_streams; - std::map> foreign_stream_data; - }; - - class StringDecrypter: public QPDFObjectHandle::StringDecrypter - { - friend class QPDF; - - public: - StringDecrypter(QPDF* qpdf, QPDFObjGen og); - ~StringDecrypter() override = default; - void decryptString(std::string& val) override; - - private: - QPDF* qpdf; - QPDFObjGen og; - }; - - class ResolveRecorder - { - public: - ResolveRecorder(QPDF* qpdf, QPDFObjGen og) : - qpdf(qpdf), - iter(qpdf->m->resolving.insert(og).first) - { - } - virtual ~ResolveRecorder() - { - this->qpdf->m->resolving.erase(iter); - } - - private: - QPDF* qpdf; - std::set::const_iterator iter; - }; - + class ObjCache; + class ObjCopier; + class EncryptionParameters; + class ForeignStreamData; + class CopiedStreamDataProvider; + class StringDecrypter; + class ResolveRecorder; class JSONReactor; void parse(char const* password); @@ -1200,200 +924,19 @@ class QPDF replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top); void copyStreamData(QPDFObjectHandle dest_stream, QPDFObjectHandle src_stream); - // Linearization Hint table structures. - // Naming conventions: - - // HSomething is the Something Hint Table or table header - // HSomethingEntry is an entry in the Something table - - // delta_something + min_something = something - // nbits_something = number of bits required for something - - // something_offset is the pre-adjusted offset in the file. 
If >= - // H0_offset, H0_length must be added to get an actual file - // offset. - - // PDF 1.4: Table F.4 - struct HPageOffsetEntry - { - int delta_nobjects{0}; // 1 - qpdf_offset_t delta_page_length{0}; // 2 - // vectors' sizes = nshared_objects - int nshared_objects{0}; // 3 - std::vector shared_identifiers; // 4 - std::vector shared_numerators; // 5 - qpdf_offset_t delta_content_offset{0}; // 6 - qpdf_offset_t delta_content_length{0}; // 7 - }; - - // PDF 1.4: Table F.3 - struct HPageOffset - { - int min_nobjects{0}; // 1 - qpdf_offset_t first_page_offset{0}; // 2 - int nbits_delta_nobjects{0}; // 3 - int min_page_length{0}; // 4 - int nbits_delta_page_length{0}; // 5 - int min_content_offset{0}; // 6 - int nbits_delta_content_offset{0}; // 7 - int min_content_length{0}; // 8 - int nbits_delta_content_length{0}; // 9 - int nbits_nshared_objects{0}; // 10 - int nbits_shared_identifier{0}; // 11 - int nbits_shared_numerator{0}; // 12 - int shared_denominator{0}; // 13 - // vector size is npages - std::vector entries; - }; - - // PDF 1.4: Table F.6 - struct HSharedObjectEntry - { - // Item 3 is a 128-bit signature (unsupported by Acrobat) - int delta_group_length{0}; // 1 - int signature_present{0}; // 2 -- always 0 - int nobjects_minus_one{0}; // 4 -- always 0 - }; - - // PDF 1.4: Table F.5 - struct HSharedObject - { - int first_shared_obj{0}; // 1 - qpdf_offset_t first_shared_offset{0}; // 2 - int nshared_first_page{0}; // 3 - int nshared_total{0}; // 4 - int nbits_nobjects{0}; // 5 - int min_group_length{0}; // 6 - int nbits_delta_group_length{0}; // 7 - // vector size is nshared_total - std::vector entries; - }; - - // PDF 1.4: Table F.9 - struct HGeneric - { - int first_object{0}; // 1 - qpdf_offset_t first_object_offset{0}; // 2 - int nobjects{0}; // 3 - int group_length{0}; // 4 - }; - - // Other linearization data structures - - // Initialized from Linearization Parameter dictionary - struct LinParameters - { - qpdf_offset_t file_size{0}; // /L - int 
first_page_object{0}; // /O - qpdf_offset_t first_page_end{0}; // /E - int npages{0}; // /N - qpdf_offset_t xref_zero_offset{0}; // /T - int first_page{0}; // /P - qpdf_offset_t H_offset{0}; // offset of primary hint stream - qpdf_offset_t H_length{0}; // length of primary hint stream - }; - - // Computed hint table value data structures. These tables contain the computed values on which - // the hint table values are based. They exclude things like number of bits and store actual - // values instead of mins and deltas. File offsets are also absolute rather than being offset - // by the size of the primary hint table. We populate the hint table structures from these - // during writing and compare the hint table values with these during validation. We ignore - // some values for various reasons described in the code. Those values are omitted from these - // structures. Note also that object numbers are object numbers from the input file, not the - // output file. - - // Naming convention: CHSomething is analogous to HSomething above. "CH" is computed hint. - - struct CHPageOffsetEntry - { - int nobjects{0}; - int nshared_objects{0}; - // vectors' sizes = nshared_objects - std::vector shared_identifiers; - }; - - struct CHPageOffset - { - // vector size is npages - std::vector entries; - }; - - struct CHSharedObjectEntry - { - CHSharedObjectEntry(int object) : - object(object) - { - } - - int object; - }; - - // PDF 1.4: Table F.5 - struct CHSharedObject - { - int first_shared_obj{0}; - int nshared_first_page{0}; - int nshared_total{0}; - // vector size is nshared_total - std::vector entries; - }; - - // No need for CHGeneric -- HGeneric is fine as is. 
- - // Data structures to support optimization -- implemented in QPDF_optimization.cc - - class ObjUser - { - public: - enum user_e { ou_bad, ou_page, ou_thumb, ou_trailer_key, ou_root_key, ou_root }; - - // type is set to ou_bad - ObjUser(); - - // type must be ou_root - ObjUser(user_e type); - - // type must be one of ou_page or ou_thumb - ObjUser(user_e type, int pageno); - - // type must be one of ou_trailer_key or ou_root_key - ObjUser(user_e type, std::string const& key); - - bool operator<(ObjUser const&) const; - - user_e ou_type; - int pageno; // if ou_page; - std::string key; // if ou_trailer_key or ou_root_key - }; - - struct UpdateObjectMapsFrame - { - UpdateObjectMapsFrame(ObjUser const& ou, QPDFObjectHandle oh, bool top); - - ObjUser const& ou; - QPDFObjectHandle oh; - bool top; - }; - - class PatternFinder: public InputSource::Finder - { - public: - PatternFinder(QPDF& qpdf, bool (QPDF::*checker)()) : - qpdf(qpdf), - checker(checker) - { - } - ~PatternFinder() override = default; - bool - check() override - { - return (this->qpdf.*checker)(); - } - - private: - QPDF& qpdf; - bool (QPDF::*checker)(); - }; + struct HPageOffsetEntry; + struct HPageOffset; + struct HSharedObjectEntry; + struct HSharedObject; + struct HGeneric; + struct LinParameters; + struct CHPageOffsetEntry; + struct CHPageOffset; + struct CHSharedObjectEntry; + struct CHSharedObject; + class ObjUser; + struct UpdateObjectMapsFrame; + class PatternFinder; // Methods to support pattern finding static bool validatePDFVersion(char const*&, std::string& version); @@ -1490,88 +1033,7 @@ class QPDF return QIntC::to_ulonglong(i); } - class Members - { - friend class QPDF; - friend class ResolveRecorder; - - public: - Members(); - Members(Members const&) = delete; - ~Members() = default; - - private: - std::shared_ptr log; - unsigned long long unique_id{0}; - QPDFTokenizer tokenizer; - std::shared_ptr file; - std::string last_object_description; - bool provided_password_is_hex_key{false}; - 
bool ignore_xref_streams{false}; - bool suppress_warnings{false}; - size_t max_warnings{0}; - bool attempt_recovery{true}; - bool check_mode{false}; - std::shared_ptr encp; - std::string pdf_version; - std::map xref_table; - // Various tables are indexed by object id, with potential size id + 1 - int xref_table_max_id{std::numeric_limits::max() - 1}; - qpdf_offset_t xref_table_max_offset{0}; - std::set deleted_objects; - std::map obj_cache; - std::set resolving; - QPDFObjectHandle trailer; - std::vector all_pages; - bool invalid_page_found{false}; - std::map pageobj_to_pages_pos; - bool pushed_inherited_attributes_to_pages{false}; - bool ever_pushed_inherited_attributes_to_pages{false}; - bool ever_called_get_all_pages{false}; - std::vector warnings; - std::map object_copiers; - std::shared_ptr copied_streams; - // copied_stream_data_provider is owned by copied_streams - CopiedStreamDataProvider* copied_stream_data_provider{nullptr}; - bool reconstructed_xref{false}; - bool fixed_dangling_refs{false}; - bool immediate_copy_from{false}; - bool in_parse{false}; - bool parsed{false}; - std::set resolved_object_streams; - - // Linearization data - qpdf_offset_t first_xref_item_offset{0}; // actual value from file - bool uncompressed_after_compressed{false}; - bool linearization_warnings{false}; - - // Linearization parameter dictionary and hint table data: may be read from file or computed - // prior to writing a linearized file - QPDFObjectHandle lindict; - LinParameters linp; - HPageOffset page_offset_hints; - HSharedObject shared_object_hints; - HGeneric outline_hints; - - // Computed linearization data: used to populate above tables during writing and to compare - // with them during validation. c_ means computed. - LinParameters c_linp; - CHPageOffset c_page_offset_data; - CHSharedObject c_shared_object_data; - HGeneric c_outline_data; - - // Object ordering data for linearized files: initialized by calculateLinearizationData(). 
- // Part numbers refer to the PDF 1.4 specification. - std::vector part4; - std::vector part6; - std::vector part7; - std::vector part8; - std::vector part9; - - // Optimization data - std::map> obj_user_to_objects; - std::map> object_to_obj_users; - }; + class Members; // Keep all member variables inside the Members object, which we dynamically allocate. This // makes it possible to add new private members without breaking binary compatibility. diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 8b7f1fd..074d544 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -1,6 +1,6 @@ #include // include first for large file support -#include +#include #include #include @@ -168,12 +168,6 @@ QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen og) : { } -void -QPDF::StringDecrypter::decryptString(std::string& val) -{ - qpdf->decryptString(val, og); -} - std::string const& QPDF::QPDFVersion() { @@ -181,20 +175,6 @@ QPDF::QPDFVersion() return QPDF::qpdf_version; } -QPDF::EncryptionParameters::EncryptionParameters() : - encrypted(false), - encryption_initialized(false), - encryption_V(0), - encryption_R(0), - encrypt_metadata(true), - cf_stream(e_none), - cf_string(e_none), - cf_file(e_none), - user_password_matched(false), - owner_password_matched(false) -{ -} - QPDF::Members::Members() : log(QPDFLogger::defaultLogger()), file(new InvalidInputSource()), diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index 6b1d6cb..f0e229d 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -27,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 4c001cd..f4c71cd 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -1495,19 +1495,23 @@ QPDFObjectHandle QPDFObjectHandle::parse( QPDF* context, std::string const& object_str, std::string const& object_description) { - 
auto input = std::shared_ptr(new BufferInputSource("parsed object", object_str)); - QPDFTokenizer tokenizer; + // BufferInputSource does not modify the input, but Buffer either requires a string& or copies + // the string. + Buffer buf(const_cast(object_str)); + auto input = BufferInputSource("parsed object", &buf); + qpdf::Tokenizer tokenizer; bool empty = false; - QPDFObjectHandle result = parse(input, object_description, tokenizer, empty, nullptr, context); - size_t offset = QIntC::to_size(input->tell()); + auto result = QPDFParser(input, object_description, tokenizer, nullptr, context, false) + .parse(empty, false); + size_t offset = QIntC::to_size(input.tell()); while (offset < object_str.length()) { if (!isspace(object_str.at(offset))) { QTC::TC("qpdf", "QPDFObjectHandle trailing data in parse"); throw QPDFExc( qpdf_e_damaged_pdf, - input->getName(), + "parsed object", object_description, - input->getLastOffset(), + input.getLastOffset(), "trailing data found parsing object from string"); } ++offset; @@ -1614,51 +1618,52 @@ QPDFObjectHandle::parseContentStream_data( QPDF* context) { size_t stream_length = stream_data->getSize(); - auto input = - std::shared_ptr(new BufferInputSource(description, stream_data.get())); - QPDFTokenizer tokenizer; + auto input = BufferInputSource(description, stream_data.get()); + Tokenizer tokenizer; tokenizer.allowEOF(); + auto sp_description = QPDFParser::make_description(description, "content"); bool empty = false; - while (QIntC::to_size(input->tell()) < stream_length) { + while (QIntC::to_size(input.tell()) < stream_length) { // Read a token and seek to the beginning. The offset we get from this process is the // beginning of the next non-ignorable (space, comment) token. This way, the offset and // don't including ignorable content. 
- tokenizer.readToken(input, "content", true); - qpdf_offset_t offset = input->getLastOffset(); - input->seek(offset, SEEK_SET); + tokenizer.nextToken(input, "content", true); + qpdf_offset_t offset = input.getLastOffset(); + input.seek(offset, SEEK_SET); auto obj = - QPDFParser(*input, "content", tokenizer, nullptr, context, false).parse(empty, true); + QPDFParser(input, sp_description, "content", tokenizer, context).parse(empty, true); if (!obj) { // EOF break; } - size_t length = QIntC::to_size(input->tell() - offset); + size_t length = QIntC::to_size(input.tell() - offset); callbacks->handleObject(obj, QIntC::to_size(offset), length); if (obj.isOperator() && (obj.getOperatorValue() == "ID")) { // Discard next character; it is the space after ID that terminated the token. Read // until end of inline image. char ch; - input->read(&ch, 1); + input.read(&ch, 1); tokenizer.expectInlineImage(input); - QPDFTokenizer::Token t = tokenizer.readToken(input, description, true); - offset = input->getLastOffset(); - length = QIntC::to_size(input->tell() - offset); - if (t.getType() == QPDFTokenizer::tt_bad) { + tokenizer.nextToken(input, description); + offset = input.getLastOffset(); + length = QIntC::to_size(input.tell() - offset); + if (tokenizer.getType() == QPDFTokenizer::tt_bad) { QTC::TC("qpdf", "QPDFObjectHandle EOF in inline image"); warn( context, QPDFExc( qpdf_e_damaged_pdf, - input->getName(), + description, "stream data", - input->tell(), + input.tell(), "EOF found while reading inline image")); } else { - std::string inline_image = t.getValue(); QTC::TC("qpdf", "QPDFObjectHandle inline image token"); callbacks->handleObject( - QPDFObjectHandle::newInlineImage(inline_image), QIntC::to_size(offset), length); + QPDFObjectHandle::newInlineImage(tokenizer.getValue()), + QIntC::to_size(offset), + length); } } } diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index 7ca4664..f156cab 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc 
@@ -52,10 +52,10 @@ QPDFWordTokenFinder::check() { // Find a word token matching the given string, preceded by a delimiter, and followed by a // delimiter or EOF. - QPDFTokenizer tokenizer; - QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true, str.size() + 2); + Tokenizer tokenizer; + tokenizer.nextToken(is, "finder", str.size() + 2); qpdf_offset_t pos = is.tell(); - if (!(t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, str))) { + if (tokenizer.getType() != tt::tt_word || tokenizer.getValue() != str) { QTC::TC("qpdf", "QPDFTokenizer finder found wrong word"); return false; } @@ -845,7 +845,7 @@ Tokenizer::findEI(InputSource& input) } inline_image_bytes = QIntC::to_size(input.tell() - pos - 2); - QPDFTokenizer check; + Tokenizer check; bool found_bad = false; // Look at the next 10 tokens or up to EOF. The next inline image's image data would look // like bad tokens, but there will always be at least 10 tokens between one inline image's @@ -853,13 +853,13 @@ Tokenizer::findEI(InputSource& input) // all required as well as a BI and ID. If we get 10 good tokens in a row or hit EOF, we can // be pretty sure we've found the actual EI. for (int i = 0; i < 10; ++i) { - QPDFTokenizer::Token t = check.readToken(input, "checker", true); - QPDFTokenizer::token_type_e type = t.getType(); - if (type == tt::tt_eof) { + check.nextToken(input, "checker"); + auto typ = check.getType(); + if (typ == tt::tt_eof) { okay = true; - } else if (type == tt::tt_bad) { + } else if (typ == tt::tt_bad) { found_bad = true; - } else if (t.isWord()) { + } else if (typ == tt::tt_word) { // The qpdf tokenizer lumps alphabetic and otherwise uncategorized characters into // "words". We recognize strings of alphabetic characters as potential valid // operators for purposes of telling whether we're in valid content or not. 
It's not @@ -868,13 +868,12 @@ Tokenizer::findEI(InputSource& input) bool found_alpha = false; bool found_non_printable = false; bool found_other = false; - for (char ch: t.getValue()) { - if (((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z')) || - (ch == '*')) { + for (char ch: check.getValue()) { + if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch == '*')) { // Treat '*' as alpha since there are valid PDF operators that contain * // along with alphabetic characters. found_alpha = true; - } else if ((static_cast(ch) < 32) && (!isSpace(ch))) { + } else if (static_cast(ch) < 32 && !isSpace(ch)) { // Compare ch as a signed char so characters outside of 7-bit will be < 0. found_non_printable = true; break; diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index c9b90cc..edad06f 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -14,9 +14,9 @@ #include #include #include -#include #include #include +#include #include #include #include diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index c846b5a..29e735b 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -10,8 +10,8 @@ #include #include #include -#include #include +#include #include #include #include diff --git a/libqpdf/QPDF_encryption.cc b/libqpdf/QPDF_encryption.cc index 9f39345..3abfc64 100644 --- a/libqpdf/QPDF_encryption.cc +++ b/libqpdf/QPDF_encryption.cc @@ -3,7 +3,7 @@ #include -#include +#include #include diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc index b183c07..01b1c77 100644 --- a/libqpdf/QPDF_linearization.cc +++ b/libqpdf/QPDF_linearization.cc @@ -1,6 +1,6 @@ // See doc/linearization. 
-#include +#include #include #include diff --git a/libqpdf/QPDF_optimization.cc b/libqpdf/QPDF_optimization.cc index d19c2c6..65916c7 100644 --- a/libqpdf/QPDF_optimization.cc +++ b/libqpdf/QPDF_optimization.cc @@ -2,22 +2,15 @@ #include -#include +#include #include #include #include #include -QPDF::ObjUser::ObjUser() : - ou_type(ou_bad), - pageno(0) -{ -} - QPDF::ObjUser::ObjUser(user_e type) : - ou_type(type), - pageno(0) + ou_type(type) { qpdf_assert_debug(type == ou_root); } @@ -31,7 +24,6 @@ QPDF::ObjUser::ObjUser(user_e type, int pageno) : QPDF::ObjUser::ObjUser(user_e type, std::string const& key) : ou_type(type), - pageno(0), key(key) { qpdf_assert_debug((type == ou_trailer_key) || (type == ou_root_key)); @@ -40,16 +32,17 @@ QPDF::ObjUser::ObjUser(user_e type, std::string const& key) : bool QPDF::ObjUser::operator<(ObjUser const& rhs) const { - if (this->ou_type < rhs.ou_type) { + if (ou_type < rhs.ou_type) { return true; - } else if (this->ou_type == rhs.ou_type) { - if (this->pageno < rhs.pageno) { + } + if (ou_type == rhs.ou_type) { + if (pageno < rhs.pageno) { return true; - } else if (this->pageno == rhs.pageno) { - return (this->key < rhs.key); + } + if (pageno == rhs.pageno) { + return key < rhs.key; } } - return false; } diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index ece5c32..749533f 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/libqpdf/qpdf/QPDFObject_private.hh b/libqpdf/qpdf/QPDFObject_private.hh index db2da1c..8a9be46 100644 --- a/libqpdf/qpdf/QPDFObject_private.hh +++ b/libqpdf/qpdf/QPDFObject_private.hh @@ -7,8 +7,8 @@ #include #include #include -#include #include +#include #include #include diff --git a/libqpdf/qpdf/QPDFParser.hh b/libqpdf/qpdf/QPDFParser.hh index 6033f30..ae0c394 100644 --- a/libqpdf/qpdf/QPDFParser.hh +++ b/libqpdf/qpdf/QPDFParser.hh @@ -12,6 +12,8 @@ class QPDFParser { public: QPDFParser() = delete; + + // This constructor 
is only used by QPDFObjectHandle::parse overload taking a QPDFTokenizer. QPDFParser( InputSource& input, std::string const& object_description, @@ -24,16 +26,56 @@ class QPDFParser tokenizer(*tokenizer.m), decrypter(decrypter), context(context), - description( - std::make_shared( - std::string(input.getName() + ", " + object_description + " at offset $PO"))), + description(make_description(input.getName(), object_description)), parse_pdf(parse_pdf) { } - virtual ~QPDFParser() = default; + + QPDFParser( + InputSource& input, + std::string const& object_description, + qpdf::Tokenizer& tokenizer, + QPDFObjectHandle::StringDecrypter* decrypter, + QPDF* context, + bool parse_pdf) : + input(input), + object_description(object_description), + tokenizer(tokenizer), + decrypter(decrypter), + context(context), + description(make_description(input.getName(), object_description)), + parse_pdf(parse_pdf) + { + } + + // Used by parseContentStream_data only + QPDFParser( + InputSource& input, + std::shared_ptr sp_description, + std::string const& object_description, + qpdf::Tokenizer& tokenizer, + QPDF* context) : + input(input), + object_description(object_description), + tokenizer(tokenizer), + decrypter(nullptr), + context(context), + description(std::move(sp_description)), + parse_pdf(false) + { + } + ~QPDFParser() = default; QPDFObjectHandle parse(bool& empty, bool content_stream); + static std::shared_ptr + make_description(std::string const& input_name, std::string const& object_description) + { + using namespace std::literals; + return std::make_shared( + input_name + ", " + object_description + " at offset $PO"); + } + private: // Parser state. Note: // state <= st_dictionary_value == (state = st_dictionary_key || state = st_dictionary_value) @@ -83,7 +125,7 @@ class QPDFParser bool parse_pdf; std::vector stack; - StackFrame* frame; + StackFrame* frame{nullptr}; // Number of recent bad tokens. 
This will always be > 0 once a bad token has been encountered as // it only gets incremented or reset when a bad token is encountered. int bad_count{0}; @@ -92,9 +134,9 @@ class QPDFParser // Number of good tokens since last bad token. Irrelevant if bad_count == 0. int good_count{0}; // Start offset including any leading whitespace. - qpdf_offset_t start; + qpdf_offset_t start{0}; // Number of successive integer tokens. - int int_count = 0; + int int_count{0}; long long int_buffer[2]{0, 0}; qpdf_offset_t last_offset_buffer[2]{0, 0}; }; diff --git a/libqpdf/qpdf/QPDF_private.hh b/libqpdf/qpdf/QPDF_private.hh new file mode 100644 index 0000000..a7d9989 --- /dev/null +++ b/libqpdf/qpdf/QPDF_private.hh @@ -0,0 +1,559 @@ +#ifndef QPDF_PRIVATE_HH +#define QPDF_PRIVATE_HH + +#include + +#include + +// Writer class is restricted to QPDFWriter so that only it can call certain methods. +class QPDF::Writer +{ + friend class QPDFWriter; + + private: + static void + optimize( + QPDF& qpdf, + QPDFWriter::ObjTable const& obj, + std::function skip_stream_parameters) + { + qpdf.optimize(obj, skip_stream_parameters); + } + + static void + getLinearizedParts( + QPDF& qpdf, + QPDFWriter::ObjTable const& obj, + std::vector& part4, + std::vector& part6, + std::vector& part7, + std::vector& part8, + std::vector& part9) + { + qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9); + } + + static void + generateHintStream( + QPDF& qpdf, + QPDFWriter::NewObjTable const& new_obj, + QPDFWriter::ObjTable const& obj, + std::shared_ptr& hint_stream, + int& S, + int& O, + bool compressed) + { + qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed); + } + + static std::vector + getCompressibleObjGens(QPDF& qpdf) + { + return qpdf.getCompressibleObjVector(); + } + + static std::vector + getCompressibleObjSet(QPDF& qpdf) + { + return qpdf.getCompressibleObjSet(); + } + + static std::map const& + getXRefTable(QPDF& qpdf) + { + return qpdf.getXRefTableInternal(); + } + + static 
size_t + tableSize(QPDF& qpdf) + { + return qpdf.tableSize(); + } +}; + +// The Resolver class is restricted to QPDFObject and qpdf::BaseHandle so that only they can +// resolve indirect references. +class QPDF::Resolver +{ + friend class QPDFObject; + friend class qpdf::BaseHandle; + + private: + static std::shared_ptr const& + resolved(QPDF* qpdf, QPDFObjGen og) + { + return qpdf->resolve(og); + } +}; + +// StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data. +class QPDF::StreamCopier +{ + friend class QPDFObjectHandle; + + private: + static void + copyStreamData(QPDF* qpdf, QPDFObjectHandle const& dest, QPDFObjectHandle const& src) + { + qpdf->copyStreamData(dest, src); + } +}; + +// The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides +// special access to allow the parser to create unresolved objects and dangling references. +class QPDF::ParseGuard +{ + friend class QPDFParser; + + private: + ParseGuard(QPDF* qpdf) : + qpdf(qpdf) + { + if (qpdf) { + qpdf->inParse(true); + } + } + + static std::shared_ptr + getObject(QPDF* qpdf, int id, int gen, bool parse_pdf) + { + return qpdf->getObjectForParser(id, gen, parse_pdf); + } + + ~ParseGuard() + { + if (qpdf) { + qpdf->inParse(false); + } + } + QPDF* qpdf; +}; + +// Pipe class is restricted to qpdf::Stream.
+class QPDF::Pipe +{ + friend class qpdf::Stream; + + private: + static bool + pipeStreamData( + QPDF* qpdf, + QPDFObjGen og, + qpdf_offset_t offset, + size_t length, + QPDFObjectHandle dict, + Pipeline* pipeline, + bool suppress_warnings, + bool will_retry) + { + return qpdf->pipeStreamData( + og, offset, length, dict, pipeline, suppress_warnings, will_retry); + } +}; + +class QPDF::ObjCache +{ + public: + ObjCache() = default; + ObjCache( + std::shared_ptr object, + qpdf_offset_t end_before_space = 0, + qpdf_offset_t end_after_space = 0) : + object(std::move(object)), + end_before_space(end_before_space), + end_after_space(end_after_space) + { + } + + std::shared_ptr object; + qpdf_offset_t end_before_space{0}; + qpdf_offset_t end_after_space{0}; +}; + +class QPDF::ObjCopier +{ + public: + std::map object_map; + std::vector to_copy; + QPDFObjGen::set visiting; +}; + +class QPDF::EncryptionParameters +{ + friend class QPDF; + + public: + EncryptionParameters() = default; + + private: + bool encrypted{false}; + bool encryption_initialized{false}; + int encryption_V{0}; + int encryption_R{0}; + bool encrypt_metadata{true}; + std::map crypt_filters; + encryption_method_e cf_stream{e_none}; + encryption_method_e cf_string{e_none}; + encryption_method_e cf_file{e_none}; + std::string provided_password; + std::string user_password; + std::string encryption_key; + std::string cached_object_encryption_key; + QPDFObjGen cached_key_og{}; + bool user_password_matched{false}; + bool owner_password_matched{false}; +}; + +class QPDF::ForeignStreamData +{ + friend class QPDF; + + public: + ForeignStreamData( + std::shared_ptr encp, + std::shared_ptr file, + QPDFObjGen foreign_og, + qpdf_offset_t offset, + size_t length, + QPDFObjectHandle local_dict); + + private: + std::shared_ptr encp; + std::shared_ptr file; + QPDFObjGen foreign_og; + qpdf_offset_t offset; + size_t length; + QPDFObjectHandle local_dict; +}; + +class QPDF::CopiedStreamDataProvider: public 
QPDFObjectHandle::StreamDataProvider +{ + public: + CopiedStreamDataProvider(QPDF& destination_qpdf); + ~CopiedStreamDataProvider() override = default; + bool provideStreamData( + QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry) override; + void registerForeignStream(QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream); + void registerForeignStream(QPDFObjGen const& local_og, std::shared_ptr); + + private: + QPDF& destination_qpdf; + std::map foreign_streams; + std::map> foreign_stream_data; +}; + +class QPDF::StringDecrypter final: public QPDFObjectHandle::StringDecrypter +{ + friend class QPDF; + + public: + StringDecrypter(QPDF* qpdf, QPDFObjGen og); + ~StringDecrypter() final = default; + void + decryptString(std::string& val) final + { + qpdf->decryptString(val, og); + } + + private: + QPDF* qpdf; + QPDFObjGen og; +}; + +// PDF 1.4: Table F.4 +struct QPDF::HPageOffsetEntry +{ + int delta_nobjects{0}; // 1 + qpdf_offset_t delta_page_length{0}; // 2 + // vectors' sizes = nshared_objects + int nshared_objects{0}; // 3 + std::vector shared_identifiers; // 4 + std::vector shared_numerators; // 5 + qpdf_offset_t delta_content_offset{0}; // 6 + qpdf_offset_t delta_content_length{0}; // 7 +}; + +// PDF 1.4: Table F.3 +struct QPDF::HPageOffset +{ + int min_nobjects{0}; // 1 + qpdf_offset_t first_page_offset{0}; // 2 + int nbits_delta_nobjects{0}; // 3 + int min_page_length{0}; // 4 + int nbits_delta_page_length{0}; // 5 + int min_content_offset{0}; // 6 + int nbits_delta_content_offset{0}; // 7 + int min_content_length{0}; // 8 + int nbits_delta_content_length{0}; // 9 + int nbits_nshared_objects{0}; // 10 + int nbits_shared_identifier{0}; // 11 + int nbits_shared_numerator{0}; // 12 + int shared_denominator{0}; // 13 + // vector size is npages + std::vector entries; +}; + +// PDF 1.4: Table F.6 +struct QPDF::HSharedObjectEntry +{ + // Item 3 is a 128-bit signature (unsupported by Acrobat) + int delta_group_length{0}; // 1 + 
int signature_present{0}; // 2 -- always 0 + int nobjects_minus_one{0}; // 4 -- always 0 +}; + +// PDF 1.4: Table F.5 +struct QPDF::HSharedObject +{ + int first_shared_obj{0}; // 1 + qpdf_offset_t first_shared_offset{0}; // 2 + int nshared_first_page{0}; // 3 + int nshared_total{0}; // 4 + int nbits_nobjects{0}; // 5 + int min_group_length{0}; // 6 + int nbits_delta_group_length{0}; // 7 + // vector size is nshared_total + std::vector entries; +}; + +// PDF 1.4: Table F.9 +struct QPDF::HGeneric +{ + int first_object{0}; // 1 + qpdf_offset_t first_object_offset{0}; // 2 + int nobjects{0}; // 3 + int group_length{0}; // 4 +}; + +// Other linearization data structures + +// Initialized from Linearization Parameter dictionary +struct QPDF::LinParameters +{ + qpdf_offset_t file_size{0}; // /L + int first_page_object{0}; // /O + qpdf_offset_t first_page_end{0}; // /E + int npages{0}; // /N + qpdf_offset_t xref_zero_offset{0}; // /T + int first_page{0}; // /P + qpdf_offset_t H_offset{0}; // offset of primary hint stream + qpdf_offset_t H_length{0}; // length of primary hint stream +}; + +// Computed hint table value data structures. These tables contain the computed values on which +// the hint table values are based. They exclude things like number of bits and store actual +// values instead of mins and deltas. File offsets are also absolute rather than being offset +// by the size of the primary hint table. We populate the hint table structures from these +// during writing and compare the hint table values with these during validation. We ignore +// some values for various reasons described in the code. Those values are omitted from these +// structures. Note also that object numbers are object numbers from the input file, not the +// output file. + +// Naming convention: CHSomething is analogous to HSomething above. "CH" is computed hint. 
+ +struct QPDF::CHPageOffsetEntry +{ + int nobjects{0}; + int nshared_objects{0}; + // vectors' sizes = nshared_objects + std::vector shared_identifiers; +}; + +struct QPDF::CHPageOffset +{ + // vector size is npages + std::vector entries; +}; + +struct QPDF::CHSharedObjectEntry +{ + CHSharedObjectEntry(int object) : + object(object) + { + } + + int object; +}; + +// PDF 1.4: Table F.5 +struct QPDF::CHSharedObject +{ + int first_shared_obj{0}; + int nshared_first_page{0}; + int nshared_total{0}; + // vector size is nshared_total + std::vector entries; +}; + +// No need for CHGeneric -- HGeneric is fine as is. + +// Data structures to support optimization -- implemented in QPDF_optimization.cc + +class QPDF::ObjUser +{ + public: + enum user_e { ou_bad, ou_page, ou_thumb, ou_trailer_key, ou_root_key, ou_root }; + + ObjUser() = default; + + // type must be ou_root + ObjUser(user_e type); + + // type must be one of ou_page or ou_thumb + ObjUser(user_e type, int pageno); + + // type must be one of ou_trailer_key or ou_root_key + ObjUser(user_e type, std::string const& key); + + bool operator<(ObjUser const&) const; + + user_e ou_type{ou_bad}; + int pageno{0}; // if ou_page; + std::string key; // if ou_trailer_key or ou_root_key +}; + +struct QPDF::UpdateObjectMapsFrame +{ + UpdateObjectMapsFrame(ObjUser const& ou, QPDFObjectHandle oh, bool top); + + ObjUser const& ou; + QPDFObjectHandle oh; + bool top; +}; + +class QPDF::PatternFinder final: public InputSource::Finder +{ + public: + PatternFinder(QPDF& qpdf, bool (QPDF::*checker)()) : + qpdf(qpdf), + checker(checker) + { + } + ~PatternFinder() final = default; + bool + check() final + { + return (this->qpdf.*checker)(); + } + + private: + QPDF& qpdf; + bool (QPDF::*checker)(); +}; + +class QPDF::Members +{ + friend class QPDF; + friend class ResolveRecorder; + + public: + Members(); + Members(Members const&) = delete; + ~Members() = default; + + private: + std::shared_ptr log; + unsigned long long unique_id{0}; + 
qpdf::Tokenizer tokenizer; + std::shared_ptr file; + std::string last_object_description; + bool provided_password_is_hex_key{false}; + bool ignore_xref_streams{false}; + bool suppress_warnings{false}; + size_t max_warnings{0}; + bool attempt_recovery{true}; + bool check_mode{false}; + std::shared_ptr encp; + std::string pdf_version; + std::map xref_table; + // Various tables are indexed by object id, with potential size id + 1 + int xref_table_max_id{std::numeric_limits::max() - 1}; + qpdf_offset_t xref_table_max_offset{0}; + std::set deleted_objects; + std::map obj_cache; + std::set resolving; + QPDFObjectHandle trailer; + std::vector all_pages; + bool invalid_page_found{false}; + std::map pageobj_to_pages_pos; + bool pushed_inherited_attributes_to_pages{false}; + bool ever_pushed_inherited_attributes_to_pages{false}; + bool ever_called_get_all_pages{false}; + std::vector warnings; + std::map object_copiers; + std::shared_ptr copied_streams; + // copied_stream_data_provider is owned by copied_streams + CopiedStreamDataProvider* copied_stream_data_provider{nullptr}; + bool reconstructed_xref{false}; + bool fixed_dangling_refs{false}; + bool immediate_copy_from{false}; + bool in_parse{false}; + bool parsed{false}; + std::set resolved_object_streams; + + // Linearization data + qpdf_offset_t first_xref_item_offset{0}; // actual value from file + bool uncompressed_after_compressed{false}; + bool linearization_warnings{false}; + + // Linearization parameter dictionary and hint table data: may be read from file or computed + // prior to writing a linearized file + QPDFObjectHandle lindict; + LinParameters linp; + HPageOffset page_offset_hints; + HSharedObject shared_object_hints; + HGeneric outline_hints; + + // Computed linearization data: used to populate above tables during writing and to compare + // with them during validation. c_ means computed. 
+ LinParameters c_linp; + CHPageOffset c_page_offset_data; + CHSharedObject c_shared_object_data; + HGeneric c_outline_data; + + // Object ordering data for linearized files: initialized by calculateLinearizationData(). + // Part numbers refer to the PDF 1.4 specification. + std::vector part4; + std::vector part6; + std::vector part7; + std::vector part8; + std::vector part9; + + // Optimization data + std::map> obj_user_to_objects; + std::map> object_to_obj_users; +}; + +// JobSetter class is restricted to QPDFJob. +class QPDF::JobSetter +{ + friend class QPDFJob; + + private: + // Enable enhanced warnings for pdf file checking. + static void + setCheckMode(QPDF& qpdf, bool val) + { + qpdf.m->check_mode = val; + } +}; + +class QPDF::ResolveRecorder +{ + public: + ResolveRecorder(QPDF* qpdf, QPDFObjGen const& og) : + qpdf(qpdf), + iter(qpdf->m->resolving.insert(og).first) + { + } + virtual ~ResolveRecorder() + { + this->qpdf->m->resolving.erase(iter); + } + + private: + QPDF* qpdf; + std::set::const_iterator iter; +}; + +#endif // QPDF_PRIVATE_HH