diff --git a/examples/pdf-custom-filter.cc b/examples/pdf-custom-filter.cc index 2cc4199..1cd4665 100644 --- a/examples/pdf-custom-filter.cc +++ b/examples/pdf-custom-filter.cc @@ -1,3 +1,5 @@ + +#include #include #include #include diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index dba648b..14cdbde 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -792,43 +791,9 @@ class QPDF bool is_root_metadata, std::unique_ptr& heap); - class PatternFinder; - - // Methods to support pattern finding - static bool validatePDFVersion(char const*&, std::string& version); - bool findHeader(); - bool findStartxref(); - bool findEndstream(); - // JSON import void importJSON(std::shared_ptr, bool must_be_complete); - // Type conversion helper methods - template - static qpdf_offset_t - toO(T const& i) - { - return QIntC::to_offset(i); - } - template - static size_t - toS(T const& i) - { - return QIntC::to_size(i); - } - template - static int - toI(T const& i) - { - return QIntC::to_int(i); - } - template - static unsigned long long - toULL(T const& i) - { - return QIntC::to_ulonglong(i); - } - class Members; // Keep all member variables inside the Members object, which we dynamically allocate. This diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index c9669b5..5c7338b 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -315,52 +315,6 @@ QPDF::numWarnings() const return m->warnings.size(); } -bool -QPDF::validatePDFVersion(char const*& p, std::string& version) -{ - if (!util::is_digit(*p)) { - return false; - } - while (util::is_digit(*p)) { - version.append(1, *p++); - } - if (!(*p == '.' && util::is_digit(*(p + 1)))) { - return false; - } - version.append(1, *p++); - while (util::is_digit(*p)) { - version.append(1, *p++); - } - return true; -} - -bool -QPDF::findHeader() -{ - qpdf_offset_t global_offset = m->file->tell(); - std::string line = m->file->readLine(1024); - char const* p = line.data(); - if (strncmp(p, "%PDF-", 5) != 0) { - throw std::logic_error("findHeader is not looking at %PDF-"); - } - p += 5; - std::string version; - // Note: The string returned by line.data() is always null-terminated. The code below never - // overruns the buffer because a null character always short-circuits further advancement. - if (!validatePDFVersion(p, version)) { - return false; - } - m->pdf_version = version; - if (global_offset != 0) { - // Empirical evidence strongly suggests (codified in PDF 2.0 spec) that when there is - // leading material prior to the PDF header, all explicit offsets in the file are such that - // 0 points to the beginning of the header. - QTC::TC("qpdf", "QPDF global offset"); - m->file = std::make_shared(m->file, global_offset); - } - return true; -} - void QPDF::warn(QPDFExc const& e) { @@ -761,7 +715,10 @@ QPDF::pipeStreamData( auto buf = file->read(length, offset); if (buf.size() != length) { throw qpdf_for_warning.m->c.damagedPDF( - *file, "", offset + toO(buf.size()), "unexpected EOF reading stream data"); + *file, + "", + offset + QIntC::to_offset(buf.size()), + "unexpected EOF reading stream data"); } pipeline->write(buf.data(), length); attempted_finish = true; diff --git a/libqpdf/QPDF_encryption.cc b/libqpdf/QPDF_encryption.cc index 5a0f2c0..e0fe8dc 100644 --- a/libqpdf/QPDF_encryption.cc +++ b/libqpdf/QPDF_encryption.cc @@ -1010,7 +1010,7 @@ QPDF::decryptString(std::string& str, QPDFObjGen og) // Using std::shared_ptr guarantees that tmp will be freed even if rc4.process throws an // exception. auto tmp = QUtil::make_unique_cstr(str); - RC4 rc4(QUtil::unsigned_char_pointer(key), toI(key.length())); + RC4 rc4(QUtil::unsigned_char_pointer(key), QIntC::to_int(key.length())); auto data = QUtil::unsigned_char_pointer(tmp.get()); rc4.process(data, vlen, data); str = std::string(tmp.get(), vlen); diff --git a/libqpdf/QPDF_json.cc b/libqpdf/QPDF_json.cc index 663599a..ba3b137 100644 --- a/libqpdf/QPDF_json.cc +++ b/libqpdf/QPDF_json.cc @@ -482,7 +482,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) if (value.getString(v)) { std::string version; char const* p = v.c_str(); - if (QPDF::validatePDFVersion(p, version) && (*p == '\0')) { + if (objects.validatePDFVersion(p, version) && *p == '\0') { pdf.m->pdf_version = version; return true; } diff --git a/libqpdf/QPDF_objects.cc b/libqpdf/QPDF_objects.cc index b8c43dc..94b3e69 100644 --- a/libqpdf/QPDF_objects.cc +++ b/libqpdf/QPDF_objects.cc @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -101,11 +102,73 @@ class QPDF::ResolveRecorder final std::set::const_iterator iter; }; +class Objects::PatternFinder final: public InputSource::Finder +{ + public: + PatternFinder(Objects& o, bool (Objects::*checker)()) : + o(o), + checker(checker) + { + } + ~PatternFinder() final = default; + bool + check() final + { + return (this->o.*checker)(); + } + + private: + Objects& o; + bool (Objects::*checker)(); +}; + +bool +Objects::validatePDFVersion(char const*& p, std::string& version) +{ + if (!util::is_digit(*p)) { + return false; + } + while (util::is_digit(*p)) { + version.append(1, *p++); + } + if (!(*p == '.' && util::is_digit(*(p + 1)))) { + return false; + } + version.append(1, *p++); + while (util::is_digit(*p)) { + version.append(1, *p++); + } + return true; +} + +bool +Objects::findHeader() +{ + qpdf_offset_t global_offset = m->file->tell(); + std::string line = m->file->readLine(1024); + char const* p = line.data(); + util::assertion(strncmp(p, "%PDF-", 5) == 0, "findHeader is not looking at %PDF-"); + p += 5; + std::string version; + // Note: The string returned by line.data() is always null-terminated. The code below never + // overruns the buffer because a null character always short-circuits further advancement. + if (!validatePDFVersion(p, version)) { + return false; + } + m->pdf_version = version; + if (global_offset != 0) { + // Empirical evidence strongly suggests (codified in PDF 2.0 spec) that when there is + // leading material prior to the PDF header, all explicit offsets in the file are such that + // 0 points to the beginning of the header. + m->file = std::make_shared(m->file, global_offset); + } + return true; +} + bool -QPDF::findStartxref() +Objects ::findStartxref() { - if (m->objects.readToken(*m->file).isWord("startxref") && - m->objects.readToken(*m->file).isInteger()) { + if (readToken(*m->file).isWord("startxref") && readToken(*m->file).isInteger()) { // Position in front of offset token m->file->seek(m->file->getLastOffset(), SEEK_SET); return true; @@ -121,7 +184,7 @@ Objects::parse(char const* password) } // Find the header anywhere in the first 1024 bytes of the file. - PatternFinder hf(qpdf, &QPDF::findHeader); + PatternFinder hf(*this, &Objects::findHeader); if (!m->file->findFirst("%PDF-", 0, 1024, hf)) { warn(damagedPDF("", -1, "can't find PDF header")); // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode @@ -139,7 +202,7 @@ Objects::parse(char const* password) m->xref_table_max_id = static_cast(m->xref_table_max_offset / 3); } qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); - PatternFinder sf(qpdf, &QPDF::findStartxref); + PatternFinder sf(*this, &Objects::findStartxref); qpdf_offset_t xref_offset = 0; if (m->file->findLast("startxref", start_offset, 0, sf)) { xref_offset = QUtil::string_to_ll(readToken(*m->file).getValue().c_str()); @@ -164,7 +227,7 @@ Objects::parse(char const* password) } } - qpdf.initializeEncryption(); + m->encp->initialize(qpdf); m->parsed = true; if (!m->xref_table.empty() && !qpdf.getRoot().getKey("/Pages").isDictionary()) { // QPDFs created from JSON have an empty xref table and no root object yet. @@ -271,15 +334,13 @@ Objects::reconstruct_xref(QPDFExc& e, bool found_startxref) auto xref_backup{m->xref_table}; try { m->file->seek(startxrefs.back(), SEEK_SET); - if (auto offset = - QUtil::string_to_ll(m->objects.readToken(*m->file).getValue().data())) { - m->objects.read_xref(offset); + if (auto offset = QUtil::string_to_ll(readToken(*m->file).getValue().data())) { + read_xref(offset); if (qpdf.getRoot().getKey("/Pages").isDictionary()) { - QTC::TC("qpdf", "QPDF startxref more than 1024 before end"); warn(damagedPDF( "", -1, "startxref was more than 1024 bytes before end of file")); - qpdf.initializeEncryption(); + m->encp->initialize(qpdf); m->parsed = true; m->reconstructed_xref = false; return; @@ -1138,7 +1199,7 @@ QPDF::getObjectCount() if (!m->obj_cache.empty()) { og = (*(m->obj_cache.rbegin())).first; } - return toS(og.getObj()); + return QIntC::to_size(og.getObj()); } std::vector @@ -1324,10 +1385,10 @@ Objects::readObjectInStream(is::OffsetBuffer& input, int stream_id, int obj_id) } bool -QPDF::findEndstream() +Objects ::findEndstream() { // Find endstream or endobj. Position the input at that token. - auto t = m->objects.readToken(*m->file, 20); + auto t = readToken(*m->file, 20); if (t.isWord("endobj") || t.isWord("endstream")) { m->file->seek(m->file->getLastOffset(), SEEK_SET); return true; @@ -1342,7 +1403,7 @@ Objects::recoverStreamLength( // Try to reconstruct stream length by looking for endstream or endobj warn(damagedPDF(*input, stream_offset, "attempting to recover stream length")); - PatternFinder ef(qpdf, &QPDF::findEndstream); + PatternFinder ef(*this, &Objects::findEndstream); size_t length = 0; if (m->file->findFirst("end", stream_offset, 0, ef)) { length = toS(m->file->tell() - stream_offset); diff --git a/libqpdf/qpdf/QPDF_private.hh b/libqpdf/qpdf/QPDF_private.hh index 895b06f..a5738eb 100644 --- a/libqpdf/qpdf/QPDF_private.hh +++ b/libqpdf/qpdf/QPDF_private.hh @@ -3,6 +3,7 @@ #include +#include #include #include #include @@ -241,27 +242,6 @@ class QPDF::StringDecrypter final: public QPDFObjectHandle::StringDecrypter QPDF* qpdf; QPDFObjGen og; }; -// Other linearization data structures - -class QPDF::PatternFinder final: public InputSource::Finder -{ - public: - PatternFinder(QPDF& qpdf, bool (QPDF::*checker)()) : - qpdf(qpdf), - checker(checker) - { - } - ~PatternFinder() final = default; - bool - check() final - { - return (this->qpdf.*checker)(); - } - - private: - QPDF& qpdf; - bool (QPDF::*checker)(); -}; // This class is used to represent a PDF document. // @@ -323,13 +303,39 @@ class QPDF::Doc } protected: + // Type conversion helper methods + template + static qpdf_offset_t + toO(T const& i) + { + return QIntC::to_offset(i); + } + template + static size_t + toS(T const& i) + { + return QIntC::to_size(i); + } + template + static int + toI(T const& i) + { + return QIntC::to_int(i); + } + template + static unsigned long long + toULL(T const& i) + { + return QIntC::to_ulonglong(i); + } + QPDF& qpdf; QPDF::Members* m; qpdf::Doc::Config& cf; QPDF::Doc::Pages& pages; QPDF::Doc::Objects& objects; - }; + }; // class qpdf::Doc::Common Doc() = delete; Doc(Doc const&) = delete; @@ -994,6 +1000,8 @@ class QPDF::Doc::Objects: Common std::shared_ptr getObjectForJSON(int id, int gen); size_t table_size(); + static bool validatePDFVersion(char const*&, std::string& version); + // For QPDFWriter: std::map const& xref_table(); @@ -1001,6 +1009,8 @@ class QPDF::Doc::Objects: Common std::vector compressible_set(); private: + class PatternFinder; + // Get a list of objects that would be permitted in an object stream. template std::vector compressible(); @@ -1044,6 +1054,11 @@ class QPDF::Doc::Objects: Common bool isUnresolved(QPDFObjGen og); void setLastObjectDescription(std::string const& description, QPDFObjGen og); + // Methods to support pattern finding + bool findHeader(); + bool findStartxref(); + bool findEndstream(); + Foreign foreign_; Streams streams_; diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 3f5664b..92428c5 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -27,7 +27,6 @@ main QTest stream 0 QPDF lin write nshared_total > nshared_first_page 1 QPDFWriter encrypted hint stream 0 QPDF xref gen > 0 1 -QPDF startxref more than 1024 before end 0 QPDFParser bad brace 0 QPDFParser bad brace in parseRemainder 0 QPDFParser bad array close 0 @@ -129,7 +128,6 @@ QPDFObjectHandle trailing data in parse 0 QPDFTokenizer EOF reading token 0 QPDFTokenizer EOF reading appendable token 0 QPDFWriter extra header text no newline 0 -QPDF global offset 0 QPDFWriter make Extensions direct 0 QPDFWriter make ADBE direct 1 QPDFWriter preserve Extensions 0