// Review notes for this patch. These lines precede the first "diff --git" header and are not part
// of any hunk.
//
// Purpose: move the pattern-finding helpers out of class QPDF and into QPDF::Doc::Objects.
//
//   include/qpdf/QPDF.hh
//     - Removes the PatternFinder forward declaration and the findHeader(), findStartxref() and
//       findEndstream() declarations from class QPDF. validatePDFVersion() is kept.
//   libqpdf/QPDF.cc
//     - Removes the definition of QPDF::findHeader().
//   libqpdf/QPDF_objects.cc
//     - Adds one #include line.
//     - Adds Objects::PatternFinder, an InputSource::Finder whose check() invokes a stored
//       pointer-to-member of Objects on a stored Objects reference.
//     - Adds Objects::findHeader() with the same body as the removed QPDF::findHeader(), except as
//       listed under "Differences" below.
//     - Turns QPDF::findStartxref() and QPDF::findEndstream() into Objects members that call
//       readToken(...) directly instead of m->objects.readToken(...).
//     - Updates the three PatternFinder construction sites (two in Objects::parse, one in
//       Objects::recoverStreamLength) to pass *this and &Objects::find...() instead of qpdf and
//       &QPDF::find...().
//   libqpdf/qpdf/QPDF_private.hh
//     - Removes class QPDF::PatternFinder and the "Other linearization data structures" comment
//       above it.
//     - Declares class PatternFinder under private: in QPDF::Doc::Objects and declares
//       findHeader(), findStartxref() and findEndstream() in the same class.
//   qpdf/qpdf.testcov
//     - Removes the "QPDF global offset 0" entry.
//
// Differences between the removed QPDF::findHeader() and the added Objects::findHeader():
//   - The "%PDF-" precondition is checked with util::assertion(cond, msg) instead of an explicit
//     `throw std::logic_error(msg)`; the message text is unchanged.
//     NOTE(review): util::assertion is not defined in this patch -- confirm it fails in a way
//     callers treat the same as std::logic_error.
//   - The QTC::TC("qpdf", "QPDF global offset") coverage call is not carried over, which is
//     consistent with the qpdf.testcov entry being removed.
//
// NOTE(review): the added definitions are spelled `Objects ::findStartxref()` and
// `Objects ::findEndstream()` (space before `::`), unlike `Objects::findHeader()` and
// `Objects::parse(...)` elsewhere in the same file. This looks like a formatting slip worth
// normalising in the real patch.
// NOTE(review): in this copy of the patch, line breaks are collapsed and text that would sit
// inside angle brackets appears to be missing (e.g. `#include` with no header name,
// `std::make_shared(m->file, global_offset)`, `static_cast(...)`, `std::unique_ptr& heap`).
// Verify against the original patch before applying; the hunk text below is left untouched.
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 1ac5351..b9ae8f4 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -791,13 +791,7 @@ class QPDF bool is_root_metadata, std::unique_ptr& heap); - class PatternFinder; - - // Methods to support pattern finding static bool validatePDFVersion(char const*&, std::string& version); - bool findHeader(); - bool findStartxref(); - bool findEndstream(); // JSON import void importJSON(std::shared_ptr, bool must_be_complete); diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 07faa88..f60e714 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -334,33 +334,6 @@ QPDF::validatePDFVersion(char const*& p, std::string& version) return true; } -bool -QPDF::findHeader() -{ - qpdf_offset_t global_offset = m->file->tell(); - std::string line = m->file->readLine(1024); - char const* p = line.data(); - if (strncmp(p, "%PDF-", 5) != 0) { - throw std::logic_error("findHeader is not looking at %PDF-"); - } - p += 5; - std::string version; - // Note: The string returned by line.data() is always null-terminated. The code below never - // overruns the buffer because a null character always short-circuits further advancement. - if (!validatePDFVersion(p, version)) { - return false; - } - m->pdf_version = version; - if (global_offset != 0) { - // Empirical evidence strongly suggests (codified in PDF 2.0 spec) that when there is - // leading material prior to the PDF header, all explicit offsets in the file are such that - // 0 points to the beginning of the header. 
- QTC::TC("qpdf", "QPDF global offset"); - m->file = std::make_shared(m->file, global_offset); - } - return true; -} - void QPDF::warn(QPDFExc const& e) { diff --git a/libqpdf/QPDF_objects.cc b/libqpdf/QPDF_objects.cc index b5653bc..2240862 100644 --- a/libqpdf/QPDF_objects.cc +++ b/libqpdf/QPDF_objects.cc @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -101,11 +102,54 @@ class QPDF::ResolveRecorder final std::set::const_iterator iter; }; +class Objects::PatternFinder final: public InputSource::Finder +{ + public: + PatternFinder(Objects& o, bool (Objects::*checker)()) : + o(o), + checker(checker) + { + } + ~PatternFinder() final = default; + bool + check() final + { + return (this->o.*checker)(); + } + + private: + Objects& o; + bool (Objects::*checker)(); +}; + +bool +Objects::findHeader() +{ + qpdf_offset_t global_offset = m->file->tell(); + std::string line = m->file->readLine(1024); + char const* p = line.data(); + util::assertion(strncmp(p, "%PDF-", 5) == 0, "findHeader is not looking at %PDF-"); + p += 5; + std::string version; + // Note: The string returned by line.data() is always null-terminated. The code below never + // overruns the buffer because a null character always short-circuits further advancement. + if (!validatePDFVersion(p, version)) { + return false; + } + m->pdf_version = version; + if (global_offset != 0) { + // Empirical evidence strongly suggests (codified in PDF 2.0 spec) that when there is + // leading material prior to the PDF header, all explicit offsets in the file are such that + // 0 points to the beginning of the header. 
+ m->file = std::make_shared(m->file, global_offset); + } + return true; +} + bool -QPDF::findStartxref() +Objects ::findStartxref() { - if (m->objects.readToken(*m->file).isWord("startxref") && - m->objects.readToken(*m->file).isInteger()) { + if (readToken(*m->file).isWord("startxref") && readToken(*m->file).isInteger()) { // Position in front of offset token m->file->seek(m->file->getLastOffset(), SEEK_SET); return true; @@ -121,7 +165,7 @@ Objects::parse(char const* password) } // Find the header anywhere in the first 1024 bytes of the file. - PatternFinder hf(qpdf, &QPDF::findHeader); + PatternFinder hf(*this, &Objects::findHeader); if (!m->file->findFirst("%PDF-", 0, 1024, hf)) { warn(damagedPDF("", -1, "can't find PDF header")); // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode @@ -139,7 +183,7 @@ Objects::parse(char const* password) m->xref_table_max_id = static_cast(m->xref_table_max_offset / 3); } qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); - PatternFinder sf(qpdf, &QPDF::findStartxref); + PatternFinder sf(*this, &Objects::findStartxref); qpdf_offset_t xref_offset = 0; if (m->file->findLast("startxref", start_offset, 0, sf)) { xref_offset = QUtil::string_to_ll(readToken(*m->file).getValue().c_str()); @@ -1324,10 +1368,10 @@ Objects::readObjectInStream(is::OffsetBuffer& input, int stream_id, int obj_id) } bool -QPDF::findEndstream() +Objects ::findEndstream() { // Find endstream or endobj. Position the input at that token. 
- auto t = m->objects.readToken(*m->file, 20); + auto t = readToken(*m->file, 20); if (t.isWord("endobj") || t.isWord("endstream")) { m->file->seek(m->file->getLastOffset(), SEEK_SET); return true; @@ -1342,7 +1386,7 @@ Objects::recoverStreamLength( // Try to reconstruct stream length by looking for endstream or endobj warn(damagedPDF(*input, stream_offset, "attempting to recover stream length")); - PatternFinder ef(qpdf, &QPDF::findEndstream); + PatternFinder ef(*this, &Objects::findEndstream); size_t length = 0; if (m->file->findFirst("end", stream_offset, 0, ef)) { length = toS(m->file->tell() - stream_offset); diff --git a/libqpdf/qpdf/QPDF_private.hh b/libqpdf/qpdf/QPDF_private.hh index fdc38c2..52414ea 100644 --- a/libqpdf/qpdf/QPDF_private.hh +++ b/libqpdf/qpdf/QPDF_private.hh @@ -242,27 +242,6 @@ class QPDF::StringDecrypter final: public QPDFObjectHandle::StringDecrypter QPDF* qpdf; QPDFObjGen og; }; -// Other linearization data structures - -class QPDF::PatternFinder final: public InputSource::Finder -{ - public: - PatternFinder(QPDF& qpdf, bool (QPDF::*checker)()) : - qpdf(qpdf), - checker(checker) - { - } - ~PatternFinder() final = default; - bool - check() final - { - return (this->qpdf.*checker)(); - } - - private: - QPDF& qpdf; - bool (QPDF::*checker)(); -}; // This class is used to represent a PDF document. // @@ -1028,6 +1007,8 @@ class QPDF::Doc::Objects: Common std::vector compressible_set(); private: + class PatternFinder; + // Get a list of objects that would be permitted in an object stream. 
template std::vector compressible(); @@ -1071,6 +1052,11 @@ class QPDF::Doc::Objects: Common bool isUnresolved(QPDFObjGen og); void setLastObjectDescription(std::string const& description, QPDFObjGen og); + // Methods to support pattern finding + bool findHeader(); + bool findStartxref(); + bool findEndstream(); + Foreign foreign_; Streams streams_; diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 3f5664b..1af0a37 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -129,7 +129,6 @@ QPDFObjectHandle trailing data in parse 0 QPDFTokenizer EOF reading token 0 QPDFTokenizer EOF reading appendable token 0 QPDFWriter extra header text no newline 0 -QPDF global offset 0 QPDFWriter make Extensions direct 0 QPDFWriter make ADBE direct 1 QPDFWriter preserve Extensions 0