Commit 85b968418be9104f8ac411f4c1565377c151591d
1 parent: 8ded7ff5
Refactor `QPDF` pattern-finding methods: relocate `findHeader`, `findStartxref`, and `findEndstream` to `Objects`, remove the `QPDF::PatternFinder` class, and update related logic for improved encapsulation.
Showing 5 changed files with 59 additions and 63 deletions.
include/qpdf/QPDF.hh
| @@ -791,13 +791,7 @@ class QPDF | @@ -791,13 +791,7 @@ class QPDF | ||
| 791 | bool is_root_metadata, | 791 | bool is_root_metadata, |
| 792 | std::unique_ptr<Pipeline>& heap); | 792 | std::unique_ptr<Pipeline>& heap); |
| 793 | 793 | ||
| 794 | - class PatternFinder; | ||
| 795 | - | ||
| 796 | - // Methods to support pattern finding | ||
| 797 | static bool validatePDFVersion(char const*&, std::string& version); | 794 | static bool validatePDFVersion(char const*&, std::string& version); |
| 798 | - bool findHeader(); | ||
| 799 | - bool findStartxref(); | ||
| 800 | - bool findEndstream(); | ||
| 801 | 795 | ||
| 802 | // JSON import | 796 | // JSON import |
| 803 | void importJSON(std::shared_ptr<InputSource>, bool must_be_complete); | 797 | void importJSON(std::shared_ptr<InputSource>, bool must_be_complete); |
libqpdf/QPDF.cc
| @@ -334,33 +334,6 @@ QPDF::validatePDFVersion(char const*& p, std::string& version) | @@ -334,33 +334,6 @@ QPDF::validatePDFVersion(char const*& p, std::string& version) | ||
| 334 | return true; | 334 | return true; |
| 335 | } | 335 | } |
| 336 | 336 | ||
| 337 | -bool | ||
| 338 | -QPDF::findHeader() | ||
| 339 | -{ | ||
| 340 | - qpdf_offset_t global_offset = m->file->tell(); | ||
| 341 | - std::string line = m->file->readLine(1024); | ||
| 342 | - char const* p = line.data(); | ||
| 343 | - if (strncmp(p, "%PDF-", 5) != 0) { | ||
| 344 | - throw std::logic_error("findHeader is not looking at %PDF-"); | ||
| 345 | - } | ||
| 346 | - p += 5; | ||
| 347 | - std::string version; | ||
| 348 | - // Note: The string returned by line.data() is always null-terminated. The code below never | ||
| 349 | - // overruns the buffer because a null character always short-circuits further advancement. | ||
| 350 | - if (!validatePDFVersion(p, version)) { | ||
| 351 | - return false; | ||
| 352 | - } | ||
| 353 | - m->pdf_version = version; | ||
| 354 | - if (global_offset != 0) { | ||
| 355 | - // Empirical evidence strongly suggests (codified in PDF 2.0 spec) that when there is | ||
| 356 | - // leading material prior to the PDF header, all explicit offsets in the file are such that | ||
| 357 | - // 0 points to the beginning of the header. | ||
| 358 | - QTC::TC("qpdf", "QPDF global offset"); | ||
| 359 | - m->file = std::make_shared<OffsetInputSource>(m->file, global_offset); | ||
| 360 | - } | ||
| 361 | - return true; | ||
| 362 | -} | ||
| 363 | - | ||
| 364 | void | 337 | void |
| 365 | QPDF::warn(QPDFExc const& e) | 338 | QPDF::warn(QPDFExc const& e) |
| 366 | { | 339 | { |
libqpdf/QPDF_objects.cc
| @@ -3,6 +3,7 @@ | @@ -3,6 +3,7 @@ | ||
| 3 | #include <qpdf/QPDF_private.hh> | 3 | #include <qpdf/QPDF_private.hh> |
| 4 | 4 | ||
| 5 | #include <qpdf/InputSource_private.hh> | 5 | #include <qpdf/InputSource_private.hh> |
| 6 | +#include <qpdf/OffsetInputSource.hh> | ||
| 6 | #include <qpdf/Pipeline.hh> | 7 | #include <qpdf/Pipeline.hh> |
| 7 | #include <qpdf/QPDFExc.hh> | 8 | #include <qpdf/QPDFExc.hh> |
| 8 | #include <qpdf/QPDFLogger.hh> | 9 | #include <qpdf/QPDFLogger.hh> |
| @@ -101,11 +102,54 @@ class QPDF::ResolveRecorder final | @@ -101,11 +102,54 @@ class QPDF::ResolveRecorder final | ||
| 101 | std::set<QPDFObjGen>::const_iterator iter; | 102 | std::set<QPDFObjGen>::const_iterator iter; |
| 102 | }; | 103 | }; |
| 103 | 104 | ||
| 105 | +class Objects::PatternFinder final: public InputSource::Finder | ||
| 106 | +{ | ||
| 107 | + public: | ||
| 108 | + PatternFinder(Objects& o, bool (Objects::*checker)()) : | ||
| 109 | + o(o), | ||
| 110 | + checker(checker) | ||
| 111 | + { | ||
| 112 | + } | ||
| 113 | + ~PatternFinder() final = default; | ||
| 114 | + bool | ||
| 115 | + check() final | ||
| 116 | + { | ||
| 117 | + return (this->o.*checker)(); | ||
| 118 | + } | ||
| 119 | + | ||
| 120 | + private: | ||
| 121 | + Objects& o; | ||
| 122 | + bool (Objects::*checker)(); | ||
| 123 | +}; | ||
| 124 | + | ||
| 125 | +bool | ||
| 126 | +Objects::findHeader() | ||
| 127 | +{ | ||
| 128 | + qpdf_offset_t global_offset = m->file->tell(); | ||
| 129 | + std::string line = m->file->readLine(1024); | ||
| 130 | + char const* p = line.data(); | ||
| 131 | + util::assertion(strncmp(p, "%PDF-", 5) == 0, "findHeader is not looking at %PDF-"); | ||
| 132 | + p += 5; | ||
| 133 | + std::string version; | ||
| 134 | + // Note: The string returned by line.data() is always null-terminated. The code below never | ||
| 135 | + // overruns the buffer because a null character always short-circuits further advancement. | ||
| 136 | + if (!validatePDFVersion(p, version)) { | ||
| 137 | + return false; | ||
| 138 | + } | ||
| 139 | + m->pdf_version = version; | ||
| 140 | + if (global_offset != 0) { | ||
| 141 | + // Empirical evidence strongly suggests (codified in PDF 2.0 spec) that when there is | ||
| 142 | + // leading material prior to the PDF header, all explicit offsets in the file are such that | ||
| 143 | + // 0 points to the beginning of the header. | ||
| 144 | + m->file = std::make_shared<OffsetInputSource>(m->file, global_offset); | ||
| 145 | + } | ||
| 146 | + return true; | ||
| 147 | +} | ||
| 148 | + | ||
| 104 | bool | 149 | bool |
| 105 | -QPDF::findStartxref() | 150 | +Objects ::findStartxref() |
| 106 | { | 151 | { |
| 107 | - if (m->objects.readToken(*m->file).isWord("startxref") && | ||
| 108 | - m->objects.readToken(*m->file).isInteger()) { | 152 | + if (readToken(*m->file).isWord("startxref") && readToken(*m->file).isInteger()) { |
| 109 | // Position in front of offset token | 153 | // Position in front of offset token |
| 110 | m->file->seek(m->file->getLastOffset(), SEEK_SET); | 154 | m->file->seek(m->file->getLastOffset(), SEEK_SET); |
| 111 | return true; | 155 | return true; |
| @@ -121,7 +165,7 @@ Objects::parse(char const* password) | @@ -121,7 +165,7 @@ Objects::parse(char const* password) | ||
| 121 | } | 165 | } |
| 122 | 166 | ||
| 123 | // Find the header anywhere in the first 1024 bytes of the file. | 167 | // Find the header anywhere in the first 1024 bytes of the file. |
| 124 | - PatternFinder hf(qpdf, &QPDF::findHeader); | 168 | + PatternFinder hf(*this, &Objects::findHeader); |
| 125 | if (!m->file->findFirst("%PDF-", 0, 1024, hf)) { | 169 | if (!m->file->findFirst("%PDF-", 0, 1024, hf)) { |
| 126 | warn(damagedPDF("", -1, "can't find PDF header")); | 170 | warn(damagedPDF("", -1, "can't find PDF header")); |
| 127 | // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode | 171 | // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode |
| @@ -139,7 +183,7 @@ Objects::parse(char const* password) | @@ -139,7 +183,7 @@ Objects::parse(char const* password) | ||
| 139 | m->xref_table_max_id = static_cast<int>(m->xref_table_max_offset / 3); | 183 | m->xref_table_max_id = static_cast<int>(m->xref_table_max_offset / 3); |
| 140 | } | 184 | } |
| 141 | qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); | 185 | qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); |
| 142 | - PatternFinder sf(qpdf, &QPDF::findStartxref); | 186 | + PatternFinder sf(*this, &Objects::findStartxref); |
| 143 | qpdf_offset_t xref_offset = 0; | 187 | qpdf_offset_t xref_offset = 0; |
| 144 | if (m->file->findLast("startxref", start_offset, 0, sf)) { | 188 | if (m->file->findLast("startxref", start_offset, 0, sf)) { |
| 145 | xref_offset = QUtil::string_to_ll(readToken(*m->file).getValue().c_str()); | 189 | xref_offset = QUtil::string_to_ll(readToken(*m->file).getValue().c_str()); |
| @@ -1324,10 +1368,10 @@ Objects::readObjectInStream(is::OffsetBuffer& input, int stream_id, int obj_id) | @@ -1324,10 +1368,10 @@ Objects::readObjectInStream(is::OffsetBuffer& input, int stream_id, int obj_id) | ||
| 1324 | } | 1368 | } |
| 1325 | 1369 | ||
| 1326 | bool | 1370 | bool |
| 1327 | -QPDF::findEndstream() | 1371 | +Objects ::findEndstream() |
| 1328 | { | 1372 | { |
| 1329 | // Find endstream or endobj. Position the input at that token. | 1373 | // Find endstream or endobj. Position the input at that token. |
| 1330 | - auto t = m->objects.readToken(*m->file, 20); | 1374 | + auto t = readToken(*m->file, 20); |
| 1331 | if (t.isWord("endobj") || t.isWord("endstream")) { | 1375 | if (t.isWord("endobj") || t.isWord("endstream")) { |
| 1332 | m->file->seek(m->file->getLastOffset(), SEEK_SET); | 1376 | m->file->seek(m->file->getLastOffset(), SEEK_SET); |
| 1333 | return true; | 1377 | return true; |
| @@ -1342,7 +1386,7 @@ Objects::recoverStreamLength( | @@ -1342,7 +1386,7 @@ Objects::recoverStreamLength( | ||
| 1342 | // Try to reconstruct stream length by looking for endstream or endobj | 1386 | // Try to reconstruct stream length by looking for endstream or endobj |
| 1343 | warn(damagedPDF(*input, stream_offset, "attempting to recover stream length")); | 1387 | warn(damagedPDF(*input, stream_offset, "attempting to recover stream length")); |
| 1344 | 1388 | ||
| 1345 | - PatternFinder ef(qpdf, &QPDF::findEndstream); | 1389 | + PatternFinder ef(*this, &Objects::findEndstream); |
| 1346 | size_t length = 0; | 1390 | size_t length = 0; |
| 1347 | if (m->file->findFirst("end", stream_offset, 0, ef)) { | 1391 | if (m->file->findFirst("end", stream_offset, 0, ef)) { |
| 1348 | length = toS(m->file->tell() - stream_offset); | 1392 | length = toS(m->file->tell() - stream_offset); |
libqpdf/qpdf/QPDF_private.hh
| @@ -242,27 +242,6 @@ class QPDF::StringDecrypter final: public QPDFObjectHandle::StringDecrypter | @@ -242,27 +242,6 @@ class QPDF::StringDecrypter final: public QPDFObjectHandle::StringDecrypter | ||
| 242 | QPDF* qpdf; | 242 | QPDF* qpdf; |
| 243 | QPDFObjGen og; | 243 | QPDFObjGen og; |
| 244 | }; | 244 | }; |
| 245 | -// Other linearization data structures | ||
| 246 | - | ||
| 247 | -class QPDF::PatternFinder final: public InputSource::Finder | ||
| 248 | -{ | ||
| 249 | - public: | ||
| 250 | - PatternFinder(QPDF& qpdf, bool (QPDF::*checker)()) : | ||
| 251 | - qpdf(qpdf), | ||
| 252 | - checker(checker) | ||
| 253 | - { | ||
| 254 | - } | ||
| 255 | - ~PatternFinder() final = default; | ||
| 256 | - bool | ||
| 257 | - check() final | ||
| 258 | - { | ||
| 259 | - return (this->qpdf.*checker)(); | ||
| 260 | - } | ||
| 261 | - | ||
| 262 | - private: | ||
| 263 | - QPDF& qpdf; | ||
| 264 | - bool (QPDF::*checker)(); | ||
| 265 | -}; | ||
| 266 | 245 | ||
| 267 | // This class is used to represent a PDF document. | 246 | // This class is used to represent a PDF document. |
| 268 | // | 247 | // |
| @@ -1028,6 +1007,8 @@ class QPDF::Doc::Objects: Common | @@ -1028,6 +1007,8 @@ class QPDF::Doc::Objects: Common | ||
| 1028 | std::vector<bool> compressible_set(); | 1007 | std::vector<bool> compressible_set(); |
| 1029 | 1008 | ||
| 1030 | private: | 1009 | private: |
| 1010 | + class PatternFinder; | ||
| 1011 | + | ||
| 1031 | // Get a list of objects that would be permitted in an object stream. | 1012 | // Get a list of objects that would be permitted in an object stream. |
| 1032 | template <typename T> | 1013 | template <typename T> |
| 1033 | std::vector<T> compressible(); | 1014 | std::vector<T> compressible(); |
| @@ -1071,6 +1052,11 @@ class QPDF::Doc::Objects: Common | @@ -1071,6 +1052,11 @@ class QPDF::Doc::Objects: Common | ||
| 1071 | bool isUnresolved(QPDFObjGen og); | 1052 | bool isUnresolved(QPDFObjGen og); |
| 1072 | void setLastObjectDescription(std::string const& description, QPDFObjGen og); | 1053 | void setLastObjectDescription(std::string const& description, QPDFObjGen og); |
| 1073 | 1054 | ||
| 1055 | + // Methods to support pattern finding | ||
| 1056 | + bool findHeader(); | ||
| 1057 | + bool findStartxref(); | ||
| 1058 | + bool findEndstream(); | ||
| 1059 | + | ||
| 1074 | Foreign foreign_; | 1060 | Foreign foreign_; |
| 1075 | Streams streams_; | 1061 | Streams streams_; |
| 1076 | 1062 |
qpdf/qpdf.testcov
| @@ -129,7 +129,6 @@ QPDFObjectHandle trailing data in parse 0 | @@ -129,7 +129,6 @@ QPDFObjectHandle trailing data in parse 0 | ||
| 129 | QPDFTokenizer EOF reading token 0 | 129 | QPDFTokenizer EOF reading token 0 |
| 130 | QPDFTokenizer EOF reading appendable token 0 | 130 | QPDFTokenizer EOF reading appendable token 0 |
| 131 | QPDFWriter extra header text no newline 0 | 131 | QPDFWriter extra header text no newline 0 |
| 132 | -QPDF global offset 0 | ||
| 133 | QPDFWriter make Extensions direct 0 | 132 | QPDFWriter make Extensions direct 0 |
| 134 | QPDFWriter make ADBE direct 1 | 133 | QPDFWriter make ADBE direct 1 |
| 135 | QPDFWriter preserve Extensions 0 | 134 | QPDFWriter preserve Extensions 0 |