Commit fd52eae8474cd9ca465894745051af82b961eec5
Committed by GitHub
Merge pull request #1572 from m-holger/qpdf_hh
Remove implementation detail from QPDF header file
Showing 8 changed files with 121 additions and 123 deletions
examples/pdf-custom-filter.cc
include/qpdf/QPDF.hh
| ... | ... | @@ -37,7 +37,6 @@ |
| 37 | 37 | #include <qpdf/Buffer.hh> |
| 38 | 38 | #include <qpdf/InputSource.hh> |
| 39 | 39 | #include <qpdf/PDFVersion.hh> |
| 40 | -#include <qpdf/QIntC.hh> | |
| 41 | 40 | #include <qpdf/QPDFExc.hh> |
| 42 | 41 | #include <qpdf/QPDFObjGen.hh> |
| 43 | 42 | #include <qpdf/QPDFObjectHandle.hh> |
| ... | ... | @@ -792,43 +791,9 @@ class QPDF |
| 792 | 791 | bool is_root_metadata, |
| 793 | 792 | std::unique_ptr<Pipeline>& heap); |
| 794 | 793 | |
| 795 | - class PatternFinder; | |
| 796 | - | |
| 797 | - // Methods to support pattern finding | |
| 798 | - static bool validatePDFVersion(char const*&, std::string& version); | |
| 799 | - bool findHeader(); | |
| 800 | - bool findStartxref(); | |
| 801 | - bool findEndstream(); | |
| 802 | - | |
| 803 | 794 | // JSON import |
| 804 | 795 | void importJSON(std::shared_ptr<InputSource>, bool must_be_complete); |
| 805 | 796 | |
| 806 | - // Type conversion helper methods | |
| 807 | - template <typename T> | |
| 808 | - static qpdf_offset_t | |
| 809 | - toO(T const& i) | |
| 810 | - { | |
| 811 | - return QIntC::to_offset(i); | |
| 812 | - } | |
| 813 | - template <typename T> | |
| 814 | - static size_t | |
| 815 | - toS(T const& i) | |
| 816 | - { | |
| 817 | - return QIntC::to_size(i); | |
| 818 | - } | |
| 819 | - template <typename T> | |
| 820 | - static int | |
| 821 | - toI(T const& i) | |
| 822 | - { | |
| 823 | - return QIntC::to_int(i); | |
| 824 | - } | |
| 825 | - template <typename T> | |
| 826 | - static unsigned long long | |
| 827 | - toULL(T const& i) | |
| 828 | - { | |
| 829 | - return QIntC::to_ulonglong(i); | |
| 830 | - } | |
| 831 | - | |
| 832 | 797 | class Members; |
| 833 | 798 | |
| 834 | 799 | // Keep all member variables inside the Members object, which we dynamically allocate. This | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -315,52 +315,6 @@ QPDF::numWarnings() const |
| 315 | 315 | return m->warnings.size(); |
| 316 | 316 | } |
| 317 | 317 | |
| 318 | -bool | |
| 319 | -QPDF::validatePDFVersion(char const*& p, std::string& version) | |
| 320 | -{ | |
| 321 | - if (!util::is_digit(*p)) { | |
| 322 | - return false; | |
| 323 | - } | |
| 324 | - while (util::is_digit(*p)) { | |
| 325 | - version.append(1, *p++); | |
| 326 | - } | |
| 327 | - if (!(*p == '.' && util::is_digit(*(p + 1)))) { | |
| 328 | - return false; | |
| 329 | - } | |
| 330 | - version.append(1, *p++); | |
| 331 | - while (util::is_digit(*p)) { | |
| 332 | - version.append(1, *p++); | |
| 333 | - } | |
| 334 | - return true; | |
| 335 | -} | |
| 336 | - | |
| 337 | -bool | |
| 338 | -QPDF::findHeader() | |
| 339 | -{ | |
| 340 | - qpdf_offset_t global_offset = m->file->tell(); | |
| 341 | - std::string line = m->file->readLine(1024); | |
| 342 | - char const* p = line.data(); | |
| 343 | - if (strncmp(p, "%PDF-", 5) != 0) { | |
| 344 | - throw std::logic_error("findHeader is not looking at %PDF-"); | |
| 345 | - } | |
| 346 | - p += 5; | |
| 347 | - std::string version; | |
| 348 | - // Note: The string returned by line.data() is always null-terminated. The code below never | |
| 349 | - // overruns the buffer because a null character always short-circuits further advancement. | |
| 350 | - if (!validatePDFVersion(p, version)) { | |
| 351 | - return false; | |
| 352 | - } | |
| 353 | - m->pdf_version = version; | |
| 354 | - if (global_offset != 0) { | |
| 355 | - // Empirical evidence strongly suggests (codified in PDF 2.0 spec) that when there is | |
| 356 | - // leading material prior to the PDF header, all explicit offsets in the file are such that | |
| 357 | - // 0 points to the beginning of the header. | |
| 358 | - QTC::TC("qpdf", "QPDF global offset"); | |
| 359 | - m->file = std::make_shared<OffsetInputSource>(m->file, global_offset); | |
| 360 | - } | |
| 361 | - return true; | |
| 362 | -} | |
| 363 | - | |
| 364 | 318 | void |
| 365 | 319 | QPDF::warn(QPDFExc const& e) |
| 366 | 320 | { |
| ... | ... | @@ -761,7 +715,10 @@ QPDF::pipeStreamData( |
| 761 | 715 | auto buf = file->read(length, offset); |
| 762 | 716 | if (buf.size() != length) { |
| 763 | 717 | throw qpdf_for_warning.m->c.damagedPDF( |
| 764 | - *file, "", offset + toO(buf.size()), "unexpected EOF reading stream data"); | |
| 718 | + *file, | |
| 719 | + "", | |
| 720 | + offset + QIntC::to_offset(buf.size()), | |
| 721 | + "unexpected EOF reading stream data"); | |
| 765 | 722 | } |
| 766 | 723 | pipeline->write(buf.data(), length); |
| 767 | 724 | attempted_finish = true; | ... | ... |
libqpdf/QPDF_encryption.cc
| ... | ... | @@ -1010,7 +1010,7 @@ QPDF::decryptString(std::string& str, QPDFObjGen og) |
| 1010 | 1010 | // Using std::shared_ptr guarantees that tmp will be freed even if rc4.process throws an |
| 1011 | 1011 | // exception. |
| 1012 | 1012 | auto tmp = QUtil::make_unique_cstr(str); |
| 1013 | - RC4 rc4(QUtil::unsigned_char_pointer(key), toI(key.length())); | |
| 1013 | + RC4 rc4(QUtil::unsigned_char_pointer(key), QIntC::to_int(key.length())); | |
| 1014 | 1014 | auto data = QUtil::unsigned_char_pointer(tmp.get()); |
| 1015 | 1015 | rc4.process(data, vlen, data); |
| 1016 | 1016 | str = std::string(tmp.get(), vlen); | ... | ... |
libqpdf/QPDF_json.cc
| ... | ... | @@ -482,7 +482,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) |
| 482 | 482 | if (value.getString(v)) { |
| 483 | 483 | std::string version; |
| 484 | 484 | char const* p = v.c_str(); |
| 485 | - if (QPDF::validatePDFVersion(p, version) && (*p == '\0')) { | |
| 485 | + if (objects.validatePDFVersion(p, version) && *p == '\0') { | |
| 486 | 486 | pdf.m->pdf_version = version; |
| 487 | 487 | return true; |
| 488 | 488 | } | ... | ... |
libqpdf/QPDF_objects.cc
| ... | ... | @@ -3,6 +3,7 @@ |
| 3 | 3 | #include <qpdf/QPDF_private.hh> |
| 4 | 4 | |
| 5 | 5 | #include <qpdf/InputSource_private.hh> |
| 6 | +#include <qpdf/OffsetInputSource.hh> | |
| 6 | 7 | #include <qpdf/Pipeline.hh> |
| 7 | 8 | #include <qpdf/QPDFExc.hh> |
| 8 | 9 | #include <qpdf/QPDFLogger.hh> |
| ... | ... | @@ -101,11 +102,73 @@ class QPDF::ResolveRecorder final |
| 101 | 102 | std::set<QPDFObjGen>::const_iterator iter; |
| 102 | 103 | }; |
| 103 | 104 | |
| 105 | +class Objects::PatternFinder final: public InputSource::Finder | |
| 106 | +{ | |
| 107 | + public: | |
| 108 | + PatternFinder(Objects& o, bool (Objects::*checker)()) : | |
| 109 | + o(o), | |
| 110 | + checker(checker) | |
| 111 | + { | |
| 112 | + } | |
| 113 | + ~PatternFinder() final = default; | |
| 114 | + bool | |
| 115 | + check() final | |
| 116 | + { | |
| 117 | + return (this->o.*checker)(); | |
| 118 | + } | |
| 119 | + | |
| 120 | + private: | |
| 121 | + Objects& o; | |
| 122 | + bool (Objects::*checker)(); | |
| 123 | +}; | |
| 124 | + | |
| 125 | +bool | |
| 126 | +Objects::validatePDFVersion(char const*& p, std::string& version) | |
| 127 | +{ | |
| 128 | + if (!util::is_digit(*p)) { | |
| 129 | + return false; | |
| 130 | + } | |
| 131 | + while (util::is_digit(*p)) { | |
| 132 | + version.append(1, *p++); | |
| 133 | + } | |
| 134 | + if (!(*p == '.' && util::is_digit(*(p + 1)))) { | |
| 135 | + return false; | |
| 136 | + } | |
| 137 | + version.append(1, *p++); | |
| 138 | + while (util::is_digit(*p)) { | |
| 139 | + version.append(1, *p++); | |
| 140 | + } | |
| 141 | + return true; | |
| 142 | +} | |
| 143 | + | |
| 144 | +bool | |
| 145 | +Objects::findHeader() | |
| 146 | +{ | |
| 147 | + qpdf_offset_t global_offset = m->file->tell(); | |
| 148 | + std::string line = m->file->readLine(1024); | |
| 149 | + char const* p = line.data(); | |
| 150 | + util::assertion(strncmp(p, "%PDF-", 5) == 0, "findHeader is not looking at %PDF-"); | |
| 151 | + p += 5; | |
| 152 | + std::string version; | |
| 153 | + // Note: The string returned by line.data() is always null-terminated. The code below never | |
| 154 | + // overruns the buffer because a null character always short-circuits further advancement. | |
| 155 | + if (!validatePDFVersion(p, version)) { | |
| 156 | + return false; | |
| 157 | + } | |
| 158 | + m->pdf_version = version; | |
| 159 | + if (global_offset != 0) { | |
| 160 | + // Empirical evidence strongly suggests (codified in PDF 2.0 spec) that when there is | |
| 161 | + // leading material prior to the PDF header, all explicit offsets in the file are such that | |
| 162 | + // 0 points to the beginning of the header. | |
| 163 | + m->file = std::make_shared<OffsetInputSource>(m->file, global_offset); | |
| 164 | + } | |
| 165 | + return true; | |
| 166 | +} | |
| 167 | + | |
| 104 | 168 | bool |
| 105 | -QPDF::findStartxref() | |
| 169 | +Objects ::findStartxref() | |
| 106 | 170 | { |
| 107 | - if (m->objects.readToken(*m->file).isWord("startxref") && | |
| 108 | - m->objects.readToken(*m->file).isInteger()) { | |
| 171 | + if (readToken(*m->file).isWord("startxref") && readToken(*m->file).isInteger()) { | |
| 109 | 172 | // Position in front of offset token |
| 110 | 173 | m->file->seek(m->file->getLastOffset(), SEEK_SET); |
| 111 | 174 | return true; |
| ... | ... | @@ -121,7 +184,7 @@ Objects::parse(char const* password) |
| 121 | 184 | } |
| 122 | 185 | |
| 123 | 186 | // Find the header anywhere in the first 1024 bytes of the file. |
| 124 | - PatternFinder hf(qpdf, &QPDF::findHeader); | |
| 187 | + PatternFinder hf(*this, &Objects::findHeader); | |
| 125 | 188 | if (!m->file->findFirst("%PDF-", 0, 1024, hf)) { |
| 126 | 189 | warn(damagedPDF("", -1, "can't find PDF header")); |
| 127 | 190 | // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode |
| ... | ... | @@ -139,7 +202,7 @@ Objects::parse(char const* password) |
| 139 | 202 | m->xref_table_max_id = static_cast<int>(m->xref_table_max_offset / 3); |
| 140 | 203 | } |
| 141 | 204 | qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); |
| 142 | - PatternFinder sf(qpdf, &QPDF::findStartxref); | |
| 205 | + PatternFinder sf(*this, &Objects::findStartxref); | |
| 143 | 206 | qpdf_offset_t xref_offset = 0; |
| 144 | 207 | if (m->file->findLast("startxref", start_offset, 0, sf)) { |
| 145 | 208 | xref_offset = QUtil::string_to_ll(readToken(*m->file).getValue().c_str()); |
| ... | ... | @@ -164,7 +227,7 @@ Objects::parse(char const* password) |
| 164 | 227 | } |
| 165 | 228 | } |
| 166 | 229 | |
| 167 | - qpdf.initializeEncryption(); | |
| 230 | + m->encp->initialize(qpdf); | |
| 168 | 231 | m->parsed = true; |
| 169 | 232 | if (!m->xref_table.empty() && !qpdf.getRoot().getKey("/Pages").isDictionary()) { |
| 170 | 233 | // QPDFs created from JSON have an empty xref table and no root object yet. |
| ... | ... | @@ -271,15 +334,13 @@ Objects::reconstruct_xref(QPDFExc& e, bool found_startxref) |
| 271 | 334 | auto xref_backup{m->xref_table}; |
| 272 | 335 | try { |
| 273 | 336 | m->file->seek(startxrefs.back(), SEEK_SET); |
| 274 | - if (auto offset = | |
| 275 | - QUtil::string_to_ll(m->objects.readToken(*m->file).getValue().data())) { | |
| 276 | - m->objects.read_xref(offset); | |
| 337 | + if (auto offset = QUtil::string_to_ll(readToken(*m->file).getValue().data())) { | |
| 338 | + read_xref(offset); | |
| 277 | 339 | |
| 278 | 340 | if (qpdf.getRoot().getKey("/Pages").isDictionary()) { |
| 279 | - QTC::TC("qpdf", "QPDF startxref more than 1024 before end"); | |
| 280 | 341 | warn(damagedPDF( |
| 281 | 342 | "", -1, "startxref was more than 1024 bytes before end of file")); |
| 282 | - qpdf.initializeEncryption(); | |
| 343 | + m->encp->initialize(qpdf); | |
| 283 | 344 | m->parsed = true; |
| 284 | 345 | m->reconstructed_xref = false; |
| 285 | 346 | return; |
| ... | ... | @@ -1138,7 +1199,7 @@ QPDF::getObjectCount() |
| 1138 | 1199 | if (!m->obj_cache.empty()) { |
| 1139 | 1200 | og = (*(m->obj_cache.rbegin())).first; |
| 1140 | 1201 | } |
| 1141 | - return toS(og.getObj()); | |
| 1202 | + return QIntC::to_size(og.getObj()); | |
| 1142 | 1203 | } |
| 1143 | 1204 | |
| 1144 | 1205 | std::vector<QPDFObjectHandle> |
| ... | ... | @@ -1324,10 +1385,10 @@ Objects::readObjectInStream(is::OffsetBuffer& input, int stream_id, int obj_id) |
| 1324 | 1385 | } |
| 1325 | 1386 | |
| 1326 | 1387 | bool |
| 1327 | -QPDF::findEndstream() | |
| 1388 | +Objects ::findEndstream() | |
| 1328 | 1389 | { |
| 1329 | 1390 | // Find endstream or endobj. Position the input at that token. |
| 1330 | - auto t = m->objects.readToken(*m->file, 20); | |
| 1391 | + auto t = readToken(*m->file, 20); | |
| 1331 | 1392 | if (t.isWord("endobj") || t.isWord("endstream")) { |
| 1332 | 1393 | m->file->seek(m->file->getLastOffset(), SEEK_SET); |
| 1333 | 1394 | return true; |
| ... | ... | @@ -1342,7 +1403,7 @@ Objects::recoverStreamLength( |
| 1342 | 1403 | // Try to reconstruct stream length by looking for endstream or endobj |
| 1343 | 1404 | warn(damagedPDF(*input, stream_offset, "attempting to recover stream length")); |
| 1344 | 1405 | |
| 1345 | - PatternFinder ef(qpdf, &QPDF::findEndstream); | |
| 1406 | + PatternFinder ef(*this, &Objects::findEndstream); | |
| 1346 | 1407 | size_t length = 0; |
| 1347 | 1408 | if (m->file->findFirst("end", stream_offset, 0, ef)) { |
| 1348 | 1409 | length = toS(m->file->tell() - stream_offset); | ... | ... |
libqpdf/qpdf/QPDF_private.hh
| ... | ... | @@ -3,6 +3,7 @@ |
| 3 | 3 | |
| 4 | 4 | #include <qpdf/QPDF.hh> |
| 5 | 5 | |
| 6 | +#include <qpdf/QIntC.hh> | |
| 6 | 7 | #include <qpdf/QPDFAcroFormDocumentHelper.hh> |
| 7 | 8 | #include <qpdf/QPDFEmbeddedFileDocumentHelper.hh> |
| 8 | 9 | #include <qpdf/QPDFLogger.hh> |
| ... | ... | @@ -241,27 +242,6 @@ class QPDF::StringDecrypter final: public QPDFObjectHandle::StringDecrypter |
| 241 | 242 | QPDF* qpdf; |
| 242 | 243 | QPDFObjGen og; |
| 243 | 244 | }; |
| 244 | -// Other linearization data structures | |
| 245 | - | |
| 246 | -class QPDF::PatternFinder final: public InputSource::Finder | |
| 247 | -{ | |
| 248 | - public: | |
| 249 | - PatternFinder(QPDF& qpdf, bool (QPDF::*checker)()) : | |
| 250 | - qpdf(qpdf), | |
| 251 | - checker(checker) | |
| 252 | - { | |
| 253 | - } | |
| 254 | - ~PatternFinder() final = default; | |
| 255 | - bool | |
| 256 | - check() final | |
| 257 | - { | |
| 258 | - return (this->qpdf.*checker)(); | |
| 259 | - } | |
| 260 | - | |
| 261 | - private: | |
| 262 | - QPDF& qpdf; | |
| 263 | - bool (QPDF::*checker)(); | |
| 264 | -}; | |
| 265 | 245 | |
| 266 | 246 | // This class is used to represent a PDF document. |
| 267 | 247 | // |
| ... | ... | @@ -323,13 +303,39 @@ class QPDF::Doc |
| 323 | 303 | } |
| 324 | 304 | |
| 325 | 305 | protected: |
| 306 | + // Type conversion helper methods | |
| 307 | + template <typename T> | |
| 308 | + static qpdf_offset_t | |
| 309 | + toO(T const& i) | |
| 310 | + { | |
| 311 | + return QIntC::to_offset(i); | |
| 312 | + } | |
| 313 | + template <typename T> | |
| 314 | + static size_t | |
| 315 | + toS(T const& i) | |
| 316 | + { | |
| 317 | + return QIntC::to_size(i); | |
| 318 | + } | |
| 319 | + template <typename T> | |
| 320 | + static int | |
| 321 | + toI(T const& i) | |
| 322 | + { | |
| 323 | + return QIntC::to_int(i); | |
| 324 | + } | |
| 325 | + template <typename T> | |
| 326 | + static unsigned long long | |
| 327 | + toULL(T const& i) | |
| 328 | + { | |
| 329 | + return QIntC::to_ulonglong(i); | |
| 330 | + } | |
| 331 | + | |
| 326 | 332 | QPDF& qpdf; |
| 327 | 333 | QPDF::Members* m; |
| 328 | 334 | |
| 329 | 335 | qpdf::Doc::Config& cf; |
| 330 | 336 | QPDF::Doc::Pages& pages; |
| 331 | 337 | QPDF::Doc::Objects& objects; |
| 332 | - }; | |
| 338 | + }; // class qpdf::Doc::Common | |
| 333 | 339 | |
| 334 | 340 | Doc() = delete; |
| 335 | 341 | Doc(Doc const&) = delete; |
| ... | ... | @@ -994,6 +1000,8 @@ class QPDF::Doc::Objects: Common |
| 994 | 1000 | std::shared_ptr<QPDFObject> getObjectForJSON(int id, int gen); |
| 995 | 1001 | size_t table_size(); |
| 996 | 1002 | |
| 1003 | + static bool validatePDFVersion(char const*&, std::string& version); | |
| 1004 | + | |
| 997 | 1005 | // For QPDFWriter: |
| 998 | 1006 | |
| 999 | 1007 | std::map<QPDFObjGen, QPDFXRefEntry> const& xref_table(); |
| ... | ... | @@ -1001,6 +1009,8 @@ class QPDF::Doc::Objects: Common |
| 1001 | 1009 | std::vector<bool> compressible_set(); |
| 1002 | 1010 | |
| 1003 | 1011 | private: |
| 1012 | + class PatternFinder; | |
| 1013 | + | |
| 1004 | 1014 | // Get a list of objects that would be permitted in an object stream. |
| 1005 | 1015 | template <typename T> |
| 1006 | 1016 | std::vector<T> compressible(); |
| ... | ... | @@ -1044,6 +1054,11 @@ class QPDF::Doc::Objects: Common |
| 1044 | 1054 | bool isUnresolved(QPDFObjGen og); |
| 1045 | 1055 | void setLastObjectDescription(std::string const& description, QPDFObjGen og); |
| 1046 | 1056 | |
| 1057 | + // Methods to support pattern finding | |
| 1058 | + bool findHeader(); | |
| 1059 | + bool findStartxref(); | |
| 1060 | + bool findEndstream(); | |
| 1061 | + | |
| 1047 | 1062 | Foreign foreign_; |
| 1048 | 1063 | Streams streams_; |
| 1049 | 1064 | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -27,7 +27,6 @@ main QTest stream 0 |
| 27 | 27 | QPDF lin write nshared_total > nshared_first_page 1 |
| 28 | 28 | QPDFWriter encrypted hint stream 0 |
| 29 | 29 | QPDF xref gen > 0 1 |
| 30 | -QPDF startxref more than 1024 before end 0 | |
| 31 | 30 | QPDFParser bad brace 0 |
| 32 | 31 | QPDFParser bad brace in parseRemainder 0 |
| 33 | 32 | QPDFParser bad array close 0 |
| ... | ... | @@ -129,7 +128,6 @@ QPDFObjectHandle trailing data in parse 0 |
| 129 | 128 | QPDFTokenizer EOF reading token 0 |
| 130 | 129 | QPDFTokenizer EOF reading appendable token 0 |
| 131 | 130 | QPDFWriter extra header text no newline 0 |
| 132 | -QPDF global offset 0 | |
| 133 | 131 | QPDFWriter make Extensions direct 0 |
| 134 | 132 | QPDFWriter make ADBE direct 1 |
| 135 | 133 | QPDFWriter preserve Extensions 0 | ... | ... |