Commit fd52eae8474cd9ca465894745051af82b961eec5

Authored by m-holger
Committed by GitHub
2 parents 32d14f31 b62367ae

Merge pull request #1572 from m-holger/qpdf_hh

Remove implementation detail from QPDF header file
examples/pdf-custom-filter.cc
  1 +
  2 +#include <qpdf/QIntC.hh>
1 3 #include <qpdf/QPDF.hh>
2 4 #include <qpdf/QPDFStreamFilter.hh>
3 5 #include <qpdf/QPDFWriter.hh>
... ...
include/qpdf/QPDF.hh
... ... @@ -37,7 +37,6 @@
37 37 #include <qpdf/Buffer.hh>
38 38 #include <qpdf/InputSource.hh>
39 39 #include <qpdf/PDFVersion.hh>
40   -#include <qpdf/QIntC.hh>
41 40 #include <qpdf/QPDFExc.hh>
42 41 #include <qpdf/QPDFObjGen.hh>
43 42 #include <qpdf/QPDFObjectHandle.hh>
... ... @@ -792,43 +791,9 @@ class QPDF
792 791 bool is_root_metadata,
793 792 std::unique_ptr<Pipeline>& heap);
794 793  
795   - class PatternFinder;
796   -
797   - // Methods to support pattern finding
798   - static bool validatePDFVersion(char const*&, std::string& version);
799   - bool findHeader();
800   - bool findStartxref();
801   - bool findEndstream();
802   -
803 794 // JSON import
804 795 void importJSON(std::shared_ptr<InputSource>, bool must_be_complete);
805 796  
806   - // Type conversion helper methods
807   - template <typename T>
808   - static qpdf_offset_t
809   - toO(T const& i)
810   - {
811   - return QIntC::to_offset(i);
812   - }
813   - template <typename T>
814   - static size_t
815   - toS(T const& i)
816   - {
817   - return QIntC::to_size(i);
818   - }
819   - template <typename T>
820   - static int
821   - toI(T const& i)
822   - {
823   - return QIntC::to_int(i);
824   - }
825   - template <typename T>
826   - static unsigned long long
827   - toULL(T const& i)
828   - {
829   - return QIntC::to_ulonglong(i);
830   - }
831   -
832 797 class Members;
833 798  
834 799 // Keep all member variables inside the Members object, which we dynamically allocate. This
... ...
libqpdf/QPDF.cc
... ... @@ -315,52 +315,6 @@ QPDF::numWarnings() const
315 315 return m->warnings.size();
316 316 }
317 317  
318   -bool
319   -QPDF::validatePDFVersion(char const*& p, std::string& version)
320   -{
321   - if (!util::is_digit(*p)) {
322   - return false;
323   - }
324   - while (util::is_digit(*p)) {
325   - version.append(1, *p++);
326   - }
327   - if (!(*p == '.' && util::is_digit(*(p + 1)))) {
328   - return false;
329   - }
330   - version.append(1, *p++);
331   - while (util::is_digit(*p)) {
332   - version.append(1, *p++);
333   - }
334   - return true;
335   -}
336   -
337   -bool
338   -QPDF::findHeader()
339   -{
340   - qpdf_offset_t global_offset = m->file->tell();
341   - std::string line = m->file->readLine(1024);
342   - char const* p = line.data();
343   - if (strncmp(p, "%PDF-", 5) != 0) {
344   - throw std::logic_error("findHeader is not looking at %PDF-");
345   - }
346   - p += 5;
347   - std::string version;
348   - // Note: The string returned by line.data() is always null-terminated. The code below never
349   - // overruns the buffer because a null character always short-circuits further advancement.
350   - if (!validatePDFVersion(p, version)) {
351   - return false;
352   - }
353   - m->pdf_version = version;
354   - if (global_offset != 0) {
355   - // Empirical evidence strongly suggests (codified in PDF 2.0 spec) that when there is
356   - // leading material prior to the PDF header, all explicit offsets in the file are such that
357   - // 0 points to the beginning of the header.
358   - QTC::TC("qpdf", "QPDF global offset");
359   - m->file = std::make_shared<OffsetInputSource>(m->file, global_offset);
360   - }
361   - return true;
362   -}
363   -
364 318 void
365 319 QPDF::warn(QPDFExc const& e)
366 320 {
... ... @@ -761,7 +715,10 @@ QPDF::pipeStreamData(
761 715 auto buf = file->read(length, offset);
762 716 if (buf.size() != length) {
763 717 throw qpdf_for_warning.m->c.damagedPDF(
764   - *file, "", offset + toO(buf.size()), "unexpected EOF reading stream data");
  718 + *file,
  719 + "",
  720 + offset + QIntC::to_offset(buf.size()),
  721 + "unexpected EOF reading stream data");
765 722 }
766 723 pipeline->write(buf.data(), length);
767 724 attempted_finish = true;
... ...
libqpdf/QPDF_encryption.cc
... ... @@ -1010,7 +1010,7 @@ QPDF::decryptString(std::string& str, QPDFObjGen og)
1010 1010 // Using std::shared_ptr guarantees that tmp will be freed even if rc4.process throws an
1011 1011 // exception.
1012 1012 auto tmp = QUtil::make_unique_cstr(str);
1013   - RC4 rc4(QUtil::unsigned_char_pointer(key), toI(key.length()));
  1013 + RC4 rc4(QUtil::unsigned_char_pointer(key), QIntC::to_int(key.length()));
1014 1014 auto data = QUtil::unsigned_char_pointer(tmp.get());
1015 1015 rc4.process(data, vlen, data);
1016 1016 str = std::string(tmp.get(), vlen);
... ...
libqpdf/QPDF_json.cc
... ... @@ -482,7 +482,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
482 482 if (value.getString(v)) {
483 483 std::string version;
484 484 char const* p = v.c_str();
485   - if (QPDF::validatePDFVersion(p, version) && (*p == '\0')) {
  485 + if (objects.validatePDFVersion(p, version) && *p == '\0') {
486 486 pdf.m->pdf_version = version;
487 487 return true;
488 488 }
... ...
libqpdf/QPDF_objects.cc
... ... @@ -3,6 +3,7 @@
3 3 #include <qpdf/QPDF_private.hh>
4 4  
5 5 #include <qpdf/InputSource_private.hh>
  6 +#include <qpdf/OffsetInputSource.hh>
6 7 #include <qpdf/Pipeline.hh>
7 8 #include <qpdf/QPDFExc.hh>
8 9 #include <qpdf/QPDFLogger.hh>
... ... @@ -101,11 +102,73 @@ class QPDF::ResolveRecorder final
101 102 std::set<QPDFObjGen>::const_iterator iter;
102 103 };
103 104  
  105 +class Objects::PatternFinder final: public InputSource::Finder
  106 +{
  107 + public:
  108 + PatternFinder(Objects& o, bool (Objects::*checker)()) :
  109 + o(o),
  110 + checker(checker)
  111 + {
  112 + }
  113 + ~PatternFinder() final = default;
  114 + bool
  115 + check() final
  116 + {
  117 + return (this->o.*checker)();
  118 + }
  119 +
  120 + private:
  121 + Objects& o;
  122 + bool (Objects::*checker)();
  123 +};
  124 +
  125 +bool
  126 +Objects::validatePDFVersion(char const*& p, std::string& version)
  127 +{
  128 + if (!util::is_digit(*p)) {
  129 + return false;
  130 + }
  131 + while (util::is_digit(*p)) {
  132 + version.append(1, *p++);
  133 + }
  134 + if (!(*p == '.' && util::is_digit(*(p + 1)))) {
  135 + return false;
  136 + }
  137 + version.append(1, *p++);
  138 + while (util::is_digit(*p)) {
  139 + version.append(1, *p++);
  140 + }
  141 + return true;
  142 +}
  143 +
  144 +bool
  145 +Objects::findHeader()
  146 +{
  147 + qpdf_offset_t global_offset = m->file->tell();
  148 + std::string line = m->file->readLine(1024);
  149 + char const* p = line.data();
  150 + util::assertion(strncmp(p, "%PDF-", 5) == 0, "findHeader is not looking at %PDF-");
  151 + p += 5;
  152 + std::string version;
  153 + // Note: The string returned by line.data() is always null-terminated. The code below never
  154 + // overruns the buffer because a null character always short-circuits further advancement.
  155 + if (!validatePDFVersion(p, version)) {
  156 + return false;
  157 + }
  158 + m->pdf_version = version;
  159 + if (global_offset != 0) {
  160 + // Empirical evidence strongly suggests (codified in PDF 2.0 spec) that when there is
  161 + // leading material prior to the PDF header, all explicit offsets in the file are such that
  162 + // 0 points to the beginning of the header.
  163 + m->file = std::make_shared<OffsetInputSource>(m->file, global_offset);
  164 + }
  165 + return true;
  166 +}
  167 +
104 168 bool
105   -QPDF::findStartxref()
  169 +Objects ::findStartxref()
106 170 {
107   - if (m->objects.readToken(*m->file).isWord("startxref") &&
108   - m->objects.readToken(*m->file).isInteger()) {
  171 + if (readToken(*m->file).isWord("startxref") && readToken(*m->file).isInteger()) {
109 172 // Position in front of offset token
110 173 m->file->seek(m->file->getLastOffset(), SEEK_SET);
111 174 return true;
... ... @@ -121,7 +184,7 @@ Objects::parse(char const* password)
121 184 }
122 185  
123 186 // Find the header anywhere in the first 1024 bytes of the file.
124   - PatternFinder hf(qpdf, &QPDF::findHeader);
  187 + PatternFinder hf(*this, &Objects::findHeader);
125 188 if (!m->file->findFirst("%PDF-", 0, 1024, hf)) {
126 189 warn(damagedPDF("", -1, "can't find PDF header"));
127 190 // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode
... ... @@ -139,7 +202,7 @@ Objects::parse(char const* password)
139 202 m->xref_table_max_id = static_cast<int>(m->xref_table_max_offset / 3);
140 203 }
141 204 qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
142   - PatternFinder sf(qpdf, &QPDF::findStartxref);
  205 + PatternFinder sf(*this, &Objects::findStartxref);
143 206 qpdf_offset_t xref_offset = 0;
144 207 if (m->file->findLast("startxref", start_offset, 0, sf)) {
145 208 xref_offset = QUtil::string_to_ll(readToken(*m->file).getValue().c_str());
... ... @@ -164,7 +227,7 @@ Objects::parse(char const* password)
164 227 }
165 228 }
166 229  
167   - qpdf.initializeEncryption();
  230 + m->encp->initialize(qpdf);
168 231 m->parsed = true;
169 232 if (!m->xref_table.empty() && !qpdf.getRoot().getKey("/Pages").isDictionary()) {
170 233 // QPDFs created from JSON have an empty xref table and no root object yet.
... ... @@ -271,15 +334,13 @@ Objects::reconstruct_xref(QPDFExc& e, bool found_startxref)
271 334 auto xref_backup{m->xref_table};
272 335 try {
273 336 m->file->seek(startxrefs.back(), SEEK_SET);
274   - if (auto offset =
275   - QUtil::string_to_ll(m->objects.readToken(*m->file).getValue().data())) {
276   - m->objects.read_xref(offset);
  337 + if (auto offset = QUtil::string_to_ll(readToken(*m->file).getValue().data())) {
  338 + read_xref(offset);
277 339  
278 340 if (qpdf.getRoot().getKey("/Pages").isDictionary()) {
279   - QTC::TC("qpdf", "QPDF startxref more than 1024 before end");
280 341 warn(damagedPDF(
281 342 "", -1, "startxref was more than 1024 bytes before end of file"));
282   - qpdf.initializeEncryption();
  343 + m->encp->initialize(qpdf);
283 344 m->parsed = true;
284 345 m->reconstructed_xref = false;
285 346 return;
... ... @@ -1138,7 +1199,7 @@ QPDF::getObjectCount()
1138 1199 if (!m->obj_cache.empty()) {
1139 1200 og = (*(m->obj_cache.rbegin())).first;
1140 1201 }
1141   - return toS(og.getObj());
  1202 + return QIntC::to_size(og.getObj());
1142 1203 }
1143 1204  
1144 1205 std::vector<QPDFObjectHandle>
... ... @@ -1324,10 +1385,10 @@ Objects::readObjectInStream(is::OffsetBuffer& input, int stream_id, int obj_id)
1324 1385 }
1325 1386  
1326 1387 bool
1327   -QPDF::findEndstream()
  1388 +Objects ::findEndstream()
1328 1389 {
1329 1390 // Find endstream or endobj. Position the input at that token.
1330   - auto t = m->objects.readToken(*m->file, 20);
  1391 + auto t = readToken(*m->file, 20);
1331 1392 if (t.isWord("endobj") || t.isWord("endstream")) {
1332 1393 m->file->seek(m->file->getLastOffset(), SEEK_SET);
1333 1394 return true;
... ... @@ -1342,7 +1403,7 @@ Objects::recoverStreamLength(
1342 1403 // Try to reconstruct stream length by looking for endstream or endobj
1343 1404 warn(damagedPDF(*input, stream_offset, "attempting to recover stream length"));
1344 1405  
1345   - PatternFinder ef(qpdf, &QPDF::findEndstream);
  1406 + PatternFinder ef(*this, &Objects::findEndstream);
1346 1407 size_t length = 0;
1347 1408 if (m->file->findFirst("end", stream_offset, 0, ef)) {
1348 1409 length = toS(m->file->tell() - stream_offset);
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -3,6 +3,7 @@
3 3  
4 4 #include <qpdf/QPDF.hh>
5 5  
  6 +#include <qpdf/QIntC.hh>
6 7 #include <qpdf/QPDFAcroFormDocumentHelper.hh>
7 8 #include <qpdf/QPDFEmbeddedFileDocumentHelper.hh>
8 9 #include <qpdf/QPDFLogger.hh>
... ... @@ -241,27 +242,6 @@ class QPDF::StringDecrypter final: public QPDFObjectHandle::StringDecrypter
241 242 QPDF* qpdf;
242 243 QPDFObjGen og;
243 244 };
244   -// Other linearization data structures
245   -
246   -class QPDF::PatternFinder final: public InputSource::Finder
247   -{
248   - public:
249   - PatternFinder(QPDF& qpdf, bool (QPDF::*checker)()) :
250   - qpdf(qpdf),
251   - checker(checker)
252   - {
253   - }
254   - ~PatternFinder() final = default;
255   - bool
256   - check() final
257   - {
258   - return (this->qpdf.*checker)();
259   - }
260   -
261   - private:
262   - QPDF& qpdf;
263   - bool (QPDF::*checker)();
264   -};
265 245  
266 246 // This class is used to represent a PDF document.
267 247 //
... ... @@ -323,13 +303,39 @@ class QPDF::Doc
323 303 }
324 304  
325 305 protected:
  306 + // Type conversion helper methods
  307 + template <typename T>
  308 + static qpdf_offset_t
  309 + toO(T const& i)
  310 + {
  311 + return QIntC::to_offset(i);
  312 + }
  313 + template <typename T>
  314 + static size_t
  315 + toS(T const& i)
  316 + {
  317 + return QIntC::to_size(i);
  318 + }
  319 + template <typename T>
  320 + static int
  321 + toI(T const& i)
  322 + {
  323 + return QIntC::to_int(i);
  324 + }
  325 + template <typename T>
  326 + static unsigned long long
  327 + toULL(T const& i)
  328 + {
  329 + return QIntC::to_ulonglong(i);
  330 + }
  331 +
326 332 QPDF& qpdf;
327 333 QPDF::Members* m;
328 334  
329 335 qpdf::Doc::Config& cf;
330 336 QPDF::Doc::Pages& pages;
331 337 QPDF::Doc::Objects& objects;
332   - };
  338 + }; // class qpdf::Doc::Common
333 339  
334 340 Doc() = delete;
335 341 Doc(Doc const&) = delete;
... ... @@ -994,6 +1000,8 @@ class QPDF::Doc::Objects: Common
994 1000 std::shared_ptr<QPDFObject> getObjectForJSON(int id, int gen);
995 1001 size_t table_size();
996 1002  
  1003 + static bool validatePDFVersion(char const*&, std::string& version);
  1004 +
997 1005 // For QPDFWriter:
998 1006  
999 1007 std::map<QPDFObjGen, QPDFXRefEntry> const& xref_table();
... ... @@ -1001,6 +1009,8 @@ class QPDF::Doc::Objects: Common
1001 1009 std::vector<bool> compressible_set();
1002 1010  
1003 1011 private:
  1012 + class PatternFinder;
  1013 +
1004 1014 // Get a list of objects that would be permitted in an object stream.
1005 1015 template <typename T>
1006 1016 std::vector<T> compressible();
... ... @@ -1044,6 +1054,11 @@ class QPDF::Doc::Objects: Common
1044 1054 bool isUnresolved(QPDFObjGen og);
1045 1055 void setLastObjectDescription(std::string const& description, QPDFObjGen og);
1046 1056  
  1057 + // Methods to support pattern finding
  1058 + bool findHeader();
  1059 + bool findStartxref();
  1060 + bool findEndstream();
  1061 +
1047 1062 Foreign foreign_;
1048 1063 Streams streams_;
1049 1064  
... ...
qpdf/qpdf.testcov
... ... @@ -27,7 +27,6 @@ main QTest stream 0
27 27 QPDF lin write nshared_total > nshared_first_page 1
28 28 QPDFWriter encrypted hint stream 0
29 29 QPDF xref gen > 0 1
30   -QPDF startxref more than 1024 before end 0
31 30 QPDFParser bad brace 0
32 31 QPDFParser bad brace in parseRemainder 0
33 32 QPDFParser bad array close 0
... ... @@ -129,7 +128,6 @@ QPDFObjectHandle trailing data in parse 0
129 128 QPDFTokenizer EOF reading token 0
130 129 QPDFTokenizer EOF reading appendable token 0
131 130 QPDFWriter extra header text no newline 0
132   -QPDF global offset 0
133 131 QPDFWriter make Extensions direct 0
134 132 QPDFWriter make ADBE direct 1
135 133 QPDFWriter preserve Extensions 0
... ...