Commit 85b968418be9104f8ac411f4c1565377c151591d

Authored by m-holger
1 parent 8ded7ff5

Refactor `QPDF` pattern-finding methods: relocate `findHeader`, `findStartxref`, and `findEndstream` to `Objects`, remove `QPDF::PatternFinder` class, and update related logic for improved encapsulation.
include/qpdf/QPDF.hh
... ... @@ -791,13 +791,7 @@ class QPDF
791 791 bool is_root_metadata,
792 792 std::unique_ptr<Pipeline>& heap);
793 793  
794   - class PatternFinder;
795   -
796   - // Methods to support pattern finding
797 794 static bool validatePDFVersion(char const*&, std::string& version);
798   - bool findHeader();
799   - bool findStartxref();
800   - bool findEndstream();
801 795  
802 796 // JSON import
803 797 void importJSON(std::shared_ptr<InputSource>, bool must_be_complete);
... ...
libqpdf/QPDF.cc
... ... @@ -334,33 +334,6 @@ QPDF::validatePDFVersion(char const*& p, std::string& version)
334 334 return true;
335 335 }
336 336  
337   -bool
338   -QPDF::findHeader()
339   -{
340   - qpdf_offset_t global_offset = m->file->tell();
341   - std::string line = m->file->readLine(1024);
342   - char const* p = line.data();
343   - if (strncmp(p, "%PDF-", 5) != 0) {
344   - throw std::logic_error("findHeader is not looking at %PDF-");
345   - }
346   - p += 5;
347   - std::string version;
348   - // Note: The string returned by line.data() is always null-terminated. The code below never
349   - // overruns the buffer because a null character always short-circuits further advancement.
350   - if (!validatePDFVersion(p, version)) {
351   - return false;
352   - }
353   - m->pdf_version = version;
354   - if (global_offset != 0) {
355   - // Empirical evidence strongly suggests (codified in PDF 2.0 spec) that when there is
356   - // leading material prior to the PDF header, all explicit offsets in the file are such that
357   - // 0 points to the beginning of the header.
358   - QTC::TC("qpdf", "QPDF global offset");
359   - m->file = std::make_shared<OffsetInputSource>(m->file, global_offset);
360   - }
361   - return true;
362   -}
363   -
364 337 void
365 338 QPDF::warn(QPDFExc const& e)
366 339 {
... ...
libqpdf/QPDF_objects.cc
... ... @@ -3,6 +3,7 @@
3 3 #include <qpdf/QPDF_private.hh>
4 4  
5 5 #include <qpdf/InputSource_private.hh>
  6 +#include <qpdf/OffsetInputSource.hh>
6 7 #include <qpdf/Pipeline.hh>
7 8 #include <qpdf/QPDFExc.hh>
8 9 #include <qpdf/QPDFLogger.hh>
... ... @@ -101,11 +102,54 @@ class QPDF::ResolveRecorder final
101 102 std::set<QPDFObjGen>::const_iterator iter;
102 103 };
103 104  
  105 +class Objects::PatternFinder final: public InputSource::Finder
  106 +{
  107 + public:
  108 + PatternFinder(Objects& o, bool (Objects::*checker)()) :
  109 + o(o),
  110 + checker(checker)
  111 + {
  112 + }
  113 + ~PatternFinder() final = default;
  114 + bool
  115 + check() final
  116 + {
  117 + return (this->o.*checker)();
  118 + }
  119 +
  120 + private:
  121 + Objects& o;
  122 + bool (Objects::*checker)();
  123 +};
  124 +
  125 +bool
  126 +Objects::findHeader()
  127 +{
  128 + qpdf_offset_t global_offset = m->file->tell();
  129 + std::string line = m->file->readLine(1024);
  130 + char const* p = line.data();
  131 + util::assertion(strncmp(p, "%PDF-", 5) == 0, "findHeader is not looking at %PDF-");
  132 + p += 5;
  133 + std::string version;
  134 + // Note: The string returned by line.data() is always null-terminated. The code below never
  135 + // overruns the buffer because a null character always short-circuits further advancement.
  136 + if (!validatePDFVersion(p, version)) {
  137 + return false;
  138 + }
  139 + m->pdf_version = version;
  140 + if (global_offset != 0) {
  141 + // Empirical evidence strongly suggests (codified in PDF 2.0 spec) that when there is
  142 + // leading material prior to the PDF header, all explicit offsets in the file are such that
  143 + // 0 points to the beginning of the header.
  144 + m->file = std::make_shared<OffsetInputSource>(m->file, global_offset);
  145 + }
  146 + return true;
  147 +}
  148 +
104 149 bool
105   -QPDF::findStartxref()
  150 +Objects ::findStartxref()
106 151 {
107   - if (m->objects.readToken(*m->file).isWord("startxref") &&
108   - m->objects.readToken(*m->file).isInteger()) {
  152 + if (readToken(*m->file).isWord("startxref") && readToken(*m->file).isInteger()) {
109 153 // Position in front of offset token
110 154 m->file->seek(m->file->getLastOffset(), SEEK_SET);
111 155 return true;
... ... @@ -121,7 +165,7 @@ Objects::parse(char const* password)
121 165 }
122 166  
123 167 // Find the header anywhere in the first 1024 bytes of the file.
124   - PatternFinder hf(qpdf, &QPDF::findHeader);
  168 + PatternFinder hf(*this, &Objects::findHeader);
125 169 if (!m->file->findFirst("%PDF-", 0, 1024, hf)) {
126 170 warn(damagedPDF("", -1, "can't find PDF header"));
127 171 // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode
... ... @@ -139,7 +183,7 @@ Objects::parse(char const* password)
139 183 m->xref_table_max_id = static_cast<int>(m->xref_table_max_offset / 3);
140 184 }
141 185 qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
142   - PatternFinder sf(qpdf, &QPDF::findStartxref);
  186 + PatternFinder sf(*this, &Objects::findStartxref);
143 187 qpdf_offset_t xref_offset = 0;
144 188 if (m->file->findLast("startxref", start_offset, 0, sf)) {
145 189 xref_offset = QUtil::string_to_ll(readToken(*m->file).getValue().c_str());
... ... @@ -1324,10 +1368,10 @@ Objects::readObjectInStream(is::OffsetBuffer& input, int stream_id, int obj_id)
1324 1368 }
1325 1369  
1326 1370 bool
1327   -QPDF::findEndstream()
  1371 +Objects ::findEndstream()
1328 1372 {
1329 1373 // Find endstream or endobj. Position the input at that token.
1330   - auto t = m->objects.readToken(*m->file, 20);
  1374 + auto t = readToken(*m->file, 20);
1331 1375 if (t.isWord("endobj") || t.isWord("endstream")) {
1332 1376 m->file->seek(m->file->getLastOffset(), SEEK_SET);
1333 1377 return true;
... ... @@ -1342,7 +1386,7 @@ Objects::recoverStreamLength(
1342 1386 // Try to reconstruct stream length by looking for endstream or endobj
1343 1387 warn(damagedPDF(*input, stream_offset, "attempting to recover stream length"));
1344 1388  
1345   - PatternFinder ef(qpdf, &QPDF::findEndstream);
  1389 + PatternFinder ef(*this, &Objects::findEndstream);
1346 1390 size_t length = 0;
1347 1391 if (m->file->findFirst("end", stream_offset, 0, ef)) {
1348 1392 length = toS(m->file->tell() - stream_offset);
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -242,27 +242,6 @@ class QPDF::StringDecrypter final: public QPDFObjectHandle::StringDecrypter
242 242 QPDF* qpdf;
243 243 QPDFObjGen og;
244 244 };
245   -// Other linearization data structures
246   -
247   -class QPDF::PatternFinder final: public InputSource::Finder
248   -{
249   - public:
250   - PatternFinder(QPDF& qpdf, bool (QPDF::*checker)()) :
251   - qpdf(qpdf),
252   - checker(checker)
253   - {
254   - }
255   - ~PatternFinder() final = default;
256   - bool
257   - check() final
258   - {
259   - return (this->qpdf.*checker)();
260   - }
261   -
262   - private:
263   - QPDF& qpdf;
264   - bool (QPDF::*checker)();
265   -};
266 245  
267 246 // This class is used to represent a PDF document.
268 247 //
... ... @@ -1028,6 +1007,8 @@ class QPDF::Doc::Objects: Common
1028 1007 std::vector<bool> compressible_set();
1029 1008  
1030 1009 private:
  1010 + class PatternFinder;
  1011 +
1031 1012 // Get a list of objects that would be permitted in an object stream.
1032 1013 template <typename T>
1033 1014 std::vector<T> compressible();
... ... @@ -1071,6 +1052,11 @@ class QPDF::Doc::Objects: Common
1071 1052 bool isUnresolved(QPDFObjGen og);
1072 1053 void setLastObjectDescription(std::string const& description, QPDFObjGen og);
1073 1054  
  1055 + // Methods to support pattern finding
  1056 + bool findHeader();
  1057 + bool findStartxref();
  1058 + bool findEndstream();
  1059 +
1074 1060 Foreign foreign_;
1075 1061 Streams streams_;
1076 1062  
... ...
qpdf/qpdf.testcov
... ... @@ -129,7 +129,6 @@ QPDFObjectHandle trailing data in parse 0
129 129 QPDFTokenizer EOF reading token 0
130 130 QPDFTokenizer EOF reading appendable token 0
131 131 QPDFWriter extra header text no newline 0
132   -QPDF global offset 0
133 132 QPDFWriter make Extensions direct 0
134 133 QPDFWriter make ADBE direct 1
135 134 QPDFWriter preserve Extensions 0
... ...