Commit 85b968418be9104f8ac411f4c1565377c151591d

Authored by m-holger
1 parent 8ded7ff5

Refactor `QPDF` pattern-finding methods: relocate `findHeader`, `findStartxref`, and `findEndstream` to `Objects`, remove `QPDF::PatternFinder` class, and update related logic for improved encapsulation.
include/qpdf/QPDF.hh
@@ -791,13 +791,7 @@ class QPDF @@ -791,13 +791,7 @@ class QPDF
791 bool is_root_metadata, 791 bool is_root_metadata,
792 std::unique_ptr<Pipeline>& heap); 792 std::unique_ptr<Pipeline>& heap);
793 793
794 - class PatternFinder;  
795 -  
796 - // Methods to support pattern finding  
797 static bool validatePDFVersion(char const*&, std::string& version); 794 static bool validatePDFVersion(char const*&, std::string& version);
798 - bool findHeader();  
799 - bool findStartxref();  
800 - bool findEndstream();  
801 795
802 // JSON import 796 // JSON import
803 void importJSON(std::shared_ptr<InputSource>, bool must_be_complete); 797 void importJSON(std::shared_ptr<InputSource>, bool must_be_complete);
libqpdf/QPDF.cc
@@ -334,33 +334,6 @@ QPDF::validatePDFVersion(char const*& p, std::string& version) @@ -334,33 +334,6 @@ QPDF::validatePDFVersion(char const*& p, std::string& version)
334 return true; 334 return true;
335 } 335 }
336 336
337 -bool  
338 -QPDF::findHeader()  
339 -{  
340 - qpdf_offset_t global_offset = m->file->tell();  
341 - std::string line = m->file->readLine(1024);  
342 - char const* p = line.data();  
343 - if (strncmp(p, "%PDF-", 5) != 0) {  
344 - throw std::logic_error("findHeader is not looking at %PDF-");  
345 - }  
346 - p += 5;  
347 - std::string version;  
348 - // Note: The string returned by line.data() is always null-terminated. The code below never  
349 - // overruns the buffer because a null character always short-circuits further advancement.  
350 - if (!validatePDFVersion(p, version)) {  
351 - return false;  
352 - }  
353 - m->pdf_version = version;  
354 - if (global_offset != 0) {  
355 - // Empirical evidence strongly suggests (codified in PDF 2.0 spec) that when there is  
356 - // leading material prior to the PDF header, all explicit offsets in the file are such that  
357 - // 0 points to the beginning of the header.  
358 - QTC::TC("qpdf", "QPDF global offset");  
359 - m->file = std::make_shared<OffsetInputSource>(m->file, global_offset);  
360 - }  
361 - return true;  
362 -}  
363 -  
364 void 337 void
365 QPDF::warn(QPDFExc const& e) 338 QPDF::warn(QPDFExc const& e)
366 { 339 {
libqpdf/QPDF_objects.cc
@@ -3,6 +3,7 @@ @@ -3,6 +3,7 @@
3 #include <qpdf/QPDF_private.hh> 3 #include <qpdf/QPDF_private.hh>
4 4
5 #include <qpdf/InputSource_private.hh> 5 #include <qpdf/InputSource_private.hh>
  6 +#include <qpdf/OffsetInputSource.hh>
6 #include <qpdf/Pipeline.hh> 7 #include <qpdf/Pipeline.hh>
7 #include <qpdf/QPDFExc.hh> 8 #include <qpdf/QPDFExc.hh>
8 #include <qpdf/QPDFLogger.hh> 9 #include <qpdf/QPDFLogger.hh>
@@ -101,11 +102,54 @@ class QPDF::ResolveRecorder final @@ -101,11 +102,54 @@ class QPDF::ResolveRecorder final
101 std::set<QPDFObjGen>::const_iterator iter; 102 std::set<QPDFObjGen>::const_iterator iter;
102 }; 103 };
103 104
  105 +class Objects::PatternFinder final: public InputSource::Finder
  106 +{
  107 + public:
  108 + PatternFinder(Objects& o, bool (Objects::*checker)()) :
  109 + o(o),
  110 + checker(checker)
  111 + {
  112 + }
  113 + ~PatternFinder() final = default;
  114 + bool
  115 + check() final
  116 + {
  117 + return (this->o.*checker)();
  118 + }
  119 +
  120 + private:
  121 + Objects& o;
  122 + bool (Objects::*checker)();
  123 +};
  124 +
  125 +bool
  126 +Objects::findHeader()
  127 +{
  128 + qpdf_offset_t global_offset = m->file->tell();
  129 + std::string line = m->file->readLine(1024);
  130 + char const* p = line.data();
  131 + util::assertion(strncmp(p, "%PDF-", 5) == 0, "findHeader is not looking at %PDF-");
  132 + p += 5;
  133 + std::string version;
  134 + // Note: The string returned by line.data() is always null-terminated. The code below never
  135 + // overruns the buffer because a null character always short-circuits further advancement.
  136 + if (!validatePDFVersion(p, version)) {
  137 + return false;
  138 + }
  139 + m->pdf_version = version;
  140 + if (global_offset != 0) {
  141 + // Empirical evidence strongly suggests (codified in PDF 2.0 spec) that when there is
  142 + // leading material prior to the PDF header, all explicit offsets in the file are such that
  143 + // 0 points to the beginning of the header.
  144 + m->file = std::make_shared<OffsetInputSource>(m->file, global_offset);
  145 + }
  146 + return true;
  147 +}
  148 +
104 bool 149 bool
105 -QPDF::findStartxref() 150 +Objects ::findStartxref()
106 { 151 {
107 - if (m->objects.readToken(*m->file).isWord("startxref") &&  
108 - m->objects.readToken(*m->file).isInteger()) { 152 + if (readToken(*m->file).isWord("startxref") && readToken(*m->file).isInteger()) {
109 // Position in front of offset token 153 // Position in front of offset token
110 m->file->seek(m->file->getLastOffset(), SEEK_SET); 154 m->file->seek(m->file->getLastOffset(), SEEK_SET);
111 return true; 155 return true;
@@ -121,7 +165,7 @@ Objects::parse(char const* password) @@ -121,7 +165,7 @@ Objects::parse(char const* password)
121 } 165 }
122 166
123 // Find the header anywhere in the first 1024 bytes of the file. 167 // Find the header anywhere in the first 1024 bytes of the file.
124 - PatternFinder hf(qpdf, &QPDF::findHeader); 168 + PatternFinder hf(*this, &Objects::findHeader);
125 if (!m->file->findFirst("%PDF-", 0, 1024, hf)) { 169 if (!m->file->findFirst("%PDF-", 0, 1024, hf)) {
126 warn(damagedPDF("", -1, "can't find PDF header")); 170 warn(damagedPDF("", -1, "can't find PDF header"));
127 // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode 171 // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode
@@ -139,7 +183,7 @@ Objects::parse(char const* password) @@ -139,7 +183,7 @@ Objects::parse(char const* password)
139 m->xref_table_max_id = static_cast<int>(m->xref_table_max_offset / 3); 183 m->xref_table_max_id = static_cast<int>(m->xref_table_max_offset / 3);
140 } 184 }
141 qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); 185 qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
142 - PatternFinder sf(qpdf, &QPDF::findStartxref); 186 + PatternFinder sf(*this, &Objects::findStartxref);
143 qpdf_offset_t xref_offset = 0; 187 qpdf_offset_t xref_offset = 0;
144 if (m->file->findLast("startxref", start_offset, 0, sf)) { 188 if (m->file->findLast("startxref", start_offset, 0, sf)) {
145 xref_offset = QUtil::string_to_ll(readToken(*m->file).getValue().c_str()); 189 xref_offset = QUtil::string_to_ll(readToken(*m->file).getValue().c_str());
@@ -1324,10 +1368,10 @@ Objects::readObjectInStream(is::OffsetBuffer& input, int stream_id, int obj_id) @@ -1324,10 +1368,10 @@ Objects::readObjectInStream(is::OffsetBuffer& input, int stream_id, int obj_id)
1324 } 1368 }
1325 1369
1326 bool 1370 bool
1327 -QPDF::findEndstream() 1371 +Objects ::findEndstream()
1328 { 1372 {
1329 // Find endstream or endobj. Position the input at that token. 1373 // Find endstream or endobj. Position the input at that token.
1330 - auto t = m->objects.readToken(*m->file, 20); 1374 + auto t = readToken(*m->file, 20);
1331 if (t.isWord("endobj") || t.isWord("endstream")) { 1375 if (t.isWord("endobj") || t.isWord("endstream")) {
1332 m->file->seek(m->file->getLastOffset(), SEEK_SET); 1376 m->file->seek(m->file->getLastOffset(), SEEK_SET);
1333 return true; 1377 return true;
@@ -1342,7 +1386,7 @@ Objects::recoverStreamLength( @@ -1342,7 +1386,7 @@ Objects::recoverStreamLength(
1342 // Try to reconstruct stream length by looking for endstream or endobj 1386 // Try to reconstruct stream length by looking for endstream or endobj
1343 warn(damagedPDF(*input, stream_offset, "attempting to recover stream length")); 1387 warn(damagedPDF(*input, stream_offset, "attempting to recover stream length"));
1344 1388
1345 - PatternFinder ef(qpdf, &QPDF::findEndstream); 1389 + PatternFinder ef(*this, &Objects::findEndstream);
1346 size_t length = 0; 1390 size_t length = 0;
1347 if (m->file->findFirst("end", stream_offset, 0, ef)) { 1391 if (m->file->findFirst("end", stream_offset, 0, ef)) {
1348 length = toS(m->file->tell() - stream_offset); 1392 length = toS(m->file->tell() - stream_offset);
libqpdf/qpdf/QPDF_private.hh
@@ -242,27 +242,6 @@ class QPDF::StringDecrypter final: public QPDFObjectHandle::StringDecrypter @@ -242,27 +242,6 @@ class QPDF::StringDecrypter final: public QPDFObjectHandle::StringDecrypter
242 QPDF* qpdf; 242 QPDF* qpdf;
243 QPDFObjGen og; 243 QPDFObjGen og;
244 }; 244 };
245 -// Other linearization data structures  
246 -  
247 -class QPDF::PatternFinder final: public InputSource::Finder  
248 -{  
249 - public:  
250 - PatternFinder(QPDF& qpdf, bool (QPDF::*checker)()) :  
251 - qpdf(qpdf),  
252 - checker(checker)  
253 - {  
254 - }  
255 - ~PatternFinder() final = default;  
256 - bool  
257 - check() final  
258 - {  
259 - return (this->qpdf.*checker)();  
260 - }  
261 -  
262 - private:  
263 - QPDF& qpdf;  
264 - bool (QPDF::*checker)();  
265 -};  
266 245
267 // This class is used to represent a PDF document. 246 // This class is used to represent a PDF document.
268 // 247 //
@@ -1028,6 +1007,8 @@ class QPDF::Doc::Objects: Common @@ -1028,6 +1007,8 @@ class QPDF::Doc::Objects: Common
1028 std::vector<bool> compressible_set(); 1007 std::vector<bool> compressible_set();
1029 1008
1030 private: 1009 private:
  1010 + class PatternFinder;
  1011 +
1031 // Get a list of objects that would be permitted in an object stream. 1012 // Get a list of objects that would be permitted in an object stream.
1032 template <typename T> 1013 template <typename T>
1033 std::vector<T> compressible(); 1014 std::vector<T> compressible();
@@ -1071,6 +1052,11 @@ class QPDF::Doc::Objects: Common @@ -1071,6 +1052,11 @@ class QPDF::Doc::Objects: Common
1071 bool isUnresolved(QPDFObjGen og); 1052 bool isUnresolved(QPDFObjGen og);
1072 void setLastObjectDescription(std::string const& description, QPDFObjGen og); 1053 void setLastObjectDescription(std::string const& description, QPDFObjGen og);
1073 1054
  1055 + // Methods to support pattern finding
  1056 + bool findHeader();
  1057 + bool findStartxref();
  1058 + bool findEndstream();
  1059 +
1074 Foreign foreign_; 1060 Foreign foreign_;
1075 Streams streams_; 1061 Streams streams_;
1076 1062
qpdf/qpdf.testcov
@@ -129,7 +129,6 @@ QPDFObjectHandle trailing data in parse 0 @@ -129,7 +129,6 @@ QPDFObjectHandle trailing data in parse 0
129 QPDFTokenizer EOF reading token 0 129 QPDFTokenizer EOF reading token 0
130 QPDFTokenizer EOF reading appendable token 0 130 QPDFTokenizer EOF reading appendable token 0
131 QPDFWriter extra header text no newline 0 131 QPDFWriter extra header text no newline 0
132 -QPDF global offset 0  
133 QPDFWriter make Extensions direct 0 132 QPDFWriter make Extensions direct 0
134 QPDFWriter make ADBE direct 1 133 QPDFWriter make ADBE direct 1
135 QPDFWriter preserve Extensions 0 134 QPDFWriter preserve Extensions 0