Commit 1765c6ec20625b99451acceb1ffcaaca812f379e
1 parent: 296b679d
Find header without PCRE
Showing 4 changed files with 57 additions and 20 deletions
include/qpdf/QPDF.hh
| @@ -1027,6 +1027,9 @@ class QPDF | @@ -1027,6 +1027,9 @@ class QPDF | ||
| 1027 | bool (QPDF::*checker)(); | 1027 | bool (QPDF::*checker)(); |
| 1028 | }; | 1028 | }; |
| 1029 | 1029 | ||
| 1030 | + // Methods to support pattern finding | ||
| 1031 | + bool findHeader(); | ||
| 1032 | + | ||
| 1030 | // methods to support linearization checking -- implemented in | 1033 | // methods to support linearization checking -- implemented in |
| 1031 | // QPDF_linearization.cc | 1034 | // QPDF_linearization.cc |
| 1032 | void readLinearizationData(); | 1035 | void readLinearizationData(); |
libqpdf/QPDF.cc
| @@ -202,27 +202,45 @@ QPDF::getWarnings() | @@ -202,27 +202,45 @@ QPDF::getWarnings() | ||
| 202 | return result; | 202 | return result; |
| 203 | } | 203 | } |
| 204 | 204 | ||
| 205 | -void | ||
| 206 | -QPDF::parse(char const* password) | 205 | +bool |
| 206 | +QPDF::findHeader() | ||
| 207 | { | 207 | { |
| 208 | - PCRE header_re("\\A((?s).*?)%PDF-(\\d+.\\d+)\\b"); | ||
| 209 | - PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)"); | ||
| 210 | - | ||
| 211 | - if (password) | ||
| 212 | - { | ||
| 213 | - this->provided_password = password; | 208 | + qpdf_offset_t global_offset = this->file->tell(); |
| 209 | + std::string line = this->file->readLine(1024); | ||
| 210 | + char const* p = line.c_str(); | ||
| 211 | + if (strncmp(p, "%PDF-", 5) != 0) | ||
| 212 | + { | ||
| 213 | + throw std::logic_error("findHeader is not looking at %PDF-"); | ||
| 214 | + } | ||
| 215 | + p += 5; | ||
| 216 | + std::string version; | ||
| 217 | + // Note: The string returned by line.c_str() is always | ||
| 218 | + // null-terminated. The code below never overruns the buffer | ||
| 219 | + // because a null character always short-circuits further | ||
| 220 | + // advancement. | ||
| 221 | + bool valid = QUtil::is_digit(*p); | ||
| 222 | + if (valid) | ||
| 223 | + { | ||
| 224 | + while (QUtil::is_digit(*p)) | ||
| 225 | + { | ||
| 226 | + version.append(1, *p++); | ||
| 227 | + } | ||
| 228 | + if ((*p == '.') && QUtil::is_digit(*(p+1))) | ||
| 229 | + { | ||
| 230 | + version.append(1, *p++); | ||
| 231 | + while (QUtil::is_digit(*p)) | ||
| 232 | + { | ||
| 233 | + version.append(1, *p++); | ||
| 234 | + } | ||
| 235 | + } | ||
| 236 | + else | ||
| 237 | + { | ||
| 238 | + valid = false; | ||
| 239 | + } | ||
| 214 | } | 240 | } |
| 215 | - | ||
| 216 | - // Find the header anywhere in the first 1024 bytes of the file, | ||
| 217 | - // plus add a little extra space for the header itself. | ||
| 218 | - char buffer[1045]; | ||
| 219 | - memset(buffer, '\0', sizeof(buffer)); | ||
| 220 | - this->file->read(buffer, sizeof(buffer) - 1); | ||
| 221 | - std::string line(buffer); | ||
| 222 | - PCRE::Match m1 = header_re.match(line.c_str()); | ||
| 223 | - if (m1) | 241 | + if (valid) |
| 224 | { | 242 | { |
| 225 | - size_t global_offset = m1.getMatch(1).length(); | 243 | + this->pdf_version = version; |
| 226 | if (global_offset != 0) | 244 | if (global_offset != 0) |
| 227 | { | 245 | { |
| 228 | // Empirical evidence strongly suggests that when there is | 246 | // Empirical evidence strongly suggests that when there is |
| @@ -232,9 +250,23 @@ QPDF::parse(char const* password) | @@ -232,9 +250,23 @@ QPDF::parse(char const* password) | ||
| 232 | QTC::TC("qpdf", "QPDF global offset"); | 250 | QTC::TC("qpdf", "QPDF global offset"); |
| 233 | this->file = new OffsetInputSource(this->file, global_offset); | 251 | this->file = new OffsetInputSource(this->file, global_offset); |
| 234 | } | 252 | } |
| 235 | - this->pdf_version = m1.getMatch(2); | ||
| 236 | } | 253 | } |
| 237 | - else | 254 | + return valid; |
| 255 | +} | ||
| 256 | + | ||
| 257 | +void | ||
| 258 | +QPDF::parse(char const* password) | ||
| 259 | +{ | ||
| 260 | + PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)"); | ||
| 261 | + | ||
| 262 | + if (password) | ||
| 263 | + { | ||
| 264 | + this->provided_password = password; | ||
| 265 | + } | ||
| 266 | + | ||
| 267 | + // Find the header anywhere in the first 1024 bytes of the file. | ||
| 268 | + PatternFinder hf(*this, &QPDF::findHeader); | ||
| 269 | + if (! this->file->findFirst("%PDF-", 0, 1024, hf)) | ||
| 238 | { | 270 | { |
| 239 | QTC::TC("qpdf", "QPDF not a pdf file"); | 271 | QTC::TC("qpdf", "QPDF not a pdf file"); |
| 240 | warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), | 272 | warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), |
qpdf/qtest/qpdf/issue-118.out
| 1 | +WARNING: issue-118.pdf: can't find PDF header | ||
| 1 | WARNING: issue-118.pdf (file position 732): loop detected resolving object 2 0 | 2 | WARNING: issue-118.pdf (file position 732): loop detected resolving object 2 0 |
| 2 | WARNING: issue-118.pdf (xref stream: object 8 0, file position 732): supposed object stream 2 is not a stream | 3 | WARNING: issue-118.pdf (xref stream: object 8 0, file position 732): supposed object stream 2 is not a stream |
| 3 | issue-118.pdf (file position 732): unable to find /Root dictionary | 4 | issue-118.pdf (file position 732): unable to find /Root dictionary |
qpdf/qtest/qpdf/issue-51.out
| 1 | +WARNING: issue-51.pdf: can't find PDF header | ||
| 1 | WARNING: issue-51.pdf: reported number of objects (0) inconsistent with actual number of objects (9) | 2 | WARNING: issue-51.pdf: reported number of objects (0) inconsistent with actual number of objects (9) |
| 2 | WARNING: issue-51.pdf (object 7 0, file position 553): expected endobj | 3 | WARNING: issue-51.pdf (object 7 0, file position 553): expected endobj |
| 3 | WARNING: issue-51.pdf (object 1 0, file position 359): expected endobj | 4 | WARNING: issue-51.pdf (object 1 0, file position 359): expected endobj |