Commit 1765c6ec20625b99451acceb1ffcaaca812f379e
1 parent: 296b679d
Find header without PCRE
Showing 4 changed files with 57 additions and 20 deletions
include/qpdf/QPDF.hh
| ... | ... | @@ -1027,6 +1027,9 @@ class QPDF |
| 1027 | 1027 | bool (QPDF::*checker)(); |
| 1028 | 1028 | }; |
| 1029 | 1029 | |
| 1030 | + // Methods to support pattern finding | |
| 1031 | + bool findHeader(); | |
| 1032 | + | |
| 1030 | 1033 | // methods to support linearization checking -- implemented in |
| 1031 | 1034 | // QPDF_linearization.cc |
| 1032 | 1035 | void readLinearizationData(); | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -202,27 +202,45 @@ QPDF::getWarnings() |
| 202 | 202 | return result; |
| 203 | 203 | } |
| 204 | 204 | |
| 205 | -void | |
| 206 | -QPDF::parse(char const* password) | |
| 205 | +bool | |
| 206 | +QPDF::findHeader() | |
| 207 | 207 | { |
| 208 | - PCRE header_re("\\A((?s).*?)%PDF-(\\d+.\\d+)\\b"); | |
| 209 | - PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)"); | |
| 210 | - | |
| 211 | - if (password) | |
| 212 | - { | |
| 213 | - this->provided_password = password; | |
| 208 | + qpdf_offset_t global_offset = this->file->tell(); | |
| 209 | + std::string line = this->file->readLine(1024); | |
| 210 | + char const* p = line.c_str(); | |
| 211 | + if (strncmp(p, "%PDF-", 5) != 0) | |
| 212 | + { | |
| 213 | + throw std::logic_error("findHeader is not looking at %PDF-"); | |
| 214 | + } | |
| 215 | + p += 5; | |
| 216 | + std::string version; | |
| 217 | + // Note: The string returned by line.c_str() is always | |
| 218 | + // null-terminated. The code below never overruns the buffer | |
| 219 | + // because a null character always short-circuits further | |
| 220 | + // advancement. | |
| 221 | + bool valid = QUtil::is_digit(*p); | |
| 222 | + if (valid) | |
| 223 | + { | |
| 224 | + while (QUtil::is_digit(*p)) | |
| 225 | + { | |
| 226 | + version.append(1, *p++); | |
| 227 | + } | |
| 228 | + if ((*p == '.') && QUtil::is_digit(*(p+1))) | |
| 229 | + { | |
| 230 | + version.append(1, *p++); | |
| 231 | + while (QUtil::is_digit(*p)) | |
| 232 | + { | |
| 233 | + version.append(1, *p++); | |
| 234 | + } | |
| 235 | + } | |
| 236 | + else | |
| 237 | + { | |
| 238 | + valid = false; | |
| 239 | + } | |
| 214 | 240 | } |
| 215 | - | |
| 216 | - // Find the header anywhere in the first 1024 bytes of the file, | |
| 217 | - // plus add a little extra space for the header itself. | |
| 218 | - char buffer[1045]; | |
| 219 | - memset(buffer, '\0', sizeof(buffer)); | |
| 220 | - this->file->read(buffer, sizeof(buffer) - 1); | |
| 221 | - std::string line(buffer); | |
| 222 | - PCRE::Match m1 = header_re.match(line.c_str()); | |
| 223 | - if (m1) | |
| 241 | + if (valid) | |
| 224 | 242 | { |
| 225 | - size_t global_offset = m1.getMatch(1).length(); | |
| 243 | + this->pdf_version = version; | |
| 226 | 244 | if (global_offset != 0) |
| 227 | 245 | { |
| 228 | 246 | // Empirical evidence strongly suggests that when there is |
| ... | ... | @@ -232,9 +250,23 @@ QPDF::parse(char const* password) |
| 232 | 250 | QTC::TC("qpdf", "QPDF global offset"); |
| 233 | 251 | this->file = new OffsetInputSource(this->file, global_offset); |
| 234 | 252 | } |
| 235 | - this->pdf_version = m1.getMatch(2); | |
| 236 | 253 | } |
| 237 | - else | |
| 254 | + return valid; | |
| 255 | +} | |
| 256 | + | |
| 257 | +void | |
| 258 | +QPDF::parse(char const* password) | |
| 259 | +{ | |
| 260 | + PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)"); | |
| 261 | + | |
| 262 | + if (password) | |
| 263 | + { | |
| 264 | + this->provided_password = password; | |
| 265 | + } | |
| 266 | + | |
| 267 | + // Find the header anywhere in the first 1024 bytes of the file. | |
| 268 | + PatternFinder hf(*this, &QPDF::findHeader); | |
| 269 | + if (! this->file->findFirst("%PDF-", 0, 1024, hf)) | |
| 238 | 270 | { |
| 239 | 271 | QTC::TC("qpdf", "QPDF not a pdf file"); |
| 240 | 272 | warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), | ... | ... |
qpdf/qtest/qpdf/issue-118.out
| 1 | +WARNING: issue-118.pdf: can't find PDF header | |
| 1 | 2 | WARNING: issue-118.pdf (file position 732): loop detected resolving object 2 0 |
| 2 | 3 | WARNING: issue-118.pdf (xref stream: object 8 0, file position 732): supposed object stream 2 is not a stream |
| 3 | 4 | issue-118.pdf (file position 732): unable to find /Root dictionary | ... | ... |
qpdf/qtest/qpdf/issue-51.out
| 1 | +WARNING: issue-51.pdf: can't find PDF header | |
| 1 | 2 | WARNING: issue-51.pdf: reported number of objects (0) inconsistent with actual number of objects (9) |
| 2 | 3 | WARNING: issue-51.pdf (object 7 0, file position 553): expected endobj |
| 3 | 4 | WARNING: issue-51.pdf (object 1 0, file position 359): expected endobj | ... | ... |