Commit 03aa9679ac16be44348f29a97c2c36145ae5a35a
1 parent: 1765c6ec
Find startxref without PCRE
Showing 5 changed files with 29 additions and 34 deletions
include/qpdf/QPDF.hh
libqpdf/QPDF.cc
| ... | ... | @@ -254,11 +254,26 @@ QPDF::findHeader() |
| 254 | 254 | return valid; |
| 255 | 255 | } |
| 256 | 256 | |
| 257 | +bool | |
| 258 | +QPDF::findStartxref() | |
| 259 | +{ | |
| 260 | + QPDFTokenizer::Token t = readToken(this->file, true); | |
| 261 | + if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "startxref")) | |
| 262 | + { | |
| 263 | + t = readToken(this->file, true); | |
| 264 | + if (t.getType() == QPDFTokenizer::tt_integer) | |
| 265 | + { | |
| 266 | + // Position in front of offset token | |
| 267 | + this->file->seek(this->file->getLastOffset(), SEEK_SET); | |
| 268 | + return true; | |
| 269 | + } | |
| 270 | + } | |
| 271 | + return false; | |
| 272 | +} | |
| 273 | + | |
| 257 | 274 | void |
| 258 | 275 | QPDF::parse(char const* password) |
| 259 | 276 | { |
| 260 | - PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)"); | |
| 261 | - | |
| 262 | 277 | if (password) |
| 263 | 278 | { |
| 264 | 279 | this->provided_password = password; |
| ... | ... | @@ -283,47 +298,25 @@ QPDF::parse(char const* password) |
| 283 | 298 | // PDF spec says %%EOF must be found within the last 1024 bytes of |
| 284 | 299 | // the file. We add an extra 30 characters to leave room for the |
| 285 | 300 | // startxref stuff. |
| 286 | - static int const tbuf_size = 1054; | |
| 287 | 301 | this->file->seek(0, SEEK_END); |
| 288 | - if (this->file->tell() > tbuf_size) | |
| 302 | + qpdf_offset_t end_offset = this->file->tell(); | |
| 303 | + qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); | |
| 304 | + PatternFinder sf(*this, &QPDF::findStartxref); | |
| 305 | + qpdf_offset_t xref_offset = 0; | |
| 306 | + if (this->file->findLast("startxref", start_offset, 0, sf)) | |
| 289 | 307 | { |
| 290 | - this->file->seek(-tbuf_size, SEEK_END); | |
| 291 | - } | |
| 292 | - else | |
| 293 | - { | |
| 294 | - this->file->rewind(); | |
| 295 | - } | |
| 296 | - char* buf = new char[tbuf_size + 1]; | |
| 297 | - // Put buf in an array-style PointerHolder to guarantee deletion | |
| 298 | - // of buf. | |
| 299 | - PointerHolder<char> b(true, buf); | |
| 300 | - memset(buf, '\0', tbuf_size + 1); | |
| 301 | - this->file->read(buf, tbuf_size); | |
| 302 | - | |
| 303 | - // Since buf may contain null characters, we can't do a regexp | |
| 304 | - // search on buf directly. Find the last occurrence within buf | |
| 305 | - // where the regexp matches. | |
| 306 | - char* p = buf; | |
| 307 | - char const* candidate = ""; | |
| 308 | - while ((p = static_cast<char*>(memchr(p, 's', tbuf_size - (p - buf)))) != 0) | |
| 309 | - { | |
| 310 | - if (eof_re.match(p)) | |
| 311 | - { | |
| 312 | - candidate = p; | |
| 313 | - } | |
| 314 | - ++p; | |
| 308 | + xref_offset = QUtil::string_to_ll( | |
| 309 | + readToken(this->file).getValue().c_str()); | |
| 315 | 310 | } |
| 316 | 311 | |
| 317 | 312 | try |
| 318 | 313 | { |
| 319 | - PCRE::Match m2 = eof_re.match(candidate); | |
| 320 | - if (! m2) | |
| 314 | + if (xref_offset == 0) | |
| 321 | 315 | { |
| 322 | 316 | QTC::TC("qpdf", "QPDF can't find startxref"); |
| 323 | 317 | throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0, |
| 324 | 318 | "can't find startxref"); |
| 325 | 319 | } |
| 326 | - qpdf_offset_t xref_offset = QUtil::string_to_ll(m2.getMatch(1).c_str()); | |
| 327 | 320 | read_xref(xref_offset); |
| 328 | 321 | } |
| 329 | 322 | catch (QPDFExc& e) | ... | ... |
libqpdf/QPDFTokenizer.cc
qpdf/qpdf.testcov
qpdf/qtest/qpdf/issue-117.out
| 1 | 1 | WARNING: issue-117.pdf: file is damaged |
| 2 | -WARNING: issue-117.pdf: can't find startxref | |
| 2 | +WARNING: issue-117.pdf (file position 3526): xref not found | |
| 3 | 3 | WARNING: issue-117.pdf: Attempting to reconstruct cross-reference table |
| 4 | 4 | WARNING: issue-117.pdf (file position 66): loop detected resolving object 2 0 |
| 5 | 5 | WARNING: issue-117.pdf (object 2 0, file position 22): /Length key in stream dictionary is not an integer | ... | ... |