Commit 03aa9679ac16be44348f29a97c2c36145ae5a35a
1 parent
1765c6ec
Find startxref without PCRE
Showing
5 changed files
with
29 additions
and
34 deletions
include/qpdf/QPDF.hh
| @@ -1029,6 +1029,7 @@ class QPDF | @@ -1029,6 +1029,7 @@ class QPDF | ||
| 1029 | 1029 | ||
| 1030 | // Methods to support pattern finding | 1030 | // Methods to support pattern finding |
| 1031 | bool findHeader(); | 1031 | bool findHeader(); |
| 1032 | + bool findStartxref(); | ||
| 1032 | 1033 | ||
| 1033 | // methods to support linearization checking -- implemented in | 1034 | // methods to support linearization checking -- implemented in |
| 1034 | // QPDF_linearization.cc | 1035 | // QPDF_linearization.cc |
libqpdf/QPDF.cc
| @@ -254,11 +254,26 @@ QPDF::findHeader() | @@ -254,11 +254,26 @@ QPDF::findHeader() | ||
| 254 | return valid; | 254 | return valid; |
| 255 | } | 255 | } |
| 256 | 256 | ||
| 257 | +bool | ||
| 258 | +QPDF::findStartxref() | ||
| 259 | +{ | ||
| 260 | + QPDFTokenizer::Token t = readToken(this->file, true); | ||
| 261 | + if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "startxref")) | ||
| 262 | + { | ||
| 263 | + t = readToken(this->file, true); | ||
| 264 | + if (t.getType() == QPDFTokenizer::tt_integer) | ||
| 265 | + { | ||
| 266 | + // Position in front of offset token | ||
| 267 | + this->file->seek(this->file->getLastOffset(), SEEK_SET); | ||
| 268 | + return true; | ||
| 269 | + } | ||
| 270 | + } | ||
| 271 | + return false; | ||
| 272 | +} | ||
| 273 | + | ||
| 257 | void | 274 | void |
| 258 | QPDF::parse(char const* password) | 275 | QPDF::parse(char const* password) |
| 259 | { | 276 | { |
| 260 | - PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)"); | ||
| 261 | - | ||
| 262 | if (password) | 277 | if (password) |
| 263 | { | 278 | { |
| 264 | this->provided_password = password; | 279 | this->provided_password = password; |
| @@ -283,47 +298,25 @@ QPDF::parse(char const* password) | @@ -283,47 +298,25 @@ QPDF::parse(char const* password) | ||
| 283 | // PDF spec says %%EOF must be found within the last 1024 bytes of | 298 | // PDF spec says %%EOF must be found within the last 1024 bytes of |
| 284 | // the file. We add an extra 30 characters to leave room for the | 299 | // the file. We add an extra 30 characters to leave room for the |
| 285 | // startxref stuff. | 300 | // startxref stuff. |
| 286 | - static int const tbuf_size = 1054; | ||
| 287 | this->file->seek(0, SEEK_END); | 301 | this->file->seek(0, SEEK_END); |
| 288 | - if (this->file->tell() > tbuf_size) | 302 | + qpdf_offset_t end_offset = this->file->tell(); |
| 303 | + qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); | ||
| 304 | + PatternFinder sf(*this, &QPDF::findStartxref); | ||
| 305 | + qpdf_offset_t xref_offset = 0; | ||
| 306 | + if (this->file->findLast("startxref", start_offset, 0, sf)) | ||
| 289 | { | 307 | { |
| 290 | - this->file->seek(-tbuf_size, SEEK_END); | ||
| 291 | - } | ||
| 292 | - else | ||
| 293 | - { | ||
| 294 | - this->file->rewind(); | ||
| 295 | - } | ||
| 296 | - char* buf = new char[tbuf_size + 1]; | ||
| 297 | - // Put buf in an array-style PointerHolder to guarantee deletion | ||
| 298 | - // of buf. | ||
| 299 | - PointerHolder<char> b(true, buf); | ||
| 300 | - memset(buf, '\0', tbuf_size + 1); | ||
| 301 | - this->file->read(buf, tbuf_size); | ||
| 302 | - | ||
| 303 | - // Since buf may contain null characters, we can't do a regexp | ||
| 304 | - // search on buf directly. Find the last occurrence within buf | ||
| 305 | - // where the regexp matches. | ||
| 306 | - char* p = buf; | ||
| 307 | - char const* candidate = ""; | ||
| 308 | - while ((p = static_cast<char*>(memchr(p, 's', tbuf_size - (p - buf)))) != 0) | ||
| 309 | - { | ||
| 310 | - if (eof_re.match(p)) | ||
| 311 | - { | ||
| 312 | - candidate = p; | ||
| 313 | - } | ||
| 314 | - ++p; | 308 | + xref_offset = QUtil::string_to_ll( |
| 309 | + readToken(this->file).getValue().c_str()); | ||
| 315 | } | 310 | } |
| 316 | 311 | ||
| 317 | try | 312 | try |
| 318 | { | 313 | { |
| 319 | - PCRE::Match m2 = eof_re.match(candidate); | ||
| 320 | - if (! m2) | 314 | + if (xref_offset == 0) |
| 321 | { | 315 | { |
| 322 | QTC::TC("qpdf", "QPDF can't find startxref"); | 316 | QTC::TC("qpdf", "QPDF can't find startxref"); |
| 323 | throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0, | 317 | throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0, |
| 324 | "can't find startxref"); | 318 | "can't find startxref"); |
| 325 | } | 319 | } |
| 326 | - qpdf_offset_t xref_offset = QUtil::string_to_ll(m2.getMatch(1).c_str()); | ||
| 327 | read_xref(xref_offset); | 320 | read_xref(xref_offset); |
| 328 | } | 321 | } |
| 329 | catch (QPDFExc& e) | 322 | catch (QPDFExc& e) |
libqpdf/QPDFTokenizer.cc
| @@ -521,7 +521,7 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, | @@ -521,7 +521,7 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, | ||
| 521 | { | 521 | { |
| 522 | if (allow_bad) | 522 | if (allow_bad) |
| 523 | { | 523 | { |
| 524 | -// QTC::TC("qpdf", "QPDFTokenizer allowing bad token"); | 524 | + QTC::TC("qpdf", "QPDFTokenizer allowing bad token"); |
| 525 | } | 525 | } |
| 526 | else | 526 | else |
| 527 | { | 527 | { |
qpdf/qpdf.testcov
| @@ -290,3 +290,4 @@ qpdf read args from file 0 | @@ -290,3 +290,4 @@ qpdf read args from file 0 | ||
| 290 | qpdf single-pages %d 0 | 290 | qpdf single-pages %d 0 |
| 291 | qpdf single-pages .pdf 0 | 291 | qpdf single-pages .pdf 0 |
| 292 | qpdf single-pages other 0 | 292 | qpdf single-pages other 0 |
| 293 | +QPDFTokenizer allowing bad token 0 |
qpdf/qtest/qpdf/issue-117.out
| 1 | WARNING: issue-117.pdf: file is damaged | 1 | WARNING: issue-117.pdf: file is damaged |
| 2 | -WARNING: issue-117.pdf: can't find startxref | 2 | +WARNING: issue-117.pdf (file position 3526): xref not found |
| 3 | WARNING: issue-117.pdf: Attempting to reconstruct cross-reference table | 3 | WARNING: issue-117.pdf: Attempting to reconstruct cross-reference table |
| 4 | WARNING: issue-117.pdf (file position 66): loop detected resolving object 2 0 | 4 | WARNING: issue-117.pdf (file position 66): loop detected resolving object 2 0 |
| 5 | WARNING: issue-117.pdf (object 2 0, file position 22): /Length key in stream dictionary is not an integer | 5 | WARNING: issue-117.pdf (object 2 0, file position 22): /Length key in stream dictionary is not an integer |