Commit 98a843c2a2e09df6457c023a8da52faa0d977a12
1 parent: ca5b1d26
Reconstruct xref without PCRE
Showing 1 changed file with 43 additions and 35 deletions
libqpdf/QPDF.cc
| ... | ... | @@ -370,10 +370,6 @@ QPDF::reconstruct_xref(QPDFExc& e) |
| 370 | 370 | |
| 371 | 371 | this->reconstructed_xref = true; |
| 372 | 372 | |
| 373 | - PCRE obj_re("^\\s*(\\d+)\\s+(\\d+)\\s+obj\\b"); | |
| 374 | - PCRE endobj_re("^\\s*endobj\\b"); | |
| 375 | - PCRE trailer_re("^\\s*trailer\\b"); | |
| 376 | - | |
| 377 | 373 | warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0, |
| 378 | 374 | "file is damaged")); |
| 379 | 375 | warn(e); |
| ... | ... | @@ -401,45 +397,57 @@ QPDF::reconstruct_xref(QPDFExc& e) |
| 401 | 397 | qpdf_offset_t eof = this->file->tell(); |
| 402 | 398 | this->file->seek(0, SEEK_SET); |
| 403 | 399 | bool in_obj = false; |
| 400 | + qpdf_offset_t line_start = 0; | |
| 404 | 401 | while (this->file->tell() < eof) |
| 405 | 402 | { |
| 406 | - std::string line = this->file->readLine(50); | |
| 407 | - if (in_obj) | |
| 403 | + this->file->findAndSkipNextEOL(); | |
| 404 | + qpdf_offset_t next_line_start = this->file->tell(); | |
| 405 | + this->file->seek(line_start, SEEK_SET); | |
| 406 | + QPDFTokenizer::Token t1 = readToken(this->file, true); | |
| 407 | + qpdf_offset_t token_start = this->file->tell() - t1.getValue().length(); | |
| 408 | + if (token_start >= next_line_start) | |
| 409 | + { | |
| 410 | + // don't process yet | |
| 411 | + } | |
| 412 | + else if (in_obj) | |
| 408 | 413 | { |
| 409 | - if (endobj_re.match(line.c_str())) | |
| 414 | + if (t1 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "endobj")) | |
| 410 | 415 | { |
| 411 | 416 | in_obj = false; |
| 412 | 417 | } |
| 413 | 418 | } |
| 414 | - else | |
| 415 | - { | |
| 416 | - PCRE::Match m = obj_re.match(line.c_str()); | |
| 417 | - if (m) | |
| 418 | - { | |
| 419 | - in_obj = true; | |
| 420 | - int obj = atoi(m.getMatch(1).c_str()); | |
| 421 | - int gen = atoi(m.getMatch(2).c_str()); | |
| 422 | - qpdf_offset_t offset = this->file->getLastOffset(); | |
| 423 | - insertXrefEntry(obj, 1, offset, gen, true); | |
| 424 | - } | |
| 425 | - else if ((! this->trailer.isInitialized()) && | |
| 426 | - trailer_re.match(line.c_str())) | |
| 427 | - { | |
| 428 | - // read "trailer" | |
| 429 | - this->file->seek(this->file->getLastOffset(), SEEK_SET); | |
| 430 | - readToken(this->file); | |
| 431 | - QPDFObjectHandle t = | |
| 432 | - readObject(this->file, "trailer", 0, 0, false); | |
| 433 | - if (! t.isDictionary()) | |
| 434 | - { | |
| 435 | - // Oh well. It was worth a try. | |
| 436 | - } | |
| 437 | - else | |
| 438 | - { | |
| 439 | - setTrailer(t); | |
| 440 | - } | |
| 441 | - } | |
| 419 | + else | |
| 420 | + { | |
| 421 | + if (t1.getType() == QPDFTokenizer::tt_integer) | |
| 422 | + { | |
| 423 | + QPDFTokenizer::Token t2 = readToken(this->file, true); | |
| 424 | + QPDFTokenizer::Token t3 = readToken(this->file, true); | |
| 425 | + if ((t2.getType() == QPDFTokenizer::tt_integer) && | |
| 426 | + (t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj"))) | |
| 427 | + { | |
| 428 | + in_obj = true; | |
| 429 | + int obj = atoi(t1.getValue().c_str()); | |
| 430 | + int gen = atoi(t2.getValue().c_str()); | |
| 431 | + insertXrefEntry(obj, 1, token_start, gen, true); | |
| 432 | + } | |
| 433 | + } | |
| 434 | + else if ((! this->trailer.isInitialized()) && | |
| 435 | + (t1 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "trailer"))) | |
| 436 | + { | |
| 437 | + QPDFObjectHandle t = | |
| 438 | + readObject(this->file, "trailer", 0, 0, false); | |
| 439 | + if (! t.isDictionary()) | |
| 440 | + { | |
| 441 | + // Oh well. It was worth a try. | |
| 442 | + } | |
| 443 | + else | |
| 444 | + { | |
| 445 | + setTrailer(t); | |
| 446 | + } | |
| 447 | + } | |
| 442 | 448 | } |
| 449 | + this->file->seek(next_line_start, SEEK_SET); | |
| 450 | + line_start = next_line_start; | |
| 443 | 451 | } |
| 444 | 452 | |
| 445 | 453 | if (! this->trailer.isInitialized()) | ... | ... |