Commit 98a843c2a2e09df6457c023a8da52faa0d977a12
1 parent: ca5b1d26
Reconstruct xref without PCRE
Showing 1 changed file with 43 additions and 35 deletions
libqpdf/QPDF.cc
| @@ -370,10 +370,6 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -370,10 +370,6 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 370 | 370 | ||
| 371 | this->reconstructed_xref = true; | 371 | this->reconstructed_xref = true; |
| 372 | 372 | ||
| 373 | - PCRE obj_re("^\\s*(\\d+)\\s+(\\d+)\\s+obj\\b"); | ||
| 374 | - PCRE endobj_re("^\\s*endobj\\b"); | ||
| 375 | - PCRE trailer_re("^\\s*trailer\\b"); | ||
| 376 | - | ||
| 377 | warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0, | 373 | warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0, |
| 378 | "file is damaged")); | 374 | "file is damaged")); |
| 379 | warn(e); | 375 | warn(e); |
| @@ -401,45 +397,57 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -401,45 +397,57 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 401 | qpdf_offset_t eof = this->file->tell(); | 397 | qpdf_offset_t eof = this->file->tell(); |
| 402 | this->file->seek(0, SEEK_SET); | 398 | this->file->seek(0, SEEK_SET); |
| 403 | bool in_obj = false; | 399 | bool in_obj = false; |
| 400 | + qpdf_offset_t line_start = 0; | ||
| 404 | while (this->file->tell() < eof) | 401 | while (this->file->tell() < eof) |
| 405 | { | 402 | { |
| 406 | - std::string line = this->file->readLine(50); | ||
| 407 | - if (in_obj) | 403 | + this->file->findAndSkipNextEOL(); |
| 404 | + qpdf_offset_t next_line_start = this->file->tell(); | ||
| 405 | + this->file->seek(line_start, SEEK_SET); | ||
| 406 | + QPDFTokenizer::Token t1 = readToken(this->file, true); | ||
| 407 | + qpdf_offset_t token_start = this->file->tell() - t1.getValue().length(); | ||
| 408 | + if (token_start >= next_line_start) | ||
| 409 | + { | ||
| 410 | + // don't process yet | ||
| 411 | + } | ||
| 412 | + else if (in_obj) | ||
| 408 | { | 413 | { |
| 409 | - if (endobj_re.match(line.c_str())) | 414 | + if (t1 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "endobj")) |
| 410 | { | 415 | { |
| 411 | in_obj = false; | 416 | in_obj = false; |
| 412 | } | 417 | } |
| 413 | } | 418 | } |
| 414 | - else | ||
| 415 | - { | ||
| 416 | - PCRE::Match m = obj_re.match(line.c_str()); | ||
| 417 | - if (m) | ||
| 418 | - { | ||
| 419 | - in_obj = true; | ||
| 420 | - int obj = atoi(m.getMatch(1).c_str()); | ||
| 421 | - int gen = atoi(m.getMatch(2).c_str()); | ||
| 422 | - qpdf_offset_t offset = this->file->getLastOffset(); | ||
| 423 | - insertXrefEntry(obj, 1, offset, gen, true); | ||
| 424 | - } | ||
| 425 | - else if ((! this->trailer.isInitialized()) && | ||
| 426 | - trailer_re.match(line.c_str())) | ||
| 427 | - { | ||
| 428 | - // read "trailer" | ||
| 429 | - this->file->seek(this->file->getLastOffset(), SEEK_SET); | ||
| 430 | - readToken(this->file); | ||
| 431 | - QPDFObjectHandle t = | ||
| 432 | - readObject(this->file, "trailer", 0, 0, false); | ||
| 433 | - if (! t.isDictionary()) | ||
| 434 | - { | ||
| 435 | - // Oh well. It was worth a try. | ||
| 436 | - } | ||
| 437 | - else | ||
| 438 | - { | ||
| 439 | - setTrailer(t); | ||
| 440 | - } | ||
| 441 | - } | 419 | + else |
| 420 | + { | ||
| 421 | + if (t1.getType() == QPDFTokenizer::tt_integer) | ||
| 422 | + { | ||
| 423 | + QPDFTokenizer::Token t2 = readToken(this->file, true); | ||
| 424 | + QPDFTokenizer::Token t3 = readToken(this->file, true); | ||
| 425 | + if ((t2.getType() == QPDFTokenizer::tt_integer) && | ||
| 426 | + (t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj"))) | ||
| 427 | + { | ||
| 428 | + in_obj = true; | ||
| 429 | + int obj = atoi(t1.getValue().c_str()); | ||
| 430 | + int gen = atoi(t2.getValue().c_str()); | ||
| 431 | + insertXrefEntry(obj, 1, token_start, gen, true); | ||
| 432 | + } | ||
| 433 | + } | ||
| 434 | + else if ((! this->trailer.isInitialized()) && | ||
| 435 | + (t1 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "trailer"))) | ||
| 436 | + { | ||
| 437 | + QPDFObjectHandle t = | ||
| 438 | + readObject(this->file, "trailer", 0, 0, false); | ||
| 439 | + if (! t.isDictionary()) | ||
| 440 | + { | ||
| 441 | + // Oh well. It was worth a try. | ||
| 442 | + } | ||
| 443 | + else | ||
| 444 | + { | ||
| 445 | + setTrailer(t); | ||
| 446 | + } | ||
| 447 | + } | ||
| 442 | } | 448 | } |
| 449 | + this->file->seek(next_line_start, SEEK_SET); | ||
| 450 | + line_start = next_line_start; | ||
| 443 | } | 451 | } |
| 444 | 452 | ||
| 445 | if (! this->trailer.isInitialized()) | 453 | if (! this->trailer.isInitialized()) |