Commit 98a843c2a2e09df6457c023a8da52faa0d977a12

Authored by Jay Berkenbilt
1 parent ca5b1d26

Reconstruct xref without PCRE

Showing 1 changed file with 43 additions and 35 deletions
libqpdf/QPDF.cc
... ... @@ -370,10 +370,6 @@ QPDF::reconstruct_xref(QPDFExc& e)
370 370  
371 371 this->reconstructed_xref = true;
372 372  
373   - PCRE obj_re("^\\s*(\\d+)\\s+(\\d+)\\s+obj\\b");
374   - PCRE endobj_re("^\\s*endobj\\b");
375   - PCRE trailer_re("^\\s*trailer\\b");
376   -
377 373 warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0,
378 374 "file is damaged"));
379 375 warn(e);
... ... @@ -401,45 +397,57 @@ QPDF::reconstruct_xref(QPDFExc& e)
401 397 qpdf_offset_t eof = this->file->tell();
402 398 this->file->seek(0, SEEK_SET);
403 399 bool in_obj = false;
  400 + qpdf_offset_t line_start = 0;
404 401 while (this->file->tell() < eof)
405 402 {
406   - std::string line = this->file->readLine(50);
407   - if (in_obj)
  403 + this->file->findAndSkipNextEOL();
  404 + qpdf_offset_t next_line_start = this->file->tell();
  405 + this->file->seek(line_start, SEEK_SET);
  406 + QPDFTokenizer::Token t1 = readToken(this->file, true);
  407 + qpdf_offset_t token_start = this->file->tell() - t1.getValue().length();
  408 + if (token_start >= next_line_start)
  409 + {
  410 + // don't process yet
  411 + }
  412 + else if (in_obj)
408 413 {
409   - if (endobj_re.match(line.c_str()))
  414 + if (t1 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "endobj"))
410 415 {
411 416 in_obj = false;
412 417 }
413 418 }
414   - else
415   - {
416   - PCRE::Match m = obj_re.match(line.c_str());
417   - if (m)
418   - {
419   - in_obj = true;
420   - int obj = atoi(m.getMatch(1).c_str());
421   - int gen = atoi(m.getMatch(2).c_str());
422   - qpdf_offset_t offset = this->file->getLastOffset();
423   - insertXrefEntry(obj, 1, offset, gen, true);
424   - }
425   - else if ((! this->trailer.isInitialized()) &&
426   - trailer_re.match(line.c_str()))
427   - {
428   - // read "trailer"
429   - this->file->seek(this->file->getLastOffset(), SEEK_SET);
430   - readToken(this->file);
431   - QPDFObjectHandle t =
432   - readObject(this->file, "trailer", 0, 0, false);
433   - if (! t.isDictionary())
434   - {
435   - // Oh well. It was worth a try.
436   - }
437   - else
438   - {
439   - setTrailer(t);
440   - }
441   - }
  419 + else
  420 + {
  421 + if (t1.getType() == QPDFTokenizer::tt_integer)
  422 + {
  423 + QPDFTokenizer::Token t2 = readToken(this->file, true);
  424 + QPDFTokenizer::Token t3 = readToken(this->file, true);
  425 + if ((t2.getType() == QPDFTokenizer::tt_integer) &&
  426 + (t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj")))
  427 + {
  428 + in_obj = true;
  429 + int obj = atoi(t1.getValue().c_str());
  430 + int gen = atoi(t2.getValue().c_str());
  431 + insertXrefEntry(obj, 1, token_start, gen, true);
  432 + }
  433 + }
  434 + else if ((! this->trailer.isInitialized()) &&
  435 + (t1 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "trailer")))
  436 + {
  437 + QPDFObjectHandle t =
  438 + readObject(this->file, "trailer", 0, 0, false);
  439 + if (! t.isDictionary())
  440 + {
  441 + // Oh well. It was worth a try.
  442 + }
  443 + else
  444 + {
  445 + setTrailer(t);
  446 + }
  447 + }
442 448 }
  449 + this->file->seek(next_line_start, SEEK_SET);
  450 + line_start = next_line_start;
443 451 }
444 452  
445 453 if (! this->trailer.isInitialized())
... ...