Commit 98a843c2a2e09df6457c023a8da52faa0d977a12

Authored by Jay Berkenbilt
1 parent ca5b1d26

Reconstruct xref without PCRE

Showing 1 changed file with 43 additions and 35 deletions
libqpdf/QPDF.cc
@@ -370,10 +370,6 @@ QPDF::reconstruct_xref(QPDFExc& e) @@ -370,10 +370,6 @@ QPDF::reconstruct_xref(QPDFExc& e)
370 370
371 this->reconstructed_xref = true; 371 this->reconstructed_xref = true;
372 372
373 - PCRE obj_re("^\\s*(\\d+)\\s+(\\d+)\\s+obj\\b");  
374 - PCRE endobj_re("^\\s*endobj\\b");  
375 - PCRE trailer_re("^\\s*trailer\\b");  
376 -  
377 warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0, 373 warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0,
378 "file is damaged")); 374 "file is damaged"));
379 warn(e); 375 warn(e);
@@ -401,45 +397,57 @@ QPDF::reconstruct_xref(QPDFExc& e) @@ -401,45 +397,57 @@ QPDF::reconstruct_xref(QPDFExc& e)
401 qpdf_offset_t eof = this->file->tell(); 397 qpdf_offset_t eof = this->file->tell();
402 this->file->seek(0, SEEK_SET); 398 this->file->seek(0, SEEK_SET);
403 bool in_obj = false; 399 bool in_obj = false;
  400 + qpdf_offset_t line_start = 0;
404 while (this->file->tell() < eof) 401 while (this->file->tell() < eof)
405 { 402 {
406 - std::string line = this->file->readLine(50);  
407 - if (in_obj) 403 + this->file->findAndSkipNextEOL();
  404 + qpdf_offset_t next_line_start = this->file->tell();
  405 + this->file->seek(line_start, SEEK_SET);
  406 + QPDFTokenizer::Token t1 = readToken(this->file, true);
  407 + qpdf_offset_t token_start = this->file->tell() - t1.getValue().length();
  408 + if (token_start >= next_line_start)
  409 + {
  410 + // The token begins on a later line; skip it for now. It will be read again once that line becomes the current line.
  411 + }
  412 + else if (in_obj)
408 { 413 {
409 - if (endobj_re.match(line.c_str())) 414 + if (t1 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "endobj"))
410 { 415 {
411 in_obj = false; 416 in_obj = false;
412 } 417 }
413 } 418 }
414 - else  
415 - {  
416 - PCRE::Match m = obj_re.match(line.c_str());  
417 - if (m)  
418 - {  
419 - in_obj = true;  
420 - int obj = atoi(m.getMatch(1).c_str());  
421 - int gen = atoi(m.getMatch(2).c_str());  
422 - qpdf_offset_t offset = this->file->getLastOffset();  
423 - insertXrefEntry(obj, 1, offset, gen, true);  
424 - }  
425 - else if ((! this->trailer.isInitialized()) &&  
426 - trailer_re.match(line.c_str()))  
427 - {  
428 - // read "trailer"  
429 - this->file->seek(this->file->getLastOffset(), SEEK_SET);  
430 - readToken(this->file);  
431 - QPDFObjectHandle t =  
432 - readObject(this->file, "trailer", 0, 0, false);  
433 - if (! t.isDictionary())  
434 - {  
435 - // Oh well. It was worth a try.  
436 - }  
437 - else  
438 - {  
439 - setTrailer(t);  
440 - }  
441 - } 419 + else
  420 + {
  421 + if (t1.getType() == QPDFTokenizer::tt_integer)
  422 + {
  423 + QPDFTokenizer::Token t2 = readToken(this->file, true);
  424 + QPDFTokenizer::Token t3 = readToken(this->file, true);
  425 + if ((t2.getType() == QPDFTokenizer::tt_integer) &&
  426 + (t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj")))
  427 + {
  428 + in_obj = true;
  429 + int obj = atoi(t1.getValue().c_str());
  430 + int gen = atoi(t2.getValue().c_str());
  431 + insertXrefEntry(obj, 1, token_start, gen, true);
  432 + }
  433 + }
  434 + else if ((! this->trailer.isInitialized()) &&
  435 + (t1 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "trailer")))
  436 + {
  437 + QPDFObjectHandle t =
  438 + readObject(this->file, "trailer", 0, 0, false);
  439 + if (! t.isDictionary())
  440 + {
  441 + // Oh well. It was worth a try. The trailer is left uninitialized.
  442 + }
  443 + else
  444 + {
  445 + setTrailer(t);
  446 + }
  447 + }
442 } 448 }
  449 + this->file->seek(next_line_start, SEEK_SET);
  450 + line_start = next_line_start;
443 } 451 }
444 452
445 if (! this->trailer.isInitialized()) 453 if (! this->trailer.isInitialized())