Commit 03aa9679ac16be44348f29a97c2c36145ae5a35a

Authored by Jay Berkenbilt
1 parent 1765c6ec

Find startxref without PCRE

include/qpdf/QPDF.hh
... ... @@ -1029,6 +1029,7 @@ class QPDF
1029 1029  
1030 1030 // Methods to support pattern finding
1031 1031 bool findHeader();
  1032 + bool findStartxref();
1032 1033  
1033 1034 // methods to support linearization checking -- implemented in
1034 1035 // QPDF_linearization.cc
... ...
libqpdf/QPDF.cc
... ... @@ -254,11 +254,26 @@ QPDF::findHeader()
254 254 return valid;
255 255 }
256 256  
  257 +bool // Returns true only when the next two tokens in this->file are the word "startxref" followed by an integer.
  258 +QPDF::findStartxref()
  259 +{
  260 + QPDFTokenizer::Token t = readToken(this->file, true); // second argument is true here; presumably "allow bad tokens" -- confirm against readToken's signature
  261 + if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "startxref"))
  262 + {
  263 + t = readToken(this->file, true); // token following the keyword; accepted only if it is an integer
  264 + if (t.getType() == QPDFTokenizer::tt_integer)
  265 + {
  266 + // Position in front of offset token, so that parse() can read the offset with its own readToken() call after findLast() succeeds
  267 + this->file->seek(this->file->getLastOffset(), SEEK_SET);
  268 + return true;
  269 + }
  270 + }
  271 + return false; // first token was not "startxref", or it was not followed by an integer
  272 +}
  273 +
257 274 void
258 275 QPDF::parse(char const* password)
259 276 {
260   - PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)");
261   -
262 277 if (password)
263 278 {
264 279 this->provided_password = password;
... ... @@ -283,47 +298,25 @@ QPDF::parse(char const* password)
283 298 // PDF spec says %%EOF must be found within the last 1024 bytes of
284 299 // the file. We add an extra 30 characters to leave room for the
285 300 // startxref stuff.
286   - static int const tbuf_size = 1054;
287 301 this->file->seek(0, SEEK_END);
288   - if (this->file->tell() > tbuf_size)
  302 + qpdf_offset_t end_offset = this->file->tell();
  303 + qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
  304 + PatternFinder sf(*this, &QPDF::findStartxref);
  305 + qpdf_offset_t xref_offset = 0;
  306 + if (this->file->findLast("startxref", start_offset, 0, sf))
289 307 {
290   - this->file->seek(-tbuf_size, SEEK_END);
291   - }
292   - else
293   - {
294   - this->file->rewind();
295   - }
296   - char* buf = new char[tbuf_size + 1];
297   - // Put buf in an array-style PointerHolder to guarantee deletion
298   - // of buf.
299   - PointerHolder<char> b(true, buf);
300   - memset(buf, '\0', tbuf_size + 1);
301   - this->file->read(buf, tbuf_size);
302   -
303   - // Since buf may contain null characters, we can't do a regexp
304   - // search on buf directly. Find the last occurrence within buf
305   - // where the regexp matches.
306   - char* p = buf;
307   - char const* candidate = "";
308   - while ((p = static_cast<char*>(memchr(p, 's', tbuf_size - (p - buf)))) != 0)
309   - {
310   - if (eof_re.match(p))
311   - {
312   - candidate = p;
313   - }
314   - ++p;
  308 + xref_offset = QUtil::string_to_ll(
  309 + readToken(this->file).getValue().c_str());
315 310 }
316 311  
317 312 try
318 313 {
319   - PCRE::Match m2 = eof_re.match(candidate);
320   - if (! m2)
  314 + if (xref_offset == 0)
321 315 {
322 316 QTC::TC("qpdf", "QPDF can't find startxref");
323 317 throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0,
324 318 "can't find startxref");
325 319 }
326   - qpdf_offset_t xref_offset = QUtil::string_to_ll(m2.getMatch(1).c_str());
327 320 read_xref(xref_offset);
328 321 }
329 322 catch (QPDFExc& e)
... ...
libqpdf/QPDFTokenizer.cc
... ... @@ -521,7 +521,7 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input,
521 521 {
522 522 if (allow_bad)
523 523 {
524   -// QTC::TC("qpdf", "QPDFTokenizer allowing bad token");
  524 + QTC::TC("qpdf", "QPDFTokenizer allowing bad token");
525 525 }
526 526 else
527 527 {
... ...
qpdf/qpdf.testcov
... ... @@ -290,3 +290,4 @@ qpdf read args from file 0
290 290 qpdf single-pages %d 0
291 291 qpdf single-pages .pdf 0
292 292 qpdf single-pages other 0
  293 +QPDFTokenizer allowing bad token 0
... ...
qpdf/qtest/qpdf/issue-117.out
1 1 WARNING: issue-117.pdf: file is damaged
2   -WARNING: issue-117.pdf: can't find startxref
  2 +WARNING: issue-117.pdf (file position 3526): xref not found
3 3 WARNING: issue-117.pdf: Attempting to reconstruct cross-reference table
4 4 WARNING: issue-117.pdf (file position 66): loop detected resolving object 2 0
5 5 WARNING: issue-117.pdf (object 2 0, file position 22): /Length key in stream dictionary is not an integer
... ...