Commit 1765c6ec20625b99451acceb1ffcaaca812f379e

Authored by Jay Berkenbilt
1 parent 296b679d

Find header without PCRE

include/qpdf/QPDF.hh
... ... @@ -1027,6 +1027,9 @@ class QPDF
1027 1027 bool (QPDF::*checker)();
1028 1028 };
1029 1029  
  1030 + // Methods to support pattern finding
  1031 + bool findHeader();
  1032 +
1030 1033 // methods to support linearization checking -- implemented in
1031 1034 // QPDF_linearization.cc
1032 1035 void readLinearizationData();
... ...
libqpdf/QPDF.cc
... ... @@ -202,27 +202,45 @@ QPDF::getWarnings()
202 202 return result;
203 203 }
204 204  
205   -void
206   -QPDF::parse(char const* password)
  205 +bool
  206 +QPDF::findHeader()
207 207 {
208   - PCRE header_re("\\A((?s).*?)%PDF-(\\d+.\\d+)\\b");
209   - PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)");
210   -
211   - if (password)
212   - {
213   - this->provided_password = password;
  208 + qpdf_offset_t global_offset = this->file->tell();
  209 + std::string line = this->file->readLine(1024);
  210 + char const* p = line.c_str();
  211 + if (strncmp(p, "%PDF-", 5) != 0)
  212 + {
  213 + throw std::logic_error("findHeader is not looking at %PDF-");
  214 + }
  215 + p += 5;
  216 + std::string version;
  217 + // Note: The string returned by line.c_str() is always
  218 + // null-terminated. The code below never overruns the buffer
  219 + // because a null character always short-circuits further
  220 + // advancement.
  221 + bool valid = QUtil::is_digit(*p);
  222 + if (valid)
  223 + {
  224 + while (QUtil::is_digit(*p))
  225 + {
  226 + version.append(1, *p++);
  227 + }
  228 + if ((*p == '.') && QUtil::is_digit(*(p+1)))
  229 + {
  230 + version.append(1, *p++);
  231 + while (QUtil::is_digit(*p))
  232 + {
  233 + version.append(1, *p++);
  234 + }
  235 + }
  236 + else
  237 + {
  238 + valid = false;
  239 + }
214 240 }
215   -
216   - // Find the header anywhere in the first 1024 bytes of the file,
217   - // plus add a little extra space for the header itself.
218   - char buffer[1045];
219   - memset(buffer, '\0', sizeof(buffer));
220   - this->file->read(buffer, sizeof(buffer) - 1);
221   - std::string line(buffer);
222   - PCRE::Match m1 = header_re.match(line.c_str());
223   - if (m1)
  241 + if (valid)
224 242 {
225   - size_t global_offset = m1.getMatch(1).length();
  243 + this->pdf_version = version;
226 244 if (global_offset != 0)
227 245 {
228 246 // Empirical evidence strongly suggests that when there is
... ... @@ -232,9 +250,23 @@ QPDF::parse(char const* password)
232 250 QTC::TC("qpdf", "QPDF global offset");
233 251 this->file = new OffsetInputSource(this->file, global_offset);
234 252 }
235   - this->pdf_version = m1.getMatch(2);
236 253 }
237   - else
  254 + return valid;
  255 +}
  256 +
  257 +void
  258 +QPDF::parse(char const* password)
  259 +{
  260 + PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)");
  261 +
  262 + if (password)
  263 + {
  264 + this->provided_password = password;
  265 + }
  266 +
  267 + // Find the header anywhere in the first 1024 bytes of the file.
  268 + PatternFinder hf(*this, &QPDF::findHeader);
  269 + if (! this->file->findFirst("%PDF-", 0, 1024, hf))
238 270 {
239 271 QTC::TC("qpdf", "QPDF not a pdf file");
240 272 warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
... ...
qpdf/qtest/qpdf/issue-118.out
  1 +WARNING: issue-118.pdf: can't find PDF header
1 2 WARNING: issue-118.pdf (file position 732): loop detected resolving object 2 0
2 3 WARNING: issue-118.pdf (xref stream: object 8 0, file position 732): supposed object stream 2 is not a stream
3 4 issue-118.pdf (file position 732): unable to find /Root dictionary
... ...
qpdf/qtest/qpdf/issue-51.out
  1 +WARNING: issue-51.pdf: can't find PDF header
1 2 WARNING: issue-51.pdf: reported number of objects (0) inconsistent with actual number of objects (9)
2 3 WARNING: issue-51.pdf (object 7 0, file position 553): expected endobj
3 4 WARNING: issue-51.pdf (object 1 0, file position 359): expected endobj
... ...