Commit c422b918b1402ac9e2052ff426c7e64d4342bc99

Authored by m-holger
1 parent 2643ed4d

Add new private method QPDF::readStream

include/qpdf/QPDF.hh
... ... @@ -1008,6 +1008,7 @@ class QPDF
1008 1008 void setLastObjectDescription(std::string const& description, QPDFObjGen const& og);
1009 1009 QPDFObjectHandle readTrailer();
1010 1010 QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og);
  1011 + void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
1011 1012 QPDFObjectHandle readObjectInStream(std::shared_ptr<InputSource>, QPDFObjGen og);
1012 1013 size_t recoverStreamLength(
1013 1014 std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset);
... ...
libqpdf/QPDF.cc
... ... @@ -1296,7 +1296,7 @@ QPDF::readObject(std::string const& description, QPDFObjGen og)
1296 1296 std::shared_ptr<StringDecrypter> decrypter_ph;
1297 1297 StringDecrypter* decrypter = nullptr;
1298 1298 if (m->encp->encrypted) {
1299   - decrypter_ph = std::make_shared<StringDecrypter>(this, og);
  1299 + decrypter_ph = std::make_unique<StringDecrypter>(this, og);
1300 1300 decrypter = decrypter_ph.get();
1301 1301 }
1302 1302 auto object = QPDFParser(m->file, m->last_object_description, m->tokenizer, decrypter, this)
... ... @@ -1309,93 +1309,98 @@ QPDF::readObject(std::string const& description, QPDFObjGen og)
1309 1309 // check for stream
1310 1310 qpdf_offset_t cur_offset = m->file->tell();
1311 1311 if (readToken(m->file).isWord("stream")) {
1312   - // The PDF specification states that the word "stream" should be followed by either a
1313   - // carriage return and a newline or by a newline alone. It specifically disallowed
1314   - // following it by a carriage return alone since, in that case, there would be no way to
1315   - // tell whether the NL in a CR NL sequence was part of the stream data. However, some
1316   - // readers, including Adobe reader, accept a carriage return by itself when followed by
1317   - // a non-newline character, so that's what we do here. We have also seen files that have
1318   - // extraneous whitespace between the stream keyword and the newline.
1319   - bool done = false;
1320   - while (!done) {
1321   - done = true;
1322   - char ch;
1323   - if (m->file->read(&ch, 1) == 0) {
1324   - // A premature EOF here will result in some other problem that will get reported
1325   - // at another time.
1326   - } else if (ch == '\n') {
1327   - // ready to read stream data
1328   - QTC::TC("qpdf", "QPDF stream with NL only");
1329   - } else if (ch == '\r') {
1330   - // Read another character
1331   - if (m->file->read(&ch, 1) != 0) {
1332   - if (ch == '\n') {
1333   - // Ready to read stream data
1334   - QTC::TC("qpdf", "QPDF stream with CRNL");
1335   - } else {
1336   - // Treat the \r by itself as the whitespace after endstream and start
1337   - // reading stream data in spite of not having seen a newline.
1338   - QTC::TC("qpdf", "QPDF stream with CR only");
1339   - m->file->unreadCh(ch);
1340   - warn(damagedPDF(
1341   - m->file->tell(),
1342   - "stream keyword followed by carriage return only"));
1343   - }
1344   - }
1345   - } else if (QUtil::is_space(ch)) {
1346   - warn(damagedPDF(
1347   - m->file->tell(), "stream keyword followed by extraneous whitespace"));
1348   - done = false;
  1312 + readStream(object, og, offset);
  1313 + } else {
  1314 + m->file->seek(cur_offset, SEEK_SET);
  1315 + }
  1316 + }
  1317 +
  1318 + // Override last_offset so that it points to the beginning of the object we just read
  1319 + m->file->setLastOffset(offset);
  1320 + return object;
  1321 +}
  1322 +
  1323 +// After reading stream dictionary and stream keyword, read rest of stream.
  1324 +void
  1325 +QPDF::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
  1326 +{
  1327 + // The PDF specification states that the word "stream" should be followed by either a carriage
  1328 + // return and a newline or by a newline alone. It specifically disallowed following it by a
  1329 + // carriage return alone since, in that case, there would be no way to tell whether the NL in a
  1330 + // CR NL sequence was part of the stream data. However, some readers, including Adobe reader,
  1331 + // accept a carriage return by itself when followed by a non-newline character, so that's what
  1332 + // we do here. We have also seen files that have extraneous whitespace between the stream
  1333 + // keyword and the newline.
  1334 + bool done = false;
  1335 + while (!done) {
  1336 + done = true;
  1337 + char ch;
  1338 + if (m->file->read(&ch, 1) == 0) {
  1339 + // A premature EOF here will result in some other problem that will get reported at
  1340 + // another time.
  1341 + } else if (ch == '\n') {
  1342 + // ready to read stream data
  1343 + QTC::TC("qpdf", "QPDF stream with NL only");
  1344 + } else if (ch == '\r') {
  1345 + // Read another character
  1346 + if (m->file->read(&ch, 1) != 0) {
  1347 + if (ch == '\n') {
  1348 + // Ready to read stream data
  1349 + QTC::TC("qpdf", "QPDF stream with CRNL");
1349 1350 } else {
1350   - QTC::TC("qpdf", "QPDF stream without newline");
  1351 + // Treat the \r by itself as the whitespace after endstream and start reading
  1352 + // stream data in spite of not having seen a newline.
  1353 + QTC::TC("qpdf", "QPDF stream with CR only");
1351 1354 m->file->unreadCh(ch);
1352 1355 warn(damagedPDF(
1353   - m->file->tell(), "stream keyword not followed by proper line terminator"));
  1356 + m->file->tell(), "stream keyword followed by carriage return only"));
1354 1357 }
1355 1358 }
  1359 + } else if (QUtil::is_space(ch)) {
  1360 + warn(damagedPDF(m->file->tell(), "stream keyword followed by extraneous whitespace"));
  1361 + done = false;
  1362 + } else {
  1363 + QTC::TC("qpdf", "QPDF stream without newline");
  1364 + m->file->unreadCh(ch);
  1365 + warn(damagedPDF(
  1366 + m->file->tell(), "stream keyword not followed by proper line terminator"));
  1367 + }
  1368 + }
1356 1369  
1357   - // Must get offset before accessing any additional objects since resolving a previously
1358   - // unresolved indirect object will change file position.
1359   - qpdf_offset_t stream_offset = m->file->tell();
1360   - size_t length = 0;
1361   -
1362   - try {
1363   - auto length_obj = object.getKey("/Length");
  1370 + // Must get offset before accessing any additional objects since resolving a previously
  1371 + // unresolved indirect object will change file position.
  1372 + qpdf_offset_t stream_offset = m->file->tell();
  1373 + size_t length = 0;
1364 1374  
1365   - if (!length_obj.isInteger()) {
1366   - if (length_obj.isNull()) {
1367   - QTC::TC("qpdf", "QPDF stream without length");
1368   - throw damagedPDF(offset, "stream dictionary lacks /Length key");
1369   - }
1370   - QTC::TC("qpdf", "QPDF stream length not integer");
1371   - throw damagedPDF(offset, "/Length key in stream dictionary is not an integer");
1372   - }
  1375 + try {
  1376 + auto length_obj = object.getKey("/Length");
1373 1377  
1374   - length = toS(length_obj.getUIntValue());
1375   - // Seek in two steps to avoid potential integer overflow
1376   - m->file->seek(stream_offset, SEEK_SET);
1377   - m->file->seek(toO(length), SEEK_CUR);
1378   - if (!readToken(m->file).isWord("endstream")) {
1379   - QTC::TC("qpdf", "QPDF missing endstream");
1380   - throw damagedPDF("expected endstream");
1381   - }
1382   - } catch (QPDFExc& e) {
1383   - if (m->attempt_recovery) {
1384   - warn(e);
1385   - length = recoverStreamLength(m->file, og, stream_offset);
1386   - } else {
1387   - throw;
1388   - }
  1378 + if (!length_obj.isInteger()) {
  1379 + if (length_obj.isNull()) {
  1380 + QTC::TC("qpdf", "QPDF stream without length");
  1381 + throw damagedPDF(offset, "stream dictionary lacks /Length key");
1389 1382 }
1390   - object = newIndirect(og, QPDF_Stream::create(this, og, object, stream_offset, length));
  1383 + QTC::TC("qpdf", "QPDF stream length not integer");
  1384 + throw damagedPDF(offset, "/Length key in stream dictionary is not an integer");
  1385 + }
  1386 +
  1387 + length = toS(length_obj.getUIntValue());
  1388 + // Seek in two steps to avoid potential integer overflow
  1389 + m->file->seek(stream_offset, SEEK_SET);
  1390 + m->file->seek(toO(length), SEEK_CUR);
  1391 + if (!readToken(m->file).isWord("endstream")) {
  1392 + QTC::TC("qpdf", "QPDF missing endstream");
  1393 + throw damagedPDF("expected endstream");
  1394 + }
  1395 + } catch (QPDFExc& e) {
  1396 + if (m->attempt_recovery) {
  1397 + warn(e);
  1398 + length = recoverStreamLength(m->file, og, stream_offset);
1391 1399 } else {
1392   - m->file->seek(cur_offset, SEEK_SET);
  1400 + throw;
1393 1401 }
1394 1402 }
1395   -
1396   - // Override last_offset so that it points to the beginning of the object we just read
1397   - m->file->setLastOffset(offset);
1398   - return object;
  1403 + object = newIndirect(og, QPDF_Stream::create(this, og, object, stream_offset, length));
1399 1404 }
1400 1405  
1401 1406 QPDFObjectHandle
... ...