Commit c422b918b1402ac9e2052ff426c7e64d4342bc99

Authored by m-holger
1 parent 2643ed4d

Add new private method QPDF::readStream

include/qpdf/QPDF.hh
@@ -1008,6 +1008,7 @@ class QPDF @@ -1008,6 +1008,7 @@ class QPDF
1008 void setLastObjectDescription(std::string const& description, QPDFObjGen const& og); 1008 void setLastObjectDescription(std::string const& description, QPDFObjGen const& og);
1009 QPDFObjectHandle readTrailer(); 1009 QPDFObjectHandle readTrailer();
1010 QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og); 1010 QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og);
  1011 + void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
1011 QPDFObjectHandle readObjectInStream(std::shared_ptr<InputSource>, QPDFObjGen og); 1012 QPDFObjectHandle readObjectInStream(std::shared_ptr<InputSource>, QPDFObjGen og);
1012 size_t recoverStreamLength( 1013 size_t recoverStreamLength(
1013 std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset); 1014 std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset);
libqpdf/QPDF.cc
@@ -1296,7 +1296,7 @@ QPDF::readObject(std::string const& description, QPDFObjGen og) @@ -1296,7 +1296,7 @@ QPDF::readObject(std::string const& description, QPDFObjGen og)
1296 std::shared_ptr<StringDecrypter> decrypter_ph; 1296 std::shared_ptr<StringDecrypter> decrypter_ph;
1297 StringDecrypter* decrypter = nullptr; 1297 StringDecrypter* decrypter = nullptr;
1298 if (m->encp->encrypted) { 1298 if (m->encp->encrypted) {
1299 - decrypter_ph = std::make_shared<StringDecrypter>(this, og); 1299 + decrypter_ph = std::make_unique<StringDecrypter>(this, og);
1300 decrypter = decrypter_ph.get(); 1300 decrypter = decrypter_ph.get();
1301 } 1301 }
1302 auto object = QPDFParser(m->file, m->last_object_description, m->tokenizer, decrypter, this) 1302 auto object = QPDFParser(m->file, m->last_object_description, m->tokenizer, decrypter, this)
@@ -1309,93 +1309,98 @@ QPDF::readObject(std::string const& description, QPDFObjGen og) @@ -1309,93 +1309,98 @@ QPDF::readObject(std::string const& description, QPDFObjGen og)
1309 // check for stream 1309 // check for stream
1310 qpdf_offset_t cur_offset = m->file->tell(); 1310 qpdf_offset_t cur_offset = m->file->tell();
1311 if (readToken(m->file).isWord("stream")) { 1311 if (readToken(m->file).isWord("stream")) {
1312 - // The PDF specification states that the word "stream" should be followed by either a  
1313 - // carriage return and a newline or by a newline alone. It specifically disallowed  
1314 - // following it by a carriage return alone since, in that case, there would be no way to  
1315 - // tell whether the NL in a CR NL sequence was part of the stream data. However, some  
1316 - // readers, including Adobe reader, accept a carriage return by itself when followed by  
1317 - // a non-newline character, so that's what we do here. We have also seen files that have  
1318 - // extraneous whitespace between the stream keyword and the newline.  
1319 - bool done = false;  
1320 - while (!done) {  
1321 - done = true;  
1322 - char ch;  
1323 - if (m->file->read(&ch, 1) == 0) {  
1324 - // A premature EOF here will result in some other problem that will get reported  
1325 - // at another time.  
1326 - } else if (ch == '\n') {  
1327 - // ready to read stream data  
1328 - QTC::TC("qpdf", "QPDF stream with NL only");  
1329 - } else if (ch == '\r') {  
1330 - // Read another character  
1331 - if (m->file->read(&ch, 1) != 0) {  
1332 - if (ch == '\n') {  
1333 - // Ready to read stream data  
1334 - QTC::TC("qpdf", "QPDF stream with CRNL");  
1335 - } else {  
1336 - // Treat the \r by itself as the whitespace after endstream and start  
1337 - // reading stream data in spite of not having seen a newline.  
1338 - QTC::TC("qpdf", "QPDF stream with CR only");  
1339 - m->file->unreadCh(ch);  
1340 - warn(damagedPDF(  
1341 - m->file->tell(),  
1342 - "stream keyword followed by carriage return only"));  
1343 - }  
1344 - }  
1345 - } else if (QUtil::is_space(ch)) {  
1346 - warn(damagedPDF(  
1347 - m->file->tell(), "stream keyword followed by extraneous whitespace"));  
1348 - done = false; 1312 + readStream(object, og, offset);
  1313 + } else {
  1314 + m->file->seek(cur_offset, SEEK_SET);
  1315 + }
  1316 + }
  1317 +
  1318 + // Override last_offset so that it points to the beginning of the object we just read
  1319 + m->file->setLastOffset(offset);
  1320 + return object;
  1321 +}
  1322 +
  1323 +// After reading stream dictionary and stream keyword, read rest of stream.
  1324 +void
  1325 +QPDF::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
  1326 +{
  1327 + // The PDF specification states that the word "stream" should be followed by either a carriage
  1328 + // return and a newline or by a newline alone. It specifically disallowed following it by a
  1329 + // carriage return alone since, in that case, there would be no way to tell whether the NL in a
  1330 + // CR NL sequence was part of the stream data. However, some readers, including Adobe reader,
  1331 + // accept a carriage return by itself when followed by a non-newline character, so that's what
  1332 + // we do here. We have also seen files that have extraneous whitespace between the stream
  1333 + // keyword and the newline.
  1334 + bool done = false;
  1335 + while (!done) {
  1336 + done = true;
  1337 + char ch;
  1338 + if (m->file->read(&ch, 1) == 0) {
  1339 + // A premature EOF here will result in some other problem that will get reported at
  1340 + // another time.
  1341 + } else if (ch == '\n') {
  1342 + // ready to read stream data
  1343 + QTC::TC("qpdf", "QPDF stream with NL only");
  1344 + } else if (ch == '\r') {
  1345 + // Read another character
  1346 + if (m->file->read(&ch, 1) != 0) {
  1347 + if (ch == '\n') {
  1348 + // Ready to read stream data
  1349 + QTC::TC("qpdf", "QPDF stream with CRNL");
1349 } else { 1350 } else {
1350 - QTC::TC("qpdf", "QPDF stream without newline"); 1351 + // Treat the \r by itself as the whitespace after endstream and start reading
  1352 + // stream data in spite of not having seen a newline.
  1353 + QTC::TC("qpdf", "QPDF stream with CR only");
1351 m->file->unreadCh(ch); 1354 m->file->unreadCh(ch);
1352 warn(damagedPDF( 1355 warn(damagedPDF(
1353 - m->file->tell(), "stream keyword not followed by proper line terminator")); 1356 + m->file->tell(), "stream keyword followed by carriage return only"));
1354 } 1357 }
1355 } 1358 }
  1359 + } else if (QUtil::is_space(ch)) {
  1360 + warn(damagedPDF(m->file->tell(), "stream keyword followed by extraneous whitespace"));
  1361 + done = false;
  1362 + } else {
  1363 + QTC::TC("qpdf", "QPDF stream without newline");
  1364 + m->file->unreadCh(ch);
  1365 + warn(damagedPDF(
  1366 + m->file->tell(), "stream keyword not followed by proper line terminator"));
  1367 + }
  1368 + }
1356 1369
1357 - // Must get offset before accessing any additional objects since resolving a previously  
1358 - // unresolved indirect object will change file position.  
1359 - qpdf_offset_t stream_offset = m->file->tell();  
1360 - size_t length = 0;  
1361 -  
1362 - try {  
1363 - auto length_obj = object.getKey("/Length"); 1370 + // Must get offset before accessing any additional objects since resolving a previously
  1371 + // unresolved indirect object will change file position.
  1372 + qpdf_offset_t stream_offset = m->file->tell();
  1373 + size_t length = 0;
1364 1374
1365 - if (!length_obj.isInteger()) {  
1366 - if (length_obj.isNull()) {  
1367 - QTC::TC("qpdf", "QPDF stream without length");  
1368 - throw damagedPDF(offset, "stream dictionary lacks /Length key");  
1369 - }  
1370 - QTC::TC("qpdf", "QPDF stream length not integer");  
1371 - throw damagedPDF(offset, "/Length key in stream dictionary is not an integer");  
1372 - } 1375 + try {
  1376 + auto length_obj = object.getKey("/Length");
1373 1377
1374 - length = toS(length_obj.getUIntValue());  
1375 - // Seek in two steps to avoid potential integer overflow  
1376 - m->file->seek(stream_offset, SEEK_SET);  
1377 - m->file->seek(toO(length), SEEK_CUR);  
1378 - if (!readToken(m->file).isWord("endstream")) {  
1379 - QTC::TC("qpdf", "QPDF missing endstream");  
1380 - throw damagedPDF("expected endstream");  
1381 - }  
1382 - } catch (QPDFExc& e) {  
1383 - if (m->attempt_recovery) {  
1384 - warn(e);  
1385 - length = recoverStreamLength(m->file, og, stream_offset);  
1386 - } else {  
1387 - throw;  
1388 - } 1378 + if (!length_obj.isInteger()) {
  1379 + if (length_obj.isNull()) {
  1380 + QTC::TC("qpdf", "QPDF stream without length");
  1381 + throw damagedPDF(offset, "stream dictionary lacks /Length key");
1389 } 1382 }
1390 - object = newIndirect(og, QPDF_Stream::create(this, og, object, stream_offset, length)); 1383 + QTC::TC("qpdf", "QPDF stream length not integer");
  1384 + throw damagedPDF(offset, "/Length key in stream dictionary is not an integer");
  1385 + }
  1386 +
  1387 + length = toS(length_obj.getUIntValue());
  1388 + // Seek in two steps to avoid potential integer overflow
  1389 + m->file->seek(stream_offset, SEEK_SET);
  1390 + m->file->seek(toO(length), SEEK_CUR);
  1391 + if (!readToken(m->file).isWord("endstream")) {
  1392 + QTC::TC("qpdf", "QPDF missing endstream");
  1393 + throw damagedPDF("expected endstream");
  1394 + }
  1395 + } catch (QPDFExc& e) {
  1396 + if (m->attempt_recovery) {
  1397 + warn(e);
  1398 + length = recoverStreamLength(m->file, og, stream_offset);
1391 } else { 1399 } else {
1392 - m->file->seek(cur_offset, SEEK_SET); 1400 + throw;
1393 } 1401 }
1394 } 1402 }
1395 -  
1396 - // Override last_offset so that it points to the beginning of the object we just read  
1397 - m->file->setLastOffset(offset);  
1398 - return object; 1403 + object = newIndirect(og, QPDF_Stream::create(this, og, object, stream_offset, length));
1399 } 1404 }
1400 1405
1401 QPDFObjectHandle 1406 QPDFObjectHandle