Commit c422b918b1402ac9e2052ff426c7e64d4342bc99
1 parent: 2643ed4d
Add new private method QPDF::readStream
Showing 2 changed files with 82 additions and 76 deletions
include/qpdf/QPDF.hh
| ... | ... | @@ -1008,6 +1008,7 @@ class QPDF |
| 1008 | 1008 | void setLastObjectDescription(std::string const& description, QPDFObjGen const& og); |
| 1009 | 1009 | QPDFObjectHandle readTrailer(); |
| 1010 | 1010 | QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og); |
| 1011 | + void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); | |
| 1011 | 1012 | QPDFObjectHandle readObjectInStream(std::shared_ptr<InputSource>, QPDFObjGen og); |
| 1012 | 1013 | size_t recoverStreamLength( |
| 1013 | 1014 | std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset); | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -1296,7 +1296,7 @@ QPDF::readObject(std::string const& description, QPDFObjGen og) |
| 1296 | 1296 | std::shared_ptr<StringDecrypter> decrypter_ph; |
| 1297 | 1297 | StringDecrypter* decrypter = nullptr; |
| 1298 | 1298 | if (m->encp->encrypted) { |
| 1299 | - decrypter_ph = std::make_shared<StringDecrypter>(this, og); | |
| 1299 | + decrypter_ph = std::make_unique<StringDecrypter>(this, og); | |
| 1300 | 1300 | decrypter = decrypter_ph.get(); |
| 1301 | 1301 | } |
| 1302 | 1302 | auto object = QPDFParser(m->file, m->last_object_description, m->tokenizer, decrypter, this) |
| ... | ... | @@ -1309,93 +1309,98 @@ QPDF::readObject(std::string const& description, QPDFObjGen og) |
| 1309 | 1309 | // check for stream |
| 1310 | 1310 | qpdf_offset_t cur_offset = m->file->tell(); |
| 1311 | 1311 | if (readToken(m->file).isWord("stream")) { |
| 1312 | - // The PDF specification states that the word "stream" should be followed by either a | |
| 1313 | - // carriage return and a newline or by a newline alone. It specifically disallowed | |
| 1314 | - // following it by a carriage return alone since, in that case, there would be no way to | |
| 1315 | - // tell whether the NL in a CR NL sequence was part of the stream data. However, some | |
| 1316 | - // readers, including Adobe reader, accept a carriage return by itself when followed by | |
| 1317 | - // a non-newline character, so that's what we do here. We have also seen files that have | |
| 1318 | - // extraneous whitespace between the stream keyword and the newline. | |
| 1319 | - bool done = false; | |
| 1320 | - while (!done) { | |
| 1321 | - done = true; | |
| 1322 | - char ch; | |
| 1323 | - if (m->file->read(&ch, 1) == 0) { | |
| 1324 | - // A premature EOF here will result in some other problem that will get reported | |
| 1325 | - // at another time. | |
| 1326 | - } else if (ch == '\n') { | |
| 1327 | - // ready to read stream data | |
| 1328 | - QTC::TC("qpdf", "QPDF stream with NL only"); | |
| 1329 | - } else if (ch == '\r') { | |
| 1330 | - // Read another character | |
| 1331 | - if (m->file->read(&ch, 1) != 0) { | |
| 1332 | - if (ch == '\n') { | |
| 1333 | - // Ready to read stream data | |
| 1334 | - QTC::TC("qpdf", "QPDF stream with CRNL"); | |
| 1335 | - } else { | |
| 1336 | - // Treat the \r by itself as the whitespace after endstream and start | |
| 1337 | - // reading stream data in spite of not having seen a newline. | |
| 1338 | - QTC::TC("qpdf", "QPDF stream with CR only"); | |
| 1339 | - m->file->unreadCh(ch); | |
| 1340 | - warn(damagedPDF( | |
| 1341 | - m->file->tell(), | |
| 1342 | - "stream keyword followed by carriage return only")); | |
| 1343 | - } | |
| 1344 | - } | |
| 1345 | - } else if (QUtil::is_space(ch)) { | |
| 1346 | - warn(damagedPDF( | |
| 1347 | - m->file->tell(), "stream keyword followed by extraneous whitespace")); | |
| 1348 | - done = false; | |
| 1312 | + readStream(object, og, offset); | |
| 1313 | + } else { | |
| 1314 | + m->file->seek(cur_offset, SEEK_SET); | |
| 1315 | + } | |
| 1316 | + } | |
| 1317 | + | |
| 1318 | + // Override last_offset so that it points to the beginning of the object we just read | |
| 1319 | + m->file->setLastOffset(offset); | |
| 1320 | + return object; | |
| 1321 | +} | |
| 1322 | + | |
| 1323 | +// After reading stream dictionary and stream keyword, read rest of stream. | |
| 1324 | +void | |
| 1325 | +QPDF::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset) | |
| 1326 | +{ | |
| 1327 | + // The PDF specification states that the word "stream" should be followed by either a carriage | |
| 1328 | + // return and a newline or by a newline alone. It specifically disallowed following it by a | |
| 1329 | + // carriage return alone since, in that case, there would be no way to tell whether the NL in a | |
| 1330 | + // CR NL sequence was part of the stream data. However, some readers, including Adobe reader, | |
| 1331 | + // accept a carriage return by itself when followed by a non-newline character, so that's what | |
| 1332 | + // we do here. We have also seen files that have extraneous whitespace between the stream | |
| 1333 | + // keyword and the newline. | |
| 1334 | + bool done = false; | |
| 1335 | + while (!done) { | |
| 1336 | + done = true; | |
| 1337 | + char ch; | |
| 1338 | + if (m->file->read(&ch, 1) == 0) { | |
| 1339 | + // A premature EOF here will result in some other problem that will get reported at | |
| 1340 | + // another time. | |
| 1341 | + } else if (ch == '\n') { | |
| 1342 | + // ready to read stream data | |
| 1343 | + QTC::TC("qpdf", "QPDF stream with NL only"); | |
| 1344 | + } else if (ch == '\r') { | |
| 1345 | + // Read another character | |
| 1346 | + if (m->file->read(&ch, 1) != 0) { | |
| 1347 | + if (ch == '\n') { | |
| 1348 | + // Ready to read stream data | |
| 1349 | + QTC::TC("qpdf", "QPDF stream with CRNL"); | |
| 1349 | 1350 | } else { |
| 1350 | - QTC::TC("qpdf", "QPDF stream without newline"); | |
| 1351 | + // Treat the \r by itself as the whitespace after endstream and start reading | |
| 1352 | + // stream data in spite of not having seen a newline. | |
| 1353 | + QTC::TC("qpdf", "QPDF stream with CR only"); | |
| 1351 | 1354 | m->file->unreadCh(ch); |
| 1352 | 1355 | warn(damagedPDF( |
| 1353 | - m->file->tell(), "stream keyword not followed by proper line terminator")); | |
| 1356 | + m->file->tell(), "stream keyword followed by carriage return only")); | |
| 1354 | 1357 | } |
| 1355 | 1358 | } |
| 1359 | + } else if (QUtil::is_space(ch)) { | |
| 1360 | + warn(damagedPDF(m->file->tell(), "stream keyword followed by extraneous whitespace")); | |
| 1361 | + done = false; | |
| 1362 | + } else { | |
| 1363 | + QTC::TC("qpdf", "QPDF stream without newline"); | |
| 1364 | + m->file->unreadCh(ch); | |
| 1365 | + warn(damagedPDF( | |
| 1366 | + m->file->tell(), "stream keyword not followed by proper line terminator")); | |
| 1367 | + } | |
| 1368 | + } | |
| 1356 | 1369 | |
| 1357 | - // Must get offset before accessing any additional objects since resolving a previously | |
| 1358 | - // unresolved indirect object will change file position. | |
| 1359 | - qpdf_offset_t stream_offset = m->file->tell(); | |
| 1360 | - size_t length = 0; | |
| 1361 | - | |
| 1362 | - try { | |
| 1363 | - auto length_obj = object.getKey("/Length"); | |
| 1370 | + // Must get offset before accessing any additional objects since resolving a previously | |
| 1371 | + // unresolved indirect object will change file position. | |
| 1372 | + qpdf_offset_t stream_offset = m->file->tell(); | |
| 1373 | + size_t length = 0; | |
| 1364 | 1374 | |
| 1365 | - if (!length_obj.isInteger()) { | |
| 1366 | - if (length_obj.isNull()) { | |
| 1367 | - QTC::TC("qpdf", "QPDF stream without length"); | |
| 1368 | - throw damagedPDF(offset, "stream dictionary lacks /Length key"); | |
| 1369 | - } | |
| 1370 | - QTC::TC("qpdf", "QPDF stream length not integer"); | |
| 1371 | - throw damagedPDF(offset, "/Length key in stream dictionary is not an integer"); | |
| 1372 | - } | |
| 1375 | + try { | |
| 1376 | + auto length_obj = object.getKey("/Length"); | |
| 1373 | 1377 | |
| 1374 | - length = toS(length_obj.getUIntValue()); | |
| 1375 | - // Seek in two steps to avoid potential integer overflow | |
| 1376 | - m->file->seek(stream_offset, SEEK_SET); | |
| 1377 | - m->file->seek(toO(length), SEEK_CUR); | |
| 1378 | - if (!readToken(m->file).isWord("endstream")) { | |
| 1379 | - QTC::TC("qpdf", "QPDF missing endstream"); | |
| 1380 | - throw damagedPDF("expected endstream"); | |
| 1381 | - } | |
| 1382 | - } catch (QPDFExc& e) { | |
| 1383 | - if (m->attempt_recovery) { | |
| 1384 | - warn(e); | |
| 1385 | - length = recoverStreamLength(m->file, og, stream_offset); | |
| 1386 | - } else { | |
| 1387 | - throw; | |
| 1388 | - } | |
| 1378 | + if (!length_obj.isInteger()) { | |
| 1379 | + if (length_obj.isNull()) { | |
| 1380 | + QTC::TC("qpdf", "QPDF stream without length"); | |
| 1381 | + throw damagedPDF(offset, "stream dictionary lacks /Length key"); | |
| 1389 | 1382 | } |
| 1390 | - object = newIndirect(og, QPDF_Stream::create(this, og, object, stream_offset, length)); | |
| 1383 | + QTC::TC("qpdf", "QPDF stream length not integer"); | |
| 1384 | + throw damagedPDF(offset, "/Length key in stream dictionary is not an integer"); | |
| 1385 | + } | |
| 1386 | + | |
| 1387 | + length = toS(length_obj.getUIntValue()); | |
| 1388 | + // Seek in two steps to avoid potential integer overflow | |
| 1389 | + m->file->seek(stream_offset, SEEK_SET); | |
| 1390 | + m->file->seek(toO(length), SEEK_CUR); | |
| 1391 | + if (!readToken(m->file).isWord("endstream")) { | |
| 1392 | + QTC::TC("qpdf", "QPDF missing endstream"); | |
| 1393 | + throw damagedPDF("expected endstream"); | |
| 1394 | + } | |
| 1395 | + } catch (QPDFExc& e) { | |
| 1396 | + if (m->attempt_recovery) { | |
| 1397 | + warn(e); | |
| 1398 | + length = recoverStreamLength(m->file, og, stream_offset); | |
| 1391 | 1399 | } else { |
| 1392 | - m->file->seek(cur_offset, SEEK_SET); | |
| 1400 | + throw; | |
| 1393 | 1401 | } |
| 1394 | 1402 | } |
| 1395 | - | |
| 1396 | - // Override last_offset so that it points to the beginning of the object we just read | |
| 1397 | - m->file->setLastOffset(offset); | |
| 1398 | - return object; | |
| 1403 | + object = newIndirect(og, QPDF_Stream::create(this, og, object, stream_offset, length)); | |
| 1399 | 1404 | } |
| 1400 | 1405 | |
| 1401 | 1406 | QPDFObjectHandle | ... | ... |