Commit 6d640c569a1aea2e38aeb3cbe2527a586cc864ea

Authored by m-holger
1 parent 42c51119

Add additional object id sanity checks

Ensure objects with impossibly large ids are ignored.
include/qpdf/QPDF.hh
... ... @@ -1502,6 +1502,9 @@ class QPDF
1502 1502 std::shared_ptr<EncryptionParameters> encp;
1503 1503 std::string pdf_version;
1504 1504 std::map<QPDFObjGen, QPDFXRefEntry> xref_table;
  1505 + // Various tables are indexed by object id, with potential size id + 1
  1506 + int xref_table_max_id{std::numeric_limits<int>::max() - 1};
  1507 + qpdf_offset_t xref_table_max_offset{0};
1505 1508 std::set<int> deleted_objects;
1506 1509 std::map<QPDFObjGen, ObjCache> obj_cache;
1507 1510 std::set<QPDFObjGen> resolving;
... ...
libqpdf/QPDF.cc
... ... @@ -441,6 +441,12 @@ QPDF::parse(char const* password)
441 441 // 30 characters to leave room for the startxref stuff.
442 442 m->file->seek(0, SEEK_END);
443 443 qpdf_offset_t end_offset = m->file->tell();
  444 + m->xref_table_max_offset = end_offset;
  445 + // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic
  446 + // scenarios at least 3 bytes are required.
  447 + if (m->xref_table_max_id > m->xref_table_max_offset / 3) {
  448 + m->xref_table_max_id = static_cast<int>(m->xref_table_max_offset / 3);
  449 + }
444 450 qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
445 451 PatternFinder sf(*this, &QPDF::findStartxref);
446 452 qpdf_offset_t xref_offset = 0;
... ... @@ -554,9 +560,6 @@ QPDF::reconstruct_xref(QPDFExc& e)
554 560  
555 561 m->file->seek(0, SEEK_END);
556 562 qpdf_offset_t eof = m->file->tell();
557   - // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic
558   - // scenarios at leat 3 bytes are required.
559   - auto max_obj_id = eof / 3;
560 563 m->file->seek(0, SEEK_SET);
561 564 qpdf_offset_t line_start = 0;
562 565 // Don't allow very long tokens here during recovery.
... ... @@ -574,7 +577,7 @@ QPDF::reconstruct_xref(QPDFExc& e)
574 577 if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) {
575 578 int obj = QUtil::string_to_int(t1.getValue().c_str());
576 579 int gen = QUtil::string_to_int(t2.getValue().c_str());
577   - if (obj <= max_obj_id) {
  580 + if (obj <= m->xref_table_max_id) {
578 581 insertReconstructedXrefEntry(obj, token_start, gen);
579 582 } else {
580 583 warn(damagedPDF(
... ... @@ -709,7 +712,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
709 712 int size = m->trailer.getKey("/Size").getIntValueAsInt();
710 713 int max_obj = 0;
711 714 if (!m->xref_table.empty()) {
712   - max_obj = (*(m->xref_table.rbegin())).first.getObj();
  715 + max_obj = m->xref_table.rbegin()->first.getObj();
713 716 }
714 717 if (!m->deleted_objects.empty()) {
715 718 max_obj = std::max(max_obj, *(m->deleted_objects.rbegin()));
... ... @@ -1262,11 +1265,21 @@ QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2)
1262 1265 // If there is already an entry for this object and generation in the table, it means that a
1263 1266 // later xref table has registered this object. Disregard this one.
1264 1267  
  1268 + if (obj > m->xref_table_max_id) {
  1269 + // ignore impossibly large object ids or object ids > Size.
  1270 + return;
  1271 + }
  1272 +
1265 1273 if (m->deleted_objects.count(obj)) {
1266 1274 QTC::TC("qpdf", "QPDF xref deleted object");
1267 1275 return;
1268 1276 }
1269 1277  
  1278 + if (f0 == 2 && static_cast<int>(f1) == obj) {
  1279 + warn(damagedPDF("xref stream", "self-referential object stream " + std::to_string(obj)));
  1280 + return;
  1281 + }
  1282 +
1270 1283 auto [iter, created] = m->xref_table.try_emplace(QPDFObjGen(obj, (f0 == 2 ? 0 : f2)));
1271 1284 if (!created) {
1272 1285 QTC::TC("qpdf", "QPDF xref reused object");
... ... @@ -1303,12 +1316,11 @@ QPDF::insertFreeXrefEntry(QPDFObjGen og)
1303 1316 void
1304 1317 QPDF::insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2)
1305 1318 {
1306   - // Various tables are indexed by object id, with potential size id + 1
1307   - constexpr static int max_id = std::numeric_limits<int>::max() - 1;
1308   - if (!(obj > 0 && obj <= max_id && 0 <= f2 && f2 < 65535)) {
  1319 + if (!(obj > 0 && obj <= m->xref_table_max_id && 0 <= f2 && f2 < 65535)) {
1309 1320 QTC::TC("qpdf", "QPDF xref overwrite invalid objgen");
1310 1321 return;
1311 1322 }
  1323 +
1312 1324 QPDFObjGen og(obj, f2);
1313 1325 if (!m->deleted_objects.count(obj)) {
1314 1326 // deleted_objects stores the uncompressed objects removed from the xref table at the start
... ... @@ -1918,6 +1930,17 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
1918 1930  
1919 1931 int num = QUtil::string_to_int(tnum.getValue().c_str());
1920 1932 long long offset = QUtil::string_to_int(toffset.getValue().c_str());
  1933 + if (num > m->xref_table_max_id) {
  1934 + continue;
  1935 + }
  1936 + if (num == obj_stream_number) {
  1937 + warn(damagedPDF(
  1938 + input,
  1939 + m->last_object_description,
  1940 + input->getLastOffset(),
  1941 + "object stream claims to contain itself"));
  1942 + continue;
  1943 + }
1921 1944 offsets[num] = toI(offset + first);
1922 1945 }
1923 1946  
... ...
qpdf/qtest/qpdf/issue-118.out
1 1 WARNING: issue-118.pdf: can't find PDF header
2   -WARNING: issue-118.pdf (offset 732): loop detected resolving object 2 0
3   -WARNING: issue-118.pdf (xref stream: object 8 0, offset 732): supposed object stream 2 is not a stream
  2 +WARNING: issue-118.pdf (xref stream, offset 732): self-referential object stream 2
4 3 issue-118.pdf: unable to find /Root dictionary
... ...
qpdf/qtest/qpdf/issue-120.out
  1 +WARNING: issue-120.pdf (xref stream, offset 712): self-referential object stream 3
1 2 qpdf: issue-120.pdf: unable to find page tree
... ...
qpdf/qtest/qpdf/issue-143.out
... ... @@ -3,6 +3,7 @@ WARNING: issue-143.pdf (xref stream: object 3 0, offset 654): stream keyword not
3 3 WARNING: issue-143.pdf (xref stream: object 3 0, offset 607): stream dictionary lacks /Length key
4 4 WARNING: issue-143.pdf (xref stream: object 3 0, offset 654): attempting to recover stream length
5 5 WARNING: issue-143.pdf (xref stream: object 3 0, offset 654): recovered stream length: 36
  6 +WARNING: issue-143.pdf (xref stream, offset 654): self-referential object stream 3
6 7 WARNING: issue-143.pdf: file is damaged
7 8 WARNING: issue-143.pdf (object 1 0, offset 48): expected n n obj
8 9 WARNING: issue-143.pdf: Attempting to reconstruct cross-reference table
... ...