Commit 0ac37bc9561f1cf1aca2c55fc0e4702d3febcf75
1 parent: f8e6274a
Add new class QPDF::Xref_table
Showing 7 changed files with 85 additions and 74 deletions
include/qpdf/QPDF.hh
| @@ -733,6 +733,7 @@ class QPDF | @@ -733,6 +733,7 @@ class QPDF | ||
| 733 | class ParseGuard; | 733 | class ParseGuard; |
| 734 | class Pipe; | 734 | class Pipe; |
| 735 | class JobSetter; | 735 | class JobSetter; |
| 736 | + class Xref_table; | ||
| 736 | 737 | ||
| 737 | // For testing only -- do not add to DLL | 738 | // For testing only -- do not add to DLL |
| 738 | static bool test_json_validators(); | 739 | static bool test_json_validators(); |
libqpdf/QPDF.cc
| @@ -303,7 +303,7 @@ QPDF::registerStreamFilter( | @@ -303,7 +303,7 @@ QPDF::registerStreamFilter( | ||
| 303 | void | 303 | void |
| 304 | QPDF::setIgnoreXRefStreams(bool val) | 304 | QPDF::setIgnoreXRefStreams(bool val) |
| 305 | { | 305 | { |
| 306 | - m->ignore_xref_streams = val; | 306 | + m->xref_table.ignore_streams = val; |
| 307 | } | 307 | } |
| 308 | 308 | ||
| 309 | std::shared_ptr<QPDFLogger> | 309 | std::shared_ptr<QPDFLogger> |
| @@ -341,6 +341,7 @@ void | @@ -341,6 +341,7 @@ void | ||
| 341 | QPDF::setAttemptRecovery(bool val) | 341 | QPDF::setAttemptRecovery(bool val) |
| 342 | { | 342 | { |
| 343 | m->attempt_recovery = val; | 343 | m->attempt_recovery = val; |
| 344 | + m->xref_table.attempt_recovery = val; | ||
| 344 | } | 345 | } |
| 345 | 346 | ||
| 346 | void | 347 | void |
| @@ -447,11 +448,11 @@ QPDF::parse(char const* password) | @@ -447,11 +448,11 @@ QPDF::parse(char const* password) | ||
| 447 | // 30 characters to leave room for the startxref stuff. | 448 | // 30 characters to leave room for the startxref stuff. |
| 448 | m->file->seek(0, SEEK_END); | 449 | m->file->seek(0, SEEK_END); |
| 449 | qpdf_offset_t end_offset = m->file->tell(); | 450 | qpdf_offset_t end_offset = m->file->tell(); |
| 450 | - m->xref_table_max_offset = end_offset; | 451 | + m->xref_table.max_offset = end_offset; |
| 451 | // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic | 452 | // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic |
| 452 | // scenarios at least 3 bytes are required. | 453 | // scenarios at least 3 bytes are required. |
| 453 | - if (m->xref_table_max_id > m->xref_table_max_offset / 3) { | ||
| 454 | - m->xref_table_max_id = static_cast<int>(m->xref_table_max_offset / 3); | 454 | + if (m->xref_table.max_id > m->xref_table.max_offset / 3) { |
| 455 | + m->xref_table.max_id = static_cast<int>(m->xref_table.max_offset / 3); | ||
| 455 | } | 456 | } |
| 456 | qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); | 457 | qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); |
| 457 | PatternFinder sf(*this, &QPDF::findStartxref); | 458 | PatternFinder sf(*this, &QPDF::findStartxref); |
| @@ -482,7 +483,7 @@ QPDF::parse(char const* password) | @@ -482,7 +483,7 @@ QPDF::parse(char const* password) | ||
| 482 | } | 483 | } |
| 483 | 484 | ||
| 484 | initializeEncryption(); | 485 | initializeEncryption(); |
| 485 | - m->parsed = true; | 486 | + m->xref_table.parsed = true; |
| 486 | if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) { | 487 | if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) { |
| 487 | // QPDFs created from JSON have an empty xref table and no root object yet. | 488 | // QPDFs created from JSON have an empty xref table and no root object yet. |
| 488 | throw damagedPDF("", 0, "unable to find page tree"); | 489 | throw damagedPDF("", 0, "unable to find page tree"); |
| @@ -526,16 +527,16 @@ QPDF::warn( | @@ -526,16 +527,16 @@ QPDF::warn( | ||
| 526 | void | 527 | void |
| 527 | QPDF::setTrailer(QPDFObjectHandle obj) | 528 | QPDF::setTrailer(QPDFObjectHandle obj) |
| 528 | { | 529 | { |
| 529 | - if (m->trailer) { | 530 | + if (m->xref_table.trailer) { |
| 530 | return; | 531 | return; |
| 531 | } | 532 | } |
| 532 | - m->trailer = obj; | 533 | + m->xref_table.trailer = obj; |
| 533 | } | 534 | } |
| 534 | 535 | ||
| 535 | void | 536 | void |
| 536 | QPDF::reconstruct_xref(QPDFExc& e) | 537 | QPDF::reconstruct_xref(QPDFExc& e) |
| 537 | { | 538 | { |
| 538 | - if (m->reconstructed_xref) { | 539 | + if (m->xref_table.reconstructed) { |
| 539 | // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because | 540 | // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because |
| 540 | // qpdf is throwing many fewer exceptions while parsing. Most situations are warnings now. | 541 | // qpdf is throwing many fewer exceptions while parsing. Most situations are warnings now. |
| 541 | throw e; | 542 | throw e; |
| @@ -550,7 +551,7 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -550,7 +551,7 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 550 | } | 551 | } |
| 551 | }; | 552 | }; |
| 552 | 553 | ||
| 553 | - m->reconstructed_xref = true; | 554 | + m->xref_table.reconstructed = true; |
| 554 | // We may find more objects, which may contain dangling references. | 555 | // We may find more objects, which may contain dangling references. |
| 555 | m->fixed_dangling_refs = false; | 556 | m->fixed_dangling_refs = false; |
| 556 | 557 | ||
| @@ -583,7 +584,7 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -583,7 +584,7 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 583 | if ((t2.isInteger()) && (readToken(*m->file, MAX_LEN).isWord("obj"))) { | 584 | if ((t2.isInteger()) && (readToken(*m->file, MAX_LEN).isWord("obj"))) { |
| 584 | int obj = QUtil::string_to_int(t1.getValue().c_str()); | 585 | int obj = QUtil::string_to_int(t1.getValue().c_str()); |
| 585 | int gen = QUtil::string_to_int(t2.getValue().c_str()); | 586 | int gen = QUtil::string_to_int(t2.getValue().c_str()); |
| 586 | - if (obj <= m->xref_table_max_id) { | 587 | + if (obj <= m->xref_table.max_id) { |
| 587 | insertReconstructedXrefEntry(obj, token_start, gen); | 588 | insertReconstructedXrefEntry(obj, token_start, gen); |
| 588 | } else { | 589 | } else { |
| 589 | warn(damagedPDF( | 590 | warn(damagedPDF( |
| @@ -591,7 +592,7 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -591,7 +592,7 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 591 | } | 592 | } |
| 592 | } | 593 | } |
| 593 | m->file->seek(pos, SEEK_SET); | 594 | m->file->seek(pos, SEEK_SET); |
| 594 | - } else if (!m->trailer && t1.isWord("trailer")) { | 595 | + } else if (!m->xref_table.trailer && t1.isWord("trailer")) { |
| 595 | auto pos = m->file->tell(); | 596 | auto pos = m->file->tell(); |
| 596 | QPDFObjectHandle t = readTrailer(); | 597 | QPDFObjectHandle t = readTrailer(); |
| 597 | if (!t.isDictionary()) { | 598 | if (!t.isDictionary()) { |
| @@ -604,9 +605,9 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -604,9 +605,9 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 604 | check_warnings(); | 605 | check_warnings(); |
| 605 | m->file->findAndSkipNextEOL(); | 606 | m->file->findAndSkipNextEOL(); |
| 606 | } | 607 | } |
| 607 | - m->deleted_objects.clear(); | 608 | + m->xref_table.deleted_objects.clear(); |
| 608 | 609 | ||
| 609 | - if (!m->trailer) { | 610 | + if (!m->xref_table.trailer) { |
| 610 | qpdf_offset_t max_offset{0}; | 611 | qpdf_offset_t max_offset{0}; |
| 611 | // If there are any xref streams, take the last one to appear. | 612 | // If there are any xref streams, take the last one to appear. |
| 612 | for (auto const& iter: m->xref_table) { | 613 | for (auto const& iter: m->xref_table) { |
| @@ -640,7 +641,7 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -640,7 +641,7 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 640 | } | 641 | } |
| 641 | } | 642 | } |
| 642 | 643 | ||
| 643 | - if (!m->trailer) { | 644 | + if (!m->xref_table.trailer) { |
| 644 | // We could check the last encountered object to see if it was an xref stream. If so, we | 645 | // We could check the last encountered object to see if it was an xref stream. If so, we |
| 645 | // could try to get the trailer from there. This may make it possible to recover files with | 646 | // could try to get the trailer from there. This may make it possible to recover files with |
| 646 | // bad startxref pointers even when they have object streams. | 647 | // bad startxref pointers even when they have object streams. |
| @@ -653,12 +654,12 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -653,12 +654,12 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 653 | throw damagedPDF("", 0, "unable to find objects while recovering damaged file"); | 654 | throw damagedPDF("", 0, "unable to find objects while recovering damaged file"); |
| 654 | } | 655 | } |
| 655 | check_warnings(); | 656 | check_warnings(); |
| 656 | - if (!m->parsed) { | ||
| 657 | - m->parsed = true; | 657 | + if (!m->xref_table.parsed) { |
| 658 | + m->xref_table.parsed = true; | ||
| 658 | getAllPages(); | 659 | getAllPages(); |
| 659 | check_warnings(); | 660 | check_warnings(); |
| 660 | if (m->all_pages.empty()) { | 661 | if (m->all_pages.empty()) { |
| 661 | - m->parsed = false; | 662 | + m->xref_table.parsed = false; |
| 662 | throw damagedPDF("", 0, "unable to find any pages while recovering damaged file"); | 663 | throw damagedPDF("", 0, "unable to find any pages while recovering damaged file"); |
| 663 | } | 664 | } |
| 664 | } | 665 | } |
| @@ -730,16 +731,16 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | @@ -730,16 +731,16 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | ||
| 730 | } | 731 | } |
| 731 | } | 732 | } |
| 732 | 733 | ||
| 733 | - if (!m->trailer) { | 734 | + if (!m->xref_table.trailer) { |
| 734 | throw damagedPDF("", 0, "unable to find trailer while reading xref"); | 735 | throw damagedPDF("", 0, "unable to find trailer while reading xref"); |
| 735 | } | 736 | } |
| 736 | - int size = m->trailer.getKey("/Size").getIntValueAsInt(); | 737 | + int size = m->xref_table.trailer.getKey("/Size").getIntValueAsInt(); |
| 737 | int max_obj = 0; | 738 | int max_obj = 0; |
| 738 | if (!m->xref_table.empty()) { | 739 | if (!m->xref_table.empty()) { |
| 739 | max_obj = m->xref_table.rbegin()->first.getObj(); | 740 | max_obj = m->xref_table.rbegin()->first.getObj(); |
| 740 | } | 741 | } |
| 741 | - if (!m->deleted_objects.empty()) { | ||
| 742 | - max_obj = std::max(max_obj, *(m->deleted_objects.rbegin())); | 742 | + if (!m->xref_table.deleted_objects.empty()) { |
| 743 | + max_obj = std::max(max_obj, *(m->xref_table.deleted_objects.rbegin())); | ||
| 743 | } | 744 | } |
| 744 | if ((size < 1) || (size - 1 != max_obj)) { | 745 | if ((size < 1) || (size - 1 != max_obj)) { |
| 745 | QTC::TC("qpdf", "QPDF xref size mismatch"); | 746 | QTC::TC("qpdf", "QPDF xref size mismatch"); |
| @@ -752,7 +753,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | @@ -752,7 +753,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | ||
| 752 | 753 | ||
| 753 | // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we | 754 | // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we |
| 754 | // never depend on its being set. | 755 | // never depend on its being set. |
| 755 | - m->deleted_objects.clear(); | 756 | + m->xref_table.deleted_objects.clear(); |
| 756 | 757 | ||
| 757 | // Make sure we keep only the highest generation for any object. | 758 | // Make sure we keep only the highest generation for any object. |
| 758 | QPDFObjGen last_og{-1, 0}; | 759 | QPDFObjGen last_og{-1, 0}; |
| @@ -968,7 +969,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | @@ -968,7 +969,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | ||
| 968 | for (qpdf_offset_t i = obj; i - num < obj; ++i) { | 969 | for (qpdf_offset_t i = obj; i - num < obj; ++i) { |
| 969 | if (i == 0) { | 970 | if (i == 0) { |
| 970 | // This is needed by checkLinearization() | 971 | // This is needed by checkLinearization() |
| 971 | - m->first_xref_item_offset = m->file->tell(); | 972 | + m->xref_table.first_item_offset = m->file->tell(); |
| 972 | } | 973 | } |
| 973 | // For xref_table, these will always be small enough to be ints | 974 | // For xref_table, these will always be small enough to be ints |
| 974 | qpdf_offset_t f1 = 0; | 975 | qpdf_offset_t f1 = 0; |
| @@ -1000,21 +1001,21 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | @@ -1000,21 +1001,21 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | ||
| 1000 | throw damagedPDF("", "expected trailer dictionary"); | 1001 | throw damagedPDF("", "expected trailer dictionary"); |
| 1001 | } | 1002 | } |
| 1002 | 1003 | ||
| 1003 | - if (!m->trailer) { | 1004 | + if (!m->xref_table.trailer) { |
| 1004 | setTrailer(cur_trailer); | 1005 | setTrailer(cur_trailer); |
| 1005 | 1006 | ||
| 1006 | - if (!m->trailer.hasKey("/Size")) { | 1007 | + if (!m->xref_table.trailer.hasKey("/Size")) { |
| 1007 | QTC::TC("qpdf", "QPDF trailer lacks size"); | 1008 | QTC::TC("qpdf", "QPDF trailer lacks size"); |
| 1008 | throw damagedPDF("trailer", "trailer dictionary lacks /Size key"); | 1009 | throw damagedPDF("trailer", "trailer dictionary lacks /Size key"); |
| 1009 | } | 1010 | } |
| 1010 | - if (!m->trailer.getKey("/Size").isInteger()) { | 1011 | + if (!m->xref_table.trailer.getKey("/Size").isInteger()) { |
| 1011 | QTC::TC("qpdf", "QPDF trailer size not integer"); | 1012 | QTC::TC("qpdf", "QPDF trailer size not integer"); |
| 1012 | throw damagedPDF("trailer", "/Size key in trailer dictionary is not an integer"); | 1013 | throw damagedPDF("trailer", "/Size key in trailer dictionary is not an integer"); |
| 1013 | } | 1014 | } |
| 1014 | } | 1015 | } |
| 1015 | 1016 | ||
| 1016 | if (cur_trailer.hasKey("/XRefStm")) { | 1017 | if (cur_trailer.hasKey("/XRefStm")) { |
| 1017 | - if (m->ignore_xref_streams) { | 1018 | + if (m->xref_table.ignore_streams) { |
| 1018 | QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer"); | 1019 | QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer"); |
| 1019 | } else { | 1020 | } else { |
| 1020 | if (cur_trailer.getKey("/XRefStm").isInteger()) { | 1021 | if (cur_trailer.getKey("/XRefStm").isInteger()) { |
| @@ -1043,7 +1044,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | @@ -1043,7 +1044,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | ||
| 1043 | qpdf_offset_t | 1044 | qpdf_offset_t |
| 1044 | QPDF::read_xrefStream(qpdf_offset_t xref_offset) | 1045 | QPDF::read_xrefStream(qpdf_offset_t xref_offset) |
| 1045 | { | 1046 | { |
| 1046 | - if (!m->ignore_xref_streams) { | 1047 | + if (!m->xref_table.ignore_streams) { |
| 1047 | QPDFObjGen x_og; | 1048 | QPDFObjGen x_og; |
| 1048 | QPDFObjectHandle xref_obj; | 1049 | QPDFObjectHandle xref_obj; |
| 1049 | try { | 1050 | try { |
| @@ -1238,14 +1239,14 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | @@ -1238,14 +1239,14 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | ||
| 1238 | // object record, in which case the generation number appears as the third field. | 1239 | // object record, in which case the generation number appears as the third field. |
| 1239 | if (saw_first_compressed_object) { | 1240 | if (saw_first_compressed_object) { |
| 1240 | if (fields[0] != 2) { | 1241 | if (fields[0] != 2) { |
| 1241 | - m->uncompressed_after_compressed = true; | 1242 | + m->xref_table.uncompressed_after_compressed = true; |
| 1242 | } | 1243 | } |
| 1243 | } else if (fields[0] == 2) { | 1244 | } else if (fields[0] == 2) { |
| 1244 | saw_first_compressed_object = true; | 1245 | saw_first_compressed_object = true; |
| 1245 | } | 1246 | } |
| 1246 | if (obj == 0) { | 1247 | if (obj == 0) { |
| 1247 | // This is needed by checkLinearization() | 1248 | // This is needed by checkLinearization() |
| 1248 | - m->first_xref_item_offset = xref_offset; | 1249 | + m->xref_table.first_item_offset = xref_offset; |
| 1249 | } else if (fields[0] == 0) { | 1250 | } else if (fields[0] == 0) { |
| 1250 | // Ignore fields[2], which we don't care about in this case. This works around the | 1251 | // Ignore fields[2], which we don't care about in this case. This works around the |
| 1251 | // issue of some PDF files that put invalid values, like -1, here for deleted | 1252 | // issue of some PDF files that put invalid values, like -1, here for deleted |
| @@ -1258,7 +1259,7 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | @@ -1258,7 +1259,7 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | ||
| 1258 | } | 1259 | } |
| 1259 | } | 1260 | } |
| 1260 | 1261 | ||
| 1261 | - if (!m->trailer) { | 1262 | + if (!m->xref_table.trailer) { |
| 1262 | setTrailer(dict); | 1263 | setTrailer(dict); |
| 1263 | } | 1264 | } |
| 1264 | 1265 | ||
| @@ -1284,12 +1285,12 @@ QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2) | @@ -1284,12 +1285,12 @@ QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2) | ||
| 1284 | // If there is already an entry for this object and generation in the table, it means that a | 1285 | // If there is already an entry for this object and generation in the table, it means that a |
| 1285 | // later xref table has registered this object. Disregard this one. | 1286 | // later xref table has registered this object. Disregard this one. |
| 1286 | 1287 | ||
| 1287 | - if (obj > m->xref_table_max_id) { | 1288 | + if (obj > m->xref_table.max_id) { |
| 1288 | // ignore impossibly large object ids or object ids > Size. | 1289 | // ignore impossibly large object ids or object ids > Size. |
| 1289 | return; | 1290 | return; |
| 1290 | } | 1291 | } |
| 1291 | 1292 | ||
| 1292 | - if (m->deleted_objects.count(obj)) { | 1293 | + if (m->xref_table.deleted_objects.count(obj)) { |
| 1293 | QTC::TC("qpdf", "QPDF xref deleted object"); | 1294 | QTC::TC("qpdf", "QPDF xref deleted object"); |
| 1294 | return; | 1295 | return; |
| 1295 | } | 1296 | } |
| @@ -1326,7 +1327,7 @@ void | @@ -1326,7 +1327,7 @@ void | ||
| 1326 | QPDF::insertFreeXrefEntry(QPDFObjGen og) | 1327 | QPDF::insertFreeXrefEntry(QPDFObjGen og) |
| 1327 | { | 1328 | { |
| 1328 | if (!m->xref_table.count(og)) { | 1329 | if (!m->xref_table.count(og)) { |
| 1329 | - m->deleted_objects.insert(og.getObj()); | 1330 | + m->xref_table.deleted_objects.insert(og.getObj()); |
| 1330 | } | 1331 | } |
| 1331 | } | 1332 | } |
| 1332 | 1333 | ||
| @@ -1335,13 +1336,13 @@ QPDF::insertFreeXrefEntry(QPDFObjGen og) | @@ -1335,13 +1336,13 @@ QPDF::insertFreeXrefEntry(QPDFObjGen og) | ||
| 1335 | void | 1336 | void |
| 1336 | QPDF::insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2) | 1337 | QPDF::insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2) |
| 1337 | { | 1338 | { |
| 1338 | - if (!(obj > 0 && obj <= m->xref_table_max_id && 0 <= f2 && f2 < 65535)) { | 1339 | + if (!(obj > 0 && obj <= m->xref_table.max_id && 0 <= f2 && f2 < 65535)) { |
| 1339 | QTC::TC("qpdf", "QPDF xref overwrite invalid objgen"); | 1340 | QTC::TC("qpdf", "QPDF xref overwrite invalid objgen"); |
| 1340 | return; | 1341 | return; |
| 1341 | } | 1342 | } |
| 1342 | 1343 | ||
| 1343 | QPDFObjGen og(obj, f2); | 1344 | QPDFObjGen og(obj, f2); |
| 1344 | - if (!m->deleted_objects.count(obj)) { | 1345 | + if (!m->xref_table.deleted_objects.count(obj)) { |
| 1345 | // deleted_objects stores the uncompressed objects removed from the xref table at the start | 1346 | // deleted_objects stores the uncompressed objects removed from the xref table at the start |
| 1346 | // of recovery. | 1347 | // of recovery. |
| 1347 | QTC::TC("qpdf", "QPDF xref overwrite object"); | 1348 | QTC::TC("qpdf", "QPDF xref overwrite object"); |
| @@ -1381,11 +1382,11 @@ QPDF::showXRefTable() | @@ -1381,11 +1382,11 @@ QPDF::showXRefTable() | ||
| 1381 | bool | 1382 | bool |
| 1382 | QPDF::resolveXRefTable() | 1383 | QPDF::resolveXRefTable() |
| 1383 | { | 1384 | { |
| 1384 | - bool may_change = !m->reconstructed_xref; | 1385 | + bool may_change = !m->xref_table.reconstructed; |
| 1385 | for (auto& iter: m->xref_table) { | 1386 | for (auto& iter: m->xref_table) { |
| 1386 | if (isUnresolved(iter.first)) { | 1387 | if (isUnresolved(iter.first)) { |
| 1387 | resolve(iter.first); | 1388 | resolve(iter.first); |
| 1388 | - if (may_change && m->reconstructed_xref) { | 1389 | + if (may_change && m->xref_table.reconstructed) { |
| 1389 | return false; | 1390 | return false; |
| 1390 | } | 1391 | } |
| 1391 | } | 1392 | } |
| @@ -1958,7 +1959,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number) | @@ -1958,7 +1959,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number) | ||
| 1958 | 1959 | ||
| 1959 | int num = QUtil::string_to_int(tnum.getValue().c_str()); | 1960 | int num = QUtil::string_to_int(tnum.getValue().c_str()); |
| 1960 | long long offset = QUtil::string_to_int(toffset.getValue().c_str()); | 1961 | long long offset = QUtil::string_to_int(toffset.getValue().c_str()); |
| 1961 | - if (num > m->xref_table_max_id) { | 1962 | + if (num > m->xref_table.max_id) { |
| 1962 | continue; | 1963 | continue; |
| 1963 | } | 1964 | } |
| 1964 | if (num == obj_stream_number) { | 1965 | if (num == obj_stream_number) { |
| @@ -2101,7 +2102,7 @@ QPDF::getObjectForParser(int id, int gen, bool parse_pdf) | @@ -2101,7 +2102,7 @@ QPDF::getObjectForParser(int id, int gen, bool parse_pdf) | ||
| 2101 | if (auto iter = m->obj_cache.find(og); iter != m->obj_cache.end()) { | 2102 | if (auto iter = m->obj_cache.find(og); iter != m->obj_cache.end()) { |
| 2102 | return iter->second.object; | 2103 | return iter->second.object; |
| 2103 | } | 2104 | } |
| 2104 | - if (m->xref_table.count(og) || !m->parsed) { | 2105 | + if (m->xref_table.count(og) || !m->xref_table.parsed) { |
| 2105 | return m->obj_cache.insert({og, QPDF_Unresolved::create(this, og)}).first->second.object; | 2106 | return m->obj_cache.insert({og, QPDF_Unresolved::create(this, og)}).first->second.object; |
| 2106 | } | 2107 | } |
| 2107 | if (parse_pdf) { | 2108 | if (parse_pdf) { |
| @@ -2117,8 +2118,9 @@ QPDF::getObjectForJSON(int id, int gen) | @@ -2117,8 +2118,9 @@ QPDF::getObjectForJSON(int id, int gen) | ||
| 2117 | auto [it, inserted] = m->obj_cache.try_emplace(og); | 2118 | auto [it, inserted] = m->obj_cache.try_emplace(og); |
| 2118 | auto& obj = it->second.object; | 2119 | auto& obj = it->second.object; |
| 2119 | if (inserted) { | 2120 | if (inserted) { |
| 2120 | - obj = (m->parsed && !m->xref_table.count(og)) ? QPDF_Null::create(this, og) | ||
| 2121 | - : QPDF_Unresolved::create(this, og); | 2121 | + obj = (m->xref_table.parsed && !m->xref_table.count(og)) |
| 2122 | + ? QPDF_Null::create(this, og) | ||
| 2123 | + : QPDF_Unresolved::create(this, og); | ||
| 2122 | } | 2124 | } |
| 2123 | return obj; | 2125 | return obj; |
| 2124 | } | 2126 | } |
| @@ -2128,7 +2130,7 @@ QPDF::getObject(QPDFObjGen const& og) | @@ -2128,7 +2130,7 @@ QPDF::getObject(QPDFObjGen const& og) | ||
| 2128 | { | 2130 | { |
| 2129 | if (auto it = m->obj_cache.find(og); it != m->obj_cache.end()) { | 2131 | if (auto it = m->obj_cache.find(og); it != m->obj_cache.end()) { |
| 2130 | return {it->second.object}; | 2132 | return {it->second.object}; |
| 2131 | - } else if (m->parsed && !m->xref_table.count(og)) { | 2133 | + } else if (m->xref_table.parsed && !m->xref_table.count(og)) { |
| 2132 | return QPDF_Null::create(); | 2134 | return QPDF_Null::create(); |
| 2133 | } else { | 2135 | } else { |
| 2134 | auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og), -1, -1); | 2136 | auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og), -1, -1); |
| @@ -2526,13 +2528,13 @@ QPDF::getExtensionLevel() | @@ -2526,13 +2528,13 @@ QPDF::getExtensionLevel() | ||
| 2526 | QPDFObjectHandle | 2528 | QPDFObjectHandle |
| 2527 | QPDF::getTrailer() | 2529 | QPDF::getTrailer() |
| 2528 | { | 2530 | { |
| 2529 | - return m->trailer; | 2531 | + return m->xref_table.trailer; |
| 2530 | } | 2532 | } |
| 2531 | 2533 | ||
| 2532 | QPDFObjectHandle | 2534 | QPDFObjectHandle |
| 2533 | QPDF::getRoot() | 2535 | QPDF::getRoot() |
| 2534 | { | 2536 | { |
| 2535 | - QPDFObjectHandle root = m->trailer.getKey("/Root"); | 2537 | + QPDFObjectHandle root = m->xref_table.trailer.getKey("/Root"); |
| 2536 | if (!root.isDictionary()) { | 2538 | if (!root.isDictionary()) { |
| 2537 | throw damagedPDF("", 0, "unable to find /Root dictionary"); | 2539 | throw damagedPDF("", 0, "unable to find /Root dictionary"); |
| 2538 | } else if ( | 2540 | } else if ( |
| @@ -2554,7 +2556,7 @@ QPDF::getXRefTable() | @@ -2554,7 +2556,7 @@ QPDF::getXRefTable() | ||
| 2554 | std::map<QPDFObjGen, QPDFXRefEntry> const& | 2556 | std::map<QPDFObjGen, QPDFXRefEntry> const& |
| 2555 | QPDF::getXRefTableInternal() | 2557 | QPDF::getXRefTableInternal() |
| 2556 | { | 2558 | { |
| 2557 | - if (!m->parsed) { | 2559 | + if (!m->xref_table.parsed) { |
| 2558 | throw std::logic_error("QPDF::getXRefTable called before parsing."); | 2560 | throw std::logic_error("QPDF::getXRefTable called before parsing."); |
| 2559 | } | 2561 | } |
| 2560 | 2562 | ||
| @@ -2604,14 +2606,14 @@ QPDF::getCompressibleObjGens() | @@ -2604,14 +2606,14 @@ QPDF::getCompressibleObjGens() | ||
| 2604 | // iterating through the xref table since it avoids preserving orphaned items. | 2606 | // iterating through the xref table since it avoids preserving orphaned items. |
| 2605 | 2607 | ||
| 2606 | // Exclude encryption dictionary, if any | 2608 | // Exclude encryption dictionary, if any |
| 2607 | - QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt"); | 2609 | + QPDFObjectHandle encryption_dict = m->xref_table.trailer.getKey("/Encrypt"); |
| 2608 | QPDFObjGen encryption_dict_og = encryption_dict.getObjGen(); | 2610 | QPDFObjGen encryption_dict_og = encryption_dict.getObjGen(); |
| 2609 | 2611 | ||
| 2610 | const size_t max_obj = getObjectCount(); | 2612 | const size_t max_obj = getObjectCount(); |
| 2611 | std::vector<bool> visited(max_obj, false); | 2613 | std::vector<bool> visited(max_obj, false); |
| 2612 | std::vector<QPDFObjectHandle> queue; | 2614 | std::vector<QPDFObjectHandle> queue; |
| 2613 | queue.reserve(512); | 2615 | queue.reserve(512); |
| 2614 | - queue.push_back(m->trailer); | 2616 | + queue.push_back(m->xref_table.trailer); |
| 2615 | std::vector<T> result; | 2617 | std::vector<T> result; |
| 2616 | if constexpr (std::is_same_v<T, QPDFObjGen>) { | 2618 | if constexpr (std::is_same_v<T, QPDFObjGen>) { |
| 2617 | result.reserve(m->obj_cache.size()); | 2619 | result.reserve(m->obj_cache.size()); |
libqpdf/QPDF_encryption.cc
| @@ -727,7 +727,7 @@ QPDF::initializeEncryption() | @@ -727,7 +727,7 @@ QPDF::initializeEncryption() | ||
| 727 | // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption | 727 | // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption |
| 728 | // dictionary. | 728 | // dictionary. |
| 729 | 729 | ||
| 730 | - if (!m->trailer.hasKey("/Encrypt")) { | 730 | + if (!m->xref_table.trailer.hasKey("/Encrypt")) { |
| 731 | return; | 731 | return; |
| 732 | } | 732 | } |
| 733 | 733 | ||
| @@ -736,7 +736,7 @@ QPDF::initializeEncryption() | @@ -736,7 +736,7 @@ QPDF::initializeEncryption() | ||
| 736 | m->encp->encrypted = true; | 736 | m->encp->encrypted = true; |
| 737 | 737 | ||
| 738 | std::string id1; | 738 | std::string id1; |
| 739 | - QPDFObjectHandle id_obj = m->trailer.getKey("/ID"); | 739 | + QPDFObjectHandle id_obj = m->xref_table.trailer.getKey("/ID"); |
| 740 | if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) { | 740 | if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) { |
| 741 | id1 = id_obj.getArrayItem(0).getStringValue(); | 741 | id1 = id_obj.getArrayItem(0).getStringValue(); |
| 742 | } else { | 742 | } else { |
| @@ -745,7 +745,7 @@ QPDF::initializeEncryption() | @@ -745,7 +745,7 @@ QPDF::initializeEncryption() | ||
| 745 | warn(damagedPDF("trailer", "invalid /ID in trailer dictionary")); | 745 | warn(damagedPDF("trailer", "invalid /ID in trailer dictionary")); |
| 746 | } | 746 | } |
| 747 | 747 | ||
| 748 | - QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt"); | 748 | + QPDFObjectHandle encryption_dict = m->xref_table.trailer.getKey("/Encrypt"); |
| 749 | if (!encryption_dict.isDictionary()) { | 749 | if (!encryption_dict.isDictionary()) { |
| 750 | throw damagedPDF("/Encrypt in trailer dictionary is not a dictionary"); | 750 | throw damagedPDF("/Encrypt in trailer dictionary is not a dictionary"); |
| 751 | } | 751 | } |
libqpdf/QPDF_json.cc
| @@ -593,8 +593,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | @@ -593,8 +593,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | ||
| 593 | this->saw_value = true; | 593 | this->saw_value = true; |
| 594 | // The trailer must be a dictionary, so we can use setNextStateIfDictionary. | 594 | // The trailer must be a dictionary, so we can use setNextStateIfDictionary. |
| 595 | if (setNextStateIfDictionary("trailer.value", value, st_object)) { | 595 | if (setNextStateIfDictionary("trailer.value", value, st_object)) { |
| 596 | - this->pdf.m->trailer = makeObject(value); | ||
| 597 | - setObjectDescription(this->pdf.m->trailer, value); | 596 | + pdf.m->xref_table.trailer = makeObject(value); |
| 597 | + setObjectDescription(this->pdf.m->xref_table.trailer, value); | ||
| 598 | } | 598 | } |
| 599 | } else if (key == "stream") { | 599 | } else if (key == "stream") { |
| 600 | // Don't need to set saw_stream here since there's already an error. | 600 | // Don't need to set saw_stream here since there's already an error. |
libqpdf/QPDF_linearization.cc
| @@ -461,12 +461,11 @@ QPDF::checkLinearizationInternal() | @@ -461,12 +461,11 @@ QPDF::checkLinearizationInternal() | ||
| 461 | break; | 461 | break; |
| 462 | } | 462 | } |
| 463 | } | 463 | } |
| 464 | - if (m->file->tell() != m->first_xref_item_offset) { | 464 | + if (m->file->tell() != m->xref_table.first_item_offset) { |
| 465 | QTC::TC("qpdf", "QPDF err /T mismatch"); | 465 | QTC::TC("qpdf", "QPDF err /T mismatch"); |
| 466 | linearizationWarning( | 466 | linearizationWarning( |
| 467 | - "space before first xref item (/T) mismatch " | ||
| 468 | - "(computed = " + | ||
| 469 | - std::to_string(m->first_xref_item_offset) + | 467 | + "space before first xref item (/T) mismatch (computed = " + |
| 468 | + std::to_string(m->xref_table.first_item_offset) + | ||
| 470 | "; file = " + std::to_string(m->file->tell())); | 469 | "; file = " + std::to_string(m->file->tell())); |
| 471 | } | 470 | } |
| 472 | 471 | ||
| @@ -477,7 +476,7 @@ QPDF::checkLinearizationInternal() | @@ -477,7 +476,7 @@ QPDF::checkLinearizationInternal() | ||
| 477 | // compressed objects are supposed to be at the end of the containing xref section if any object | 476 | // compressed objects are supposed to be at the end of the containing xref section if any object |
| 478 | // streams are in use. | 477 | // streams are in use. |
| 479 | 478 | ||
| 480 | - if (m->uncompressed_after_compressed) { | 479 | + if (m->xref_table.uncompressed_after_compressed) { |
| 481 | linearizationWarning("linearized file contains an uncompressed object after a compressed " | 480 | linearizationWarning("linearized file contains an uncompressed object after a compressed " |
| 482 | "one in a cross-reference stream"); | 481 | "one in a cross-reference stream"); |
| 483 | } | 482 | } |
libqpdf/QPDF_optimization.cc
| @@ -115,13 +115,13 @@ QPDF::optimize_internal( | @@ -115,13 +115,13 @@ QPDF::optimize_internal( | ||
| 115 | } | 115 | } |
| 116 | 116 | ||
| 117 | // Traverse document-level items | 117 | // Traverse document-level items |
| 118 | - for (auto const& key: m->trailer.getKeys()) { | 118 | + for (auto const& key: m->xref_table.trailer.getKeys()) { |
| 119 | if (key == "/Root") { | 119 | if (key == "/Root") { |
| 120 | // handled separately | 120 | // handled separately |
| 121 | } else { | 121 | } else { |
| 122 | updateObjectMaps( | 122 | updateObjectMaps( |
| 123 | ObjUser(ObjUser::ou_trailer_key, key), | 123 | ObjUser(ObjUser::ou_trailer_key, key), |
| 124 | - m->trailer.getKey(key), | 124 | + m->xref_table.trailer.getKey(key), |
| 125 | skip_stream_parameters); | 125 | skip_stream_parameters); |
| 126 | } | 126 | } |
| 127 | } | 127 | } |
| @@ -169,13 +169,13 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) | @@ -169,13 +169,13 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) | ||
| 169 | // values for them. | 169 | // values for them. |
| 170 | std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors; | 170 | std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors; |
| 171 | pushInheritedAttributesToPageInternal( | 171 | pushInheritedAttributesToPageInternal( |
| 172 | - m->trailer.getKey("/Root").getKey("/Pages"), | 172 | + m->xref_table.trailer.getKey("/Root").getKey("/Pages"), |
| 173 | key_ancestors, | 173 | key_ancestors, |
| 174 | allow_changes, | 174 | allow_changes, |
| 175 | warn_skipped_keys); | 175 | warn_skipped_keys); |
| 176 | if (!key_ancestors.empty()) { | 176 | if (!key_ancestors.empty()) { |
| 177 | - throw std::logic_error("key_ancestors not empty after" | ||
| 178 | - " pushing inherited attributes to pages"); | 177 | + throw std::logic_error( |
| 178 | + "key_ancestors not empty after pushing inherited attributes to pages"); | ||
| 179 | } | 179 | } |
| 180 | m->pushed_inherited_attributes_to_pages = true; | 180 | m->pushed_inherited_attributes_to_pages = true; |
| 181 | m->ever_pushed_inherited_attributes_to_pages = true; | 181 | m->ever_pushed_inherited_attributes_to_pages = true; |
libqpdf/qpdf/QPDF_private.hh
| @@ -3,6 +3,25 @@ | @@ -3,6 +3,25 @@ | ||
| 3 | 3 | ||
| 4 | #include <qpdf/QPDF.hh> | 4 | #include <qpdf/QPDF.hh> |
| 5 | 5 | ||
| 6 | +// Xref_table encapsulates the pdf's xref table and trailer. | ||
| 7 | +class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry> | ||
| 8 | +{ | ||
| 9 | + public: | ||
| 10 | + QPDFObjectHandle trailer; | ||
| 11 | + bool reconstructed{false}; | ||
| 12 | + // Various tables are indexed by object id, with potential size id + 1 | ||
| 13 | + int max_id{std::numeric_limits<int>::max() - 1}; | ||
| 14 | + qpdf_offset_t max_offset{0}; | ||
| 15 | + std::set<int> deleted_objects; | ||
| 16 | + bool ignore_streams{false}; | ||
| 17 | + bool parsed{false}; | ||
| 18 | + bool attempt_recovery{true}; | ||
| 19 | + | ||
| 20 | + // Linearization data | ||
| 21 | + bool uncompressed_after_compressed{false}; | ||
| 22 | + qpdf_offset_t first_item_offset{0}; // actual value from file | ||
| 23 | +}; | ||
| 24 | + | ||
| 6 | // Writer class is restricted to QPDFWriter so that only it can call certain methods. | 25 | // Writer class is restricted to QPDFWriter so that only it can call certain methods. |
| 7 | class QPDF::Writer | 26 | class QPDF::Writer |
| 8 | { | 27 | { |
| @@ -459,21 +478,15 @@ class QPDF::Members | @@ -459,21 +478,15 @@ class QPDF::Members | ||
| 459 | std::shared_ptr<InputSource> file; | 478 | std::shared_ptr<InputSource> file; |
| 460 | std::string last_object_description; | 479 | std::string last_object_description; |
| 461 | bool provided_password_is_hex_key{false}; | 480 | bool provided_password_is_hex_key{false}; |
| 462 | - bool ignore_xref_streams{false}; | ||
| 463 | bool suppress_warnings{false}; | 481 | bool suppress_warnings{false}; |
| 464 | size_t max_warnings{0}; | 482 | size_t max_warnings{0}; |
| 465 | bool attempt_recovery{true}; | 483 | bool attempt_recovery{true}; |
| 466 | bool check_mode{false}; | 484 | bool check_mode{false}; |
| 467 | std::shared_ptr<EncryptionParameters> encp; | 485 | std::shared_ptr<EncryptionParameters> encp; |
| 468 | std::string pdf_version; | 486 | std::string pdf_version; |
| 469 | - std::map<QPDFObjGen, QPDFXRefEntry> xref_table; | ||
| 470 | - // Various tables are indexed by object id, with potential size id + 1 | ||
| 471 | - int xref_table_max_id{std::numeric_limits<int>::max() - 1}; | ||
| 472 | - qpdf_offset_t xref_table_max_offset{0}; | ||
| 473 | - std::set<int> deleted_objects; | 487 | + Xref_table xref_table; |
| 474 | std::map<QPDFObjGen, ObjCache> obj_cache; | 488 | std::map<QPDFObjGen, ObjCache> obj_cache; |
| 475 | std::set<QPDFObjGen> resolving; | 489 | std::set<QPDFObjGen> resolving; |
| 476 | - QPDFObjectHandle trailer; | ||
| 477 | std::vector<QPDFObjectHandle> all_pages; | 490 | std::vector<QPDFObjectHandle> all_pages; |
| 478 | bool invalid_page_found{false}; | 491 | bool invalid_page_found{false}; |
| 479 | std::map<QPDFObjGen, int> pageobj_to_pages_pos; | 492 | std::map<QPDFObjGen, int> pageobj_to_pages_pos; |
| @@ -485,16 +498,12 @@ class QPDF::Members | @@ -485,16 +498,12 @@ class QPDF::Members | ||
| 485 | std::shared_ptr<QPDFObjectHandle::StreamDataProvider> copied_streams; | 498 | std::shared_ptr<QPDFObjectHandle::StreamDataProvider> copied_streams; |
| 486 | // copied_stream_data_provider is owned by copied_streams | 499 | // copied_stream_data_provider is owned by copied_streams |
| 487 | CopiedStreamDataProvider* copied_stream_data_provider{nullptr}; | 500 | CopiedStreamDataProvider* copied_stream_data_provider{nullptr}; |
| 488 | - bool reconstructed_xref{false}; | ||
| 489 | bool fixed_dangling_refs{false}; | 501 | bool fixed_dangling_refs{false}; |
| 490 | bool immediate_copy_from{false}; | 502 | bool immediate_copy_from{false}; |
| 491 | bool in_parse{false}; | 503 | bool in_parse{false}; |
| 492 | - bool parsed{false}; | ||
| 493 | std::set<int> resolved_object_streams; | 504 | std::set<int> resolved_object_streams; |
| 494 | 505 | ||
| 495 | // Linearization data | 506 | // Linearization data |
| 496 | - qpdf_offset_t first_xref_item_offset{0}; // actual value from file | ||
| 497 | - bool uncompressed_after_compressed{false}; | ||
| 498 | bool linearization_warnings{false}; | 507 | bool linearization_warnings{false}; |
| 499 | 508 | ||
| 500 | // Linearization parameter dictionary and hint table data: may be read from file or computed | 509 | // Linearization parameter dictionary and hint table data: may be read from file or computed |