Commit ddd78ac7c62d4494e7d6098f395f64bdf5950b00

Authored by Jay Berkenbilt
Committed by GitHub
2 parents e51a1948 098b98c8

Merge pull request #1002 from m-holger/ixe

 Add new private method QPDF::insertReconstructedXrefEntry
include/qpdf/QPDF.hh
@@ -1002,7 +1002,9 @@ class QPDF @@ -1002,7 +1002,9 @@ class QPDF
1002 qpdf_offset_t read_xrefTable(qpdf_offset_t offset); 1002 qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
1003 qpdf_offset_t read_xrefStream(qpdf_offset_t offset); 1003 qpdf_offset_t read_xrefStream(qpdf_offset_t offset);
1004 qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); 1004 qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
1005 - void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite = false); 1005 + void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2);
  1006 + void insertFreeXrefEntry(QPDFObjGen);
  1007 + void insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2);
1006 void setLastObjectDescription(std::string const& description, QPDFObjGen const& og); 1008 void setLastObjectDescription(std::string const& description, QPDFObjGen const& og);
1007 QPDFObjectHandle readObject( 1009 QPDFObjectHandle readObject(
1008 std::shared_ptr<InputSource>, 1010 std::shared_ptr<InputSource>,
libqpdf/QPDF.cc
@@ -564,7 +564,7 @@ QPDF::reconstruct_xref(QPDFExc& e) @@ -564,7 +564,7 @@ QPDF::reconstruct_xref(QPDFExc& e)
564 if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) { 564 if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) {
565 int obj = QUtil::string_to_int(t1.getValue().c_str()); 565 int obj = QUtil::string_to_int(t1.getValue().c_str());
566 int gen = QUtil::string_to_int(t2.getValue().c_str()); 566 int gen = QUtil::string_to_int(t2.getValue().c_str());
567 - insertXrefEntry(obj, 1, token_start, gen, true); 567 + insertReconstructedXrefEntry(obj, token_start, gen);
568 } 568 }
569 } else if (!m->trailer.isInitialized() && t1.isWord("trailer")) { 569 } else if (!m->trailer.isInitialized() && t1.isWord("trailer")) {
570 QPDFObjectHandle t = readObject(m->file, "trailer", QPDFObjGen(), false); 570 QPDFObjectHandle t = readObject(m->file, "trailer", QPDFObjGen(), false);
@@ -577,6 +577,7 @@ QPDF::reconstruct_xref(QPDFExc& e) @@ -577,6 +577,7 @@ QPDF::reconstruct_xref(QPDFExc& e)
577 m->file->seek(next_line_start, SEEK_SET); 577 m->file->seek(next_line_start, SEEK_SET);
578 line_start = next_line_start; 578 line_start = next_line_start;
579 } 579 }
  580 + m->deleted_objects.clear();
580 581
581 if (!m->trailer.isInitialized()) { 582 if (!m->trailer.isInitialized()) {
582 // We could check the last encountered object to see if it was an xref stream. If so, we 583 // We could check the last encountered object to see if it was an xref stream. If so, we
@@ -889,7 +890,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) @@ -889,7 +890,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
889 890
890 // Handle any deleted items now that we've read the /XRefStm. 891 // Handle any deleted items now that we've read the /XRefStm.
891 for (auto const& og: deleted_items) { 892 for (auto const& og: deleted_items) {
892 - insertXrefEntry(og.getObj(), 0, 0, og.getGen()); 893 + insertFreeXrefEntry(og);
893 } 894 }
894 895
895 if (cur_trailer.hasKey("/Prev")) { 896 if (cur_trailer.hasKey("/Prev")) {
@@ -909,7 +910,6 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) @@ -909,7 +910,6 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
909 qpdf_offset_t 910 qpdf_offset_t
910 QPDF::read_xrefStream(qpdf_offset_t xref_offset) 911 QPDF::read_xrefStream(qpdf_offset_t xref_offset)
911 { 912 {
912 - bool found = false;  
913 if (!m->ignore_xref_streams) { 913 if (!m->ignore_xref_streams) {
914 QPDFObjGen x_og; 914 QPDFObjGen x_og;
915 QPDFObjectHandle xref_obj; 915 QPDFObjectHandle xref_obj;
@@ -921,17 +921,13 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset) @@ -921,17 +921,13 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset)
921 } 921 }
922 if (xref_obj.isStreamOfType("/XRef")) { 922 if (xref_obj.isStreamOfType("/XRef")) {
923 QTC::TC("qpdf", "QPDF found xref stream"); 923 QTC::TC("qpdf", "QPDF found xref stream");
924 - found = true;  
925 - xref_offset = processXRefStream(xref_offset, xref_obj); 924 + return processXRefStream(xref_offset, xref_obj);
926 } 925 }
927 } 926 }
928 927
929 - if (!found) {  
930 - QTC::TC("qpdf", "QPDF can't find xref");  
931 - throw damagedPDF("", xref_offset, "xref not found");  
932 - }  
933 -  
934 - return xref_offset; 928 + QTC::TC("qpdf", "QPDF can't find xref");
  929 + throw damagedPDF("", xref_offset, "xref not found");
  930 + return 0; // unreachable
935 } 931 }
936 932
937 qpdf_offset_t 933 qpdf_offset_t
@@ -1087,9 +1083,10 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) @@ -1087,9 +1083,10 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
1087 if (fields[0] == 0) { 1083 if (fields[0] == 0) {
1088 // Ignore fields[2], which we don't care about in this case. This works around the issue 1084 // Ignore fields[2], which we don't care about in this case. This works around the issue
1089 // of some PDF files that put invalid values, like -1, here for deleted objects. 1085 // of some PDF files that put invalid values, like -1, here for deleted objects.
1090 - fields[2] = 0; 1086 + insertFreeXrefEntry(QPDFObjGen(obj, 0));
  1087 + } else {
  1088 + insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2]));
1091 } 1089 }
1092 - insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2]));  
1093 } 1090 }
1094 1091
1095 if (!m->trailer.isInitialized()) { 1092 if (!m->trailer.isInitialized()) {
@@ -1111,47 +1108,35 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) @@ -1111,47 +1108,35 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
1111 } 1108 }
1112 1109
1113 void 1110 void
1114 -QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite) 1111 +QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2)
1115 { 1112 {
1116 // Populate the xref table in such a way that the first reference to an object that we see, 1113 // Populate the xref table in such a way that the first reference to an object that we see,
1117 // which is the one in the latest xref table in which it appears, is the one that gets stored. 1114 // which is the one in the latest xref table in which it appears, is the one that gets stored.
1118 - // This works because we are reading more recent appends before older ones. Exception: if  
1119 - // overwrite is true, then replace any existing object. This is used in xref recovery mode,  
1120 - // which reads the file from beginning to end. 1115 + // This works because we are reading more recent appends before older ones.
1121 1116
1122 // If there is already an entry for this object and generation in the table, it means that a 1117 // If there is already an entry for this object and generation in the table, it means that a
1123 // later xref table has registered this object. Disregard this one. 1118 // later xref table has registered this object. Disregard this one.
1124 - { // private scope  
1125 - int gen = (f0 == 2 ? 0 : f2);  
1126 - QPDFObjGen og(obj, gen);  
1127 - if (m->xref_table.count(og)) {  
1128 - if (overwrite) {  
1129 - QTC::TC("qpdf", "QPDF xref overwrite object");  
1130 - m->xref_table.erase(og);  
1131 - } else {  
1132 - QTC::TC("qpdf", "QPDF xref reused object");  
1133 - return;  
1134 - }  
1135 - }  
1136 - if (m->deleted_objects.count(obj)) {  
1137 - QTC::TC("qpdf", "QPDF xref deleted object");  
1138 - return;  
1139 - } 1119 +
  1120 + if (m->deleted_objects.count(obj)) {
  1121 + QTC::TC("qpdf", "QPDF xref deleted object");
  1122 + return;
1140 } 1123 }
1141 1124
1142 - switch (f0) {  
1143 - case 0:  
1144 - m->deleted_objects.insert(obj);  
1145 - break; 1125 + auto [iter, created] = m->xref_table.try_emplace(QPDFObjGen(obj, (f0 == 2 ? 0 : f2)));
  1126 + if (!created) {
  1127 + QTC::TC("qpdf", "QPDF xref reused object");
  1128 + return;
  1129 + }
1146 1130
  1131 + switch (f0) {
1147 case 1: 1132 case 1:
1148 // f2 is generation 1133 // f2 is generation
1149 QTC::TC("qpdf", "QPDF xref gen > 0", ((f2 > 0) ? 1 : 0)); 1134 QTC::TC("qpdf", "QPDF xref gen > 0", ((f2 > 0) ? 1 : 0));
1150 - m->xref_table[QPDFObjGen(obj, f2)] = QPDFXRefEntry(f1); 1135 + iter->second = QPDFXRefEntry(f1);
1151 break; 1136 break;
1152 1137
1153 case 2: 1138 case 2:
1154 - m->xref_table[QPDFObjGen(obj, 0)] = QPDFXRefEntry(toI(f1), f2); 1139 + iter->second = QPDFXRefEntry(toI(f1), f2);
1155 break; 1140 break;
1156 1141
1157 default: 1142 default:
@@ -1161,6 +1146,28 @@ QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite) @@ -1161,6 +1146,28 @@ QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite)
1161 } 1146 }
1162 1147
1163 void 1148 void
  1149 +QPDF::insertFreeXrefEntry(QPDFObjGen og)
  1150 +{
  1151 + if (!m->xref_table.count(og)) {
  1152 + m->deleted_objects.insert(og.getObj());
  1153 + }
  1154 +}
  1155 +
  1156 +// Replace uncompressed object. This is used in xref recovery mode, which reads the file from
  1157 +// beginning to end.
  1158 +void
  1159 +QPDF::insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2)
  1160 +{
  1161 + QPDFObjGen og(obj, f2);
  1162 + if (!m->deleted_objects.count(obj)) {
  1163 + // deleted_objects stores the uncompressed objects removed from the xref table at the start
  1164 + // of recovery.
  1165 + QTC::TC("qpdf", "QPDF xref overwrite object");
  1166 + m->xref_table[QPDFObjGen(obj, f2)] = QPDFXRefEntry(f1);
  1167 + }
  1168 +}
  1169 +
  1170 +void
1164 QPDF::showXRefTable() 1171 QPDF::showXRefTable()
1165 { 1172 {
1166 auto& cout = *m->log->getInfo(); 1173 auto& cout = *m->log->getInfo();