Commit a3f693c8f982462ce03421e187504efbe31788b9

Authored by m-holger
1 parent 2015f71c

Move private methods in QPDF_objects to QPDF::Objects

include/qpdf/QPDF.hh
... ... @@ -758,37 +758,15 @@ class QPDF
758 758 class ResolveRecorder;
759 759 class JSONReactor;
760 760  
761   - inline Objects& objects();
  761 + inline Objects& objects() noexcept;
  762 + inline Objects const& objects() const noexcept;
762 763 void parse(char const* password);
763 764 void inParse(bool);
764 765 void setLastObjectDescription(std::string const& description, QPDFObjGen const& og);
765   - QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og);
766   - void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
767   - void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
768   - QPDFObjectHandle readObjectInStream(std::shared_ptr<InputSource>& input, int obj);
769   - size_t recoverStreamLength(
770   - std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset);
771 766 QPDFTokenizer::Token readToken(InputSource&, size_t max_len = 0);
772 767  
773   - QPDFObjectHandle readObjectAtOffset(
774   - bool attempt_recovery,
775   - qpdf_offset_t offset,
776   - std::string const& description,
777   - QPDFObjGen exp_og,
778   - QPDFObjGen& og,
779   - bool skip_cache_if_in_xref);
780   - QPDFObject* resolve(QPDFObjGen og);
781   - void resolveObjectsInStream(int obj_stream_number);
782 768 void stopOnError(std::string const& message);
783   - QPDFObjGen nextObjGen();
784 769 QPDFObjectHandle newIndirect(QPDFObjGen const&, std::shared_ptr<QPDFObject> const&);
785   - QPDFObjectHandle makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj);
786   - bool isCached(QPDFObjGen const& og);
787   - bool isUnresolved(QPDFObjGen const& og);
788   - std::shared_ptr<QPDFObject> getObjectForParser(int id, int gen, bool parse_pdf);
789   - std::shared_ptr<QPDFObject> getObjectForJSON(int id, int gen);
790   - void removeObject(QPDFObjGen og);
791   - void updateCache(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& object);
792 770 static QPDFExc damagedPDF(
793 771 InputSource& input,
794 772 std::string const& object,
... ... @@ -834,7 +812,6 @@ class QPDF
834 812 QPDFWriter::ObjTable const& obj,
835 813 std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
836 814 void optimize(Xref_table const& obj);
837   - size_t tableSize();
838 815  
839 816 // Get lists of all objects in order according to the part of a linearized file that they belong
840 817 // to.
... ... @@ -854,12 +831,6 @@ class QPDF
854 831 int& O,
855 832 bool compressed);
856 833  
857   - // Get a list of objects that would be permitted in an object stream.
858   - template <typename T>
859   - std::vector<T> getCompressibleObjGens();
860   - std::vector<QPDFObjGen> getCompressibleObjVector();
861   - std::vector<bool> getCompressibleObjSet();
862   -
863 834 // methods to support page handling
864 835  
865 836 void getAllPagesInternal(
... ...
libqpdf/QPDF.cc
... ... @@ -535,26 +535,26 @@ QPDF::makeIndirectObject(QPDFObjectHandle oh)
535 535 if (!oh) {
536 536 throw std::logic_error("attempted to make an uninitialized QPDFObjectHandle indirect");
537 537 }
538   - return makeIndirectFromQPDFObject(oh.getObj());
  538 + return m->objects.make_indirect(oh.getObj());
539 539 }
540 540  
541 541 QPDFObjectHandle
542 542 QPDF::newReserved()
543 543 {
544   - return makeIndirectFromQPDFObject(QPDF_Reserved::create());
  544 + return m->objects.make_indirect(QPDF_Reserved::create());
545 545 }
546 546  
547 547 QPDFObjectHandle
548 548 QPDF::newIndirectNull()
549 549 {
550   - return makeIndirectFromQPDFObject(QPDF_Null::create());
  550 + return m->objects.make_indirect(QPDF_Null::create());
551 551 }
552 552  
553 553 QPDFObjectHandle
554 554 QPDF::newStream()
555 555 {
556   - return makeIndirectFromQPDFObject(
557   - QPDF_Stream::create(this, nextObjGen(), QPDFObjectHandle::newDictionary(), 0, 0));
  556 + return m->objects.make_indirect(
  557 + QPDF_Stream::create(this, m->objects.next_id(), QPDFObjectHandle::newDictionary(), 0, 0));
558 558 }
559 559  
560 560 QPDFObjectHandle
... ...
libqpdf/QPDF_json.cc
... ... @@ -536,7 +536,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
536 536 } else if (is_obj_key(key, obj, gen)) {
537 537 this->cur_object = key;
538 538 if (setNextStateIfDictionary(key, value, st_object_top)) {
539   - next_obj = pdf.getObjectForJSON(obj, gen);
  539 + next_obj = pdf.objects().get_for_json(obj, gen);
540 540 }
541 541 } else {
542 542 QTC::TC("qpdf", "QPDF_json bad object key");
... ... @@ -740,7 +740,7 @@ QPDF::JSONReactor::makeObject(JSON const& value)
740 740 int gen = 0;
741 741 std::string str;
742 742 if (is_indirect_object(str_v, obj, gen)) {
743   - result = pdf.getObjectForJSON(obj, gen);
  743 + result = pdf.objects().get_for_json(obj, gen);
744 744 } else if (is_unicode_string(str_v, str)) {
745 745 result = QPDFObjectHandle::newUnicodeString(str);
746 746 } else if (is_binary_string(str_v, str)) {
... ...
libqpdf/QPDF_linearization.cc
... ... @@ -287,7 +287,7 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
287 287 {
288 288 QPDFObjGen og;
289 289 QPDFObjectHandle H =
290   - readObjectAtOffset(false, offset, "linearization hint stream", QPDFObjGen(0, 0), og, false);
  290 + objects().read(false, offset, "linearization hint stream", QPDFObjGen(0, 0), og, false);
291 291 qpdf_offset_t min_end_offset = m->xref_table.end_before_space(og);
292 292 qpdf_offset_t max_end_offset = m->xref_table.end_after_space(og);
293 293 if (!H.isStream()) {
... ...
libqpdf/QPDF_objects.cc
... ... @@ -744,7 +744,7 @@ QPDF::Xref_table::read_stream(qpdf_offset_t xref_offset)
744 744 QPDFObjGen x_og;
745 745 QPDFObjectHandle xref_obj;
746 746 try {
747   - xref_obj = qpdf.readObjectAtOffset(
  747 + xref_obj = qpdf.objects().read(
748 748 false, xref_offset, "xref stream", QPDFObjGen(0, 0), x_og, true);
749 749 } catch (QPDFExc&) {
750 750 // ignore -- report error below
... ... @@ -1135,8 +1135,8 @@ QPDF::Xref_table::resolve()
1135 1135 for (auto& item: table) {
1136 1136 ++i;
1137 1137 if (item.type()) {
1138   - if (qpdf.isUnresolved(QPDFObjGen(i, item.gen()))) {
1139   - qpdf.resolve(QPDFObjGen(i, item.gen()));
  1138 + if (objects.unresolved(QPDFObjGen(i, item.gen()))) {
  1139 + objects.resolve(QPDFObjGen(i, item.gen()));
1140 1140 if (may_change && reconstructed_) {
1141 1141 return false;
1142 1142 }
... ... @@ -1177,40 +1177,41 @@ QPDF::Xref_table::read_trailer()
1177 1177 }
1178 1178  
1179 1179 QPDFObjectHandle
1180   -QPDF::readObject(std::string const& description, QPDFObjGen og)
  1180 +QPDF::Objects::read_object(std::string const& description, QPDFObjGen og)
1181 1181 {
1182   - setLastObjectDescription(description, og);
  1182 + qpdf.setLastObjectDescription(description, og);
1183 1183 qpdf_offset_t offset = m->file->tell();
1184 1184 bool empty = false;
1185 1185  
1186   - StringDecrypter decrypter{this, og};
  1186 + StringDecrypter decrypter{&qpdf, og};
1187 1187 StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr;
1188 1188 auto object =
1189   - QPDFParser(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, this, true)
  1189 + QPDFParser(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, &qpdf, true)
1190 1190 .parse(empty, false);
1191 1191 if (empty) {
1192 1192 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1193 1193 // actual PDF files and Adobe Reader appears to ignore them.
1194   - warn(damagedPDF(*m->file, m->file->getLastOffset(), "empty object treated as null"));
  1194 + qpdf.warn(
  1195 + qpdf.damagedPDF(*m->file, m->file->getLastOffset(), "empty object treated as null"));
1195 1196 return object;
1196 1197 }
1197   - auto token = readToken(*m->file);
  1198 + auto token = qpdf.readToken(*m->file);
1198 1199 if (object.isDictionary() && token.isWord("stream")) {
1199   - readStream(object, og, offset);
1200   - token = readToken(*m->file);
  1200 + read_stream(object, og, offset);
  1201 + token = qpdf.readToken(*m->file);
1201 1202 }
1202 1203 if (!token.isWord("endobj")) {
1203 1204 QTC::TC("qpdf", "QPDF err expected endobj");
1204   - warn(damagedPDF("expected endobj"));
  1205 + qpdf.warn(qpdf.damagedPDF("expected endobj"));
1205 1206 }
1206 1207 return object;
1207 1208 }
1208 1209  
1209 1210 // After reading stream dictionary and stream keyword, read rest of stream.
1210 1211 void
1211   -QPDF::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
  1212 +QPDF::Objects::read_stream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
1212 1213 {
1213   - validateStreamLineEnd(object, og, offset);
  1214 + validate_stream_line_end(object, og, offset);
1214 1215  
1215 1216 // Must get offset before accessing any additional objects since resolving a previously
1216 1217 // unresolved indirect object will change file position.
... ... @@ -1223,33 +1224,34 @@ QPDF::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
1223 1224 if (!length_obj.isInteger()) {
1224 1225 if (length_obj.isNull()) {
1225 1226 QTC::TC("qpdf", "QPDF stream without length");
1226   - throw damagedPDF(offset, "stream dictionary lacks /Length key");
  1227 + throw qpdf.damagedPDF(offset, "stream dictionary lacks /Length key");
1227 1228 }
1228 1229 QTC::TC("qpdf", "QPDF stream length not integer");
1229   - throw damagedPDF(offset, "/Length key in stream dictionary is not an integer");
  1230 + throw qpdf.damagedPDF(offset, "/Length key in stream dictionary is not an integer");
1230 1231 }
1231 1232  
1232 1233 length = toS(length_obj.getUIntValue());
1233 1234 // Seek in two steps to avoid potential integer overflow
1234 1235 m->file->seek(stream_offset, SEEK_SET);
1235 1236 m->file->seek(toO(length), SEEK_CUR);
1236   - if (!readToken(*m->file).isWord("endstream")) {
  1237 + if (!qpdf.readToken(*m->file).isWord("endstream")) {
1237 1238 QTC::TC("qpdf", "QPDF missing endstream");
1238   - throw damagedPDF("expected endstream");
  1239 + throw qpdf.damagedPDF("expected endstream");
1239 1240 }
1240 1241 } catch (QPDFExc& e) {
1241 1242 if (m->attempt_recovery) {
1242   - warn(e);
1243   - length = recoverStreamLength(m->file_sp, og, stream_offset);
  1243 + qpdf.warn(e);
  1244 + length = recover_stream_length(m->file_sp, og, stream_offset);
1244 1245 } else {
1245 1246 throw;
1246 1247 }
1247 1248 }
1248   - object = {QPDF_Stream::create(this, og, object, stream_offset, length)};
  1249 + object = {QPDF_Stream::create(&qpdf, og, object, stream_offset, length)};
1249 1250 }
1250 1251  
1251 1252 void
1252   -QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
  1253 +QPDF::Objects::validate_stream_line_end(
  1254 + QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
1253 1255 {
1254 1256 // The PDF specification states that the word "stream" should be followed by either a carriage
1255 1257 // return and a newline or by a newline alone. It specifically disallowed following it by a
... ... @@ -1281,7 +1283,7 @@ QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset
1281 1283 // stream data in spite of not having seen a newline.
1282 1284 QTC::TC("qpdf", "QPDF stream with CR only");
1283 1285 m->file->unreadCh(ch);
1284   - warn(damagedPDF(
  1286 + qpdf.warn(qpdf.damagedPDF(
1285 1287 m->file->tell(), "stream keyword followed by carriage return only"));
1286 1288 }
1287 1289 }
... ... @@ -1290,28 +1292,29 @@ QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset
1290 1292 if (!QUtil::is_space(ch)) {
1291 1293 QTC::TC("qpdf", "QPDF stream without newline");
1292 1294 m->file->unreadCh(ch);
1293   - warn(damagedPDF(
  1295 + qpdf.warn(qpdf.damagedPDF(
1294 1296 m->file->tell(), "stream keyword not followed by proper line terminator"));
1295 1297 return;
1296 1298 }
1297   - warn(damagedPDF(m->file->tell(), "stream keyword followed by extraneous whitespace"));
  1299 + qpdf.warn(
  1300 + qpdf.damagedPDF(m->file->tell(), "stream keyword followed by extraneous whitespace"));
1298 1301 }
1299 1302 }
1300 1303  
1301 1304 QPDFObjectHandle
1302   -QPDF::readObjectInStream(std::shared_ptr<InputSource>& input, int obj)
  1305 +QPDF::Objects::readObjectInStream(std::shared_ptr<InputSource>& input, int obj)
1303 1306 {
1304 1307 m->last_object_description.erase(7); // last_object_description starts with "object "
1305 1308 m->last_object_description += std::to_string(obj);
1306 1309 m->last_object_description += " 0";
1307 1310  
1308 1311 bool empty = false;
1309   - auto object = QPDFParser(*input, m->last_object_description, m->tokenizer, nullptr, this, true)
  1312 + auto object = QPDFParser(*input, m->last_object_description, m->tokenizer, nullptr, &qpdf, true)
1310 1313 .parse(empty, false);
1311 1314 if (empty) {
1312 1315 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1313 1316 // actual PDF files and Adobe Reader appears to ignore them.
1314   - warn(damagedPDF(*input, input->getLastOffset(), "empty object treated as null"));
  1317 + qpdf.warn(qpdf.damagedPDF(*input, input->getLastOffset(), "empty object treated as null"));
1315 1318 }
1316 1319 return object;
1317 1320 }
... ... @@ -1329,18 +1332,18 @@ QPDF::findEndstream()
1329 1332 }
1330 1333  
1331 1334 size_t
1332   -QPDF::recoverStreamLength(
1333   - std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset)
  1335 +QPDF::Objects::recover_stream_length(
  1336 + std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset)
1334 1337 {
1335 1338 // Try to reconstruct stream length by looking for endstream or endobj
1336   - warn(damagedPDF(*input, stream_offset, "attempting to recover stream length"));
  1339 + qpdf.warn(qpdf.damagedPDF(*input, stream_offset, "attempting to recover stream length"));
1337 1340  
1338   - PatternFinder ef(*this, &QPDF::findEndstream);
  1341 + PatternFinder ef(qpdf, &QPDF::findEndstream);
1339 1342 size_t length = 0;
1340 1343 if (m->file->findFirst("end", stream_offset, 0, ef)) {
1341 1344 length = toS(m->file->tell() - stream_offset);
1342 1345 // Reread endstream but, if it was endobj, don't skip that.
1343   - QPDFTokenizer::Token t = readToken(*m->file);
  1346 + QPDFTokenizer::Token t = qpdf.readToken(*m->file);
1344 1347 if (t.getValue() == "endobj") {
1345 1348 m->file->seek(m->file->getLastOffset(), SEEK_SET);
1346 1349 }
... ... @@ -1361,10 +1364,10 @@ QPDF::recoverStreamLength(
1361 1364 }
1362 1365  
1363 1366 if (length == 0) {
1364   - warn(damagedPDF(
  1367 + qpdf.warn(qpdf.damagedPDF(
1365 1368 *input, stream_offset, "unable to recover stream data; treating stream as empty"));
1366 1369 } else {
1367   - warn(damagedPDF(
  1370 + qpdf.warn(qpdf.damagedPDF(
1368 1371 *input, stream_offset, "recovered stream length: " + std::to_string(length)));
1369 1372 }
1370 1373  
... ... @@ -1373,7 +1376,7 @@ QPDF::recoverStreamLength(
1373 1376 }
1374 1377  
1375 1378 QPDFObjectHandle
1376   -QPDF::readObjectAtOffset(
  1379 +QPDF::Objects::read(
1377 1380 bool try_recovery,
1378 1381 qpdf_offset_t offset,
1379 1382 std::string const& description,
... ... @@ -1392,7 +1395,7 @@ QPDF::readObjectAtOffset(
1392 1395 check_og = false;
1393 1396 try_recovery = false;
1394 1397 }
1395   - setLastObjectDescription(description, exp_og);
  1398 + qpdf.setLastObjectDescription(description, exp_og);
1396 1399  
1397 1400 if (!m->attempt_recovery) {
1398 1401 try_recovery = false;
... ... @@ -1404,49 +1407,49 @@ QPDF::readObjectAtOffset(
1404 1407 // these.
1405 1408 if (offset == 0) {
1406 1409 QTC::TC("qpdf", "QPDF bogus 0 offset", 0);
1407   - warn(damagedPDF(0, "object has offset 0"));
  1410 + qpdf.warn(qpdf.damagedPDF(0, "object has offset 0"));
1408 1411 return QPDFObjectHandle::newNull();
1409 1412 }
1410 1413  
1411 1414 m->file->seek(offset, SEEK_SET);
1412 1415 try {
1413   - QPDFTokenizer::Token tobjid = readToken(*m->file);
  1416 + QPDFTokenizer::Token tobjid = qpdf.readToken(*m->file);
1414 1417 bool objidok = tobjid.isInteger();
1415 1418 QTC::TC("qpdf", "QPDF check objid", objidok ? 1 : 0);
1416 1419 if (!objidok) {
1417 1420 QTC::TC("qpdf", "QPDF expected n n obj");
1418   - throw damagedPDF(offset, "expected n n obj");
  1421 + throw qpdf.damagedPDF(offset, "expected n n obj");
1419 1422 }
1420   - QPDFTokenizer::Token tgen = readToken(*m->file);
  1423 + QPDFTokenizer::Token tgen = qpdf.readToken(*m->file);
1421 1424 bool genok = tgen.isInteger();
1422 1425 QTC::TC("qpdf", "QPDF check generation", genok ? 1 : 0);
1423 1426 if (!genok) {
1424   - throw damagedPDF(offset, "expected n n obj");
  1427 + throw qpdf.damagedPDF(offset, "expected n n obj");
1425 1428 }
1426   - QPDFTokenizer::Token tobj = readToken(*m->file);
  1429 + QPDFTokenizer::Token tobj = qpdf.readToken(*m->file);
1427 1430  
1428 1431 bool objok = tobj.isWord("obj");
1429 1432 QTC::TC("qpdf", "QPDF check obj", objok ? 1 : 0);
1430 1433  
1431 1434 if (!objok) {
1432   - throw damagedPDF(offset, "expected n n obj");
  1435 + throw qpdf.damagedPDF(offset, "expected n n obj");
1433 1436 }
1434 1437 int objid = QUtil::string_to_int(tobjid.getValue().c_str());
1435 1438 int generation = QUtil::string_to_int(tgen.getValue().c_str());
1436 1439 og = QPDFObjGen(objid, generation);
1437 1440 if (objid == 0) {
1438 1441 QTC::TC("qpdf", "QPDF object id 0");
1439   - throw damagedPDF(offset, "object with ID 0");
  1442 + throw qpdf.damagedPDF(offset, "object with ID 0");
1440 1443 }
1441 1444 if (check_og && (exp_og != og)) {
1442 1445 QTC::TC("qpdf", "QPDF err wrong objid/generation");
1443   - QPDFExc e = damagedPDF(offset, "expected " + exp_og.unparse(' ') + " obj");
  1446 + QPDFExc e = qpdf.damagedPDF(offset, "expected " + exp_og.unparse(' ') + " obj");
1444 1447 if (try_recovery) {
1445 1448 // Will be retried below
1446 1449 throw e;
1447 1450 } else {
1448 1451 // We can try reading the object anyway even if the ID doesn't match.
1449   - warn(e);
  1452 + qpdf.warn(e);
1450 1453 }
1451 1454 }
1452 1455 } catch (QPDFExc& e) {
... ... @@ -1455,11 +1458,10 @@ QPDF::readObjectAtOffset(
1455 1458 m->xref_table.reconstruct(e);
1456 1459 if (m->xref_table.type(exp_og) == 1) {
1457 1460 QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset");
1458   - return readObjectAtOffset(
1459   - false, m->xref_table.offset(exp_og), description, exp_og, og, false);
  1461 + return read(false, m->xref_table.offset(exp_og), description, exp_og, og, false);
1460 1462 } else {
1461 1463 QTC::TC("qpdf", "QPDF object gone after xref reconstruction");
1462   - warn(damagedPDF(
  1464 + qpdf.warn(qpdf.damagedPDF(
1463 1465 "",
1464 1466 0,
1465 1467 ("object " + exp_og.unparse(' ') +
... ... @@ -1471,9 +1473,9 @@ QPDF::readObjectAtOffset(
1471 1473 }
1472 1474 }
1473 1475  
1474   - QPDFObjectHandle oh = readObject(description, og);
  1476 + QPDFObjectHandle oh = read_object(description, og);
1475 1477  
1476   - if (isUnresolved(og)) {
  1478 + if (unresolved(og)) {
1477 1479 // Store the object in the cache here so it gets cached whether we first know the offset or
1478 1480 // whether we first know the object ID and generation (in which we case we would get here
1479 1481 // through resolve).
... ... @@ -1492,7 +1494,7 @@ QPDF::readObjectAtOffset(
1492 1494 break;
1493 1495 }
1494 1496 } else {
1495   - throw damagedPDF(m->file->tell(), "EOF after endobj");
  1497 + throw qpdf.damagedPDF(m->file->tell(), "EOF after endobj");
1496 1498 }
1497 1499 }
1498 1500 qpdf_offset_t end_after_space = m->file->tell();
... ... @@ -1526,7 +1528,7 @@ QPDF::readObjectAtOffset(
1526 1528 } else {
1527 1529 m->xref_table.linearization_offsets(
1528 1530 toS(og.getObj()), end_before_space, end_after_space);
1529   - updateCache(og, oh.getObj());
  1531 + update_table(og, oh.getObj());
1530 1532 }
1531 1533 }
1532 1534  
... ... @@ -1534,21 +1536,21 @@ QPDF::readObjectAtOffset(
1534 1536 }
1535 1537  
1536 1538 QPDFObject*
1537   -QPDF::resolve(QPDFObjGen og)
  1539 +QPDF::Objects::resolve(QPDFObjGen og)
1538 1540 {
1539   - if (!isUnresolved(og)) {
1540   - return m->objects.obj_cache[og].object.get();
  1541 + if (!unresolved(og)) {
  1542 + return obj_cache[og].object.get();
1541 1543 }
1542 1544  
1543 1545 if (m->resolving.count(og)) {
1544 1546 // This can happen if an object references itself directly or indirectly in some key that
1545 1547 // has to be resolved during object parsing, such as stream length.
1546 1548 QTC::TC("qpdf", "QPDF recursion loop in resolve");
1547   - warn(damagedPDF("", "loop detected resolving object " + og.unparse(' ')));
1548   - updateCache(og, QPDF_Null::create());
1549   - return m->objects.obj_cache[og].object.get();
  1549 + qpdf.warn(qpdf.damagedPDF("", "loop detected resolving object " + og.unparse(' ')));
  1550 + update_table(og, QPDF_Null::create());
  1551 + return obj_cache[og].object.get();
1550 1552 }
1551   - ResolveRecorder rr(this, og);
  1553 + ResolveRecorder rr(&qpdf, og);
1552 1554  
1553 1555 try {
1554 1556 switch (m->xref_table.type(og)) {
... ... @@ -1558,8 +1560,7 @@ QPDF::resolve(QPDFObjGen og)
1558 1560 {
1559 1561 // Object stored in cache by readObjectAtOffset
1560 1562 QPDFObjGen a_og;
1561   - QPDFObjectHandle oh =
1562   - readObjectAtOffset(true, m->xref_table.offset(og), "", og, a_og, false);
  1563 + QPDFObjectHandle oh = read(true, m->xref_table.offset(og), "", og, a_og, false);
1563 1564 }
1564 1565 break;
1565 1566  
... ... @@ -1568,50 +1569,50 @@ QPDF::resolve(QPDFObjGen og)
1568 1569 break;
1569 1570  
1570 1571 default:
1571   - throw damagedPDF(
  1572 + throw qpdf.damagedPDF(
1572 1573 "", 0, ("object " + og.unparse('/') + " has unexpected xref entry type"));
1573 1574 }
1574 1575 } catch (QPDFExc& e) {
1575   - warn(e);
  1576 + qpdf.warn(e);
1576 1577 } catch (std::exception& e) {
1577   - warn(damagedPDF(
  1578 + qpdf.warn(qpdf.damagedPDF(
1578 1579 "", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what())));
1579 1580 }
1580 1581  
1581   - if (isUnresolved(og)) {
  1582 + if (unresolved(og)) {
1582 1583 // PDF spec says unknown objects resolve to the null object.
1583 1584 QTC::TC("qpdf", "QPDF resolve failure to null");
1584   - updateCache(og, QPDF_Null::create());
  1585 + update_table(og, QPDF_Null::create());
1585 1586 }
1586 1587  
1587   - auto result(m->objects.obj_cache[og].object);
1588   - result->setDefaultDescription(this, og);
  1588 + auto result(obj_cache[og].object);
  1589 + result->setDefaultDescription(&qpdf, og);
1589 1590 return result.get();
1590 1591 }
1591 1592  
1592 1593 void
1593   -QPDF::resolveObjectsInStream(int obj_stream_number)
  1594 +QPDF::Objects::resolveObjectsInStream(int obj_stream_number)
1594 1595 {
1595 1596 if (m->resolved_object_streams.count(obj_stream_number)) {
1596 1597 return;
1597 1598 }
1598 1599 m->resolved_object_streams.insert(obj_stream_number);
1599 1600 // Force resolution of object stream
1600   - QPDFObjectHandle obj_stream = getObjectByID(obj_stream_number, 0);
  1601 + QPDFObjectHandle obj_stream = qpdf.getObject(obj_stream_number, 0);
1601 1602 if (!obj_stream.isStream()) {
1602   - throw damagedPDF(
  1603 + throw qpdf.damagedPDF(
1603 1604 "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream");
1604 1605 }
1605 1606  
1606 1607 QPDFObjectHandle dict = obj_stream.getDict();
1607 1608 if (!dict.isDictionaryOfType("/ObjStm")) {
1608 1609 QTC::TC("qpdf", "QPDF ERR object stream with wrong type");
1609   - warn(damagedPDF(
  1610 + qpdf.warn(qpdf.damagedPDF(
1610 1611 "supposed object stream " + std::to_string(obj_stream_number) + " has wrong type"));
1611 1612 }
1612 1613  
1613 1614 if (!(dict.getKey("/N").isInteger() && dict.getKey("/First").isInteger())) {
1614   - throw damagedPDF(
  1615 + throw qpdf.damagedPDF(
1615 1616 ("object stream " + std::to_string(obj_stream_number) + " has incorrect keys"));
1616 1617 }
1617 1618  
... ... @@ -1629,8 +1630,8 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
1629 1630  
1630 1631 qpdf_offset_t last_offset = -1;
1631 1632 for (int i = 0; i < n; ++i) {
1632   - QPDFTokenizer::Token tnum = readToken(*input);
1633   - QPDFTokenizer::Token toffset = readToken(*input);
  1633 + QPDFTokenizer::Token tnum = qpdf.readToken(*input);
  1634 + QPDFTokenizer::Token toffset = qpdf.readToken(*input);
1634 1635 if (!(tnum.isInteger() && toffset.isInteger())) {
1635 1636 throw damagedPDF(
1636 1637 *input,
... ... @@ -1646,7 +1647,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
1646 1647 }
1647 1648 if (num == obj_stream_number) {
1648 1649 QTC::TC("qpdf", "QPDF ignore self-referential object stream");
1649   - warn(damagedPDF(
  1650 + qpdf.warn(damagedPDF(
1650 1651 *input,
1651 1652 m->last_object_description,
1652 1653 input->getLastOffset(),
... ... @@ -1678,7 +1679,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
1678 1679 int offset = iter.second;
1679 1680 input->seek(offset, SEEK_SET);
1680 1681 QPDFObjectHandle oh = readObjectInStream(input, iter.first);
1681   - updateCache(og, oh.getObj());
  1682 + update_table(og, oh.getObj());
1682 1683 } else {
1683 1684 QTC::TC("qpdf", "QPDF not caching overridden objstm object");
1684 1685 }
... ... @@ -1686,33 +1687,33 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
1686 1687 }
1687 1688  
1688 1689 void
1689   -QPDF::updateCache(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& object)
  1690 +QPDF::Objects::update_table(QPDFObjGen og, const std::shared_ptr<QPDFObject>& object)
1690 1691 {
1691   - object->setObjGen(this, og);
1692   - if (isCached(og)) {
1693   - auto& cache = m->objects.obj_cache[og];
  1692 + object->setObjGen(&qpdf, og);
  1693 + if (cached(og)) {
  1694 + auto& cache = obj_cache[og];
1694 1695 cache.object->assign(object);
1695 1696 } else {
1696   - m->objects.obj_cache[og] = ObjCache(object);
  1697 + obj_cache[og] = ObjCache(object);
1697 1698 }
1698 1699 }
1699 1700  
1700 1701 bool
1701   -QPDF::isCached(QPDFObjGen const& og)
  1702 +QPDF::Objects::cached(QPDFObjGen og)
1702 1703 {
1703   - return m->objects.obj_cache.count(og) != 0;
  1704 + return obj_cache.count(og) != 0;
1704 1705 }
1705 1706  
1706 1707 bool
1707   -QPDF::isUnresolved(QPDFObjGen const& og)
  1708 +QPDF::Objects::unresolved(QPDFObjGen og)
1708 1709 {
1709   - return !isCached(og) || m->objects.obj_cache[og].object->isUnresolved();
  1710 + return !cached(og) || obj_cache[og].object->isUnresolved();
1710 1711 }
1711 1712  
1712 1713 QPDFObjGen
1713   -QPDF::nextObjGen()
  1714 +QPDF::Objects::next_id()
1714 1715 {
1715   - int max_objid = toI(getObjectCount());
  1716 + int max_objid = toI(qpdf.getObjectCount());
1716 1717 if (max_objid == std::numeric_limits<int>::max()) {
1717 1718 throw std::range_error("max object id is too high to create new objects");
1718 1719 }
... ... @@ -1720,41 +1721,40 @@ QPDF::nextObjGen()
1720 1721 }
1721 1722  
1722 1723 QPDFObjectHandle
1723   -QPDF::makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj)
  1724 +QPDF::Objects::make_indirect(std::shared_ptr<QPDFObject> const& obj)
1724 1725 {
1725   - QPDFObjGen next{nextObjGen()};
1726   - m->objects.obj_cache[next] = ObjCache(obj);
1727   - return newIndirect(next, m->objects.obj_cache[next].object);
  1726 + QPDFObjGen next{next_id()};
  1727 + obj_cache[next] = ObjCache(obj);
  1728 + return qpdf.newIndirect(next, obj_cache[next].object);
1728 1729 }
1729 1730  
1730 1731 std::shared_ptr<QPDFObject>
1731   -QPDF::getObjectForParser(int id, int gen, bool parse_pdf)
  1732 +QPDF::Objects::get_for_parser(int id, int gen, bool parse_pdf)
1732 1733 {
1733 1734 // This method is called by the parser and therefore must not resolve any objects.
1734 1735 auto og = QPDFObjGen(id, gen);
1735   - if (auto iter = m->objects.obj_cache.find(og); iter != m->objects.obj_cache.end()) {
  1736 + if (auto iter = obj_cache.find(og); iter != obj_cache.end()) {
1736 1737 return iter->second.object;
1737 1738 }
1738 1739 if (m->xref_table.type(og) || !m->xref_table.initialized()) {
1739   - return m->objects.obj_cache.insert({og, QPDF_Unresolved::create(this, og)})
1740   - .first->second.object;
  1740 + return obj_cache.insert({og, QPDF_Unresolved::create(&qpdf, og)}).first->second.object;
1741 1741 }
1742 1742 if (parse_pdf) {
1743 1743 return QPDF_Null::create();
1744 1744 }
1745   - return m->objects.obj_cache.insert({og, QPDF_Null::create(this, og)}).first->second.object;
  1745 + return obj_cache.insert({og, QPDF_Null::create(&qpdf, og)}).first->second.object;
1746 1746 }
1747 1747  
1748 1748 std::shared_ptr<QPDFObject>
1749   -QPDF::getObjectForJSON(int id, int gen)
  1749 +QPDF::Objects::get_for_json(int id, int gen)
1750 1750 {
1751 1751 auto og = QPDFObjGen(id, gen);
1752   - auto [it, inserted] = m->objects.obj_cache.try_emplace(og);
  1752 + auto [it, inserted] = obj_cache.try_emplace(og);
1753 1753 auto& obj = it->second.object;
1754 1754 if (inserted) {
1755 1755 obj = (m->xref_table.initialized() && !m->xref_table.type(og))
1756   - ? QPDF_Null::create(this, og)
1757   - : QPDF_Unresolved::create(this, og);
  1756 + ? QPDF_Null::create(&qpdf, og)
  1757 + : QPDF_Unresolved::create(&qpdf, og);
1758 1758 }
1759 1759 return obj;
1760 1760 }
... ... @@ -1766,17 +1766,17 @@ QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh)
1766 1766 QTC::TC("qpdf", "QPDF replaceObject called with indirect object");
1767 1767 throw std::logic_error("QPDF::replaceObject called with indirect object handle");
1768 1768 }
1769   - updateCache(og, oh.getObj());
  1769 + objects().update_table(og, oh.getObj());
1770 1770 }
1771 1771  
1772 1772 void
1773   -QPDF::removeObject(QPDFObjGen og)
  1773 +QPDF::Objects::erase(QPDFObjGen og)
1774 1774 {
1775   - if (auto cached = m->objects.obj_cache.find(og); cached != m->objects.obj_cache.end()) {
  1775 + if (auto cached = obj_cache.find(og); cached != obj_cache.end()) {
1776 1776 // Take care of any object handles that may be floating around.
1777 1777 cached->second.object->assign(QPDF_Null::create());
1778 1778 cached->second.object->setObjGen(nullptr, QPDFObjGen());
1779   - m->objects.obj_cache.erase(cached);
  1779 + obj_cache.erase(cached);
1780 1780 }
1781 1781 }
1782 1782  
... ... @@ -1784,13 +1784,13 @@ void
1784 1784 QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2)
1785 1785 {
1786 1786 // Force objects to be read from the input source if needed, then swap them in the cache.
1787   - resolve(og1);
1788   - resolve(og2);
  1787 + m->objects.resolve(og1);
  1788 + m->objects.resolve(og2);
1789 1789 m->objects.obj_cache[og1].object->swapWith(m->objects.obj_cache[og2].object);
1790 1790 }
1791 1791  
1792 1792 size_t
1793   -QPDF::tableSize()
  1793 +QPDF::Objects::table_size()
1794 1794 {
1795 1795 // If obj_cache is dense, accommodate all object in tables,else accommodate only original
1796 1796 // objects.
... ... @@ -1798,35 +1798,35 @@ QPDF::tableSize()
1798 1798 if (max_xref > 0) {
1799 1799 --max_xref;
1800 1800 }
1801   - auto max_obj = m->objects.obj_cache.size() ? m->objects.obj_cache.crbegin()->first.getObj() : 0;
  1801 + auto max_obj = obj_cache.size() ? obj_cache.crbegin()->first.getObj() : 0;
1802 1802 auto max_id = std::numeric_limits<int>::max() - 1;
1803 1803 if (max_obj >= max_id || max_xref >= max_id) {
1804 1804 // Temporary fix. Long-term solution is
1805 1805 // - QPDFObjGen to enforce objgens are valid and sensible
1806 1806 // - xref table and obj cache to protect against insertion of impossibly large obj ids
1807   - stopOnError("Impossibly large object id encountered.");
  1807 + qpdf.stopOnError("Impossibly large object id encountered.");
1808 1808 }
1809   - if (max_obj < 1.1 * std::max(toI(m->objects.obj_cache.size()), max_xref)) {
  1809 + if (max_obj < 1.1 * std::max(toI(obj_cache.size()), max_xref)) {
1810 1810 return toS(++max_obj);
1811 1811 }
1812 1812 return toS(++max_xref);
1813 1813 }
1814 1814  
1815 1815 std::vector<QPDFObjGen>
1816   -QPDF::getCompressibleObjVector()
  1816 +QPDF::Objects::compressible_vector()
1817 1817 {
1818   - return getCompressibleObjGens<QPDFObjGen>();
  1818 + return compressible<QPDFObjGen>();
1819 1819 }
1820 1820  
1821 1821 std::vector<bool>
1822   -QPDF::getCompressibleObjSet()
  1822 +QPDF::Objects::compressible_set()
1823 1823 {
1824   - return getCompressibleObjGens<bool>();
  1824 + return compressible<bool>();
1825 1825 }
1826 1826  
1827 1827 template <typename T>
1828 1828 std::vector<T>
1829   -QPDF::getCompressibleObjGens()
  1829 +QPDF::Objects::compressible()
1830 1830 {
1831 1831 // Return a list of objects that are allowed to be in object streams. Walk through the objects
1832 1832 // by traversing the document from the root, including a traversal of the pages tree. This
... ... @@ -1838,14 +1838,14 @@ QPDF::getCompressibleObjGens()
1838 1838 QPDFObjectHandle encryption_dict = m->xref_table.trailer().getKey("/Encrypt");
1839 1839 QPDFObjGen encryption_dict_og = encryption_dict.getObjGen();
1840 1840  
1841   - const size_t max_obj = getObjectCount();
  1841 + const size_t max_obj = qpdf.getObjectCount();
1842 1842 std::vector<bool> visited(max_obj, false);
1843 1843 std::vector<QPDFObjectHandle> queue;
1844 1844 queue.reserve(512);
1845 1845 queue.push_back(m->xref_table.trailer());
1846 1846 std::vector<T> result;
1847 1847 if constexpr (std::is_same_v<T, QPDFObjGen>) {
1848   - result.reserve(m->objects.obj_cache.size());
  1848 + result.reserve(obj_cache.size());
1849 1849 } else if constexpr (std::is_same_v<T, bool>) {
1850 1850 result.resize(max_obj + 1U, false);
1851 1851 } else {
... ... @@ -1869,9 +1869,9 @@ QPDF::getCompressibleObjGens()
1869 1869 // Check whether this is the current object. If not, remove it (which changes it into a
1870 1870 // direct null and therefore stops us from revisiting it) and move on to the next object
1871 1871 // in the queue.
1872   - auto upper = m->objects.obj_cache.upper_bound(og);
1873   - if (upper != m->objects.obj_cache.end() && upper->first.getObj() == og.getObj()) {
1874   - removeObject(og);
  1872 + auto upper = obj_cache.upper_bound(og);
  1873 + if (upper != obj_cache.end() && upper->first.getObj() == og.getObj()) {
  1874 + erase(og);
1875 1875 continue;
1876 1876 }
1877 1877  
... ...
libqpdf/qpdf/QPDF_objects.hh
... ... @@ -8,11 +8,54 @@
8 8 class QPDF::Objects
9 9 {
10 10 public:
11   - Objects(QPDF& qpdf, QPDF::Members* m)
  11 + Objects(QPDF& qpdf, QPDF::Members* m) :
  12 + qpdf(qpdf),
  13 + m(m)
12 14 {
13 15 }
14 16  
15 17 std::map<QPDFObjGen, ObjCache> obj_cache;
  18 +
  19 + QPDFObjectHandle readObjectInStream(std::shared_ptr<InputSource>& input, int obj);
  20 + QPDFObjectHandle read(
  21 + bool attempt_recovery,
  22 + qpdf_offset_t offset,
  23 + std::string const& description,
  24 + QPDFObjGen exp_og,
  25 + QPDFObjGen& og,
  26 + bool skip_cache_if_in_xref);
  27 + QPDFObject* resolve(QPDFObjGen og);
  28 + void resolveObjectsInStream(int obj_stream_number);
  29 + void update_table(QPDFObjGen og, std::shared_ptr<QPDFObject> const& object);
  30 + QPDFObjGen next_id();
  31 + QPDFObjectHandle make_indirect(std::shared_ptr<QPDFObject> const& obj);
  32 + std::shared_ptr<QPDFObject> get_for_parser(int id, int gen, bool parse_pdf);
  33 + std::shared_ptr<QPDFObject> get_for_json(int id, int gen);
  34 +
  35 + // Get a list of objects that would be permitted in an object stream.
  36 + template <typename T>
  37 + std::vector<T> compressible();
  38 + std::vector<QPDFObjGen> compressible_vector();
  39 + std::vector<bool> compressible_set();
  40 +
  41 + // Used by QPDFWriter to determine the vector part of its object tables.
  42 + size_t table_size();
  43 +
  44 + private:
  45 + friend class QPDF::Xref_table;
  46 +
  47 + void erase(QPDFObjGen og);
  48 + bool cached(QPDFObjGen og);
  49 + bool unresolved(QPDFObjGen og);
  50 +
  51 + QPDFObjectHandle read_object(std::string const& description, QPDFObjGen og);
  52 + void read_stream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
  53 + void validate_stream_line_end(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
  54 + size_t recover_stream_length(
  55 + std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset);
  56 +
  57 + QPDF& qpdf;
  58 + QPDF::Members* m;
16 59 }; // Objects
17 60  
18 61 #endif // QPDF_OBJECTS_HH
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -13,6 +13,7 @@ class QPDF::Xref_table
13 13 public:
14 14 Xref_table(QPDF& qpdf, QPDF::Objects& objects, InputSource* const& file) :
15 15 qpdf(qpdf),
  16 + objects(objects),
16 17 file(file)
17 18 {
18 19 tokenizer.allowEOF();
... ... @@ -340,6 +341,7 @@ class QPDF::Xref_table
340 341 }
341 342  
342 343 QPDF& qpdf;
  344 + QPDF::Objects& objects;
343 345 InputSource* const& file;
344 346 QPDFTokenizer tokenizer;
345 347  
... ... @@ -362,21 +364,6 @@ class QPDF::Xref_table
362 364 qpdf_offset_t first_item_offset_{0}; // actual value from file
363 365 };
364 366  
365   -// The Resolver class is restricted to QPDFObject so that only it can resolve indirect
366   -// references.
367   -class QPDF::Resolver
368   -{
369   - friend class QPDFObject;
370   - friend class QPDF_Unresolved;
371   -
372   - private:
373   - static QPDFObject*
374   - resolved(QPDF* qpdf, QPDFObjGen og)
375   - {
376   - return qpdf->resolve(og);
377   - }
378   -};
379   -
380 367 // StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data.
381 368 class QPDF::StreamCopier
382 369 {
... ... @@ -408,7 +395,7 @@ class QPDF::ParseGuard
408 395 static std::shared_ptr<QPDFObject>
409 396 getObject(QPDF* qpdf, int id, int gen, bool parse_pdf)
410 397 {
411   - return qpdf->getObjectForParser(id, gen, parse_pdf);
  398 + return qpdf->objects().get_for_parser(id, gen, parse_pdf);
412 399 }
413 400  
414 401 ~ParseGuard()
... ... @@ -803,11 +790,32 @@ class QPDF::Members
803 790 };
804 791  
805 792 inline QPDF::Objects&
806   -QPDF::objects()
  793 +QPDF::objects() noexcept
  794 +{
  795 + return m->objects;
  796 +}
  797 +
  798 +inline QPDF::Objects const&
  799 +QPDF::objects() const noexcept
807 800 {
808 801 return m->objects;
809 802 }
810 803  
  804 +// The Resolver class is restricted to QPDFObject so that only it can resolve indirect
  805 +// references.
  806 +class QPDF::Resolver
  807 +{
  808 + friend class QPDFObject;
  809 + friend class QPDF_Unresolved;
  810 +
  811 + private:
  812 + static QPDFObject*
  813 + resolved(QPDF* qpdf, QPDFObjGen og)
  814 + {
  815 + return qpdf->m->objects.resolve(og);
  816 + }
  817 +};
  818 +
811 819 // JobSetter class is restricted to QPDFJob.
812 820 class QPDF::JobSetter
813 821 {
... ... @@ -884,13 +892,13 @@ class QPDF::Writer
884 892 static std::vector<QPDFObjGen>
885 893 getCompressibleObjGens(QPDF& qpdf)
886 894 {
887   - return qpdf.getCompressibleObjVector();
  895 + return qpdf.objects().compressible_vector();
888 896 }
889 897  
890 898 static std::vector<bool>
891 899 getCompressibleObjSet(QPDF& qpdf)
892 900 {
893   - return qpdf.getCompressibleObjSet();
  901 + return qpdf.objects().compressible_set();
894 902 }
895 903  
896 904 static Xref_table const&
... ... @@ -902,7 +910,7 @@ class QPDF::Writer
902 910 static size_t
903 911 tableSize(QPDF& qpdf)
904 912 {
905   - return qpdf.tableSize();
  913 + return qpdf.objects().table_size();
906 914 }
907 915 };
908 916  
... ...