OpenSystemsDevelopment / qpdf

Browse Code »

Commit 9f0cc086b158b477ad942eb159b929b7b9ae7a0b

Authored by m-holger 2024-10-06 17:45:29 +0100

1 parent 12b67a32

Copy QPDF.cc to new QPDF_objects

Inline Side-by-side

Showing 1 changed file with 2997 additions and 0 deletions

libqpdf/QPDF_objects.cc 0 → 100644

View file @9f0cc08

		1	+#include <qpdf/qpdf-config.h> // include first for large file support
		2	+
		3	+#include <qpdf/QPDF_private.hh>
		4	+
		5	+#include <array>
		6	+#include <atomic>
		7	+#include <cstring>
		8	+#include <limits>
		9	+#include <map>
		10	+#include <regex>
		11	+#include <sstream>
		12	+#include <vector>
		13	+
		14	+#include <qpdf/BufferInputSource.hh>
		15	+#include <qpdf/FileInputSource.hh>
		16	+#include <qpdf/OffsetInputSource.hh>
		17	+#include <qpdf/Pipeline.hh>
		18	+#include <qpdf/QPDFExc.hh>
		19	+#include <qpdf/QPDFLogger.hh>
		20	+#include <qpdf/QPDFObject_private.hh>
		21	+#include <qpdf/QPDFParser.hh>
		22	+#include <qpdf/QPDF_Array.hh>
		23	+#include <qpdf/QPDF_Dictionary.hh>
		24	+#include <qpdf/QPDF_Null.hh>
		25	+#include <qpdf/QPDF_Reserved.hh>
		26	+#include <qpdf/QPDF_Stream.hh>
		27	+#include <qpdf/QPDF_Unresolved.hh>
		28	+#include <qpdf/QTC.hh>
		29	+#include <qpdf/QUtil.hh>
		30	+
		31	+// This must be a fixed value. This API returns a const reference to it, and the C API relies on its
		32	+// being static as well.
		33	+std::string const QPDF::qpdf_version(QPDF_VERSION);
		34	+
		35	+namespace
		36	+{
		37	+ class InvalidInputSource final: public InputSource
		38	+ {
		39	+ public:
		40	+ InvalidInputSource(std::string const& name) :
		41	+ name(name)
		42	+ {
		43	+ }
		44	+ ~InvalidInputSource() final = default;
		45	+ qpdf_offset_t
		46	+ findAndSkipNextEOL() final
		47	+ {
		48	+ throwException();
		49	+ return 0;
		50	+ }
		51	+ std::string const&
		52	+ getName() const final
		53	+ {
		54	+ return name;
		55	+ }
		56	+ qpdf_offset_t
		57	+ tell() final
		58	+ {
		59	+ throwException();
		60	+ return 0;
		61	+ }
		62	+ void
		63	+ seek(qpdf_offset_t offset, int whence) final
		64	+ {
		65	+ throwException();
		66	+ }
		67	+ void
		68	+ rewind() final
		69	+ {
		70	+ throwException();
		71	+ }
		72	+ size_t
		73	+ read(char* buffer, size_t length) final
		74	+ {
		75	+ throwException();
		76	+ return 0;
		77	+ }
		78	+ void
		79	+ unreadCh(char ch) final
		80	+ {
		81	+ throwException();
		82	+ }
		83	+
		84	+ private:
		85	+ void
		86	+ throwException()
		87	+ {
		88	+ throw std::logic_error("QPDF operation attempted on a QPDF object with no input "
		89	+ "source. QPDF operations are invalid before processFile (or "
		90	+ "another process method) or after closeInputSource");
		91	+ }
		92	+
		93	+ std::string const& name;
		94	+ };
		95	+} // namespace
		96	+
		97	+QPDF::ForeignStreamData::ForeignStreamData(
		98	+ std::shared_ptr<EncryptionParameters> encp,
		99	+ std::shared_ptr<InputSource> file,
		100	+ QPDFObjGen const& foreign_og,
		101	+ qpdf_offset_t offset,
		102	+ size_t length,
		103	+ QPDFObjectHandle local_dict) :
		104	+ encp(encp),
		105	+ file(file),
		106	+ foreign_og(foreign_og),
		107	+ offset(offset),
		108	+ length(length),
		109	+ local_dict(local_dict)
		110	+{
		111	+}
		112	+
		113	+QPDF::CopiedStreamDataProvider::CopiedStreamDataProvider(QPDF& destination_qpdf) :
		114	+ QPDFObjectHandle::StreamDataProvider(true),
		115	+ destination_qpdf(destination_qpdf)
		116	+{
		117	+}
		118	+
		119	+bool
		120	+QPDF::CopiedStreamDataProvider::provideStreamData(
		121	+ QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry)
		122	+{
		123	+ std::shared_ptr<ForeignStreamData> foreign_data = foreign_stream_data[og];
		124	+ bool result = false;
		125	+ if (foreign_data.get()) {
		126	+ result = destination_qpdf.pipeForeignStreamData(
		127	+ foreign_data, pipeline, suppress_warnings, will_retry);
		128	+ QTC::TC("qpdf", "QPDF copy foreign with data", result ? 0 : 1);
		129	+ } else {
		130	+ auto foreign_stream = foreign_streams[og];
		131	+ result = foreign_stream.pipeStreamData(
		132	+ pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry);
		133	+ QTC::TC("qpdf", "QPDF copy foreign with foreign_stream", result ? 0 : 1);
		134	+ }
		135	+ return result;
		136	+}
		137	+
		138	+void
		139	+QPDF::CopiedStreamDataProvider::registerForeignStream(
		140	+ QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream)
		141	+{
		142	+ this->foreign_streams[local_og] = foreign_stream;
		143	+}
		144	+
		145	+void
		146	+QPDF::CopiedStreamDataProvider::registerForeignStream(
		147	+ QPDFObjGen const& local_og, std::shared_ptr<ForeignStreamData> foreign_stream)
		148	+{
		149	+ this->foreign_stream_data[local_og] = foreign_stream;
		150	+}
		151	+
		152	+QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen const& og) :
		153	+ qpdf(qpdf),
		154	+ og(og)
		155	+{
		156	+}
		157	+
		158	+void
		159	+QPDF::StringDecrypter::decryptString(std::string& val)
		160	+{
		161	+ qpdf->decryptString(val, og);
		162	+}
		163	+
		164	+std::string const&
		165	+QPDF::QPDFVersion()
		166	+{
		167	+ // The C API relies on this being a static value.
		168	+ return QPDF::qpdf_version;
		169	+}
		170	+
		171	+QPDF::EncryptionParameters::EncryptionParameters() :
		172	+ encrypted(false),
		173	+ encryption_initialized(false),
		174	+ encryption_V(0),
		175	+ encryption_R(0),
		176	+ encrypt_metadata(true),
		177	+ cf_stream(e_none),
		178	+ cf_string(e_none),
		179	+ cf_file(e_none),
		180	+ user_password_matched(false),
		181	+ owner_password_matched(false)
		182	+{
		183	+}
		184	+
		185	+QPDF::Members::Members(QPDF& qpdf) :
		186	+ log(QPDFLogger::defaultLogger()),
		187	+ file_sp(new InvalidInputSource(no_input_name)),
		188	+ file(file_sp.get()),
		189	+ encp(new EncryptionParameters),
		190	+ xref_table(qpdf, file)
		191	+{
		192	+}
		193	+
		194	+QPDF::QPDF() :
		195	+ m(new Members(*this))
		196	+{
		197	+ m->tokenizer.allowEOF();
		198	+ // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout
		199	+ // the lifetime of this running application.
		200	+ static std::atomic<unsigned long long> unique_id{0};
		201	+ m->unique_id = unique_id.fetch_add(1ULL);
		202	+}
		203	+
		204	+QPDF::~QPDF()
		205	+{
		206	+ // If two objects are mutually referential (through each object having an array or dictionary
		207	+ // that contains an indirect reference to the other), the circular references in the
		208	+ // std::shared_ptr objects will prevent the objects from being deleted. Walk through all objects
		209	+ // in the object cache, which is those objects that we read from the file, and break all
		210	+ // resolved indirect references by replacing them with an internal object type representing that
		211	+ // they have been destroyed. Note that we can't break references like this at any time when the
		212	+ // QPDF object is active. The call to reset also causes all direct QPDFObjectHandle objects that
		213	+ // are reachable from this object to release their association with this QPDF. Direct objects
		214	+ // are not destroyed since they can be moved to other QPDF objects safely.
		215	+
		216	+ for (auto const& iter: m->obj_cache) {
		217	+ iter.second.object->disconnect();
		218	+ if (iter.second.object->getTypeCode() != ::ot_null) {
		219	+ iter.second.object->destroy();
		220	+ }
		221	+ }
		222	+}
		223	+
		224	+std::shared_ptr<QPDF>
		225	+QPDF::create()
		226	+{
		227	+ return std::make_shared<QPDF>();
		228	+}
		229	+
		230	+void
		231	+QPDF::processFile(char const* filename, char const* password)
		232	+{
		233	+ auto* fi = new FileInputSource(filename);
		234	+ processInputSource(std::shared_ptr<InputSource>(fi), password);
		235	+}
		236	+
		237	+void
		238	+QPDF::processFile(char const* description, FILE* filep, bool close_file, char const* password)
		239	+{
		240	+ auto* fi = new FileInputSource(description, filep, close_file);
		241	+ processInputSource(std::shared_ptr<InputSource>(fi), password);
		242	+}
		243	+
		244	+void
		245	+QPDF::processMemoryFile(
		246	+ char const* description, char const* buf, size_t length, char const* password)
		247	+{
		248	+ processInputSource(
		249	+ std::shared_ptr<InputSource>(
		250	+ // line-break
		251	+ new BufferInputSource(
		252	+ description, new Buffer(QUtil::unsigned_char_pointer(buf), length), true)),
		253	+ password);
		254	+}
		255	+
		256	+void
		257	+QPDF::processInputSource(std::shared_ptr<InputSource> source, char const* password)
		258	+{
		259	+ m->file_sp = source;
		260	+ m->file = source.get();
		261	+ parse(password);
		262	+}
		263	+
		264	+void
		265	+QPDF::closeInputSource()
		266	+{
		267	+ m->no_input_name = "closed input source";
		268	+ m->file_sp = std::shared_ptr<InputSource>(new InvalidInputSource(m->no_input_name));
		269	+ m->file = m->file_sp.get();
		270	+}
		271	+
		272	+void
		273	+QPDF::setPasswordIsHexKey(bool val)
		274	+{
		275	+ m->provided_password_is_hex_key = val;
		276	+}
		277	+
		278	+void
		279	+QPDF::emptyPDF()
		280	+{
		281	+ m->pdf_version = "1.3";
		282	+ m->no_input_name = "empty PDF";
		283	+ m->xref_table.initialize_empty();
		284	+}
		285	+
		286	+void
		287	+QPDF::registerStreamFilter(
		288	+ std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
		289	+{
		290	+ QPDF_Stream::registerStreamFilter(filter_name, factory);
		291	+}
		292	+
		293	+void
		294	+QPDF::setIgnoreXRefStreams(bool val)
		295	+{
		296	+ m->xref_table.ignore_streams(val);
		297	+}
		298	+
		299	+std::shared_ptr<QPDFLogger>
		300	+QPDF::getLogger()
		301	+{
		302	+ return m->log;
		303	+}
		304	+
		305	+void
		306	+QPDF::setLogger(std::shared_ptr<QPDFLogger> l)
		307	+{
		308	+ m->log = l;
		309	+}
		310	+
		311	+void
		312	+QPDF::setOutputStreams(std::ostream* out, std::ostream* err)
		313	+{
		314	+ setLogger(QPDFLogger::create());
		315	+ m->log->setOutputStreams(out, err);
		316	+}
		317	+
		318	+void
		319	+QPDF::setSuppressWarnings(bool val)
		320	+{
		321	+ m->suppress_warnings = val;
		322	+}
		323	+
		324	+void
		325	+QPDF::setMaxWarnings(size_t val)
		326	+{
		327	+ m->max_warnings = val;
		328	+}
		329	+
		330	+void
		331	+QPDF::setAttemptRecovery(bool val)
		332	+{
		333	+ m->attempt_recovery = val;
		334	+ m->xref_table.attempt_recovery(val);
		335	+}
		336	+
		337	+void
		338	+QPDF::setImmediateCopyFrom(bool val)
		339	+{
		340	+ m->immediate_copy_from = val;
		341	+}
		342	+
		343	+std::vector<QPDFExc>
		344	+QPDF::getWarnings()
		345	+{
		346	+ std::vector<QPDFExc> result = m->warnings;
		347	+ m->warnings.clear();
		348	+ return result;
		349	+}
		350	+
		351	+bool
		352	+QPDF::anyWarnings() const
		353	+{
		354	+ return !m->warnings.empty();
		355	+}
		356	+
		357	+size_t
		358	+QPDF::numWarnings() const
		359	+{
		360	+ return m->warnings.size();
		361	+}
		362	+
		363	+bool
		364	+QPDF::validatePDFVersion(char const*& p, std::string& version)
		365	+{
		366	+ bool valid = QUtil::is_digit(*p);
		367	+ if (valid) {
		368	+ while (QUtil::is_digit(*p)) {
		369	+ version.append(1, *p++);
		370	+ }
		371	+ if ((p == '.') && QUtil::is_digit((p + 1))) {
		372	+ version.append(1, *p++);
		373	+ while (QUtil::is_digit(*p)) {
		374	+ version.append(1, *p++);
		375	+ }
		376	+ } else {
		377	+ valid = false;
		378	+ }
		379	+ }
		380	+ return valid;
		381	+}
		382	+
		383	+bool
		384	+QPDF::findHeader()
		385	+{
		386	+ qpdf_offset_t global_offset = m->file->tell();
		387	+ std::string line = m->file->readLine(1024);
		388	+ char const* p = line.c_str();
		389	+ if (strncmp(p, "%PDF-", 5) != 0) {
		390	+ throw std::logic_error("findHeader is not looking at %PDF-");
		391	+ }
		392	+ p += 5;
		393	+ std::string version;
		394	+ // Note: The string returned by line.c_str() is always null-terminated. The code below never
		395	+ // overruns the buffer because a null character always short-circuits further advancement.
		396	+ bool valid = validatePDFVersion(p, version);
		397	+ if (valid) {
		398	+ m->pdf_version = version;
		399	+ if (global_offset != 0) {
		400	+ // Empirical evidence strongly suggests that when there is leading material prior to the
		401	+ // PDF header, all explicit offsets in the file are such that 0 points to the beginning
		402	+ // of the header.
		403	+ QTC::TC("qpdf", "QPDF global offset");
		404	+ m->file_sp =
		405	+ std::shared_ptr<InputSource>(new OffsetInputSource(m->file_sp, global_offset));
		406	+ m->file = m->file_sp.get();
		407	+ }
		408	+ }
		409	+ return valid;
		410	+}
		411	+
		412	+bool
		413	+QPDF::findStartxref()
		414	+{
		415	+ if (readToken(m->file).isWord("startxref") && readToken(m->file).isInteger()) {
		416	+ // Position in front of offset token
		417	+ m->file->seek(m->file->getLastOffset(), SEEK_SET);
		418	+ return true;
		419	+ }
		420	+ return false;
		421	+}
		422	+
		423	+void
		424	+QPDF::parse(char const* password)
		425	+{
		426	+ if (password) {
		427	+ m->encp->provided_password = password;
		428	+ }
		429	+
		430	+ // Find the header anywhere in the first 1024 bytes of the file.
		431	+ PatternFinder hf(*this, &QPDF::findHeader);
		432	+ if (!m->file->findFirst("%PDF-", 0, 1024, hf)) {
		433	+ QTC::TC("qpdf", "QPDF not a pdf file");
		434	+ warn(damagedPDF("", 0, "can't find PDF header"));
		435	+ // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode
		436	+ m->pdf_version = "1.2";
		437	+ }
		438	+
		439	+ m->xref_table.initialize();
		440	+ initializeEncryption();
		441	+ if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) {
		442	+ // QPDFs created from JSON have an empty xref table and no root object yet.
		443	+ throw damagedPDF("", 0, "unable to find page tree");
		444	+ }
		445	+}
		446	+
		447	+void
		448	+QPDF::inParse(bool v)
		449	+{
		450	+ if (m->in_parse == v) {
		451	+ // This happens if QPDFParser::parse tries to resolve an indirect object while it is
		452	+ // parsing.
		453	+ throw std::logic_error("QPDF: re-entrant parsing detected. This is a qpdf bug."
		454	+ " Please report at https://github.com/qpdf/qpdf/issues.");
		455	+ }
		456	+ m->in_parse = v;
		457	+}
		458	+
		459	+void
		460	+QPDF::warn(QPDFExc const& e)
		461	+{
		462	+ if (m->max_warnings > 0 && m->warnings.size() >= m->max_warnings) {
		463	+ stopOnError("Too many warnings - file is too badly damaged");
		464	+ }
		465	+ m->warnings.push_back(e);
		466	+ if (!m->suppress_warnings) {
		467	+ *m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n";
		468	+ }
		469	+}
		470	+
		471	+void
		472	+QPDF::warn(
		473	+ qpdf_error_code_e error_code,
		474	+ std::string const& object,
		475	+ qpdf_offset_t offset,
		476	+ std::string const& message)
		477	+{
		478	+ warn(QPDFExc(error_code, getFilename(), object, offset, message));
		479	+}
		480	+
		481	+void
		482	+QPDF::Xref_table::initialize_empty()
		483	+{
		484	+ initialized_ = true;
		485	+ trailer_ = QPDFObjectHandle::newDictionary();
		486	+ auto rt = qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary());
		487	+ auto pgs = qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary());
		488	+ pgs.replaceKey("/Type", QPDFObjectHandle::newName("/Pages"));
		489	+ pgs.replaceKey("/Kids", QPDFObjectHandle::newArray());
		490	+ pgs.replaceKey("/Count", QPDFObjectHandle::newInteger(0));
		491	+ rt.replaceKey("/Type", QPDFObjectHandle::newName("/Catalog"));
		492	+ rt.replaceKey("/Pages", pgs);
		493	+ trailer_.replaceKey("/Root", rt);
		494	+ trailer_.replaceKey("/Size", QPDFObjectHandle::newInteger(3));
		495	+}
		496	+
		497	+void
		498	+QPDF::Xref_table::initialize_json()
		499	+{
		500	+ initialized_ = true;
		501	+ table.resize(1);
		502	+ trailer_ = QPDFObjectHandle::newDictionary();
		503	+ trailer_.replaceKey("/Size", QPDFObjectHandle::newInteger(1));
		504	+}
		505	+
		506	+void
		507	+QPDF::Xref_table::initialize()
		508	+{
		509	+ // PDF spec says %%EOF must be found within the last 1024 bytes of/ the file. We add an extra
		510	+ // 30 characters to leave room for the startxref stuff.
		511	+ file->seek(0, SEEK_END);
		512	+ qpdf_offset_t end_offset = file->tell();
		513	+ // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic
		514	+ // scenarios at least 3 bytes are required.
		515	+ if (max_id_ > end_offset / 3) {
		516	+ max_id_ = static_cast<int>(end_offset / 3);
		517	+ }
		518	+ qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
		519	+ PatternFinder sf(qpdf, &QPDF::findStartxref);
		520	+ qpdf_offset_t xref_offset = 0;
		521	+ if (file->findLast("startxref", start_offset, 0, sf)) {
		522	+ xref_offset = QUtil::string_to_ll(read_token().getValue().c_str());
		523	+ }
		524	+
		525	+ try {
		526	+ if (xref_offset == 0) {
		527	+ QTC::TC("qpdf", "QPDF can't find startxref");
		528	+ throw damaged_pdf("can't find startxref");
		529	+ }
		530	+ try {
		531	+ read(xref_offset);
		532	+ } catch (QPDFExc&) {
		533	+ throw;
		534	+ } catch (std::exception& e) {
		535	+ throw damaged_pdf(std::string("error reading xref: ") + e.what());
		536	+ }
		537	+ } catch (QPDFExc& e) {
		538	+ if (attempt_recovery_) {
		539	+ reconstruct(e);
		540	+ QTC::TC("qpdf", "QPDF reconstructed xref table");
		541	+ } else {
		542	+ throw;
		543	+ }
		544	+ }
		545	+
		546	+ initialized_ = true;
		547	+}
		548	+
		549	+void
		550	+QPDF::Xref_table::reconstruct(QPDFExc& e)
		551	+{
		552	+ if (reconstructed_) {
		553	+ // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because
		554	+ // qpdf is throwing many fewer exceptions while parsing. Most situations are warnings now.
		555	+ throw e;
		556	+ }
		557	+
		558	+ // If recovery generates more than 1000 warnings, the file is so severely damaged that there
		559	+ // probably is no point trying to continue.
		560	+ const auto max_warnings = qpdf.m->warnings.size() + 1000U;
		561	+ auto check_warnings = [this, max_warnings]() {
		562	+ if (qpdf.m->warnings.size() > max_warnings) {
		563	+ throw damaged_pdf("too many errors while reconstructing cross-reference table");
		564	+ }
		565	+ };
		566	+
		567	+ reconstructed_ = true;
		568	+ // We may find more objects, which may contain dangling references.
		569	+ qpdf.m->fixed_dangling_refs = false;
		570	+
		571	+ warn_damaged("file is damaged");
		572	+ qpdf.warn(e);
		573	+ warn_damaged("Attempting to reconstruct cross-reference table");
		574	+
		575	+ // Delete all references to type 1 (uncompressed) objects
		576	+ for (auto& iter: table) {
		577	+ if (iter.type() == 1) {
		578	+ iter = {};
		579	+ }
		580	+ }
		581	+
		582	+ std::vector<std::tuple<int, int, qpdf_offset_t>> objects;
		583	+ std::vector<qpdf_offset_t> trailers;
		584	+ int max_found = 0;
		585	+
		586	+ file->seek(0, SEEK_END);
		587	+ qpdf_offset_t eof = file->tell();
		588	+ file->seek(0, SEEK_SET);
		589	+ // Don't allow very long tokens here during recovery. All the interesting tokens are covered.
		590	+ static size_t const MAX_LEN = 10;
		591	+ while (file->tell() < eof) {
		592	+ QPDFTokenizer::Token t1 = read_token(MAX_LEN);
		593	+ qpdf_offset_t token_start = file->tell() - toO(t1.getValue().length());
		594	+ if (t1.isInteger()) {
		595	+ auto pos = file->tell();
		596	+ QPDFTokenizer::Token t2 = read_token(MAX_LEN);
		597	+ if (t2.isInteger() && read_token(MAX_LEN).isWord("obj")) {
		598	+ int obj = QUtil::string_to_int(t1.getValue().c_str());
		599	+ int gen = QUtil::string_to_int(t2.getValue().c_str());
		600	+ if (obj <= max_id_) {
		601	+ objects.emplace_back(obj, gen, token_start);
		602	+ if (obj > max_found) {
		603	+ max_found = obj;
		604	+ }
		605	+ } else {
		606	+ warn_damaged("ignoring object with impossibly large id " + std::to_string(obj));
		607	+ }
		608	+ }
		609	+ file->seek(pos, SEEK_SET);
		610	+ } else if (!trailer_ && t1.isWord("trailer")) {
		611	+ trailers.emplace_back(file->tell());
		612	+ }
		613	+ file->findAndSkipNextEOL();
		614	+ }
		615	+
		616	+ table.resize(toS(max_found) + 1);
		617	+
		618	+ for (auto tr: trailers) {
		619	+ file->seek(tr, SEEK_SET);
		620	+ auto t = read_trailer();
		621	+ if (!t.isDictionary()) {
		622	+ // Oh well. It was worth a try.
		623	+ } else {
		624	+ trailer_ = t;
		625	+ break;
		626	+ }
		627	+ check_warnings();
		628	+ }
		629	+
		630	+ auto rend = objects.rend();
		631	+ for (auto it = objects.rbegin(); it != rend; it++) {
		632	+ auto [obj, gen, token_start] = *it;
		633	+ insert(obj, 1, token_start, gen);
		634	+ check_warnings();
		635	+ }
		636	+
		637	+ if (!trailer_) {
		638	+ qpdf_offset_t max_offset{0};
		639	+ // If there are any xref streams, take the last one to appear.
		640	+ int i = -1;
		641	+ for (auto const& item: table) {
		642	+ ++i;
		643	+ if (item.type() != 1) {
		644	+ continue;
		645	+ }
		646	+ auto oh = qpdf.getObject(i, item.gen());
		647	+ try {
		648	+ if (!oh.isStreamOfType("/XRef")) {
		649	+ continue;
		650	+ }
		651	+ } catch (std::exception&) {
		652	+ continue;
		653	+ }
		654	+ auto offset = item.offset();
		655	+ if (offset > max_offset) {
		656	+ max_offset = offset;
		657	+ trailer_ = oh.getDict();
		658	+ }
		659	+ check_warnings();
		660	+ }
		661	+ if (max_offset > 0) {
		662	+ try {
		663	+ read(max_offset);
		664	+ } catch (std::exception&) {
		665	+ throw damaged_pdf(
		666	+ "error decoding candidate xref stream while recovering damaged file");
		667	+ }
		668	+ QTC::TC("qpdf", "QPDF recover xref stream");
		669	+ }
		670	+ }
		671	+
		672	+ if (!trailer_) {
		673	+ // We could check the last encountered object to see if it was an xref stream. If so, we
		674	+ // could try to get the trailer from there. This may make it possible to recover files with
		675	+ // bad startxref pointers even when they have object streams.
		676	+
		677	+ throw damaged_pdf("unable to find trailer dictionary while recovering damaged file");
		678	+ }
		679	+ if (table.empty()) {
		680	+ // We cannot check for an empty xref table in parse because empty tables are valid when
		681	+ // creating QPDF objects from JSON.
		682	+ throw damaged_pdf("unable to find objects while recovering damaged file");
		683	+ }
		684	+ check_warnings();
		685	+ if (!initialized_) {
		686	+ initialized_ = true;
		687	+ qpdf.getAllPages();
		688	+ check_warnings();
		689	+ if (qpdf.m->all_pages.empty()) {
		690	+ initialized_ = false;
		691	+ throw damaged_pdf("unable to find any pages while recovering damaged file");
		692	+ }
		693	+ }
		694	+ // We could iterate through the objects looking for streams and try to find objects inside of
		695	+ // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors
		696	+ // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything
		697	+ // that involved looking at stream contents, we'd also have to call initializeEncryption() here.
		698	+ // It's safe to call it more than once.
		699	+}
		700	+
		701	+void
		702	+QPDF::Xref_table::read(qpdf_offset_t xref_offset)
		703	+{
		704	+ std::map<int, int> free_table;
		705	+ std::set<qpdf_offset_t> visited;
		706	+ while (xref_offset) {
		707	+ visited.insert(xref_offset);
		708	+ char buf[7];
		709	+ memset(buf, 0, sizeof(buf));
		710	+ file->seek(xref_offset, SEEK_SET);
		711	+ // Some files miss the mark a little with startxref. We could do a better job of searching
		712	+ // in the neighborhood for something that looks like either an xref table or stream, but the
		713	+ // simple heuristic of skipping whitespace can help with the xref table case and is harmless
		714	+ // with the stream case.
		715	+ bool done = false;
		716	+ bool skipped_space = false;
		717	+ while (!done) {
		718	+ char ch;
		719	+ if (1 == file->read(&ch, 1)) {
		720	+ if (QUtil::is_space(ch)) {
		721	+ skipped_space = true;
		722	+ } else {
		723	+ file->unreadCh(ch);
		724	+ done = true;
		725	+ }
		726	+ } else {
		727	+ QTC::TC("qpdf", "QPDF eof skipping spaces before xref", skipped_space ? 0 : 1);
		728	+ done = true;
		729	+ }
		730	+ }
		731	+
		732	+ file->read(buf, sizeof(buf) - 1);
		733	+ // The PDF spec says xref must be followed by a line terminator, but files exist in the wild
		734	+ // where it is terminated by arbitrary whitespace.
		735	+ if ((strncmp(buf, "xref", 4) == 0) && QUtil::is_space(buf[4])) {
		736	+ if (skipped_space) {
		737	+ QTC::TC("qpdf", "QPDF xref skipped space");
		738	+ warn_damaged("extraneous whitespace seen before xref");
		739	+ }
		740	+ QTC::TC(
		741	+ "qpdf",
		742	+ "QPDF xref space",
		743	+ ((buf[4] == '\n') ? 0
		744	+ : (buf[4] == '\r') ? 1
		745	+ : (buf[4] == ' ') ? 2
		746	+ : 9999));
		747	+ int skip = 4;
		748	+ // buf is null-terminated, and QUtil::is_space('\0') is false, so this won't overrun.
		749	+ while (QUtil::is_space(buf[skip])) {
		750	+ ++skip;
		751	+ }
		752	+ xref_offset = process_section(xref_offset + skip);
		753	+ } else {
		754	+ xref_offset = read_stream(xref_offset);
		755	+ }
		756	+ if (visited.count(xref_offset) != 0) {
		757	+ QTC::TC("qpdf", "QPDF xref loop");
		758	+ throw damaged_pdf("loop detected following xref tables");
		759	+ }
		760	+ }
		761	+
		762	+ if (!trailer_) {
		763	+ throw damaged_pdf("unable to find trailer while reading xref");
		764	+ }
		765	+ int size = trailer_.getKey("/Size").getIntValueAsInt();
		766	+
		767	+ if (size < 3) {
		768	+ throw damaged_pdf("too few objects - file can't have a page tree");
		769	+ }
		770	+
		771	+ // We are no longer reporting what the highest id in the xref table is. I don't think it adds
		772	+ // anything. If we want to report more detail, we should report the total number of missing
		773	+ // entries, including missing entries before the last actual entry.
		774	+}
		775	+
		776	+QPDF::Xref_table::Subsection
		777	+QPDF::Xref_table::subsection(std::string const& line)
		778	+{
		779	+ auto terminate = [this]() -> void {
		780	+ QTC::TC("qpdf", "QPDF invalid xref");
		781	+ throw damaged_table("xref syntax invalid");
		782	+ };
		783	+
		784	+ // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
		785	+ // buffer.
		786	+ char const* p = line.c_str();
		787	+ char const* start = line.c_str();
		788	+
		789	+ // Skip zero or more spaces
		790	+ while (QUtil::is_space(*p)) {
		791	+ ++p;
		792	+ }
		793	+ // Require digit
		794	+ if (!QUtil::is_digit(*p)) {
		795	+ terminate();
		796	+ }
		797	+ // Gather digits
		798	+ std::string obj_str;
		799	+ while (QUtil::is_digit(*p)) {
		800	+ obj_str.append(1, *p++);
		801	+ }
		802	+ // Require space
		803	+ if (!QUtil::is_space(*p)) {
		804	+ terminate();
		805	+ }
		806	+ // Skip spaces
		807	+ while (QUtil::is_space(*p)) {
		808	+ ++p;
		809	+ }
		810	+ // Require digit
		811	+ if (!QUtil::is_digit(*p)) {
		812	+ terminate();
		813	+ }
		814	+ // Gather digits
		815	+ std::string num_str;
		816	+ while (QUtil::is_digit(*p)) {
		817	+ num_str.append(1, *p++);
		818	+ }
		819	+ // Skip any space including line terminators
		820	+ while (QUtil::is_space(*p)) {
		821	+ ++p;
		822	+ }
		823	+ auto obj = QUtil::string_to_int(obj_str.c_str());
		824	+ auto count = QUtil::string_to_int(num_str.c_str());
		825	+ if (obj > max_id() \|\| count > max_id() \|\| (obj + count) > max_id()) {
		826	+ throw damaged_table("xref table subsection header contains impossibly large entry");
		827	+ }
		828	+ return {obj, count, file->getLastOffset() + toI(p - start)};
		829	+}
		830	+
		831	+std::vector<QPDF::Xref_table::Subsection>
		832	+QPDF::Xref_table::bad_subsections(std::string& line, qpdf_offset_t start)
		833	+{
		834	+ std::vector<QPDF::Xref_table::Subsection> result;
		835	+ file->seek(start, SEEK_SET);
		836	+
		837	+ while (true) {
		838	+ line.assign(50, '\0');
		839	+ file->read(line.data(), line.size());
		840	+ auto [obj, num, offset] = result.emplace_back(subsection(line));
		841	+ file->seek(offset, SEEK_SET);
		842	+ for (qpdf_offset_t i = obj; i - num < obj; ++i) {
		843	+ if (!std::get<0>(read_entry())) {
		844	+ QTC::TC("qpdf", "QPDF invalid xref entry");
		845	+ throw damaged_table("invalid xref entry (obj=" + std::to_string(i) + ")");
		846	+ }
		847	+ }
		848	+ qpdf_offset_t pos = file->tell();
		849	+ if (read_token().isWord("trailer")) {
		850	+ return result;
		851	+ } else {
		852	+ file->seek(pos, SEEK_SET);
		853	+ }
		854	+ }
		855	+}
		856	+
		857	+// Optimistically read and parse all subsection headers. If an error is encountered return the
		858	+// result of bad_subsections.
		859	+std::vector<QPDF::Xref_table::Subsection>
		860	+QPDF::Xref_table::subsections(std::string& line)
		861	+{
		862	+ auto recovery_offset = file->tell();
		863	+ try {
		864	+ std::vector<QPDF::Xref_table::Subsection> result;
		865	+
		866	+ while (true) {
		867	+ line.assign(50, '\0');
		868	+ file->read(line.data(), line.size());
		869	+ auto& sub = result.emplace_back(subsection(line));
		870	+ auto count = std::get<1>(sub);
		871	+ auto offset = std::get<2>(sub);
		872	+ file->seek(offset + 20 * toO(count) - 1, SEEK_SET);
		873	+ file->read(line.data(), 1);
		874	+ if (!(line[0] == '\n' \|\| line[0] == '\n')) {
		875	+ return bad_subsections(line, recovery_offset);
		876	+ }
		877	+ qpdf_offset_t pos = file->tell();
		878	+ if (read_token().isWord("trailer")) {
		879	+ return result;
		880	+ } else {
		881	+ file->seek(pos, SEEK_SET);
		882	+ }
		883	+ }
		884	+ } catch (...) {
		885	+ return bad_subsections(line, recovery_offset);
		886	+ }
		887	+}
		888	+
		889	+// Returns (success, f1, f2, type).
		890	+std::tuple<bool, qpdf_offset_t, int, char>
		891	+QPDF::Xref_table::read_bad_entry()
		892	+{
		893	+ qpdf_offset_t f1{0};
		894	+ int f2{0};
		895	+ char type{'\0'};
		896	+ // Reposition after initial read attempt and reread.
		897	+ file->seek(file->getLastOffset(), SEEK_SET);
		898	+ auto line = file->readLine(30);
		899	+
		900	+ // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
		901	+ // buffer.
		902	+ char const* p = line.data();
		903	+
		904	+ // Skip zero or more spaces. There aren't supposed to be any.
		905	+ bool invalid = false;
		906	+ while (QUtil::is_space(*p)) {
		907	+ ++p;
		908	+ QTC::TC("qpdf", "QPDF ignore first space in xref entry");
		909	+ invalid = true;
		910	+ }
		911	+ // Require digit
		912	+ if (!QUtil::is_digit(*p)) {
		913	+ return {false, 0, 0, '\0'};
		914	+ }
		915	+ // Gather digits
		916	+ std::string f1_str;
		917	+ while (QUtil::is_digit(*p)) {
		918	+ f1_str.append(1, *p++);
		919	+ }
		920	+ // Require space
		921	+ if (!QUtil::is_space(*p)) {
		922	+ return {false, 0, 0, '\0'};
		923	+ }
		924	+ if (QUtil::is_space(*(p + 1))) {
		925	+ QTC::TC("qpdf", "QPDF ignore first extra space in xref entry");
		926	+ invalid = true;
		927	+ }
		928	+ // Skip spaces
		929	+ while (QUtil::is_space(*p)) {
		930	+ ++p;
		931	+ }
		932	+ // Require digit
		933	+ if (!QUtil::is_digit(*p)) {
		934	+ return {false, 0, 0, '\0'};
		935	+ }
		936	+ // Gather digits
		937	+ std::string f2_str;
		938	+ while (QUtil::is_digit(*p)) {
		939	+ f2_str.append(1, *p++);
		940	+ }
		941	+ // Require space
		942	+ if (!QUtil::is_space(*p)) {
		943	+ return {false, 0, 0, '\0'};
		944	+ }
		945	+ if (QUtil::is_space(*(p + 1))) {
		946	+ QTC::TC("qpdf", "QPDF ignore second extra space in xref entry");
		947	+ invalid = true;
		948	+ }
		949	+ // Skip spaces
		950	+ while (QUtil::is_space(*p)) {
		951	+ ++p;
		952	+ }
		953	+ if ((p == 'f') \|\| (p == 'n')) {
		954	+ type = *p;
		955	+ } else {
		956	+ return {false, 0, 0, '\0'};
		957	+ }
		958	+ if ((f1_str.length() != 10) \|\| (f2_str.length() != 5)) {
		959	+ QTC::TC("qpdf", "QPDF ignore length error xref entry");
		960	+ invalid = true;
		961	+ }
		962	+
		963	+ if (invalid) {
		964	+ qpdf.warn(damaged_table("accepting invalid xref table entry"));
		965	+ }
		966	+
		967	+ f1 = QUtil::string_to_ll(f1_str.c_str());
		968	+ f2 = QUtil::string_to_int(f2_str.c_str());
		969	+
		970	+ return {true, f1, f2, type};
		971	+}
		972	+
		973	+// Optimistically read and parse xref entry. If entry is bad, call read_bad_xrefEntry and return
		974	+// result. Returns (success, f1, f2, type).
		975	+std::tuple<bool, qpdf_offset_t, int, char>
		976	+QPDF::Xref_table::read_entry()
		977	+{
		978	+ qpdf_offset_t f1{0};
		979	+ int f2{0};
		980	+ char type{'\0'};
		981	+ std::array<char, 21> line;
		982	+ f1 = 0;
		983	+ f2 = 0;
		984	+ if (file->read(line.data(), 20) != 20) {
		985	+ // C++20: [[unlikely]]
		986	+ return {false, 0, 0, '\0'};
		987	+ }
		988	+ line[20] = '\0';
		989	+ char const* p = line.data();
		990	+
		991	+ int f1_len = 0;
		992	+ int f2_len = 0;
		993	+
		994	+ // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
		995	+ // buffer.
		996	+
		997	+ // Gather f1 digits. NB No risk of overflow as 9'999'999'999 < max long long.
		998	+ while (*p == '0') {
		999	+ ++f1_len;
		1000	+ ++p;
		1001	+ }
		1002	+ while (QUtil::is_digit(*p) && f1_len++ < 10) {
		1003	+ f1 *= 10;
		1004	+ f1 += *p++ - '0';
		1005	+ }
		1006	+ // Require space
		1007	+ if (!QUtil::is_space(*p++)) {
		1008	+ // Entry doesn't start with space or digit.
		1009	+ // C++20: [[unlikely]]
		1010	+ return {false, 0, 0, '\0'};
		1011	+ }
		1012	+ // Gather digits. NB No risk of overflow as 99'999 < max int.
		1013	+ while (*p == '0') {
		1014	+ ++f2_len;
		1015	+ ++p;
		1016	+ }
		1017	+ while (QUtil::is_digit(*p) && f2_len++ < 5) {
		1018	+ f2 *= 10;
		1019	+ f2 += static_cast<int>(*p++ - '0');
		1020	+ }
		1021	+ if (QUtil::is_space(p++) && (p == 'f' \|\| *p == 'n')) {
		1022	+ // C++20: [[likely]]
		1023	+ type = *p;
		1024	+ // No test for valid line[19].
		1025	+ if ((++p) && (++p) && (p == '\n' \|\| p == '\r') && f1_len == 10 && f2_len == 5) {
		1026	+ // C++20: [[likely]]
		1027	+ return {true, f1, f2, type};
		1028	+ }
		1029	+ }
		1030	+ return read_bad_entry();
		1031	+}
		1032	+
		1033	+// Read a single cross-reference table section and associated trailer.
		1034	+qpdf_offset_t
		1035	+QPDF::Xref_table::process_section(qpdf_offset_t xref_offset)
		1036	+{
		1037	+ file->seek(xref_offset, SEEK_SET);
		1038	+ std::string line;
		1039	+ auto subs = subsections(line);
		1040	+
		1041	+ auto cur_trailer_offset = file->tell();
		1042	+ auto cur_trailer = read_trailer();
		1043	+ if (!cur_trailer.isDictionary()) {
		1044	+ QTC::TC("qpdf", "QPDF missing trailer");
		1045	+ throw qpdf.damagedPDF("", "expected trailer dictionary");
		1046	+ }
		1047	+
		1048	+ if (!trailer_) {
		1049	+ unsigned int sz;
		1050	+ trailer_ = cur_trailer;
		1051	+
		1052	+ if (!trailer_.hasKey("/Size")) {
		1053	+ QTC::TC("qpdf", "QPDF trailer lacks size");
		1054	+ throw qpdf.damagedPDF("trailer", "trailer dictionary lacks /Size key");
		1055	+ }
		1056	+ if (!trailer_.getKey("/Size").getValueAsUInt(sz)) {
		1057	+ QTC::TC("qpdf", "QPDF trailer size not integer");
		1058	+ throw qpdf.damagedPDF("trailer", "/Size key in trailer dictionary is not an integer");
		1059	+ }
		1060	+ if (sz >= static_cast<unsigned int>(max_id_)) {
		1061	+ QTC::TC("qpdf", "QPDF trailer size impossibly large");
		1062	+ throw qpdf.damagedPDF("trailer", "/Size key in trailer dictionary is impossibly large");
		1063	+ }
		1064	+ table.resize(sz);
		1065	+ }
		1066	+
		1067	+ for (auto [obj, num, offset]: subs) {
		1068	+ file->seek(offset, SEEK_SET);
		1069	+ for (qpdf_offset_t i = obj; i - num < obj; ++i) {
		1070	+ if (i == 0) {
		1071	+ // This is needed by checkLinearization()
		1072	+ first_item_offset_ = file->tell();
		1073	+ }
		1074	+ // For xref_table, these will always be small enough to be ints
		1075	+ auto [success, f1, f2, type] = read_entry();
		1076	+ if (!success) {
		1077	+ throw damaged_table("invalid xref entry (obj=" + std::to_string(i) + ")");
		1078	+ }
		1079	+ if (type == 'f') {
		1080	+ insert_free(QPDFObjGen(toI(i), f2));
		1081	+ } else {
		1082	+ insert(toI(i), 1, f1, f2);
		1083	+ }
		1084	+ }
		1085	+ qpdf_offset_t pos = file->tell();
		1086	+ if (read_token().isWord("trailer")) {
		1087	+ break;
		1088	+ } else {
		1089	+ file->seek(pos, SEEK_SET);
		1090	+ }
		1091	+ }
		1092	+
		1093	+ if (cur_trailer.hasKey("/XRefStm")) {
		1094	+ if (ignore_streams_) {
		1095	+ QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer");
		1096	+ } else {
		1097	+ if (cur_trailer.getKey("/XRefStm").isInteger()) {
		1098	+ // Read the xref stream but disregard any return value -- we'll use our trailer's
		1099	+ // /Prev key instead of the xref stream's.
		1100	+ (void)read_stream(cur_trailer.getKey("/XRefStm").getIntValue());
		1101	+ } else {
		1102	+ throw qpdf.damagedPDF("xref stream", cur_trailer_offset, "invalid /XRefStm");
		1103	+ }
		1104	+ }
		1105	+ }
		1106	+
		1107	+ if (cur_trailer.hasKey("/Prev")) {
		1108	+ if (!cur_trailer.getKey("/Prev").isInteger()) {
		1109	+ QTC::TC("qpdf", "QPDF trailer prev not integer");
		1110	+ throw qpdf.damagedPDF(
		1111	+ "trailer", cur_trailer_offset, "/Prev key in trailer dictionary is not an integer");
		1112	+ }
		1113	+ QTC::TC("qpdf", "QPDF prev key in trailer dictionary");
		1114	+ return cur_trailer.getKey("/Prev").getIntValue();
		1115	+ }
		1116	+
		1117	+ return 0;
		1118	+}
		1119	+
		1120	+// Read a single cross-reference stream.
		1121	+qpdf_offset_t
		1122	+QPDF::Xref_table::read_stream(qpdf_offset_t xref_offset)
		1123	+{
		1124	+ if (!ignore_streams_) {
		1125	+ QPDFObjGen x_og;
		1126	+ QPDFObjectHandle xref_obj;
		1127	+ try {
		1128	+ xref_obj = qpdf.readObjectAtOffset(
		1129	+ false, xref_offset, "xref stream", QPDFObjGen(0, 0), x_og, true);
		1130	+ } catch (QPDFExc&) {
		1131	+ // ignore -- report error below
		1132	+ }
		1133	+ if (xref_obj.isStreamOfType("/XRef")) {
		1134	+ QTC::TC("qpdf", "QPDF found xref stream");
		1135	+ return process_stream(xref_offset, xref_obj);
		1136	+ }
		1137	+ }
		1138	+
		1139	+ QTC::TC("qpdf", "QPDF can't find xref");
		1140	+ throw qpdf.damagedPDF("", xref_offset, "xref not found");
		1141	+ return 0; // unreachable
		1142	+}
		1143	+
		1144	+// Return the entry size of the xref stream and the processed W array.
		1145	+std::pair<int, std::array<int, 3>>
		1146	+QPDF::Xref_table::process_W(
		1147	+ QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged)
		1148	+{
		1149	+ auto W_obj = dict.getKey("/W");
		1150	+ if (!(W_obj.isArray() && W_obj.getArrayNItems() >= 3 && W_obj.getArrayItem(0).isInteger() &&
		1151	+ W_obj.getArrayItem(1).isInteger() && W_obj.getArrayItem(2).isInteger())) {
		1152	+ throw damaged("Cross-reference stream does not have a proper /W key");
		1153	+ }
		1154	+
		1155	+ std::array<int, 3> W;
		1156	+ int entry_size = 0;
		1157	+ auto w_vector = W_obj.getArrayAsVector();
		1158	+ int max_bytes = sizeof(qpdf_offset_t);
		1159	+ for (size_t i = 0; i < 3; ++i) {
		1160	+ W[i] = w_vector[i].getIntValueAsInt();
		1161	+ if (W[i] > max_bytes) {
		1162	+ throw damaged("Cross-reference stream's /W contains impossibly large values");
		1163	+ }
		1164	+ if (W[i] < 0) {
		1165	+ throw damaged("Cross-reference stream's /W contains negative values");
		1166	+ }
		1167	+ entry_size += W[i];
		1168	+ }
		1169	+ if (entry_size == 0) {
		1170	+ throw damaged("Cross-reference stream's /W indicates entry size of 0");
		1171	+ }
		1172	+ return {entry_size, W};
		1173	+}
		1174	+
		1175	+// Validate Size entry and return the maximum number of entries that the xref stream can contain and
		1176	+// the value of the Size entry.
		1177	+std::pair<int, size_t>
		1178	+QPDF::Xref_table::process_Size(
		1179	+ QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged)
		1180	+{
		1181	+ // Number of entries is limited by the highest possible object id and stream size.
		1182	+ auto max_num_entries = std::numeric_limits<int>::max();
		1183	+ if (max_num_entries > (std::numeric_limits<qpdf_offset_t>::max() / entry_size)) {
		1184	+ max_num_entries = toI(std::numeric_limits<qpdf_offset_t>::max() / entry_size);
		1185	+ }
		1186	+
		1187	+ auto Size_obj = dict.getKey("/Size");
		1188	+ long long size;
		1189	+ if (!dict.getKey("/Size").getValueAsInt(size)) {
		1190	+ throw damaged("Cross-reference stream does not have a proper /Size key");
		1191	+ } else if (size < 0) {
		1192	+ throw damaged("Cross-reference stream has a negative /Size key");
		1193	+ } else if (size >= max_num_entries) {
		1194	+ throw damaged("Cross-reference stream has an impossibly large /Size key");
		1195	+ }
		1196	+ // We are not validating that Size <= (Size key of parent xref / trailer).
		1197	+ return {max_num_entries, toS(size)};
		1198	+}
		1199	+
		1200	+// Return the number of entries of the xref stream and the processed Index array.
		1201	+std::pair<int, std::vector<std::pair<int, int>>>
		1202	+QPDF::Xref_table::process_Index(
		1203	+ QPDFObjectHandle& dict, int max_num_entries, std::function<QPDFExc(std::string_view)> damaged)
		1204	+{
		1205	+ auto size = dict.getKey("/Size").getIntValueAsInt();
		1206	+ auto Index_obj = dict.getKey("/Index");
		1207	+
		1208	+ if (Index_obj.isArray()) {
		1209	+ std::vector<std::pair<int, int>> indx;
		1210	+ int num_entries = 0;
		1211	+ auto index_vec = Index_obj.getArrayAsVector();
		1212	+ if ((index_vec.size() % 2) \|\| index_vec.size() < 2) {
		1213	+ throw damaged("Cross-reference stream's /Index has an invalid number of values");
		1214	+ }
		1215	+
		1216	+ int i = 0;
		1217	+ long long first = 0;
		1218	+ for (auto& val: index_vec) {
		1219	+ if (val.isInteger()) {
		1220	+ if (i % 2) {
		1221	+ auto count = val.getIntValue();
		1222	+ if (count <= 0) {
		1223	+ throw damaged(
		1224	+ "Cross-reference stream section claims to contain " +
		1225	+ std::to_string(count) + " entries");
		1226	+ }
		1227	+ // We are guarding against the possibility of num_entries * entry_size
		1228	+ // overflowing. We are not checking that entries are in ascending order as
		1229	+ // required by the spec, which probably should generate a warning. We are also
		1230	+ // not checking that for each subsection first object number + number of entries
		1231	+ // <= /Size. The spec requires us to ignore object number > /Size.
		1232	+ if (first > (max_num_entries - count) \|\|
		1233	+ count > (max_num_entries - num_entries)) {
		1234	+ throw damaged(
		1235	+ "Cross-reference stream claims to contain too many entries: " +
		1236	+ std::to_string(first) + " " + std::to_string(max_num_entries) + " " +
		1237	+ std::to_string(num_entries));
		1238	+ }
		1239	+ indx.emplace_back(static_cast<int>(first), static_cast<int>(count));
		1240	+ num_entries += static_cast<int>(count);
		1241	+ } else {
		1242	+ first = val.getIntValue();
		1243	+ if (first < 0) {
		1244	+ throw damaged(
		1245	+ "Cross-reference stream's /Index contains a negative object id");
		1246	+ } else if (first > max_num_entries) {
		1247	+ throw damaged("Cross-reference stream's /Index contains an impossibly "
		1248	+ "large object id");
		1249	+ }
		1250	+ }
		1251	+ } else {
		1252	+ throw damaged(
		1253	+ "Cross-reference stream's /Index's item " + std::to_string(i) +
		1254	+ " is not an integer");
		1255	+ }
		1256	+ i++;
		1257	+ }
		1258	+ QTC::TC("qpdf", "QPDF xref /Index is array", index_vec.size() == 2 ? 0 : 1);
		1259	+ return {num_entries, indx};
		1260	+ } else if (Index_obj.isNull()) {
		1261	+ QTC::TC("qpdf", "QPDF xref /Index is null");
		1262	+ return {size, {{0, size}}};
		1263	+ } else {
		1264	+ throw damaged("Cross-reference stream does not have a proper /Index key");
		1265	+ }
		1266	+}
		1267	+
		1268	+qpdf_offset_t
		1269	+QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
		1270	+{
		1271	+ auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc {
		1272	+ return qpdf.damagedPDF("xref stream", xref_offset, msg.data());
		1273	+ };
		1274	+
		1275	+ auto dict = xref_obj.getDict();
		1276	+
		1277	+ auto [entry_size, W] = process_W(dict, damaged);
		1278	+ auto [max_num_entries, size] = process_Size(dict, entry_size, damaged);
		1279	+ auto [num_entries, indx] = process_Index(dict, max_num_entries, damaged);
		1280	+
		1281	+ std::shared_ptr<Buffer> bp = xref_obj.getStreamData(qpdf_dl_specialized);
		1282	+ size_t actual_size = bp->getSize();
		1283	+ auto expected_size = toS(entry_size) * toS(num_entries);
		1284	+
		1285	+ if (expected_size != actual_size) {
		1286	+ QPDFExc x = damaged(
		1287	+ "Cross-reference stream data has the wrong size; expected = " +
		1288	+ std::to_string(expected_size) + "; actual = " + std::to_string(actual_size));
		1289	+ if (expected_size > actual_size) {
		1290	+ throw x;
		1291	+ } else {
		1292	+ qpdf.warn(x);
		1293	+ }
		1294	+ }
		1295	+
		1296	+ if (!trailer_) {
		1297	+ trailer_ = dict;
		1298	+ if (size > toS(max_id_)) {
		1299	+ throw damaged("Cross-reference stream /Size entry is impossibly large");
		1300	+ }
		1301	+ table.resize(size);
		1302	+ }
		1303	+
		1304	+ bool saw_first_compressed_object = false;
		1305	+
		1306	+ // Actual size vs. expected size check above ensures that we will not overflow any buffers here.
		1307	+ // We know that entry_size * num_entries is less or equal to the size of the buffer.
		1308	+ auto p = bp->getBuffer();
		1309	+ for (auto [obj, sec_entries]: indx) {
		1310	+ // Process a subsection.
		1311	+ for (int i = 0; i < sec_entries; ++i) {
		1312	+ // Read this entry
		1313	+ std::array<qpdf_offset_t, 3> fields{};
		1314	+ if (W[0] == 0) {
		1315	+ QTC::TC("qpdf", "QPDF default for xref stream field 0");
		1316	+ fields[0] = 1;
		1317	+ }
		1318	+ for (size_t j = 0; j < 3; ++j) {
		1319	+ for (int k = 0; k < W[j]; ++k) {
		1320	+ fields[j] <<= 8;
		1321	+ fields[j] \|= *p++;
		1322	+ }
		1323	+ }
		1324	+
		1325	+ // Get the generation number. The generation number is 0 unless this is an uncompressed
		1326	+ // object record, in which case the generation number appears as the third field.
		1327	+ if (saw_first_compressed_object) {
		1328	+ if (fields[0] != 2) {
		1329	+ uncompressed_after_compressed_ = true;
		1330	+ }
		1331	+ } else if (fields[0] == 2) {
		1332	+ saw_first_compressed_object = true;
		1333	+ }
		1334	+ if (obj == 0) {
		1335	+ // This is needed by checkLinearization()
		1336	+ first_item_offset_ = xref_offset;
		1337	+ } else if (fields[0] == 0) {
		1338	+ // Ignore fields[2], which we don't care about in this case. This works around the
		1339	+ // issue of some PDF files that put invalid values, like -1, here for deleted
		1340	+ // objects.
		1341	+ insert_free(QPDFObjGen(obj, 0));
		1342	+ } else {
		1343	+ insert(obj, toI(fields[0]), fields[1], toI(fields[2]));
		1344	+ }
		1345	+ ++obj;
		1346	+ }
		1347	+ }
		1348	+
		1349	+ if (dict.hasKey("/Prev")) {
		1350	+ if (!dict.getKey("/Prev").isInteger()) {
		1351	+ throw qpdf.damagedPDF(
		1352	+ "xref stream", "/Prev key in xref stream dictionary is not an integer");
		1353	+ }
		1354	+ QTC::TC("qpdf", "QPDF prev key in xref stream dictionary");
		1355	+ return dict.getKey("/Prev").getIntValue();
		1356	+ } else {
		1357	+ return 0;
		1358	+ }
		1359	+}
		1360	+
		1361	+void
		1362	+QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
		1363	+{
		1364	+ // Populate the xref table in such a way that the first reference to an object that we see,
		1365	+ // which is the one in the latest xref table in which it appears, is the one that gets stored.
		1366	+ // This works because we are reading more recent appends before older ones.
		1367	+
		1368	+ // If there is already an entry for this object and generation in the table, it means that a
		1369	+ // later xref table has registered this object. Disregard this one.
		1370	+
		1371	+ int new_gen = f0 == 2 ? 0 : f2;
		1372	+
		1373	+ if (!(obj > 0 && static_cast<size_t>(obj) < table.size() && 0 <= f2 && new_gen < 65535)) {
		1374	+ // We are ignoring invalid objgens. Most will arrive here from xref reconstruction. There
		1375	+ // is probably no point having another warning but we could count invalid items in order to
		1376	+ // decide when to give up.
		1377	+ QTC::TC("qpdf", "QPDF xref overwrite invalid objgen");
		1378	+ return;
		1379	+ }
		1380	+
		1381	+ auto& entry = table[static_cast<size_t>(obj)];
		1382	+ auto old_type = entry.type();
		1383	+
		1384	+ if (!old_type && entry.gen() > 0) {
		1385	+ // At the moment we are processing the updates last to first and therefore the gen doesn't
		1386	+ // matter as long as it > 0 to distinguish it from an uninitialized entry. This will need
		1387	+ // to be revisited when we want to support incremental updates or more comprhensive
		1388	+ // checking.
		1389	+ QTC::TC("qpdf", "QPDF xref deleted object");
		1390	+ return;
		1391	+ }
		1392	+
		1393	+ if (f0 == 2 && static_cast<int>(f1) == obj) {
		1394	+ qpdf.warn(qpdf.damagedPDF(
		1395	+ "xref stream", "self-referential object stream " + std::to_string(obj)));
		1396	+ return;
		1397	+ }
		1398	+
		1399	+ if (old_type && entry.gen() >= new_gen) {
		1400	+ QTC::TC("qpdf", "QPDF xref reused object");
		1401	+ return;
		1402	+ }
		1403	+
		1404	+ switch (f0) {
		1405	+ case 1:
		1406	+ // f2 is generation
		1407	+ QTC::TC("qpdf", "QPDF xref gen > 0", (f2 > 0) ? 1 : 0);
		1408	+ entry = {f2, Uncompressed(f1)};
		1409	+ break;
		1410	+
		1411	+ case 2:
		1412	+ entry = {0, Compressed(toI(f1), f2)};
		1413	+ object_streams_ = true;
		1414	+ break;
		1415	+
		1416	+ default:
		1417	+ throw qpdf.damagedPDF(
		1418	+ "xref stream", "unknown xref stream entry type " + std::to_string(f0));
		1419	+ break;
		1420	+ }
		1421	+}
		1422	+
		1423	+void
		1424	+QPDF::Xref_table::insert_free(QPDFObjGen og)
		1425	+{
		1426	+ // At the moment we are processing the updates last to first and therefore the gen doesn't
		1427	+ // matter as long as it > 0 to distinguish it from an uninitialized entry. This will need to be
		1428	+ // revisited when we want to support incremental updates or more comprhensive checking.
		1429	+ if (og.getObj() < 1) {
		1430	+ return;
		1431	+ }
		1432	+ size_t id = static_cast<size_t>(og.getObj());
		1433	+ if (id < table.size() && !type(id)) {
		1434	+ table[id] = {1, {}};
		1435	+ }
		1436	+}
		1437	+
		1438	+QPDFObjGen
		1439	+QPDF::Xref_table::at_offset(qpdf_offset_t offset) const noexcept
		1440	+{
		1441	+ int id = 0;
		1442	+ int gen = 0;
		1443	+ qpdf_offset_t start = 0;
		1444	+
		1445	+ int i = 0;
		1446	+ for (auto const& item: table) {
		1447	+ auto o = item.offset();
		1448	+ if (start < o && o <= offset) {
		1449	+ start = o;
		1450	+ id = i;
		1451	+ gen = item.gen();
		1452	+ }
		1453	+ ++i;
		1454	+ }
		1455	+ return QPDFObjGen(id, gen);
		1456	+}
		1457	+
		1458	+std::map<QPDFObjGen, QPDFXRefEntry>
		1459	+QPDF::Xref_table::as_map() const
		1460	+{
		1461	+ std::map<QPDFObjGen, QPDFXRefEntry> result;
		1462	+ int i{0};
		1463	+ for (auto const& item: table) {
		1464	+ switch (item.type()) {
		1465	+ case 0:
		1466	+ break;
		1467	+ case 1:
		1468	+ result.emplace(QPDFObjGen(i, item.gen()), item.offset());
		1469	+ break;
		1470	+ case 2:
		1471	+ result.emplace(
		1472	+ QPDFObjGen(i, 0), QPDFXRefEntry(item.stream_number(), item.stream_index()));
		1473	+ break;
		1474	+ default:
		1475	+ throw std::logic_error("Xref_table: invalid entry type");
		1476	+ }
		1477	+ ++i;
		1478	+ }
		1479	+ return result;
		1480	+}
		1481	+
		1482	+void
		1483	+QPDF::showXRefTable()
		1484	+{
		1485	+ m->xref_table.show();
		1486	+}
		1487	+
		1488	+void
		1489	+QPDF::Xref_table::show()
		1490	+{
		1491	+ auto& cout = *qpdf.m->log->getInfo();
		1492	+ int i = -1;
		1493	+ for (auto const& item: table) {
		1494	+ ++i;
		1495	+ if (item.type()) {
		1496	+ cout << std::to_string(i) << "/" << std::to_string(item.gen()) << ": ";
		1497	+ switch (item.type()) {
		1498	+ case 1:
		1499	+ cout << "uncompressed; offset = " << item.offset() << "\n";
		1500	+ break;
		1501	+
		1502	+ case 2:
		1503	+ cout << "compressed; stream = " << item.stream_number()
		1504	+ << ", index = " << item.stream_index() << "\n";
		1505	+ break;
		1506	+
		1507	+ default:
		1508	+ throw std::logic_error(
		1509	+ "unknown cross-reference table type while showing xref_table");
		1510	+ }
		1511	+ }
		1512	+ }
		1513	+}
		1514	+
		1515	+// Resolve all objects in the xref table. If this triggers a xref table reconstruction abort and
		1516	+// return false. Otherwise return true.
		1517	+bool
		1518	+QPDF::Xref_table::resolve()
		1519	+{
		1520	+ bool may_change = !reconstructed_;
		1521	+ int i = -1;
		1522	+ for (auto& item: table) {
		1523	+ ++i;
		1524	+ if (item.type()) {
		1525	+ if (qpdf.isUnresolved(QPDFObjGen(i, item.gen()))) {
		1526	+ qpdf.resolve(QPDFObjGen(i, item.gen()));
		1527	+ if (may_change && reconstructed_) {
		1528	+ return false;
		1529	+ }
		1530	+ }
		1531	+ }
		1532	+ }
		1533	+ return true;
		1534	+}
		1535	+
		1536	+// Ensure all objects in the pdf file, including those in indirect references, appear in the object
		1537	+// cache.
		1538	+void
		1539	+QPDF::fixDanglingReferences(bool force)
		1540	+{
		1541	+ if (m->fixed_dangling_refs) {
		1542	+ return;
		1543	+ }
		1544	+ if (!m->xref_table.resolve()) {
		1545	+ QTC::TC("qpdf", "QPDF fix dangling triggered xref reconstruction");
		1546	+ m->xref_table.resolve();
		1547	+ }
		1548	+ m->fixed_dangling_refs = true;
		1549	+}
		1550	+
		1551	+size_t
		1552	+QPDF::getObjectCount()
		1553	+{
		1554	+ // This method returns the next available indirect object number. makeIndirectObject uses it for
		1555	+ // this purpose. After fixDanglingReferences is called, all objects in the xref table will also
		1556	+ // be in obj_cache.
		1557	+ fixDanglingReferences();
		1558	+ QPDFObjGen og;
		1559	+ if (!m->obj_cache.empty()) {
		1560	+ og = (*(m->obj_cache.rbegin())).first;
		1561	+ }
		1562	+ return toS(og.getObj());
		1563	+}
		1564	+
		1565	+std::vector<QPDFObjectHandle>
		1566	+QPDF::getAllObjects()
		1567	+{
		1568	+ // After fixDanglingReferences is called, all objects are in the object cache.
		1569	+ fixDanglingReferences();
		1570	+ std::vector<QPDFObjectHandle> result;
		1571	+ for (auto const& iter: m->obj_cache) {
		1572	+ result.push_back(newIndirect(iter.first, iter.second.object));
		1573	+ }
		1574	+ return result;
		1575	+}
		1576	+
		1577	+void
		1578	+QPDF::setLastObjectDescription(std::string const& description, QPDFObjGen const& og)
		1579	+{
		1580	+ m->last_object_description.clear();
		1581	+ if (!description.empty()) {
		1582	+ m->last_object_description += description;
		1583	+ if (og.isIndirect()) {
		1584	+ m->last_object_description += ": ";
		1585	+ }
		1586	+ }
		1587	+ if (og.isIndirect()) {
		1588	+ m->last_object_description += "object " + og.unparse(' ');
		1589	+ }
		1590	+}
		1591	+
		1592	+QPDFObjectHandle
		1593	+QPDF::Xref_table::read_trailer()
		1594	+{
		1595	+ qpdf_offset_t offset = file->tell();
		1596	+ bool empty = false;
		1597	+ auto object = QPDFParser(*file, "trailer", tokenizer, nullptr, &qpdf, true).parse(empty, false);
		1598	+ if (empty) {
		1599	+ // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
		1600	+ // actual PDF files and Adobe Reader appears to ignore them.
		1601	+ qpdf.warn(qpdf.damagedPDF("trailer", "empty object treated as null"));
		1602	+ } else if (object.isDictionary() && read_token().isWord("stream")) {
		1603	+ qpdf.warn(qpdf.damagedPDF("trailer", file->tell(), "stream keyword found in trailer"));
		1604	+ }
		1605	+ // Override last_offset so that it points to the beginning of the object we just read
		1606	+ file->setLastOffset(offset);
		1607	+ return object;
		1608	+}
		1609	+
		1610	+QPDFObjectHandle
		1611	+QPDF::readObject(std::string const& description, QPDFObjGen og)
		1612	+{
		1613	+ setLastObjectDescription(description, og);
		1614	+ qpdf_offset_t offset = m->file->tell();
		1615	+ bool empty = false;
		1616	+
		1617	+ StringDecrypter decrypter{this, og};
		1618	+ StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr;
		1619	+ auto object =
		1620	+ QPDFParser(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, this, true)
		1621	+ .parse(empty, false);
		1622	+ if (empty) {
		1623	+ // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
		1624	+ // actual PDF files and Adobe Reader appears to ignore them.
		1625	+ warn(damagedPDF(*m->file, m->file->getLastOffset(), "empty object treated as null"));
		1626	+ return object;
		1627	+ }
		1628	+ auto token = readToken(*m->file);
		1629	+ if (object.isDictionary() && token.isWord("stream")) {
		1630	+ readStream(object, og, offset);
		1631	+ token = readToken(*m->file);
		1632	+ }
		1633	+ if (!token.isWord("endobj")) {
		1634	+ QTC::TC("qpdf", "QPDF err expected endobj");
		1635	+ warn(damagedPDF("expected endobj"));
		1636	+ }
		1637	+ return object;
		1638	+}
		1639	+
		1640	+// After reading stream dictionary and stream keyword, read rest of stream.
		1641	+void
		1642	+QPDF::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
		1643	+{
		1644	+ validateStreamLineEnd(object, og, offset);
		1645	+
		1646	+ // Must get offset before accessing any additional objects since resolving a previously
		1647	+ // unresolved indirect object will change file position.
		1648	+ qpdf_offset_t stream_offset = m->file->tell();
		1649	+ size_t length = 0;
		1650	+
		1651	+ try {
		1652	+ auto length_obj = object.getKey("/Length");
		1653	+
		1654	+ if (!length_obj.isInteger()) {
		1655	+ if (length_obj.isNull()) {
		1656	+ QTC::TC("qpdf", "QPDF stream without length");
		1657	+ throw damagedPDF(offset, "stream dictionary lacks /Length key");
		1658	+ }
		1659	+ QTC::TC("qpdf", "QPDF stream length not integer");
		1660	+ throw damagedPDF(offset, "/Length key in stream dictionary is not an integer");
		1661	+ }
		1662	+
		1663	+ length = toS(length_obj.getUIntValue());
		1664	+ // Seek in two steps to avoid potential integer overflow
		1665	+ m->file->seek(stream_offset, SEEK_SET);
		1666	+ m->file->seek(toO(length), SEEK_CUR);
		1667	+ if (!readToken(*m->file).isWord("endstream")) {
		1668	+ QTC::TC("qpdf", "QPDF missing endstream");
		1669	+ throw damagedPDF("expected endstream");
		1670	+ }
		1671	+ } catch (QPDFExc& e) {
		1672	+ if (m->attempt_recovery) {
		1673	+ warn(e);
		1674	+ length = recoverStreamLength(m->file_sp, og, stream_offset);
		1675	+ } else {
		1676	+ throw;
		1677	+ }
		1678	+ }
		1679	+ object = {QPDF_Stream::create(this, og, object, stream_offset, length)};
		1680	+}
		1681	+
		1682	+void
		1683	+QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
		1684	+{
		1685	+ // The PDF specification states that the word "stream" should be followed by either a carriage
		1686	+ // return and a newline or by a newline alone. It specifically disallowed following it by a
		1687	+ // carriage return alone since, in that case, there would be no way to tell whether the NL in a
		1688	+ // CR NL sequence was part of the stream data. However, some readers, including Adobe reader,
		1689	+ // accept a carriage return by itself when followed by a non-newline character, so that's what
		1690	+ // we do here. We have also seen files that have extraneous whitespace between the stream
		1691	+ // keyword and the newline.
		1692	+ while (true) {
		1693	+ char ch;
		1694	+ if (m->file->read(&ch, 1) == 0) {
		1695	+ // A premature EOF here will result in some other problem that will get reported at
		1696	+ // another time.
		1697	+ return;
		1698	+ }
		1699	+ if (ch == '\n') {
		1700	+ // ready to read stream data
		1701	+ QTC::TC("qpdf", "QPDF stream with NL only");
		1702	+ return;
		1703	+ }
		1704	+ if (ch == '\r') {
		1705	+ // Read another character
		1706	+ if (m->file->read(&ch, 1) != 0) {
		1707	+ if (ch == '\n') {
		1708	+ // Ready to read stream data
		1709	+ QTC::TC("qpdf", "QPDF stream with CRNL");
		1710	+ } else {
		1711	+ // Treat the \r by itself as the whitespace after endstream and start reading
		1712	+ // stream data in spite of not having seen a newline.
		1713	+ QTC::TC("qpdf", "QPDF stream with CR only");
		1714	+ m->file->unreadCh(ch);
		1715	+ warn(damagedPDF(
		1716	+ m->file->tell(), "stream keyword followed by carriage return only"));
		1717	+ }
		1718	+ }
		1719	+ return;
		1720	+ }
		1721	+ if (!QUtil::is_space(ch)) {
		1722	+ QTC::TC("qpdf", "QPDF stream without newline");
		1723	+ m->file->unreadCh(ch);
		1724	+ warn(damagedPDF(
		1725	+ m->file->tell(), "stream keyword not followed by proper line terminator"));
		1726	+ return;
		1727	+ }
		1728	+ warn(damagedPDF(m->file->tell(), "stream keyword followed by extraneous whitespace"));
		1729	+ }
		1730	+}
		1731	+
		1732	+QPDFObjectHandle
		1733	+QPDF::readObjectInStream(std::shared_ptr<InputSource>& input, int obj)
		1734	+{
		1735	+ m->last_object_description.erase(7); // last_object_description starts with "object "
		1736	+ m->last_object_description += std::to_string(obj);
		1737	+ m->last_object_description += " 0";
		1738	+
		1739	+ bool empty = false;
		1740	+ auto object = QPDFParser(*input, m->last_object_description, m->tokenizer, nullptr, this, true)
		1741	+ .parse(empty, false);
		1742	+ if (empty) {
		1743	+ // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
		1744	+ // actual PDF files and Adobe Reader appears to ignore them.
		1745	+ warn(damagedPDF(*input, input->getLastOffset(), "empty object treated as null"));
		1746	+ }
		1747	+ return object;
		1748	+}
		1749	+
		1750	+bool
		1751	+QPDF::findEndstream()
		1752	+{
		1753	+ // Find endstream or endobj. Position the input at that token.
		1754	+ auto t = readToken(*m->file, 20);
		1755	+ if (t.isWord("endobj") \|\| t.isWord("endstream")) {
		1756	+ m->file->seek(m->file->getLastOffset(), SEEK_SET);
		1757	+ return true;
		1758	+ }
		1759	+ return false;
		1760	+}
		1761	+
		1762	+size_t
		1763	+QPDF::recoverStreamLength(
		1764	+ std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset)
		1765	+{
		1766	+ // Try to reconstruct stream length by looking for endstream or endobj
		1767	+ warn(damagedPDF(*input, stream_offset, "attempting to recover stream length"));
		1768	+
		1769	+ PatternFinder ef(*this, &QPDF::findEndstream);
		1770	+ size_t length = 0;
		1771	+ if (m->file->findFirst("end", stream_offset, 0, ef)) {
		1772	+ length = toS(m->file->tell() - stream_offset);
		1773	+ // Reread endstream but, if it was endobj, don't skip that.
		1774	+ QPDFTokenizer::Token t = readToken(*m->file);
		1775	+ if (t.getValue() == "endobj") {
		1776	+ m->file->seek(m->file->getLastOffset(), SEEK_SET);
		1777	+ }
		1778	+ }
		1779	+
		1780	+ if (length) {
		1781	+ // Make sure this is inside this object
		1782	+ auto found = m->xref_table.at_offset(stream_offset + toO(length));
		1783	+ if (found == QPDFObjGen() \|\| found == og) {
		1784	+ // If we are trying to recover an XRef stream the xref table will not contain and
		1785	+ // won't contain any entries, therefore we cannot check the found length. Otherwise we
		1786	+ // found endstream\nendobj within the space allowed for this object, so we're probably
		1787	+ // in good shape.
		1788	+ } else {
		1789	+ QTC::TC("qpdf", "QPDF found wrong endstream in recovery");
		1790	+ length = 0;
		1791	+ }
		1792	+ }
		1793	+
		1794	+ if (length == 0) {
		1795	+ warn(damagedPDF(
		1796	+ *input, stream_offset, "unable to recover stream data; treating stream as empty"));
		1797	+ } else {
		1798	+ warn(damagedPDF(
		1799	+ *input, stream_offset, "recovered stream length: " + std::to_string(length)));
		1800	+ }
		1801	+
		1802	+ QTC::TC("qpdf", "QPDF recovered stream length");
		1803	+ return length;
		1804	+}
		1805	+
		1806	+QPDFTokenizer::Token
		1807	+QPDF::readToken(InputSource& input, size_t max_len)
		1808	+{
		1809	+ return m->tokenizer.readToken(input, m->last_object_description, true, max_len);
		1810	+}
		1811	+
		1812	+QPDFObjectHandle
		1813	+QPDF::readObjectAtOffset(
		1814	+ bool try_recovery,
		1815	+ qpdf_offset_t offset,
		1816	+ std::string const& description,
		1817	+ QPDFObjGen exp_og,
		1818	+ QPDFObjGen& og,
		1819	+ bool skip_cache_if_in_xref)
		1820	+{
		1821	+ bool check_og = true;
		1822	+ if (exp_og.getObj() == 0) {
		1823	+ // This method uses an expect object ID of 0 to indicate that we don't know or don't care
		1824	+ // what the actual object ID is at this offset. This is true when we read the xref stream
		1825	+ // and linearization hint streams. In this case, we don't verify the expect object
		1826	+ // ID/generation against what was read from the file. There is also no reason to attempt
		1827	+ // xref recovery if we get a failure in this case since the read attempt was not triggered
		1828	+ // by an xref lookup.
		1829	+ check_og = false;
		1830	+ try_recovery = false;
		1831	+ }
		1832	+ setLastObjectDescription(description, exp_og);
		1833	+
		1834	+ if (!m->attempt_recovery) {
		1835	+ try_recovery = false;
		1836	+ }
		1837	+
		1838	+ // Special case: if offset is 0, just return null. Some PDF writers, in particular
		1839	+ // "Mac OS X 10.7.5 Quartz PDFContext", may store deleted objects in the xref table as
		1840	+ // "0000000000 00000 n", which is not correct, but it won't hurt anything for us to ignore
		1841	+ // these.
		1842	+ if (offset == 0) {
		1843	+ QTC::TC("qpdf", "QPDF bogus 0 offset", 0);
		1844	+ warn(damagedPDF(0, "object has offset 0"));
		1845	+ return QPDFObjectHandle::newNull();
		1846	+ }
		1847	+
		1848	+ m->file->seek(offset, SEEK_SET);
		1849	+ try {
		1850	+ QPDFTokenizer::Token tobjid = readToken(*m->file);
		1851	+ bool objidok = tobjid.isInteger();
		1852	+ QTC::TC("qpdf", "QPDF check objid", objidok ? 1 : 0);
		1853	+ if (!objidok) {
		1854	+ QTC::TC("qpdf", "QPDF expected n n obj");
		1855	+ throw damagedPDF(offset, "expected n n obj");
		1856	+ }
		1857	+ QPDFTokenizer::Token tgen = readToken(*m->file);
		1858	+ bool genok = tgen.isInteger();
		1859	+ QTC::TC("qpdf", "QPDF check generation", genok ? 1 : 0);
		1860	+ if (!genok) {
		1861	+ throw damagedPDF(offset, "expected n n obj");
		1862	+ }
		1863	+ QPDFTokenizer::Token tobj = readToken(*m->file);
		1864	+
		1865	+ bool objok = tobj.isWord("obj");
		1866	+ QTC::TC("qpdf", "QPDF check obj", objok ? 1 : 0);
		1867	+
		1868	+ if (!objok) {
		1869	+ throw damagedPDF(offset, "expected n n obj");
		1870	+ }
		1871	+ int objid = QUtil::string_to_int(tobjid.getValue().c_str());
		1872	+ int generation = QUtil::string_to_int(tgen.getValue().c_str());
		1873	+ og = QPDFObjGen(objid, generation);
		1874	+ if (objid == 0) {
		1875	+ QTC::TC("qpdf", "QPDF object id 0");
		1876	+ throw damagedPDF(offset, "object with ID 0");
		1877	+ }
		1878	+ if (check_og && (exp_og != og)) {
		1879	+ QTC::TC("qpdf", "QPDF err wrong objid/generation");
		1880	+ QPDFExc e = damagedPDF(offset, "expected " + exp_og.unparse(' ') + " obj");
		1881	+ if (try_recovery) {
		1882	+ // Will be retried below
		1883	+ throw e;
		1884	+ } else {
		1885	+ // We can try reading the object anyway even if the ID doesn't match.
		1886	+ warn(e);
		1887	+ }
		1888	+ }
		1889	+ } catch (QPDFExc& e) {
		1890	+ if (try_recovery) {
		1891	+ // Try again after reconstructing xref table
		1892	+ m->xref_table.reconstruct(e);
		1893	+ if (m->xref_table.type(exp_og) == 1) {
		1894	+ QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset");
		1895	+ return readObjectAtOffset(
		1896	+ false, m->xref_table.offset(exp_og), description, exp_og, og, false);
		1897	+ } else {
		1898	+ QTC::TC("qpdf", "QPDF object gone after xref reconstruction");
		1899	+ warn(damagedPDF(
		1900	+ "",
		1901	+ 0,
		1902	+ ("object " + exp_og.unparse(' ') +
		1903	+ " not found in file after regenerating cross reference table")));
		1904	+ return QPDFObjectHandle::newNull();
		1905	+ }
		1906	+ } else {
		1907	+ throw;
		1908	+ }
		1909	+ }
		1910	+
		1911	+ QPDFObjectHandle oh = readObject(description, og);
		1912	+
		1913	+ if (isUnresolved(og)) {
		1914	+ // Store the object in the cache here so it gets cached whether we first know the offset or
		1915	+ // whether we first know the object ID and generation (in which we case we would get here
		1916	+ // through resolve).
		1917	+
		1918	+ // Determine the end offset of this object before and after white space. We use these
		1919	+ // numbers to validate linearization hint tables. Offsets and lengths of objects may imply
		1920	+ // the end of an object to be anywhere between these values.
		1921	+ qpdf_offset_t end_before_space = m->file->tell();
		1922	+
		1923	+ // skip over spaces
		1924	+ while (true) {
		1925	+ char ch;
		1926	+ if (m->file->read(&ch, 1)) {
		1927	+ if (!isspace(static_cast<unsigned char>(ch))) {
		1928	+ m->file->seek(-1, SEEK_CUR);
		1929	+ break;
		1930	+ }
		1931	+ } else {
		1932	+ throw damagedPDF(m->file->tell(), "EOF after endobj");
		1933	+ }
		1934	+ }
		1935	+ qpdf_offset_t end_after_space = m->file->tell();
		1936	+ if (skip_cache_if_in_xref && m->xref_table.type(og)) {
		1937	+ // Ordinarily, an object gets read here when resolved through xref table or stream. In
		1938	+ // the special case of the xref stream and linearization hint tables, the offset comes
		1939	+ // from another source. For the specific case of xref streams, the xref stream is read
		1940	+ // and loaded into the object cache very early in parsing. Ordinarily, when a file is
		1941	+ // updated by appending, items inserted into the xref table in later updates take
		1942	+ // precedence over earlier items. In the special case of reusing the object number
		1943	+ // previously used as the xref stream, we have the following order of events:
		1944	+ //
		1945	+ // * reused object gets loaded into the xref table
		1946	+ // * old object is read here while reading xref streams
		1947	+ // * original xref entry is ignored (since already in xref table)
		1948	+ //
		1949	+ // It is the second step that causes a problem. Even though the xref table is correct in
		1950	+ // this case, the old object is already in the cache and so effectively prevails over
		1951	+ // the reused object. To work around this issue, we have a special case for the xref
		1952	+ // stream (via the skip_cache_if_in_xref): if the object is already in the xref stream,
		1953	+ // don't cache what we read here.
		1954	+ //
		1955	+ // It is likely that the same bug may exist for linearization hint tables, but the
		1956	+ // existing code uses end_before_space and end_after_space from the cache, so fixing
		1957	+ // that would require more significant rework. The chances of a linearization hint
		1958	+ // stream being reused seems smaller because the xref stream is probably the highest
		1959	+ // object in the file and the linearization hint stream would be some random place in
		1960	+ // the middle, so I'm leaving that bug unfixed for now. If the bug were to be fixed, we
		1961	+ // could use !check_og in place of skip_cache_if_in_xref.
		1962	+ QTC::TC("qpdf", "QPDF skipping cache for known unchecked object");
		1963	+ } else {
		1964	+ m->xref_table.linearization_offsets(
		1965	+ toS(og.getObj()), end_before_space, end_after_space);
		1966	+ updateCache(og, oh.getObj());
		1967	+ }
		1968	+ }
		1969	+
		1970	+ return oh;
		1971	+}
		1972	+
		1973	+QPDFObject*
		1974	+QPDF::resolve(QPDFObjGen og)
		1975	+{
		1976	+ if (!isUnresolved(og)) {
		1977	+ return m->obj_cache[og].object.get();
		1978	+ }
		1979	+
		1980	+ if (m->resolving.count(og)) {
		1981	+ // This can happen if an object references itself directly or indirectly in some key that
		1982	+ // has to be resolved during object parsing, such as stream length.
		1983	+ QTC::TC("qpdf", "QPDF recursion loop in resolve");
		1984	+ warn(damagedPDF("", "loop detected resolving object " + og.unparse(' ')));
		1985	+ updateCache(og, QPDF_Null::create());
		1986	+ return m->obj_cache[og].object.get();
		1987	+ }
		1988	+ ResolveRecorder rr(this, og);
		1989	+
		1990	+ try {
		1991	+ switch (m->xref_table.type(og)) {
		1992	+ case 0:
		1993	+ break;
		1994	+ case 1:
		1995	+ {
		1996	+ // Object stored in cache by readObjectAtOffset
		1997	+ QPDFObjGen a_og;
		1998	+ QPDFObjectHandle oh =
		1999	+ readObjectAtOffset(true, m->xref_table.offset(og), "", og, a_og, false);
		2000	+ }
		2001	+ break;
		2002	+
		2003	+ case 2:
		2004	+ resolveObjectsInStream(m->xref_table.stream_number(og.getObj()));
		2005	+ break;
		2006	+
		2007	+ default:
		2008	+ throw damagedPDF(
		2009	+ "", 0, ("object " + og.unparse('/') + " has unexpected xref entry type"));
		2010	+ }
		2011	+ } catch (QPDFExc& e) {
		2012	+ warn(e);
		2013	+ } catch (std::exception& e) {
		2014	+ warn(damagedPDF(
		2015	+ "", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what())));
		2016	+ }
		2017	+
		2018	+ if (isUnresolved(og)) {
		2019	+ // PDF spec says unknown objects resolve to the null object.
		2020	+ QTC::TC("qpdf", "QPDF resolve failure to null");
		2021	+ updateCache(og, QPDF_Null::create());
		2022	+ }
		2023	+
		2024	+ auto result(m->obj_cache[og].object);
		2025	+ result->setDefaultDescription(this, og);
		2026	+ return result.get();
		2027	+}
		2028	+
		2029	+void
		2030	+QPDF::resolveObjectsInStream(int obj_stream_number)
		2031	+{
		2032	+ if (m->resolved_object_streams.count(obj_stream_number)) {
		2033	+ return;
		2034	+ }
		2035	+ m->resolved_object_streams.insert(obj_stream_number);
		2036	+ // Force resolution of object stream
		2037	+ QPDFObjectHandle obj_stream = getObjectByID(obj_stream_number, 0);
		2038	+ if (!obj_stream.isStream()) {
		2039	+ throw damagedPDF(
		2040	+ "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream");
		2041	+ }
		2042	+
		2043	+ QPDFObjectHandle dict = obj_stream.getDict();
		2044	+ if (!dict.isDictionaryOfType("/ObjStm")) {
		2045	+ QTC::TC("qpdf", "QPDF ERR object stream with wrong type");
		2046	+ warn(damagedPDF(
		2047	+ "supposed object stream " + std::to_string(obj_stream_number) + " has wrong type"));
		2048	+ }
		2049	+
		2050	+ if (!(dict.getKey("/N").isInteger() && dict.getKey("/First").isInteger())) {
		2051	+ throw damagedPDF(
		2052	+ ("object stream " + std::to_string(obj_stream_number) + " has incorrect keys"));
		2053	+ }
		2054	+
		2055	+ int n = dict.getKey("/N").getIntValueAsInt();
		2056	+ int first = dict.getKey("/First").getIntValueAsInt();
		2057	+
		2058	+ std::map<int, int> offsets;
		2059	+
		2060	+ std::shared_ptr<Buffer> bp = obj_stream.getStreamData(qpdf_dl_specialized);
		2061	+ auto input = std::shared_ptr<InputSource>(
		2062	+ // line-break
		2063	+ new BufferInputSource(
		2064	+ (m->file->getName() + " object stream " + std::to_string(obj_stream_number)),
		2065	+ bp.get()));
		2066	+
		2067	+ qpdf_offset_t last_offset = -1;
		2068	+ for (int i = 0; i < n; ++i) {
		2069	+ QPDFTokenizer::Token tnum = readToken(*input);
		2070	+ QPDFTokenizer::Token toffset = readToken(*input);
		2071	+ if (!(tnum.isInteger() && toffset.isInteger())) {
		2072	+ throw damagedPDF(
		2073	+ *input,
		2074	+ m->last_object_description,
		2075	+ input->getLastOffset(),
		2076	+ "expected integer in object stream header");
		2077	+ }
		2078	+
		2079	+ int num = QUtil::string_to_int(tnum.getValue().c_str());
		2080	+ long long offset = QUtil::string_to_int(toffset.getValue().c_str());
		2081	+ if (num > m->xref_table.max_id()) {
		2082	+ continue;
		2083	+ }
		2084	+ if (num == obj_stream_number) {
		2085	+ QTC::TC("qpdf", "QPDF ignore self-referential object stream");
		2086	+ warn(damagedPDF(
		2087	+ *input,
		2088	+ m->last_object_description,
		2089	+ input->getLastOffset(),
		2090	+ "object stream claims to contain itself"));
		2091	+ continue;
		2092	+ }
		2093	+ if (offset <= last_offset) {
		2094	+ throw damagedPDF(
		2095	+ *input,
		2096	+ m->last_object_description,
		2097	+ input->getLastOffset(),
		2098	+ "expected offsets in object stream to be increasing");
		2099	+ }
		2100	+ last_offset = offset;
		2101	+
		2102	+ offsets[num] = toI(offset + first);
		2103	+ }
		2104	+
		2105	+ // To avoid having to read the object stream multiple times, store all objects that would be
		2106	+ // found here in the cache. Remember that some objects stored here might have been overridden
		2107	+ // by new objects appended to the file, so it is necessary to recheck the xref table and only
		2108	+ // cache what would actually be resolved here.
		2109	+ m->last_object_description.clear();
		2110	+ m->last_object_description += "object ";
		2111	+ for (auto const& iter: offsets) {
		2112	+ QPDFObjGen og(iter.first, 0);
		2113	+ if (m->xref_table.type(og) == 2 &&
		2114	+ m->xref_table.stream_number(og.getObj()) == obj_stream_number) {
		2115	+ int offset = iter.second;
		2116	+ input->seek(offset, SEEK_SET);
		2117	+ QPDFObjectHandle oh = readObjectInStream(input, iter.first);
		2118	+ updateCache(og, oh.getObj());
		2119	+ } else {
		2120	+ QTC::TC("qpdf", "QPDF not caching overridden objstm object");
		2121	+ }
		2122	+ }
		2123	+}
		2124	+
		2125	+QPDFObjectHandle
		2126	+QPDF::newIndirect(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& obj)
		2127	+{
		2128	+ obj->setDefaultDescription(this, og);
		2129	+ return {obj};
		2130	+}
		2131	+
		2132	+void
		2133	+QPDF::updateCache(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& object)
		2134	+{
		2135	+ object->setObjGen(this, og);
		2136	+ if (isCached(og)) {
		2137	+ auto& cache = m->obj_cache[og];
		2138	+ cache.object->assign(object);
		2139	+ } else {
		2140	+ m->obj_cache[og] = ObjCache(object);
		2141	+ }
		2142	+}
		2143	+
		2144	+bool
		2145	+QPDF::isCached(QPDFObjGen const& og)
		2146	+{
		2147	+ return m->obj_cache.count(og) != 0;
		2148	+}
		2149	+
		2150	+bool
		2151	+QPDF::isUnresolved(QPDFObjGen const& og)
		2152	+{
		2153	+ return !isCached(og) \|\| m->obj_cache[og].object->isUnresolved();
		2154	+}
		2155	+
		2156	+QPDFObjGen
		2157	+QPDF::nextObjGen()
		2158	+{
		2159	+ int max_objid = toI(getObjectCount());
		2160	+ if (max_objid == std::numeric_limits<int>::max()) {
		2161	+ throw std::range_error("max object id is too high to create new objects");
		2162	+ }
		2163	+ return QPDFObjGen(max_objid + 1, 0);
		2164	+}
		2165	+
		2166	+QPDFObjectHandle
		2167	+QPDF::makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj)
		2168	+{
		2169	+ QPDFObjGen next{nextObjGen()};
		2170	+ m->obj_cache[next] = ObjCache(obj);
		2171	+ return newIndirect(next, m->obj_cache[next].object);
		2172	+}
		2173	+
		2174	+QPDFObjectHandle
		2175	+QPDF::makeIndirectObject(QPDFObjectHandle oh)
		2176	+{
		2177	+ if (!oh) {
		2178	+ throw std::logic_error("attempted to make an uninitialized QPDFObjectHandle indirect");
		2179	+ }
		2180	+ return makeIndirectFromQPDFObject(oh.getObj());
		2181	+}
		2182	+
		2183	+QPDFObjectHandle
		2184	+QPDF::newReserved()
		2185	+{
		2186	+ return makeIndirectFromQPDFObject(QPDF_Reserved::create());
		2187	+}
		2188	+
		2189	+QPDFObjectHandle
		2190	+QPDF::newIndirectNull()
		2191	+{
		2192	+ return makeIndirectFromQPDFObject(QPDF_Null::create());
		2193	+}
		2194	+
		2195	+QPDFObjectHandle
		2196	+QPDF::newStream()
		2197	+{
		2198	+ return makeIndirectFromQPDFObject(
		2199	+ QPDF_Stream::create(this, nextObjGen(), QPDFObjectHandle::newDictionary(), 0, 0));
		2200	+}
		2201	+
		2202	+QPDFObjectHandle
		2203	+QPDF::newStream(std::shared_ptr<Buffer> data)
		2204	+{
		2205	+ auto result = newStream();
		2206	+ result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
		2207	+ return result;
		2208	+}
		2209	+
		2210	+QPDFObjectHandle
		2211	+QPDF::newStream(std::string const& data)
		2212	+{
		2213	+ auto result = newStream();
		2214	+ result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
		2215	+ return result;
		2216	+}
		2217	+
		2218	+std::shared_ptr<QPDFObject>
		2219	+QPDF::getObjectForParser(int id, int gen, bool parse_pdf)
		2220	+{
		2221	+ // This method is called by the parser and therefore must not resolve any objects.
		2222	+ auto og = QPDFObjGen(id, gen);
		2223	+ if (auto iter = m->obj_cache.find(og); iter != m->obj_cache.end()) {
		2224	+ return iter->second.object;
		2225	+ }
		2226	+ if (m->xref_table.type(og) \|\| !m->xref_table.initialized()) {
		2227	+ return m->obj_cache.insert({og, QPDF_Unresolved::create(this, og)}).first->second.object;
		2228	+ }
		2229	+ if (parse_pdf) {
		2230	+ return QPDF_Null::create();
		2231	+ }
		2232	+ return m->obj_cache.insert({og, QPDF_Null::create(this, og)}).first->second.object;
		2233	+}
		2234	+
		2235	+std::shared_ptr<QPDFObject>
		2236	+QPDF::getObjectForJSON(int id, int gen)
		2237	+{
		2238	+ auto og = QPDFObjGen(id, gen);
		2239	+ auto [it, inserted] = m->obj_cache.try_emplace(og);
		2240	+ auto& obj = it->second.object;
		2241	+ if (inserted) {
		2242	+ obj = (m->xref_table.initialized() && !m->xref_table.type(og))
		2243	+ ? QPDF_Null::create(this, og)
		2244	+ : QPDF_Unresolved::create(this, og);
		2245	+ }
		2246	+ return obj;
		2247	+}
		2248	+
		2249	+QPDFObjectHandle
		2250	+QPDF::getObject(QPDFObjGen const& og)
		2251	+{
		2252	+ if (auto it = m->obj_cache.find(og); it != m->obj_cache.end()) {
		2253	+ return {it->second.object};
		2254	+ } else if (m->xref_table.initialized() && !m->xref_table.type(og)) {
		2255	+ return QPDF_Null::create();
		2256	+ } else {
		2257	+ auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og));
		2258	+ return {result.first->second.object};
		2259	+ }
		2260	+}
		2261	+
		2262	+QPDFObjectHandle
		2263	+QPDF::getObject(int objid, int generation)
		2264	+{
		2265	+ return getObject(QPDFObjGen(objid, generation));
		2266	+}
		2267	+
		2268	+QPDFObjectHandle
		2269	+QPDF::getObjectByObjGen(QPDFObjGen const& og)
		2270	+{
		2271	+ return getObject(og);
		2272	+}
		2273	+
		2274	+QPDFObjectHandle
		2275	+QPDF::getObjectByID(int objid, int generation)
		2276	+{
		2277	+ return getObject(QPDFObjGen(objid, generation));
		2278	+}
		2279	+
		2280	+void
		2281	+QPDF::replaceObject(int objid, int generation, QPDFObjectHandle oh)
		2282	+{
		2283	+ replaceObject(QPDFObjGen(objid, generation), oh);
		2284	+}
		2285	+
		2286	+void
		2287	+QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh)
		2288	+{
		2289	+ if (!oh \|\| (oh.isIndirect() && !(oh.isStream() && oh.getObjGen() == og))) {
		2290	+ QTC::TC("qpdf", "QPDF replaceObject called with indirect object");
		2291	+ throw std::logic_error("QPDF::replaceObject called with indirect object handle");
		2292	+ }
		2293	+ updateCache(og, oh.getObj());
		2294	+}
		2295	+
		2296	+void
		2297	+QPDF::removeObject(QPDFObjGen og)
		2298	+{
		2299	+ if (auto cached = m->obj_cache.find(og); cached != m->obj_cache.end()) {
		2300	+ // Take care of any object handles that may be floating around.
		2301	+ cached->second.object->assign(QPDF_Null::create());
		2302	+ cached->second.object->setObjGen(nullptr, QPDFObjGen());
		2303	+ m->obj_cache.erase(cached);
		2304	+ }
		2305	+}
		2306	+
		2307	+void
		2308	+QPDF::replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement)
		2309	+{
		2310	+ QTC::TC("qpdf", "QPDF replaceReserved");
		2311	+ auto tc = reserved.getTypeCode();
		2312	+ if (!(tc == ::ot_reserved \|\| tc == ::ot_null)) {
		2313	+ throw std::logic_error("replaceReserved called with non-reserved object");
		2314	+ }
		2315	+ replaceObject(reserved.getObjGen(), replacement);
		2316	+}
		2317	+
		2318	+QPDFObjectHandle
		2319	+QPDF::copyForeignObject(QPDFObjectHandle foreign)
		2320	+{
		2321	+ // Here's an explanation of what's going on here.
		2322	+ //
		2323	+ // A QPDFObjectHandle that is an indirect object has an owning QPDF. The object ID and
		2324	+ // generation refers to an object in the owning QPDF. When we copy the QPDFObjectHandle from a
		2325	+ // foreign QPDF into the local QPDF, we have to replace all indirect object references with
		2326	+ // references to the corresponding object in the local file.
		2327	+ //
		2328	+ // To do this, we maintain mappings from foreign object IDs to local object IDs for each foreign
		2329	+ // QPDF that we are copying from. The mapping is stored in an ObjCopier, which contains a
		2330	+ // mapping from the foreign ObjGen to the local QPDFObjectHandle.
		2331	+ //
		2332	+ // To copy, we do a deep traversal of the foreign object with loop detection to discover all
		2333	+ // indirect objects that are encountered, stopping at page boundaries. Whenever we encounter an
		2334	+ // indirect object, we check to see if we have already created a local copy of it. If not, we
		2335	+ // allocate a "reserved" object (or, for a stream, just a new stream) and store in the map the
		2336	+ // mapping from the foreign object ID to the new object. While we
		2337	+ // do this, we keep a list of objects to copy.
		2338	+ //
		2339	+ // Once we are done with the traversal, we copy all the objects that we need to copy. However,
		2340	+ // the copies will contain indirect object IDs that refer to objects in the foreign file. We
		2341	+ // need to replace them with references to objects in the local file. This is what
		2342	+ // replaceForeignIndirectObjects does. Once we have created a copy of the foreign object with
		2343	+ // all the indirect references replaced with new ones in the local context, we can replace the
		2344	+ // local reserved object with the copy. This mechanism allows us to copy objects with circular
		2345	+ // references in any order.
		2346	+
		2347	+ // For streams, rather than copying the objects, we set up the stream data to pull from the
		2348	+ // original stream by using a stream data provider. This is done in a manner that doesn't
		2349	+ // require the original QPDF object but may require the original source of the stream data with
		2350	+ // special handling for immediate_copy_from. This logic is also in
		2351	+ // replaceForeignIndirectObjects.
		2352	+
		2353	+ // Note that we explicitly allow use of copyForeignObject on page objects. It is a documented
		2354	+ // use case to copy pages this way if the intention is to not update the pages tree.
		2355	+ if (!foreign.isIndirect()) {
		2356	+ QTC::TC("qpdf", "QPDF copyForeign direct");
		2357	+ throw std::logic_error("QPDF::copyForeign called with direct object handle");
		2358	+ }
		2359	+ QPDF& other = foreign.getQPDF();
		2360	+ if (&other == this) {
		2361	+ QTC::TC("qpdf", "QPDF copyForeign not foreign");
		2362	+ throw std::logic_error("QPDF::copyForeign called with object from this QPDF");
		2363	+ }
		2364	+
		2365	+ ObjCopier& obj_copier = m->object_copiers[other.m->unique_id];
		2366	+ if (!obj_copier.visiting.empty()) {
		2367	+ throw std::logic_error("obj_copier.visiting is not empty"
		2368	+ " at the beginning of copyForeignObject");
		2369	+ }
		2370	+
		2371	+ // Make sure we have an object in this file for every referenced object in the old file.
		2372	+ // obj_copier.object_map maps foreign QPDFObjGen to local objects. For everything new that we
		2373	+ // have to copy, the local object will be a reservation, unless it is a stream, in which case
		2374	+ // the local object will already be a stream.
		2375	+ reserveObjects(foreign, obj_copier, true);
		2376	+
		2377	+ if (!obj_copier.visiting.empty()) {
		2378	+ throw std::logic_error("obj_copier.visiting is not empty after reserving objects");
		2379	+ }
		2380	+
		2381	+ // Copy any new objects and replace the reservations.
		2382	+ for (auto& to_copy: obj_copier.to_copy) {
		2383	+ QPDFObjectHandle copy = replaceForeignIndirectObjects(to_copy, obj_copier, true);
		2384	+ if (!to_copy.isStream()) {
		2385	+ QPDFObjGen og(to_copy.getObjGen());
		2386	+ replaceReserved(obj_copier.object_map[og], copy);
		2387	+ }
		2388	+ }
		2389	+ obj_copier.to_copy.clear();
		2390	+
		2391	+ auto og = foreign.getObjGen();
		2392	+ if (!obj_copier.object_map.count(og)) {
		2393	+ warn(damagedPDF("unexpected reference to /Pages object while copying foreign object; "
		2394	+ "replacing with null"));
		2395	+ return QPDFObjectHandle::newNull();
		2396	+ }
		2397	+ return obj_copier.object_map[foreign.getObjGen()];
		2398	+}
		2399	+
		2400	+void
		2401	+QPDF::reserveObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top)
		2402	+{
		2403	+ auto foreign_tc = foreign.getTypeCode();
		2404	+ if (foreign_tc == ::ot_reserved) {
		2405	+ throw std::logic_error("QPDF: attempting to copy a foreign reserved object");
		2406	+ }
		2407	+
		2408	+ if (foreign.isPagesObject()) {
		2409	+ QTC::TC("qpdf", "QPDF not copying pages object");
		2410	+ return;
		2411	+ }
		2412	+
		2413	+ if (foreign.isIndirect()) {
		2414	+ QPDFObjGen foreign_og(foreign.getObjGen());
		2415	+ if (!obj_copier.visiting.add(foreign_og)) {
		2416	+ QTC::TC("qpdf", "QPDF loop reserving objects");
		2417	+ return;
		2418	+ }
		2419	+ if (obj_copier.object_map.count(foreign_og) > 0) {
		2420	+ QTC::TC("qpdf", "QPDF already reserved object");
		2421	+ if (!(top && foreign.isPageObject() && obj_copier.object_map[foreign_og].isNull())) {
		2422	+ obj_copier.visiting.erase(foreign);
		2423	+ return;
		2424	+ }
		2425	+ } else {
		2426	+ QTC::TC("qpdf", "QPDF copy indirect");
		2427	+ obj_copier.object_map[foreign_og] =
		2428	+ foreign.isStream() ? newStream() : newIndirectNull();
		2429	+ if ((!top) && foreign.isPageObject()) {
		2430	+ QTC::TC("qpdf", "QPDF not crossing page boundary");
		2431	+ obj_copier.visiting.erase(foreign_og);
		2432	+ return;
		2433	+ }
		2434	+ }
		2435	+ obj_copier.to_copy.push_back(foreign);
		2436	+ }
		2437	+
		2438	+ if (foreign_tc == ::ot_array) {
		2439	+ QTC::TC("qpdf", "QPDF reserve array");
		2440	+ int n = foreign.getArrayNItems();
		2441	+ for (int i = 0; i < n; ++i) {
		2442	+ reserveObjects(foreign.getArrayItem(i), obj_copier, false);
		2443	+ }
		2444	+ } else if (foreign_tc == ::ot_dictionary) {
		2445	+ QTC::TC("qpdf", "QPDF reserve dictionary");
		2446	+ for (auto const& key: foreign.getKeys()) {
		2447	+ reserveObjects(foreign.getKey(key), obj_copier, false);
		2448	+ }
		2449	+ } else if (foreign_tc == ::ot_stream) {
		2450	+ QTC::TC("qpdf", "QPDF reserve stream");
		2451	+ reserveObjects(foreign.getDict(), obj_copier, false);
		2452	+ }
		2453	+
		2454	+ obj_copier.visiting.erase(foreign);
		2455	+}
		2456	+
		2457	+QPDFObjectHandle
		2458	+QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top)
		2459	+{
		2460	+ auto foreign_tc = foreign.getTypeCode();
		2461	+ QPDFObjectHandle result;
		2462	+ if ((!top) && foreign.isIndirect()) {
		2463	+ QTC::TC("qpdf", "QPDF replace indirect");
		2464	+ auto mapping = obj_copier.object_map.find(foreign.getObjGen());
		2465	+ if (mapping == obj_copier.object_map.end()) {
		2466	+ // This case would occur if this is a reference to a Pages object that we didn't
		2467	+ // traverse into.
		2468	+ QTC::TC("qpdf", "QPDF replace foreign indirect with null");
		2469	+ result = QPDFObjectHandle::newNull();
		2470	+ } else {
		2471	+ result = mapping->second;
		2472	+ }
		2473	+ } else if (foreign_tc == ::ot_array) {
		2474	+ QTC::TC("qpdf", "QPDF replace array");
		2475	+ result = QPDFObjectHandle::newArray();
		2476	+ int n = foreign.getArrayNItems();
		2477	+ for (int i = 0; i < n; ++i) {
		2478	+ result.appendItem(
		2479	+ // line-break
		2480	+ replaceForeignIndirectObjects(foreign.getArrayItem(i), obj_copier, false));
		2481	+ }
		2482	+ } else if (foreign_tc == ::ot_dictionary) {
		2483	+ QTC::TC("qpdf", "QPDF replace dictionary");
		2484	+ result = QPDFObjectHandle::newDictionary();
		2485	+ std::set<std::string> keys = foreign.getKeys();
		2486	+ for (auto const& iter: keys) {
		2487	+ result.replaceKey(
		2488	+ iter, replaceForeignIndirectObjects(foreign.getKey(iter), obj_copier, false));
		2489	+ }
		2490	+ } else if (foreign_tc == ::ot_stream) {
		2491	+ QTC::TC("qpdf", "QPDF replace stream");
		2492	+ result = obj_copier.object_map[foreign.getObjGen()];
		2493	+ result.assertStream();
		2494	+ QPDFObjectHandle dict = result.getDict();
		2495	+ QPDFObjectHandle old_dict = foreign.getDict();
		2496	+ std::set<std::string> keys = old_dict.getKeys();
		2497	+ for (auto const& iter: keys) {
		2498	+ dict.replaceKey(
		2499	+ iter, replaceForeignIndirectObjects(old_dict.getKey(iter), obj_copier, false));
		2500	+ }
		2501	+ copyStreamData(result, foreign);
		2502	+ } else {
		2503	+ foreign.assertScalar();
		2504	+ result = foreign;
		2505	+ result.makeDirect();
		2506	+ }
		2507	+
		2508	+ if (top && (!result.isStream()) && result.isIndirect()) {
		2509	+ throw std::logic_error("replacement for foreign object is indirect");
		2510	+ }
		2511	+
		2512	+ return result;
		2513	+}
		2514	+
		2515	+void
		2516	+QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
		2517	+{
		2518	+ // This method was originally written for copying foreign streams, but it is used by
		2519	+ // QPDFObjectHandle to copy streams from the same QPDF object as well.
		2520	+
		2521	+ QPDFObjectHandle dict = result.getDict();
		2522	+ QPDFObjectHandle old_dict = foreign.getDict();
		2523	+ if (m->copied_stream_data_provider == nullptr) {
		2524	+ m->copied_stream_data_provider = new CopiedStreamDataProvider(*this);
		2525	+ m->copied_streams =
		2526	+ std::shared_ptr<QPDFObjectHandle::StreamDataProvider>(m->copied_stream_data_provider);
		2527	+ }
		2528	+ QPDFObjGen local_og(result.getObjGen());
		2529	+ // Copy information from the foreign stream so we can pipe its data later without keeping the
		2530	+ // original QPDF object around.
		2531	+
		2532	+ QPDF& foreign_stream_qpdf =
		2533	+ foreign.getQPDF("unable to retrieve owning qpdf from foreign stream");
		2534	+
		2535	+ auto stream = foreign.getObjectPtr()->as<QPDF_Stream>();
		2536	+ if (stream == nullptr) {
		2537	+ throw std::logic_error("unable to retrieve underlying"
		2538	+ " stream object from foreign stream");
		2539	+ }
		2540	+ std::shared_ptr<Buffer> stream_buffer = stream->getStreamDataBuffer();
		2541	+ if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) {
		2542	+ // Pull the stream data into a buffer before attempting the copy operation. Do it on the
		2543	+ // source stream so that if the source stream is copied multiple times, we don't have to
		2544	+ // keep duplicating the memory.
		2545	+ QTC::TC("qpdf", "QPDF immediate copy stream data");
		2546	+ foreign.replaceStreamData(
		2547	+ foreign.getRawStreamData(),
		2548	+ old_dict.getKey("/Filter"),
		2549	+ old_dict.getKey("/DecodeParms"));
		2550	+ stream_buffer = stream->getStreamDataBuffer();
		2551	+ }
		2552	+ std::shared_ptr<QPDFObjectHandle::StreamDataProvider> stream_provider =
		2553	+ stream->getStreamDataProvider();
		2554	+ if (stream_buffer.get()) {
		2555	+ QTC::TC("qpdf", "QPDF copy foreign stream with buffer");
		2556	+ result.replaceStreamData(
		2557	+ stream_buffer, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
		2558	+ } else if (stream_provider.get()) {
		2559	+ // In this case, the remote stream's QPDF must stay in scope.
		2560	+ QTC::TC("qpdf", "QPDF copy foreign stream with provider");
		2561	+ m->copied_stream_data_provider->registerForeignStream(local_og, foreign);
		2562	+ result.replaceStreamData(
		2563	+ m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
		2564	+ } else {
		2565	+ auto foreign_stream_data = std::make_shared<ForeignStreamData>(
		2566	+ foreign_stream_qpdf.m->encp,
		2567	+ foreign_stream_qpdf.m->file_sp,
		2568	+ foreign.getObjGen(),
		2569	+ stream->getParsedOffset(),
		2570	+ stream->getLength(),
		2571	+ dict);
		2572	+ m->copied_stream_data_provider->registerForeignStream(local_og, foreign_stream_data);
		2573	+ result.replaceStreamData(
		2574	+ m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
		2575	+ }
		2576	+}
		2577	+
		2578	+void
		2579	+QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2)
		2580	+{
		2581	+ swapObjects(QPDFObjGen(objid1, generation1), QPDFObjGen(objid2, generation2));
		2582	+}
		2583	+
		2584	+void
		2585	+QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2)
		2586	+{
		2587	+ // Force objects to be read from the input source if needed, then swap them in the cache.
		2588	+ resolve(og1);
		2589	+ resolve(og2);
		2590	+ m->obj_cache[og1].object->swapWith(m->obj_cache[og2].object);
		2591	+}
		2592	+
		2593	+unsigned long long
		2594	+QPDF::getUniqueId() const
		2595	+{
		2596	+ return m->unique_id;
		2597	+}
		2598	+
		2599	+std::string
		2600	+QPDF::getFilename() const
		2601	+{
		2602	+ return m->file->getName();
		2603	+}
		2604	+
		2605	+PDFVersion
		2606	+QPDF::getVersionAsPDFVersion()
		2607	+{
		2608	+ int major = 1;
		2609	+ int minor = 3;
		2610	+ int extension_level = getExtensionLevel();
		2611	+
		2612	+ std::regex v("^[[:space:]]*([0-9]+)\\.([0-9]+)");
		2613	+ std::smatch match;
		2614	+ if (std::regex_search(m->pdf_version, match, v)) {
		2615	+ major = QUtil::string_to_int(match[1].str().c_str());
		2616	+ minor = QUtil::string_to_int(match[2].str().c_str());
		2617	+ }
		2618	+
		2619	+ return {major, minor, extension_level};
		2620	+}
		2621	+
		2622	+std::string
		2623	+QPDF::getPDFVersion() const
		2624	+{
		2625	+ return m->pdf_version;
		2626	+}
		2627	+
		2628	+int
		2629	+QPDF::getExtensionLevel()
		2630	+{
		2631	+ int result = 0;
		2632	+ QPDFObjectHandle obj = getRoot();
		2633	+ if (obj.hasKey("/Extensions")) {
		2634	+ obj = obj.getKey("/Extensions");
		2635	+ if (obj.isDictionary() && obj.hasKey("/ADBE")) {
		2636	+ obj = obj.getKey("/ADBE");
		2637	+ if (obj.isDictionary() && obj.hasKey("/ExtensionLevel")) {
		2638	+ obj = obj.getKey("/ExtensionLevel");
		2639	+ if (obj.isInteger()) {
		2640	+ result = obj.getIntValueAsInt();
		2641	+ }
		2642	+ }
		2643	+ }
		2644	+ }
		2645	+ return result;
		2646	+}
		2647	+
		2648	+QPDFObjectHandle
		2649	+QPDF::getTrailer()
		2650	+{
		2651	+ return m->xref_table.trailer();
		2652	+}
		2653	+
		2654	+QPDFObjectHandle
		2655	+QPDF::getRoot()
		2656	+{
		2657	+ QPDFObjectHandle root = m->xref_table.trailer().getKey("/Root");
		2658	+ if (!root.isDictionary()) {
		2659	+ throw damagedPDF("", 0, "unable to find /Root dictionary");
		2660	+ } else if (
		2661	+ // Check_mode is an interim solution to request #810 pending a more comprehensive review of
		2662	+ // the approach to more extensive checks and warning levels.
		2663	+ m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) {
		2664	+ warn(damagedPDF("", 0, "catalog /Type entry missing or invalid"));
		2665	+ root.replaceKey("/Type", "/Catalog"_qpdf);
		2666	+ }
		2667	+ return root;
		2668	+}
		2669	+
		2670	+std::map<QPDFObjGen, QPDFXRefEntry>
		2671	+QPDF::getXRefTable()
		2672	+{
		2673	+ if (!m->xref_table.initialized()) {
		2674	+ throw std::logic_error("QPDF::getXRefTable called before parsing.");
		2675	+ }
		2676	+ return m->xref_table.as_map();
		2677	+}
		2678	+
		2679	+size_t
		2680	+QPDF::tableSize()
		2681	+{
		2682	+ // If obj_cache is dense, accommodate all object in tables,else accommodate only original
		2683	+ // objects.
		2684	+ auto max_xref = toI(m->xref_table.size());
		2685	+ if (max_xref > 0) {
		2686	+ --max_xref;
		2687	+ }
		2688	+ auto max_obj = m->obj_cache.size() ? m->obj_cache.crbegin()->first.getObj() : 0;
		2689	+ auto max_id = std::numeric_limits<int>::max() - 1;
		2690	+ if (max_obj >= max_id \|\| max_xref >= max_id) {
		2691	+ // Temporary fix. Long-term solution is
		2692	+ // - QPDFObjGen to enforce objgens are valid and sensible
		2693	+ // - xref table and obj cache to protect against insertion of impossibly large obj ids
		2694	+ stopOnError("Impossibly large object id encountered.");
		2695	+ }
		2696	+ if (max_obj < 1.1 * std::max(toI(m->obj_cache.size()), max_xref)) {
		2697	+ return toS(++max_obj);
		2698	+ }
		2699	+ return toS(++max_xref);
		2700	+}
		2701	+
		2702	+std::vector<QPDFObjGen>
		2703	+QPDF::getCompressibleObjVector()
		2704	+{
		2705	+ return getCompressibleObjGens<QPDFObjGen>();
		2706	+}
		2707	+
		2708	+std::vector<bool>
		2709	+QPDF::getCompressibleObjSet()
		2710	+{
		2711	+ return getCompressibleObjGens<bool>();
		2712	+}
		2713	+
		2714	+template <typename T>
		2715	+std::vector<T>
		2716	+QPDF::getCompressibleObjGens()
		2717	+{
		2718	+ // Return a list of objects that are allowed to be in object streams. Walk through the objects
		2719	+ // by traversing the document from the root, including a traversal of the pages tree. This
		2720	+ // makes that objects that are on the same page are more likely to be in the same object stream,
		2721	+ // which is slightly more efficient, particularly with linearized files. This is better than
		2722	+ // iterating through the xref table since it avoids preserving orphaned items.
		2723	+
		2724	+ // Exclude encryption dictionary, if any
		2725	+ QPDFObjectHandle encryption_dict = m->xref_table.trailer().getKey("/Encrypt");
		2726	+ QPDFObjGen encryption_dict_og = encryption_dict.getObjGen();
		2727	+
		2728	+ const size_t max_obj = getObjectCount();
		2729	+ std::vector<bool> visited(max_obj, false);
		2730	+ std::vector<QPDFObjectHandle> queue;
		2731	+ queue.reserve(512);
		2732	+ queue.push_back(m->xref_table.trailer());
		2733	+ std::vector<T> result;
		2734	+ if constexpr (std::is_same_v<T, QPDFObjGen>) {
		2735	+ result.reserve(m->obj_cache.size());
		2736	+ } else if constexpr (std::is_same_v<T, bool>) {
		2737	+ result.resize(max_obj + 1U, false);
		2738	+ } else {
		2739	+ throw std::logic_error("Unsupported type in QPDF::getCompressibleObjGens");
		2740	+ }
		2741	+ while (!queue.empty()) {
		2742	+ auto obj = queue.back();
		2743	+ queue.pop_back();
		2744	+ if (obj.getObjectID() > 0) {
		2745	+ QPDFObjGen og = obj.getObjGen();
		2746	+ const size_t id = toS(og.getObj() - 1);
		2747	+ if (id >= max_obj) {
		2748	+ throw std::logic_error(
		2749	+ "unexpected object id encountered in getCompressibleObjGens");
		2750	+ }
		2751	+ if (visited[id]) {
		2752	+ QTC::TC("qpdf", "QPDF loop detected traversing objects");
		2753	+ continue;
		2754	+ }
		2755	+
		2756	+ // Check whether this is the current object. If not, remove it (which changes it into a
		2757	+ // direct null and therefore stops us from revisiting it) and move on to the next object
		2758	+ // in the queue.
		2759	+ auto upper = m->obj_cache.upper_bound(og);
		2760	+ if (upper != m->obj_cache.end() && upper->first.getObj() == og.getObj()) {
		2761	+ removeObject(og);
		2762	+ continue;
		2763	+ }
		2764	+
		2765	+ visited[id] = true;
		2766	+
		2767	+ if (og == encryption_dict_og) {
		2768	+ QTC::TC("qpdf", "QPDF exclude encryption dictionary");
		2769	+ } else if (!(obj.isStream() \|\|
		2770	+ (obj.isDictionaryOfType("/Sig") && obj.hasKey("/ByteRange") &&
		2771	+ obj.hasKey("/Contents")))) {
		2772	+ if constexpr (std::is_same_v<T, QPDFObjGen>) {
		2773	+ result.push_back(og);
		2774	+ } else if constexpr (std::is_same_v<T, bool>) {
		2775	+ result[id + 1U] = true;
		2776	+ }
		2777	+ }
		2778	+ }
		2779	+ if (obj.isStream()) {
		2780	+ QPDFObjectHandle dict = obj.getDict();
		2781	+ std::set<std::string> keys = dict.getKeys();
		2782	+ for (auto iter = keys.rbegin(); iter != keys.rend(); ++iter) {
		2783	+ std::string const& key = *iter;
		2784	+ QPDFObjectHandle value = dict.getKey(key);
		2785	+ if (key == "/Length") {
		2786	+ // omit stream lengths
		2787	+ if (value.isIndirect()) {
		2788	+ QTC::TC("qpdf", "QPDF exclude indirect length");
		2789	+ }
		2790	+ } else {
		2791	+ queue.push_back(value);
		2792	+ }
		2793	+ }
		2794	+ } else if (obj.isDictionary()) {
		2795	+ std::set<std::string> keys = obj.getKeys();
		2796	+ for (auto iter = keys.rbegin(); iter != keys.rend(); ++iter) {
		2797	+ queue.push_back(obj.getKey(*iter));
		2798	+ }
		2799	+ } else if (obj.isArray()) {
		2800	+ int n = obj.getArrayNItems();
		2801	+ for (int i = 1; i <= n; ++i) {
		2802	+ queue.push_back(obj.getArrayItem(n - i));
		2803	+ }
		2804	+ }
		2805	+ }
		2806	+
		2807	+ return result;
		2808	+}
		2809	+
		2810	+bool
		2811	+QPDF::pipeStreamData(
		2812	+ std::shared_ptr<EncryptionParameters> encp,
		2813	+ std::shared_ptr<InputSource> file,
		2814	+ QPDF& qpdf_for_warning,
		2815	+ QPDFObjGen const& og,
		2816	+ qpdf_offset_t offset,
		2817	+ size_t length,
		2818	+ QPDFObjectHandle stream_dict,
		2819	+ Pipeline* pipeline,
		2820	+ bool suppress_warnings,
		2821	+ bool will_retry)
		2822	+{
		2823	+ std::unique_ptr<Pipeline> to_delete;
		2824	+ if (encp->encrypted) {
		2825	+ decryptStream(encp, file, qpdf_for_warning, pipeline, og, stream_dict, to_delete);
		2826	+ }
		2827	+
		2828	+ bool attempted_finish = false;
		2829	+ try {
		2830	+ file->seek(offset, SEEK_SET);
		2831	+ auto buf = std::make_unique<char[]>(length);
		2832	+ if (auto read = file->read(buf.get(), length); read != length) {
		2833	+ throw damagedPDF(*file, "", offset + toO(read), "unexpected EOF reading stream data");
		2834	+ }
		2835	+ pipeline->write(buf.get(), length);
		2836	+ attempted_finish = true;
		2837	+ pipeline->finish();
		2838	+ return true;
		2839	+ } catch (QPDFExc& e) {
		2840	+ if (!suppress_warnings) {
		2841	+ qpdf_for_warning.warn(e);
		2842	+ }
		2843	+ } catch (std::exception& e) {
		2844	+ if (!suppress_warnings) {
		2845	+ QTC::TC("qpdf", "QPDF decoding error warning");
		2846	+ qpdf_for_warning.warn(
		2847	+ // line-break
		2848	+ damagedPDF(
		2849	+ *file,
		2850	+ "",
		2851	+ file->getLastOffset(),
		2852	+ ("error decoding stream data for object " + og.unparse(' ') + ": " +
		2853	+ e.what())));
		2854	+ if (will_retry) {
		2855	+ qpdf_for_warning.warn(
		2856	+ // line-break
		2857	+ damagedPDF(
		2858	+ *file,
		2859	+ "",
		2860	+ file->getLastOffset(),
		2861	+ "stream will be re-processed without filtering to avoid data loss"));
		2862	+ }
		2863	+ }
		2864	+ }
		2865	+ if (!attempted_finish) {
		2866	+ try {
		2867	+ pipeline->finish();
		2868	+ } catch (std::exception&) {
		2869	+ // ignore
		2870	+ }
		2871	+ }
		2872	+ return false;
		2873	+}
		2874	+
		2875	+bool
		2876	+QPDF::pipeStreamData(
		2877	+ QPDFObjGen const& og,
		2878	+ qpdf_offset_t offset,
		2879	+ size_t length,
		2880	+ QPDFObjectHandle stream_dict,
		2881	+ Pipeline* pipeline,
		2882	+ bool suppress_warnings,
		2883	+ bool will_retry)
		2884	+{
		2885	+ return pipeStreamData(
		2886	+ m->encp,
		2887	+ m->file_sp,
		2888	+ *this,
		2889	+ og,
		2890	+ offset,
		2891	+ length,
		2892	+ stream_dict,
		2893	+ pipeline,
		2894	+ suppress_warnings,
		2895	+ will_retry);
		2896	+}
		2897	+
		2898	+bool
		2899	+QPDF::pipeForeignStreamData(
		2900	+ std::shared_ptr<ForeignStreamData> foreign,
		2901	+ Pipeline* pipeline,
		2902	+ bool suppress_warnings,
		2903	+ bool will_retry)
		2904	+{
		2905	+ if (foreign->encp->encrypted) {
		2906	+ QTC::TC("qpdf", "QPDF pipe foreign encrypted stream");
		2907	+ }
		2908	+ return pipeStreamData(
		2909	+ foreign->encp,
		2910	+ foreign->file,
		2911	+ *this,
		2912	+ foreign->foreign_og,
		2913	+ foreign->offset,
		2914	+ foreign->length,
		2915	+ foreign->local_dict,
		2916	+ pipeline,
		2917	+ suppress_warnings,
		2918	+ will_retry);
		2919	+}
		2920	+
		2921	+// Throw a generic exception when we lack context for something more specific. New code should not
		2922	+// use this. This method exists to improve somewhat from calling assert in very old code.
		2923	+void
		2924	+QPDF::stopOnError(std::string const& message)
		2925	+{
		2926	+ throw damagedPDF("", message);
		2927	+}
		2928	+
		2929	+// Return an exception of type qpdf_e_damaged_pdf.
		2930	+QPDFExc
		2931	+QPDF::damagedPDF(
		2932	+ InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message)
		2933	+{
		2934	+ return {qpdf_e_damaged_pdf, input.getName(), object, offset, message};
		2935	+}
		2936	+
		2937	+// Return an exception of type qpdf_e_damaged_pdf. The object is taken from
		2938	+// m->last_object_description.
		2939	+QPDFExc
		2940	+QPDF::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message)
		2941	+{
		2942	+ return damagedPDF(input, m->last_object_description, offset, message);
		2943	+}
		2944	+
		2945	+// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file.
		2946	+QPDFExc
		2947	+QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message)
		2948	+{
		2949	+ return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message};
		2950	+}
		2951	+
		2952	+// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the
		2953	+// offset from .m->file->getLastOffset().
		2954	+QPDFExc
		2955	+QPDF::damagedPDF(std::string const& object, std::string const& message)
		2956	+{
		2957	+ return damagedPDF(object, m->file->getLastOffset(), message);
		2958	+}
		2959	+
		2960	+// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object
		2961	+// from .m->last_object_description.
		2962	+QPDFExc
		2963	+QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message)
		2964	+{
		2965	+ return damagedPDF(m->last_object_description, offset, message);
		2966	+}
		2967	+
		2968	+// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file, the object
		2969	+// from m->last_object_description and the offset from m->file->getLastOffset().
		2970	+QPDFExc
		2971	+QPDF::damagedPDF(std::string const& message)
		2972	+{
		2973	+ return damagedPDF(m->last_object_description, m->file->getLastOffset(), message);
		2974	+}
		2975	+
		2976	+bool
		2977	+QPDF::everCalledGetAllPages() const
		2978	+{
		2979	+ return m->ever_called_get_all_pages;
		2980	+}
		2981	+
		2982	+bool
		2983	+QPDF::everPushedInheritedAttributesToPages() const
		2984	+{
		2985	+ return m->ever_pushed_inherited_attributes_to_pages;
		2986	+}
		2987	+
		2988	+void
		2989	+QPDF::removeSecurityRestrictions()
		2990	+{
		2991	+ auto root = getRoot();
		2992	+ root.removeKey("/Perms");
		2993	+ auto acroform = root.getKey("/AcroForm");
		2994	+ if (acroform.isDictionary() && acroform.hasKey("/SigFlags")) {
		2995	+ acroform.replaceKey("/SigFlags", QPDFObjectHandle::newInteger(0));
		2996	+ }
		2997	+}