diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index cecb242..89fdf81 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -787,18 +787,6 @@ class QPDF bool is_root_metadata, std::unique_ptr& heap); - struct HPageOffsetEntry; - struct HPageOffset; - struct HSharedObjectEntry; - struct HSharedObject; - struct HGeneric; - struct LinParameters; - struct CHPageOffsetEntry; - struct CHPageOffset; - struct CHSharedObjectEntry; - struct CHSharedObject; - class ObjUser; - struct UpdateObjectMapsFrame; class PatternFinder; // Methods to support pattern finding diff --git a/include/qpdf/QPDFJob.hh b/include/qpdf/QPDFJob.hh index 62992cd..45efafe 100644 --- a/include/qpdf/QPDFJob.hh +++ b/include/qpdf/QPDFJob.hh @@ -460,7 +460,6 @@ class QPDFJob bool main_input); // Transformations - void setQPDFOptions(QPDF& pdf); void handlePageSpecs(QPDF& pdf); bool shouldRemoveUnreferencedResources(QPDF& pdf); void handleRotations(QPDF& pdf); diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index f0d3882..3a96465 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -27,9 +27,9 @@ using namespace qpdf; using namespace std::literals; -using Doc = QPDF::Doc; -using Common = Doc::Common; -using Objects = Doc::Objects; +using QDoc = QPDF::Doc; +using Common = QDoc::Common; +using Objects = QDoc::Objects; using Foreign = Objects::Foreign; using Streams = Objects::Streams; @@ -133,7 +133,6 @@ QPDF::Members::Members(QPDF& qpdf) : lin(*this), objects(*this), pages(*this), - log(QPDFLogger::defaultLogger()), file(std::make_shared()), encp(std::make_shared()) { @@ -232,7 +231,7 @@ QPDF::closeInputSource() void QPDF::setPasswordIsHexKey(bool val) { - m->provided_password_is_hex_key = val; + m->cf.password_is_hex_key(val); } void @@ -251,56 +250,56 @@ QPDF::registerStreamFilter( void QPDF::setIgnoreXRefStreams(bool val) { - m->ignore_xref_streams = val; + (void)m->cf.ignore_xref_streams(val); } std::shared_ptr QPDF::getLogger() { - return m->log; + return m->cf.log(); } void QPDF::setLogger(std::shared_ptr l) { - m->log = l; + m->cf.log(l); } void QPDF::setOutputStreams(std::ostream* out, std::ostream* err) { setLogger(QPDFLogger::create()); - m->log->setOutputStreams(out, err); + m->cf.log()->setOutputStreams(out, err); } void QPDF::setSuppressWarnings(bool val) { - m->suppress_warnings = val; + (void)m->cf.suppress_warnings(val); } void QPDF::setMaxWarnings(size_t val) { - m->max_warnings = val; + (void)m->cf.max_warnings(val); } void QPDF::setAttemptRecovery(bool val) { - m->attempt_recovery = val; + (void)m->cf.surpress_recovery(!val); } void QPDF::setImmediateCopyFrom(bool val) { - m->immediate_copy_from = val; + (void)m->cf.immediate_copy_from(val); } std::vector QPDF::getWarnings() { - std::vector result = m->warnings; + std::vector result = std::move(m->warnings); m->warnings.clear(); return result; } @@ -372,12 +371,12 @@ QPDF::warn(QPDFExc const& e) void Common::warn(QPDFExc const& e) { - if (m->max_warnings > 0 && m->warnings.size() >= m->max_warnings) { + if (cf.max_warnings() > 0 && m->warnings.size() >= cf.max_warnings()) { stopOnError("Too many warnings - file is too badly damaged"); } - m->warnings.push_back(e); - if (!m->suppress_warnings) { - *m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n"; + m->warnings.emplace_back(e); + if (!cf.suppress_warnings()) { + *cf.log()->getWarn() << "WARNING: " << m->warnings.back().what() << "\n"; } } @@ -715,7 +714,7 @@ QPDF::getRoot() } else if ( // Check_mode is an interim solution to request #810 pending a more comprehensive review of // the approach to more extensive checks and warning levels. - m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) { + m->cf.check_mode() && !root.getKey("/Type").isNameAndEquals("/Catalog")) { warn(m->c.damagedPDF("", -1, "catalog /Type entry missing or invalid")); root.replaceKey("/Type", "/Catalog"_qpdf); } diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index 287bfee..d920ab0 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -30,20 +30,8 @@ using namespace qpdf; -using Doc = QPDF::Doc; -using Pages = Doc::Pages; - -// JobSetter class is restricted to QPDFJob. -class Doc::JobSetter -{ - public: - // Enable enhanced warnings for pdf file checking. - static void - setCheckMode(QPDF& qpdf, bool val) - { - qpdf.m->check_mode = val; - } -}; +using QDoc = QPDF::Doc; +using Pages = QDoc::Pages; namespace { @@ -489,7 +477,7 @@ QPDFJob::writeQPDF(QPDF& pdf) if (!pdf.getWarnings().empty()) { m->warnings = true; } - if (m->warnings && (!m->suppress_warnings)) { + if (m->warnings && !m->qcf.suppress_warnings()) { if (createsOutput()) { *m->log->getWarn() << m->message_prefix @@ -647,24 +635,6 @@ QPDFJob::getEncryptionStatus() return m->encryption_status; } -void -QPDFJob::setQPDFOptions(QPDF& pdf) -{ - pdf.setLogger(m->log); - if (m->ignore_xref_streams) { - pdf.setIgnoreXRefStreams(true); - } - if (m->suppress_recovery) { - pdf.setAttemptRecovery(false); - } - if (m->password_is_hex_key) { - pdf.setPasswordIsHexKey(true); - } - if (m->suppress_warnings) { - pdf.setSuppressWarnings(true); - } -} - static std::string show_bool(bool v) { @@ -749,7 +719,6 @@ QPDFJob::doCheck(QPDF& pdf) bool okay = true; auto& cout = *m->log->getInfo(); cout << "checking " << m->infile_name() << "\n"; - Doc::JobSetter::setCheckMode(pdf, true); try { int extension_level = pdf.getExtensionLevel(); cout << "PDF Version: " << pdf.getPDFVersion(); @@ -1740,7 +1709,7 @@ QPDFJob::doProcessOnce( bool main_input) { pdf = std::make_unique(); - setQPDFOptions(*pdf); + pdf->doc().config(m->qcf.log(m->log)); if (empty) { pdf->emptyPDF(); } else if (main_input && m->json_input) { @@ -1770,16 +1739,15 @@ QPDFJob::doProcess( // was incorrectly encoded, there's a good chance we'd succeed here. std::string ptemp; - if (password && (!m->password_is_hex_key)) { + if (password && !m->qcf.password_is_hex_key()) { if (m->password_mode == QPDFJob::pm_hex_bytes) { // Special case: handle --password-mode=hex-bytes for input password as well as output // password - QTC::TC("qpdf", "QPDFJob input password hex-bytes"); ptemp = QUtil::hex_decode(password); password = ptemp.c_str(); } } - if ((password == nullptr) || empty || m->password_is_hex_key || m->suppress_password_recovery) { + if (!password || empty || m->qcf.password_is_hex_key() || m->suppress_password_recovery) { // There is no password, or we're not doing recovery, so just do the normal processing with // the supplied password. doProcessOnce(pdf, fn, password, empty, used_for_input, main_input); @@ -3046,12 +3014,10 @@ QPDFJob::doSplitPages(QPDF& pdf) last = num_pages; } QPDF outpdf; + outpdf.doc().config(m->qcf); outpdf.emptyPDF(); QPDFAcroFormDocumentHelper* out_afdh = afdh.hasAcroForm() ? &outpdf.doc().acroform() : nullptr; - if (m->suppress_warnings) { - outpdf.setSuppressWarnings(true); - } for (size_t pageno = first; pageno <= last; ++pageno) { QPDFObjectHandle page = pages.at(pageno - 1); outpdf.addPage(page, false); diff --git a/libqpdf/QPDFJob_config.cc b/libqpdf/QPDFJob_config.cc index 746b5be..de8c0a6 100644 --- a/libqpdf/QPDFJob_config.cc +++ b/libqpdf/QPDFJob_config.cc @@ -68,6 +68,7 @@ QPDFJob::Config* QPDFJob::Config::check() { o.m->check = true; + o.m->qcf.check_mode(true); o.m->require_outfile = false; return this; } @@ -233,7 +234,7 @@ QPDFJob::Config::generateAppearances() QPDFJob::Config* QPDFJob::Config::ignoreXrefStreams() { - o.m->ignore_xref_streams = true; + o.m->qcf.ignore_xref_streams(true); return this; } @@ -415,7 +416,7 @@ QPDFJob::Config::noOriginalObjectIds() QPDFJob::Config* QPDFJob::Config::noWarn() { - o.m->suppress_warnings = true; + o.m->qcf.suppress_warnings(true); return this; } @@ -465,7 +466,7 @@ QPDFJob::Config::password(std::string const& parameter) QPDFJob::Config* QPDFJob::Config::passwordIsHexKey() { - o.m->password_is_hex_key = true; + o.m->qcf.password_is_hex_key(true); return this; } @@ -662,7 +663,7 @@ QPDFJob::Config::suppressPasswordRecovery() QPDFJob::Config* QPDFJob::Config::suppressRecovery() { - o.m->suppress_recovery = true; + o.m->qcf.surpress_recovery(true); return this; } diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 3977bb4..8aa53ac 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -27,8 +27,8 @@ using namespace std::literals; using namespace qpdf; -using Doc = QPDF::Doc; -using Encryption = Doc::Encryption; +using QDoc = QPDF::Doc; +using Encryption = QDoc::Encryption; QPDFWriter::ProgressReporter::~ProgressReporter() // NOLINT (modernize-use-equals-default) { @@ -263,7 +263,7 @@ Pl_stack::Popper::pop() } // Writer class is restricted to QPDFWriter so that only it can call certain methods. -class Doc::Writer: Doc::Common +class QPDF::Doc::Writer: QPDF::Doc::Common { friend class QPDFWriter; Writer(QPDF& qpdf) : diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index 153852d..0bd46c0 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -27,12 +27,6 @@ using namespace qpdf; using Streams = QPDF::Doc::Objects::Streams; -bool -Streams::immediate_copy_from() const -{ - return qpdf.m->immediate_copy_from; -} - class Streams::Copier final: public QPDFObjectHandle::StreamDataProvider { class Data @@ -308,14 +302,13 @@ Stream::copy_data_to(Stream& dest) { qpdf_expect(dest); auto s = stream(); - auto& streams = qpdf()->doc().objects().streams(); auto& d_streams = dest.qpdf()->doc().objects().streams(); auto dict = dest.getDict(); // Copy information from the foreign stream so we can pipe its data later without keeping the // original QPDF object around. - if (streams.immediate_copy_from() && !s->stream_data) { + if (qpdf()->doc().config().immediate_copy_from() && !s->stream_data) { // Pull the stream data into a buffer before attempting the copy operation. Do it on the // source stream so that if the source stream is copied multiple times, we don't have to // keep duplicating the memory. diff --git a/libqpdf/QPDF_encryption.cc b/libqpdf/QPDF_encryption.cc index 91b3819..5a0f2c0 100644 --- a/libqpdf/QPDF_encryption.cc +++ b/libqpdf/QPDF_encryption.cc @@ -904,7 +904,7 @@ QPDF::EncryptionParameters::initialize(QPDF& qpdf) } Encryption data(V, R, Length / 8, p, O, U, OE, UE, Perms, id1, encrypt_metadata); - if (qm.provided_password_is_hex_key) { + if (qm.cf.password_is_hex_key()) { // ignore passwords in file encryption_key = QUtil::hex_decode(provided_password); return; diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc index 9313d43..dfeb2dc 100644 --- a/libqpdf/QPDF_linearization.cc +++ b/libqpdf/QPDF_linearization.cc @@ -69,20 +69,20 @@ load_vector_vector( bit_stream.skipToNextByte(); } -QPDF::ObjUser::ObjUser(user_e type) : +Lin::ObjUser::ObjUser(user_e type) : ou_type(type) { qpdf_expect(type == ou_root); } -QPDF::ObjUser::ObjUser(user_e type, size_t pageno) : +Lin::ObjUser::ObjUser(user_e type, size_t pageno) : ou_type(type), pageno(pageno) { qpdf_expect(type == ou_page || type == ou_thumb); } -QPDF::ObjUser::ObjUser(user_e type, std::string const& key) : +Lin::ObjUser::ObjUser(user_e type, std::string const& key) : ou_type(type), key(key) { @@ -90,7 +90,7 @@ QPDF::ObjUser::ObjUser(user_e type, std::string const& key) : } bool -QPDF::ObjUser::operator<(ObjUser const& rhs) const +Lin::ObjUser::operator<(ObjUser const& rhs) const { if (ou_type < rhs.ou_type) { return true; @@ -106,8 +106,8 @@ QPDF::ObjUser::operator<(ObjUser const& rhs) const return false; } -QPDF::UpdateObjectMapsFrame::UpdateObjectMapsFrame( - QPDF::ObjUser const& ou, QPDFObjectHandle oh, bool top) : +Lin::UpdateObjectMapsFrame::UpdateObjectMapsFrame( + ObjUser const& ou, QPDFObjectHandle oh, bool top) : ou(ou), oh(oh), top(top) @@ -137,7 +137,7 @@ Lin::optimize_internal( bool allow_changes, std::function skip_stream_parameters) { - if (!m->obj_user_to_objects.empty()) { + if (!obj_user_to_objects_.empty()) { // already optimized return; } @@ -186,9 +186,9 @@ Lin::optimize_internal( } ObjUser root_ou = ObjUser(ObjUser::ou_root); - auto root_og = QPDFObjGen(root.getObjGen()); - m->obj_user_to_objects[root_ou].insert(root_og); - m->object_to_obj_users[root_og].insert(root_ou); + auto root_og = root.id_gen(); + obj_user_to_objects_[root_ou].insert(root_og); + object_to_obj_users_[root_og].insert(root_ou); filterCompressedObjects(object_stream_data); } @@ -217,14 +217,14 @@ Lin::updateObjectMaps( } } - if (cur.oh.isIndirect()) { + if (cur.oh.indirect()) { QPDFObjGen og(cur.oh.getObjGen()); if (!visited.add(og)) { QTC::TC("qpdf", "QPDF opt loop detected"); continue; } - m->obj_user_to_objects[cur.ou].insert(og); - m->object_to_obj_users[og].insert(cur.ou); + obj_user_to_objects_[cur.ou].insert(og); + object_to_obj_users_[og].insert(cur.ou); } if (cur.oh.isArray()) { @@ -280,34 +280,30 @@ Lin::filterCompressedObjects(std::map const& object_stream_data) std::map> t_obj_user_to_objects; std::map> t_object_to_obj_users; - for (auto const& i1: m->obj_user_to_objects) { - ObjUser const& ou = i1.first; - // Loop over objects. - for (auto const& og: i1.second) { + for (auto const& [ou, ogs]: obj_user_to_objects_) { + for (auto const& og: ogs) { auto i2 = object_stream_data.find(og.getObj()); if (i2 == object_stream_data.end()) { t_obj_user_to_objects[ou].insert(og); } else { - t_obj_user_to_objects[ou].insert(QPDFObjGen(i2->second, 0)); + t_obj_user_to_objects[ou].insert({i2->second, 0}); } } } - for (auto const& i1: m->object_to_obj_users) { - QPDFObjGen const& og = i1.first; - // Loop over obj_users. - for (auto const& ou: i1.second) { + for (auto const& [og, ous]: object_to_obj_users_) { + for (auto const& ou: ous) { auto i2 = object_stream_data.find(og.getObj()); if (i2 == object_stream_data.end()) { t_object_to_obj_users[og].insert(ou); } else { - t_object_to_obj_users[QPDFObjGen(i2->second, 0)].insert(ou); + t_object_to_obj_users[{i2->second, 0}].insert(ou); } } } - m->obj_user_to_objects = t_obj_user_to_objects; - m->object_to_obj_users = t_object_to_obj_users; + obj_user_to_objects_ = std::move(t_obj_user_to_objects); + object_to_obj_users_ = std::move(t_object_to_obj_users); } void @@ -324,10 +320,8 @@ Lin::filterCompressedObjects(QPDFWriter::ObjTable const& obj) std::map> t_obj_user_to_objects; std::map> t_object_to_obj_users; - for (auto const& i1: m->obj_user_to_objects) { - ObjUser const& ou = i1.first; - // Loop over objects. - for (auto const& og: i1.second) { + for (auto const& [ou, ogs]: obj_user_to_objects_) { + for (auto const& og: ogs) { if (obj.contains(og)) { if (auto const& i2 = obj[og].object_stream; i2 <= 0) { t_obj_user_to_objects[ou].insert(og); @@ -338,40 +332,45 @@ Lin::filterCompressedObjects(QPDFWriter::ObjTable const& obj) } } - for (auto const& i1: m->object_to_obj_users) { - QPDFObjGen const& og = i1.first; + for (auto const& [og, ous]: object_to_obj_users_) { if (obj.contains(og)) { // Loop over obj_users. - for (auto const& ou: i1.second) { + for (auto const& ou: ous) { if (auto i2 = obj[og].object_stream; i2 <= 0) { t_object_to_obj_users[og].insert(ou); } else { - t_object_to_obj_users[QPDFObjGen(i2, 0)].insert(ou); + t_object_to_obj_users[{i2, 0}].insert(ou); } } } } - m->obj_user_to_objects = t_obj_user_to_objects; - m->object_to_obj_users = t_object_to_obj_users; + obj_user_to_objects_ = std::move(t_obj_user_to_objects); + object_to_obj_users_ = std::move(t_object_to_obj_users); } void Lin::linearizationWarning(std::string_view msg) { - m->linearization_warnings = true; + linearization_warnings_ = true; warn(qpdf_e_linearization, "", 0, std::string(msg)); } bool QPDF::checkLinearization() { + return m->lin.check(); +} + +bool +Lin::check() +{ try { - m->lin.readLinearizationData(); - m->lin.checkLinearizationInternal(); - return !m->linearization_warnings; + readLinearizationData(); + checkLinearizationInternal(); + return !linearization_warnings_; } catch (std::runtime_error& e) { - m->lin.linearizationWarning( + linearizationWarning( "error encountered while checking linearization data: " + std::string(e.what())); return false; } @@ -380,6 +379,12 @@ QPDF::checkLinearization() bool QPDF::isLinearized() { + return m->lin.linearized(); +} + +bool +Lin::linearized() +{ // If the first object in the file is a dictionary with a suitable /Linearized key and has an /L // key that accurately indicates the file size, initialize m->lindict and return true. @@ -411,7 +416,7 @@ QPDF::isLinearized() continue; } - Dictionary candidate = getObject(toI(QUtil::string_to_ll(t1.getValue().data())), 0); + Dictionary candidate = qpdf.getObject(toI(QUtil::string_to_ll(t1.getValue().data())), 0); auto linkey = candidate["/Linearized"]; if (!(linkey.isNumber() && toI(floor(linkey.getNumericValue())) == 1)) { return false; @@ -422,8 +427,8 @@ QPDF::isLinearized() if (L != m->file->tell()) { return false; } - m->linp.file_size = L; - m->lindict = candidate; + linp_.file_size = L; + lindict_ = candidate; return true; } } @@ -432,24 +437,24 @@ void Lin::readLinearizationData() { util::assertion( - qpdf.isLinearized(), "called readLinearizationData for file that is not linearized" // + linearized(), "called readLinearizationData for file that is not linearized" // ); // This function throws an exception (which is trapped by checkLinearization()) for any errors // that prevent loading. // /L is read and stored in linp by isLinearized() - Array H = m->lindict["/H"]; // hint table offset/length for primary and overflow hint tables + Array H = lindict_["/H"]; // hint table offset/length for primary and overflow hint tables auto H_size = H.size(); Integer H_0 = H[0]; // hint table offset Integer H_1 = H[1]; // hint table length Integer H_2 = H[2]; // hint table offset for overflow hint table Integer H_3 = H[3]; // hint table length for overflow hint table - Integer O = m->lindict["/O"]; - Integer E = m->lindict["/E"]; - Integer N = m->lindict["/N"]; - Integer T = m->lindict["/T"]; - auto P_oh = m->lindict["/P"]; + Integer O = lindict_["/O"]; + Integer E = lindict_["/E"]; + Integer N = lindict_["/N"]; + Integer T = lindict_["/T"]; + auto P_oh = lindict_["/P"]; Integer P = P_oh; // first page number QTC::TC("qpdf", "QPDF P absent in lindict", P ? 0 : 1); @@ -482,13 +487,13 @@ Lin::readLinearizationData() ); // file_size initialized by isLinearized() - m->linp.first_page_object = O; - m->linp.first_page_end = E; - m->linp.npages = N; - m->linp.xref_zero_offset = T; - m->linp.first_page = P ? P : 0; - m->linp.H_offset = H_0; - m->linp.H_length = H_1; + linp_.first_page_object = O; + linp_.first_page_end = E; + linp_.npages = N; + linp_.xref_zero_offset = T; + linp_.first_page = P ? P : 0; + linp_.H_offset = H_0; + linp_.H_length = H_1; // Read hint streams @@ -532,7 +537,7 @@ Lin::readLinearizationData() "linearization dictionary" // ); size_t HOi = HO; - readHGeneric(BitStream(h_buf + HO, h_size - HOi), m->outline_hints); + readHGeneric(BitStream(h_buf + HO, h_size - HOi), outline_hints_); } } @@ -576,7 +581,7 @@ Lin::readHPageOffset(BitStream h) { // All comments referring to the PDF spec refer to the spec for version 1.4. - HPageOffset& t = m->page_offset_hints; + HPageOffset& t = page_offset_hints_; t.min_nobjects = h.getBitsInt(32); // 1 t.first_page_offset = h.getBitsInt(32); // 2 @@ -594,7 +599,7 @@ Lin::readHPageOffset(BitStream h) std::vector& entries = t.entries; entries.clear(); - int nitems = toI(m->linp.npages); + int nitems = toI(linp_.npages); load_vector_int(h, nitems, entries, t.nbits_delta_nobjects, &HPageOffsetEntry::delta_nobjects); load_vector_int( h, nitems, entries, t.nbits_delta_page_length, &HPageOffsetEntry::delta_page_length); @@ -623,7 +628,7 @@ Lin::readHPageOffset(BitStream h) void Lin::readHSharedObject(BitStream h) { - HSharedObject& t = m->shared_object_hints; + HSharedObject& t = shared_object_hints_; t.first_shared_obj = h.getBitsInt(32); // 1 t.first_shared_offset = h.getBitsInt(32); // 2 @@ -672,7 +677,7 @@ Lin::checkLinearizationInternal() // Check all values in linearization parameter dictionary - LinParameters& p = m->linp; + LinParameters& p = linp_; // L: file size in bytes -- checked by isLinearized @@ -708,10 +713,10 @@ Lin::checkLinearizationInternal() break; } } - if (m->file->tell() != m->first_xref_item_offset) { + if (m->file->tell() != objects.first_xref_item_offset()) { linearizationWarning( "space before first xref item (/T) mismatch (computed = " + - std::to_string(m->first_xref_item_offset) + + std::to_string(objects.first_xref_item_offset()) + "; file = " + std::to_string(m->file->tell())); } @@ -722,7 +727,7 @@ Lin::checkLinearizationInternal() // compressed objects are supposed to be at the end of the containing xref section if any object // streams are in use. - if (m->uncompressed_after_compressed) { + if (objects.uncompressed_after_compressed()) { linearizationWarning( "linearized file contains an uncompressed object after a compressed " "one in a cross-reference stream"); @@ -751,11 +756,11 @@ Lin::checkLinearizationInternal() // suite doesn't contain any files with threads. no_ci_stop_if( - m->part6.empty(), "linearization part 6 unexpectedly empty" // + part6_.empty(), "linearization part 6 unexpectedly empty" // ); qpdf_offset_t min_E = -1; qpdf_offset_t max_E = -1; - for (auto const& oh: m->part6) { + for (auto const& oh: part6_) { QPDFObjGen og(oh.getObjGen()); // All objects have to have been dereferenced to be classified. util::assertion(m->obj_cache.contains(og), "linearization part6 object not in cache"); @@ -781,12 +786,12 @@ qpdf_offset_t Lin::maxEnd(ObjUser const& ou) { no_ci_stop_if( - !m->obj_user_to_objects.contains(ou), + !obj_user_to_objects_.contains(ou), "no entry in object user table for requested object user" // ); qpdf_offset_t end = 0; - for (auto const& og: m->obj_user_to_objects[ou]) { + for (auto const& og: obj_user_to_objects_[ou]) { no_ci_stop_if( !m->obj_cache.contains(og), "unknown object referenced in object user table" // ); @@ -868,7 +873,7 @@ Lin::checkHPageOffset( // dictionary in with shared objects even when they are private. size_t npages = pages.size(); - qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset); + qpdf_offset_t table_offset = adjusted_offset(page_offset_hints_.first_page_offset); QPDFObjGen first_page_og(pages.at(0).getObjGen()); if (!m->xref_table.contains(first_page_og)) { stopOnError("supposed first page object is not known"); @@ -886,9 +891,9 @@ Lin::checkHPageOffset( } offset = getLinearizationOffset(page_og); - HPageOffsetEntry& he = m->page_offset_hints.entries.at(toS(pageno)); - CHPageOffsetEntry& ce = m->c_page_offset_data.entries.at(toS(pageno)); - int h_nobjects = he.delta_nobjects + m->page_offset_hints.min_nobjects; + HPageOffsetEntry& he = page_offset_hints_.entries.at(pageno); + CHPageOffsetEntry& ce = c_page_offset_data_.entries.at(pageno); + int h_nobjects = he.delta_nobjects + page_offset_hints_.min_nobjects; if (h_nobjects != ce.nobjects) { // This happens with pdlin when there are thumbnails. linearizationWarning( @@ -899,7 +904,7 @@ Lin::checkHPageOffset( // Use value for number of objects in hint table rather than computed value if there is a // discrepancy. int length = lengthNextN(first_object, h_nobjects); - int h_length = toI(he.delta_page_length + m->page_offset_hints.min_page_length); + int h_length = toI(he.delta_page_length + page_offset_hints_.min_page_length); if (length != h_length) { // This condition almost certainly indicates a bad hint table or a bug in this code. linearizationWarning( @@ -932,11 +937,11 @@ Lin::checkHPageOffset( for (size_t i = 0; i < toS(ce.nshared_objects); ++i) { int idx = ce.shared_identifiers.at(i); no_ci_stop_if( - idx >= m->c_shared_object_data.nshared_total, + idx >= c_shared_object_data_.nshared_total, "index out of bounds for shared object hint table" // ); - int obj = m->c_shared_object_data.entries.at(toS(idx)).object; + int obj = c_shared_object_data_.entries.at(toS(idx)).object; computed_shared.insert(obj); } @@ -978,7 +983,7 @@ Lin::checkHSharedObject(std::vector const& pages, std::mapshared_object_hints; + HSharedObject& so = shared_object_hints_; if (so.nshared_total < so.nshared_first_page) { linearizationWarning("shared object hint table: ntotal < nfirst_page"); } else { @@ -988,10 +993,10 @@ Lin::checkHSharedObject(std::vector const& pages, std::mappart8.empty()) { + if (part8_.empty()) { linearizationWarning("part 8 is empty but nshared_total > nshared_first_page"); } else { - int obj = m->part8.at(0).getObjectID(); + int obj = part8_.at(0).getObjectID(); if (obj != so.first_shared_obj) { linearizationWarning( "first shared object number mismatch: hint table = " + @@ -1039,12 +1044,12 @@ Lin::checkHOutlines() // correct number of objects from the wrong starting place). pdlin appears to generate correct // values in those cases. - if (m->c_outline_data.nobjects == m->outline_hints.nobjects) { - if (m->c_outline_data.nobjects == 0) { + if (c_outline_data_.nobjects == outline_hints_.nobjects) { + if (c_outline_data_.nobjects == 0) { return; } - if (m->c_outline_data.first_object == m->outline_hints.first_object) { + if (c_outline_data_.first_object == outline_hints_.first_object) { // Check length and offset. Acrobat gets these wrong. QPDFObjectHandle outlines = qpdf.getRoot().getKey("/Outlines"); if (!outlines.isIndirect()) { @@ -1060,13 +1065,13 @@ Lin::checkHOutlines() qpdf_offset_t offset = getLinearizationOffset(og); ObjUser ou(ObjUser::ou_root_key, "/Outlines"); int length = toI(maxEnd(ou) - offset); - qpdf_offset_t table_offset = adjusted_offset(m->outline_hints.first_object_offset); + qpdf_offset_t table_offset = adjusted_offset(outline_hints_.first_object_offset); if (offset != table_offset) { linearizationWarning( "incorrect offset in outlines table: hint table = " + std::to_string(table_offset) + "; computed = " + std::to_string(offset)); } - int table_length = m->outline_hints.group_length; + int table_length = outline_hints_.group_length; if (length != table_length) { linearizationWarning( "incorrect length in outlines table: hint table = " + @@ -1083,38 +1088,46 @@ Lin::checkHOutlines() void QPDF::showLinearizationData() { + m->lin.show_data(); +} + +void +Lin::show_data() +{ try { - m->lin.readLinearizationData(); - m->lin.checkLinearizationInternal(); - m->lin.dumpLinearizationDataInternal(); + readLinearizationData(); + checkLinearizationInternal(); + dumpLinearizationDataInternal(); } catch (QPDFExc& e) { - m->lin.linearizationWarning(e.what()); + linearizationWarning(e.what()); } } void Lin::dumpLinearizationDataInternal() { - *m->log->getInfo() << m->file->getName() << ": linearization data:\n\n"; - - *m->log->getInfo() << "file_size: " << m->linp.file_size << "\n" - << "first_page_object: " << m->linp.first_page_object << "\n" - << "first_page_end: " << m->linp.first_page_end << "\n" - << "npages: " << m->linp.npages << "\n" - << "xref_zero_offset: " << m->linp.xref_zero_offset << "\n" - << "first_page: " << m->linp.first_page << "\n" - << "H_offset: " << m->linp.H_offset << "\n" - << "H_length: " << m->linp.H_length << "\n" - << "\n"; - - *m->log->getInfo() << "Page Offsets Hint Table\n\n"; + auto& info = *cf.log()->getInfo(); + + info << m->file->getName() << ": linearization data:\n\n"; + + info << "file_size: " << linp_.file_size << "\n" + << "first_page_object: " << linp_.first_page_object << "\n" + << "first_page_end: " << linp_.first_page_end << "\n" + << "npages: " << linp_.npages << "\n" + << "xref_zero_offset: " << linp_.xref_zero_offset << "\n" + << "first_page: " << linp_.first_page << "\n" + << "H_offset: " << linp_.H_offset << "\n" + << "H_length: " << linp_.H_length << "\n" + << "\n"; + + info << "Page Offsets Hint Table\n\n"; dumpHPageOffset(); - *m->log->getInfo() << "\nShared Objects Hint Table\n\n"; + info << "\nShared Objects Hint Table\n\n"; dumpHSharedObject(); - if (m->outline_hints.nobjects > 0) { - *m->log->getInfo() << "\nOutlines Hint Table\n\n"; - dumpHGeneric(m->outline_hints); + if (outline_hints_.nobjects > 0) { + info << "\nOutlines Hint Table\n\n"; + dumpHGeneric(outline_hints_); } } @@ -1123,8 +1136,8 @@ Lin::adjusted_offset(qpdf_offset_t offset) { // All offsets >= H_offset have to be increased by H_length since all hint table location values // disregard the hint table itself. - if (offset >= m->linp.H_offset) { - return offset + m->linp.H_length; + if (offset >= linp_.H_offset) { + return offset + linp_.H_length; } return offset; } @@ -1132,38 +1145,35 @@ Lin::adjusted_offset(qpdf_offset_t offset) void Lin::dumpHPageOffset() { - HPageOffset& t = m->page_offset_hints; - *m->log->getInfo() << "min_nobjects: " << t.min_nobjects << "\n" - << "first_page_offset: " << adjusted_offset(t.first_page_offset) << "\n" - << "nbits_delta_nobjects: " << t.nbits_delta_nobjects << "\n" - << "min_page_length: " << t.min_page_length << "\n" - << "nbits_delta_page_length: " << t.nbits_delta_page_length << "\n" - << "min_content_offset: " << t.min_content_offset << "\n" - << "nbits_delta_content_offset: " << t.nbits_delta_content_offset << "\n" - << "min_content_length: " << t.min_content_length << "\n" - << "nbits_delta_content_length: " << t.nbits_delta_content_length << "\n" - << "nbits_nshared_objects: " << t.nbits_nshared_objects << "\n" - << "nbits_shared_identifier: " << t.nbits_shared_identifier << "\n" - << "nbits_shared_numerator: " << t.nbits_shared_numerator << "\n" - << "shared_denominator: " << t.shared_denominator << "\n"; - - for (size_t i1 = 0; i1 < m->linp.npages; ++i1) { + auto& info = *cf.log()->getInfo(); + HPageOffset& t = page_offset_hints_; + info << "min_nobjects: " << t.min_nobjects << "\n" + << "first_page_offset: " << adjusted_offset(t.first_page_offset) << "\n" + << "nbits_delta_nobjects: " << t.nbits_delta_nobjects << "\n" + << "min_page_length: " << t.min_page_length << "\n" + << "nbits_delta_page_length: " << t.nbits_delta_page_length << "\n" + << "min_content_offset: " << t.min_content_offset << "\n" + << "nbits_delta_content_offset: " << t.nbits_delta_content_offset << "\n" + << "min_content_length: " << t.min_content_length << "\n" + << "nbits_delta_content_length: " << t.nbits_delta_content_length << "\n" + << "nbits_nshared_objects: " << t.nbits_nshared_objects << "\n" + << "nbits_shared_identifier: " << t.nbits_shared_identifier << "\n" + << "nbits_shared_numerator: " << t.nbits_shared_numerator << "\n" + << "shared_denominator: " << t.shared_denominator << "\n"; + + for (size_t i1 = 0; i1 < linp_.npages; ++i1) { HPageOffsetEntry& pe = t.entries.at(i1); - *m->log->getInfo() << "Page " << i1 << ":\n" - << " nobjects: " << pe.delta_nobjects + t.min_nobjects << "\n" - << " length: " << pe.delta_page_length + t.min_page_length - << "\n" - // content offset is relative to page, not file - << " content_offset: " << pe.delta_content_offset + t.min_content_offset - << "\n" - << " content_length: " << pe.delta_content_length + t.min_content_length - << "\n" - << " nshared_objects: " << pe.nshared_objects << "\n"; + info << "Page " << i1 << ":\n" + << " nobjects: " << pe.delta_nobjects + t.min_nobjects << "\n" + << " length: " << pe.delta_page_length + t.min_page_length + << "\n" + // content offset is relative to page, not file + << " content_offset: " << pe.delta_content_offset + t.min_content_offset << "\n" + << " content_length: " << pe.delta_content_length + t.min_content_length << "\n" + << " nshared_objects: " << pe.nshared_objects << "\n"; for (size_t i2 = 0; i2 < toS(pe.nshared_objects); ++i2) { - *m->log->getInfo() << " identifier " << i2 << ": " << pe.shared_identifiers.at(i2) - << "\n"; - *m->log->getInfo() << " numerator " << i2 << ": " << pe.shared_numerators.at(i2) - << "\n"; + info << " identifier " << i2 << ": " << pe.shared_identifiers.at(i2) << "\n"; + info << " numerator " << i2 << ": " << pe.shared_numerators.at(i2) << "\n"; } } } @@ -1171,27 +1181,27 @@ Lin::dumpHPageOffset() void Lin::dumpHSharedObject() { - HSharedObject& t = m->shared_object_hints; - *m->log->getInfo() << "first_shared_obj: " << t.first_shared_obj << "\n" - << "first_shared_offset: " << adjusted_offset(t.first_shared_offset) << "\n" - << "nshared_first_page: " << t.nshared_first_page << "\n" - << "nshared_total: " << t.nshared_total << "\n" - << "nbits_nobjects: " << t.nbits_nobjects << "\n" - << "min_group_length: " << t.min_group_length << "\n" - << "nbits_delta_group_length: " << t.nbits_delta_group_length << "\n"; + auto& info = *cf.log()->getInfo(); + HSharedObject& t = shared_object_hints_; + info << "first_shared_obj: " << t.first_shared_obj << "\n" + << "first_shared_offset: " << adjusted_offset(t.first_shared_offset) << "\n" + << "nshared_first_page: " << t.nshared_first_page << "\n" + << "nshared_total: " << t.nshared_total << "\n" + << "nbits_nobjects: " << t.nbits_nobjects << "\n" + << "min_group_length: " << t.min_group_length << "\n" + << "nbits_delta_group_length: " << t.nbits_delta_group_length << "\n"; for (size_t i = 0; i < toS(t.nshared_total); ++i) { HSharedObjectEntry& se = t.entries.at(i); - *m->log->getInfo() << "Shared Object " << i << ":\n" - << " group length: " << se.delta_group_length + t.min_group_length - << "\n"; + info << "Shared Object " << i << ":\n" + << " group length: " << se.delta_group_length + t.min_group_length << "\n"; // PDF spec says signature present nobjects_minus_one are always 0, so print them only if // they have a non-zero value. if (se.signature_present) { - *m->log->getInfo() << " signature present\n"; + info << " signature present\n"; } if (se.nobjects_minus_one != 0) { - *m->log->getInfo() << " nobjects: " << se.nobjects_minus_one + 1 << "\n"; + info << " nobjects: " << se.nobjects_minus_one + 1 << "\n"; } } } @@ -1199,10 +1209,11 @@ Lin::dumpHSharedObject() void Lin::dumpHGeneric(HGeneric& t) { - *m->log->getInfo() << "first_object: " << t.first_object << "\n" - << "first_object_offset: " << adjusted_offset(t.first_object_offset) << "\n" - << "nobjects: " << t.nobjects << "\n" - << "group_length: " << t.group_length << "\n"; + *cf.log()->getInfo() << "first_object: " << t.first_object << "\n" + << "first_object_offset: " << adjusted_offset(t.first_object_offset) + << "\n" + << "nobjects: " << t.nobjects << "\n" + << "group_length: " << t.group_length << "\n"; } template @@ -1215,7 +1226,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) // actual offsets and lengths are not computed here, but anything related to object ordering is. util::assertion( - !m->object_to_obj_users.empty(), + !object_to_obj_users_.empty(), "INTERNAL ERROR: QPDF::calculateLinearizationData called before optimize()" // ); // Note that we can't call optimize here because we don't know whether it should be called @@ -1264,15 +1275,15 @@ Lin::calculateLinearizationData(T const& object_stream_data) // * outlines: part 6 or 9 - m->part4.clear(); - m->part6.clear(); - m->part7.clear(); - m->part8.clear(); - m->part9.clear(); - m->c_linp = LinParameters(); - m->c_page_offset_data = CHPageOffset(); - m->c_shared_object_data = CHSharedObject(); - m->c_outline_data = HGeneric(); + part4_.clear(); + part6_.clear(); + part7_.clear(); + part8_.clear(); + part9_.clear(); + c_linp_ = LinParameters(); + c_page_offset_data_ = CHPageOffset(); + c_shared_object_data_ = CHSharedObject(); + c_outline_data_ = HGeneric(); QPDFObjectHandle root = qpdf.getRoot(); bool outlines_in_first_page = false; @@ -1307,10 +1318,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) std::set lc_outlines; std::set lc_root; - for (auto& oiter: m->object_to_obj_users) { - QPDFObjGen const& og = oiter.first; - std::set& ous = oiter.second; - + for (auto& [og, ous]: object_to_obj_users_) { bool in_open_document = false; bool in_first_page = false; int other_pages = 0; @@ -1409,8 +1417,8 @@ Lin::calculateLinearizationData(T const& object_stream_data) // npages is the size of the existing pages vector, which has been created by traversing the // pages tree, and as such is a reasonable size. - m->c_linp.npages = npages; - m->c_page_offset_data.entries = std::vector(npages); + c_linp_.npages = npages; + c_page_offset_data_.entries = std::vector(npages); // Part 4: open document objects. We don't care about the order. @@ -1418,9 +1426,9 @@ Lin::calculateLinearizationData(T const& object_stream_data) lc_root.size() != 1, "found other than one root while calculating linearization data" // ); - m->part4.emplace_back(qpdf.getObject(*(lc_root.begin()))); + part4_.emplace_back(qpdf.getObject(*(lc_root.begin()))); for (auto const& og: lc_open_document) { - m->part4.emplace_back(qpdf.getObject(og)); + part4_.emplace_back(qpdf.getObject(og)); } // Part 6: first page objects. Note: implementation note 124 states that Acrobat always treats @@ -1435,31 +1443,31 @@ Lin::calculateLinearizationData(T const& object_stream_data) no_ci_stop_if( !lc_first_page_private.erase(first_page_og), "unable to linearize first page" // ); - m->c_linp.first_page_object = uc_pages.at(0).getObjectID(); - m->part6.emplace_back(uc_pages.at(0)); + c_linp_.first_page_object = uc_pages.at(0).getObjectID(); + part6_.emplace_back(uc_pages.at(0)); // The PDF spec "recommends" an order for the rest of the objects, but we are going to disregard // it except to the extent that it groups private and shared objects contiguously for the sake // of hint tables. for (auto const& og: lc_first_page_private) { - m->part6.emplace_back(qpdf.getObject(og)); + part6_.emplace_back(qpdf.getObject(og)); } for (auto const& og: lc_first_page_shared) { - m->part6.emplace_back(qpdf.getObject(og)); + part6_.emplace_back(qpdf.getObject(og)); } // Place the outline dictionary if it goes in the first page section. if (outlines_in_first_page) { - pushOutlinesToPart(m->part6, lc_outlines, object_stream_data); + pushOutlinesToPart(part6_, lc_outlines, object_stream_data); } // Fill in page offset hint table information for the first page. The PDF spec says that // nshared_objects should be zero for the first page. pdlin does not appear to obey this, but // it fills in garbage values for all the shared object identifiers on the first page. - m->c_page_offset_data.entries.at(0).nobjects = toI(m->part6.size()); + c_page_offset_data_.entries.at(0).nobjects = toI(part6_.size()); // Part 7: other pages' private objects @@ -1473,23 +1481,23 @@ Lin::calculateLinearizationData(T const& object_stream_data) "unable to linearize page " + std::to_string(i) // ); - m->part7.emplace_back(uc_pages.at(i)); + part7_.emplace_back(uc_pages.at(i)); // Place all non-shared objects referenced by this page, updating the page object count for // the hint table. - m->c_page_offset_data.entries.at(i).nobjects = 1; + c_page_offset_data_.entries.at(i).nobjects = 1; ObjUser ou(ObjUser::ou_page, i); no_ci_stop_if( - !m->obj_user_to_objects.contains(ou), + !obj_user_to_objects_.contains(ou), "found unreferenced page while calculating linearization data" // ); - for (auto const& og: m->obj_user_to_objects[ou]) { + for (auto const& og: obj_user_to_objects_[ou]) { if (lc_other_page_private.erase(og)) { - m->part7.emplace_back(qpdf.getObject(og)); - ++m->c_page_offset_data.entries.at(i).nobjects; + part7_.emplace_back(qpdf.getObject(og)); + ++c_page_offset_data_.entries.at(i).nobjects; } } } @@ -1504,7 +1512,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) // Order is unimportant. for (auto const& og: lc_other_page_shared) { - m->part8.emplace_back(qpdf.getObject(og)); + part8_.emplace_back(qpdf.getObject(og)); } // Part 9: other objects @@ -1515,14 +1523,13 @@ Lin::calculateLinearizationData(T const& object_stream_data) // we throw all remaining objects in arbitrary order. // Place the pages tree. - std::set pages_ogs = - m->obj_user_to_objects[ObjUser(ObjUser::ou_root_key, "/Pages")]; + auto& pages_ogs = obj_user_to_objects_[{ObjUser::ou_root_key, "/Pages"}]; no_ci_stop_if( pages_ogs.empty(), "found empty pages tree while calculating linearization data" // ); for (auto const& og: pages_ogs) { if (lc_other.erase(og)) { - m->part9.emplace_back(qpdf.getObject(og)); + part9_.emplace_back(qpdf.getObject(og)); } } @@ -1534,17 +1541,16 @@ Lin::calculateLinearizationData(T const& object_stream_data) QPDFObjGen thumb_og(thumb.getObjGen()); // Output the thumbnail itself if (lc_thumbnail_private.erase(thumb_og) && !thumb.null()) { - m->part9.emplace_back(thumb); + part9_.emplace_back(thumb); } else { // No internal error this time...there's nothing to stop this object from having // been referred to somewhere else outside of a page's /Thumb, and if it had been, // there's nothing to prevent it from having been in some set other than // lc_thumbnail_private. } - std::set& ogs = m->obj_user_to_objects[ObjUser(ObjUser::ou_thumb, i)]; - for (auto const& og: ogs) { + for (auto const& og: obj_user_to_objects_[{ObjUser::ou_thumb, i}]) { if (lc_thumbnail_private.erase(og)) { - m->part9.emplace_back(qpdf.getObject(og)); + part9_.emplace_back(qpdf.getObject(og)); } } } @@ -1556,24 +1562,24 @@ Lin::calculateLinearizationData(T const& object_stream_data) // Place shared thumbnail objects for (auto const& og: lc_thumbnail_shared) { - m->part9.emplace_back(qpdf.getObject(og)); + part9_.emplace_back(qpdf.getObject(og)); } // Place outlines unless in first page if (!outlines_in_first_page) { - pushOutlinesToPart(m->part9, lc_outlines, object_stream_data); + pushOutlinesToPart(part9_, lc_outlines, object_stream_data); } // Place all remaining objects for (auto const& og: lc_other) { - m->part9.emplace_back(qpdf.getObject(og)); + part9_.emplace_back(qpdf.getObject(og)); } // Make sure we got everything exactly once. size_t num_placed = - m->part4.size() + m->part6.size() + m->part7.size() + m->part8.size() + m->part9.size(); - size_t num_wanted = m->object_to_obj_users.size(); + part4_.size() + part6_.size() + part7_.size() + part8_.size() + part9_.size(); + size_t num_wanted = object_to_obj_users_.size(); no_ci_stop_if( // This can happen with damaged files, e.g. if the root is part of the the pages tree. num_placed != num_wanted, @@ -1593,20 +1599,20 @@ Lin::calculateLinearizationData(T const& object_stream_data) // only without regards to generation. std::map obj_to_index; - m->c_shared_object_data.nshared_first_page = toI(m->part6.size()); - m->c_shared_object_data.nshared_total = - m->c_shared_object_data.nshared_first_page + toI(m->part8.size()); + c_shared_object_data_.nshared_first_page = toI(part6_.size()); + c_shared_object_data_.nshared_total = + c_shared_object_data_.nshared_first_page + toI(part8_.size()); - std::vector& shared = m->c_shared_object_data.entries; - for (auto& oh: m->part6) { + std::vector& shared = c_shared_object_data_.entries; + for (auto& oh: part6_) { int obj = oh.getObjectID(); obj_to_index[obj] = toI(shared.size()); shared.emplace_back(obj); } - QTC::TC("qpdf", "QPDF lin part 8 empty", m->part8.empty() ? 1 : 0); - if (!m->part8.empty()) { - m->c_shared_object_data.first_shared_obj = m->part8.at(0).getObjectID(); - for (auto& oh: m->part8) { + QTC::TC("qpdf", "QPDF lin part 8 empty", part8_.empty() ? 1 : 0); + if (!part8_.empty()) { + c_shared_object_data_.first_shared_obj = part8_.at(0).getObjectID(); + for (auto& oh: part8_) { int obj = oh.getObjectID(); obj_to_index[obj] = toI(shared.size()); shared.emplace_back(obj); @@ -1614,22 +1620,22 @@ Lin::calculateLinearizationData(T const& object_stream_data) } no_ci_stop_if( std::cmp_not_equal( - m->c_shared_object_data.nshared_total, m->c_shared_object_data.entries.size()), + c_shared_object_data_.nshared_total, c_shared_object_data_.entries.size()), "shared object hint table has wrong number of entries" // ); // Now compute the list of shared objects for each page after the first page. for (size_t i = 1; i < npages; ++i) { - CHPageOffsetEntry& pe = m->c_page_offset_data.entries.at(i); + CHPageOffsetEntry& pe = c_page_offset_data_.entries.at(i); ObjUser ou(ObjUser::ou_page, i); no_ci_stop_if( - !m->obj_user_to_objects.contains(ou), + !obj_user_to_objects_.contains(ou), "found unreferenced page while calculating linearization data" // ); - for (auto const& og: m->obj_user_to_objects[ou]) { - if ((m->object_to_obj_users[og].size() > 1) && (obj_to_index.contains(og.getObj()))) { + for (auto const& og: obj_user_to_objects_[ou]) { + if (object_to_obj_users_[og].size() > 1 && obj_to_index.contains(og.getObj())) { int idx = obj_to_index[og.getObj()]; ++pe.nshared_objects; pe.shared_identifiers.push_back(idx); @@ -1655,22 +1661,22 @@ Lin::pushOutlinesToPart( QTC::TC( "qpdf", "QPDF lin outlines in part", - &part == &m->part6 ? 0 - : (&part == &m->part9) ? 1 - : 9999); // can't happen + &part == &part6_ ? 0 + : (&part == &part9_) ? 1 + : 9999); // can't happen if (lc_outlines.erase(outlines_og)) { // Make sure outlines is in lc_outlines in case the file is damaged. in which case it may be // included in an earlier part. part.emplace_back(outlines); - m->c_outline_data.first_object = outlines_og.getObj(); - m->c_outline_data.nobjects = 1; + c_outline_data_.first_object = outlines_og.getObj(); + c_outline_data_.nobjects = 1; } for (auto const& og: lc_outlines) { - if (!m->c_outline_data.first_object) { - m->c_outline_data.first_object = og.getObj(); + if (!c_outline_data_.first_object) { + c_outline_data_.first_object = og.getObj(); } part.emplace_back(qpdf.getObject(og)); - ++m->c_outline_data.nobjects; + ++c_outline_data_.nobjects; } } @@ -1684,11 +1690,11 @@ Lin::getLinearizedParts( std::vector& part9) { calculateLinearizationData(obj); - part4 = m->part4; - part6 = m->part6; - part7 = m->part7; - part8 = m->part8; - part9 = m->part9; + part4 = part4_; + part6 = part6_; + part7 = part7_; + part8 = part8_; + part9 = part9_; } static inline int @@ -1728,7 +1734,7 @@ Lin::calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::Ob auto const& all_pages = pages.all(); size_t npages = all_pages.size(); - CHPageOffset& cph = m->c_page_offset_data; + CHPageOffset& cph = c_page_offset_data_; std::vector& cphe = cph.entries; // Calculate minimum and maximum values for number of objects per page and page length. @@ -1739,7 +1745,7 @@ Lin::calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::Ob int max_length = 0; int max_shared = 0; - HPageOffset& ph = m->page_offset_hints; + HPageOffset& ph = page_offset_hints_; std::vector& phe = ph.entries; // npages is the size of the existing pages array. phe = std::vector(npages); @@ -1774,7 +1780,7 @@ Lin::calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::Ob ph.min_page_length = min_length; ph.nbits_delta_page_length = nbits(max_length - min_length); ph.nbits_nshared_objects = nbits(max_shared); - ph.nbits_shared_identifier = nbits(m->c_shared_object_data.nshared_total); + ph.nbits_shared_identifier = nbits(c_shared_object_data_.nshared_total); ph.shared_denominator = 4; // doesn't matter // It isn't clear how to compute content offset and content length. Since we are not @@ -1806,9 +1812,9 @@ Lin::calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::Ob void Lin::calculateHSharedObject(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj) { - CHSharedObject& cso = m->c_shared_object_data; + CHSharedObject& cso = c_shared_object_data_; std::vector& csoe = cso.entries; - HSharedObject& so = m->shared_object_hints; + HSharedObject& so = shared_object_hints_; std::vector& soe = so.entries; soe.clear(); @@ -1851,13 +1857,13 @@ Lin::calculateHSharedObject(QPDFWriter::NewObjTable const& new_obj, QPDFWriter:: void Lin::calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj) { - HGeneric& cho = m->c_outline_data; + HGeneric& cho = c_outline_data_; if (cho.nobjects == 0) { return; } - HGeneric& ho = m->outline_hints; + HGeneric& ho = outline_hints_; ho.first_object = obj[cho.first_object].renumber; ho.first_object_offset = new_obj[ho.first_object].xref.getOffset(); @@ -1902,7 +1908,7 @@ write_vector_vector( void Lin::writeHPageOffset(BitWriter& w) { - HPageOffset& t = m->page_offset_hints; + HPageOffset& t = page_offset_hints_; w.writeBitsInt(t.min_nobjects, 32); // 1 w.writeBits(toULL(t.first_page_offset), 32); // 2 @@ -1949,7 +1955,7 @@ Lin::writeHPageOffset(BitWriter& w) void Lin::writeHSharedObject(BitWriter& w) { - HSharedObject& t = m->shared_object_hints; + HSharedObject& t = shared_object_hints_; w.writeBitsInt(t.first_shared_obj, 32); // 1 w.writeBits(toULL(t.first_shared_offset), 32); // 2 @@ -2011,9 +2017,9 @@ Lin::generateHintStream( S = toI(c.getCount()); writeHSharedObject(w); O = 0; - if (m->outline_hints.nobjects > 0) { + if (outline_hints_.nobjects > 0) { O = toI(c.getCount()); - writeHGeneric(w, m->outline_hints); + writeHGeneric(w, outline_hints_); } if (compressed) { hint_buffer = pl::pipe(hint_buffer, Pl_Flate::a_deflate); diff --git a/libqpdf/QPDF_objects.cc b/libqpdf/QPDF_objects.cc index ea3144b..cd8a849 100644 --- a/libqpdf/QPDF_objects.cc +++ b/libqpdf/QPDF_objects.cc @@ -157,7 +157,7 @@ Objects::parse(char const* password) throw damagedPDF("", -1, std::string("error reading xref: ") + e.what()); } } catch (QPDFExc& e) { - if (m->attempt_recovery) { + if (!cf.surpress_recovery()) { reconstruct_xref(e, xref_offset > 0); } else { throw; @@ -694,7 +694,7 @@ Objects::read_xrefTable(qpdf_offset_t xref_offset) for (qpdf_offset_t i = obj; i - num < obj; ++i) { if (i == 0) { // This is needed by checkLinearization() - m->first_xref_item_offset = m->file->tell(); + first_xref_item_offset_ = m->file->tell(); } // For xref_table, these will always be small enough to be ints qpdf_offset_t f1 = 0; @@ -736,7 +736,7 @@ Objects::read_xrefTable(qpdf_offset_t xref_offset) } if (cur_trailer.hasKey("/XRefStm")) { - if (m->ignore_xref_streams) { + if (cf.ignore_xref_streams()) { QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer"); } else { if (cur_trailer.getKey("/XRefStm").isInteger()) { @@ -763,7 +763,7 @@ Objects::read_xrefTable(qpdf_offset_t xref_offset) qpdf_offset_t Objects::read_xrefStream(qpdf_offset_t xref_offset, bool in_stream_recovery) { - if (!m->ignore_xref_streams) { + if (!cf.ignore_xref_streams()) { QPDFObjectHandle xref_obj; try { m->in_read_xref_stream = true; @@ -956,14 +956,14 @@ Objects::processXRefStream( // object record, in which case the generation number appears as the third field. if (saw_first_compressed_object) { if (fields[0] != 2) { - m->uncompressed_after_compressed = true; + uncompressed_after_compressed_ = true; } } else if (fields[0] == 2) { saw_first_compressed_object = true; } if (obj == 0) { // This is needed by checkLinearization() - m->first_xref_item_offset = xref_offset; + first_xref_item_offset_ = xref_offset; } else if (fields[0] == 0) { // Ignore fields[2], which we don't care about in this case. This works around the // issue of some PDF files that put invalid values, like -1, here for deleted @@ -1073,7 +1073,7 @@ Objects::insertFreeXrefEntry(QPDFObjGen og) void QPDF::showXRefTable() { - auto& cout = *m->log->getInfo(); + auto& cout = *m->cf.log()->getInfo(); for (auto const& iter: m->xref_table) { QPDFObjGen const& og = iter.first; QPDFXRefEntry const& entry = iter.second; @@ -1084,15 +1084,15 @@ QPDF::showXRefTable() break; case 2: - *m->log->getInfo() << "compressed; stream = " << entry.getObjStreamNumber() - << ", index = " << entry.getObjStreamIndex(); + *m->cf.log()->getInfo() << "compressed; stream = " << entry.getObjStreamNumber() + << ", index = " << entry.getObjStreamIndex(); break; default: throw std::logic_error("unknown cross-reference table type while showing xref_table"); break; } - m->log->info("\n"); + m->cf.log()->info("\n"); } } @@ -1248,7 +1248,7 @@ Objects::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offse throw damagedPDF("expected endstream"); } } catch (QPDFExc& e) { - if (m->attempt_recovery) { + if (!cf.surpress_recovery()) { warn(e); length = recoverStreamLength(m->file, og, stream_offset); } else { @@ -1431,7 +1431,7 @@ Objects::readObjectAtOffset( QPDFObjGen og; setLastObjectDescription(description, exp_og); - if (!m->attempt_recovery) { + if (cf.surpress_recovery()) { try_recovery = false; } diff --git a/libqpdf/qpdf/QPDFJob_private.hh b/libqpdf/qpdf/QPDFJob_private.hh index 5f80fd6..8fe9be0 100644 --- a/libqpdf/qpdf/QPDFJob_private.hh +++ b/libqpdf/qpdf/QPDFJob_private.hh @@ -5,6 +5,7 @@ #include #include +#include // A selection of pages from a single input PDF to be included in the output. This corresponds to a // single clause in the --pages option. @@ -149,7 +150,7 @@ class QPDFJob::Members public: Members(QPDFJob& job) : - log(QPDFLogger::defaultLogger()), + log(qcf.log()), inputs(job) { } @@ -167,6 +168,7 @@ class QPDFJob::Members static int constexpr DEFAULT_OI_MIN_AREA = 16384; static int constexpr DEFAULT_II_MIN_BYTES = 1024; + qpdf::Doc::Config qcf; std::shared_ptr log; std::string message_prefix{"qpdf"}; bool warnings{false}; @@ -179,11 +181,9 @@ class QPDFJob::Members int split_pages{0}; bool progress{false}; std::function progress_handler{nullptr}; - bool suppress_warnings{false}; bool warnings_exit_zero{false}; bool copy_encryption{false}; bool encrypt{false}; - bool password_is_hex_key{false}; bool suppress_password_recovery{false}; password_mode_e password_mode{pm_auto}; bool allow_insecure{false}; @@ -218,10 +218,8 @@ class QPDFJob::Members bool decode_level_set{false}; bool normalize_set{false}; bool normalize{false}; - bool suppress_recovery{false}; bool object_stream_set{false}; qpdf_object_stream_e object_stream_mode{qpdf_o_preserve}; - bool ignore_xref_streams{false}; bool qdf_mode{false}; bool preserve_unreferenced_objects{false}; remove_unref_e remove_unreferenced_page_resources{re_auto}; diff --git a/libqpdf/qpdf/QPDF_private.hh b/libqpdf/qpdf/QPDF_private.hh index ed426aa..79efe70 100644 --- a/libqpdf/qpdf/QPDF_private.hh +++ b/libqpdf/qpdf/QPDF_private.hh @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -20,6 +21,135 @@ namespace qpdf { class OffsetBuffer; } // namespace is + + class Doc: public QPDF + { + public: + class Config + { + public: + Config() : + log_(QPDFLogger::defaultLogger()) + { + } + + bool + password_is_hex_key() const + { + return password_is_hex_key_; + } + + Config& + password_is_hex_key(bool val) + { + password_is_hex_key_ = val; + return *this; + } + + bool + ignore_xref_streams() const + { + return ignore_xref_streams_; + } + + Config& + ignore_xref_streams(bool val) + { + ignore_xref_streams_ = val; + return *this; + } + + std::shared_ptr + log() const + { + return log_; + } + + Config& + log(std::shared_ptr val) + { + log_ = val; + return *this; + } + + bool + suppress_warnings() const + { + return suppress_warnings_; + } + + Config& + suppress_warnings(bool val) + { + suppress_warnings_ = val; + return *this; + } + + size_t + max_warnings() const + { + return max_warnings_; + } + + Config& + max_warnings(size_t val) + { + max_warnings_ = val; + return *this; + } + + bool + surpress_recovery() const + { + return surpress_recovery_; + } + + Config& + surpress_recovery(bool val) + { + surpress_recovery_ = val; + return *this; + } + + bool + immediate_copy_from() const + { + return immediate_copy_from_; + } + + Config& + immediate_copy_from(bool val) + { + immediate_copy_from_ = val; + return *this; + } + + bool + check_mode() const + { + return check_mode_; + } + + Config& + check_mode(bool val) + { + check_mode_ = val; + return *this; + } + + private: + std::shared_ptr log_; + + size_t max_warnings_{0}; + + bool password_is_hex_key_{false}; + bool ignore_xref_streams_{false}; + bool suppress_warnings_{false}; + bool surpress_recovery_{false}; + bool check_mode_{false}; + bool immediate_copy_from_{false}; + }; // Class Config + }; // class Doc } // namespace qpdf class BitStream; @@ -106,168 +236,8 @@ class QPDF::StringDecrypter final: public QPDFObjectHandle::StringDecrypter QPDF* qpdf; QPDFObjGen og; }; - -// PDF 1.4: Table F.4 -struct QPDF::HPageOffsetEntry -{ - int delta_nobjects{0}; // 1 - qpdf_offset_t delta_page_length{0}; // 2 - // vectors' sizes = nshared_objects - int nshared_objects{0}; // 3 - std::vector shared_identifiers; // 4 - std::vector shared_numerators; // 5 - qpdf_offset_t delta_content_offset{0}; // 6 - qpdf_offset_t delta_content_length{0}; // 7 -}; - -// PDF 1.4: Table F.3 -struct QPDF::HPageOffset -{ - int min_nobjects{0}; // 1 - qpdf_offset_t first_page_offset{0}; // 2 - int nbits_delta_nobjects{0}; // 3 - int min_page_length{0}; // 4 - int nbits_delta_page_length{0}; // 5 - int min_content_offset{0}; // 6 - int nbits_delta_content_offset{0}; // 7 - int min_content_length{0}; // 8 - int nbits_delta_content_length{0}; // 9 - int nbits_nshared_objects{0}; // 10 - int nbits_shared_identifier{0}; // 11 - int nbits_shared_numerator{0}; // 12 - int shared_denominator{0}; // 13 - // vector size is npages - std::vector entries; -}; - -// PDF 1.4: Table F.6 -struct QPDF::HSharedObjectEntry -{ - // Item 3 is a 128-bit signature (unsupported by Acrobat) - int delta_group_length{0}; // 1 - int signature_present{0}; // 2 -- always 0 - int nobjects_minus_one{0}; // 4 -- always 0 -}; - -// PDF 1.4: Table F.5 -struct QPDF::HSharedObject -{ - int first_shared_obj{0}; // 1 - qpdf_offset_t first_shared_offset{0}; // 2 - int nshared_first_page{0}; // 3 - int nshared_total{0}; // 4 - int nbits_nobjects{0}; // 5 - int min_group_length{0}; // 6 - int nbits_delta_group_length{0}; // 7 - // vector size is nshared_total - std::vector entries; -}; - -// PDF 1.4: Table F.9 -struct QPDF::HGeneric -{ - int first_object{0}; // 1 - qpdf_offset_t first_object_offset{0}; // 2 - int nobjects{0}; // 3 - int group_length{0}; // 4 -}; - // Other linearization data structures -// Initialized from Linearization Parameter dictionary -struct QPDF::LinParameters -{ - qpdf_offset_t file_size{0}; // /L - int first_page_object{0}; // /O - qpdf_offset_t first_page_end{0}; // /E - size_t npages{0}; // /N - qpdf_offset_t xref_zero_offset{0}; // /T - int first_page{0}; // /P - qpdf_offset_t H_offset{0}; // offset of primary hint stream - qpdf_offset_t H_length{0}; // length of primary hint stream -}; - -// Computed hint table value data structures. These tables contain the computed values on which -// the hint table values are based. They exclude things like number of bits and store actual -// values instead of mins and deltas. File offsets are also absolute rather than being offset -// by the size of the primary hint table. We populate the hint table structures from these -// during writing and compare the hint table values with these during validation. We ignore -// some values for various reasons described in the code. Those values are omitted from these -// structures. Note also that object numbers are object numbers from the input file, not the -// output file. - -// Naming convention: CHSomething is analogous to HSomething above. "CH" is computed hint. - -struct QPDF::CHPageOffsetEntry -{ - int nobjects{0}; - int nshared_objects{0}; - // vectors' sizes = nshared_objects - std::vector shared_identifiers; -}; - -struct QPDF::CHPageOffset -{ - // vector size is npages - std::vector entries; -}; - -struct QPDF::CHSharedObjectEntry -{ - CHSharedObjectEntry(int object) : - object(object) - { - } - - int object; -}; - -// PDF 1.4: Table F.5 -struct QPDF::CHSharedObject -{ - int first_shared_obj{0}; - int nshared_first_page{0}; - int nshared_total{0}; - // vector size is nshared_total - std::vector entries; -}; - -// No need for CHGeneric -- HGeneric is fine as is. - -// Data structures to support optimization -- implemented in QPDF_optimization.cc - -class QPDF::ObjUser -{ - public: - enum user_e { ou_page = 1, ou_thumb, ou_trailer_key, ou_root_key, ou_root }; - - ObjUser() = delete; - - // type must be ou_root - ObjUser(user_e type); - - // type must be one of ou_page or ou_thumb - ObjUser(user_e type, size_t pageno); - - // type must be one of ou_trailer_key or ou_root_key - ObjUser(user_e type, std::string const& key); - - bool operator<(ObjUser const&) const; - - user_e ou_type; - size_t pageno{0}; // if ou_page; - std::string key; // if ou_trailer_key or ou_root_key -}; - -struct QPDF::UpdateObjectMapsFrame -{ - UpdateObjectMapsFrame(ObjUser const& ou, QPDFObjectHandle oh, bool top); - - ObjUser const& ou; - QPDFObjectHandle oh; - bool top; -}; - class QPDF::PatternFinder final: public InputSource::Finder { public: @@ -296,7 +266,6 @@ class QPDF::Doc { public: class Encryption; - class JobSetter; class Linearization; class Objects; class Pages; @@ -352,7 +321,9 @@ class QPDF::Doc QPDF& qpdf; QPDF::Members* m; + qpdf::Doc::Config& cf; QPDF::Doc::Pages& pages; + QPDF::Doc::Objects& objects; }; Doc() = delete; @@ -368,6 +339,18 @@ class QPDF::Doc { } + qpdf::Doc::Config& + config() + { + return cf; + } + + void + config(qpdf::Doc::Config val) + { + cf = val; + } + inline Linearization& linearization(); inline Objects& objects(); @@ -421,10 +404,13 @@ class QPDF::Doc return *page_labels_; } - private: + protected: QPDF& qpdf; QPDF::Members* m; + qpdf::Doc::Config cf; + + private: // Document Helpers; std::unique_ptr acroform_; std::unique_ptr embedded_files_; @@ -564,6 +550,10 @@ class QPDF::Doc::Linearization: Common { } + bool linearized(); + bool check(); + void show_data(); + // For QPDFWriter: template @@ -593,6 +583,168 @@ class QPDF::Doc::Linearization: Common int& O, bool compressed); + private: + // Data structures to support optimization -- implemented in QPDF_optimization.cc + + class ObjUser + { + public: + enum user_e { ou_page = 1, ou_thumb, ou_trailer_key, ou_root_key, ou_root }; + + ObjUser() = delete; + + // type must be ou_root + ObjUser(user_e type); + + // type must be one of ou_page or ou_thumb + ObjUser(user_e type, size_t pageno); + + // type must be one of ou_trailer_key or ou_root_key + ObjUser(user_e type, std::string const& key); + + bool operator<(ObjUser const&) const; + + user_e ou_type; + size_t pageno{0}; // if ou_page; + std::string key; // if ou_trailer_key or ou_root_key + }; + + struct UpdateObjectMapsFrame + { + UpdateObjectMapsFrame(ObjUser const& ou, QPDFObjectHandle oh, bool top); + + ObjUser const& ou; + QPDFObjectHandle oh; + bool top; + }; + + // PDF 1.4: Table F.4 + struct HPageOffsetEntry + { + int delta_nobjects{0}; // 1 + qpdf_offset_t delta_page_length{0}; // 2 + // vectors' sizes = nshared_objects + int nshared_objects{0}; // 3 + std::vector shared_identifiers; // 4 + std::vector shared_numerators; // 5 + qpdf_offset_t delta_content_offset{0}; // 6 + qpdf_offset_t delta_content_length{0}; // 7 + }; + + // PDF 1.4: Table F.3 + struct HPageOffset + { + int min_nobjects{0}; // 1 + qpdf_offset_t first_page_offset{0}; // 2 + int nbits_delta_nobjects{0}; // 3 + int min_page_length{0}; // 4 + int nbits_delta_page_length{0}; // 5 + int min_content_offset{0}; // 6 + int nbits_delta_content_offset{0}; // 7 + int min_content_length{0}; // 8 + int nbits_delta_content_length{0}; // 9 + int nbits_nshared_objects{0}; // 10 + int nbits_shared_identifier{0}; // 11 + int nbits_shared_numerator{0}; // 12 + int shared_denominator{0}; // 13 + // vector size is npages + std::vector entries; + }; + + // PDF 1.4: Table F.6 + struct HSharedObjectEntry + { + // Item 3 is a 128-bit signature (unsupported by Acrobat) + int delta_group_length{0}; // 1 + int signature_present{0}; // 2 -- always 0 + int nobjects_minus_one{0}; // 4 -- always 0 + }; + + // PDF 1.4: Table F.5 + struct HSharedObject + { + int first_shared_obj{0}; // 1 + qpdf_offset_t first_shared_offset{0}; // 2 + int nshared_first_page{0}; // 3 + int nshared_total{0}; // 4 + int nbits_nobjects{0}; // 5 + int min_group_length{0}; // 6 + int nbits_delta_group_length{0}; // 7 + // vector size is nshared_total + std::vector entries; + }; + + // PDF 1.4: Table F.9 + struct HGeneric + { + int first_object{0}; // 1 + qpdf_offset_t first_object_offset{0}; // 2 + int nobjects{0}; // 3 + int group_length{0}; // 4 + }; + + // Other linearization data structures + + // Initialized from Linearization Parameter dictionary + struct LinParameters + { + qpdf_offset_t file_size{0}; // /L + int first_page_object{0}; // /O + qpdf_offset_t first_page_end{0}; // /E + size_t npages{0}; // /N + qpdf_offset_t xref_zero_offset{0}; // /T + int first_page{0}; // /P + qpdf_offset_t H_offset{0}; // offset of primary hint stream + qpdf_offset_t H_length{0}; // length of primary hint stream + }; + + // Computed hint table value data structures. These tables contain the computed values on which + // the hint table values are based. They exclude things like number of bits and store actual + // values instead of mins and deltas. File offsets are also absolute rather than being offset + // by the size of the primary hint table. We populate the hint table structures from these + // during writing and compare the hint table values with these during validation. We ignore + // some values for various reasons described in the code. Those values are omitted from these + // structures. Note also that object numbers are object numbers from the input file, not the + // output file. + + // Naming convention: CHSomething is analogous to HSomething above. "CH" is computed hint. + + struct CHPageOffsetEntry + { + int nobjects{0}; + int nshared_objects{0}; + // vectors' sizes = nshared_objects + std::vector shared_identifiers; + }; + + struct CHPageOffset + { + // vector size is npages + std::vector entries; + }; + + struct CHSharedObjectEntry + { + CHSharedObjectEntry(int object) : + object(object) + { + } + + int object; + }; + + // PDF 1.4: Table F.5 + struct CHSharedObject + { + int first_shared_obj{0}; + int nshared_first_page{0}; + int nshared_total{0}; + // vector size is nshared_total + std::vector entries; + }; + + // No need for CHGeneric -- HGeneric is fine as is. + // methods to support linearization checking -- implemented in QPDF_linearization.cc void readLinearizationData(); @@ -647,6 +799,36 @@ class QPDF::Doc::Linearization: Common std::function skip_stream_parameters); void filterCompressedObjects(std::map const& object_stream_data); void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data); + + // Optimization data + std::map> obj_user_to_objects_; + std::map> object_to_obj_users_; + + // Linearization data + bool linearization_warnings_{false}; // set by linearizationWarning, used by checkLinearization + + // Linearization parameter dictionary and hint table data: may be read from file or computed + // prior to writing a linearized file + QPDFObjectHandle lindict_; + LinParameters linp_; + HPageOffset page_offset_hints_; + HSharedObject shared_object_hints_; + HGeneric outline_hints_; + + // Computed linearization data: used to populate above tables during writing and to compare + // with them during validation. c_ means computed. + LinParameters c_linp_; + CHPageOffset c_page_offset_data_; + CHSharedObject c_shared_object_data_; + HGeneric c_outline_data_; + + // Object ordering data for linearized files: initialized by calculateLinearizationData(). + // Part numbers refer to the PDF 1.4 specification. + std::vector part4_; + std::vector part6_; + std::vector part7_; + std::vector part8_; + std::vector part9_; }; class QPDF::Doc::Objects: Common @@ -743,7 +925,7 @@ class QPDF::Doc::Objects: Common return copier_; } - bool immediate_copy_from() const; + // bool immediate_copy_from() const; private: std::shared_ptr copier_; @@ -776,6 +958,17 @@ class QPDF::Doc::Objects: Common return streams_; } + // actual value from file + qpdf_offset_t + first_xref_item_offset() const + { + return first_xref_item_offset_; + } + bool + uncompressed_after_compressed() const + { + return uncompressed_after_compressed_; + } void parse(char const* password); std::shared_ptr const& resolve(QPDFObjGen og); void inParse(bool); @@ -847,6 +1040,10 @@ class QPDF::Doc::Objects: Common Foreign foreign_; Streams streams_; + + // Linearization data + qpdf_offset_t first_xref_item_offset_{0}; // actual value from file + bool uncompressed_after_compressed_{false}; }; // class QPDF::Doc::Objects // This class is used to represent a PDF Pages tree. @@ -969,18 +1166,11 @@ class QPDF::Members: Doc Doc::Linearization lin; Doc::Objects objects; Doc::Pages pages; - std::shared_ptr log; unsigned long long unique_id{0}; qpdf::Tokenizer tokenizer; std::shared_ptr file; std::string last_object_description; std::shared_ptr last_ostream_description; - bool provided_password_is_hex_key{false}; - bool ignore_xref_streams{false}; - bool suppress_warnings{false}; - size_t max_warnings{0}; - bool attempt_recovery{true}; - bool check_mode{false}; std::shared_ptr encp; std::string pdf_version; std::map xref_table; @@ -995,42 +1185,9 @@ class QPDF::Members: Doc bool reconstructed_xref{false}; bool in_read_xref_stream{false}; bool fixed_dangling_refs{false}; - bool immediate_copy_from{false}; bool in_parse{false}; bool parsed{false}; std::set resolved_object_streams; - - // Linearization data - qpdf_offset_t first_xref_item_offset{0}; // actual value from file - bool uncompressed_after_compressed{false}; - bool linearization_warnings{false}; // set by linearizationWarning, used by checkLinearization - - // Linearization parameter dictionary and hint table data: may be read from file or computed - // prior to writing a linearized file - QPDFObjectHandle lindict; - LinParameters linp; - HPageOffset page_offset_hints; - HSharedObject shared_object_hints; - HGeneric outline_hints; - - // Computed linearization data: used to populate above tables during writing and to compare - // with them during validation. c_ means computed. - LinParameters c_linp; - CHPageOffset c_page_offset_data; - CHSharedObject c_shared_object_data; - HGeneric c_outline_data; - - // Object ordering data for linearized files: initialized by calculateLinearizationData(). - // Part numbers refer to the PDF 1.4 specification. - std::vector part4; - std::vector part6; - std::vector part7; - std::vector part8; - std::vector part9; - - // Optimization data - std::map> obj_user_to_objects; - std::map> object_to_obj_users; }; // The Resolver class is restricted to QPDFObject and BaseHandle so that only it can resolve @@ -1051,7 +1208,9 @@ class QPDF::Doc::Resolver inline QPDF::Doc::Common::Common(QPDF& qpdf, QPDF::Members* m) : qpdf(qpdf), m(m), - pages(m->pages) + cf(m->cf), + pages(m->pages), + objects(m->objects) { } diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index dfbc25c..9d7fee2 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -248,7 +248,6 @@ QPDFJob password not encodable 0 QPDFJob auto-encode password 0 QPDFJob bytes fallback warning 0 QPDFJob invalid utf-8 in auto 0 -QPDFJob input password hex-bytes 0 QPDFFormFieldObjectHelper replaced BMC at EOF 0 QPDFFormFieldObjectHelper fallback Tf 0 QPDFPageObjectHelper copy shared attribute 1 diff --git a/qpdf/qtest/qpdf/catalgg.out b/qpdf/qtest/qpdf/catalgg.out index 58fb244..3c3b806 100644 --- a/qpdf/qtest/qpdf/catalgg.out +++ b/qpdf/qtest/qpdf/catalgg.out @@ -1,5 +1,5 @@ -checking catalgg.pdf WARNING: catalgg.pdf: catalog /Type entry missing or invalid +checking catalgg.pdf PDF Version: 1.3 File is not encrypted File is not linearized