diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 7dfcbeb..cecb242 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -747,20 +747,7 @@ class QPDF class ResolveRecorder; class JSONReactor; - void stopOnError(std::string const& message); - inline void - no_ci_stop_if(bool condition, std::string const& message, std::string const& context = {}); void removeObject(QPDFObjGen og); - static QPDFExc damagedPDF( - InputSource& input, - std::string const& object, - qpdf_offset_t offset, - std::string const& message); - QPDFExc damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message); - QPDFExc damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message); - QPDFExc damagedPDF(std::string const& object, std::string const& message); - QPDFExc damagedPDF(qpdf_offset_t offset, std::string const& message); - QPDFExc damagedPDF(std::string const& message); // Calls finish() on the pipeline when done but does not delete it bool pipeStreamData( diff --git a/include/qpdf/QPDFPageDocumentHelper.hh b/include/qpdf/QPDFPageDocumentHelper.hh index 3714258..2820b93 100644 --- a/include/qpdf/QPDFPageDocumentHelper.hh +++ b/include/qpdf/QPDFPageDocumentHelper.hh @@ -120,13 +120,6 @@ class QPDFPageDocumentHelper: public QPDFDocumentHelper void flattenAnnotations(int required_flags = 0, int forbidden_flags = an_invisible | an_hidden); private: - void flattenAnnotationsForPage( - QPDFPageObjectHelper& page, - QPDFObjectHandle& resources, - QPDFAcroFormDocumentHelper& afdh, - int required_flags, - int forbidden_flags); - class Members; std::shared_ptr m; diff --git a/libqpdf/CMakeLists.txt b/libqpdf/CMakeLists.txt index ade7a2d..d8f15ca 100644 --- a/libqpdf/CMakeLists.txt +++ b/libqpdf/CMakeLists.txt @@ -76,7 +76,6 @@ set(libqpdf_SOURCES QPDFObjectHelper.cc QPDFOutlineDocumentHelper.cc QPDFOutlineObjectHelper.cc - QPDFPageDocumentHelper.cc QPDFPageLabelDocumentHelper.cc QPDFPageObjectHelper.cc QPDFParser.cc @@ -94,7 +93,6 @@ set(libqpdf_SOURCES QPDF_json.cc QPDF_linearization.cc QPDF_objects.cc - QPDF_optimization.cc QPDF_pages.cc QTC.cc QUtil.cc diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 010e101..f0d3882 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -27,7 +27,9 @@ using namespace qpdf; using namespace std::literals; -using Objects = QPDF::Doc::Objects; +using Doc = QPDF::Doc; +using Common = Doc::Common; +using Objects = Doc::Objects; using Foreign = Objects::Foreign; using Streams = Objects::Streams; @@ -126,10 +128,11 @@ QPDF::QPDFVersion() } QPDF::Members::Members(QPDF& qpdf) : - doc(qpdf, *this), - lin(doc.linearization()), - objects(doc.objects()), - pages(doc.pages()), + Doc(qpdf, this), + c(qpdf, this), + lin(*this), + objects(*this), + pages(*this), log(QPDFLogger::defaultLogger()), file(std::make_shared()), encp(std::make_shared()) @@ -363,6 +366,12 @@ QPDF::findHeader() void QPDF::warn(QPDFExc const& e) { + m->c.warn(e); +} + +void +Common::warn(QPDFExc const& e) +{ if (m->max_warnings > 0 && m->warnings.size() >= m->max_warnings) { stopOnError("Too many warnings - file is too badly damaged"); } @@ -379,7 +388,17 @@ QPDF::warn( qpdf_offset_t offset, std::string const& message) { - warn(QPDFExc(error_code, getFilename(), object, offset, message)); + m->c.warn(QPDFExc(error_code, getFilename(), object, offset, message)); +} + +void +Common::warn( + qpdf_error_code_e error_code, + std::string const& object, + qpdf_offset_t offset, + std::string const& message) +{ + warn(QPDFExc(error_code, qpdf.getFilename(), object, offset, message)); } QPDFObjectHandle @@ -514,7 +533,7 @@ Objects::Foreign::Copier::copied(QPDFObjectHandle const& foreign) auto og = foreign.getObjGen(); if (!object_map.contains(og)) { - qpdf.warn(qpdf.damagedPDF( + warn(damagedPDF( foreign.qpdf()->getFilename() + " object " + og.unparse(' '), foreign.offset(), "unexpected reference to /Pages object while copying foreign object; replacing with " @@ -692,12 +711,12 @@ QPDF::getRoot() { QPDFObjectHandle root = m->trailer.getKey("/Root"); if (!root.isDictionary()) { - throw damagedPDF("", -1, "unable to find /Root dictionary"); + throw m->c.damagedPDF("", -1, "unable to find /Root dictionary"); } else if ( // Check_mode is an interim solution to request #810 pending a more comprehensive review of // the approach to more extensive checks and warning levels. m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) { - warn(damagedPDF("", -1, "catalog /Type entry missing or invalid")); + warn(m->c.damagedPDF("", -1, "catalog /Type entry missing or invalid")); root.replaceKey("/Type", "/Catalog"_qpdf); } return root; @@ -743,7 +762,7 @@ QPDF::pipeStreamData( try { auto buf = file->read(length, offset); if (buf.size() != length) { - throw damagedPDF( + throw qpdf_for_warning.m->c.damagedPDF( *file, "", offset + toO(buf.size()), "unexpected EOF reading stream data"); } pipeline->write(buf.data(), length); @@ -759,7 +778,7 @@ QPDF::pipeStreamData( QTC::TC("qpdf", "QPDF decoding error warning"); qpdf_for_warning.warn( // line-break - damagedPDF( + qpdf_for_warning.m->c.damagedPDF( *file, "", file->getLastOffset(), @@ -768,7 +787,7 @@ QPDF::pipeStreamData( if (will_retry) { qpdf_for_warning.warn( // line-break - damagedPDF( + qpdf_for_warning.m->c.damagedPDF( *file, "", file->getLastOffset(), @@ -814,14 +833,14 @@ QPDF::pipeStreamData( // Throw a generic exception when we lack context for something more specific. New code should not // use this. void -QPDF::stopOnError(std::string const& message) +Common::stopOnError(std::string const& message) { throw damagedPDF("", message); } // Return an exception of type qpdf_e_damaged_pdf. QPDFExc -QPDF::damagedPDF( +Common::damagedPDF( InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message) { return {qpdf_e_damaged_pdf, input.getName(), object, offset, message, true}; @@ -830,14 +849,15 @@ QPDF::damagedPDF( // Return an exception of type qpdf_e_damaged_pdf. The object is taken from // m->last_object_description. QPDFExc -QPDF::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message) +Common::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message) const { return damagedPDF(input, m->last_object_description, offset, message); } // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file. QPDFExc -QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message) +Common::damagedPDF( + std::string const& object, qpdf_offset_t offset, std::string const& message) const { return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message, true}; } @@ -845,7 +865,7 @@ QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string co // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the // offset from .m->file->getLastOffset(). QPDFExc -QPDF::damagedPDF(std::string const& object, std::string const& message) +Common::damagedPDF(std::string const& object, std::string const& message) const { return damagedPDF(object, m->file->getLastOffset(), message); } @@ -853,7 +873,7 @@ QPDF::damagedPDF(std::string const& object, std::string const& message) // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object // from .m->last_object_description. QPDFExc -QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message) +Common::damagedPDF(qpdf_offset_t offset, std::string const& message) const { return damagedPDF(m->last_object_description, offset, message); } @@ -861,7 +881,7 @@ QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message) // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file, the object // from m->last_object_description and the offset from m->file->getLastOffset(). QPDFExc -QPDF::damagedPDF(std::string const& message) +Common::damagedPDF(std::string const& message) const { return damagedPDF(m->last_object_description, m->file->getLastOffset(), message); } @@ -869,13 +889,13 @@ QPDF::damagedPDF(std::string const& message) bool QPDF::everCalledGetAllPages() const { - return m->ever_called_get_all_pages; + return m->pages.ever_called_get_all_pages(); } bool QPDF::everPushedInheritedAttributesToPages() const { - return m->ever_pushed_inherited_attributes_to_pages; + return m->pages.ever_pushed_inherited_attributes_to_pages(); } void diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index cdcadc9..287bfee 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -30,8 +30,11 @@ using namespace qpdf; +using Doc = QPDF::Doc; +using Pages = Doc::Pages; + // JobSetter class is restricted to QPDFJob. -class QPDF::Doc::JobSetter +class Doc::JobSetter { public: // Enable enhanced warnings for pdf file checking. @@ -746,7 +749,7 @@ QPDFJob::doCheck(QPDF& pdf) bool okay = true; auto& cout = *m->log->getInfo(); cout << "checking " << m->infile_name() << "\n"; - QPDF::Doc::JobSetter::setCheckMode(pdf, true); + Doc::JobSetter::setCheckMode(pdf, true); try { int extension_level = pdf.getExtensionLevel(); cout << "PDF Version: " << pdf.getPDFVersion(); @@ -852,7 +855,7 @@ QPDFJob::doShowPages(QPDF& pdf) { int pageno = 0; auto& cout = *m->log->getInfo(); - for (auto& page: pdf.getAllPages()) { + for (auto& page: pdf.doc().pages()) { QPDFPageObjectHelper ph(page); ++pageno; @@ -1040,13 +1043,14 @@ QPDFJob::doJSONObjectinfo(Pipeline* p, bool& first, QPDF& pdf) void QPDFJob::doJSONPages(Pipeline* p, bool& first, QPDF& pdf) { + auto& doc = pdf.doc(); JSON::writeDictionaryKey(p, first, "pages", 1); bool first_page = true; JSON::writeArrayOpen(p, first_page, 2); - auto& pldh = pdf.doc().page_labels(); - auto& odh = pdf.doc().outlines(); + auto& pldh = doc.page_labels(); + auto& odh = doc.outlines(); int pageno = -1; - for (auto& page: pdf.getAllPages()) { + for (auto& page: doc.pages()) { ++pageno; JSON j_page = JSON::makeDictionary(); QPDFPageObjectHelper ph(page); @@ -1105,9 +1109,10 @@ QPDFJob::doJSONPages(Pipeline* p, bool& first, QPDF& pdf) void QPDFJob::doJSONPageLabels(Pipeline* p, bool& first, QPDF& pdf) { + auto& doc = pdf.doc(); JSON j_labels = JSON::makeArray(); - auto& pldh = pdf.doc().page_labels(); - long long npages = QIntC::to_longlong(pdf.getAllPages().size()); + auto& pldh = doc.page_labels(); + long long npages = QIntC::to_longlong(doc.pages().size()); if (pldh.hasPageLabels()) { std::vector labels; pldh.getLabelsForPageRange(0, npages - 1, 0, labels); @@ -1153,27 +1158,29 @@ QPDFJob::addOutlinesToJson( void QPDFJob::doJSONOutlines(Pipeline* p, bool& first, QPDF& pdf) { + auto& doc = pdf.doc(); std::map page_numbers; int n = 0; - for (auto const& oh: pdf.getAllPages()) { + for (auto const& oh: doc.pages()) { page_numbers[oh] = ++n; } JSON j_outlines = JSON::makeArray(); - addOutlinesToJson(pdf.doc().outlines().getTopLevelOutlines(), j_outlines, page_numbers); + addOutlinesToJson(doc.outlines().getTopLevelOutlines(), j_outlines, page_numbers); JSON::writeDictionaryItem(p, first, "outlines", j_outlines, 1); } void QPDFJob::doJSONAcroform(Pipeline* p, bool& first, QPDF& pdf) { + auto& doc = pdf.doc(); JSON j_acroform = JSON::makeDictionary(); - auto& afdh = pdf.doc().acroform(); + auto& afdh = doc.acroform(); j_acroform.addDictionaryMember("hasacroform", JSON::makeBool(afdh.hasAcroForm())); j_acroform.addDictionaryMember("needappearances", JSON::makeBool(afdh.getNeedAppearances())); JSON j_fields = j_acroform.addDictionaryMember("fields", JSON::makeArray()); int pagepos1 = 0; - for (auto const& page: pdf.getAllPages()) { + for (auto const& page: doc.pages()) { ++pagepos1; for (auto& aoh: afdh.getWidgetAnnotationsForPage({page})) { QPDFFormFieldObjectHelper ffh = afdh.getFieldForAnnotation(aoh); @@ -1852,7 +1859,7 @@ QPDFJob::validateUnderOverlay(QPDF& pdf, UnderOverlay* uo) processFile(uo->pdf, uo->filename.data(), uo->password.data(), true, false); try { uo->to_pagenos = - QUtil::parse_numrange(uo->to_nr.data(), static_cast(pdf.getAllPages().size())); + QUtil::parse_numrange(uo->to_nr.data(), static_cast(pdf.doc().pages().size())); } catch (std::runtime_error& e) { throw std::runtime_error( "parsing numeric range for " + uo->which + " \"to\" pages: " + e.what()); @@ -1861,7 +1868,7 @@ QPDFJob::validateUnderOverlay(QPDF& pdf, UnderOverlay* uo) if (uo->from_nr.empty()) { uo->from_nr = uo->repeat_nr; } - int uo_npages = static_cast(uo->pdf->getAllPages().size()); + int uo_npages = static_cast(uo->pdf->doc().pages().size()); uo->from_pagenos = QUtil::parse_numrange(uo->from_nr.data(), uo_npages); if (!uo->repeat_nr.empty()) { uo->repeat_pagenos = QUtil::parse_numrange(uo->repeat_nr.data(), uo_npages); @@ -1887,7 +1894,7 @@ QPDFJob::doUnderOverlayForPage( } auto& dest_afdh = dest_page.qpdf()->doc().acroform(); - auto const& pages = uo.pdf->getAllPages(); + auto const& pages = uo.pdf->doc().pages().all(); std::string content; int min_suffix = 1; QPDFObjectHandle resources = dest_page.getAttribute("/Resources", true); @@ -1957,7 +1964,7 @@ QPDFJob::handleUnderOverlay(QPDF& pdf) validateUnderOverlay(pdf, &uo); } - auto const& dest_pages = pdf.getAllPages(); + auto const& dest_pages = pdf.doc().pages().all(); // First vector key is 0-based page number. Second is index into the overlay/underlay vector. // Watch out to not reverse the keys or be off by one. @@ -2349,14 +2356,15 @@ QPDFJob::Input::initialize(Inputs& in, QPDF* a_qpdf) { qpdf = a_qpdf ? a_qpdf : qpdf_p.get(); if (qpdf) { - orig_pages = qpdf->getAllPages(); + auto& doc = qpdf->doc(); + orig_pages = doc.pages().all(); n_pages = static_cast(orig_pages.size()); copied_pages = std::vector(orig_pages.size(), false); if (in.job.m->remove_unreferenced_page_resources != QPDFJob::re_no) { remove_unreferenced = in.job.shouldRemoveUnreferencedResources(*qpdf); } - if (qpdf->doc().page_labels().hasPageLabels()) { + if (doc.page_labels().hasPageLabels()) { in.any_page_labels = true; } } @@ -3001,6 +3009,8 @@ QPDFJob::setWriterOptions(QPDFWriter& w) void QPDFJob::doSplitPages(QPDF& pdf) { + auto& doc = pdf.doc(); + // Generate output file pattern std::string before; std::string after; @@ -3024,9 +3034,9 @@ QPDFJob::doSplitPages(QPDF& pdf) QPDFPageDocumentHelper dh(pdf); dh.removeUnreferencedResources(); } - auto& pldh = pdf.doc().page_labels(); - auto& afdh = pdf.doc().acroform(); - std::vector const& pages = pdf.getAllPages(); + auto& pldh = doc.page_labels(); + auto& afdh = doc.acroform(); + std::vector const& pages = doc.pages().all(); size_t pageno_len = std::to_string(pages.size()).length(); size_t num_pages = pages.size(); for (size_t i = 0; i < num_pages; i += QIntC::to_size(m->split_pages)) { diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 2abb4db..9029399 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -2099,22 +2099,22 @@ bool QPDFObjectHandle::isPageObject() const { // See comments in QPDFObjectHandle.hh. - if (getOwningQPDF() == nullptr) { + if (!qpdf()) { return false; } // getAllPages repairs /Type when traversing the page tree. - getOwningQPDF()->getAllPages(); + (void)qpdf()->doc().pages().all(); return isDictionaryOfType("/Page"); } bool QPDFObjectHandle::isPagesObject() const { - if (getOwningQPDF() == nullptr) { + if (!qpdf()) { return false; } // getAllPages repairs /Type when traversing the page tree. - getOwningQPDF()->getAllPages(); + (void)qpdf()->doc().pages().all(); return isDictionaryOfType("/Pages"); } diff --git a/libqpdf/QPDFPageDocumentHelper.cc b/libqpdf/QPDFPageDocumentHelper.cc deleted file mode 100644 index 8f1c323..0000000 --- a/libqpdf/QPDFPageDocumentHelper.cc +++ /dev/null @@ -1,176 +0,0 @@ -#include - -#include -#include -#include -#include -#include - -class QPDFPageDocumentHelper::Members -{ -}; - -QPDFPageDocumentHelper::QPDFPageDocumentHelper(QPDF& qpdf) : - QPDFDocumentHelper(qpdf) -{ -} - -QPDFPageDocumentHelper& -QPDFPageDocumentHelper::get(QPDF& qpdf) -{ - return qpdf.doc().page_dh(); -} - -void -QPDFPageDocumentHelper::validate(bool repair) -{ -} - -std::vector -QPDFPageDocumentHelper::getAllPages() -{ - std::vector pages; - for (auto const& iter: qpdf.getAllPages()) { - pages.emplace_back(iter); - } - return pages; -} - -void -QPDFPageDocumentHelper::pushInheritedAttributesToPage() -{ - qpdf.pushInheritedAttributesToPage(); -} - -void -QPDFPageDocumentHelper::removeUnreferencedResources() -{ - for (auto& ph: getAllPages()) { - ph.removeUnreferencedResources(); - } -} - -void -QPDFPageDocumentHelper::addPage(QPDFPageObjectHelper newpage, bool first) -{ - qpdf.addPage(newpage.getObjectHandle(), first); -} - -void -QPDFPageDocumentHelper::addPageAt( - QPDFPageObjectHelper newpage, bool before, QPDFPageObjectHelper refpage) -{ - qpdf.addPageAt(newpage.getObjectHandle(), before, refpage.getObjectHandle()); -} - -void -QPDFPageDocumentHelper::removePage(QPDFPageObjectHelper page) -{ - qpdf.removePage(page.getObjectHandle()); -} - -void -QPDFPageDocumentHelper::flattenAnnotations(int required_flags, int forbidden_flags) -{ - auto& afdh = qpdf.doc().acroform(); - if (afdh.getNeedAppearances()) { - qpdf.getRoot() - .getKey("/AcroForm") - .warn( - "document does not have updated appearance streams, so form fields " - "will not be flattened"); - } - for (auto& ph: getAllPages()) { - QPDFObjectHandle resources = ph.getAttribute("/Resources", true); - if (!resources.isDictionary()) { - // As of #1521, this should be impossible unless a user inserted an invalid page. - resources = ph.getObjectHandle().replaceKeyAndGetNew( - "/Resources", QPDFObjectHandle::newDictionary()); - } - flattenAnnotationsForPage(ph, resources, afdh, required_flags, forbidden_flags); - } - if (!afdh.getNeedAppearances()) { - qpdf.getRoot().removeKey("/AcroForm"); - } -} - -void -QPDFPageDocumentHelper::flattenAnnotationsForPage( - QPDFPageObjectHelper& page, - QPDFObjectHandle& resources, - QPDFAcroFormDocumentHelper& afdh, - int required_flags, - int forbidden_flags) -{ - bool need_appearances = afdh.getNeedAppearances(); - std::vector annots = page.getAnnotations(); - std::vector new_annots; - std::string new_content; - int rotate = 0; - QPDFObjectHandle rotate_obj = page.getObjectHandle().getKey("/Rotate"); - if (rotate_obj.isInteger() && rotate_obj.getIntValue()) { - rotate = rotate_obj.getIntValueAsInt(); - } - int next_fx = 1; - for (auto& aoh: annots) { - QPDFObjectHandle as = aoh.getAppearanceStream("/N"); - bool is_widget = (aoh.getSubtype() == "/Widget"); - bool process = true; - if (need_appearances && is_widget) { - QTC::TC("qpdf", "QPDFPageDocumentHelper skip widget need appearances"); - process = false; - } - if (process && as.isStream()) { - if (is_widget) { - QTC::TC("qpdf", "QPDFPageDocumentHelper merge DR"); - QPDFFormFieldObjectHelper ff = afdh.getFieldForAnnotation(aoh); - QPDFObjectHandle as_resources = as.getDict().getKey("/Resources"); - if (as_resources.isIndirect()) { - QTC::TC("qpdf", "QPDFPageDocumentHelper indirect as resources"); - as.getDict().replaceKey("/Resources", as_resources.shallowCopy()); - as_resources = as.getDict().getKey("/Resources"); - } - as_resources.mergeResources(ff.getDefaultResources()); - } else { - QTC::TC("qpdf", "QPDFPageDocumentHelper non-widget annotation"); - } - std::string name = resources.getUniqueResourceName("/Fxo", next_fx); - std::string content = - aoh.getPageContentForAppearance(name, rotate, required_flags, forbidden_flags); - if (!content.empty()) { - resources.mergeResources("<< /XObject << >> >>"_qpdf); - resources.getKey("/XObject").replaceKey(name, as); - ++next_fx; - } - new_content += content; - } else if (process && !aoh.getAppearanceDictionary().null()) { - // If an annotation has no selected appearance stream, just drop the annotation when - // flattening. This can happen for unchecked checkboxes and radio buttons, popup windows - // associated with comments that aren't visible, and other types of annotations that - // aren't visible. Annotations that have no appearance streams at all, such as Link, - // Popup, and Projection, should be preserved. - QTC::TC("qpdf", "QPDFPageDocumentHelper ignore annotation with no appearance"); - } else { - new_annots.push_back(aoh.getObjectHandle()); - } - } - if (new_annots.size() != annots.size()) { - QPDFObjectHandle page_oh = page.getObjectHandle(); - if (new_annots.empty()) { - QTC::TC("qpdf", "QPDFPageDocumentHelper remove annots"); - page_oh.removeKey("/Annots"); - } else { - QPDFObjectHandle old_annots = page_oh.getKey("/Annots"); - QPDFObjectHandle new_annots_oh = QPDFObjectHandle::newArray(new_annots); - if (old_annots.isIndirect()) { - QTC::TC("qpdf", "QPDFPageDocumentHelper replace indirect annots"); - qpdf.replaceObject(old_annots.getObjGen(), new_annots_oh); - } else { - QTC::TC("qpdf", "QPDFPageDocumentHelper replace direct annots"); - page_oh.replaceKey("/Annots", new_annots_oh); - } - } - page.addPageContents(qpdf.newStream("q\n"), true); - page.addPageContents(qpdf.newStream("\nQ\n" + new_content), false); - } -} diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 1052e8f..3977bb4 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -27,7 +27,8 @@ using namespace std::literals; using namespace qpdf; -using Encryption = QPDF::Doc::Encryption; +using Doc = QPDF::Doc; +using Encryption = Doc::Encryption; QPDFWriter::ProgressReporter::~ProgressReporter() // NOLINT (modernize-use-equals-default) { @@ -262,13 +263,13 @@ Pl_stack::Popper::pop() } // Writer class is restricted to QPDFWriter so that only it can call certain methods. -class QPDF::Doc::Writer +class Doc::Writer: Doc::Common { friend class QPDFWriter; - Writer(QPDF& pdf) : - pdf(pdf), - lin(pdf.m->lin), - objects(pdf.m->objects) + Writer(QPDF& qpdf) : + Common(qpdf, qpdf.doc().m), + lin(m->lin), + objects(m->objects) { } @@ -326,10 +327,9 @@ class QPDF::Doc::Writer size_t tableSize() { - return pdf.m->objects.tableSize(); + return qpdf.m->objects.tableSize(); } - QPDF& pdf; QPDF::Doc::Linearization& lin; QPDF::Doc::Objects& objects; }; @@ -352,7 +352,8 @@ class QPDFWriter::Members: QPDF::Doc::Writer QPDF::Doc::Writer(pdf), w(w), root_og( - pdf.getRoot().getObjGen().isIndirect() ? pdf.getRoot().getObjGen() : QPDFObjGen(-1, 0)), + qpdf.getRoot().getObjGen().isIndirect() ? qpdf.getRoot().getObjGen() + : QPDFObjGen(-1, 0)), pipeline_stack(pipeline) { } @@ -1372,7 +1373,7 @@ QPDFWriter::Members::enqueueObject(QPDFObjectHandle object) // object to have an owning QPDF that is from another file if a direct QPDFObjectHandle from // one file was insert into another file without copying. Doing that is safe even if the // original QPDF gets destroyed, which just disconnects the QPDFObjectHandle from its owner. - if (object.getOwningQPDF() != &pdf) { + if (object.getOwningQPDF() != &qpdf) { throw std::logic_error( "QPDFObjectHandle from different QPDF found while writing. Use " "QPDF::copyForeignObject to add objects from another file."); @@ -1396,7 +1397,7 @@ QPDFWriter::Members::enqueueObject(QPDFObjectHandle object) // stream. Object streams always have generation 0. // Detect loops by storing invalid object ID -1, which will get overwritten later. o.renumber = -1; - enqueueObject(pdf.getObject(o.object_stream, 0)); + enqueueObject(qpdf.getObject(o.object_stream, 0)); } else { object_queue.emplace_back(object); o.renumber = next_objid++; @@ -1907,7 +1908,7 @@ QPDFWriter::Members::writeObjectStream(QPDFObjectHandle object) // reporting, decrement in pass 1. indicateProgress(true, false); - QPDFObjectHandle obj_to_write = pdf.getObject(og); + QPDFObjectHandle obj_to_write = qpdf.getObject(og); if (obj_to_write.isStream()) { // This condition occurred in a fuzz input. Ideally we should block it at parse // time, but it's not clear to me how to construct a case for this. @@ -2024,7 +2025,7 @@ QPDFWriter::Members::writeObject(QPDFObjectHandle object, int object_stream_inde std::string QPDFWriter::Members::getOriginalID1() { - QPDFObjectHandle trailer = pdf.getTrailer(); + QPDFObjectHandle trailer = qpdf.getTrailer(); if (trailer.hasKey("/ID")) { return trailer.getKey("/ID").getArrayItem(0).getStringValue(); } else { @@ -2042,7 +2043,7 @@ QPDFWriter::Members::generateID(bool encrypted) return; } - QPDFObjectHandle trailer = pdf.getTrailer(); + QPDFObjectHandle trailer = qpdf.getTrailer(); std::string result; @@ -2126,7 +2127,6 @@ void QPDFWriter::Members::initializeSpecialStreams() { // Mark all page content streams in case we are filtering or normalizing. - std::vector pages = pdf.getAllPages(); int num = 0; for (auto& page: pages) { page_object_to_seq[page.getObjGen()] = ++num; @@ -2220,13 +2220,13 @@ QPDFWriter::Members::generateObjectStreams() ++n_per; } unsigned int n = 0; - int cur_ostream = pdf.newIndirectNull().getObjectID(); + int cur_ostream = qpdf.newIndirectNull().getObjectID(); for (auto const& item: eligible) { if (n == n_per) { n = 0; // Construct a new null object as the "original" object stream. The rest of the code // knows that this means we're creating the object stream from scratch. - cur_ostream = pdf.newIndirectNull().getObjectID(); + cur_ostream = qpdf.newIndirectNull().getObjectID(); } auto& o = obj[item]; o.object_stream = cur_ostream; @@ -2240,7 +2240,7 @@ QPDFWriter::Members::trimmed_trailer() { // Remove keys from the trailer that necessarily have to be replaced when writing the file. - Dictionary trailer = pdf.getTrailer().unsafeShallowCopy(); + Dictionary trailer = qpdf.getTrailer().unsafeShallowCopy(); // Remove encryption keys trailer.erase("/ID"); @@ -2265,8 +2265,8 @@ QPDFWriter::Members::trimmed_trailer() void QPDFWriter::Members::prepareFileForWrite() { - pdf.fixDanglingReferences(); - auto root = pdf.getRoot(); + qpdf.fixDanglingReferences(); + auto root = qpdf.getRoot(); auto oh = root.getKey("/Extensions"); if (oh.isDictionary()) { const bool extensions_indirect = oh.isIndirect(); @@ -2335,7 +2335,7 @@ QPDFWriter::Members::doWriteSetup() } if (preserve_encryption) { - copyEncryptionParameters(pdf); + copyEncryptionParameters(qpdf); } if (!forced_pdf_version.empty()) { @@ -2380,7 +2380,7 @@ QPDFWriter::Members::doWriteSetup() if (!obj.streams_empty) { if (linearized) { // Page dictionaries are not allowed to be compressed objects. - for (auto& page: pdf.getAllPages()) { + for (auto& page: pages) { if (obj[page].object_stream > 0) { obj[page].object_stream = 0; } @@ -2416,7 +2416,7 @@ QPDFWriter::Members::doWriteSetup() } } - setMinimumPDFVersion(pdf.getPDFVersion(), pdf.getExtensionLevel()); + setMinimumPDFVersion(qpdf.getPDFVersion(), qpdf.getExtensionLevel()); final_pdf_version = min_pdf_version; final_extension_level = min_extension_level; if (!forced_pdf_version.empty()) { @@ -2438,7 +2438,7 @@ QPDFWriter::Members::write() // Set up progress reporting. For linearized files, we write two passes. events_expected is an // approximation, but it's good enough for progress reporting, which is mostly a guess anyway. - events_expected = QIntC::to_int(pdf.getObjectCount() * (linearized ? 2 : 1)); + events_expected = QIntC::to_int(qpdf.getObjectCount() * (linearized ? 2 : 1)); prepareFileForWrite(); @@ -2910,14 +2910,11 @@ QPDFWriter::Members::writeLinearized() openObject(lindict_id); write("<<"); if (pass == 2) { - std::vector const& pages = pdf.getAllPages(); - int first_page_object = obj[pages.at(0)].renumber; - write(" /Linearized 1 /L ").write(file_size + hint_length); // Implementation note 121 states that a space is mandatory after this open bracket. write(" /H [ ").write(new_obj[hint_id].xref.getOffset()).write(" "); write(hint_length); - write(" ] /O ").write(first_page_object); + write(" ] /O ").write(obj[pages.all().at(0)].renumber); write(" /E ").write(part6_end_offset + hint_length); write(" /N ").write(pages.size()); write(" /T ").write(space_before_zero + hint_length); @@ -3110,7 +3107,7 @@ void QPDFWriter::Members::enqueueObjectsStandard() { if (preserve_unreferenced_objects) { - for (auto const& oh: pdf.getAllObjects()) { + for (auto const& oh: qpdf.getAllObjects()) { enqueueObject(oh); } } @@ -3136,20 +3133,18 @@ QPDFWriter::Members::enqueueObjectsPCLm() std::string image_transform_content = "q /image Do Q\n"; // enqueue all pages first - std::vector all = pdf.getAllPages(); - for (auto& page: all) { + for (auto& page: pages) { // enqueue page enqueueObject(page); // enqueue page contents stream - enqueueObject(page.getKey("/Contents")); + enqueueObject(page["/Contents"]); // enqueue all the strips for each page - QPDFObjectHandle strips = page.getKey("/Resources").getKey("/XObject"); - for (auto& image: strips.as_dictionary()) { + for (auto& image: Dictionary(page["/Resources"]["/XObject"])) { if (!image.second.null()) { enqueueObject(image.second); - enqueueObject(QPDFObjectHandle::newStream(&pdf, image_transform_content)); + enqueueObject(QPDFObjectHandle::newStream(&qpdf, image_transform_content)); } } } diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index e47fe59..153852d 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -30,7 +30,7 @@ using Streams = QPDF::Doc::Objects::Streams; bool Streams::immediate_copy_from() const { - return qpdf_.m->immediate_copy_from; + return qpdf.m->immediate_copy_from; } class Streams::Copier final: public QPDFObjectHandle::StreamDataProvider @@ -83,10 +83,10 @@ class Streams::Copier final: public QPDFObjectHandle::StreamDataProvider if (data != copied_data.end()) { auto& fd = data->second; QTC::TC("qpdf", "QPDF pipe foreign encrypted stream", fd.encp->encrypted ? 0 : 1); - if (streams.qpdf().pipeStreamData( + if (streams.qpdf.pipeStreamData( fd.encp, fd.file, - streams.qpdf(), + streams.qpdf, fd.source_og, fd.offset, fd.length, @@ -128,8 +128,8 @@ class Streams::Copier final: public QPDFObjectHandle::StreamDataProvider std::map copied_data; }; -Streams::Streams(QPDF& qpdf) : - qpdf_(qpdf), +Streams::Streams(Common& common) : + Common(common), copier_(std::make_shared(*this)) { } diff --git a/libqpdf/QPDF_encryption.cc b/libqpdf/QPDF_encryption.cc index f45a72c..91b3819 100644 --- a/libqpdf/QPDF_encryption.cc +++ b/libqpdf/QPDF_encryption.cc @@ -734,15 +734,16 @@ QPDF::EncryptionParameters::initialize(QPDF& qpdf) } encryption_initialized = true; + auto& c = qpdf.m->c; auto& qm = *qpdf.m; auto& trailer = qm.trailer; auto& file = qm.file; - auto warn_damaged_pdf = [&qpdf](std::string const& msg) { - qpdf.warn(qpdf.damagedPDF("encryption dictionary", msg)); + auto warn_damaged_pdf = [&qpdf, c](std::string const& msg) { + qpdf.warn(c.damagedPDF("encryption dictionary", msg)); }; auto throw_damaged_pdf = [&qpdf](std::string const& msg) { - throw qpdf.damagedPDF("encryption dictionary", msg); + throw qpdf.m->c.damagedPDF("encryption dictionary", msg); }; auto unsupported = [&file](std::string const& msg) -> QPDFExc { return { @@ -770,14 +771,14 @@ QPDF::EncryptionParameters::initialize(QPDF& qpdf) if (id_obj.size() != 2 || !id_obj.getArrayItem(0).isString()) { // Treating a missing ID as the empty string enables qpdf to decrypt some invalid encrypted // files with no /ID that poppler can read but Adobe Reader can't. - qpdf.warn(qpdf.damagedPDF("trailer", "invalid /ID in trailer dictionary")); + qpdf.warn(qpdf.m->c.damagedPDF("trailer", "invalid /ID in trailer dictionary")); } else { id1 = id_obj.getArrayItem(0).getStringValue(); } auto encryption_dict = trailer.getKey("/Encrypt"); if (!encryption_dict.isDictionary()) { - throw qpdf.damagedPDF("/Encrypt in trailer dictionary is not a dictionary"); + throw qpdf.m->c.damagedPDF("/Encrypt in trailer dictionary is not a dictionary"); } if (Name(encryption_dict["/Filter"]) != "/Standard") { @@ -984,7 +985,7 @@ QPDF::decryptString(std::string& str, QPDFObjGen og) break; default: - warn(damagedPDF( + warn(m->c.damagedPDF( "unknown encryption filter for strings (check /StrF in " "/Encrypt dictionary); strings may be decrypted improperly")); // To avoid repeated warnings, reset cf_string. Assume we'd want to use AES if V == 4. @@ -1017,7 +1018,8 @@ QPDF::decryptString(std::string& str, QPDFObjGen og) } catch (QPDFExc&) { throw; } catch (std::runtime_error& e) { - throw damagedPDF("error decrypting string for object " + og.unparse() + ": " + e.what()); + throw m->c.damagedPDF( + "error decrypting string for object " + og.unparse() + ": " + e.what()); } } diff --git a/libqpdf/QPDF_json.cc b/libqpdf/QPDF_json.cc index e4a61f9..663599a 100644 --- a/libqpdf/QPDF_json.cc +++ b/libqpdf/QPDF_json.cc @@ -460,110 +460,111 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) next_state = st_ignore; auto state = stack.back().state; if (state == st_ignore) { - QTC::TC("qpdf", "QPDF_json ignoring in st_ignore"); - // ignore - } else if (state == st_top) { + return true; // ignore + } + if (state == st_top) { if (key == "qpdf") { - this->saw_qpdf = true; + saw_qpdf = true; if (!value.isArray()) { - QTC::TC("qpdf", "QPDF_json qpdf not array"); error(value.getStart(), "\"qpdf\" must be an array"); } else { next_state = st_qpdf; } - } else { - // Ignore all other fields. - QTC::TC("qpdf", "QPDF_json ignoring unknown top-level key"); + return true; } - } else if (state == st_qpdf_meta) { + return true; // Ignore all other fields. + } + + if (state == st_qpdf_meta) { if (key == "pdfversion") { - this->saw_pdf_version = true; + saw_pdf_version = true; std::string v; - bool okay = false; if (value.getString(v)) { std::string version; char const* p = v.c_str(); if (QPDF::validatePDFVersion(p, version) && (*p == '\0')) { - this->pdf.m->pdf_version = version; - okay = true; + pdf.m->pdf_version = version; + return true; } } - if (!okay) { - QTC::TC("qpdf", "QPDF_json bad pdf version"); - error(value.getStart(), "invalid PDF version (must be \"x.y\")"); - } - } else if (key == "jsonversion") { - this->saw_json_version = true; + error(value.getStart(), "invalid PDF version (must be \"x.y\")"); + return true; + } + if (key == "jsonversion") { + saw_json_version = true; std::string v; - bool okay = false; if (value.getNumber(v)) { std::string version; if (QUtil::string_to_int(v.c_str()) == 2) { - okay = true; + return true; } } - if (!okay) { - QTC::TC("qpdf", "QPDF_json bad json version"); - error(value.getStart(), "invalid JSON version (must be numeric value 2)"); - } - } else if (key == "pushedinheritedpageresources") { + error(value.getStart(), "invalid JSON version (must be numeric value 2)"); + return true; + } + if (key == "pushedinheritedpageresources") { bool v; if (value.getBool(v)) { - if (!this->must_be_complete && v) { - this->pdf.pushInheritedAttributesToPage(); + if (!must_be_complete && v) { + pdf.pushInheritedAttributesToPage(); } - } else { - QTC::TC("qpdf", "QPDF_json bad pushedinheritedpageresources"); - error(value.getStart(), "pushedinheritedpageresources must be a boolean"); + return true; } - } else if (key == "calledgetallpages") { + error(value.getStart(), "pushedinheritedpageresources must be a boolean"); + return true; + } + if (key == "calledgetallpages") { bool v; if (value.getBool(v)) { - if (!this->must_be_complete && v) { - this->pdf.getAllPages(); + if (!must_be_complete && v) { + (void)pdf.doc().pages().all(); } - } else { - QTC::TC("qpdf", "QPDF_json bad calledgetallpages"); - error(value.getStart(), "calledgetallpages must be a boolean"); + return true; } - } else { - // ignore unknown keys for forward compatibility and to skip keys we don't care about - // like "maxobjectid". - QTC::TC("qpdf", "QPDF_json ignore second-level key"); + error(value.getStart(), "calledgetallpages must be a boolean"); + return true; } - } else if (state == st_objects) { - int obj = 0; - int gen = 0; + // ignore unknown keys for forward compatibility and to skip keys we don't care about + // like "maxobjectid". + return true; + } + + if (state == st_objects) { if (key == "trailer") { - this->saw_trailer = true; - this->cur_object = "trailer"; + saw_trailer = true; + cur_object = "trailer"; setNextStateIfDictionary(key, value, st_trailer); - } else if (is_obj_key(key, obj, gen)) { - this->cur_object = key; + return true; + } + + int obj = 0; + int gen = 0; + if (is_obj_key(key, obj, gen)) { + cur_object = key; if (setNextStateIfDictionary(key, value, st_object_top)) { next_obj = objects.getObjectForJSON(obj, gen); } - } else { - QTC::TC("qpdf", "QPDF_json bad object key"); - error(value.getStart(), "object key should be \"trailer\" or \"obj:n n R\""); - } - } else if (state == st_object_top) { - if (stack.empty()) { - throw std::logic_error("stack empty in st_object_top"); + return true; } + error(value.getStart(), "object key should be \"trailer\" or \"obj:n n R\""); + return true; + } + + if (state == st_object_top) { + util::assertion(!stack.empty(), "QPDF_json: stack empty in st_object_top"); auto& tos = stack.back(); - if (!tos.object) { - throw std::logic_error("current object uninitialized in st_object_top"); - } + util::assertion(!!tos.object, "current object uninitialized in st_object_top"); if (key == "value") { // Don't use setNextStateIfDictionary since this can have any type. - this->saw_value = true; + saw_value = true; replaceObject(makeObject(value), value); next_state = st_object; - } else if (key == "stream") { - this->saw_stream = true; + return true; + } + if (key == "stream") { + saw_stream = true; if (setNextStateIfDictionary(key, value, st_stream)) { - this->this_stream_needs_data = false; + this_stream_needs_data = false; if (tos.object.isStream()) { QTC::TC("qpdf", "QPDF_json updating existing stream"); } else { @@ -574,97 +575,87 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) value); } next_obj = tos.object; - } else { - // Error message already given above - QTC::TC("qpdf", "QPDF_json stream not a dictionary"); + return true; } - } else { - // Ignore unknown keys for forward compatibility - QTC::TC("qpdf", "QPDF_json ignore unknown key in object_top"); + return true; // Error message already given above } - } else if (state == st_trailer) { + return true; // Ignore unknown keys for forward compatibility + } + + if (state == st_trailer) { if (key == "value") { - this->saw_value = true; + saw_value = true; // The trailer must be a dictionary, so we can use setNextStateIfDictionary. if (setNextStateIfDictionary("trailer.value", value, st_object)) { - this->pdf.m->trailer = makeObject(value); - setObjectDescription(this->pdf.m->trailer, value); + pdf.m->trailer = makeObject(value); + setObjectDescription(pdf.m->trailer, value); } - } else if (key == "stream") { + return true; + } + if (key == "stream") { // Don't need to set saw_stream here since there's already an error. - QTC::TC("qpdf", "QPDF_json trailer stream"); error(value.getStart(), "the trailer may not be a stream"); - } else { - // Ignore unknown keys for forward compatibility - QTC::TC("qpdf", "QPDF_json ignore unknown key in trailer"); - } - } else if (state == st_stream) { - if (stack.empty()) { - throw std::logic_error("stack empty in st_stream"); + return true; } + return true; // Ignore unknown keys for forward compatibility + } + + if (state == st_stream) { + util::assertion(!stack.empty(), "stack empty in st_stream"); auto& tos = stack.back(); - if (!tos.object.isStream()) { - throw std::logic_error("current object is not stream in st_stream"); - } + util::assertion(tos.object.isStream(), "current object is not stream in st_stream"); if (key == "dict") { - this->saw_dict = true; + saw_dict = true; if (setNextStateIfDictionary("stream.dict", value, st_object)) { tos.object.replaceDict(makeObject(value)); - } else { - // An error had already been given by setNextStateIfDictionary - QTC::TC("qpdf", "QPDF_json stream dict not dict"); + return true; } - } else if (key == "data") { - this->saw_data = true; + return true; // An error had already been given by setNextStateIfDictionary + } + if (key == "data") { + saw_data = true; std::string v; if (!value.getString(v)) { - QTC::TC("qpdf", "QPDF_json stream data not string"); error(value.getStart(), "\"stream.data\" must be a string"); tos.object.replaceStreamData("", {}, {}); - } else { - // The range includes the quotes. - auto start = value.getStart() + 1; - auto end = value.getEnd() - 1; - if (end < start) { - throw std::logic_error("QPDF_json: JSON string length < 0"); - } - tos.object.replaceStreamData(provide_data(is, start, end), {}, {}); + return true; } - } else if (key == "datafile") { - this->saw_datafile = true; + // The range includes the quotes. + auto start = value.getStart() + 1; + auto end = value.getEnd() - 1; + util::assertion(end >= start, "QPDF_json: JSON string length < 0"); + tos.object.replaceStreamData(provide_data(is, start, end), {}, {}); + return true; + } + if (key == "datafile") { + saw_datafile = true; std::string filename; if (!value.getString(filename)) { - QTC::TC("qpdf", "QPDF_json stream datafile not string"); error( value.getStart(), "\"stream.datafile\" must be a string containing a file name"); tos.object.replaceStreamData("", {}, {}); - } else { - tos.object.replaceStreamData(QUtil::file_provider(filename), {}, {}); + return true; } - } else { - // Ignore unknown keys for forward compatibility. - QTC::TC("qpdf", "QPDF_json ignore unknown key in stream"); - } - } else if (state == st_object) { - if (stack.empty()) { - throw std::logic_error("stack empty in st_object"); + tos.object.replaceStreamData(QUtil::file_provider(filename), {}, {}); + return true; } - auto& tos = stack.back(); - auto dict = tos.object; - if (dict.isStream()) { - dict = dict.getDict(); - } - if (!dict.isDictionary()) { - throw std::logic_error( - "current object is not stream or dictionary in st_object dictionary item"); - } - dict.replaceKey( - is_pdf_name(key) ? QPDFObjectHandle::parse(key.substr(2)).getName() : key, - makeObject(value)); - } else { - throw std::logic_error("QPDF_json: unknown state " + std::to_string(state)); + return true; // Ignore unknown keys for forward compatibility. } + + util::assertion(state == st_object, "QPDF_json: unknown state " + std::to_string(state)); + util::assertion(!stack.empty(), "stack empty in st_object"); + auto& tos = stack.back(); + auto dict = tos.object; + if (dict.isStream()) { + dict = dict.getDict(); + } + util::assertion( + dict.isDictionary(), + "current object is not stream or dictionary in st_object dictionary item"); + dict.replaceKey( + is_pdf_name(key) ? QPDFObjectHandle::parse(key.substr(2)).getName() : key, + makeObject(value)); return true; } diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc index 3ac797b..9313d43 100644 --- a/libqpdf/QPDF_linearization.cc +++ b/libqpdf/QPDF_linearization.cc @@ -69,11 +69,298 @@ load_vector_vector( bit_stream.skipToNextByte(); } +QPDF::ObjUser::ObjUser(user_e type) : + ou_type(type) +{ + qpdf_expect(type == ou_root); +} + +QPDF::ObjUser::ObjUser(user_e type, size_t pageno) : + ou_type(type), + pageno(pageno) +{ + qpdf_expect(type == ou_page || type == ou_thumb); +} + +QPDF::ObjUser::ObjUser(user_e type, std::string const& key) : + ou_type(type), + key(key) +{ + qpdf_expect(type == ou_trailer_key || type == ou_root_key); +} + +bool +QPDF::ObjUser::operator<(ObjUser const& rhs) const +{ + if (ou_type < rhs.ou_type) { + return true; + } + if (ou_type == rhs.ou_type) { + if (pageno < rhs.pageno) { + return true; + } + if (pageno == rhs.pageno) { + return key < rhs.key; + } + } + return false; +} + +QPDF::UpdateObjectMapsFrame::UpdateObjectMapsFrame( + QPDF::ObjUser const& ou, QPDFObjectHandle oh, bool top) : + ou(ou), + oh(oh), + top(top) +{ +} + +void +QPDF::optimize( + std::map const& object_stream_data, + bool allow_changes, + std::function skip_stream_parameters) +{ + m->lin.optimize_internal(object_stream_data, allow_changes, skip_stream_parameters); +} + +void +Lin::optimize( + QPDFWriter::ObjTable const& obj, std::function skip_stream_parameters) +{ + optimize_internal(obj, true, skip_stream_parameters); +} + +template +void +Lin::optimize_internal( + T const& object_stream_data, + bool allow_changes, + std::function skip_stream_parameters) +{ + if (!m->obj_user_to_objects.empty()) { + // already optimized + return; + } + + // The PDF specification indicates that /Outlines is supposed to be an indirect reference. Force + // it to be so if it exists and is direct. (This has been seen in the wild.) + QPDFObjectHandle root = qpdf.getRoot(); + if (root.getKey("/Outlines").isDictionary()) { + QPDFObjectHandle outlines = root.getKey("/Outlines"); + if (!outlines.isIndirect()) { + root.replaceKey("/Outlines", qpdf.makeIndirectObject(outlines)); + } + } + + // Traverse pages tree pushing all inherited resources down to the page level. This also + // initializes m->all_pages. + m->pages.pushInheritedAttributesToPage(allow_changes, false); + // Traverse pages + + size_t n = 0; + for (auto const& page: m->pages) { + updateObjectMaps(ObjUser(ObjUser::ou_page, n), page, skip_stream_parameters); + ++n; + } + + // Traverse document-level items + for (auto const& [key, value]: m->trailer.as_dictionary()) { + if (key == "/Root") { + // handled separately + } else { + if (!value.null()) { + updateObjectMaps( + ObjUser(ObjUser::ou_trailer_key, key), value, skip_stream_parameters); + } + } + } + + for (auto const& [key, value]: root.as_dictionary()) { + // Technically, /I keys from /Thread dictionaries are supposed to be handled separately, but + // we are going to disregard that specification for now. There is loads of evidence that + // pdlin and Acrobat both disregard things like this from time to time, so this is almost + // certain not to cause any problems. + if (!value.null()) { + updateObjectMaps(ObjUser(ObjUser::ou_root_key, key), value, skip_stream_parameters); + } + } + + ObjUser root_ou = ObjUser(ObjUser::ou_root); + auto root_og = QPDFObjGen(root.getObjGen()); + m->obj_user_to_objects[root_ou].insert(root_og); + m->object_to_obj_users[root_og].insert(root_ou); + + filterCompressedObjects(object_stream_data); +} + +void +Lin::updateObjectMaps( + ObjUser const& first_ou, + QPDFObjectHandle first_oh, + std::function skip_stream_parameters) +{ + QPDFObjGen::set visited; + std::vector pending; + pending.emplace_back(first_ou, first_oh, true); + // Traverse the object tree from this point taking care to avoid crossing page boundaries. + std::unique_ptr thumb_ou; + while (!pending.empty()) { + auto cur = pending.back(); + pending.pop_back(); + + bool is_page_node = false; + + if (cur.oh.isDictionaryOfType("/Page")) { + is_page_node = true; + if (!cur.top) { + continue; + } + } + + if (cur.oh.isIndirect()) { + QPDFObjGen og(cur.oh.getObjGen()); + if (!visited.add(og)) { + QTC::TC("qpdf", "QPDF opt loop detected"); + continue; + } + m->obj_user_to_objects[cur.ou].insert(og); + m->object_to_obj_users[og].insert(cur.ou); + } + + if (cur.oh.isArray()) { + for (auto const& item: cur.oh.as_array()) { + pending.emplace_back(cur.ou, item, false); + } + } else if (cur.oh.isDictionary() || cur.oh.isStream()) { + QPDFObjectHandle dict = cur.oh; + bool is_stream = cur.oh.isStream(); + int ssp = 0; + if (is_stream) { + dict = cur.oh.getDict(); + if (skip_stream_parameters) { + ssp = skip_stream_parameters(cur.oh); + } + } + + for (auto& [key, value]: dict.as_dictionary()) { + if (value.null()) { + continue; + } + + if (is_page_node && (key == "/Thumb")) { + // Traverse page thumbnail dictionaries as a special case. There can only ever + // be one /Thumb key on a page, and we see at most one page node per call. + thumb_ou = std::make_unique(ObjUser::ou_thumb, cur.ou.pageno); + pending.emplace_back(*thumb_ou, dict.getKey(key), false); + } else if (is_page_node && (key == "/Parent")) { + // Don't traverse back up the page tree + } else if ( + ((ssp >= 1) && (key == "/Length")) || + ((ssp >= 2) && ((key == "/Filter") || (key == "/DecodeParms")))) { + // Don't traverse into stream parameters that we are not going to write. + } else { + pending.emplace_back(cur.ou, value, false); + } + } + } + } +} + +void +Lin::filterCompressedObjects(std::map const& object_stream_data) +{ + if (object_stream_data.empty()) { + return; + } + + // Transform object_to_obj_users and obj_user_to_objects so that they refer only to uncompressed + // objects. If something is a user of a compressed object, then it is really a user of the + // object stream that contains it. + + std::map> t_obj_user_to_objects; + std::map> t_object_to_obj_users; + + for (auto const& i1: m->obj_user_to_objects) { + ObjUser const& ou = i1.first; + // Loop over objects. + for (auto const& og: i1.second) { + auto i2 = object_stream_data.find(og.getObj()); + if (i2 == object_stream_data.end()) { + t_obj_user_to_objects[ou].insert(og); + } else { + t_obj_user_to_objects[ou].insert(QPDFObjGen(i2->second, 0)); + } + } + } + + for (auto const& i1: m->object_to_obj_users) { + QPDFObjGen const& og = i1.first; + // Loop over obj_users. + for (auto const& ou: i1.second) { + auto i2 = object_stream_data.find(og.getObj()); + if (i2 == object_stream_data.end()) { + t_object_to_obj_users[og].insert(ou); + } else { + t_object_to_obj_users[QPDFObjGen(i2->second, 0)].insert(ou); + } + } + } + + m->obj_user_to_objects = t_obj_user_to_objects; + m->object_to_obj_users = t_object_to_obj_users; +} + +void +Lin::filterCompressedObjects(QPDFWriter::ObjTable const& obj) +{ + if (obj.getStreamsEmpty()) { + return; + } + + // Transform object_to_obj_users and obj_user_to_objects so that they refer only to uncompressed + // objects. If something is a user of a compressed object, then it is really a user of the + // object stream that contains it. + + std::map> t_obj_user_to_objects; + std::map> t_object_to_obj_users; + + for (auto const& i1: m->obj_user_to_objects) { + ObjUser const& ou = i1.first; + // Loop over objects. + for (auto const& og: i1.second) { + if (obj.contains(og)) { + if (auto const& i2 = obj[og].object_stream; i2 <= 0) { + t_obj_user_to_objects[ou].insert(og); + } else { + t_obj_user_to_objects[ou].insert(QPDFObjGen(i2, 0)); + } + } + } + } + + for (auto const& i1: m->object_to_obj_users) { + QPDFObjGen const& og = i1.first; + if (obj.contains(og)) { + // Loop over obj_users. + for (auto const& ou: i1.second) { + if (auto i2 = obj[og].object_stream; i2 <= 0) { + t_object_to_obj_users[og].insert(ou); + } else { + t_object_to_obj_users[QPDFObjGen(i2, 0)].insert(ou); + } + } + } + } + + m->obj_user_to_objects = t_obj_user_to_objects; + m->object_to_obj_users = t_object_to_obj_users; +} + void Lin::linearizationWarning(std::string_view msg) { m->linearization_warnings = true; - qpdf.warn(qpdf_e_linearization, "", 0, std::string(msg)); + warn(qpdf_e_linearization, "", 0, std::string(msg)); } bool @@ -166,19 +453,19 @@ Lin::readLinearizationData() Integer P = P_oh; // first page number QTC::TC("qpdf", "QPDF P absent in lindict", P ? 0 : 1); - qpdf.no_ci_stop_if( + no_ci_stop_if( !(H && O && E && N && T && (P || P_oh.null())), "some keys in linearization dictionary are of the wrong type", "linearization dictionary" // ); - qpdf.no_ci_stop_if( + no_ci_stop_if( !(H_size == 2 || H_size == 4), "H has the wrong number of items", "linearization dictionary" // ); - qpdf.no_ci_stop_if( + no_ci_stop_if( !(H_0 && H_1 && (H_size == 2 || (H_2 && H_3))), "some H items are of the wrong type", "linearization dictionary" // @@ -188,8 +475,8 @@ Lin::readLinearizationData() // Various places in the code use linp.npages, which is initialized from N, to pre-allocate // memory, so make sure it's accurate and bail right now if it's not. - qpdf.no_ci_stop_if( - N != qpdf.getAllPages().size(), + no_ci_stop_if( + N != pages.size(), "/N does not match number of pages", "linearization dictionary" // ); @@ -234,13 +521,12 @@ Lin::readLinearizationData() size_t HSi = HS; if (HSi < 0 || HSi >= h_size) { - throw qpdf.damagedPDF( - "linearization hint table", "/S (shared object) offset is out of bounds"); + throw damagedPDF("linearization hint table", "/S (shared object) offset is out of bounds"); } readHSharedObject(BitStream(h_buf + HSi, h_size - HSi)); if (HO) { - qpdf.no_ci_stop_if( + no_ci_stop_if( HO < 0 || HO >= h_size, "/O (outline) offset is out of bounds", "linearization dictionary" // @@ -257,7 +543,7 @@ Lin::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) ObjCache& oc = m->obj_cache[H]; qpdf_offset_t min_end_offset = oc.end_before_space; qpdf_offset_t max_end_offset = oc.end_after_space; - qpdf.no_ci_stop_if( + no_ci_stop_if( !H.isStream(), "hint table is not a stream", "linearization dictionary" // ); @@ -275,7 +561,7 @@ Lin::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) QTC::TC("qpdf", "QPDF hint table length direct"); } qpdf_offset_t computed_end = offset + toO(length); - qpdf.no_ci_stop_if( + no_ci_stop_if( computed_end < min_end_offset || computed_end > max_end_offset, "hint table length mismatch (expected = " + std::to_string(computed_end) + "; actual = " + std::to_string(min_end_offset) + ".." + std::to_string(max_end_offset) + ")", @@ -391,20 +677,20 @@ Lin::checkLinearizationInternal() // L: file size in bytes -- checked by isLinearized // O: object number of first page - std::vector const& pages = qpdf.getAllPages(); - if (p.first_page_object != pages.at(0).getObjectID()) { + auto const& all_pages = pages.all(); + if (p.first_page_object != all_pages.at(0).getObjectID()) { linearizationWarning("first page object (/O) mismatch"); } // N: number of pages - size_t npages = pages.size(); + size_t npages = all_pages.size(); if (std::cmp_not_equal(p.npages, npages)) { // Not tested in the test suite linearizationWarning("page count (/N) mismatch"); } int i = 0; - for (auto const& page: pages) { + for (auto const& page: all_pages) { if (m->xref_table[page].getType() == 2) { linearizationWarning( "page dictionary for page " + std::to_string(i) + " is compressed"); @@ -464,7 +750,7 @@ Lin::checkLinearizationInternal() // are present. In that case, it would probably agree with pdlin. As of this writing, the test // suite doesn't contain any files with threads. - qpdf.no_ci_stop_if( + no_ci_stop_if( m->part6.empty(), "linearization part 6 unexpectedly empty" // ); qpdf_offset_t min_E = -1; @@ -486,22 +772,22 @@ Lin::checkLinearizationInternal() // Check hint tables std::map shared_idx_to_obj; - checkHSharedObject(pages, shared_idx_to_obj); - checkHPageOffset(pages, shared_idx_to_obj); + checkHSharedObject(all_pages, shared_idx_to_obj); + checkHPageOffset(all_pages, shared_idx_to_obj); checkHOutlines(); } qpdf_offset_t Lin::maxEnd(ObjUser const& ou) { - qpdf.no_ci_stop_if( + no_ci_stop_if( !m->obj_user_to_objects.contains(ou), "no entry in object user table for requested object user" // ); qpdf_offset_t end = 0; for (auto const& og: m->obj_user_to_objects[ou]) { - qpdf.no_ci_stop_if( + no_ci_stop_if( !m->obj_cache.contains(og), "unknown object referenced in object user table" // ); end = std::max(end, m->obj_cache[og].end_after_space); @@ -517,7 +803,7 @@ Lin::getLinearizationOffset(QPDFObjGen og) if (typ == 1) { return entry.getOffset(); } - qpdf.no_ci_stop_if( + no_ci_stop_if( typ != 2, "getLinearizationOffset called for xref entry not of type 1 or 2" // ); // For compressed objects, return the offset of the object stream that contains them. @@ -551,7 +837,7 @@ Lin::lengthNextN(int first_object, int n) for (int i = 0; i < n; ++i) { QPDFObjGen og(first_object + i, 0); if (m->xref_table.contains(og)) { - qpdf.no_ci_stop_if( + no_ci_stop_if( !m->obj_cache.contains(og), "found unknown object while calculating length for linearization data" // ); @@ -585,7 +871,7 @@ Lin::checkHPageOffset( qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset); QPDFObjGen first_page_og(pages.at(0).getObjGen()); if (!m->xref_table.contains(first_page_og)) { - qpdf.stopOnError("supposed first page object is not known"); + stopOnError("supposed first page object is not known"); } qpdf_offset_t offset = getLinearizationOffset(first_page_og); if (table_offset != offset) { @@ -596,7 +882,7 @@ Lin::checkHPageOffset( QPDFObjGen page_og(pages.at(pageno).getObjGen()); int first_object = page_og.getObj(); if (!m->xref_table.contains(page_og)) { - qpdf.stopOnError("unknown object in page offset hint table"); + stopOnError("unknown object in page offset hint table"); } offset = getLinearizationOffset(page_og); @@ -636,7 +922,7 @@ Lin::checkHPageOffset( for (size_t i = 0; i < toS(he.nshared_objects); ++i) { int idx = he.shared_identifiers.at(i); - qpdf.no_ci_stop_if( + no_ci_stop_if( !shared_idx_to_obj.contains(idx), "unable to get object for item in shared objects hint table"); @@ -645,7 +931,7 @@ Lin::checkHPageOffset( for (size_t i = 0; i < toS(ce.nshared_objects); ++i) { int idx = ce.shared_identifiers.at(i); - qpdf.no_ci_stop_if( + no_ci_stop_if( idx >= m->c_shared_object_data.nshared_total, "index out of bounds for shared object hint table" // ); @@ -718,7 +1004,7 @@ Lin::checkHSharedObject(std::vector const& pages, std::mapxref_table.contains(og)) { - qpdf.stopOnError("unknown object in shared object hint table"); + stopOnError("unknown object in shared object hint table"); } qpdf_offset_t offset = getLinearizationOffset(og); qpdf_offset_t h_offset = adjusted_offset(so.first_shared_offset); @@ -768,7 +1054,7 @@ Lin::checkHOutlines() return; } QPDFObjGen og(outlines.getObjGen()); - qpdf.no_ci_stop_if( + no_ci_stop_if( !m->xref_table.contains(og), "unknown object in outlines hint table" // ); qpdf_offset_t offset = getLinearizationOffset(og); @@ -1105,12 +1391,12 @@ Lin::calculateLinearizationData(T const& object_stream_data) // We seem to traverse the page tree a lot in this code, but we can address this for a future // code optimization if necessary. Premature optimization is the root of all evil. - std::vector pages; + std::vector uc_pages; { // local scope // Map all page objects to the containing object stream. This should be a no-op in a // properly linearized file. - for (auto oh: qpdf.getAllPages()) { - pages.emplace_back(getUncompressedObject(oh, object_stream_data)); + for (auto oh: pages) { + uc_pages.emplace_back(getUncompressedObject(oh, object_stream_data)); } } size_t npages = pages.size(); @@ -1128,7 +1414,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) // Part 4: open document objects. We don't care about the order. - qpdf.no_ci_stop_if( + no_ci_stop_if( lc_root.size() != 1, "found other than one root while calculating linearization data" // ); @@ -1142,15 +1428,15 @@ Lin::calculateLinearizationData(T const& object_stream_data) // any option to set this and also disregards /OpenAction. We will do the same. // First, place the actual first page object itself. - qpdf.no_ci_stop_if( + no_ci_stop_if( pages.empty(), "no pages found while calculating linearization data" // ); - QPDFObjGen first_page_og(pages.at(0).getObjGen()); - qpdf.no_ci_stop_if( + QPDFObjGen first_page_og(uc_pages.at(0).getObjGen()); + no_ci_stop_if( !lc_first_page_private.erase(first_page_og), "unable to linearize first page" // ); - m->c_linp.first_page_object = pages.at(0).getObjectID(); - m->part6.emplace_back(pages.at(0)); + m->c_linp.first_page_object = uc_pages.at(0).getObjectID(); + m->part6.emplace_back(uc_pages.at(0)); // The PDF spec "recommends" an order for the rest of the objects, but we are going to disregard // it except to the extent that it groups private and shared objects contiguously for the sake @@ -1181,13 +1467,13 @@ Lin::calculateLinearizationData(T const& object_stream_data) for (size_t i = 1; i < npages; ++i) { // Place this page's page object - QPDFObjGen page_og(pages.at(i).getObjGen()); - qpdf.no_ci_stop_if( + QPDFObjGen page_og(uc_pages.at(i).getObjGen()); + no_ci_stop_if( !lc_other_page_private.erase(page_og), "unable to linearize page " + std::to_string(i) // ); - m->part7.emplace_back(pages.at(i)); + m->part7.emplace_back(uc_pages.at(i)); // Place all non-shared objects referenced by this page, updating the page object count for // the hint table. @@ -1195,7 +1481,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) m->c_page_offset_data.entries.at(i).nobjects = 1; ObjUser ou(ObjUser::ou_page, i); - qpdf.no_ci_stop_if( + no_ci_stop_if( !m->obj_user_to_objects.contains(ou), "found unreferenced page while calculating linearization data" // ); @@ -1231,7 +1517,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) // Place the pages tree. std::set pages_ogs = m->obj_user_to_objects[ObjUser(ObjUser::ou_root_key, "/Pages")]; - qpdf.no_ci_stop_if( + no_ci_stop_if( pages_ogs.empty(), "found empty pages tree while calculating linearization data" // ); for (auto const& og: pages_ogs) { @@ -1243,7 +1529,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) // Place private thumbnail images in page order. Slightly more information would be required if // we were going to bother with thumbnail hint tables. for (size_t i = 0; i < npages; ++i) { - QPDFObjectHandle thumb = pages.at(i).getKey("/Thumb"); + QPDFObjectHandle thumb = uc_pages.at(i).getKey("/Thumb"); thumb = getUncompressedObject(thumb, object_stream_data); QPDFObjGen thumb_og(thumb.getObjGen()); // Output the thumbnail itself @@ -1288,7 +1574,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) size_t num_placed = m->part4.size() + m->part6.size() + m->part7.size() + m->part8.size() + m->part9.size(); size_t num_wanted = m->object_to_obj_users.size(); - qpdf.no_ci_stop_if( + no_ci_stop_if( // This can happen with damaged files, e.g. if the root is part of the the pages tree. num_placed != num_wanted, "QPDF::calculateLinearizationData: wrong number of objects placed (num_placed = " + @@ -1326,7 +1612,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) shared.emplace_back(obj); } } - qpdf.no_ci_stop_if( + no_ci_stop_if( std::cmp_not_equal( m->c_shared_object_data.nshared_total, m->c_shared_object_data.entries.size()), "shared object hint table has wrong number of entries" // @@ -1337,7 +1623,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) for (size_t i = 1; i < npages; ++i) { CHPageOffsetEntry& pe = m->c_page_offset_data.entries.at(i); ObjUser ou(ObjUser::ou_page, i); - qpdf.no_ci_stop_if( + no_ci_stop_if( !m->obj_user_to_objects.contains(ou), "found unreferenced page while calculating linearization data" // ); @@ -1420,12 +1706,12 @@ Lin::outputLengthNextN( int first = obj[in_object].renumber; int last = first + n; - qpdf.no_ci_stop_if( + no_ci_stop_if( first <= 0, "found object that is not renumbered while writing linearization data"); qpdf_offset_t length = 0; for (int i = first; i < last; ++i) { auto l = new_obj[i].length; - qpdf.no_ci_stop_if( + no_ci_stop_if( l == 0, "found item with unknown length while writing linearization data" // ); length += l; @@ -1440,8 +1726,8 @@ Lin::calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::Ob // We are purposely leaving some values set to their initial zero values. - std::vector const& pages = qpdf.getAllPages(); - size_t npages = pages.size(); + auto const& all_pages = pages.all(); + size_t npages = all_pages.size(); CHPageOffset& cph = m->c_page_offset_data; std::vector& cphe = cph.entries; @@ -1467,7 +1753,7 @@ Lin::calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::Ob // assignments. int nobjects = cphe.at(i).nobjects; - int length = outputLengthNextN(pages.at(i).getObjectID(), nobjects, new_obj, obj); + int length = outputLengthNextN(all_pages.at(i).getObjectID(), nobjects, new_obj, obj); int nshared = cphe.at(i).nshared_objects; min_nobjects = std::min(min_nobjects, nobjects); @@ -1483,7 +1769,7 @@ Lin::calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::Ob } ph.min_nobjects = min_nobjects; - ph.first_page_offset = new_obj[obj[pages.at(0)].renumber].xref.getOffset(); + ph.first_page_offset = new_obj[obj[all_pages.at(0)].renumber].xref.getOffset(); ph.nbits_delta_nobjects = nbits(max_nobjects - min_nobjects); ph.min_page_length = min_length; ph.nbits_delta_page_length = nbits(max_length - min_length); @@ -1502,7 +1788,7 @@ Lin::calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::Ob for (auto& phe_i: phe) { // Adjust delta entries if (phe_i.delta_nobjects < min_nobjects || phe_i.delta_page_length < min_length) { - qpdf.stopOnError( + stopOnError( "found too small delta nobjects or delta page length while writing " "linearization data"); } @@ -1537,7 +1823,7 @@ Lin::calculateHSharedObject(QPDFWriter::NewObjTable const& new_obj, QPDFWriter:: soe.emplace_back(); soe.at(i).delta_group_length = length; } - qpdf.no_ci_stop_if( + no_ci_stop_if( soe.size() != toS(cso.nshared_total), "soe has wrong size after initialization" // ); @@ -1553,7 +1839,7 @@ Lin::calculateHSharedObject(QPDFWriter::NewObjTable const& new_obj, QPDFWriter:: for (size_t i = 0; i < toS(cso.nshared_total); ++i) { // Adjust deltas - qpdf.no_ci_stop_if( + no_ci_stop_if( soe.at(i).delta_group_length < min_length, "found too small group length while writing linearization data" // ); @@ -1632,7 +1918,7 @@ Lin::writeHPageOffset(BitWriter& w) w.writeBitsInt(t.nbits_shared_numerator, 16); // 12 w.writeBitsInt(t.shared_denominator, 16); // 13 - int nitems = toI(qpdf.getAllPages().size()); + int nitems = toI(pages.size()); std::vector& entries = t.entries; write_vector_int(w, nitems, entries, t.nbits_delta_nobjects, &HPageOffsetEntry::delta_nobjects); @@ -1687,7 +1973,7 @@ Lin::writeHSharedObject(BitWriter& w) for (size_t i = 0; i < toS(nitems); ++i) { // If signature were present, we'd have to write a 128-bit hash. if (entries.at(i).signature_present != 0) { - qpdf.stopOnError("found unexpected signature present while writing linearization data"); + stopOnError("found unexpected signature present while writing linearization data"); } } write_vector_int(w, nitems, entries, t.nbits_nobjects, &HSharedObjectEntry::nobjects_minus_one); diff --git a/libqpdf/QPDF_objects.cc b/libqpdf/QPDF_objects.cc index 8d399f3..ea3144b 100644 --- a/libqpdf/QPDF_objects.cc +++ b/libqpdf/QPDF_objects.cc @@ -123,7 +123,7 @@ Objects::parse(char const* password) // Find the header anywhere in the first 1024 bytes of the file. PatternFinder hf(qpdf, &QPDF::findHeader); if (!m->file->findFirst("%PDF-", 0, 1024, hf)) { - qpdf.warn(qpdf.damagedPDF("", -1, "can't find PDF header")); + warn(damagedPDF("", -1, "can't find PDF header")); // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode m->pdf_version = "1.2"; } @@ -147,14 +147,14 @@ Objects::parse(char const* password) try { if (xref_offset == 0) { - throw qpdf.damagedPDF("", -1, "can't find startxref"); + throw damagedPDF("", -1, "can't find startxref"); } try { read_xref(xref_offset); } catch (QPDFExc&) { throw; } catch (std::exception& e) { - throw qpdf.damagedPDF("", -1, std::string("error reading xref: ") + e.what()); + throw damagedPDF("", -1, std::string("error reading xref: ") + e.what()); } } catch (QPDFExc& e) { if (m->attempt_recovery) { @@ -168,7 +168,7 @@ Objects::parse(char const* password) m->parsed = true; if (!m->xref_table.empty() && !qpdf.getRoot().getKey("/Pages").isDictionary()) { // QPDFs created from JSON have an empty xref table and no root object yet. - throw qpdf.damagedPDF("", -1, "unable to find page tree"); + throw damagedPDF("", -1, "unable to find page tree"); } } @@ -208,8 +208,7 @@ Objects::reconstruct_xref(QPDFExc& e, bool found_startxref) const auto max_warnings = m->warnings.size() + 1000U; auto check_warnings = [this, max_warnings]() { if (m->warnings.size() > max_warnings) { - throw qpdf.damagedPDF( - "", -1, "too many errors while reconstructing cross-reference table"); + throw damagedPDF("", -1, "too many errors while reconstructing cross-reference table"); } }; @@ -217,9 +216,9 @@ Objects::reconstruct_xref(QPDFExc& e, bool found_startxref) // We may find more objects, which may contain dangling references. m->fixed_dangling_refs = false; - qpdf.warn(qpdf.damagedPDF("", -1, "file is damaged")); - qpdf.warn(e); - qpdf.warn(qpdf.damagedPDF("", -1, "Attempting to reconstruct cross-reference table")); + warn(damagedPDF("", -1, "file is damaged")); + warn(e); + warn(damagedPDF("", -1, "Attempting to reconstruct cross-reference table")); // Delete all references to type 1 (uncompressed) objects std::vector to_delete; @@ -253,7 +252,7 @@ Objects::reconstruct_xref(QPDFExc& e, bool found_startxref) if (obj <= m->xref_table_max_id) { found_objects.emplace_back(obj, gen, token_start); } else { - qpdf.warn(qpdf.damagedPDF( + warn(damagedPDF( "", -1, "ignoring object with impossibly large id " + std::to_string(obj))); } } @@ -278,7 +277,7 @@ Objects::reconstruct_xref(QPDFExc& e, bool found_startxref) if (qpdf.getRoot().getKey("/Pages").isDictionary()) { QTC::TC("qpdf", "QPDF startxref more than 1024 before end"); - qpdf.warn(qpdf.damagedPDF( + warn(damagedPDF( "", -1, "startxref was more than 1024 bytes before end of file")); qpdf.initializeEncryption(); m->parsed = true; @@ -313,7 +312,7 @@ Objects::reconstruct_xref(QPDFExc& e, bool found_startxref) m->trailer = t; break; } - qpdf.warn(qpdf.damagedPDF("trailer", *it, "recovered trailer has no /Root entry")); + warn(damagedPDF("trailer", *it, "recovered trailer has no /Root entry")); } check_warnings(); } @@ -347,7 +346,7 @@ Objects::reconstruct_xref(QPDFExc& e, bool found_startxref) try { read_xref(max_offset, true); } catch (std::exception&) { - qpdf.warn(qpdf.damagedPDF( + warn(damagedPDF( "", -1, "error decoding candidate xref stream while recovering damaged file")); } QTC::TC("qpdf", "QPDF recover xref stream"); @@ -368,7 +367,7 @@ Objects::reconstruct_xref(QPDFExc& e, bool found_startxref) } if (root) { if (!m->trailer) { - qpdf.warn(qpdf.damagedPDF( + warn(damagedPDF( "", -1, "unable to find trailer dictionary while recovering damaged file")); m->trailer = QPDFObjectHandle::newDictionary(); } @@ -381,23 +380,20 @@ Objects::reconstruct_xref(QPDFExc& e, bool found_startxref) // could try to get the trailer from there. This may make it possible to recover files with // bad startxref pointers even when they have object streams. - throw qpdf.damagedPDF( - "", -1, "unable to find trailer dictionary while recovering damaged file"); + throw damagedPDF("", -1, "unable to find trailer dictionary while recovering damaged file"); } if (m->xref_table.empty()) { // We cannot check for an empty xref table in parse because empty tables are valid when // creating QPDF objects from JSON. - throw qpdf.damagedPDF("", -1, "unable to find objects while recovering damaged file"); + throw damagedPDF("", -1, "unable to find objects while recovering damaged file"); } check_warnings(); if (!m->parsed) { - m->parsed = true; - qpdf.getAllPages(); - check_warnings(); - if (m->all_pages.empty()) { - m->parsed = false; - throw qpdf.damagedPDF("", -1, "unable to find any pages while recovering damaged file"); + m->parsed = !m->pages.empty(); + if (!m->parsed) { + throw damagedPDF("", -1, "unable to find any pages while recovering damaged file"); } + check_warnings(); } // We could iterate through the objects looking for streams and try to find objects inside of @@ -443,7 +439,7 @@ Objects::read_xref(qpdf_offset_t xref_offset, bool in_stream_recovery) // where it is terminated by arbitrary whitespace. if ((strncmp(buf, "xref", 4) == 0) && util::is_space(buf[4])) { if (skipped_space) { - qpdf.warn(qpdf.damagedPDF("", -1, "extraneous whitespace seen before xref")); + warn(damagedPDF("", -1, "extraneous whitespace seen before xref")); } QTC::TC( "qpdf", @@ -462,12 +458,12 @@ Objects::read_xref(qpdf_offset_t xref_offset, bool in_stream_recovery) xref_offset = read_xrefStream(xref_offset, in_stream_recovery); } if (visited.contains(xref_offset)) { - throw qpdf.damagedPDF("", -1, "loop detected following xref tables"); + throw damagedPDF("", -1, "loop detected following xref tables"); } } if (!m->trailer) { - throw qpdf.damagedPDF("", -1, "unable to find trailer while reading xref"); + throw damagedPDF("", -1, "unable to find trailer while reading xref"); } int size = m->trailer.getKey("/Size").getIntValueAsInt(); int max_obj = 0; @@ -478,7 +474,7 @@ Objects::read_xref(qpdf_offset_t xref_offset, bool in_stream_recovery) max_obj = std::max(max_obj, *(m->deleted_objects.rbegin())); } if ((size < 1) || (size - 1 != max_obj)) { - qpdf.warn(qpdf.damagedPDF( + warn(damagedPDF( "", -1, ("reported number of objects (" + std::to_string(size) + @@ -615,7 +611,7 @@ Objects::read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type) } if (invalid) { - qpdf.warn(qpdf.damagedPDF("xref table", "accepting invalid xref table entry")); + warn(damagedPDF("xref table", "accepting invalid xref table entry")); } f1 = QUtil::string_to_ll(f1_str.c_str()); @@ -692,7 +688,7 @@ Objects::read_xrefTable(qpdf_offset_t xref_offset) int num = 0; int bytes = 0; if (!parse_xrefFirst(line, obj, num, bytes)) { - throw qpdf.damagedPDF("xref table", "xref syntax invalid"); + throw damagedPDF("xref table", "xref syntax invalid"); } m->file->seek(m->file->getLastOffset() + bytes, SEEK_SET); for (qpdf_offset_t i = obj; i - num < obj; ++i) { @@ -705,7 +701,7 @@ Objects::read_xrefTable(qpdf_offset_t xref_offset) int f2 = 0; char type = '\0'; if (!read_xrefEntry(f1, f2, type)) { - throw qpdf.damagedPDF( + throw damagedPDF( "xref table", "invalid xref entry (obj=" + std::to_string(i) + ")"); } if (type == 'f') { @@ -725,17 +721,17 @@ Objects::read_xrefTable(qpdf_offset_t xref_offset) // Set offset to previous xref table if any QPDFObjectHandle cur_trailer = m->objects.readTrailer(); if (!cur_trailer.isDictionary()) { - throw qpdf.damagedPDF("", "expected trailer dictionary"); + throw damagedPDF("", "expected trailer dictionary"); } if (!m->trailer) { setTrailer(cur_trailer); if (!m->trailer.hasKey("/Size")) { - throw qpdf.damagedPDF("trailer", "trailer dictionary lacks /Size key"); + throw damagedPDF("trailer", "trailer dictionary lacks /Size key"); } if (!m->trailer.getKey("/Size").isInteger()) { - throw qpdf.damagedPDF("trailer", "/Size key in trailer dictionary is not an integer"); + throw damagedPDF("trailer", "/Size key in trailer dictionary is not an integer"); } } @@ -748,14 +744,14 @@ Objects::read_xrefTable(qpdf_offset_t xref_offset) // /Prev key instead of the xref stream's. (void)read_xrefStream(cur_trailer.getKey("/XRefStm").getIntValue()); } else { - throw qpdf.damagedPDF("xref stream", xref_offset, "invalid /XRefStm"); + throw damagedPDF("xref stream", xref_offset, "invalid /XRefStm"); } } } if (cur_trailer.hasKey("/Prev")) { if (!cur_trailer.getKey("/Prev").isInteger()) { - throw qpdf.damagedPDF("trailer", "/Prev key in trailer dictionary is not an integer"); + throw damagedPDF("trailer", "/Prev key in trailer dictionary is not an integer"); } return cur_trailer.getKey("/Prev").getIntValue(); } @@ -781,7 +777,7 @@ Objects::read_xrefStream(qpdf_offset_t xref_offset, bool in_stream_recovery) } } - throw qpdf.damagedPDF("", xref_offset, "xref not found"); + throw damagedPDF("", xref_offset, "xref not found"); return 0; // unreachable } @@ -912,7 +908,7 @@ Objects::processXRefStream( qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj, bool in_stream_recovery) { auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc { - return qpdf.damagedPDF("xref stream", xref_offset, msg.data()); + return damagedPDF("xref stream", xref_offset, msg.data()); }; auto dict = xref_obj.getDict(); @@ -932,7 +928,7 @@ Objects::processXRefStream( if (expected_size > actual_size) { throw x; } else { - qpdf.warn(x); + warn(x); } } @@ -992,7 +988,7 @@ Objects::processXRefStream( if (dict.hasKey("/Prev")) { if (!dict.getKey("/Prev").isInteger()) { - throw qpdf.damagedPDF( + throw damagedPDF( "xref stream", "/Prev key in xref stream dictionary is not an integer"); } return dict.getKey("/Prev").getIntValue(); @@ -1030,13 +1026,13 @@ Objects::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2) if (f0 == 2) { if (f1 == obj) { - qpdf.warn(qpdf.damagedPDF( - "xref stream", "self-referential object stream " + std::to_string(obj))); + warn( + damagedPDF("xref stream", "self-referential object stream " + std::to_string(obj))); return; } if (f1 > m->xref_table_max_id) { // ignore impossibly large object stream ids - qpdf.warn(qpdf.damagedPDF( + warn(damagedPDF( "xref stream", "object stream id " + std::to_string(f1) + " for object " + std::to_string(obj) + " is impossibly large")); @@ -1061,8 +1057,7 @@ Objects::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2) break; default: - throw qpdf.damagedPDF( - "xref stream", "unknown xref stream entry type " + std::to_string(f0)); + throw damagedPDF("xref stream", "unknown xref stream entry type " + std::to_string(f0)); break; } } @@ -1182,9 +1177,9 @@ Objects::readTrailer() if (empty) { // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in // actual PDF files and Adobe Reader appears to ignore them. - qpdf.warn(qpdf.damagedPDF("trailer", "empty object treated as null")); + warn(damagedPDF("trailer", "empty object treated as null")); } else if (object.isDictionary() && m->objects.readToken(*m->file).isWord("stream")) { - qpdf.warn(qpdf.damagedPDF("trailer", m->file->tell(), "stream keyword found in trailer")); + warn(damagedPDF("trailer", m->file->tell(), "stream keyword found in trailer")); } // Override last_offset so that it points to the beginning of the object we just read m->file->setLastOffset(offset); @@ -1210,8 +1205,7 @@ Objects::readObject(std::string const& description, QPDFObjGen og) if (empty) { // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in // actual PDF files and Adobe Reader appears to ignore them. - qpdf.warn( - qpdf.damagedPDF(*m->file, m->file->getLastOffset(), "empty object treated as null")); + warn(damagedPDF(*m->file, m->file->getLastOffset(), "empty object treated as null")); return object; } auto token = readToken(*m->file); @@ -1220,7 +1214,7 @@ Objects::readObject(std::string const& description, QPDFObjGen og) token = readToken(*m->file); } if (!token.isWord("endobj")) { - qpdf.warn(qpdf.damagedPDF("expected endobj")); + warn(damagedPDF("expected endobj")); } return object; } @@ -1241,9 +1235,9 @@ Objects::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offse if (!length_obj.isInteger()) { if (length_obj.null()) { - throw qpdf.damagedPDF(offset, "stream dictionary lacks /Length key"); + throw damagedPDF(offset, "stream dictionary lacks /Length key"); } - throw qpdf.damagedPDF(offset, "/Length key in stream dictionary is not an integer"); + throw damagedPDF(offset, "/Length key in stream dictionary is not an integer"); } length = toS(length_obj.getUIntValue()); @@ -1251,11 +1245,11 @@ Objects::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offse m->file->seek(stream_offset, SEEK_SET); m->file->seek(toO(length), SEEK_CUR); if (!readToken(*m->file).isWord("endstream")) { - throw qpdf.damagedPDF("expected endstream"); + throw damagedPDF("expected endstream"); } } catch (QPDFExc& e) { if (m->attempt_recovery) { - qpdf.warn(e); + warn(e); length = recoverStreamLength(m->file, og, stream_offset); } else { throw; @@ -1295,7 +1289,7 @@ Objects::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_off // Treat the \r by itself as the whitespace after endstream and start reading // stream data in spite of not having seen a newline. m->file->unreadCh(ch); - qpdf.warn(qpdf.damagedPDF( + warn(damagedPDF( m->file->tell(), "stream keyword followed by carriage return only")); } } @@ -1303,12 +1297,11 @@ Objects::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_off } if (!util::is_space(ch)) { m->file->unreadCh(ch); - qpdf.warn(qpdf.damagedPDF( + warn(damagedPDF( m->file->tell(), "stream keyword not followed by proper line terminator")); return; } - qpdf.warn( - qpdf.damagedPDF(m->file->tell(), "stream keyword followed by extraneous whitespace")); + warn(damagedPDF(m->file->tell(), "stream keyword followed by extraneous whitespace")); } } @@ -1319,7 +1312,7 @@ Objects::readObjectInStream(is::OffsetBuffer& input, int stream_id, int obj_id) if (empty) { // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in // actual PDF files and Adobe Reader appears to ignore them. - qpdf.warn(QPDFExc( + warn(QPDFExc( qpdf_e_damaged_pdf, m->file->getName() + " object stream " + std::to_string(stream_id), +"object " + std::to_string(obj_id) + " 0, offset " + @@ -1347,7 +1340,7 @@ Objects::recoverStreamLength( std::shared_ptr input, QPDFObjGen og, qpdf_offset_t stream_offset) { // Try to reconstruct stream length by looking for endstream or endobj - qpdf.warn(qpdf.damagedPDF(*input, stream_offset, "attempting to recover stream length")); + warn(damagedPDF(*input, stream_offset, "attempting to recover stream length")); PatternFinder ef(qpdf, &QPDF::findEndstream); size_t length = 0; @@ -1386,10 +1379,10 @@ Objects::recoverStreamLength( } if (length == 0) { - qpdf.warn(qpdf.damagedPDF( + warn(damagedPDF( *input, stream_offset, "unable to recover stream data; treating stream as empty")); } else { - qpdf.warn(qpdf.damagedPDF( + warn(damagedPDF( *input, stream_offset, "recovered stream length: " + std::to_string(length))); } @@ -1409,24 +1402,24 @@ Objects::read_object_start(qpdf_offset_t offset) QPDFTokenizer::Token tobjid = readToken(*m->file); bool objidok = tobjid.isInteger(); if (!objidok) { - throw qpdf.damagedPDF(offset, "expected n n obj"); + throw damagedPDF(offset, "expected n n obj"); } QPDFTokenizer::Token tgen = readToken(*m->file); bool genok = tgen.isInteger(); if (!genok) { - throw qpdf.damagedPDF(offset, "expected n n obj"); + throw damagedPDF(offset, "expected n n obj"); } QPDFTokenizer::Token tobj = readToken(*m->file); bool objok = tobj.isWord("obj"); if (!objok) { - throw qpdf.damagedPDF(offset, "expected n n obj"); + throw damagedPDF(offset, "expected n n obj"); } int objid = QUtil::string_to_int(tobjid.getValue().c_str()); int generation = QUtil::string_to_int(tgen.getValue().c_str()); if (objid == 0) { - throw qpdf.damagedPDF(offset, "object with ID 0"); + throw damagedPDF(offset, "object with ID 0"); } return {objid, generation}; } @@ -1447,20 +1440,20 @@ Objects::readObjectAtOffset( // "0000000000 00000 n", which is not correct, but it won't hurt anything for us to ignore // these. if (offset == 0) { - qpdf.warn(qpdf.damagedPDF(-1, "object has offset 0")); + warn(damagedPDF(-1, "object has offset 0")); return; } try { og = read_object_start(offset); if (exp_og != og) { - QPDFExc e = qpdf.damagedPDF(offset, "expected " + exp_og.unparse(' ') + " obj"); + QPDFExc e = damagedPDF(offset, "expected " + exp_og.unparse(' ') + " obj"); if (try_recovery) { // Will be retried below throw e; } else { // We can try reading the object anyway even if the ID doesn't match. - qpdf.warn(e); + warn(e); } } } catch (QPDFExc& e) { @@ -1474,7 +1467,7 @@ Objects::readObjectAtOffset( readObjectAtOffset(false, new_offset, description, exp_og); return; } - qpdf.warn(qpdf.damagedPDF( + warn(damagedPDF( "", -1, ("object " + exp_og.unparse(' ') + @@ -1493,7 +1486,7 @@ Objects::readObjectAtOffset( while (true) { char ch; if (!m->file->read(&ch, 1)) { - throw qpdf.damagedPDF(m->file->tell(), "EOF after endobj"); + throw damagedPDF(m->file->tell(), "EOF after endobj"); } if (!isspace(static_cast(ch))) { m->file->seek(-1, SEEK_CUR); @@ -1552,7 +1545,7 @@ Objects::readObjectAtOffset( while (true) { char ch; if (!m->file->read(&ch, 1)) { - throw qpdf.damagedPDF(m->file->tell(), "EOF after endobj"); + throw damagedPDF(m->file->tell(), "EOF after endobj"); } if (!isspace(static_cast(ch))) { m->file->seek(-1, SEEK_CUR); @@ -1574,7 +1567,7 @@ Objects::resolve(QPDFObjGen og) if (m->resolving.contains(og)) { // This can happen if an object references itself directly or indirectly in some key that // has to be resolved during object parsing, such as stream length. - qpdf.warn(qpdf.damagedPDF("", "loop detected resolving object " + og.unparse(' '))); + warn(damagedPDF("", "loop detected resolving object " + og.unparse(' '))); updateCache(og, QPDFObject::create(), -1, -1); return m->obj_cache[og].object; } @@ -1594,13 +1587,13 @@ Objects::resolve(QPDFObjGen og) break; default: - throw qpdf.damagedPDF( + throw damagedPDF( "", -1, ("object " + og.unparse('/') + " has unexpected xref entry type")); } } catch (QPDFExc& e) { - qpdf.warn(e); + warn(e); } catch (std::exception& e) { - qpdf.warn(qpdf.damagedPDF( + warn(damagedPDF( "", -1, ("object " + og.unparse('/') + ": error reading object: " + e.what()))); } } @@ -1636,7 +1629,7 @@ Objects::resolveObjectsInStream(int obj_stream_number) // Force resolution of object stream Stream obj_stream = qpdf.getObject(obj_stream_number, 0); if (!obj_stream) { - throw qpdf.damagedPDF( + throw damagedPDF( "object " + std::to_string(obj_stream_number) + " 0", "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream"); } @@ -1649,7 +1642,7 @@ Objects::resolveObjectsInStream(int obj_stream_number) QPDFObjectHandle dict = obj_stream.getDict(); if (!dict.isDictionaryOfType("/ObjStm")) { - qpdf.warn(qpdf.damagedPDF( + warn(damagedPDF( "object " + std::to_string(obj_stream_number) + " 0", "supposed object stream " + std::to_string(obj_stream_number) + " has wrong type")); } @@ -1657,7 +1650,7 @@ Objects::resolveObjectsInStream(int obj_stream_number) unsigned int n{0}; int first{0}; if (!(dict.getKey("/N").getValueAsUInt(n) && dict.getKey("/First").getValueAsInt(first))) { - throw qpdf.damagedPDF( + throw damagedPDF( "object " + std::to_string(obj_stream_number) + " 0", "object stream " + std::to_string(obj_stream_number) + " has incorrect keys"); } @@ -1674,7 +1667,7 @@ Objects::resolveObjectsInStream(int obj_stream_number) auto b_start = stream_data.data(); if (first >= end_offset) { - throw qpdf.damagedPDF( + throw damagedPDF( "object " + std::to_string(obj_stream_number) + " 0", "object stream " + std::to_string(obj_stream_number) + " has invalid /First entry"); } @@ -1694,17 +1687,17 @@ Objects::resolveObjectsInStream(int obj_stream_number) long long offset = QUtil::string_to_int(toffset.getValue().c_str()); if (num == obj_stream_number) { - qpdf.warn(damaged(num, id_offset, "object stream claims to contain itself")); + warn(damaged(num, id_offset, "object stream claims to contain itself")); continue; } if (num < 1) { - qpdf.warn(damaged(num, id_offset, "object id is invalid"s)); + warn(damaged(num, id_offset, "object id is invalid"s)); continue; } if (offset <= last_offset) { - qpdf.warn(damaged( + warn(damaged( num, input.getLastOffset(), "offset " + std::to_string(offset) + @@ -1718,7 +1711,7 @@ Objects::resolveObjectsInStream(int obj_stream_number) } if (first + offset >= end_offset) { - qpdf.warn(damaged( + warn(damaged( num, input.getLastOffset(), "offset " + std::to_string(offset) + " is too large")); continue; } @@ -1934,7 +1927,7 @@ Objects::tableSize() // Temporary fix. Long-term solution is // - QPDFObjGen to enforce objgens are valid and sensible // - xref table and obj cache to protect against insertion of impossibly large obj ids - qpdf.stopOnError("Impossibly large object id encountered."); + stopOnError("Impossibly large object id encountered."); } if (max_obj < 1.1 * std::max(toI(m->obj_cache.size()), max_xref)) { return toS(++max_obj); diff --git a/libqpdf/QPDF_optimization.cc b/libqpdf/QPDF_optimization.cc deleted file mode 100644 index 9cfb605..0000000 --- a/libqpdf/QPDF_optimization.cc +++ /dev/null @@ -1,429 +0,0 @@ -// See the "Optimization" section of the manual. - -#include - -#include -#include -#include -#include - -using Lin = QPDF::Doc::Linearization; -using Pages = QPDF::Doc::Pages; - -QPDF::ObjUser::ObjUser(user_e type) : - ou_type(type) -{ - qpdf_assert_debug(type == ou_root); -} - -QPDF::ObjUser::ObjUser(user_e type, size_t pageno) : - ou_type(type), - pageno(pageno) -{ - qpdf_assert_debug((type == ou_page) || (type == ou_thumb)); -} - -QPDF::ObjUser::ObjUser(user_e type, std::string const& key) : - ou_type(type), - key(key) -{ - qpdf_assert_debug((type == ou_trailer_key) || (type == ou_root_key)); -} - -bool -QPDF::ObjUser::operator<(ObjUser const& rhs) const -{ - if (ou_type < rhs.ou_type) { - return true; - } - if (ou_type == rhs.ou_type) { - if (pageno < rhs.pageno) { - return true; - } - if (pageno == rhs.pageno) { - return key < rhs.key; - } - } - return false; -} - -QPDF::UpdateObjectMapsFrame::UpdateObjectMapsFrame( - QPDF::ObjUser const& ou, QPDFObjectHandle oh, bool top) : - ou(ou), - oh(oh), - top(top) -{ -} - -void -QPDF::optimize( - std::map const& object_stream_data, - bool allow_changes, - std::function skip_stream_parameters) -{ - m->lin.optimize_internal(object_stream_data, allow_changes, skip_stream_parameters); -} - -void -Lin::optimize( - QPDFWriter::ObjTable const& obj, std::function skip_stream_parameters) -{ - optimize_internal(obj, true, skip_stream_parameters); -} - -template -void -Lin::optimize_internal( - T const& object_stream_data, - bool allow_changes, - std::function skip_stream_parameters) -{ - if (!m->obj_user_to_objects.empty()) { - // already optimized - return; - } - - // The PDF specification indicates that /Outlines is supposed to be an indirect reference. Force - // it to be so if it exists and is direct. (This has been seen in the wild.) - QPDFObjectHandle root = qpdf.getRoot(); - if (root.getKey("/Outlines").isDictionary()) { - QPDFObjectHandle outlines = root.getKey("/Outlines"); - if (!outlines.isIndirect()) { - root.replaceKey("/Outlines", qpdf.makeIndirectObject(outlines)); - } - } - - // Traverse pages tree pushing all inherited resources down to the page level. This also - // initializes m->all_pages. - m->pages.pushInheritedAttributesToPage(allow_changes, false); - - // Traverse pages - size_t n = m->all_pages.size(); - for (size_t pageno = 0; pageno < n; ++pageno) { - updateObjectMaps( - ObjUser(ObjUser::ou_page, pageno), m->all_pages.at(pageno), skip_stream_parameters); - } - - // Traverse document-level items - for (auto const& [key, value]: m->trailer.as_dictionary()) { - if (key == "/Root") { - // handled separately - } else { - if (!value.null()) { - updateObjectMaps( - ObjUser(ObjUser::ou_trailer_key, key), value, skip_stream_parameters); - } - } - } - - for (auto const& [key, value]: root.as_dictionary()) { - // Technically, /I keys from /Thread dictionaries are supposed to be handled separately, but - // we are going to disregard that specification for now. There is loads of evidence that - // pdlin and Acrobat both disregard things like this from time to time, so this is almost - // certain not to cause any problems. - if (!value.null()) { - updateObjectMaps(ObjUser(ObjUser::ou_root_key, key), value, skip_stream_parameters); - } - } - - ObjUser root_ou = ObjUser(ObjUser::ou_root); - auto root_og = QPDFObjGen(root.getObjGen()); - m->obj_user_to_objects[root_ou].insert(root_og); - m->object_to_obj_users[root_og].insert(root_ou); - - filterCompressedObjects(object_stream_data); -} - -void -QPDF::pushInheritedAttributesToPage() -{ - // Public API should not have access to allow_changes. - m->pages.pushInheritedAttributesToPage(true, false); -} - -void -Pages::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) -{ - // Traverse pages tree pushing all inherited resources down to the page level. - - // The record of whether we've done this is cleared by updateAllPagesCache(). If we're warning - // for skipped keys, re-traverse unconditionally. - if (m->pushed_inherited_attributes_to_pages && (!warn_skipped_keys)) { - return; - } - - // Calling getAllPages() resolves any duplicated page objects, repairs broken nodes, and detects - // loops, so we don't have to do those activities here. - qpdf.getAllPages(); - - // key_ancestors is a mapping of page attribute keys to a stack of Pages nodes that contain - // values for them. - std::map> key_ancestors; - pushInheritedAttributesToPageInternal( - m->trailer.getKey("/Root").getKey("/Pages"), - key_ancestors, - allow_changes, - warn_skipped_keys); - if (!key_ancestors.empty()) { - throw std::logic_error( - "key_ancestors not empty after pushing inherited attributes to pages"); - } - m->pushed_inherited_attributes_to_pages = true; - m->ever_pushed_inherited_attributes_to_pages = true; -} - -void -Pages ::pushInheritedAttributesToPageInternal( - QPDFObjectHandle cur_pages, - std::map>& key_ancestors, - bool allow_changes, - bool warn_skipped_keys) -{ - // Make a list of inheritable keys. Only the keys /MediaBox, /CropBox, /Resources, and /Rotate - // are inheritable attributes. Push this object onto the stack of pages nodes that have values - // for this attribute. - - std::set inheritable_keys; - for (auto const& key: cur_pages.getKeys()) { - if (key == "/MediaBox" || key == "/CropBox" || key == "/Resources" || key == "/Rotate") { - if (!allow_changes) { - throw QPDFExc( - qpdf_e_internal, - m->file->getName(), - m->last_object_description, - m->file->getLastOffset(), - "optimize detected an inheritable attribute when called in no-change mode"); - } - - // This is an inheritable resource - inheritable_keys.insert(key); - QPDFObjectHandle oh = cur_pages.getKey(key); - QTC::TC("qpdf", "QPDF opt direct pages resource", oh.indirect() ? 0 : 1); - if (!oh.indirect()) { - if (!oh.isScalar()) { - // Replace shared direct object non-scalar resources with indirect objects to - // avoid copying large structures around. - cur_pages.replaceKey(key, qpdf.makeIndirectObject(oh)); - oh = cur_pages.getKey(key); - } else { - // It's okay to copy scalars. - } - } - key_ancestors[key].push_back(oh); - if (key_ancestors[key].size() > 1) { - } - // Remove this resource from this node. It will be reattached at the page level. - cur_pages.removeKey(key); - } else if (!(key == "/Type" || key == "/Parent" || key == "/Kids" || key == "/Count")) { - // Warn when flattening, but not if the key is at the top level (i.e. "/Parent" not - // set), as we don't change these; but flattening removes intermediate /Pages nodes. - if (warn_skipped_keys && cur_pages.hasKey("/Parent")) { - qpdf.warn( - qpdf_e_pages, - "Pages object: object " + cur_pages.id_gen().unparse(' '), - 0, - ("Unknown key " + key + - " in /Pages object is being discarded as a result of flattening the /Pages " - "tree")); - } - } - } - - // Process descendant nodes. This method does not perform loop detection because all code paths - // that lead here follow a call to getAllPages, which already throws an exception in the event - // of a loop in the pages tree. - for (auto& kid: cur_pages.getKey("/Kids").aitems()) { - if (kid.isDictionaryOfType("/Pages")) { - pushInheritedAttributesToPageInternal( - kid, key_ancestors, allow_changes, warn_skipped_keys); - } else { - // Add all available inheritable attributes not present in this object to this object. - for (auto const& iter: key_ancestors) { - std::string const& key = iter.first; - if (!kid.hasKey(key)) { - kid.replaceKey(key, iter.second.back()); - } else { - QTC::TC("qpdf", "QPDF opt page resource hides ancestor"); - } - } - } - } - - // For each inheritable key, pop the stack. If the stack becomes empty, remove it from the map. - // That way, the invariant that the list of keys in key_ancestors is exactly those keys for - // which inheritable attributes are available. - - if (!inheritable_keys.empty()) { - for (auto const& key: inheritable_keys) { - key_ancestors[key].pop_back(); - if (key_ancestors[key].empty()) { - key_ancestors.erase(key); - } - } - } else { - QTC::TC("qpdf", "QPDF opt no inheritable keys"); - } -} - -void -Lin::updateObjectMaps( - ObjUser const& first_ou, - QPDFObjectHandle first_oh, - std::function skip_stream_parameters) -{ - QPDFObjGen::set visited; - std::vector pending; - pending.emplace_back(first_ou, first_oh, true); - // Traverse the object tree from this point taking care to avoid crossing page boundaries. - std::unique_ptr thumb_ou; - while (!pending.empty()) { - auto cur = pending.back(); - pending.pop_back(); - - bool is_page_node = false; - - if (cur.oh.isDictionaryOfType("/Page")) { - is_page_node = true; - if (!cur.top) { - continue; - } - } - - if (cur.oh.isIndirect()) { - QPDFObjGen og(cur.oh.getObjGen()); - if (!visited.add(og)) { - QTC::TC("qpdf", "QPDF opt loop detected"); - continue; - } - m->obj_user_to_objects[cur.ou].insert(og); - m->object_to_obj_users[og].insert(cur.ou); - } - - if (cur.oh.isArray()) { - for (auto const& item: cur.oh.as_array()) { - pending.emplace_back(cur.ou, item, false); - } - } else if (cur.oh.isDictionary() || cur.oh.isStream()) { - QPDFObjectHandle dict = cur.oh; - bool is_stream = cur.oh.isStream(); - int ssp = 0; - if (is_stream) { - dict = cur.oh.getDict(); - if (skip_stream_parameters) { - ssp = skip_stream_parameters(cur.oh); - } - } - - for (auto& [key, value]: dict.as_dictionary()) { - if (value.null()) { - continue; - } - - if (is_page_node && (key == "/Thumb")) { - // Traverse page thumbnail dictionaries as a special case. There can only ever - // be one /Thumb key on a page, and we see at most one page node per call. - thumb_ou = std::make_unique(ObjUser::ou_thumb, cur.ou.pageno); - pending.emplace_back(*thumb_ou, dict.getKey(key), false); - } else if (is_page_node && (key == "/Parent")) { - // Don't traverse back up the page tree - } else if ( - ((ssp >= 1) && (key == "/Length")) || - ((ssp >= 2) && ((key == "/Filter") || (key == "/DecodeParms")))) { - // Don't traverse into stream parameters that we are not going to write. - } else { - pending.emplace_back(cur.ou, value, false); - } - } - } - } -} - -void -Lin::filterCompressedObjects(std::map const& object_stream_data) -{ - if (object_stream_data.empty()) { - return; - } - - // Transform object_to_obj_users and obj_user_to_objects so that they refer only to uncompressed - // objects. If something is a user of a compressed object, then it is really a user of the - // object stream that contains it. - - std::map> t_obj_user_to_objects; - std::map> t_object_to_obj_users; - - for (auto const& i1: m->obj_user_to_objects) { - ObjUser const& ou = i1.first; - // Loop over objects. - for (auto const& og: i1.second) { - auto i2 = object_stream_data.find(og.getObj()); - if (i2 == object_stream_data.end()) { - t_obj_user_to_objects[ou].insert(og); - } else { - t_obj_user_to_objects[ou].insert(QPDFObjGen(i2->second, 0)); - } - } - } - - for (auto const& i1: m->object_to_obj_users) { - QPDFObjGen const& og = i1.first; - // Loop over obj_users. - for (auto const& ou: i1.second) { - auto i2 = object_stream_data.find(og.getObj()); - if (i2 == object_stream_data.end()) { - t_object_to_obj_users[og].insert(ou); - } else { - t_object_to_obj_users[QPDFObjGen(i2->second, 0)].insert(ou); - } - } - } - - m->obj_user_to_objects = t_obj_user_to_objects; - m->object_to_obj_users = t_object_to_obj_users; -} - -void -Lin::filterCompressedObjects(QPDFWriter::ObjTable const& obj) -{ - if (obj.getStreamsEmpty()) { - return; - } - - // Transform object_to_obj_users and obj_user_to_objects so that they refer only to uncompressed - // objects. If something is a user of a compressed object, then it is really a user of the - // object stream that contains it. - - std::map> t_obj_user_to_objects; - std::map> t_object_to_obj_users; - - for (auto const& i1: m->obj_user_to_objects) { - ObjUser const& ou = i1.first; - // Loop over objects. - for (auto const& og: i1.second) { - if (obj.contains(og)) { - if (auto const& i2 = obj[og].object_stream; i2 <= 0) { - t_obj_user_to_objects[ou].insert(og); - } else { - t_obj_user_to_objects[ou].insert(QPDFObjGen(i2, 0)); - } - } - } - } - - for (auto const& i1: m->object_to_obj_users) { - QPDFObjGen const& og = i1.first; - if (obj.contains(og)) { - // Loop over obj_users. - for (auto const& ou: i1.second) { - if (auto i2 = obj[og].object_stream; i2 <= 0) { - t_object_to_obj_users[og].insert(ou); - } else { - t_object_to_obj_users[QPDFObjGen(i2, 0)].insert(ou); - } - } - } - } - - m->obj_user_to_objects = t_obj_user_to_objects; - m->object_to_obj_users = t_object_to_obj_users; -} diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index 0239314..62f1407 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -1,9 +1,12 @@ +#include #include +#include #include #include #include #include +#include // In support of page manipulation APIs, these methods internally maintain state about pages in a // pair of data structures: all_pages, which is a vector of page objects, and pageobj_to_pages_pos, @@ -42,10 +45,16 @@ using Pages = QPDF::Doc::Pages; std::vector const& QPDF::getAllPages() { + return m->pages.all(); +} + +std::vector const& +Pages::cache() +{ // Note that pushInheritedAttributesToPage may also be used to initialize m->all_pages. - if (m->all_pages.empty() && !m->invalid_page_found) { - m->ever_called_get_all_pages = true; - auto root = getRoot(); + if (all_pages.empty() && !invalid_page_found) { + ever_called_get_all_pages_ = true; + auto root = qpdf.getRoot(); QPDFObjGen::set visited; QPDFObjGen::set seen; QPDFObjectHandle pages = root.getKey("/Pages"); @@ -77,18 +86,18 @@ QPDF::getAllPages() qpdf_e_pages, m->file->getName(), "", 0, "root of pages tree has no /Kids array"); } try { - m->pages.getAllPagesInternal(pages, visited, seen, false, false); + getAllPagesInternal(pages, visited, seen, false, false); } catch (...) { - m->all_pages.clear(); - m->invalid_page_found = false; + all_pages.clear(); + invalid_page_found = false; throw; } - if (m->invalid_page_found) { - m->pages.flattenPagesTree(); - m->invalid_page_found = false; + if (invalid_page_found) { + flattenPagesTree(); + invalid_page_found = false; } } - return m->all_pages; + return all_pages; } void @@ -137,7 +146,7 @@ Pages::getAllPagesInternal( if (!kid.isDictionary()) { kid.warn("Pages tree includes non-dictionary object; ignoring"); - m->invalid_page_found = true; + invalid_page_found = true; continue; } if (!kid.isIndirect()) { @@ -206,7 +215,7 @@ Pages::getAllPagesInternal( cur_node.warn( "kid " + std::to_string(i) + " (from 0) appears more than once in the pages tree; ignoring duplicate"); - m->invalid_page_found = true; + invalid_page_found = true; kid = QPDFObjectHandle::newNull(); continue; } @@ -223,11 +232,11 @@ Pages::getAllPagesInternal( if (m->reconstructed_xref && errors > 2) { cur_node.warn( "kid " + std::to_string(i) + " (from 0) has too many errors; ignoring page"); - m->invalid_page_found = true; + invalid_page_found = true; kid = QPDFObjectHandle::newNull(); continue; } - m->all_pages.emplace_back(kid); + all_pages.emplace_back(kid); } } } @@ -235,13 +244,19 @@ Pages::getAllPagesInternal( void QPDF::updateAllPagesCache() { + m->pages.update_cache(); +} + +void +Pages::update_cache() +{ // Force regeneration of the pages cache. We force immediate recalculation of all_pages since // users may have references to it that they got from calls to getAllPages(). We can defer // recalculation of pageobj_to_pages_pos until needed. - m->all_pages.clear(); - m->pageobj_to_pages_pos.clear(); - m->pushed_inherited_attributes_to_pages = false; - getAllPages(); + all_pages.clear(); + pageobj_to_pages_pos.clear(); + pushed_inherited_attributes_to_pages = false; + cache(); } void @@ -249,30 +264,30 @@ Pages::flattenPagesTree() { // If not already done, flatten the /Pages structure and initialize pageobj_to_pages_pos. - if (!m->pageobj_to_pages_pos.empty()) { + if (!pageobj_to_pages_pos.empty()) { return; } - // Push inherited objects down to the /Page level. As a side effect m->all_pages will also be + // Push inherited objects down to the /Page level. As a side effect all_pages will also be // generated. pushInheritedAttributesToPage(true, true); QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages"); - size_t const len = m->all_pages.size(); + size_t const len = all_pages.size(); for (size_t pos = 0; pos < len; ++pos) { // Populate pageobj_to_pages_pos and fix parent pointer. There should be no duplicates at // this point because pushInheritedAttributesToPage calls getAllPages which resolves // duplicates. - insertPageobjToPage(m->all_pages.at(pos), toI(pos), true); - m->all_pages.at(pos).replaceKey("/Parent", pages); + insertPageobjToPage(all_pages.at(pos), toI(pos), true); + all_pages.at(pos).replaceKey("/Parent", pages); } - pages.replaceKey("/Kids", QPDFObjectHandle::newArray(m->all_pages)); + pages.replaceKey("/Kids", Array(all_pages)); // /Count has not changed if (pages.getKey("/Count").getUIntValue() != len) { - if (m->invalid_page_found && pages.getKey("/Count").getUIntValue() > len) { - pages.replaceKey("/Count", QPDFObjectHandle::newInteger(toI(len))); + if (invalid_page_found && pages.getKey("/Count").getUIntValue() > len) { + pages.replaceKey("/Count", Integer(len)); } else { throw std::runtime_error("/Count is wrong after flattening pages tree"); } @@ -280,11 +295,141 @@ Pages::flattenPagesTree() } void +QPDF::pushInheritedAttributesToPage() +{ + // Public API should not have access to allow_changes. + m->pages.pushInheritedAttributesToPage(true, false); +} + +void +Pages::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) +{ + // Traverse pages tree pushing all inherited resources down to the page level. + + // The record of whether we've done this is cleared by updateAllPagesCache(). If we're warning + // for skipped keys, re-traverse unconditionally. + if (pushed_inherited_attributes_to_pages && !warn_skipped_keys) { + return; + } + + // Calling cache() resolves any duplicated page objects, repairs broken nodes, and detects + // loops, so we don't have to do those activities here. + (void)cache(); + + // key_ancestors is a mapping of page attribute keys to a stack of Pages nodes that contain + // values for them. + std::map> key_ancestors; + pushInheritedAttributesToPageInternal( + m->trailer.getKey("/Root").getKey("/Pages"), + key_ancestors, + allow_changes, + warn_skipped_keys); + util::assertion( + key_ancestors.empty(), + "key_ancestors not empty after pushing inherited attributes to pages"); + pushed_inherited_attributes_to_pages = true; + ever_pushed_inherited_attributes_to_pages_ = true; +} + +void +Pages::pushInheritedAttributesToPageInternal( + QPDFObjectHandle cur_pages, + std::map>& key_ancestors, + bool allow_changes, + bool warn_skipped_keys) +{ + // Make a list of inheritable keys. Only the keys /MediaBox, /CropBox, /Resources, and /Rotate + // are inheritable attributes. Push this object onto the stack of pages nodes that have values + // for this attribute. + + std::set inheritable_keys; + for (auto const& key: cur_pages.getKeys()) { + if (key == "/MediaBox" || key == "/CropBox" || key == "/Resources" || key == "/Rotate") { + if (!allow_changes) { + throw QPDFExc( + qpdf_e_internal, + m->file->getName(), + m->last_object_description, + m->file->getLastOffset(), + "optimize detected an inheritable attribute when called in no-change mode"); + } + + // This is an inheritable resource + inheritable_keys.insert(key); + QPDFObjectHandle oh = cur_pages.getKey(key); + QTC::TC("qpdf", "QPDF opt direct pages resource", oh.indirect() ? 0 : 1); + if (!oh.indirect()) { + if (!oh.isScalar()) { + // Replace shared direct object non-scalar resources with indirect objects to + // avoid copying large structures around. + cur_pages.replaceKey(key, qpdf.makeIndirectObject(oh)); + oh = cur_pages.getKey(key); + } else { + // It's okay to copy scalars. + } + } + key_ancestors[key].push_back(oh); + if (key_ancestors[key].size() > 1) { + } + // Remove this resource from this node. It will be reattached at the page level. + cur_pages.removeKey(key); + } else if (!(key == "/Type" || key == "/Parent" || key == "/Kids" || key == "/Count")) { + // Warn when flattening, but not if the key is at the top level (i.e. "/Parent" not + // set), as we don't change these; but flattening removes intermediate /Pages nodes. + if (warn_skipped_keys && cur_pages.hasKey("/Parent")) { + warn( + qpdf_e_pages, + "Pages object: object " + cur_pages.id_gen().unparse(' '), + 0, + ("Unknown key " + key + + " in /Pages object is being discarded as a result of flattening the /Pages " + "tree")); + } + } + } + + // Process descendant nodes. This method does not perform loop detection because all code paths + // that lead here follow a call to getAllPages, which already throws an exception in the event + // of a loop in the pages tree. + for (auto& kid: cur_pages.getKey("/Kids").aitems()) { + if (kid.isDictionaryOfType("/Pages")) { + pushInheritedAttributesToPageInternal( + kid, key_ancestors, allow_changes, warn_skipped_keys); + } else { + // Add all available inheritable attributes not present in this object to this object. + for (auto const& iter: key_ancestors) { + std::string const& key = iter.first; + if (!kid.hasKey(key)) { + kid.replaceKey(key, iter.second.back()); + } else { + QTC::TC("qpdf", "QPDF opt page resource hides ancestor"); + } + } + } + } + + // For each inheritable key, pop the stack. If the stack becomes empty, remove it from the map. + // That way, the invariant that the list of keys in key_ancestors is exactly those keys for + // which inheritable attributes are available. + + if (!inheritable_keys.empty()) { + for (auto const& key: inheritable_keys) { + key_ancestors[key].pop_back(); + if (key_ancestors[key].empty()) { + key_ancestors.erase(key); + } + } + } else { + QTC::TC("qpdf", "QPDF opt no inheritable keys"); + } +} + +void Pages::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate) { QPDFObjGen og(obj.getObjGen()); if (check_duplicate) { - if (!m->pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second) { + if (!pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second) { // The library never calls insertPageobjToPage in a way that causes this to happen. throw QPDFExc( qpdf_e_pages, @@ -294,52 +439,51 @@ Pages::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupl "duplicate page reference found; this would cause loss of data"); } } else { - m->pageobj_to_pages_pos[og] = pos; + pageobj_to_pages_pos[og] = pos; } } void -Pages::insertPage(QPDFObjectHandle newpage, int pos) +Pages::insert(QPDFObjectHandle newpage, int pos) { // pos is numbered from 0, so pos = 0 inserts at the beginning and pos = npages adds to the end. flattenPagesTree(); - if (!newpage.isIndirect()) { + if (!newpage.indirect()) { newpage = qpdf.makeIndirectObject(newpage); - } else if (newpage.getOwningQPDF() != &qpdf) { - newpage.getQPDF().pushInheritedAttributesToPage(); + } else if (newpage.qpdf() != &qpdf) { + newpage.qpdf()->pushInheritedAttributesToPage(); newpage = qpdf.copyForeignObject(newpage); } else { QTC::TC("qpdf", "QPDF insert indirect page"); } - if (pos < 0 || toS(pos) > m->all_pages.size()) { + if (pos < 0 || std::cmp_greater(pos, all_pages.size())) { throw std::runtime_error("QPDF::insertPage called with pos out of range"); } QTC::TC( "qpdf", "QPDF insert page", - (pos == 0) ? 0 : // insert at beginning - (pos == toI(m->all_pages.size())) ? 1 // at end - : 2); // insert in middle + pos == 0 ? 0 : // insert at beginning + std::cmp_equal(pos, size()) ? 1 // at end + : 2); // insert in middle - auto og = newpage.getObjGen(); - if (m->pageobj_to_pages_pos.contains(og)) { - newpage = qpdf.makeIndirectObject(QPDFObjectHandle(newpage).shallowCopy()); + if (pageobj_to_pages_pos.contains(newpage)) { + newpage = qpdf.makeIndirectObject(newpage.copy()); } - QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages"); - QPDFObjectHandle kids = pages.getKey("/Kids"); + auto pages = qpdf.getRoot()["/Pages"]; + Array kids = pages["/Kids"]; newpage.replaceKey("/Parent", pages); - kids.insertItem(pos, newpage); - int npages = static_cast(kids.size()); - pages.replaceKey("/Count", QPDFObjectHandle::newInteger(npages)); - m->all_pages.insert(m->all_pages.begin() + pos, newpage); - for (int i = pos + 1; i < npages; ++i) { - insertPageobjToPage(m->all_pages.at(toS(i)), i, false); + kids.insert(pos, newpage); + size_t npages = kids.size(); + pages.replaceKey("/Count", Integer(npages)); + all_pages.insert(all_pages.begin() + pos, newpage); + for (size_t i = static_cast(pos) + 1; i < npages; ++i) { + insertPageobjToPage(all_pages.at(i), static_cast(i), false); } insertPageobjToPage(newpage, pos, true); } @@ -347,24 +491,30 @@ Pages::insertPage(QPDFObjectHandle newpage, int pos) void QPDF::removePage(QPDFObjectHandle page) { - int pos = findPage(page); // also ensures flat /Pages + m->pages.erase(page); +} + +void +Pages::erase(QPDFObjectHandle& page) +{ + int pos = qpdf.findPage(page); // also ensures flat /Pages QTC::TC( "qpdf", "QPDF remove page", - (pos == 0) ? 0 : // remove at beginning - (pos == toI(m->all_pages.size() - 1)) ? 1 // end - : 2); // remove in middle + (pos == 0) ? 0 : // remove at beginning + (pos == toI(all_pages.size() - 1)) ? 1 // end + : 2); // remove in middle - QPDFObjectHandle pages = getRoot().getKey("/Pages"); + QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages"); QPDFObjectHandle kids = pages.getKey("/Kids"); kids.eraseItem(pos); int npages = static_cast(kids.size()); pages.replaceKey("/Count", QPDFObjectHandle::newInteger(npages)); - m->all_pages.erase(m->all_pages.begin() + pos); - m->pageobj_to_pages_pos.erase(page.getObjGen()); + all_pages.erase(all_pages.begin() + pos); + pageobj_to_pages_pos.erase(page.getObjGen()); for (int i = pos; i < npages; ++i) { - m->pages.insertPageobjToPage(m->all_pages.at(toS(i)), i, false); + m->pages.insertPageobjToPage(all_pages.at(toS(i)), i, false); } } @@ -375,17 +525,16 @@ QPDF::addPageAt(QPDFObjectHandle newpage, bool before, QPDFObjectHandle refpage) if (!before) { ++refpos; } - m->pages.insertPage(newpage, refpos); + m->pages.insert(newpage, refpos); } void QPDF::addPage(QPDFObjectHandle newpage, bool first) { if (first) { - m->pages.insertPage(newpage, 0); + m->pages.insert(newpage, 0); } else { - m->pages.insertPage( - newpage, getRoot().getKey("/Pages").getKey("/Count").getIntValueAsInt()); + m->pages.insert(newpage, getRoot()["/Pages"]["/Count"].getIntValueAsInt()); } } @@ -398,9 +547,15 @@ QPDF::findPage(QPDFObjectHandle& page) int QPDF::findPage(QPDFObjGen og) { - m->pages.flattenPagesTree(); - auto it = m->pageobj_to_pages_pos.find(og); - if (it == m->pageobj_to_pages_pos.end()) { + return m->pages.find(og); +} + +int +Pages::find(QPDFObjGen og) +{ + flattenPagesTree(); + auto it = pageobj_to_pages_pos.find(og); + if (it == pageobj_to_pages_pos.end()) { throw QPDFExc( qpdf_e_pages, m->file->getName(), @@ -410,3 +565,168 @@ QPDF::findPage(QPDFObjGen og) } return (*it).second; } + +class QPDFPageDocumentHelper::Members +{ +}; + +QPDFPageDocumentHelper::QPDFPageDocumentHelper(QPDF& qpdf) : + QPDFDocumentHelper(qpdf) +{ +} + +QPDFPageDocumentHelper& +QPDFPageDocumentHelper::get(QPDF& qpdf) +{ + return qpdf.doc().page_dh(); +} + +void +QPDFPageDocumentHelper::validate(bool repair) +{ +} + +std::vector +QPDFPageDocumentHelper::getAllPages() +{ + auto& pp = qpdf.doc().pages(); + return {pp.begin(), pp.end()}; +} + +void +QPDFPageDocumentHelper::pushInheritedAttributesToPage() +{ + qpdf.pushInheritedAttributesToPage(); +} + +void +QPDFPageDocumentHelper::removeUnreferencedResources() +{ + for (auto& ph: getAllPages()) { + ph.removeUnreferencedResources(); + } +} + +void +QPDFPageDocumentHelper::addPage(QPDFPageObjectHelper newpage, bool first) +{ + qpdf.doc().pages().insert(newpage, first ? 0 : qpdf.doc().pages().size()); +} + +void +QPDFPageDocumentHelper::addPageAt( + QPDFPageObjectHelper newpage, bool before, QPDFPageObjectHelper refpage) +{ + qpdf.addPageAt(newpage.getObjectHandle(), before, refpage.getObjectHandle()); +} + +void +QPDFPageDocumentHelper::removePage(QPDFPageObjectHelper page) +{ + qpdf.removePage(page.getObjectHandle()); +} + +void +QPDFPageDocumentHelper::flattenAnnotations(int required_flags, int forbidden_flags) +{ + qpdf.doc().pages().flatten_annotations(required_flags, forbidden_flags); +} + +void +Pages::flatten_annotations(int required_flags, int forbidden_flags) +{ + auto& afdh = qpdf.doc().acroform(); + if (afdh.getNeedAppearances()) { + qpdf.getRoot() + .getKey("/AcroForm") + .warn( + "document does not have updated appearance streams, so form fields " + "will not be flattened"); + } + for (QPDFPageObjectHelper ph: all()) { + QPDFObjectHandle resources = ph.getAttribute("/Resources", true); + if (!resources.isDictionary()) { + // As of #1521, this should be impossible unless a user inserted an invalid page. + resources = ph.getObjectHandle().replaceKeyAndGetNew("/Resources", Dictionary::empty()); + } + flatten_annotations_for_page(ph, resources, afdh, required_flags, forbidden_flags); + } + if (!afdh.getNeedAppearances()) { + qpdf.getRoot().removeKey("/AcroForm"); + } +} + +void +Pages::flatten_annotations_for_page( + QPDFPageObjectHelper& page, + QPDFObjectHandle& resources, + QPDFAcroFormDocumentHelper& afdh, + int required_flags, + int forbidden_flags) +{ + bool need_appearances = afdh.getNeedAppearances(); + std::vector annots = page.getAnnotations(); + std::vector new_annots; + std::string new_content; + int rotate = 0; + QPDFObjectHandle rotate_obj = page.getObjectHandle().getKey("/Rotate"); + if (rotate_obj.isInteger() && rotate_obj.getIntValue()) { + rotate = rotate_obj.getIntValueAsInt(); + } + int next_fx = 1; + for (auto& aoh: annots) { + QPDFObjectHandle as = aoh.getAppearanceStream("/N"); + bool is_widget = (aoh.getSubtype() == "/Widget"); + bool process = true; + if (need_appearances && is_widget) { + process = false; + } + if (process && as.isStream()) { + if (is_widget) { + QPDFFormFieldObjectHelper ff = afdh.getFieldForAnnotation(aoh); + QPDFObjectHandle as_resources = as.getDict().getKey("/Resources"); + if (as_resources.isIndirect()) { + ; + as.getDict().replaceKey("/Resources", as_resources.shallowCopy()); + as_resources = as.getDict().getKey("/Resources"); + } + as_resources.mergeResources(ff.getDefaultResources()); + } else { + QTC::TC("qpdf", "QPDFPageDocumentHelper non-widget annotation"); + } + std::string name = resources.getUniqueResourceName("/Fxo", next_fx); + std::string content = + aoh.getPageContentForAppearance(name, rotate, required_flags, forbidden_flags); + if (!content.empty()) { + resources.mergeResources("<< /XObject << >> >>"_qpdf); + resources.getKey("/XObject").replaceKey(name, as); + ++next_fx; + } + new_content += content; + } else if (process && !aoh.getAppearanceDictionary().null()) { + // If an annotation has no selected appearance stream, just drop the annotation when + // flattening. This can happen for unchecked checkboxes and radio buttons, popup windows + // associated with comments that aren't visible, and other types of annotations that + // aren't visible. Annotations that have no appearance streams at all, such as Link, + // Popup, and Projection, should be preserved. + } else { + new_annots.push_back(aoh.getObjectHandle()); + } + } + if (new_annots.size() != annots.size()) { + QPDFObjectHandle page_oh = page.getObjectHandle(); + if (new_annots.empty()) { + page_oh.removeKey("/Annots"); + } else { + QPDFObjectHandle old_annots = page_oh.getKey("/Annots"); + QPDFObjectHandle new_annots_oh = QPDFObjectHandle::newArray(new_annots); + if (old_annots.isIndirect()) { + qpdf.replaceObject(old_annots.getObjGen(), new_annots_oh); + } else { + page_oh.replaceKey("/Annots", new_annots_oh); + } + } + page.addPageContents(qpdf.newStream("q\n"), true); + page.addPageContents(qpdf.newStream("\nQ\n" + new_content), false); + } +} diff --git a/libqpdf/qpdf-c.cc b/libqpdf/qpdf-c.cc index a491218..5740cba 100644 --- a/libqpdf/qpdf-c.cc +++ b/libqpdf/qpdf-c.cc @@ -1,6 +1,6 @@ #include -#include +#include #include #include @@ -1804,10 +1804,9 @@ qpdf_oh_replace_stream_data( int qpdf_get_num_pages(qpdf_data qpdf) { - QTC::TC("qpdf", "qpdf-c called qpdf_num_pages"); int n = -1; QPDF_ERROR_CODE code = - trap_errors(qpdf, [&n](qpdf_data q) { n = QIntC::to_int(q->qpdf->getAllPages().size()); }); + trap_errors(qpdf, [&n](qpdf_data q) { n = QIntC::to_int(q->qpdf->doc().pages().size()); }); if (code & QPDF_ERRORS) { return -1; } @@ -1817,10 +1816,10 @@ qpdf_get_num_pages(qpdf_data qpdf) qpdf_oh qpdf_get_page_n(qpdf_data qpdf, size_t i) { - QTC::TC("qpdf", "qpdf-c called qpdf_get_page_n"); qpdf_oh result = 0; - QPDF_ERROR_CODE code = trap_errors( - qpdf, [&result, i](qpdf_data q) { result = new_object(q, q->qpdf->getAllPages().at(i)); }); + QPDF_ERROR_CODE code = trap_errors(qpdf, [&result, i](qpdf_data q) { + result = new_object(q, q->qpdf->doc().pages().all().at(i)); + }); if ((code & QPDF_ERRORS) || (result == 0)) { return qpdf_oh_new_uninitialized(qpdf); } diff --git a/libqpdf/qpdf/QPDFObjectHandle_private.hh b/libqpdf/qpdf/QPDFObjectHandle_private.hh index 6cf3424..3e82bca 100644 --- a/libqpdf/qpdf/QPDFObjectHandle_private.hh +++ b/libqpdf/qpdf/QPDFObjectHandle_private.hh @@ -305,10 +305,10 @@ namespace qpdf explicit Integer(std::integral auto value) : Integer(static_cast(value)) { - if constexpr ( - std::numeric_limits::max() > - std::numeric_limits::max()) { - if (value > std::numeric_limits::max()) { + if constexpr (std::cmp_greater( + std::numeric_limits::max(), + std::numeric_limits::max())) { + if (std::cmp_greater(value, std::numeric_limits::max())) { throw std::overflow_error("overflow constructing Integer"); } } diff --git a/libqpdf/qpdf/QPDF_private.hh b/libqpdf/qpdf/QPDF_private.hh index 5c15132..ed426aa 100644 --- a/libqpdf/qpdf/QPDF_private.hh +++ b/libqpdf/qpdf/QPDF_private.hh @@ -295,491 +295,65 @@ class QPDF::PatternFinder final: public InputSource::Finder class QPDF::Doc { public: + class Encryption; class JobSetter; + class Linearization; + class Objects; + class Pages; class ParseGuard; class Resolver; class Writer; - class Encryption + // This is the common base-class for all document components. It is used by the other document + // components to access common functionality. It is not meant to be used directly by the user. + class Common { public: - // This class holds data read from the encryption dictionary. - Encryption( - int V, - int R, - int Length_bytes, - int P, - std::string const& O, - std::string const& U, - std::string const& OE, - std::string const& UE, - std::string const& Perms, - std::string const& id1, - bool encrypt_metadata) : - V(V), - R(R), - Length_bytes(Length_bytes), - P(static_cast(P)), - O(O), - U(U), - OE(OE), - UE(UE), - Perms(Perms), - id1(id1), - encrypt_metadata(encrypt_metadata) - { - } - Encryption(int V, int R, int Length_bytes, bool encrypt_metadata) : - V(V), - R(R), - Length_bytes(Length_bytes), - encrypt_metadata(encrypt_metadata) - { - } - - int getV() const; - int getR() const; - int getLengthBytes() const; - int getP() const; - // Bits in P are numbered from 1 as in the PDF spec. - bool getP(size_t bit) const; - std::string const& getO() const; - std::string const& getU() const; - std::string const& getOE() const; - std::string const& getUE() const; - std::string const& getPerms() const; - std::string const& getId1() const; - bool getEncryptMetadata() const; - // Bits in P are numbered from 1 as in the PDF spec. - void setP(size_t bit, bool val); - void setP(unsigned long val); - void setO(std::string const&); - void setU(std::string const&); - void setId1(std::string const& val); - void setV5EncryptionParameters( - std::string const& O, - std::string const& OE, - std::string const& U, - std::string const& UE, - std::string const& Perms); - - std::string compute_encryption_key(std::string const& password) const; - - bool - check_owner_password(std::string& user_password, std::string const& owner_password) const; - - bool check_user_password(std::string const& user_password) const; - - std::string - recover_encryption_key_with_password(std::string const& password, bool& perms_valid) const; - - void compute_encryption_O_U(char const* user_password, char const* owner_password); - - std::string - compute_encryption_parameters_V5(char const* user_password, char const* owner_password); - - std::string compute_parameters(char const* user_password, char const* owner_password); + Common() = delete; + Common(Common const&) = default; + Common(Common&&) = delete; + Common& operator=(Common const&) = delete; + Common& operator=(Common&&) = delete; + ~Common() = default; + + inline Common(QPDF& qpdf, QPDF::Members* m); + + void stopOnError(std::string const& message); + void warn(QPDFExc const& e); + void warn( + qpdf_error_code_e error_code, + std::string const& object, + qpdf_offset_t offset, + std::string const& message); - private: - static constexpr unsigned int OU_key_bytes_V4 = 16; // ( == sizeof(MD5::Digest) - - Encryption(Encryption const&) = delete; - Encryption& operator=(Encryption const&) = delete; - - std::string hash_V5( - std::string const& password, std::string const& salt, std::string const& udata) const; - std::string - compute_O_value(std::string const& user_password, std::string const& owner_password) const; - std::string compute_U_value(std::string const& user_password) const; - std::string compute_encryption_key_from_password(std::string const& password) const; - std::string recover_encryption_key_with_password(std::string const& password) const; - bool check_owner_password_V4( - std::string& user_password, std::string const& owner_password) const; - bool check_owner_password_V5(std::string const& owner_passworda) const; - std::string compute_Perms_value_V5_clear() const; - std::string compute_O_rc4_key( - std::string const& user_password, std::string const& owner_password) const; - std::string compute_U_value_R2(std::string const& user_password) const; - std::string compute_U_value_R3(std::string const& user_password) const; - bool check_user_password_V4(std::string const& user_password) const; - bool check_user_password_V5(std::string const& user_password) const; - - int V; - int R; - int Length_bytes; - std::bitset<32> P{0xfffffffc}; // Specification always requires bits 1 and 2 to be cleared. - std::string O; - std::string U; - std::string OE; - std::string UE; - std::string Perms; - std::string id1; - bool encrypt_metadata; - }; // class QPDF::Doc::Encryption - - class Linearization - { - public: - Linearization() = delete; - Linearization(Linearization const&) = delete; - Linearization(Linearization&&) = delete; - Linearization& operator=(Linearization const&) = delete; - Linearization& operator=(Linearization&&) = delete; - ~Linearization() = default; - - Linearization(QPDF& qpdf, QPDF::Members* m) : - qpdf(qpdf), - m(m) - { - } + static QPDFExc damagedPDF( + InputSource& input, + std::string const& object, + qpdf_offset_t offset, + std::string const& message); + QPDFExc + damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message) const; + QPDFExc damagedPDF( + std::string const& object, qpdf_offset_t offset, std::string const& message) const; + QPDFExc damagedPDF(std::string const& object, std::string const& message) const; + QPDFExc damagedPDF(qpdf_offset_t offset, std::string const& message) const; + QPDFExc damagedPDF(std::string const& message) const; - // For QPDFWriter: - - template - void optimize_internal( - T const& object_stream_data, - bool allow_changes = true, - std::function skip_stream_parameters = nullptr); - void optimize( - QPDFWriter::ObjTable const& obj, - std::function skip_stream_parameters); - - // Get lists of all objects in order according to the part of a linearized file that they - // belong to. - void getLinearizedParts( - QPDFWriter::ObjTable const& obj, - std::vector& part4, - std::vector& part6, - std::vector& part7, - std::vector& part8, - std::vector& part9); - - void generateHintStream( - QPDFWriter::NewObjTable const& new_obj, - QPDFWriter::ObjTable const& obj, - std::string& hint_stream, - int& S, - int& O, - bool compressed); - - // methods to support linearization checking -- implemented in QPDF_linearization.cc - - void readLinearizationData(); - void checkLinearizationInternal(); - void dumpLinearizationDataInternal(); - void linearizationWarning(std::string_view); - qpdf::Dictionary readHintStream(Pipeline&, qpdf_offset_t offset, size_t length); - void readHPageOffset(BitStream); - void readHSharedObject(BitStream); - void readHGeneric(BitStream, HGeneric&); - qpdf_offset_t maxEnd(ObjUser const& ou); - qpdf_offset_t getLinearizationOffset(QPDFObjGen); - QPDFObjectHandle - getUncompressedObject(QPDFObjectHandle&, std::map const& object_stream_data); - QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj); - int lengthNextN(int first_object, int n); - void checkHPageOffset( - std::vector const& pages, std::map& idx_to_obj); - void checkHSharedObject( - std::vector const& pages, std::map& idx_to_obj); - void checkHOutlines(); - void dumpHPageOffset(); - void dumpHSharedObject(); - void dumpHGeneric(HGeneric&); - qpdf_offset_t adjusted_offset(qpdf_offset_t offset); - template - void calculateLinearizationData(T const& object_stream_data); - template - void pushOutlinesToPart( - std::vector& part, - std::set& lc_outlines, - T const& object_stream_data); - int outputLengthNextN( - int in_object, - int n, - QPDFWriter::NewObjTable const& new_obj, - QPDFWriter::ObjTable const& obj); - void calculateHPageOffset( - QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); - void calculateHSharedObject( - QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); void - calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); - void writeHPageOffset(BitWriter&); - void writeHSharedObject(BitWriter&); - void writeHGeneric(BitWriter&, HGeneric&); - - // Methods to support optimization - - void updateObjectMaps( - ObjUser const& ou, - QPDFObjectHandle oh, - std::function skip_stream_parameters); - void filterCompressedObjects(std::map const& object_stream_data); - void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data); - - private: - QPDF& qpdf; - QPDF::Members* m; - }; - - class Objects - { - public: - class Foreign + no_ci_stop_if( + bool condition, std::string const& message, std::string const& context = {}) const { - class Copier - { - public: - Copier(QPDF& qpdf) : - qpdf(qpdf) - { - } - - QPDFObjectHandle copied(QPDFObjectHandle const& foreign); - - private: - QPDFObjectHandle - replace_indirect_object(QPDFObjectHandle const& foreign, bool top = false); - void reserve_objects(QPDFObjectHandle const& foreign, bool top = false); - - QPDF& qpdf; - std::map object_map; - std::vector to_copy; - QPDFObjGen::set visiting; - }; - - public: - Foreign(QPDF& qpdf) : - qpdf(qpdf) - { - } - - Foreign() = delete; - Foreign(Foreign const&) = delete; - Foreign(Foreign&&) = delete; - Foreign& operator=(Foreign const&) = delete; - Foreign& operator=(Foreign&&) = delete; - ~Foreign() = default; - - // Return a local handle to the foreign object. Copy the foreign object if necessary. - QPDFObjectHandle - copied(QPDFObjectHandle const& foreign) - { - return copier(foreign).copied(foreign); + if (condition) { + throw damagedPDF(context, message); } - - private: - Copier& copier(QPDFObjectHandle const& foreign); - - QPDF& qpdf; - std::map copiers; - }; // class QPDF::Doc::Objects::Foreign - - class Streams - { - // Copier manages the copying of streams into this PDF. It is used both for copying - // local and foreign streams. - class Copier; - - public: - Streams(QPDF& qpdf); - - Streams() = delete; - Streams(Streams const&) = delete; - Streams(Streams&&) = delete; - Streams& operator=(Streams const&) = delete; - Streams& operator=(Streams&&) = delete; - ~Streams() = default; - - public: - static bool - pipeStreamData( - QPDF* qpdf, - QPDFObjGen og, - qpdf_offset_t offset, - size_t length, - QPDFObjectHandle dict, - bool is_root_metadata, - Pipeline* pipeline, - bool suppress_warnings, - bool will_retry) - { - return qpdf->pipeStreamData( - og, - offset, - length, - dict, - is_root_metadata, - pipeline, - suppress_warnings, - will_retry); - } - - QPDF& - qpdf() const - { - return qpdf_; - } - - std::shared_ptr& - copier() - { - return copier_; - } - - bool immediate_copy_from() const; - - private: - QPDF& qpdf_; - - std::shared_ptr copier_; - }; // class QPDF::Doc::Objects::Streams - - public: - Objects() = delete; - Objects(Objects const&) = delete; - Objects(Objects&&) = delete; - Objects& operator=(Objects const&) = delete; - Objects& operator=(Objects&&) = delete; - ~Objects() = default; - - Objects(QPDF& qpdf, QPDF::Members* m) : - qpdf(qpdf), - m(m), - foreign_(qpdf), - streams_(qpdf) - { - } - - Foreign& - foreign() - { - return foreign_; - } - - Streams& - streams() - { - return streams_; } - void parse(char const* password); - std::shared_ptr const& resolve(QPDFObjGen og); - void inParse(bool); - QPDFObjGen nextObjGen(); - QPDFObjectHandle newIndirect(QPDFObjGen, std::shared_ptr const&); - void updateCache( - QPDFObjGen og, - std::shared_ptr const& object, - qpdf_offset_t end_before_space, - qpdf_offset_t end_after_space, - bool destroy = true); - bool resolveXRefTable(); - QPDFObjectHandle readObjectAtOffset( - qpdf_offset_t offset, std::string const& description, bool skip_cache_if_in_xref); - QPDFTokenizer::Token readToken(InputSource& input, size_t max_len = 0); - QPDFObjectHandle makeIndirectFromQPDFObject(std::shared_ptr const& obj); - std::shared_ptr getObjectForParser(int id, int gen, bool parse_pdf); - std::shared_ptr getObjectForJSON(int id, int gen); - size_t tableSize(); - - // For QPDFWriter: - - std::map const& getXRefTableInternal(); - // Get a list of objects that would be permitted in an object stream. - template - std::vector getCompressibleObjGens(); - std::vector getCompressibleObjVector(); - std::vector getCompressibleObjSet(); - - private: - void setTrailer(QPDFObjectHandle obj); - void reconstruct_xref(QPDFExc& e, bool found_startxref = true); - void read_xref(qpdf_offset_t offset, bool in_stream_recovery = false); - bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes); - bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); - bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); - qpdf_offset_t read_xrefTable(qpdf_offset_t offset); - qpdf_offset_t read_xrefStream(qpdf_offset_t offset, bool in_stream_recovery = false); - qpdf_offset_t processXRefStream( - qpdf_offset_t offset, QPDFObjectHandle& xref_stream, bool in_stream_recovery = false); - std::pair> - processXRefW(QPDFObjectHandle& dict, std::function damaged); - int processXRefSize( - QPDFObjectHandle& dict, - int entry_size, - std::function damaged); - std::pair>> processXRefIndex( - QPDFObjectHandle& dict, - int max_num_entries, - std::function damaged); - void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2); - void insertFreeXrefEntry(QPDFObjGen); - QPDFObjectHandle readTrailer(); - QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og); - void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); - void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); - QPDFObjectHandle - readObjectInStream(qpdf::is::OffsetBuffer& input, int stream_id, int obj_id); - size_t recoverStreamLength( - std::shared_ptr input, QPDFObjGen og, qpdf_offset_t stream_offset); - - QPDFObjGen read_object_start(qpdf_offset_t offset); - void readObjectAtOffset( - bool attempt_recovery, - qpdf_offset_t offset, - std::string const& description, - QPDFObjGen exp_og); - void resolveObjectsInStream(int obj_stream_number); - bool isCached(QPDFObjGen og); - bool isUnresolved(QPDFObjGen og); - void setLastObjectDescription(std::string const& description, QPDFObjGen og); - + protected: QPDF& qpdf; QPDF::Members* m; - Foreign foreign_; - Streams streams_; - }; // class QPDF::Doc::Objects - - // This class is used to represent a PDF Pages tree. - class Pages - { - public: - Pages() = delete; - Pages(Pages const&) = delete; - Pages(Pages&&) = delete; - Pages& operator=(Pages const&) = delete; - Pages& operator=(Pages&&) = delete; - ~Pages() = default; - - Pages(QPDF& qpdf, QPDF::Members* m) : - qpdf(qpdf), - m(m) - { - } - - void getAllPagesInternal( - QPDFObjectHandle cur_pages, - QPDFObjGen::set& visited, - QPDFObjGen::set& seen, - bool media_box, - bool resources); - void insertPage(QPDFObjectHandle newpage, int pos); - void flattenPagesTree(); - void insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate); - void pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys); - void pushInheritedAttributesToPageInternal( - QPDFObjectHandle, - std::map>&, - bool allow_changes, - bool warn_skipped_keys); - - private: - QPDF& qpdf; - QPDF::Members* m; - }; // class QPDF::Doc::Pages + QPDF::Doc::Pages& pages; + }; Doc() = delete; Doc(Doc const&) = delete; @@ -788,32 +362,17 @@ class QPDF::Doc Doc& operator=(Doc&&) = delete; ~Doc() = default; - Doc(QPDF& qpdf, QPDF::Members& m) : + Doc(QPDF& qpdf, QPDF::Members* m) : qpdf(qpdf), - m(m), - lin_(qpdf, &m), - objects_(qpdf, &m), - pages_(qpdf, &m) + m(m) { } - Linearization& - linearization() - { - return lin_; - }; + inline Linearization& linearization(); - Objects& - objects() - { - return objects_; - }; + inline Objects& objects(); - Pages& - pages() - { - return pages_; - } + inline Pages& pages(); bool reconstructed_xref() const; @@ -864,11 +423,7 @@ class QPDF::Doc private: QPDF& qpdf; - QPDF::Members& m; - - Linearization lin_; - Objects objects_; - Pages pages_; + QPDF::Members* m; // Document Helpers; std::unique_ptr acroform_; @@ -878,7 +433,528 @@ class QPDF::Doc std::unique_ptr page_labels_; }; -class QPDF::Members +class QPDF::Doc::Encryption +{ + public: + // This class holds data read from the encryption dictionary. + Encryption( + int V, + int R, + int Length_bytes, + int P, + std::string const& O, + std::string const& U, + std::string const& OE, + std::string const& UE, + std::string const& Perms, + std::string const& id1, + bool encrypt_metadata) : + V(V), + R(R), + Length_bytes(Length_bytes), + P(static_cast(P)), + O(O), + U(U), + OE(OE), + UE(UE), + Perms(Perms), + id1(id1), + encrypt_metadata(encrypt_metadata) + { + } + Encryption(int V, int R, int Length_bytes, bool encrypt_metadata) : + V(V), + R(R), + Length_bytes(Length_bytes), + encrypt_metadata(encrypt_metadata) + { + } + + int getV() const; + int getR() const; + int getLengthBytes() const; + int getP() const; + // Bits in P are numbered from 1 as in the PDF spec. + bool getP(size_t bit) const; + std::string const& getO() const; + std::string const& getU() const; + std::string const& getOE() const; + std::string const& getUE() const; + std::string const& getPerms() const; + std::string const& getId1() const; + bool getEncryptMetadata() const; + // Bits in P are numbered from 1 as in the PDF spec. + void setP(size_t bit, bool val); + void setP(unsigned long val); + void setO(std::string const&); + void setU(std::string const&); + void setId1(std::string const& val); + void setV5EncryptionParameters( + std::string const& O, + std::string const& OE, + std::string const& U, + std::string const& UE, + std::string const& Perms); + + std::string compute_encryption_key(std::string const& password) const; + + bool check_owner_password(std::string& user_password, std::string const& owner_password) const; + + bool check_user_password(std::string const& user_password) const; + + std::string + recover_encryption_key_with_password(std::string const& password, bool& perms_valid) const; + + void compute_encryption_O_U(char const* user_password, char const* owner_password); + + std::string + compute_encryption_parameters_V5(char const* user_password, char const* owner_password); + + std::string compute_parameters(char const* user_password, char const* owner_password); + + private: + static constexpr unsigned int OU_key_bytes_V4 = 16; // ( == sizeof(MD5::Digest) + + Encryption(Encryption const&) = delete; + Encryption& operator=(Encryption const&) = delete; + + std::string + hash_V5(std::string const& password, std::string const& salt, std::string const& udata) const; + std::string + compute_O_value(std::string const& user_password, std::string const& owner_password) const; + std::string compute_U_value(std::string const& user_password) const; + std::string compute_encryption_key_from_password(std::string const& password) const; + std::string recover_encryption_key_with_password(std::string const& password) const; + bool + check_owner_password_V4(std::string& user_password, std::string const& owner_password) const; + bool check_owner_password_V5(std::string const& owner_passworda) const; + std::string compute_Perms_value_V5_clear() const; + std::string + compute_O_rc4_key(std::string const& user_password, std::string const& owner_password) const; + std::string compute_U_value_R2(std::string const& user_password) const; + std::string compute_U_value_R3(std::string const& user_password) const; + bool check_user_password_V4(std::string const& user_password) const; + bool check_user_password_V5(std::string const& user_password) const; + + int V; + int R; + int Length_bytes; + std::bitset<32> P{0xfffffffc}; // Specification always requires bits 1 and 2 to be cleared. + std::string O; + std::string U; + std::string OE; + std::string UE; + std::string Perms; + std::string id1; + bool encrypt_metadata; +}; // class QPDF::Doc::Encryption + +class QPDF::Doc::Linearization: Common +{ + public: + Linearization() = delete; + Linearization(Linearization const&) = delete; + Linearization(Linearization&&) = delete; + Linearization& operator=(Linearization const&) = delete; + Linearization& operator=(Linearization&&) = delete; + ~Linearization() = default; + + Linearization(Doc& doc) : + Common(doc.qpdf, doc.m) + { + } + + // For QPDFWriter: + + template + void optimize_internal( + T const& object_stream_data, + bool allow_changes = true, + std::function skip_stream_parameters = nullptr); + void optimize( + QPDFWriter::ObjTable const& obj, + std::function skip_stream_parameters); + + // Get lists of all objects in order according to the part of a linearized file that they + // belong to. + void getLinearizedParts( + QPDFWriter::ObjTable const& obj, + std::vector& part4, + std::vector& part6, + std::vector& part7, + std::vector& part8, + std::vector& part9); + + void generateHintStream( + QPDFWriter::NewObjTable const& new_obj, + QPDFWriter::ObjTable const& obj, + std::string& hint_stream, + int& S, + int& O, + bool compressed); + + // methods to support linearization checking -- implemented in QPDF_linearization.cc + + void readLinearizationData(); + void checkLinearizationInternal(); + void dumpLinearizationDataInternal(); + void linearizationWarning(std::string_view); + qpdf::Dictionary readHintStream(Pipeline&, qpdf_offset_t offset, size_t length); + void readHPageOffset(BitStream); + void readHSharedObject(BitStream); + void readHGeneric(BitStream, HGeneric&); + qpdf_offset_t maxEnd(ObjUser const& ou); + qpdf_offset_t getLinearizationOffset(QPDFObjGen); + QPDFObjectHandle + getUncompressedObject(QPDFObjectHandle&, std::map const& object_stream_data); + QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj); + int lengthNextN(int first_object, int n); + void + checkHPageOffset(std::vector const& pages, std::map& idx_to_obj); + void + checkHSharedObject(std::vector const& pages, std::map& idx_to_obj); + void checkHOutlines(); + void dumpHPageOffset(); + void dumpHSharedObject(); + void dumpHGeneric(HGeneric&); + qpdf_offset_t adjusted_offset(qpdf_offset_t offset); + template + void calculateLinearizationData(T const& object_stream_data); + template + void pushOutlinesToPart( + std::vector& part, + std::set& lc_outlines, + T const& object_stream_data); + int outputLengthNextN( + int in_object, + int n, + QPDFWriter::NewObjTable const& new_obj, + QPDFWriter::ObjTable const& obj); + void + calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); + void + calculateHSharedObject(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); + void calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); + void writeHPageOffset(BitWriter&); + void writeHSharedObject(BitWriter&); + void writeHGeneric(BitWriter&, HGeneric&); + + // Methods to support optimization + + void updateObjectMaps( + ObjUser const& ou, + QPDFObjectHandle oh, + std::function skip_stream_parameters); + void filterCompressedObjects(std::map const& object_stream_data); + void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data); +}; + +class QPDF::Doc::Objects: Common +{ + public: + class Foreign: Common + { + class Copier: Common + { + public: + inline Copier(QPDF& qpdf); + + QPDFObjectHandle copied(QPDFObjectHandle const& foreign); + + private: + QPDFObjectHandle + replace_indirect_object(QPDFObjectHandle const& foreign, bool top = false); + void reserve_objects(QPDFObjectHandle const& foreign, bool top = false); + + std::map object_map; + std::vector to_copy; + QPDFObjGen::set visiting; + }; + + public: + Foreign(Common& common) : + Common(common) + { + } + + Foreign() = delete; + Foreign(Foreign const&) = delete; + Foreign(Foreign&&) = delete; + Foreign& operator=(Foreign const&) = delete; + Foreign& operator=(Foreign&&) = delete; + ~Foreign() = default; + + // Return a local handle to the foreign object. Copy the foreign object if necessary. + QPDFObjectHandle + copied(QPDFObjectHandle const& foreign) + { + return copier(foreign).copied(foreign); + } + + private: + Copier& copier(QPDFObjectHandle const& foreign); + + std::map copiers; + }; // class QPDF::Doc::Objects::Foreign + + class Streams: Common + { + // Copier manages the copying of streams into this PDF. It is used both for copying + // local and foreign streams. + class Copier; + + public: + Streams(Common& common); + + Streams() = delete; + Streams(Streams const&) = delete; + Streams(Streams&&) = delete; + Streams& operator=(Streams const&) = delete; + Streams& operator=(Streams&&) = delete; + ~Streams() = default; + + public: + static bool + pipeStreamData( + QPDF* qpdf, + QPDFObjGen og, + qpdf_offset_t offset, + size_t length, + QPDFObjectHandle dict, + bool is_root_metadata, + Pipeline* pipeline, + bool suppress_warnings, + bool will_retry) + { + return qpdf->pipeStreamData( + og, + offset, + length, + dict, + is_root_metadata, + pipeline, + suppress_warnings, + will_retry); + } + + std::shared_ptr& + copier() + { + return copier_; + } + + bool immediate_copy_from() const; + + private: + std::shared_ptr copier_; + }; // class QPDF::Doc::Objects::Streams + + public: + Objects() = delete; + Objects(Objects const&) = delete; + Objects(Objects&&) = delete; + Objects& operator=(Objects const&) = delete; + Objects& operator=(Objects&&) = delete; + ~Objects() = default; + + Objects(Doc& doc) : + Common(doc.qpdf, doc.m), + foreign_(*this), + streams_(*this) + { + } + + Foreign& + foreign() + { + return foreign_; + } + + Streams& + streams() + { + return streams_; + } + + void parse(char const* password); + std::shared_ptr const& resolve(QPDFObjGen og); + void inParse(bool); + QPDFObjGen nextObjGen(); + QPDFObjectHandle newIndirect(QPDFObjGen, std::shared_ptr const&); + void updateCache( + QPDFObjGen og, + std::shared_ptr const& object, + qpdf_offset_t end_before_space, + qpdf_offset_t end_after_space, + bool destroy = true); + bool resolveXRefTable(); + QPDFObjectHandle readObjectAtOffset( + qpdf_offset_t offset, std::string const& description, bool skip_cache_if_in_xref); + QPDFTokenizer::Token readToken(InputSource& input, size_t max_len = 0); + QPDFObjectHandle makeIndirectFromQPDFObject(std::shared_ptr const& obj); + std::shared_ptr getObjectForParser(int id, int gen, bool parse_pdf); + std::shared_ptr getObjectForJSON(int id, int gen); + size_t tableSize(); + + // For QPDFWriter: + + std::map const& getXRefTableInternal(); + // Get a list of objects that would be permitted in an object stream. + template + std::vector getCompressibleObjGens(); + std::vector getCompressibleObjVector(); + std::vector getCompressibleObjSet(); + + private: + void setTrailer(QPDFObjectHandle obj); + void reconstruct_xref(QPDFExc& e, bool found_startxref = true); + void read_xref(qpdf_offset_t offset, bool in_stream_recovery = false); + bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes); + bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); + bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); + qpdf_offset_t read_xrefTable(qpdf_offset_t offset); + qpdf_offset_t read_xrefStream(qpdf_offset_t offset, bool in_stream_recovery = false); + qpdf_offset_t processXRefStream( + qpdf_offset_t offset, QPDFObjectHandle& xref_stream, bool in_stream_recovery = false); + std::pair> + processXRefW(QPDFObjectHandle& dict, std::function damaged); + int processXRefSize( + QPDFObjectHandle& dict, int entry_size, std::function damaged); + std::pair>> processXRefIndex( + QPDFObjectHandle& dict, + int max_num_entries, + std::function damaged); + void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2); + void insertFreeXrefEntry(QPDFObjGen); + QPDFObjectHandle readTrailer(); + QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og); + void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); + void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); + QPDFObjectHandle readObjectInStream(qpdf::is::OffsetBuffer& input, int stream_id, int obj_id); + size_t recoverStreamLength( + std::shared_ptr input, QPDFObjGen og, qpdf_offset_t stream_offset); + + QPDFObjGen read_object_start(qpdf_offset_t offset); + void readObjectAtOffset( + bool attempt_recovery, + qpdf_offset_t offset, + std::string const& description, + QPDFObjGen exp_og); + void resolveObjectsInStream(int obj_stream_number); + bool isCached(QPDFObjGen og); + bool isUnresolved(QPDFObjGen og); + void setLastObjectDescription(std::string const& description, QPDFObjGen og); + + Foreign foreign_; + Streams streams_; +}; // class QPDF::Doc::Objects + +// This class is used to represent a PDF Pages tree. +class QPDF::Doc::Pages: Common +{ + public: + using iterator = std::vector::const_iterator; + + Pages() = delete; + Pages(Pages const&) = delete; + Pages(Pages&&) = delete; + Pages& operator=(Pages const&) = delete; + Pages& operator=(Pages&&) = delete; + ~Pages() = default; + + Pages(Doc& doc) : + Common(doc.qpdf, doc.m) + { + } + + std::vector const& + all() + { + return !all_pages.empty() ? all_pages : cache(); + } + + bool + empty() + { + return all().empty(); + } + + size_t + size() + { + return all().size(); + } + + iterator + begin() + { + return all().cbegin(); + } + + iterator + end() + { + return all().cend(); + } + + int find(QPDFObjGen og); + void insert(QPDFObjectHandle newpage, int pos); + void + insert(QPDFObjectHandle const& newpage, size_t pos) + { + insert(newpage, static_cast(pos)); + } + void erase(QPDFObjectHandle& page); + void update_cache(); + void flatten_annotations(int required_flags, int forbidden_flags); + + bool + ever_pushed_inherited_attributes_to_pages() const + { + return ever_pushed_inherited_attributes_to_pages_; + } + + bool + ever_called_get_all_pages() const + { + return ever_called_get_all_pages_; + } + + void pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys); + + private: + void flattenPagesTree(); + void insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate); + void pushInheritedAttributesToPageInternal( + QPDFObjectHandle, + std::map>&, + bool allow_changes, + bool warn_skipped_keys); + std::vector const& cache(); + void getAllPagesInternal( + QPDFObjectHandle cur_pages, + QPDFObjGen::set& visited, + QPDFObjGen::set& seen, + bool media_box, + bool resources); + void flatten_annotations_for_page( + QPDFPageObjectHelper& page, + QPDFObjectHandle& resources, + QPDFAcroFormDocumentHelper& afdh, + int required_flags, + int forbidden_flags); + + std::vector all_pages; + std::map pageobj_to_pages_pos; + + bool pushed_inherited_attributes_to_pages{false}; + bool invalid_page_found{false}; + bool ever_pushed_inherited_attributes_to_pages_{false}; + bool ever_called_get_all_pages_{false}; + +}; // class QPDF::Doc::Pages + +class QPDF::Members: Doc { friend class QPDF; friend class ResolveRecorder; @@ -889,10 +965,10 @@ class QPDF::Members ~Members() = default; private: - Doc doc; - Doc::Linearization& lin; - Doc::Objects& objects; - Doc::Pages& pages; + Doc::Common c; + Doc::Linearization lin; + Doc::Objects objects; + Doc::Pages pages; std::shared_ptr log; unsigned long long unique_id{0}; qpdf::Tokenizer tokenizer; @@ -915,12 +991,6 @@ class QPDF::Members std::map obj_cache; std::set resolving; QPDFObjectHandle trailer; - std::vector all_pages; - bool invalid_page_found{false}; - std::map pageobj_to_pages_pos; - bool pushed_inherited_attributes_to_pages{false}; - bool ever_pushed_inherited_attributes_to_pages{false}; - bool ever_called_get_all_pages{false}; std::vector warnings; bool reconstructed_xref{false}; bool in_read_xref_stream{false}; @@ -978,25 +1048,46 @@ class QPDF::Doc::Resolver } }; +inline QPDF::Doc::Common::Common(QPDF& qpdf, QPDF::Members* m) : + qpdf(qpdf), + m(m), + pages(m->pages) +{ +} + +inline QPDF::Doc::Linearization& +QPDF::Doc::linearization() +{ + return m->lin; +}; + +inline QPDF::Doc::Objects& +QPDF::Doc::objects() +{ + return m->objects; +}; + +inline QPDF::Doc::Pages& +QPDF::Doc::pages() +{ + return m->pages; +} + inline bool QPDF::Doc::reconstructed_xref() const { - return m.reconstructed_xref; + return m->reconstructed_xref; } inline QPDF::Doc& QPDF::doc() { - return m->doc; + return *m; } -// Throw a generic exception for unusual error conditions that do not be covered during CI testing. -inline void -QPDF::no_ci_stop_if(bool condition, std::string const& message, std::string const& context) +inline QPDF::Doc::Objects::Foreign::Copier::Copier(QPDF& qpdf) : + Common(qpdf, qpdf.doc().m) { - if (condition) { - throw damagedPDF(context, message); - } } #endif // QPDF_PRIVATE_HH diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index d73deef..dfbc25c 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -224,14 +224,8 @@ QPDFAnnotationObjectHelper default matrix 0 QPDFAnnotationObjectHelper rotate 90 0 QPDFAnnotationObjectHelper rotate 180 0 QPDFAnnotationObjectHelper rotate 270 0 -QPDFPageDocumentHelper skip widget need appearances 0 -QPDFPageDocumentHelper merge DR 0 QPDFPageDocumentHelper non-widget annotation 0 -QPDFPageDocumentHelper remove annots 0 -QPDFPageDocumentHelper replace indirect annots 0 -QPDFPageDocumentHelper replace direct annots 0 QPDFObjectHandle replace with copy 0 -QPDFPageDocumentHelper indirect as resources 0 QPDFAnnotationObjectHelper forbidden flags 0 QPDFAnnotationObjectHelper missing required flags 0 QPDFFormFieldObjectHelper checkbox kid widget 0 @@ -255,7 +249,6 @@ QPDFJob auto-encode password 0 QPDFJob bytes fallback warning 0 QPDFJob invalid utf-8 in auto 0 QPDFJob input password hex-bytes 0 -QPDFPageDocumentHelper ignore annotation with no appearance 0 QPDFFormFieldObjectHelper replaced BMC at EOF 0 QPDFFormFieldObjectHelper fallback Tf 0 QPDFPageObjectHelper copy shared attribute 1 @@ -383,8 +376,6 @@ qpdf-c warn about oh error 1 qpdf-c cleanup warned about unhandled error 0 qpdf-c called qpdf_get_object_by_id 0 qpdf-c called qpdf_replace_object 0 -qpdf-c called qpdf_num_pages 0 -qpdf-c called qpdf_get_page_n 0 qpdf-c called qpdf_update_all_pages_cache 0 qpdf-c called qpdf_find_page_by_id 0 qpdf-c called qpdf_find_page_by_oh 0 @@ -422,14 +413,9 @@ qpdf-c called qpdf_empty_pdf 0 QPDF_json missing qpdf 0 QPDF_json missing pdf version 0 QPDF_json top-level scalar 0 -QPDF_json bad pdf version 0 QPDF_json top-level array 0 -QPDF_json bad object key 0 -QPDF_json trailer stream 0 QPDF_json missing trailer 0 QPDF_json missing objects 0 -QPDF_json ignoring in st_ignore 0 -QPDF_json stream dict not dict 0 QPDF_json unrecognized string value 0 QPDF_json data datafile both or neither 0 QPDF_json stream no dict 0 @@ -438,25 +424,13 @@ QPDF_json value stream both or neither 0 QPDFJob need json-stream-prefix for stdout 0 QPDFJob write json to stdout 0 QPDFJob write json to file 0 -QPDF_json ignoring unknown top-level key 0 -QPDF_json ignore second-level key 0 -QPDF_json ignore unknown key in object_top 0 -QPDF_json ignore unknown key in trailer 0 -QPDF_json ignore unknown key in stream 0 QPDF_json data and datafile 0 QPDF_json no stream data in update mode 0 QPDF_json updating existing stream 0 -QPDF_json qpdf not array 0 QPDF_json more than two qpdf elements 0 QPDF_json missing json version 0 -QPDF_json bad json version 0 -QPDF_json bad calledgetallpages 0 -QPDF_json bad pushedinheritedpageresources 0 QPDFPageObjectHelper used fallback without copying 0 QPDF skipping cache for known unchecked object 0 QPDF recover xref stream 0 QPDFJob json over/under no file 0 QPDF_Array copy 1 -QPDF_json stream data not string 0 -QPDF_json stream datafile not string 0 -QPDF_json stream not a dictionary 0