From 8d36ca4d851ce62356a58f5455b23de5f84d8dbf Mon Sep 17 00:00:00 2001 From: m-holger Date: Mon, 1 Dec 2025 19:36:24 +0000 Subject: [PATCH] Refactor `AcroForm`: centralize functionality within `AcroForm` class for improved modularity and reusability. --- libqpdf/QPDFAcroFormDocumentHelper.cc | 196 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------- libqpdf/qpdf/AcroForm.hh | 158 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 308 insertions(+), 46 deletions(-) diff --git a/libqpdf/QPDFAcroFormDocumentHelper.cc b/libqpdf/QPDFAcroFormDocumentHelper.cc index 27b08dc..e1911c2 100644 --- a/libqpdf/QPDFAcroFormDocumentHelper.cc +++ b/libqpdf/QPDFAcroFormDocumentHelper.cc @@ -9,15 +9,15 @@ #include #include #include +#include #include #include using namespace qpdf; +using namespace qpdf::impl; using namespace std::literals; -using AcroForm = impl::AcroForm; - class QPDFAcroFormDocumentHelper::Members: public AcroForm { public: @@ -42,23 +42,41 @@ QPDFAcroFormDocumentHelper::get(QPDF& qpdf) void QPDFAcroFormDocumentHelper::validate(bool repair) { + m->validate(repair); +} + +void +AcroForm::validate(bool repair) +{ invalidateCache(); - m->analyze(); + analyze(); } void QPDFAcroFormDocumentHelper::invalidateCache() { - m->cache_valid_ = false; - m->fields_.clear(); - m->annotation_to_field_.clear(); - m->bad_fields_.clear(); - m->name_to_fields_.clear(); + m->invalidateCache(); +} + +void +AcroForm::invalidateCache() +{ + cache_valid_ = false; + fields_.clear(); + annotation_to_field_.clear(); + bad_fields_.clear(); + name_to_fields_.clear(); } bool QPDFAcroFormDocumentHelper::hasAcroForm() { + return m->hasAcroForm(); +} + +bool +AcroForm::hasAcroForm() +{ return qpdf.getRoot().hasKey("/AcroForm"); } @@ -76,19 +94,31 @@ AcroForm::getOrCreateAcroForm() void QPDFAcroFormDocumentHelper::addFormField(QPDFFormFieldObjectHelper ff) { - auto acroform = m->getOrCreateAcroForm(); + m->addFormField(ff); +} + +void +AcroForm::addFormField(QPDFFormFieldObjectHelper ff) +{ + auto acroform = getOrCreateAcroForm(); auto fields = acroform.getKey("/Fields"); if (!fields.isArray()) { fields = acroform.replaceKeyAndGetNew("/Fields", QPDFObjectHandle::newArray()); } fields.appendItem(ff.getObjectHandle()); - m->traverseField(ff.getObjectHandle(), {}, 0); + traverseField(ff.getObjectHandle(), {}, 0); } void QPDFAcroFormDocumentHelper::addAndRenameFormFields(std::vector fields) { - m->analyze(); + m->addAndRenameFormFields(fields); +} + +void +AcroForm::addAndRenameFormFields(std::vector fields) +{ + analyze(); std::map renames; QPDFObjGen::set seen; for (std::list queue{fields.begin(), fields.end()}; !queue.empty(); @@ -139,6 +169,12 @@ QPDFAcroFormDocumentHelper::addAndRenameFormFields(std::vector void QPDFAcroFormDocumentHelper::removeFormFields(std::set const& to_remove) { + m->removeFormFields(to_remove); +} + +void +AcroForm::removeFormFields(std::set const& to_remove) +{ auto acroform = qpdf.getRoot().getKey("/AcroForm"); if (!acroform.isDictionary()) { return; @@ -149,19 +185,19 @@ QPDFAcroFormDocumentHelper::removeFormFields(std::set const& to_remo } for (auto const& og: to_remove) { - auto it = m->fields_.find(og); - if (it != m->fields_.end()) { + auto it = fields_.find(og); + if (it != fields_.end()) { for (auto aoh: it->second.annotations) { - m->annotation_to_field_.erase(aoh.getObjectHandle().getObjGen()); + annotation_to_field_.erase(aoh.getObjectHandle().getObjGen()); } auto const& name = it->second.name; if (!name.empty()) { - m->name_to_fields_[name].erase(og); - if (m->name_to_fields_[name].empty()) { - m->name_to_fields_.erase(name); + name_to_fields_[name].erase(og); + if (name_to_fields_[name].empty()) { + name_to_fields_.erase(name); } } - m->fields_.erase(og); + fields_.erase(og); } } @@ -179,16 +215,28 @@ QPDFAcroFormDocumentHelper::removeFormFields(std::set const& to_remo void QPDFAcroFormDocumentHelper::setFormFieldName(QPDFFormFieldObjectHelper ff, std::string const& name) { + m->setFormFieldName(ff, name); +} + +void +AcroForm::setFormFieldName(QPDFFormFieldObjectHelper ff, std::string const& name) +{ ff.setFieldAttribute("/T", name); - m->traverseField(ff, ff["/Parent"], 0); + traverseField(ff, ff["/Parent"], 0); } std::vector QPDFAcroFormDocumentHelper::getFormFields() { - m->analyze(); + return m->getFormFields(); +} + +std::vector +AcroForm::getFormFields() +{ + analyze(); std::vector result; - for (auto const& [og, data]: m->fields_) { + for (auto const& [og, data]: fields_) { if (!data.annotations.empty()) { result.emplace_back(qpdf.getObject(og)); } @@ -199,10 +247,16 @@ QPDFAcroFormDocumentHelper::getFormFields() std::set QPDFAcroFormDocumentHelper::getFieldsWithQualifiedName(std::string const& name) { - m->analyze(); + return m->getFieldsWithQualifiedName(name); +} + +std::set +AcroForm::getFieldsWithQualifiedName(std::string const& name) +{ + analyze(); // Keep from creating an empty entry - auto iter = m->name_to_fields_.find(name); - if (iter != m->name_to_fields_.end()) { + auto iter = name_to_fields_.find(name); + if (iter != name_to_fields_.end()) { return iter->second; } return {}; @@ -211,11 +265,17 @@ QPDFAcroFormDocumentHelper::getFieldsWithQualifiedName(std::string const& name) std::vector QPDFAcroFormDocumentHelper::getAnnotationsForField(QPDFFormFieldObjectHelper h) { - m->analyze(); + return m->getAnnotationsForField(h); +} + +std::vector +AcroForm::getAnnotationsForField(QPDFFormFieldObjectHelper h) +{ + analyze(); std::vector result; QPDFObjGen og(h.getObjectHandle().getObjGen()); - if (m->fields_.contains(og)) { - result = m->fields_[og].annotations; + if (fields_.contains(og)) { + result = fields_[og].annotations; } return result; } @@ -235,7 +295,13 @@ AcroForm::getWidgetAnnotationsForPage(QPDFPageObjectHelper h) std::vector QPDFAcroFormDocumentHelper::getFormFieldsForPage(QPDFPageObjectHelper ph) { - m->analyze(); + return m->getFormFieldsForPage(ph); +} + +std::vector +AcroForm::getFormFieldsForPage(QPDFPageObjectHelper ph) +{ + analyze(); QPDFObjGen::set todo; std::vector result; for (auto& annot: getWidgetAnnotationsForPage(ph)) { @@ -250,14 +316,20 @@ QPDFAcroFormDocumentHelper::getFormFieldsForPage(QPDFPageObjectHelper ph) QPDFFormFieldObjectHelper QPDFAcroFormDocumentHelper::getFieldForAnnotation(QPDFAnnotationObjectHelper h) { + return m->getFieldForAnnotation(h); +} + +QPDFFormFieldObjectHelper +AcroForm::getFieldForAnnotation(QPDFAnnotationObjectHelper h) +{ QPDFObjectHandle oh = h.getObjectHandle(); if (!oh.isDictionaryOfType("", "/Widget")) { return Null::temp(); } - m->analyze(); + analyze(); QPDFObjGen og(oh.getObjGen()); - if (m->annotation_to_field_.contains(og)) { - return m->annotation_to_field_[og]; + if (annotation_to_field_.contains(og)) { + return annotation_to_field_[og]; } return Null::temp(); } @@ -412,6 +484,12 @@ AcroForm::traverseField(QPDFObjectHandle field, QPDFObjectHandle const& parent, bool QPDFAcroFormDocumentHelper::getNeedAppearances() { + return m->getNeedAppearances(); +} + +bool +AcroForm::getNeedAppearances() +{ bool result = false; QPDFObjectHandle acroform = qpdf.getRoot().getKey("/AcroForm"); if (acroform.isDictionary() && acroform.getKey("/NeedAppearances").isBool()) { @@ -423,6 +501,12 @@ QPDFAcroFormDocumentHelper::getNeedAppearances() void QPDFAcroFormDocumentHelper::setNeedAppearances(bool val) { + m->setNeedAppearances(val); +} + +void +AcroForm::setNeedAppearances(bool val) +{ QPDFObjectHandle acroform = qpdf.getRoot().getKey("/AcroForm"); if (!acroform.isDictionary()) { qpdf.getRoot().warn( @@ -440,6 +524,12 @@ QPDFAcroFormDocumentHelper::setNeedAppearances(bool val) void QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded() { + m->generateAppearancesIfNeeded(); +} + +void +AcroForm::generateAppearancesIfNeeded() +{ if (!getNeedAppearances()) { return; } @@ -466,6 +556,12 @@ QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded() void QPDFAcroFormDocumentHelper::disableDigitalSignatures() { + m->disableDigitalSignatures(); +} + +void +AcroForm::disableDigitalSignatures() +{ qpdf.removeSecurityRestrictions(); std::set to_remove; auto fields = getFormFields(); @@ -744,7 +840,6 @@ QPDFAcroFormDocumentHelper::transformAnnotations( QPDF* from_qpdf, QPDFAcroFormDocumentHelper* from_afdh) { - Array old_annots = std::move(a_old_annots); if (!from_qpdf) { // Assume these are from the same QPDF. from_qpdf = &qpdf; @@ -752,6 +847,23 @@ QPDFAcroFormDocumentHelper::transformAnnotations( } else if (from_qpdf != &qpdf && !from_afdh) { from_afdh = &QPDFAcroFormDocumentHelper::get(*from_qpdf); } + m->transformAnnotations( + a_old_annots, new_annots, new_fields, old_fields, cm, from_qpdf, from_afdh->m.get()); +} + +void +AcroForm::transformAnnotations( + QPDFObjectHandle a_old_annots, + std::vector& new_annots, + std::vector& new_fields, + std::set& old_fields, + QPDFMatrix const& cm, + QPDF* from_qpdf, + AcroForm* from_afdh) +{ + qpdf_expect(from_qpdf); + qpdf_expect(from_afdh); + Array old_annots = std::move(a_old_annots); const bool foreign = from_qpdf != &qpdf; // It's possible that we will transform annotations that don't include any form fields. This @@ -809,7 +921,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations( // Ensure that we have a /DR that is an indirect // dictionary object. if (!acroform) { - acroform = m->getOrCreateAcroForm(); + acroform = getOrCreateAcroForm(); } dr = acroform["/DR"]; if (!dr) { @@ -874,7 +986,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations( } ++i; } - m->adjustInheritedFields( + adjustInheritedFields( obj, override_da, from_default_da, override_q, from_default_q); if (foreign) { // Lazily initialize our /DR and the conflict map. @@ -890,7 +1002,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations( obj.replace("/DR", dr); } if (obj["/DA"].isString() && !dr_map.empty()) { - m->adjustDefaultAppearances(obj, dr_map); + adjustDefaultAppearances(obj, dr_map); } } } @@ -1037,7 +1149,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations( } Dictionary resources = dict["/Resources"]; if (!dr_map.empty() && resources) { - m->adjustAppearanceStream(stream, dr_map); + adjustAppearanceStream(stream, dr_map); } } auto rect = cm.transformRectangle(annot["/Rect"].getArrayAsRectangle()); @@ -1052,6 +1164,16 @@ QPDFAcroFormDocumentHelper::fixCopiedAnnotations( QPDFAcroFormDocumentHelper& from_afdh, std::set* added_fields) { + m->fixCopiedAnnotations(to_page, from_page, *from_afdh.m, added_fields); +} + +void +AcroForm::fixCopiedAnnotations( + QPDFObjectHandle to_page, + QPDFObjectHandle from_page, + AcroForm& from_afdh, + std::set* added_fields) +{ auto old_annots = from_page.getKey("/Annots"); if (old_annots.empty() || !old_annots.isArray()) { return; @@ -1066,7 +1188,7 @@ QPDFAcroFormDocumentHelper::fixCopiedAnnotations( new_fields, old_fields, QPDFMatrix(), - &(from_afdh.getQPDF()), + &(from_afdh.qpdf), &from_afdh); to_page.replaceKey("/Annots", QPDFObjectHandle::newArray(new_annots)); diff --git a/libqpdf/qpdf/AcroForm.hh b/libqpdf/qpdf/AcroForm.hh index 7a32697..8f65d8c 100644 --- a/libqpdf/qpdf/AcroForm.hh +++ b/libqpdf/qpdf/AcroForm.hh @@ -11,6 +11,19 @@ class QPDFAnnotationObjectHelper; namespace qpdf::impl { + /// @class AcroForm + /// @brief Represents the interactive form dictionary and the interactive form tree within a + /// PDF document. + /// @par + /// The AcroForm class deals with interactive forms defined in section 12.7 of the PDF + /// specification. This defines a tree structure consisting of an interactive form or + /// `/AcroForm` dictionary (section 12.7.3) at its root. The attributes of the + /// `/AcroForm` dictionary are defined in table 224 of the PDF 2.0 / table 220 of the + /// PDF 1.7 specification. + /// @par + /// The nodes of the interactive forms tree are represented by the FormNode class. + /// + /// @since 12.3 class AcroForm: public Doc::Common { public: @@ -27,14 +40,61 @@ namespace qpdf::impl // We have to analyze up front. Otherwise, when we are adding annotations and fields, we // are in a temporarily unstable configuration where some widget annotations are not // reachable. - analyze(); + validate(); } - struct FieldData - { - std::vector annotations; - std::string name; - }; + // Re-validate the AcroForm structure. This is useful if you have modified the structure of + // the AcroForm dictionary in a way that would invalidate the cache. + // + // If repair is true, the document will be repaired if possible if the validation encounters + // errors. + void validate(bool repair = true); + + // This class lazily creates an internal cache of the mapping among form fields, + // annotations, and pages. Methods within this class preserve the validity of this cache. + // However, if you modify pages' annotation dictionaries, the document's /AcroForm + // dictionary, or any form fields manually in a way that alters the association between + // forms, fields, annotations, and pages, it may cause this cache to become invalid. This + // method marks the cache invalid and forces it to be regenerated the next time it is + // needed. + void invalidateCache(); + + bool hasAcroForm(); + + // Add a form field, initializing the document's AcroForm dictionary if needed, updating the + // cache if necessary. Note that if you are adding fields that are copies of other fields, + // this method may result in multiple fields existing with the same qualified name, which + // can have unexpected side effects. In that case, you should use addAndRenameFormFields() + // instead. + void addFormField(QPDFFormFieldObjectHelper); + + // Add a collection of form fields making sure that their fully qualified names don't + // conflict with already present form fields. Fields within the collection of new fields + // that have the same name as each other will continue to do so. + void addAndRenameFormFields(std::vector fields); + + // Remove fields from the fields array + void removeFormFields(std::set const&); + + // Set the name of a field, updating internal records of field names. Name should be UTF-8 + // encoded. + void setFormFieldName(QPDFFormFieldObjectHelper, std::string const& name); + + // Return a vector of all terminal fields in a document. Terminal fields are fields that + // have no children that are also fields. Terminal fields may still have children that are + // annotations. Intermediate nodes in the fields tree are not included in this list, but you + // can still reach them through the getParent method of the field object helper. + std::vector getFormFields(); + + // Return all the form fields that have the given fully-qualified name and also have an + // explicit "/T" attribute. For this information to be accurate, any changes to field names + // must be done through setFormFieldName() above. + std::set getFieldsWithQualifiedName(std::string const& name); + + // Return the annotations associated with a terminal field. Note that in the case of a field + // having a single annotation, the underlying object will typically be the same as the + // underlying object for the field. + std::vector getAnnotationsForField(QPDFFormFieldObjectHelper); /// Retrieves a list of widget annotations for the specified page. /// @@ -50,6 +110,86 @@ namespace qpdf::impl std::vector getWidgetAnnotationsForPage(QPDFPageObjectHelper page); + // Return top-level form fields for a page. + std::vector getFormFieldsForPage(QPDFPageObjectHelper); + + // Return the terminal field that is associated with this annotation. If the annotation + // dictionary is merged with the field dictionary, the underlying object will be the same, + // but this is not always the case. Note that if you call this method with an annotation + // that is not a widget annotation, there will not be an associated field, and this method + // will return a helper associated with a null object (isNull() == true). + QPDFFormFieldObjectHelper getFieldForAnnotation(QPDFAnnotationObjectHelper); + + // Return the current value of /NeedAppearances. If /NeedAppearances is missing, return + // false as that is how PDF viewers are supposed to interpret it. + bool getNeedAppearances(); + + // Indicate whether appearance streams must be regenerated. If you modify a field value, you + // should call setNeedAppearances(true) unless you also generate an appearance stream for + // the corresponding annotation at the same time. If you generate appearance streams for all + // fields, you can call setNeedAppearances(false). If you use + // QPDFFormFieldObjectHelper::setV, it will automatically call this method unless you tell + // it not to. + void setNeedAppearances(bool); + + // If /NeedAppearances is false, do nothing. Otherwise generate appearance streams for all + // widget annotations that need them. See comments in QPDFFormFieldObjectHelper.hh for + // generateAppearance for limitations. For checkbox and radio button fields, this code + // ensures that appearance state is consistent with the field's value and uses any + // pre-existing appearance streams. + void generateAppearancesIfNeeded(); + + // Disable Digital Signature Fields. Remove all digital signature fields from the document, + // leaving any annotation showing the content of the field intact. This also calls + // QPDF::removeSecurityRestrictions. + void disableDigitalSignatures(); + + // Note: this method works on all annotations, not just ones with associated fields. For + // each annotation in old_annots, apply the given transformation matrix to create a new + // annotation. New annotations are appended to new_annots. If the annotation is associated + // with a form field, a new form field is created that points to the new annotation and is + // appended to new_fields, and the old field is added to old_fields. + // + // old_annots may belong to a different QPDF object. In that case, you should pass in + // from_qpdf, and copyForeignObject will be called automatically. If this is the case, for + // efficiency, you may pass in a QPDFAcroFormDocumentHelper for the other file to avoid the + // expensive process of creating one for each call to transformAnnotations. New fields and + // annotations are not added to the document or pages. You have to do that yourself after + // calling transformAnnotations. If this operation will leave orphaned fields behind, such + // as if you are replacing the old annotations with the new ones on the same page and the + // fields and annotations are not shared, you will also need to remove the old fields to + // prevent them from hanging around unreferenced. + void transformAnnotations( + QPDFObjectHandle old_annots, + std::vector& new_annots, + std::vector& new_fields, + std::set& old_fields, + QPDFMatrix const& cm, + QPDF* from_qpdf, + AcroForm* from_afdh); + + // Copy form fields and annotations from one page to another, allowing the from page to be + // in a different QPDF or in the same QPDF. This would typically be called after calling + // addPage to add field/annotation awareness. When just copying the page by itself, + // annotations end up being shared, and fields end up being omitted because there is no + // reference to the field from the page. This method ensures that each separate copy of a + // page has private annotations and that fields and annotations are properly updated to + // resolve conflicts that may occur from common resource and field names across documents. + // It is basically a wrapper around transformAnnotations that handles updating the receiving + // page. If new_fields is non-null, any newly created fields are added to it. + void fixCopiedAnnotations( + QPDFObjectHandle to_page, + QPDFObjectHandle from_page, + AcroForm& from_afdh, + std::set* new_fields = nullptr); + + private: + struct FieldData + { + std::vector annotations; + std::string name; + }; + /// Analyzes the AcroForm structure in the PDF document and updates the internal /// cache with the form fields and their corresponding widget annotations. /// @@ -374,7 +514,7 @@ namespace qpdf::impl std::string mapping_name() const; /// @brief Retrieves the field value (`/V` attribute) of a specified field, accounting for - /// inheritance through thehierarchy of ancestor nodes in the form field tree. + /// inheritance through the hierarchy of ancestor nodes in the form field tree. /// /// This function attempts to retrieve the `/V` attribute. If the `inherit` /// parameter is set to `true` and the `/V` is not found at the current level, the @@ -549,11 +689,11 @@ namespace qpdf::impl /// name. /// /// The method accesses the AcroForm dictionary within the root object of the PDF document. - /// If the the AcroForm dictionary contains the given field name, it retrieves the + /// If the AcroForm dictionary contains the given field name, it retrieves the /// corresponding entry. Otherwise, it returns a default-constructed object handle. /// /// @param name The name of the form field to retrieve. - /// @return A object handle corresponding to the specified name within the AcroForm + /// @return An object handle corresponding to the specified name within the AcroForm /// dictionary. QPDFObjectHandle const& from_AcroForm(std::string const& name) const -- libgit2 0.21.4