diff --git a/include/qpdf/QPDFAcroFormDocumentHelper.hh b/include/qpdf/QPDFAcroFormDocumentHelper.hh index 3694681..9ef2a7a 100644 --- a/include/qpdf/QPDFAcroFormDocumentHelper.hh +++ b/include/qpdf/QPDFAcroFormDocumentHelper.hh @@ -225,21 +225,6 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper std::set* new_fields = nullptr); private: - void analyze(); - bool traverseField(QPDFObjectHandle field, QPDFObjectHandle const& parent, int depth); - QPDFObjectHandle getOrCreateAcroForm(); - void adjustInheritedFields( - QPDFObjectHandle obj, - bool override_da, - std::string const& from_default_da, - bool override_q, - int from_default_q); - void adjustDefaultAppearances( - QPDFObjectHandle obj, - std::map> const& dr_map); - void adjustAppearanceStream( - QPDFObjectHandle stream, std::map> dr_map); - class Members; std::shared_ptr m; diff --git a/libqpdf/QPDFAcroFormDocumentHelper.cc b/libqpdf/QPDFAcroFormDocumentHelper.cc index 531b26b..9f3f5d4 100644 --- a/libqpdf/QPDFAcroFormDocumentHelper.cc +++ b/libqpdf/QPDFAcroFormDocumentHelper.cc @@ -31,9 +31,6 @@ QPDFAcroFormDocumentHelper::QPDFAcroFormDocumentHelper(QPDF& qpdf) : QPDFDocumentHelper(qpdf), m(std::make_shared(qpdf)) { - // We have to analyze up front. Otherwise, when we are adding annotations and fields, we are in - // a temporarily unstable configuration where some widget annotations are not reachable. 
- analyze(); } QPDFAcroFormDocumentHelper& @@ -46,7 +43,7 @@ void QPDFAcroFormDocumentHelper::validate(bool repair) { invalidateCache(); - analyze(); + m->analyze(); } void @@ -65,7 +62,7 @@ QPDFAcroFormDocumentHelper::hasAcroForm() } QPDFObjectHandle -QPDFAcroFormDocumentHelper::getOrCreateAcroForm() +AcroForm::getOrCreateAcroForm() { auto acroform = qpdf.getRoot().getKey("/AcroForm"); if (!acroform.isDictionary()) { @@ -78,19 +75,19 @@ QPDFAcroFormDocumentHelper::getOrCreateAcroForm() void QPDFAcroFormDocumentHelper::addFormField(QPDFFormFieldObjectHelper ff) { - auto acroform = getOrCreateAcroForm(); + auto acroform = m->getOrCreateAcroForm(); auto fields = acroform.getKey("/Fields"); if (!fields.isArray()) { fields = acroform.replaceKeyAndGetNew("/Fields", QPDFObjectHandle::newArray()); } fields.appendItem(ff.getObjectHandle()); - traverseField(ff.getObjectHandle(), {}, 0); + m->traverseField(ff.getObjectHandle(), {}, 0); } void QPDFAcroFormDocumentHelper::addAndRenameFormFields(std::vector fields) { - analyze(); + m->analyze(); std::map renames; QPDFObjGen::set seen; for (std::list queue{fields.begin(), fields.end()}; !queue.empty(); @@ -182,13 +179,13 @@ void QPDFAcroFormDocumentHelper::setFormFieldName(QPDFFormFieldObjectHelper ff, std::string const& name) { ff.setFieldAttribute("/T", name); - traverseField(ff, ff["/Parent"], 0); + m->traverseField(ff, ff["/Parent"], 0); } std::vector QPDFAcroFormDocumentHelper::getFormFields() { - analyze(); + m->analyze(); std::vector result; for (auto const& [og, data]: m->field_to) { if (!data.annotations.empty()) { @@ -201,7 +198,7 @@ QPDFAcroFormDocumentHelper::getFormFields() std::set QPDFAcroFormDocumentHelper::getFieldsWithQualifiedName(std::string const& name) { - analyze(); + m->analyze(); // Keep from creating an empty entry auto iter = m->name_to_fields.find(name); if (iter != m->name_to_fields.end()) { @@ -213,7 +210,7 @@ QPDFAcroFormDocumentHelper::getFieldsWithQualifiedName(std::string const& name) 
std::vector QPDFAcroFormDocumentHelper::getAnnotationsForField(QPDFFormFieldObjectHelper h) { - analyze(); + m->analyze(); std::vector result; QPDFObjGen og(h.getObjectHandle().getObjGen()); if (m->field_to.contains(og)) { @@ -225,13 +222,19 @@ QPDFAcroFormDocumentHelper::getAnnotationsForField(QPDFFormFieldObjectHelper h) std::vector QPDFAcroFormDocumentHelper::getWidgetAnnotationsForPage(QPDFPageObjectHelper h) { + return m->getWidgetAnnotationsForPage(h); +} + +std::vector +AcroForm::getWidgetAnnotationsForPage(QPDFPageObjectHelper h) +{ return h.getAnnotations("/Widget"); } std::vector QPDFAcroFormDocumentHelper::getFormFieldsForPage(QPDFPageObjectHelper ph) { - analyze(); + m->analyze(); QPDFObjGen::set todo; std::vector result; for (auto& annot: getWidgetAnnotationsForPage(ph)) { @@ -250,7 +253,7 @@ QPDFAcroFormDocumentHelper::getFieldForAnnotation(QPDFAnnotationObjectHelper h) if (!oh.isDictionaryOfType("", "/Widget")) { return Null::temp(); } - analyze(); + m->analyze(); QPDFObjGen og(oh.getObjGen()); if (m->annotation_to_field.contains(og)) { return m->annotation_to_field[og]; @@ -259,12 +262,12 @@ QPDFAcroFormDocumentHelper::getFieldForAnnotation(QPDFAnnotationObjectHelper h) } void -QPDFAcroFormDocumentHelper::analyze() +AcroForm::analyze() { - if (m->cache_valid) { + if (cache_valid) { return; } - m->cache_valid = true; + cache_valid = true; QPDFObjectHandle acroform = qpdf.getRoot().getKey("/AcroForm"); if (!(acroform.isDictionary() && acroform.hasKey("/Fields"))) { return; @@ -287,11 +290,11 @@ QPDFAcroFormDocumentHelper::analyze() // a file that contains this kind of error will probably not // actually work with most viewers. 
- for (auto const& ph: QPDFPageDocumentHelper(qpdf).getAllPages()) { + for (QPDFPageObjectHelper ph: pages) { for (auto const& iter: getWidgetAnnotationsForPage(ph)) { QPDFObjectHandle annot(iter.getObjectHandle()); QPDFObjGen og(annot.getObjGen()); - if (!m->annotation_to_field.contains(og)) { + if (!annotation_to_field.contains(og)) { // This is not supposed to happen, but it's easy enough for us to handle this case. // Treat the annotation as its own field. This could allow qpdf to sensibly handle a // case such as a PDF creator adding a self-contained annotation (merged with the @@ -300,16 +303,15 @@ QPDFAcroFormDocumentHelper::analyze() annot.warn( "this widget annotation is not reachable from /AcroForm in the document " "catalog"); - m->annotation_to_field[og] = QPDFFormFieldObjectHelper(annot); - m->field_to[og].annotations.emplace_back(annot); + annotation_to_field[og] = QPDFFormFieldObjectHelper(annot); + field_to[og].annotations.emplace_back(annot); } } } } bool -QPDFAcroFormDocumentHelper::traverseField( - QPDFObjectHandle field, QPDFObjectHandle const& parent, int depth) +AcroForm::traverseField(QPDFObjectHandle field, QPDFObjectHandle const& parent, int depth) { if (depth > 100) { // Arbitrarily cut off recursion at a fixed depth to avoid specially crafted files that @@ -333,8 +335,7 @@ QPDFAcroFormDocumentHelper::traverseField( return false; } QPDFObjGen og(field.getObjGen()); - if (m->field_to.contains(og) || m->annotation_to_field.contains(og) || - m->bad_fields.contains(og)) { + if (field_to.contains(og) || annotation_to_field.contains(og) || bad_fields.contains(og)) { field.warn("loop detected while traversing /AcroForm"); return false; } @@ -362,8 +363,8 @@ QPDFAcroFormDocumentHelper::traverseField( if (is_annotation) { QPDFObjectHandle our_field = (is_field ? 
field : parent); - m->field_to[our_field.getObjGen()].annotations.emplace_back(field); - m->annotation_to_field[og] = QPDFFormFieldObjectHelper(our_field); + field_to[our_field.getObjGen()].annotations.emplace_back(field); + annotation_to_field[og] = QPDFFormFieldObjectHelper(our_field); } if (is_field && depth != 0 && field["/Parent"] != parent) { @@ -386,22 +387,22 @@ QPDFAcroFormDocumentHelper::traverseField( if (is_field && field.hasKey("/T")) { QPDFFormFieldObjectHelper foh(field); std::string name = foh.getFullyQualifiedName(); - auto old = m->field_to.find(og); - if (old != m->field_to.end() && !old->second.name.empty()) { + auto old = field_to.find(og); + if (old != field_to.end() && !old->second.name.empty()) { // We might be updating after a name change, so remove any old information - m->name_to_fields[old->second.name].erase(og); + name_to_fields[old->second.name].erase(og); } - m->field_to[og].name = name; - m->name_to_fields[name].insert(og); + field_to[og].name = name; + name_to_fields[name].insert(og); } for (auto const& kid: Kids) { - if (m->bad_fields.contains(kid)) { + if (bad_fields.contains(kid)) { continue; } if (!traverseField(kid, field, 1 + depth)) { - m->bad_fields.insert(kid); + bad_fields.insert(kid); } } return true; @@ -485,7 +486,7 @@ QPDFAcroFormDocumentHelper::disableDigitalSignatures() } void -QPDFAcroFormDocumentHelper::adjustInheritedFields( +AcroForm::adjustInheritedFields( QPDFObjectHandle obj, bool override_da, std::string const& from_default_da, @@ -592,7 +593,7 @@ ResourceReplacer::handleToken(QPDFTokenizer::Token const& token) } void -QPDFAcroFormDocumentHelper::adjustDefaultAppearances( +AcroForm::adjustDefaultAppearances( QPDFObjectHandle obj, std::map> const& dr_map) { // This method is called on a field that has been copied from another file but whose /DA still @@ -650,7 +651,7 @@ QPDFAcroFormDocumentHelper::adjustDefaultAppearances( } void -QPDFAcroFormDocumentHelper::adjustAppearanceStream( 
+AcroForm::adjustAppearanceStream( QPDFObjectHandle stream, std::map> dr_map) { // We don't have to modify appearance streams or their resource dictionaries for them to display @@ -807,7 +808,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations( // Ensure that we have a /DR that is an indirect // dictionary object. if (!acroform) { - acroform = getOrCreateAcroForm(); + acroform = m->getOrCreateAcroForm(); } dr = acroform["/DR"]; if (!dr) { @@ -872,7 +873,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations( } ++i; } - adjustInheritedFields( + m->adjustInheritedFields( obj, override_da, from_default_da, override_q, from_default_q); if (foreign) { // Lazily initialize our /DR and the conflict map. @@ -888,7 +889,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations( obj.replace("/DR", dr); } if (obj["/DA"].isString() && !dr_map.empty()) { - adjustDefaultAppearances(obj, dr_map); + m->adjustDefaultAppearances(obj, dr_map); } } } @@ -1035,7 +1036,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations( } Dictionary resources = dict["/Resources"]; if (!dr_map.empty() && resources) { - adjustAppearanceStream(stream, dr_map); + m->adjustAppearanceStream(stream, dr_map); } } auto rect = cm.transformRectangle(annot["/Rect"].getArrayAsRectangle()); diff --git a/libqpdf/qpdf/AcroForm.hh b/libqpdf/qpdf/AcroForm.hh index 8733aa5..4873ce5 100644 --- a/libqpdf/qpdf/AcroForm.hh +++ b/libqpdf/qpdf/AcroForm.hh @@ -24,6 +24,10 @@ namespace qpdf::impl AcroForm(impl::Doc& doc) : Common(doc) { + // We have to analyze up front. Otherwise, when we are adding annotations and fields, we + // are in a temporarily unstable configuration where some widget annotations are not + // reachable. + analyze(); } struct FieldData @@ -32,6 +36,149 @@ namespace qpdf::impl std::string name; }; + /// Retrieves a list of widget annotations for the specified page. + /// + /// A widget annotation represents the visual part of a form field in a PDF. 
+ /// This function filters annotations on the given page, returning only those + /// annotations whose subtype is "/Widget". + /// + /// @param page A `QPDFPageObjectHelper` representing the page from which to + /// extract widget annotations. + /// + /// @return A vector of `QPDFAnnotationObjectHelper` objects corresponding to + /// the widget annotations found on the specified page. + std::vector getWidgetAnnotationsForPage(QPDFPageObjectHelper page); + + /// Analyzes the AcroForm structure in the PDF document and updates the internal + /// cache with the form fields and their corresponding widget annotations. + /// + /// The function performs the following steps: + /// - Checks if the cache is valid. If it is, the function exits early. + /// - Retrieves the `/AcroForm` dictionary from the PDF and checks if it contains + /// a `/Fields` key. + /// - If `/Fields` exists and is an array, iterates through the fields and traverses + /// them to map annotations bidirectionally to form fields. + /// - Logs a warning if the `/Fields` key is present but not an array, and initializes + /// it to an empty array. + /// - Ensures that all widget annotations are processed, including any annotations + /// that might not be reachable from the `/AcroForm`. Treats such annotations as + /// their own fields. + /// - Provides a workaround for PDF documents containing inconsistencies, such as + /// widget annotations on a page not being referenced in `/AcroForm`. + /// + /// This function allows precise navigation and manipulation of form fields and + /// their related annotations, facilitating advanced PDF document processing. + void analyze(); + + /// Recursively traverses the structure of form fields and annotations in a PDF's /AcroForm. + /// + /// The method is designed to process form fields in a hierarchical /AcroForm structure. 
+ /// It captures field and annotation data, resolves parent-child relationships, detects + /// loops, and avoids stack overflow from excessive recursion depth. + /// + /// @param field The current field or annotation to process. + /// @param parent The parent field object. If the current field is a top-level field, parent + /// will be a null object. + /// @param depth The current recursion depth to limit stack usage and avoid infinite loops. + /// + /// @return True if the field was processed successfully, false otherwise. + /// + /// - Recursion is limited to a depth of 100 to prevent stack overflow with maliciously + /// crafted files. + /// - The function skips non-indirect and invalid objects (e.g., non-dictionaries or objects + /// with invalid parent references). + /// - Detects and warns about loops in the /AcroForm hierarchy. + /// - Differentiates between terminal fields, annotations, and composite fields based on + /// dictionary keys. + /// - Tracks processed fields and annotations using internal maps to prevent reprocessing + /// and detect loops. + /// - Updates name-to-field mappings for terminal fields with a valid fully qualified name. + /// - Ensures the integrity of parent-child relationships within the field hierarchy. + /// - Any invalid child objects are logged and skipped during traversal. + bool traverseField(QPDFObjectHandle field, QPDFObjectHandle const& parent, int depth); + + /// Retrieves or creates the /AcroForm dictionary in the PDF document's root. + /// + /// - If the /AcroForm key exists in the document root and is a dictionary, + /// it is returned as is. + /// - If the /AcroForm key does not exist or is not a dictionary, a new + /// dictionary is created, stored as the /AcroForm entry in the document root, + /// and then returned. + /// + /// @return A QPDFObjectHandle representing the /AcroForm dictionary. + QPDFObjectHandle getOrCreateAcroForm(); + + /// Adjusts inherited field properties for an AcroForm field object. 
+ /// + /// This method ensures that the `/DA` (default appearance) and `/Q` (quadding) keys + /// of the specified field object are overridden if necessary, based on the provided + /// parameters. The overriding is performed only if the respective `override_da` or + /// `override_q` flags are set to true, and when the original object's values differ from + /// the provided defaults. No changes are made to fields that have explicit values for `/DA` + /// or `/Q`. + /// + /// The function is primarily used for adjusting inherited form field properties in cases + /// where the document structure or inherited values have changed (e.g., when working with + /// fields in a PDF document). + /// + /// @param obj The `QPDFObjectHandle` instance representing the form field object to be + /// adjusted. + /// @param override_da A boolean flag indicating whether to override the `/DA` key. + /// @param from_default_da The default appearance string to apply if overriding the `/DA` + /// key. + /// @param override_q A boolean flag indicating whether to override the `/Q` key. + /// @param from_default_q The default quadding value (alignment) to apply if overriding the + /// `/Q` key. + void adjustInheritedFields( + QPDFObjectHandle obj, + bool override_da, + std::string const& from_default_da, + bool override_q, + int from_default_q); + + /// Adjusts the default appearances (/DA) of an AcroForm field object. + /// + /// This method ensures that form fields copied from another PDF document + /// have their default appearances resource references updated to correctly + /// point to the appropriate resources in the current document's resource + /// dictionary (/DR). It resolves name conflicts between the dictionaries + /// of the source and destination documents by using a mapping provided in + /// `dr_map`. + /// + /// The method parses the /DA string, processes its resource references, + /// and regenerates the /DA with updated references. 
+ /// + /// @param obj The AcroForm field object whose /DA is being adjusted. + /// @param dr_map A mapping between resource names in the source document's + /// resource dictionary and their corresponding names in the current + /// document's resource dictionary. + void adjustDefaultAppearances( + QPDFObjectHandle obj, + std::map> const& dr_map); + + /// Modifies the appearance stream of an AcroForm field to ensure its resources + /// align with the resource dictionary and appearance settings. This method + /// ensures proper resource handling to avoid any conflicts when regenerating + /// the appearance stream. + /// + /// Adjustments include: + /// - Creating a private resource dictionary for the stream if not already present. + /// - Merging top-level resource keys into the stream's resource dictionary. + /// - Resolving naming conflicts between existing and remapped resource keys. + /// - Removing empty sub-dictionaries from the resource dictionary. + /// - Attaching a token filter to rewrite resource references in the stream content. + /// + /// If conflicts between keys are encountered or the stream cannot be parsed successfully, + /// appropriate warnings will be generated instead of halting execution. + /// + /// @param stream The QPDFObjectHandle representation of the PDF appearance stream to be + /// adjusted. + /// @param dr_map A mapping of resource types and their corresponding name remappings + /// used for resolving resource conflicts and regenerating appearances. + void adjustAppearanceStream( + QPDFObjectHandle stream, + std::map> dr_map); + bool cache_valid{false}; std::map field_to; std::map annotation_to_field; diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index 1e87fd6..e306363 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -3567,7 +3567,6 @@ test_101(QPDF& pdf, char const* arg2) std::cout << oh.unparseResolved() << '\n'; } - auto test_helper_throws = [&qpdf](auto helper_func) { bool thrown = false; try {