Commit bd67a468e42942e6e4166f0887b15b95ce2cf818

Authored by m-holger
1 parent f26327a3

Refactor `AcroForm` implementation to improve encapsulation and reusability.

- Move AcroForm-related methods (`analyze`, `traverseField`, `getOrCreateAcroForm`, etc.) from `QPDFAcroFormDocumentHelper` to the `AcroForm` class.
- Update method calls across files to reflect changes.
- Improve comments for methods to align with PDF specifications.
include/qpdf/QPDFAcroFormDocumentHelper.hh
... ... @@ -225,21 +225,6 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
225 225 std::set<QPDFObjGen>* new_fields = nullptr);
226 226  
227 227 private:
228   - void analyze();
229   - bool traverseField(QPDFObjectHandle field, QPDFObjectHandle const& parent, int depth);
230   - QPDFObjectHandle getOrCreateAcroForm();
231   - void adjustInheritedFields(
232   - QPDFObjectHandle obj,
233   - bool override_da,
234   - std::string const& from_default_da,
235   - bool override_q,
236   - int from_default_q);
237   - void adjustDefaultAppearances(
238   - QPDFObjectHandle obj,
239   - std::map<std::string, std::map<std::string, std::string>> const& dr_map);
240   - void adjustAppearanceStream(
241   - QPDFObjectHandle stream, std::map<std::string, std::map<std::string, std::string>> dr_map);
242   -
243 228 class Members;
244 229  
245 230 std::shared_ptr<Members> m;
... ...
libqpdf/QPDFAcroFormDocumentHelper.cc
... ... @@ -31,9 +31,6 @@ QPDFAcroFormDocumentHelper::QPDFAcroFormDocumentHelper(QPDF& qpdf) :
31 31 QPDFDocumentHelper(qpdf),
32 32 m(std::make_shared<Members>(qpdf))
33 33 {
34   - // We have to analyze up front. Otherwise, when we are adding annotations and fields, we are in
35   - // a temporarily unstable configuration where some widget annotations are not reachable.
36   - analyze();
37 34 }
38 35  
39 36 QPDFAcroFormDocumentHelper&
... ... @@ -46,7 +43,7 @@ void
46 43 QPDFAcroFormDocumentHelper::validate(bool repair)
47 44 {
48 45 invalidateCache();
49   - analyze();
  46 + m->analyze();
50 47 }
51 48  
52 49 void
... ... @@ -65,7 +62,7 @@ QPDFAcroFormDocumentHelper::hasAcroForm()
65 62 }
66 63  
67 64 QPDFObjectHandle
68   -QPDFAcroFormDocumentHelper::getOrCreateAcroForm()
  65 +AcroForm::getOrCreateAcroForm()
69 66 {
70 67 auto acroform = qpdf.getRoot().getKey("/AcroForm");
71 68 if (!acroform.isDictionary()) {
... ... @@ -78,19 +75,19 @@ QPDFAcroFormDocumentHelper::getOrCreateAcroForm()
78 75 void
79 76 QPDFAcroFormDocumentHelper::addFormField(QPDFFormFieldObjectHelper ff)
80 77 {
81   - auto acroform = getOrCreateAcroForm();
  78 + auto acroform = m->getOrCreateAcroForm();
82 79 auto fields = acroform.getKey("/Fields");
83 80 if (!fields.isArray()) {
84 81 fields = acroform.replaceKeyAndGetNew("/Fields", QPDFObjectHandle::newArray());
85 82 }
86 83 fields.appendItem(ff.getObjectHandle());
87   - traverseField(ff.getObjectHandle(), {}, 0);
  84 + m->traverseField(ff.getObjectHandle(), {}, 0);
88 85 }
89 86  
90 87 void
91 88 QPDFAcroFormDocumentHelper::addAndRenameFormFields(std::vector<QPDFObjectHandle> fields)
92 89 {
93   - analyze();
  90 + m->analyze();
94 91 std::map<std::string, std::string> renames;
95 92 QPDFObjGen::set seen;
96 93 for (std::list<QPDFObjectHandle> queue{fields.begin(), fields.end()}; !queue.empty();
... ... @@ -182,13 +179,13 @@ void
182 179 QPDFAcroFormDocumentHelper::setFormFieldName(QPDFFormFieldObjectHelper ff, std::string const& name)
183 180 {
184 181 ff.setFieldAttribute("/T", name);
185   - traverseField(ff, ff["/Parent"], 0);
  182 + m->traverseField(ff, ff["/Parent"], 0);
186 183 }
187 184  
188 185 std::vector<QPDFFormFieldObjectHelper>
189 186 QPDFAcroFormDocumentHelper::getFormFields()
190 187 {
191   - analyze();
  188 + m->analyze();
192 189 std::vector<QPDFFormFieldObjectHelper> result;
193 190 for (auto const& [og, data]: m->field_to) {
194 191 if (!data.annotations.empty()) {
... ... @@ -201,7 +198,7 @@ QPDFAcroFormDocumentHelper::getFormFields()
201 198 std::set<QPDFObjGen>
202 199 QPDFAcroFormDocumentHelper::getFieldsWithQualifiedName(std::string const& name)
203 200 {
204   - analyze();
  201 + m->analyze();
205 202 // Keep from creating an empty entry
206 203 auto iter = m->name_to_fields.find(name);
207 204 if (iter != m->name_to_fields.end()) {
... ... @@ -213,7 +210,7 @@ QPDFAcroFormDocumentHelper::getFieldsWithQualifiedName(std::string const& name)
213 210 std::vector<QPDFAnnotationObjectHelper>
214 211 QPDFAcroFormDocumentHelper::getAnnotationsForField(QPDFFormFieldObjectHelper h)
215 212 {
216   - analyze();
  213 + m->analyze();
217 214 std::vector<QPDFAnnotationObjectHelper> result;
218 215 QPDFObjGen og(h.getObjectHandle().getObjGen());
219 216 if (m->field_to.contains(og)) {
... ... @@ -225,13 +222,19 @@ QPDFAcroFormDocumentHelper::getAnnotationsForField(QPDFFormFieldObjectHelper h)
225 222 std::vector<QPDFAnnotationObjectHelper>
226 223 QPDFAcroFormDocumentHelper::getWidgetAnnotationsForPage(QPDFPageObjectHelper h)
227 224 {
  225 + return m->getWidgetAnnotationsForPage(h);
  226 +}
  227 +
  228 +std::vector<QPDFAnnotationObjectHelper>
  229 +AcroForm::getWidgetAnnotationsForPage(QPDFPageObjectHelper h)
  230 +{
228 231 return h.getAnnotations("/Widget");
229 232 }
230 233  
231 234 std::vector<QPDFFormFieldObjectHelper>
232 235 QPDFAcroFormDocumentHelper::getFormFieldsForPage(QPDFPageObjectHelper ph)
233 236 {
234   - analyze();
  237 + m->analyze();
235 238 QPDFObjGen::set todo;
236 239 std::vector<QPDFFormFieldObjectHelper> result;
237 240 for (auto& annot: getWidgetAnnotationsForPage(ph)) {
... ... @@ -250,7 +253,7 @@ QPDFAcroFormDocumentHelper::getFieldForAnnotation(QPDFAnnotationObjectHelper h)
250 253 if (!oh.isDictionaryOfType("", "/Widget")) {
251 254 return Null::temp();
252 255 }
253   - analyze();
  256 + m->analyze();
254 257 QPDFObjGen og(oh.getObjGen());
255 258 if (m->annotation_to_field.contains(og)) {
256 259 return m->annotation_to_field[og];
... ... @@ -259,12 +262,12 @@ QPDFAcroFormDocumentHelper::getFieldForAnnotation(QPDFAnnotationObjectHelper h)
259 262 }
260 263  
261 264 void
262   -QPDFAcroFormDocumentHelper::analyze()
  265 +AcroForm::analyze()
263 266 {
264   - if (m->cache_valid) {
  267 + if (cache_valid) {
265 268 return;
266 269 }
267   - m->cache_valid = true;
  270 + cache_valid = true;
268 271 QPDFObjectHandle acroform = qpdf.getRoot().getKey("/AcroForm");
269 272 if (!(acroform.isDictionary() && acroform.hasKey("/Fields"))) {
270 273 return;
... ... @@ -287,11 +290,11 @@ QPDFAcroFormDocumentHelper::analyze()
287 290 // a file that contains this kind of error will probably not
288 291 // actually work with most viewers.
289 292  
290   - for (auto const& ph: QPDFPageDocumentHelper(qpdf).getAllPages()) {
  293 + for (QPDFPageObjectHelper ph: pages) {
291 294 for (auto const& iter: getWidgetAnnotationsForPage(ph)) {
292 295 QPDFObjectHandle annot(iter.getObjectHandle());
293 296 QPDFObjGen og(annot.getObjGen());
294   - if (!m->annotation_to_field.contains(og)) {
  297 + if (!annotation_to_field.contains(og)) {
295 298 // This is not supposed to happen, but it's easy enough for us to handle this case.
296 299 // Treat the annotation as its own field. This could allow qpdf to sensibly handle a
297 300 // case such as a PDF creator adding a self-contained annotation (merged with the
... ... @@ -300,16 +303,15 @@ QPDFAcroFormDocumentHelper::analyze()
300 303 annot.warn(
301 304 "this widget annotation is not reachable from /AcroForm in the document "
302 305 "catalog");
303   - m->annotation_to_field[og] = QPDFFormFieldObjectHelper(annot);
304   - m->field_to[og].annotations.emplace_back(annot);
  306 + annotation_to_field[og] = QPDFFormFieldObjectHelper(annot);
  307 + field_to[og].annotations.emplace_back(annot);
305 308 }
306 309 }
307 310 }
308 311 }
309 312  
310 313 bool
311   -QPDFAcroFormDocumentHelper::traverseField(
312   - QPDFObjectHandle field, QPDFObjectHandle const& parent, int depth)
  314 +AcroForm::traverseField(QPDFObjectHandle field, QPDFObjectHandle const& parent, int depth)
313 315 {
314 316 if (depth > 100) {
315 317 // Arbitrarily cut off recursion at a fixed depth to avoid specially crafted files that
... ... @@ -333,8 +335,7 @@ QPDFAcroFormDocumentHelper::traverseField(
333 335 return false;
334 336 }
335 337 QPDFObjGen og(field.getObjGen());
336   - if (m->field_to.contains(og) || m->annotation_to_field.contains(og) ||
337   - m->bad_fields.contains(og)) {
  338 + if (field_to.contains(og) || annotation_to_field.contains(og) || bad_fields.contains(og)) {
338 339 field.warn("loop detected while traversing /AcroForm");
339 340 return false;
340 341 }
... ... @@ -362,8 +363,8 @@ QPDFAcroFormDocumentHelper::traverseField(
362 363  
363 364 if (is_annotation) {
364 365 QPDFObjectHandle our_field = (is_field ? field : parent);
365   - m->field_to[our_field.getObjGen()].annotations.emplace_back(field);
366   - m->annotation_to_field[og] = QPDFFormFieldObjectHelper(our_field);
  366 + field_to[our_field.getObjGen()].annotations.emplace_back(field);
  367 + annotation_to_field[og] = QPDFFormFieldObjectHelper(our_field);
367 368 }
368 369  
369 370 if (is_field && depth != 0 && field["/Parent"] != parent) {
... ... @@ -386,22 +387,22 @@ QPDFAcroFormDocumentHelper::traverseField(
386 387 if (is_field && field.hasKey("/T")) {
387 388 QPDFFormFieldObjectHelper foh(field);
388 389 std::string name = foh.getFullyQualifiedName();
389   - auto old = m->field_to.find(og);
390   - if (old != m->field_to.end() && !old->second.name.empty()) {
  390 + auto old = field_to.find(og);
  391 + if (old != field_to.end() && !old->second.name.empty()) {
391 392 // We might be updating after a name change, so remove any old information
392   - m->name_to_fields[old->second.name].erase(og);
  393 + name_to_fields[old->second.name].erase(og);
393 394 }
394   - m->field_to[og].name = name;
395   - m->name_to_fields[name].insert(og);
  395 + field_to[og].name = name;
  396 + name_to_fields[name].insert(og);
396 397 }
397 398  
398 399 for (auto const& kid: Kids) {
399   - if (m->bad_fields.contains(kid)) {
  400 + if (bad_fields.contains(kid)) {
400 401 continue;
401 402 }
402 403  
403 404 if (!traverseField(kid, field, 1 + depth)) {
404   - m->bad_fields.insert(kid);
  405 + bad_fields.insert(kid);
405 406 }
406 407 }
407 408 return true;
... ... @@ -485,7 +486,7 @@ QPDFAcroFormDocumentHelper::disableDigitalSignatures()
485 486 }
486 487  
487 488 void
488   -QPDFAcroFormDocumentHelper::adjustInheritedFields(
  489 +AcroForm::adjustInheritedFields(
489 490 QPDFObjectHandle obj,
490 491 bool override_da,
491 492 std::string const& from_default_da,
... ... @@ -592,7 +593,7 @@ ResourceReplacer::handleToken(QPDFTokenizer::Token const& token)
592 593 }
593 594  
594 595 void
595   -QPDFAcroFormDocumentHelper::adjustDefaultAppearances(
  596 +AcroForm::adjustDefaultAppearances(
596 597 QPDFObjectHandle obj, std::map<std::string, std::map<std::string, std::string>> const& dr_map)
597 598 {
598 599 // This method is called on a field that has been copied from another file but whose /DA still
... ... @@ -650,7 +651,7 @@ QPDFAcroFormDocumentHelper::adjustDefaultAppearances(
650 651 }
651 652  
652 653 void
653   -QPDFAcroFormDocumentHelper::adjustAppearanceStream(
  654 +AcroForm::adjustAppearanceStream(
654 655 QPDFObjectHandle stream, std::map<std::string, std::map<std::string, std::string>> dr_map)
655 656 {
656 657 // We don't have to modify appearance streams or their resource dictionaries for them to display
... ... @@ -807,7 +808,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
807 808 // Ensure that we have a /DR that is an indirect
808 809 // dictionary object.
809 810 if (!acroform) {
810   - acroform = getOrCreateAcroForm();
  811 + acroform = m->getOrCreateAcroForm();
811 812 }
812 813 dr = acroform["/DR"];
813 814 if (!dr) {
... ... @@ -872,7 +873,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
872 873 }
873 874 ++i;
874 875 }
875   - adjustInheritedFields(
  876 + m->adjustInheritedFields(
876 877 obj, override_da, from_default_da, override_q, from_default_q);
877 878 if (foreign) {
878 879 // Lazily initialize our /DR and the conflict map.
... ... @@ -888,7 +889,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
888 889 obj.replace("/DR", dr);
889 890 }
890 891 if (obj["/DA"].isString() && !dr_map.empty()) {
891   - adjustDefaultAppearances(obj, dr_map);
  892 + m->adjustDefaultAppearances(obj, dr_map);
892 893 }
893 894 }
894 895 }
... ... @@ -1035,7 +1036,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
1035 1036 }
1036 1037 Dictionary resources = dict["/Resources"];
1037 1038 if (!dr_map.empty() && resources) {
1038   - adjustAppearanceStream(stream, dr_map);
  1039 + m->adjustAppearanceStream(stream, dr_map);
1039 1040 }
1040 1041 }
1041 1042 auto rect = cm.transformRectangle(annot["/Rect"].getArrayAsRectangle());
... ...
libqpdf/qpdf/AcroForm.hh
... ... @@ -24,6 +24,10 @@ namespace qpdf::impl
24 24 AcroForm(impl::Doc& doc) :
25 25 Common(doc)
26 26 {
  27 + // We have to analyze up front. Otherwise, when we are adding annotations and fields, we
  28 + // are in a temporarily unstable configuration where some widget annotations are not
  29 + // reachable.
  30 + analyze();
27 31 }
28 32  
29 33 struct FieldData
... ... @@ -32,6 +36,149 @@ namespace qpdf::impl
32 36 std::string name;
33 37 };
34 38  
  39 + /// Retrieves a list of widget annotations for the specified page.
  40 + ///
  41 + /// A widget annotation represents the visual part of a form field in a PDF.
  42 + /// This function filters annotations on the given page, returning only those
  43 + /// annotations whose subtype is "/Widget".
  44 + ///
  45 + /// @param page A `QPDFPageObjectHelper` representing the page from which to
  46 + /// extract widget annotations.
  47 + ///
  48 + /// @return A vector of `QPDFAnnotationObjectHelper` objects corresponding to
  49 + /// the widget annotations found on the specified page.
  50 + std::vector<QPDFAnnotationObjectHelper> getWidgetAnnotationsForPage(QPDFPageObjectHelper page);
  51 +
  52 + /// Analyzes the AcroForm structure in the PDF document and updates the internal
  53 + /// cache with the form fields and their corresponding widget annotations.
  54 + ///
  55 + /// The function performs the following steps:
  56 + /// - Checks if the cache is valid. If it is, the function exits early.
  57 + /// - Retrieves the `/AcroForm` dictionary from the PDF and checks if it contains
  58 + /// a `/Fields` key.
  59 + /// - If `/Fields` exists and is an array, iterates through the fields and traverses
  60 + /// them to map annotations bidirectionally to form fields.
  61 + /// - Logs a warning if the `/Fields` key is present but not an array, and initializes
  62 + /// it to an empty array.
  63 + /// - Ensures that all widget annotations are processed, including any annotations
  64 + /// that might not be reachable from the `/AcroForm`. Treats such annotations as
  65 + /// their own fields.
  66 + /// - Provides a workaround for PDF documents containing inconsistencies, such as
  67 + /// widget annotations on a page not being referenced in `/AcroForm`.
  68 + ///
  69 + /// This function allows precise navigation and manipulation of form fields and
  70 + /// their related annotations, facilitating advanced PDF document processing.
  71 + void analyze();
  72 +
  73 + /// Recursively traverses the structure of form fields and annotations in a PDF's /AcroForm.
  74 + ///
  75 + /// The method is designed to process form fields in a hierarchical /AcroForm structure.
  76 + /// It captures field and annotation data, resolves parent-child relationships, detects
  77 + /// loops, and avoids stack overflow from excessive recursion depth.
  78 + ///
  79 + /// @param field The current field or annotation to process.
  80 + /// @param parent The parent field object. If the current field is a top-level field, parent
  81 + /// will be a null object.
  82 + /// @param depth The current recursion depth to limit stack usage and avoid infinite loops.
  83 + ///
  84 + /// @return True if the field was processed successfully, false otherwise.
  85 + ///
  86 + /// - Recursion is limited to a depth of 100 to prevent stack overflow with maliciously
  87 + /// crafted files.
  88 + /// - The function skips non-indirect and invalid objects (e.g., non-dictionaries or objects
  89 + /// with invalid parent references).
  90 + /// - Detects and warns about loops in the /AcroForm hierarchy.
  91 + /// - Differentiates between terminal fields, annotations, and composite fields based on
  92 + /// dictionary keys.
  93 + /// - Tracks processed fields and annotations using internal maps to prevent reprocessing
  94 + /// and detect loops.
  95 + /// - Updates name-to-field mappings for terminal fields with a valid fully qualified name.
  96 + /// - Ensures the integrity of parent-child relationships within the field hierarchy.
  97 + /// - Any invalid child objects are logged and skipped during traversal.
  98 + bool traverseField(QPDFObjectHandle field, QPDFObjectHandle const& parent, int depth);
  99 +
  100 + /// Retrieves or creates the /AcroForm dictionary in the PDF document's root.
  101 + ///
  102 + /// - If the /AcroForm key exists in the document root and is a dictionary,
  103 + /// it is returned as is.
  104 + /// - If the /AcroForm key does not exist or is not a dictionary, a new
  105 + /// dictionary is created, stored as the /AcroForm entry in the document root,
  106 + /// and then returned.
  107 + ///
  108 + /// @return A QPDFObjectHandle representing the /AcroForm dictionary.
  109 + QPDFObjectHandle getOrCreateAcroForm();
  110 +
  111 + /// Adjusts inherited field properties for an AcroForm field object.
  112 + ///
  113 + /// This method ensures that the `/DA` (default appearance) and `/Q` (quadding) keys
  114 + /// of the specified field object are overridden if necessary, based on the provided
  115 + /// parameters. The overriding is performed only if the respective `override_da` or
  116 + /// `override_q` flags are set to true, and when the original object's values differ from
  117 + /// the provided defaults. No changes are made to fields that have explicit values for `/DA`
  118 + /// or `/Q`.
  119 + ///
  120 + /// The function is primarily used for adjusting inherited form field properties in cases
  121 + /// where the document structure or inherited values have changed (e.g., when working with
  122 + /// fields in a PDF document).
  123 + ///
  124 + /// @param obj The `QPDFObjectHandle` instance representing the form field object to be
  125 + /// adjusted.
  126 + /// @param override_da A boolean flag indicating whether to override the `/DA` key.
  127 + /// @param from_default_da The default appearance string to apply if overriding the `/DA`
  128 + /// key.
  129 + /// @param override_q A boolean flag indicating whether to override the `/Q` key.
  130 + /// @param from_default_q The default quadding value (alignment) to apply if overriding the
  131 + /// `/Q` key.
  132 + void adjustInheritedFields(
  133 + QPDFObjectHandle obj,
  134 + bool override_da,
  135 + std::string const& from_default_da,
  136 + bool override_q,
  137 + int from_default_q);
  138 +
  139 + /// Adjusts the default appearances (/DA) of an AcroForm field object.
  140 + ///
  141 + /// This method ensures that form fields copied from another PDF document
  142 + /// have their default appearances resource references updated to correctly
  143 + /// point to the appropriate resources in the current document's resource
  144 + /// dictionary (/DR). It resolves name conflicts between the dictionaries
  145 + /// of the source and destination documents by using a mapping provided in
  146 + /// `dr_map`.
  147 + ///
  148 + /// The method parses the /DA string, processes its resource references,
  149 + /// and regenerates the /DA with updated references.
  150 + ///
  151 + /// @param obj The AcroForm field object whose /DA is being adjusted.
  152 + /// @param dr_map A mapping between resource names in the source document's
  153 + /// resource dictionary and their corresponding names in the current
  154 + /// document's resource dictionary.
  155 + void adjustDefaultAppearances(
  156 + QPDFObjectHandle obj,
  157 + std::map<std::string, std::map<std::string, std::string>> const& dr_map);
  158 +
  159 + /// Modifies the appearance stream of an AcroForm field to ensure its resources
  160 + /// align with the resource dictionary and appearance settings. This method
  161 + /// ensures proper resource handling to avoid any conflicts when regenerating
  162 + /// the appearance stream.
  163 + ///
  164 + /// Adjustments include:
  165 + /// - Creating a private resource dictionary for the stream if not already present.
  166 + /// - Merging top-level resource keys into the stream's resource dictionary.
  167 + /// - Resolving naming conflicts between existing and remapped resource keys.
  168 + /// - Removing empty sub-dictionaries from the resource dictionary.
  169 + /// - Attaching a token filter to rewrite resource references in the stream content.
  170 + ///
  171 + /// If conflicts between keys are encountered or the stream cannot be parsed successfully,
  172 + /// appropriate warnings will be generated instead of halting execution.
  173 + ///
  174 + /// @param stream The QPDFObjectHandle representation of the PDF appearance stream to be
  175 + /// adjusted.
  176 + /// @param dr_map A mapping of resource types and their corresponding name remappings
  177 + /// used for resolving resource conflicts and regenerating appearances.
  178 + void adjustAppearanceStream(
  179 + QPDFObjectHandle stream,
  180 + std::map<std::string, std::map<std::string, std::string>> dr_map);
  181 +
35 182 bool cache_valid{false};
36 183 std::map<QPDFObjGen, FieldData> field_to;
37 184 std::map<QPDFObjGen, QPDFFormFieldObjectHelper> annotation_to_field;
... ...
qpdf/test_driver.cc
... ... @@ -3567,7 +3567,6 @@ test_101(QPDF& pdf, char const* arg2)
3567 3567 std::cout << oh.unparseResolved() << '\n';
3568 3568 }
3569 3569  
3570   -
3571 3570 auto test_helper_throws = [&qpdf](auto helper_func) {
3572 3571 bool thrown = false;
3573 3572 try {
... ...