Commit 9ca80c23fa72ced55e15b2b3ebf21810240b34cf

Authored by m-holger
Committed by GitHub
2 parents f4589458 8803fede

Merge pull request #1538 from m-holger/afdh5

Refactor QPDFAcroFormDocumentHelper::transformAnnotations
libqpdf/QPDFAcroFormDocumentHelper.cc
... ... @@ -8,6 +8,7 @@
8 8 #include <qpdf/QUtil.hh>
9 9 #include <qpdf/ResourceFinder.hh>
10 10  
  11 +#include <deque>
11 12 #include <utility>
12 13  
13 14 using namespace qpdf;
... ... @@ -480,6 +481,9 @@ QPDFAcroFormDocumentHelper::adjustInheritedFields(
480 481 bool override_q,
481 482 int from_default_q)
482 483 {
  484 + if (!(override_da || override_q)) {
  485 + return;
  486 + }
483 487 // Override /Q or /DA if needed. If this object has a field type, directly or inherited, it is a
484 488 // field and not just an annotation. In that case, we need to override if we are getting a value
485 489 // from the document that is different from the value we would have gotten from the old
... ... @@ -719,7 +723,7 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream(
719 723  
720 724 void
721 725 QPDFAcroFormDocumentHelper::transformAnnotations(
722   - QPDFObjectHandle old_annots,
  726 + QPDFObjectHandle a_old_annots,
723 727 std::vector<QPDFObjectHandle>& new_annots,
724 728 std::vector<QPDFObjectHandle>& new_fields,
725 729 std::set<QPDFObjGen>& old_fields,
... ... @@ -727,94 +731,87 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
727 731 QPDF* from_qpdf,
728 732 QPDFAcroFormDocumentHelper* from_afdh)
729 733 {
730   - std::shared_ptr<QPDFAcroFormDocumentHelper> afdhph;
  734 + Array old_annots = std::move(a_old_annots);
731 735 if (!from_qpdf) {
732 736 // Assume these are from the same QPDF.
733 737 from_qpdf = &qpdf;
734 738 from_afdh = this;
735   - } else if ((from_qpdf != &qpdf) && (!from_afdh)) {
736   - afdhph = std::make_shared<QPDFAcroFormDocumentHelper>(*from_qpdf);
737   - from_afdh = afdhph.get();
  739 + } else if (from_qpdf != &qpdf && !from_afdh) {
  740 + from_afdh = &QPDFAcroFormDocumentHelper::get(*from_qpdf);
738 741 }
739   - bool foreign = (from_qpdf != &qpdf);
  742 + const bool foreign = from_qpdf != &qpdf;
740 743  
741 744 // It's possible that we will transform annotations that don't include any form fields. This
742 745 // code takes care not to muck around with /AcroForm unless we have to.
743 746  
744   - QPDFObjectHandle acroform = qpdf.getRoot().getKey("/AcroForm");
745   - QPDFObjectHandle from_acroform = from_qpdf->getRoot().getKey("/AcroForm");
  747 + Dictionary acroform = qpdf.getRoot()["/AcroForm"];
  748 + Dictionary from_acroform = from_qpdf->getRoot()["/AcroForm"];
746 749  
747 750 // /DA and /Q may be inherited from the document-level /AcroForm dictionary. If we are copying a
748 751 // foreign stream and the stream is getting one of these values from its document's /AcroForm,
749 752 // we will need to copy the value explicitly so that it doesn't start getting its default from
750 753 // the destination document.
751   - bool override_da = false;
752   - bool override_q = false;
753 754 std::string from_default_da;
754 755 int from_default_q = 0;
755 756 // If we copy any form fields, we will need to merge the source document's /DR into this
756 757 // document's /DR.
757   - QPDFObjectHandle from_dr = QPDFObjectHandle::newNull();
  758 + Dictionary from_dr;
  759 + std::string default_da;
  760 + int default_q = 0;
758 761 if (foreign) {
759   - std::string default_da;
760   - int default_q = 0;
761   - if (acroform.isDictionary()) {
762   - if (acroform.getKey("/DA").isString()) {
763   - default_da = acroform.getKey("/DA").getUTF8Value();
  762 + if (acroform) {
  763 + if (acroform["/DA"].isString()) {
  764 + default_da = acroform["/DA"].getUTF8Value();
764 765 }
765   - if (acroform.getKey("/Q").isInteger()) {
766   - default_q = acroform.getKey("/Q").getIntValueAsInt();
  766 + if (Integer Q = acroform["/Q"]) {
  767 + default_q = Q;
767 768 }
768 769 }
769   - if (from_acroform.isDictionary()) {
770   - if (from_acroform.getKey("/DR").isDictionary()) {
771   - from_dr = from_acroform.getKey("/DR");
772   - if (!from_dr.isIndirect()) {
  770 + if (from_acroform) {
  771 + from_dr = from_acroform["/DR"];
  772 + if (from_dr) {
  773 + if (!from_dr.indirect()) {
773 774 from_dr = from_qpdf->makeIndirectObject(from_dr);
774 775 }
775 776 from_dr = qpdf.copyForeignObject(from_dr);
776 777 }
777   - if (from_acroform.getKey("/DA").isString()) {
778   - from_default_da = from_acroform.getKey("/DA").getUTF8Value();
  778 + if (from_acroform["/DA"].isString()) {
  779 + from_default_da = from_acroform["/DA"].getUTF8Value();
779 780 }
780   - if (from_acroform.getKey("/Q").isInteger()) {
781   - from_default_q = from_acroform.getKey("/Q").getIntValueAsInt();
  781 + if (Integer Q = from_acroform["/Q"]) {
  782 + from_default_q = Q;
782 783 }
783 784 }
784   - if (from_default_da != default_da) {
785   - override_da = true;
786   - }
787   - if (from_default_q != default_q) {
788   - override_q = true;
789   - }
790 785 }
  786 + const bool override_da = from_acroform ? from_default_da != default_da : false;
  787 + const bool override_q = from_acroform ? from_default_q != default_q : false;
791 788  
792 789 // If we have to merge /DR, we will need a mapping of conflicting keys for rewriting /DA. Set
793 790 // this up for lazy initialization in case we encounter any form fields.
794 791 std::map<std::string, std::map<std::string, std::string>> dr_map;
795   - bool initialized_dr_map = false;
796   - QPDFObjectHandle dr = QPDFObjectHandle::newNull();
  792 + Dictionary dr;
  793 +
797 794 auto init_dr_map = [&]() {
798   - if (!initialized_dr_map) {
799   - initialized_dr_map = true;
  795 + if (!dr) {
800 796 // Ensure that we have a /DR that is an indirect
801 797 // dictionary object.
802   - if (!acroform.isDictionary()) {
  798 + if (!acroform) {
803 799 acroform = getOrCreateAcroForm();
804 800 }
805   - dr = acroform.getKey("/DR");
806   - if (!dr.isDictionary()) {
807   - dr = QPDFObjectHandle::newDictionary();
  801 + dr = acroform["/DR"];
  802 + if (!dr) {
  803 + dr = Dictionary::empty();
808 804 }
809   - dr.makeResourcesIndirect(qpdf);
810   - if (!dr.isIndirect()) {
811   - dr = acroform.replaceKeyAndGetNew("/DR", qpdf.makeIndirectObject(dr));
  805 + QPDFObjectHandle(dr).makeResourcesIndirect(qpdf);
  806 + if (!dr.indirect()) {
  807 + acroform.replaceKey("/DR", qpdf.makeIndirectObject(dr));
  808 + dr = acroform["/DR"];
812 809 }
813 810 // Merge the other document's /DR, creating a conflict map. mergeResources checks to
814 811 // make sure both objects are dictionaries. By this point, if this is foreign, from_dr
815 812 // has been copied, so we use the target qpdf as the owning qpdf.
816   - from_dr.makeResourcesIndirect(qpdf);
817   - dr.mergeResources(from_dr, &dr_map);
  813 + QPDFObjectHandle(from_dr).makeResourcesIndirect(qpdf);
  814 + QPDFObjectHandle(dr).mergeResources(from_dr, &dr_map);
818 815  
819 816 if (from_afdh->getNeedAppearances()) {
820 817 setNeedAppearances(true);
... ... @@ -836,15 +833,59 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
836 833 }
837 834 };
838 835  
839   - // Now do the actual copies.
840   -
841   - QPDFObjGen::set added_new_fields;
842   - for (auto annot: old_annots.aitems()) {
843   - if (annot.isStream()) {
844   - annot.warn("ignoring annotation that's a stream");
845   - continue;
  836 + // Traverse the field, copying kids, and preserving integrity.
  837 + auto traverse_field = [&](QPDFObjectHandle& top_field) -> void {
  838 + std::deque<Dictionary> queue({top_field});
  839 + QPDFObjGen::set seen;
  840 + for (auto it = queue.begin(); it != queue.end(); ++it) {
  841 + auto& obj = *it;
  842 + if (seen.add(obj)) {
  843 + Dictionary parent = obj["/Parent"];
  844 + if (parent.indirect()) {
  845 + auto parent_og = parent.id_gen();
  846 + if (orig_to_copy.contains(parent_og)) {
  847 + obj.replaceKey("/Parent", orig_to_copy[parent_og]);
  848 + } else {
  849 + parent.warn(
  850 + "while traversing field " + obj.id_gen().unparse(',') +
  851 + ", found parent (" + parent_og.unparse(',') +
  852 + ") that had not been seen, indicating likely invalid field structure");
  853 + }
  854 + }
  855 + size_t i = 0;
  856 + Array Kids = obj["/Kids"];
  857 + for (auto& kid: Kids) {
  858 + if (maybe_copy_object(kid)) {
  859 + Kids.set(i, kid);
  860 + queue.emplace_back(kid);
  861 + }
  862 + ++i;
  863 + }
  864 + adjustInheritedFields(
  865 + obj, override_da, from_default_da, override_q, from_default_q);
  866 + if (foreign) {
  867 + // Lazily initialize our /DR and the conflict map.
  868 + init_dr_map();
  869 + // The spec doesn't say anything about /DR on the field, but lots of writers
  870 + // put one there, and it is frequently the same as the document-level /DR.
  871 + // To avoid having the field's /DR point to information that we are not
  872 + // maintaining, just reset it to that if it exists. Empirical evidence
  873 + // suggests that many readers, including Acrobat, Adobe Acrobat Reader,
  874 + // chrome, firefox, the mac Preview application, and several of the free
  875 + // readers on Linux all ignore /DR at the field level.
  876 + if (obj.contains("/DR")) {
  877 + obj.replaceKey("/DR", dr);
  878 + }
  879 + if (obj["/DA"].isString() && !dr_map.empty()) {
  880 + adjustDefaultAppearances(obj, dr_map);
  881 + }
  882 + }
  883 + }
846 884 }
  885 + };
847 886  
  887 + auto transform_annotation =
  888 + [&](QPDFObjectHandle& annot) -> std::tuple<QPDFObjectHandle, bool, bool> {
848 889 // Make copies of annotations and fields down to the appearance streams, preserving all
849 890 // internal referential integrity. When the incoming annotations are from a different file,
850 891 // we first copy them locally. Then, whether local or foreign, we copy them again so that if
... ... @@ -870,98 +911,57 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
870 911  
871 912 auto ffield = from_afdh->getFieldForAnnotation(annot);
872 913 auto ffield_oh = ffield.getObjectHandle();
873   - QPDFObjectHandle top_field;
874   - bool have_field = false;
875   - bool have_parent = false;
  914 + if (ffield.null()) {
  915 + return {{}, false, false};
  916 + }
876 917 if (ffield_oh.isStream()) {
877 918 ffield.warn("ignoring form field that's a stream");
878   - } else if (!ffield_oh.null() && !ffield_oh.isIndirect()) {
  919 + return {{}, false, false};
  920 + }
  921 + if (!ffield_oh.isIndirect()) {
879 922 ffield.warn("ignoring form field not indirect");
880   - } else if (!ffield.null()) {
881   - // A field and its associated annotation can be the same object. This matters because we
882   - // don't want to clone the annotation and field separately in this case.
883   - have_field = true;
884   - // Find the top-level field. It may be the field itself.
885   - top_field = ffield.getTopLevelField(&have_parent).getObjectHandle();
886   - if (foreign) {
887   - // copyForeignObject returns the same value if called multiple times with the same
888   - // field. Create/retrieve the local copy of the original field. This pulls over
889   - // everything the field references including annotations and appearance streams, but
890   - // it's harmless to call copyForeignObject on them too. They will already be copied,
891   - // so we'll get the right object back.
892   -
893   - // top_field and ffield_oh are known to be indirect.
894   - top_field = qpdf.copyForeignObject(top_field);
895   - ffield_oh = qpdf.copyForeignObject(ffield_oh);
896   - } else {
897   - // We don't need to add top_field to old_fields if it's foreign because the new copy
898   - // of the foreign field won't be referenced anywhere. It's just the starting point
899   - // for us to make an additional local copy of.
900   - old_fields.insert(top_field.getObjGen());
901   - }
  923 + return {{}, false, false};
  924 + }
  925 + // A field and its associated annotation can be the same object. This matters because we
  926 + // don't want to clone the annotation and field separately in this case.
  927 + // Find the top-level field. It may be the field itself.
  928 + bool have_parent = false;
  929 + QPDFObjectHandle top_field = ffield.getTopLevelField(&have_parent).getObjectHandle();
  930 + if (foreign) {
  931 + // copyForeignObject returns the same value if called multiple times with the same
  932 + // field. Create/retrieve the local copy of the original field. This pulls over
  933 + // everything the field references including annotations and appearance streams, but
  934 + // it's harmless to call copyForeignObject on them too. They will already be copied,
  935 + // so we'll get the right object back.
  936 +
  937 + // top_field and ffield_oh are known to be indirect.
  938 + top_field = qpdf.copyForeignObject(top_field);
  939 + ffield_oh = qpdf.copyForeignObject(ffield_oh);
  940 + } else {
  941 + // We don't need to add top_field to old_fields if it's foreign because the new copy
  942 + // of the foreign field won't be referenced anywhere. It's just the starting point
  943 + // for us to make an additional local copy of.
  944 + old_fields.insert(top_field.getObjGen());
  945 + }
902 946  
903   - // Traverse the field, copying kids, and preserving integrity.
904   - std::list<QPDFObjectHandle> queue;
905   - QPDFObjGen::set seen;
906   - if (maybe_copy_object(top_field)) {
907   - queue.push_back(top_field);
908   - }
909   - for (; !queue.empty(); queue.pop_front()) {
910   - auto& obj = queue.front();
911   - if (seen.add(obj)) {
912   - auto parent = obj.getKey("/Parent");
913   - if (parent.isIndirect()) {
914   - auto parent_og = parent.getObjGen();
915   - if (orig_to_copy.contains(parent_og)) {
916   - obj.replaceKey("/Parent", orig_to_copy[parent_og]);
917   - } else {
918   - parent.warn(
919   - "while traversing field " + obj.getObjGen().unparse(',') +
920   - ", found parent (" + parent_og.unparse(',') +
921   - ") that had not been seen, indicating likely invalid field "
922   - "structure");
923   - }
924   - }
925   - auto kids = obj.getKey("/Kids");
926   - int sz = static_cast<int>(kids.size());
927   - if (sz != 1 || kids.isArray()) {
928   - for (int i = 0; i < sz; ++i) {
929   - auto kid = kids.getArrayItem(i);
930   - if (maybe_copy_object(kid)) {
931   - kids.setArrayItem(i, kid);
932   - queue.emplace_back(kid);
933   - }
934   - }
935   - }
  947 + if (maybe_copy_object(top_field)) {
  948 + traverse_field(top_field);
  949 + }
936 950  
937   - if (override_da || override_q) {
938   - adjustInheritedFields(
939   - obj, override_da, from_default_da, override_q, from_default_q);
940   - }
941   - if (foreign) {
942   - // Lazily initialize our /DR and the conflict map.
943   - init_dr_map();
944   - // The spec doesn't say anything about /DR on the field, but lots of writers
945   - // put one there, and it is frequently the same as the document-level /DR.
946   - // To avoid having the field's /DR point to information that we are not
947   - // maintaining, just reset it to that if it exists. Empirical evidence
948   - // suggests that many readers, including Acrobat, Adobe Acrobat Reader,
949   - // chrome, firefox, the mac Preview application, and several of the free
950   - // readers on Linux all ignore /DR at the field level.
951   - if (obj.hasKey("/DR")) {
952   - obj.replaceKey("/DR", dr);
953   - }
954   - }
955   - if (foreign && obj.getKey("/DA").isString() && (!dr_map.empty())) {
956   - adjustDefaultAppearances(obj, dr_map);
957   - }
958   - }
959   - }
  951 + // Now switch to copies. We already switched for top_field
  952 + maybe_copy_object(ffield_oh);
  953 + return {top_field, true, have_parent};
  954 + };
  955 +
  956 + // Now do the actual copies.
960 957  
961   - // Now switch to copies. We already switched for top_field
962   - maybe_copy_object(ffield_oh);
963   - ffield = QPDFFormFieldObjectHelper(ffield_oh);
  958 + QPDFObjGen::set added_new_fields;
  959 + for (auto annot: old_annots) {
  960 + if (annot.isStream()) {
  961 + annot.warn("ignoring annotation that's a stream");
  962 + continue;
964 963 }
  964 + auto [top_field, have_field, have_parent] = transform_annotation(annot);
965 965  
966 966 QTC::TC(
967 967 "qpdf",
... ... @@ -983,24 +983,25 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
983 983  
984 984 // Now we have copies, so we can safely mutate.
985 985 if (have_field && added_new_fields.add(top_field)) {
986   - new_fields.push_back(top_field);
  986 + new_fields.emplace_back(top_field);
987 987 }
988   - new_annots.push_back(annot);
  988 + new_annots.emplace_back(annot);
989 989  
990 990 // Identify and copy any appearance streams
991 991  
992 992 auto ah = QPDFAnnotationObjectHelper(annot);
993   - auto apdict = ah.getAppearanceDictionary();
  993 + Dictionary apdict = ah.getAppearanceDictionary();
994 994 std::vector<QPDFObjectHandle> streams;
995 995 auto replace_stream = [](auto& dict, auto& key, auto& old) {
996   - return dict.replaceKeyAndGetNew(key, old.copyStream());
  996 + dict.replaceKey(key, old.copyStream());
  997 + return dict[key];
997 998 };
998 999  
999   - for (auto& [key1, value1]: apdict.as_dictionary()) {
  1000 + for (auto& [key1, value1]: apdict) {
1000 1001 if (value1.isStream()) {
1001 1002 streams.emplace_back(replace_stream(apdict, key1, value1));
1002 1003 } else {
1003   - for (auto& [key2, value2]: value1.as_dictionary()) {
  1004 + for (auto& [key2, value2]: Dictionary(value1)) {
1004 1005 if (value2.isStream()) {
1005 1006 streams.emplace_back(replace_stream(value1, key2, value2));
1006 1007 }
... ... @@ -1010,25 +1011,23 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
1010 1011  
1011 1012 // Now we can safely mutate the annotation and its appearance streams.
1012 1013 for (auto& stream: streams) {
1013   - auto dict = stream.getDict();
1014   - auto omatrix = dict.getKey("/Matrix");
  1014 + Dictionary dict = stream.getDict();
  1015 +
1015 1016 QPDFMatrix apcm;
1016   - if (omatrix.isArray()) {
1017   - QTC::TC("qpdf", "QPDFAcroFormDocumentHelper modify ap matrix");
1018   - auto m1 = omatrix.getArrayAsMatrix();
1019   - apcm = QPDFMatrix(m1);
  1017 + Array omatrix = dict["/Matrix"];
  1018 + if (omatrix) {
  1019 + apcm = QPDFMatrix(QPDFObjectHandle(omatrix).getArrayAsMatrix());
1020 1020 }
1021 1021 apcm.concat(cm);
1022   - auto new_matrix = QPDFObjectHandle::newFromMatrix(apcm);
1023   - if (omatrix.isArray() || (apcm != QPDFMatrix())) {
1024   - dict.replaceKey("/Matrix", new_matrix);
  1022 + if (omatrix || apcm != QPDFMatrix()) {
  1023 + dict.replaceKey("/Matrix", QPDFObjectHandle::newFromMatrix(apcm));
1025 1024 }
1026   - auto resources = dict.getKey("/Resources");
1027   - if ((!dr_map.empty()) && resources.isDictionary()) {
  1025 + Dictionary resources = dict["/Resources"];
  1026 + if (!dr_map.empty() && resources) {
1028 1027 adjustAppearanceStream(stream, dr_map);
1029 1028 }
1030 1029 }
1031   - auto rect = cm.transformRectangle(annot.getKey("/Rect").getArrayAsRectangle());
  1030 + auto rect = cm.transformRectangle(annot["/Rect"].getArrayAsRectangle());
1032 1031 annot.replaceKey("/Rect", QPDFObjectHandle::newFromRectangle(rect));
1033 1032 }
1034 1033 }
... ...
qpdf/qpdf.testcov
... ... @@ -487,7 +487,6 @@ QPDFFileSpecObjectHelper empty compat_name 0
487 487 QPDFFileSpecObjectHelper non-empty compat_name 0
488 488 QPDFAcroFormDocumentHelper copy annotation 3
489 489 QPDFAcroFormDocumentHelper field with parent 3
490   -QPDFAcroFormDocumentHelper modify ap matrix 0
491 490 QPDFJob pages keeping field from original 0
492 491 QPDFObjectHandle merge reuse 0
493 492 QPDFObjectHandle merge generate 0
... ...