Commit f9287c412f663d3dce55251ae236916af8d347db

Authored by m-holger
1 parent 0fa8fbbc

Refactor `QPDFAcroFormDocumentHelper::transformAnnotations`: extract `transform_…

…annotation` to increase code clarity.
libqpdf/QPDFAcroFormDocumentHelper.cc
@@ -831,15 +831,8 @@ QPDFAcroFormDocumentHelper::transformAnnotations( @@ -831,15 +831,8 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
831 } 831 }
832 }; 832 };
833 833
834 - // Now do the actual copies.  
835 -  
836 - QPDFObjGen::set added_new_fields;  
837 - for (auto annot: old_annots) {  
838 - if (annot.isStream()) {  
839 - annot.warn("ignoring annotation that's a stream");  
840 - continue;  
841 - }  
842 - 834 + auto transform_annotation =
  835 + [&](QPDFObjectHandle& annot) -> std::tuple<QPDFObjectHandle, bool, bool> {
843 // Make copies of annotations and fields down to the appearance streams, preserving all 836 // Make copies of annotations and fields down to the appearance streams, preserving all
844 // internal referential integrity. When the incoming annotations are from a different file, 837 // internal referential integrity. When the incoming annotations are from a different file,
845 // we first copy them locally. Then, whether local or foreign, we copy them again so that if 838 // we first copy them locally. Then, whether local or foreign, we copy them again so that if
@@ -865,98 +858,111 @@ QPDFAcroFormDocumentHelper::transformAnnotations( @@ -865,98 +858,111 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
865 858
866 auto ffield = from_afdh->getFieldForAnnotation(annot); 859 auto ffield = from_afdh->getFieldForAnnotation(annot);
867 auto ffield_oh = ffield.getObjectHandle(); 860 auto ffield_oh = ffield.getObjectHandle();
868 - QPDFObjectHandle top_field;  
869 - bool have_field = false;  
870 - bool have_parent = false; 861 + if (ffield.null()) {
  862 + return {{}, false, false};
  863 + }
871 if (ffield_oh.isStream()) { 864 if (ffield_oh.isStream()) {
872 ffield.warn("ignoring form field that's a stream"); 865 ffield.warn("ignoring form field that's a stream");
873 - } else if (!ffield_oh.null() && !ffield_oh.isIndirect()) { 866 + return {{}, false, false};
  867 + }
  868 + if (!ffield_oh.isIndirect()) {
874 ffield.warn("ignoring form field not indirect"); 869 ffield.warn("ignoring form field not indirect");
875 - } else if (!ffield.null()) {  
876 - // A field and its associated annotation can be the same object. This matters because we  
877 - // don't want to clone the annotation and field separately in this case.  
878 - have_field = true;  
879 - // Find the top-level field. It may be the field itself.  
880 - top_field = ffield.getTopLevelField(&have_parent).getObjectHandle();  
881 - if (foreign) {  
882 - // copyForeignObject returns the same value if called multiple times with the same  
883 - // field. Create/retrieve the local copy of the original field. This pulls over  
884 - // everything the field references including annotations and appearance streams, but  
885 - // it's harmless to call copyForeignObject on them too. They will already be copied,  
886 - // so we'll get the right object back.  
887 -  
888 - // top_field and ffield_oh are known to be indirect.  
889 - top_field = qpdf.copyForeignObject(top_field);  
890 - ffield_oh = qpdf.copyForeignObject(ffield_oh);  
891 - } else {  
892 - // We don't need to add top_field to old_fields if it's foreign because the new copy  
893 - // of the foreign field won't be referenced anywhere. It's just the starting point  
894 - // for us to make an additional local copy of.  
895 - old_fields.insert(top_field.getObjGen());  
896 - } 870 + return {{}, false, false};
  871 + }
  872 + // A field and its associated annotation can be the same object. This matters because we
  873 + // don't want to clone the annotation and field separately in this case.
  874 + // Find the top-level field. It may be the field itself.
  875 + bool have_parent = false;
  876 + QPDFObjectHandle top_field = ffield.getTopLevelField(&have_parent).getObjectHandle();
  877 + if (foreign) {
  878 + // copyForeignObject returns the same value if called multiple times with the same
  879 + // field. Create/retrieve the local copy of the original field. This pulls over
  880 + // everything the field references including annotations and appearance streams, but
  881 + // it's harmless to call copyForeignObject on them too. They will already be copied,
  882 + // so we'll get the right object back.
  883 +
  884 + // top_field and ffield_oh are known to be indirect.
  885 + top_field = qpdf.copyForeignObject(top_field);
  886 + ffield_oh = qpdf.copyForeignObject(ffield_oh);
  887 + } else {
  888 + // We don't need to add top_field to old_fields if it's foreign because the new copy
  889 + // of the foreign field won't be referenced anywhere. It's just the starting point
  890 + // for us to make an additional local copy of.
  891 + old_fields.insert(top_field.getObjGen());
  892 + }
897 893
898 - // Traverse the field, copying kids, and preserving integrity.  
899 - std::list<QPDFObjectHandle> queue;  
900 - QPDFObjGen::set seen;  
901 - if (maybe_copy_object(top_field)) {  
902 - queue.emplace_back(top_field);  
903 - }  
904 - for (; !queue.empty(); queue.pop_front()) {  
905 - auto& obj = queue.front();  
906 - if (seen.add(obj)) {  
907 - auto parent = obj["/Parent"];  
908 - if (parent.isIndirect()) {  
909 - auto parent_og = parent.getObjGen();  
910 - if (orig_to_copy.contains(parent_og)) {  
911 - obj.replaceKey("/Parent", orig_to_copy[parent_og]);  
912 - } else {  
913 - parent.warn(  
914 - "while traversing field " + obj.getObjGen().unparse(',') +  
915 - ", found parent (" + parent_og.unparse(',') +  
916 - ") that had not been seen, indicating likely invalid field "  
917 - "structure");  
918 - } 894 + // Traverse the field, copying kids, and preserving integrity.
  895 + std::list<QPDFObjectHandle> queue;
  896 + QPDFObjGen::set seen;
  897 + if (maybe_copy_object(top_field)) {
  898 + queue.emplace_back(top_field);
  899 + }
  900 + for (; !queue.empty(); queue.pop_front()) {
  901 + auto& obj = queue.front();
  902 + if (seen.add(obj)) {
  903 + auto parent = obj["/Parent"];
  904 + if (parent.isIndirect()) {
  905 + auto parent_og = parent.getObjGen();
  906 + if (orig_to_copy.contains(parent_og)) {
  907 + obj.replaceKey("/Parent", orig_to_copy[parent_og]);
  908 + } else {
  909 + parent.warn(
  910 + "while traversing field " + obj.getObjGen().unparse(',') +
  911 + ", found parent (" + parent_og.unparse(',') +
  912 + ") that had not been seen, indicating likely invalid field structure");
919 } 913 }
920 - auto kids = obj["/Kids"];  
921 - int sz = static_cast<int>(kids.size());  
922 - if (sz != 1 || kids.isArray()) {  
923 - for (int i = 0; i < sz; ++i) {  
924 - auto kid = kids.getArrayItem(i);  
925 - if (maybe_copy_object(kid)) {  
926 - kids.setArrayItem(i, kid);  
927 - queue.emplace_back(kid);  
928 - } 914 + }
  915 + auto kids = obj["/Kids"];
  916 + int sz = static_cast<int>(kids.size());
  917 + if (sz != 1 || kids.isArray()) {
  918 + for (int i = 0; i < sz; ++i) {
  919 + auto kid = kids.getArrayItem(i);
  920 + if (maybe_copy_object(kid)) {
  921 + kids.setArrayItem(i, kid);
  922 + queue.emplace_back(kid);
929 } 923 }
930 } 924 }
  925 + }
931 926
932 - if (override_da || override_q) {  
933 - adjustInheritedFields(  
934 - obj, override_da, from_default_da, override_q, from_default_q);  
935 - }  
936 - if (foreign) {  
937 - // Lazily initialize our /DR and the conflict map.  
938 - init_dr_map();  
939 - // The spec doesn't say anything about /DR on the field, but lots of writers  
940 - // put one there, and it is frequently the same as the document-level /DR.  
941 - // To avoid having the field's /DR point to information that we are not  
942 - // maintaining, just reset it to that if it exists. Empirical evidence  
943 - // suggests that many readers, including Acrobat, Adobe Acrobat Reader,  
944 - // chrome, firefox, the mac Preview application, and several of the free  
945 - // readers on Linux all ignore /DR at the field level.  
946 - if (obj.hasKey("/DR")) {  
947 - obj.replaceKey("/DR", dr);  
948 - }  
949 - }  
950 - if (foreign && obj["/DA"].isString() && !dr_map.empty()) {  
951 - adjustDefaultAppearances(obj, dr_map); 927 + if (override_da || override_q) {
  928 + adjustInheritedFields(
  929 + obj, override_da, from_default_da, override_q, from_default_q);
  930 + }
  931 + if (foreign) {
  932 + // Lazily initialize our /DR and the conflict map.
  933 + init_dr_map();
  934 + // The spec doesn't say anything about /DR on the field, but lots of writers
  935 + // put one there, and it is frequently the same as the document-level /DR.
  936 + // To avoid having the field's /DR point to information that we are not
  937 + // maintaining, just reset it to that if it exists. Empirical evidence
  938 + // suggests that many readers, including Acrobat, Adobe Acrobat Reader,
  939 + // chrome, firefox, the mac Preview application, and several of the free
  940 + // readers on Linux all ignore /DR at the field level.
  941 + if (obj.hasKey("/DR")) {
  942 + obj.replaceKey("/DR", dr);
952 } 943 }
953 } 944 }
  945 + if (foreign && obj["/DA"].isString() && !dr_map.empty()) {
  946 + adjustDefaultAppearances(obj, dr_map);
  947 + }
954 } 948 }
  949 + }
955 950
956 - // Now switch to copies. We already switched for top_field  
957 - maybe_copy_object(ffield_oh);  
958 - ffield = QPDFFormFieldObjectHelper(ffield_oh); 951 + // Now switch to copies. We already switched for top_field
  952 + maybe_copy_object(ffield_oh);
  953 + ffield = QPDFFormFieldObjectHelper(ffield_oh);
  954 + return {top_field, true, have_parent};
  955 + };
  956 +
  957 + // Now do the actual copies.
  958 +
  959 + QPDFObjGen::set added_new_fields;
  960 + for (auto annot: old_annots) {
  961 + if (annot.isStream()) {
  962 + annot.warn("ignoring annotation that's a stream");
  963 + continue;
959 } 964 }
  965 + auto [top_field, have_field, have_parent] = transform_annotation(annot);
960 966
961 QTC::TC( 967 QTC::TC(
962 "qpdf", 968 "qpdf",