Commit 0d5c57c151a35be81b98eef6051303301d253112

Authored by m-holger
1 parent f1800410

Revert "Merge pull request #1272 from m-holger/xref_table"

This reverts commit ff2a78f579ebdd06b417e34260a17dba06e71137, reversing
changes made to 8f54319f7a6514110f4b05cbbf1cb1c9fc8cb6a0.
include/qpdf/QPDF.hh
@@ -725,15 +725,165 @@ class QPDF @@ -725,15 +725,165 @@ class QPDF
725 void removePage(QPDFObjectHandle page); 725 void removePage(QPDFObjectHandle page);
726 // End legacy page helpers 726 // End legacy page helpers
727 727
728 - // End of the public API. The following classes and methods are for qpdf internal use only. 728 + // Writer class is restricted to QPDFWriter so that only it can call certain methods.
  729 + class Writer
  730 + {
  731 + friend class QPDFWriter;
  732 +
  733 + private:
  734 + static void
  735 + optimize(
  736 + QPDF& qpdf,
  737 + QPDFWriter::ObjTable const& obj,
  738 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
  739 + {
  740 + return qpdf.optimize(obj, skip_stream_parameters);
  741 + }
  742 +
  743 + static void
  744 + getLinearizedParts(
  745 + QPDF& qpdf,
  746 + QPDFWriter::ObjTable const& obj,
  747 + std::vector<QPDFObjectHandle>& part4,
  748 + std::vector<QPDFObjectHandle>& part6,
  749 + std::vector<QPDFObjectHandle>& part7,
  750 + std::vector<QPDFObjectHandle>& part8,
  751 + std::vector<QPDFObjectHandle>& part9)
  752 + {
  753 + qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9);
  754 + }
  755 +
  756 + static void
  757 + generateHintStream(
  758 + QPDF& qpdf,
  759 + QPDFWriter::NewObjTable const& new_obj,
  760 + QPDFWriter::ObjTable const& obj,
  761 + std::shared_ptr<Buffer>& hint_stream,
  762 + int& S,
  763 + int& O,
  764 + bool compressed)
  765 + {
  766 + return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed);
  767 + }
  768 +
  769 + static std::vector<QPDFObjGen>
  770 + getCompressibleObjGens(QPDF& qpdf)
  771 + {
  772 + return qpdf.getCompressibleObjVector();
  773 + }
  774 +
  775 + static std::vector<bool>
  776 + getCompressibleObjSet(QPDF& qpdf)
  777 + {
  778 + return qpdf.getCompressibleObjSet();
  779 + }
  780 +
  781 + static std::map<QPDFObjGen, QPDFXRefEntry> const&
  782 + getXRefTable(QPDF& qpdf)
  783 + {
  784 + return qpdf.getXRefTableInternal();
  785 + }
  786 +
  787 + static size_t
  788 + tableSize(QPDF& qpdf)
  789 + {
  790 + return qpdf.tableSize();
  791 + }
  792 + };
  793 +
  794 + // The Resolver class is restricted to QPDFObject so that only it can resolve indirect
  795 + // references.
  796 + class Resolver
  797 + {
  798 + friend class QPDFObject;
  799 + friend class QPDF_Unresolved;
  800 +
  801 + private:
  802 + static QPDFObject*
  803 + resolved(QPDF* qpdf, QPDFObjGen og)
  804 + {
  805 + return qpdf->resolve(og);
  806 + }
  807 + };
  808 +
  809 + // StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data.
  810 + class StreamCopier
  811 + {
  812 + friend class QPDFObjectHandle;
  813 +
  814 + private:
  815 + static void
  816 + copyStreamData(QPDF* qpdf, QPDFObjectHandle const& dest, QPDFObjectHandle const& src)
  817 + {
  818 + qpdf->copyStreamData(dest, src);
  819 + }
  820 + };
  821 +
  822 + // The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides
  823 + // special access to allow the parser to create unresolved objects and dangling references.
  824 + class ParseGuard
  825 + {
  826 + friend class QPDFParser;
  827 +
  828 + private:
  829 + ParseGuard(QPDF* qpdf) :
  830 + qpdf(qpdf)
  831 + {
  832 + if (qpdf) {
  833 + qpdf->inParse(true);
  834 + }
  835 + }
  836 +
  837 + static std::shared_ptr<QPDFObject>
  838 + getObject(QPDF* qpdf, int id, int gen, bool parse_pdf)
  839 + {
  840 + return qpdf->getObjectForParser(id, gen, parse_pdf);
  841 + }
729 842
730 - class Writer;  
731 - class Resolver;  
732 - class StreamCopier;  
733 - class ParseGuard;  
734 - class Pipe;  
735 - class JobSetter;  
736 - class Xref_table; 843 + ~ParseGuard()
  844 + {
  845 + if (qpdf) {
  846 + qpdf->inParse(false);
  847 + }
  848 + }
  849 + QPDF* qpdf;
  850 + };
  851 +
  852 + // Pipe class is restricted to QPDF_Stream.
  853 + class Pipe
  854 + {
  855 + friend class QPDF_Stream;
  856 +
  857 + private:
  858 + static bool
  859 + pipeStreamData(
  860 + QPDF* qpdf,
  861 + QPDFObjGen const& og,
  862 + qpdf_offset_t offset,
  863 + size_t length,
  864 + QPDFObjectHandle dict,
  865 + Pipeline* pipeline,
  866 + bool suppress_warnings,
  867 + bool will_retry)
  868 + {
  869 + return qpdf->pipeStreamData(
  870 + og, offset, length, dict, pipeline, suppress_warnings, will_retry);
  871 + }
  872 + };
  873 +
  874 + // JobSetter class is restricted to QPDFJob.
  875 + class JobSetter
  876 + {
  877 + friend class QPDFJob;
  878 +
  879 + private:
  880 + // Enable enhanced warnings for pdf file checking.
  881 + static void
  882 + setCheckMode(QPDF& qpdf, bool val)
  883 + {
  884 + qpdf.m->check_mode = val;
  885 + }
  886 + };
737 887
738 // For testing only -- do not add to DLL 888 // For testing only -- do not add to DLL
739 static bool test_json_validators(); 889 static bool test_json_validators();
@@ -748,18 +898,163 @@ class QPDF @@ -748,18 +898,163 @@ class QPDF
748 898
749 static std::string const qpdf_version; 899 static std::string const qpdf_version;
750 900
751 - class ObjCache;  
752 - class ObjCopier;  
753 - class EncryptionParameters;  
754 - class ForeignStreamData;  
755 - class CopiedStreamDataProvider;  
756 - class StringDecrypter;  
757 - class ResolveRecorder; 901 + class ObjCache
  902 + {
  903 + public:
  904 + ObjCache() :
  905 + end_before_space(0),
  906 + end_after_space(0)
  907 + {
  908 + }
  909 + ObjCache(
  910 + std::shared_ptr<QPDFObject> object,
  911 + qpdf_offset_t end_before_space = 0,
  912 + qpdf_offset_t end_after_space = 0) :
  913 + object(object),
  914 + end_before_space(end_before_space),
  915 + end_after_space(end_after_space)
  916 + {
  917 + }
  918 +
  919 + std::shared_ptr<QPDFObject> object;
  920 + qpdf_offset_t end_before_space;
  921 + qpdf_offset_t end_after_space;
  922 + };
  923 +
  924 + class ObjCopier
  925 + {
  926 + public:
  927 + std::map<QPDFObjGen, QPDFObjectHandle> object_map;
  928 + std::vector<QPDFObjectHandle> to_copy;
  929 + QPDFObjGen::set visiting;
  930 + };
  931 +
  932 + class EncryptionParameters
  933 + {
  934 + friend class QPDF;
  935 +
  936 + public:
  937 + EncryptionParameters();
  938 +
  939 + private:
  940 + bool encrypted;
  941 + bool encryption_initialized;
  942 + int encryption_V;
  943 + int encryption_R;
  944 + bool encrypt_metadata;
  945 + std::map<std::string, encryption_method_e> crypt_filters;
  946 + encryption_method_e cf_stream;
  947 + encryption_method_e cf_string;
  948 + encryption_method_e cf_file;
  949 + std::string provided_password;
  950 + std::string user_password;
  951 + std::string encryption_key;
  952 + std::string cached_object_encryption_key;
  953 + QPDFObjGen cached_key_og;
  954 + bool user_password_matched;
  955 + bool owner_password_matched;
  956 + };
  957 +
  958 + class ForeignStreamData
  959 + {
  960 + friend class QPDF;
  961 +
  962 + public:
  963 + ForeignStreamData(
  964 + std::shared_ptr<EncryptionParameters> encp,
  965 + std::shared_ptr<InputSource> file,
  966 + QPDFObjGen const& foreign_og,
  967 + qpdf_offset_t offset,
  968 + size_t length,
  969 + QPDFObjectHandle local_dict);
  970 +
  971 + private:
  972 + std::shared_ptr<EncryptionParameters> encp;
  973 + std::shared_ptr<InputSource> file;
  974 + QPDFObjGen foreign_og;
  975 + qpdf_offset_t offset;
  976 + size_t length;
  977 + QPDFObjectHandle local_dict;
  978 + };
  979 +
  980 + class CopiedStreamDataProvider: public QPDFObjectHandle::StreamDataProvider
  981 + {
  982 + public:
  983 + CopiedStreamDataProvider(QPDF& destination_qpdf);
  984 + ~CopiedStreamDataProvider() override = default;
  985 + bool provideStreamData(
  986 + QPDFObjGen const& og,
  987 + Pipeline* pipeline,
  988 + bool suppress_warnings,
  989 + bool will_retry) override;
  990 + void registerForeignStream(QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream);
  991 + void registerForeignStream(QPDFObjGen const& local_og, std::shared_ptr<ForeignStreamData>);
  992 +
  993 + private:
  994 + QPDF& destination_qpdf;
  995 + std::map<QPDFObjGen, QPDFObjectHandle> foreign_streams;
  996 + std::map<QPDFObjGen, std::shared_ptr<ForeignStreamData>> foreign_stream_data;
  997 + };
  998 +
  999 + class StringDecrypter: public QPDFObjectHandle::StringDecrypter
  1000 + {
  1001 + friend class QPDF;
  1002 +
  1003 + public:
  1004 + StringDecrypter(QPDF* qpdf, QPDFObjGen const& og);
  1005 + ~StringDecrypter() override = default;
  1006 + void decryptString(std::string& val) override;
  1007 +
  1008 + private:
  1009 + QPDF* qpdf;
  1010 + QPDFObjGen og;
  1011 + };
  1012 +
  1013 + class ResolveRecorder
  1014 + {
  1015 + public:
  1016 + ResolveRecorder(QPDF* qpdf, QPDFObjGen const& og) :
  1017 + qpdf(qpdf),
  1018 + iter(qpdf->m->resolving.insert(og).first)
  1019 + {
  1020 + }
  1021 + virtual ~ResolveRecorder()
  1022 + {
  1023 + this->qpdf->m->resolving.erase(iter);
  1024 + }
  1025 +
  1026 + private:
  1027 + QPDF* qpdf;
  1028 + std::set<QPDFObjGen>::const_iterator iter;
  1029 + };
  1030 +
758 class JSONReactor; 1031 class JSONReactor;
759 1032
760 void parse(char const* password); 1033 void parse(char const* password);
761 void inParse(bool); 1034 void inParse(bool);
  1035 + void setTrailer(QPDFObjectHandle obj);
  1036 + void read_xref(qpdf_offset_t offset);
  1037 + bool resolveXRefTable();
  1038 + void reconstruct_xref(QPDFExc& e);
  1039 + bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes);
  1040 + bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
  1041 + bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
  1042 + qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
  1043 + qpdf_offset_t read_xrefStream(qpdf_offset_t offset);
  1044 + qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
  1045 + std::pair<int, std::array<int, 3>>
  1046 + processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);
  1047 + int processXRefSize(
  1048 + QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged);
  1049 + std::pair<int, std::vector<std::pair<int, int>>> processXRefIndex(
  1050 + QPDFObjectHandle& dict,
  1051 + int max_num_entries,
  1052 + std::function<QPDFExc(std::string_view)> damaged);
  1053 + void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2);
  1054 + void insertFreeXrefEntry(QPDFObjGen);
  1055 + void insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2);
762 void setLastObjectDescription(std::string const& description, QPDFObjGen const& og); 1056 void setLastObjectDescription(std::string const& description, QPDFObjGen const& og);
  1057 + QPDFObjectHandle readTrailer();
763 QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og); 1058 QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og);
764 void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); 1059 void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
765 void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); 1060 void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
@@ -786,7 +1081,11 @@ class QPDF @@ -786,7 +1081,11 @@ class QPDF
786 std::shared_ptr<QPDFObject> getObjectForParser(int id, int gen, bool parse_pdf); 1081 std::shared_ptr<QPDFObject> getObjectForParser(int id, int gen, bool parse_pdf);
787 std::shared_ptr<QPDFObject> getObjectForJSON(int id, int gen); 1082 std::shared_ptr<QPDFObject> getObjectForJSON(int id, int gen);
788 void removeObject(QPDFObjGen og); 1083 void removeObject(QPDFObjGen og);
789 - void updateCache(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& object); 1084 + void updateCache(
  1085 + QPDFObjGen const& og,
  1086 + std::shared_ptr<QPDFObject> const& object,
  1087 + qpdf_offset_t end_before_space,
  1088 + qpdf_offset_t end_after_space);
790 static QPDFExc damagedPDF( 1089 static QPDFExc damagedPDF(
791 InputSource& input, 1090 InputSource& input,
792 std::string const& object, 1091 std::string const& object,
@@ -823,6 +1122,7 @@ class QPDF @@ -823,6 +1122,7 @@ class QPDF
823 1122
824 // For QPDFWriter: 1123 // For QPDFWriter:
825 1124
  1125 + std::map<QPDFObjGen, QPDFXRefEntry> const& getXRefTableInternal();
826 template <typename T> 1126 template <typename T>
827 void optimize_internal( 1127 void optimize_internal(
828 T const& object_stream_data, 1128 T const& object_stream_data,
@@ -831,7 +1131,6 @@ class QPDF @@ -831,7 +1131,6 @@ class QPDF
831 void optimize( 1131 void optimize(
832 QPDFWriter::ObjTable const& obj, 1132 QPDFWriter::ObjTable const& obj,
833 std::function<int(QPDFObjectHandle&)> skip_stream_parameters); 1133 std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
834 - void optimize(Xref_table const& obj);  
835 size_t tableSize(); 1134 size_t tableSize();
836 1135
837 // Get lists of all objects in order according to the part of a linearized file that they belong 1136 // Get lists of all objects in order according to the part of a linearized file that they belong
@@ -897,19 +1196,200 @@ class QPDF @@ -897,19 +1196,200 @@ class QPDF
897 replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top); 1196 replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top);
898 void copyStreamData(QPDFObjectHandle dest_stream, QPDFObjectHandle src_stream); 1197 void copyStreamData(QPDFObjectHandle dest_stream, QPDFObjectHandle src_stream);
899 1198
900 - struct HPageOffsetEntry;  
901 - struct HPageOffset;  
902 - struct HSharedObjectEntry;  
903 - struct HSharedObject;  
904 - struct HGeneric;  
905 - struct LinParameters;  
906 - struct CHPageOffsetEntry;  
907 - struct CHPageOffset;  
908 - struct CHSharedObjectEntry;  
909 - struct CHSharedObject;  
910 - class ObjUser;  
911 - struct UpdateObjectMapsFrame;  
912 - class PatternFinder; 1199 + // Linearization Hint table structures.
  1200 + // Naming conventions:
  1201 +
  1202 + // HSomething is the Something Hint Table or table header
  1203 + // HSomethingEntry is an entry in the Something table
  1204 +
  1205 + // delta_something + min_something = something
  1206 + // nbits_something = number of bits required for something
  1207 +
  1208 + // something_offset is the pre-adjusted offset in the file. If >=
  1209 + // H0_offset, H0_length must be added to get an actual file
  1210 + // offset.
  1211 +
  1212 + // PDF 1.4: Table F.4
  1213 + struct HPageOffsetEntry
  1214 + {
  1215 + int delta_nobjects{0}; // 1
  1216 + qpdf_offset_t delta_page_length{0}; // 2
  1217 + // vectors' sizes = nshared_objects
  1218 + int nshared_objects{0}; // 3
  1219 + std::vector<int> shared_identifiers; // 4
  1220 + std::vector<int> shared_numerators; // 5
  1221 + qpdf_offset_t delta_content_offset{0}; // 6
  1222 + qpdf_offset_t delta_content_length{0}; // 7
  1223 + };
  1224 +
  1225 + // PDF 1.4: Table F.3
  1226 + struct HPageOffset
  1227 + {
  1228 + int min_nobjects{0}; // 1
  1229 + qpdf_offset_t first_page_offset{0}; // 2
  1230 + int nbits_delta_nobjects{0}; // 3
  1231 + int min_page_length{0}; // 4
  1232 + int nbits_delta_page_length{0}; // 5
  1233 + int min_content_offset{0}; // 6
  1234 + int nbits_delta_content_offset{0}; // 7
  1235 + int min_content_length{0}; // 8
  1236 + int nbits_delta_content_length{0}; // 9
  1237 + int nbits_nshared_objects{0}; // 10
  1238 + int nbits_shared_identifier{0}; // 11
  1239 + int nbits_shared_numerator{0}; // 12
  1240 + int shared_denominator{0}; // 13
  1241 + // vector size is npages
  1242 + std::vector<HPageOffsetEntry> entries;
  1243 + };
  1244 +
  1245 + // PDF 1.4: Table F.6
  1246 + struct HSharedObjectEntry
  1247 + {
  1248 + // Item 3 is a 128-bit signature (unsupported by Acrobat)
  1249 + int delta_group_length{0}; // 1
  1250 + int signature_present{0}; // 2 -- always 0
  1251 + int nobjects_minus_one{0}; // 4 -- always 0
  1252 + };
  1253 +
  1254 + // PDF 1.4: Table F.5
  1255 + struct HSharedObject
  1256 + {
  1257 + int first_shared_obj{0}; // 1
  1258 + qpdf_offset_t first_shared_offset{0}; // 2
  1259 + int nshared_first_page{0}; // 3
  1260 + int nshared_total{0}; // 4
  1261 + int nbits_nobjects{0}; // 5
  1262 + int min_group_length{0}; // 6
  1263 + int nbits_delta_group_length{0}; // 7
  1264 + // vector size is nshared_total
  1265 + std::vector<HSharedObjectEntry> entries;
  1266 + };
  1267 +
  1268 + // PDF 1.4: Table F.9
  1269 + struct HGeneric
  1270 + {
  1271 + int first_object{0}; // 1
  1272 + qpdf_offset_t first_object_offset{0}; // 2
  1273 + int nobjects{0}; // 3
  1274 + int group_length{0}; // 4
  1275 + };
  1276 +
  1277 + // Other linearization data structures
  1278 +
  1279 + // Initialized from Linearization Parameter dictionary
  1280 + struct LinParameters
  1281 + {
  1282 + qpdf_offset_t file_size{0}; // /L
  1283 + int first_page_object{0}; // /O
  1284 + qpdf_offset_t first_page_end{0}; // /E
  1285 + int npages{0}; // /N
  1286 + qpdf_offset_t xref_zero_offset{0}; // /T
  1287 + int first_page{0}; // /P
  1288 + qpdf_offset_t H_offset{0}; // offset of primary hint stream
  1289 + qpdf_offset_t H_length{0}; // length of primary hint stream
  1290 + };
  1291 +
  1292 + // Computed hint table value data structures. These tables contain the computed values on which
  1293 + // the hint table values are based. They exclude things like number of bits and store actual
  1294 + // values instead of mins and deltas. File offsets are also absolute rather than being offset
  1295 + // by the size of the primary hint table. We populate the hint table structures from these
  1296 + // during writing and compare the hint table values with these during validation. We ignore
  1297 + // some values for various reasons described in the code. Those values are omitted from these
  1298 + // structures. Note also that object numbers are object numbers from the input file, not the
  1299 + // output file.
  1300 +
  1301 + // Naming convention: CHSomething is analogous to HSomething above. "CH" is computed hint.
  1302 +
  1303 + struct CHPageOffsetEntry
  1304 + {
  1305 + int nobjects{0};
  1306 + int nshared_objects{0};
  1307 + // vectors' sizes = nshared_objects
  1308 + std::vector<int> shared_identifiers;
  1309 + };
  1310 +
  1311 + struct CHPageOffset
  1312 + {
  1313 + // vector size is npages
  1314 + std::vector<CHPageOffsetEntry> entries;
  1315 + };
  1316 +
  1317 + struct CHSharedObjectEntry
  1318 + {
  1319 + CHSharedObjectEntry(int object) :
  1320 + object(object)
  1321 + {
  1322 + }
  1323 +
  1324 + int object;
  1325 + };
  1326 +
  1327 + // PDF 1.4: Table F.5
  1328 + struct CHSharedObject
  1329 + {
  1330 + int first_shared_obj{0};
  1331 + int nshared_first_page{0};
  1332 + int nshared_total{0};
  1333 + // vector size is nshared_total
  1334 + std::vector<CHSharedObjectEntry> entries;
  1335 + };
  1336 +
  1337 + // No need for CHGeneric -- HGeneric is fine as is.
  1338 +
  1339 + // Data structures to support optimization -- implemented in QPDF_optimization.cc
  1340 +
  1341 + class ObjUser
  1342 + {
  1343 + public:
  1344 + enum user_e { ou_bad, ou_page, ou_thumb, ou_trailer_key, ou_root_key, ou_root };
  1345 +
  1346 + // type is set to ou_bad
  1347 + ObjUser();
  1348 +
  1349 + // type must be ou_root
  1350 + ObjUser(user_e type);
  1351 +
  1352 + // type must be one of ou_page or ou_thumb
  1353 + ObjUser(user_e type, int pageno);
  1354 +
  1355 + // type must be one of ou_trailer_key or ou_root_key
  1356 + ObjUser(user_e type, std::string const& key);
  1357 +
  1358 + bool operator<(ObjUser const&) const;
  1359 +
  1360 + user_e ou_type;
  1361 + int pageno; // if ou_page;
  1362 + std::string key; // if ou_trailer_key or ou_root_key
  1363 + };
  1364 +
  1365 + struct UpdateObjectMapsFrame
  1366 + {
  1367 + UpdateObjectMapsFrame(ObjUser const& ou, QPDFObjectHandle oh, bool top);
  1368 +
  1369 + ObjUser const& ou;
  1370 + QPDFObjectHandle oh;
  1371 + bool top;
  1372 + };
  1373 +
  1374 + class PatternFinder: public InputSource::Finder
  1375 + {
  1376 + public:
  1377 + PatternFinder(QPDF& qpdf, bool (QPDF::*checker)()) :
  1378 + qpdf(qpdf),
  1379 + checker(checker)
  1380 + {
  1381 + }
  1382 + ~PatternFinder() override = default;
  1383 + bool
  1384 + check() override
  1385 + {
  1386 + return (this->qpdf.*checker)();
  1387 + }
  1388 +
  1389 + private:
  1390 + QPDF& qpdf;
  1391 + bool (QPDF::*checker)();
  1392 + };
913 1393
914 // Methods to support pattern finding 1394 // Methods to support pattern finding
915 static bool validatePDFVersion(char const*&, std::string& version); 1395 static bool validatePDFVersion(char const*&, std::string& version);
@@ -931,7 +1411,6 @@ class QPDF @@ -931,7 +1411,6 @@ class QPDF
931 QPDFObjectHandle 1411 QPDFObjectHandle
932 getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data); 1412 getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data);
933 QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj); 1413 QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj);
934 - QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, Xref_table const& obj);  
935 int lengthNextN(int first_object, int n); 1414 int lengthNextN(int first_object, int n);
936 void 1415 void
937 checkHPageOffset(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj); 1416 checkHPageOffset(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj);
@@ -977,7 +1456,6 @@ class QPDF @@ -977,7 +1456,6 @@ class QPDF
977 std::function<int(QPDFObjectHandle&)> skip_stream_parameters); 1456 std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
978 void filterCompressedObjects(std::map<int, int> const& object_stream_data); 1457 void filterCompressedObjects(std::map<int, int> const& object_stream_data);
979 void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data); 1458 void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data);
980 - void filterCompressedObjects(Xref_table const& object_stream_data);  
981 1459
982 // JSON import 1460 // JSON import
983 void importJSON(std::shared_ptr<InputSource>, bool must_be_complete); 1461 void importJSON(std::shared_ptr<InputSource>, bool must_be_complete);
@@ -1008,7 +1486,90 @@ class QPDF @@ -1008,7 +1486,90 @@ class QPDF
1008 return QIntC::to_ulonglong(i); 1486 return QIntC::to_ulonglong(i);
1009 } 1487 }
1010 1488
1011 - class Members; 1489 + class Members
  1490 + {
  1491 + friend class QPDF;
  1492 + friend class ResolveRecorder;
  1493 +
  1494 + public:
  1495 + QPDF_DLL
  1496 + ~Members() = default;
  1497 +
  1498 + private:
  1499 + Members();
  1500 + Members(Members const&) = delete;
  1501 +
  1502 + std::shared_ptr<QPDFLogger> log;
  1503 + unsigned long long unique_id{0};
  1504 + QPDFTokenizer tokenizer;
  1505 + std::shared_ptr<InputSource> file;
  1506 + std::string last_object_description;
  1507 + bool provided_password_is_hex_key{false};
  1508 + bool ignore_xref_streams{false};
  1509 + bool suppress_warnings{false};
  1510 + size_t max_warnings{0};
  1511 + bool attempt_recovery{true};
  1512 + bool check_mode{false};
  1513 + std::shared_ptr<EncryptionParameters> encp;
  1514 + std::string pdf_version;
  1515 + std::map<QPDFObjGen, QPDFXRefEntry> xref_table;
  1516 + // Various tables are indexed by object id, with potential size id + 1
  1517 + int xref_table_max_id{std::numeric_limits<int>::max() - 1};
  1518 + qpdf_offset_t xref_table_max_offset{0};
  1519 + std::set<int> deleted_objects;
  1520 + std::map<QPDFObjGen, ObjCache> obj_cache;
  1521 + std::set<QPDFObjGen> resolving;
  1522 + QPDFObjectHandle trailer;
  1523 + std::vector<QPDFObjectHandle> all_pages;
  1524 + bool invalid_page_found{false};
  1525 + std::map<QPDFObjGen, int> pageobj_to_pages_pos;
  1526 + bool pushed_inherited_attributes_to_pages{false};
  1527 + bool ever_pushed_inherited_attributes_to_pages{false};
  1528 + bool ever_called_get_all_pages{false};
  1529 + std::vector<QPDFExc> warnings;
  1530 + std::map<unsigned long long, ObjCopier> object_copiers;
  1531 + std::shared_ptr<QPDFObjectHandle::StreamDataProvider> copied_streams;
  1532 + // copied_stream_data_provider is owned by copied_streams
  1533 + CopiedStreamDataProvider* copied_stream_data_provider{nullptr};
  1534 + bool reconstructed_xref{false};
  1535 + bool fixed_dangling_refs{false};
  1536 + bool immediate_copy_from{false};
  1537 + bool in_parse{false};
  1538 + bool parsed{false};
  1539 + std::set<int> resolved_object_streams;
  1540 +
  1541 + // Linearization data
  1542 + qpdf_offset_t first_xref_item_offset{0}; // actual value from file
  1543 + bool uncompressed_after_compressed{false};
  1544 + bool linearization_warnings{false};
  1545 +
  1546 + // Linearization parameter dictionary and hint table data: may be read from file or computed
  1547 + // prior to writing a linearized file
  1548 + QPDFObjectHandle lindict;
  1549 + LinParameters linp;
  1550 + HPageOffset page_offset_hints;
  1551 + HSharedObject shared_object_hints;
  1552 + HGeneric outline_hints;
  1553 +
  1554 + // Computed linearization data: used to populate above tables during writing and to compare
  1555 + // with them during validation. c_ means computed.
  1556 + LinParameters c_linp;
  1557 + CHPageOffset c_page_offset_data;
  1558 + CHSharedObject c_shared_object_data;
  1559 + HGeneric c_outline_data;
  1560 +
  1561 + // Object ordering data for linearized files: initialized by calculateLinearizationData().
  1562 + // Part numbers refer to the PDF 1.4 specification.
  1563 + std::vector<QPDFObjectHandle> part4;
  1564 + std::vector<QPDFObjectHandle> part6;
  1565 + std::vector<QPDFObjectHandle> part7;
  1566 + std::vector<QPDFObjectHandle> part8;
  1567 + std::vector<QPDFObjectHandle> part9;
  1568 +
  1569 + // Optimization data
  1570 + std::map<ObjUser, std::set<QPDFObjGen>> obj_user_to_objects;
  1571 + std::map<QPDFObjGen, std::set<ObjUser>> object_to_obj_users;
  1572 + };
1012 1573
1013 // Keep all member variables inside the Members object, which we dynamically allocate. This 1574 // Keep all member variables inside the Members object, which we dynamically allocate. This
1014 // makes it possible to add new private members without breaking binary compatibility. 1575 // makes it possible to add new private members without breaking binary compatibility.
libqpdf/QPDF.cc
1 #include <qpdf/qpdf-config.h> // include first for large file support 1 #include <qpdf/qpdf-config.h> // include first for large file support
2 2
3 -#include <qpdf/QPDF_private.hh> 3 +#include <qpdf/QPDF.hh>
4 4
5 #include <array> 5 #include <array>
6 #include <atomic> 6 #include <atomic>
@@ -32,51 +32,67 @@ @@ -32,51 +32,67 @@
32 // being static as well. 32 // being static as well.
33 std::string const QPDF::qpdf_version(QPDF_VERSION); 33 std::string const QPDF::qpdf_version(QPDF_VERSION);
34 34
  35 +static char const* EMPTY_PDF = (
  36 + // force line break
  37 + "%PDF-1.3\n"
  38 + "1 0 obj\n"
  39 + "<< /Type /Catalog /Pages 2 0 R >>\n"
  40 + "endobj\n"
  41 + "2 0 obj\n"
  42 + "<< /Type /Pages /Kids [] /Count 0 >>\n"
  43 + "endobj\n"
  44 + "xref\n"
  45 + "0 3\n"
  46 + "0000000000 65535 f \n"
  47 + "0000000009 00000 n \n"
  48 + "0000000058 00000 n \n"
  49 + "trailer << /Size 3 /Root 1 0 R >>\n"
  50 + "startxref\n"
  51 + "110\n"
  52 + "%%EOF\n");
  53 +
35 namespace 54 namespace
36 { 55 {
37 - class InvalidInputSource final: public InputSource 56 + class InvalidInputSource: public InputSource
38 { 57 {
39 public: 58 public:
40 - InvalidInputSource(std::string const& name) :  
41 - name(name)  
42 - {  
43 - }  
44 - ~InvalidInputSource() final = default; 59 + ~InvalidInputSource() override = default;
45 qpdf_offset_t 60 qpdf_offset_t
46 - findAndSkipNextEOL() final 61 + findAndSkipNextEOL() override
47 { 62 {
48 throwException(); 63 throwException();
49 return 0; 64 return 0;
50 } 65 }
51 std::string const& 66 std::string const&
52 - getName() const final 67 + getName() const override
53 { 68 {
  69 + static std::string name("closed input source");
54 return name; 70 return name;
55 } 71 }
56 qpdf_offset_t 72 qpdf_offset_t
57 - tell() final 73 + tell() override
58 { 74 {
59 throwException(); 75 throwException();
60 return 0; 76 return 0;
61 } 77 }
62 void 78 void
63 - seek(qpdf_offset_t offset, int whence) final 79 + seek(qpdf_offset_t offset, int whence) override
64 { 80 {
65 throwException(); 81 throwException();
66 } 82 }
67 void 83 void
68 - rewind() final 84 + rewind() override
69 { 85 {
70 throwException(); 86 throwException();
71 } 87 }
72 size_t 88 size_t
73 - read(char* buffer, size_t length) final 89 + read(char* buffer, size_t length) override
74 { 90 {
75 throwException(); 91 throwException();
76 return 0; 92 return 0;
77 } 93 }
78 void 94 void
79 - unreadCh(char ch) final 95 + unreadCh(char ch) override
80 { 96 {
81 throwException(); 97 throwException();
82 } 98 }
@@ -89,8 +105,6 @@ namespace @@ -89,8 +105,6 @@ namespace
89 "source. QPDF operations are invalid before processFile (or " 105 "source. QPDF operations are invalid before processFile (or "
90 "another process method) or after closeInputSource"); 106 "another process method) or after closeInputSource");
91 } 107 }
92 -  
93 - std::string const& name;  
94 }; 108 };
95 } // namespace 109 } // namespace
96 110
@@ -182,17 +196,15 @@ QPDF::EncryptionParameters::EncryptionParameters() : @@ -182,17 +196,15 @@ QPDF::EncryptionParameters::EncryptionParameters() :
182 { 196 {
183 } 197 }
184 198
185 -QPDF::Members::Members(QPDF& qpdf) : 199 +QPDF::Members::Members() :
186 log(QPDFLogger::defaultLogger()), 200 log(QPDFLogger::defaultLogger()),
187 - file_sp(new InvalidInputSource(no_input_name)),  
188 - file(file_sp.get()),  
189 - encp(new EncryptionParameters),  
190 - xref_table(qpdf, file) 201 + file(new InvalidInputSource()),
  202 + encp(new EncryptionParameters)
191 { 203 {
192 } 204 }
193 205
194 QPDF::QPDF() : 206 QPDF::QPDF() :
195 - m(new Members(*this)) 207 + m(new Members())
196 { 208 {
197 m->tokenizer.allowEOF(); 209 m->tokenizer.allowEOF();
198 // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout 210 // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout
@@ -213,6 +225,9 @@ QPDF::~QPDF() @@ -213,6 +225,9 @@ QPDF::~QPDF()
213 // are reachable from this object to release their association with this QPDF. Direct objects 225 // are reachable from this object to release their association with this QPDF. Direct objects
214 // are not destroyed since they can be moved to other QPDF objects safely. 226 // are not destroyed since they can be moved to other QPDF objects safely.
215 227
  228 + // At this point, obviously no one is still using the QPDF object, but we'll explicitly clear
  229 + // the xref table anyway just to prevent any possibility of resolve() succeeding.
  230 + m->xref_table.clear();
216 for (auto const& iter: m->obj_cache) { 231 for (auto const& iter: m->obj_cache) {
217 iter.second.object->disconnect(); 232 iter.second.object->disconnect();
218 if (iter.second.object->getTypeCode() != ::ot_null) { 233 if (iter.second.object->getTypeCode() != ::ot_null) {
@@ -256,17 +271,14 @@ QPDF::processMemoryFile( @@ -256,17 +271,14 @@ QPDF::processMemoryFile(
256 void 271 void
257 QPDF::processInputSource(std::shared_ptr<InputSource> source, char const* password) 272 QPDF::processInputSource(std::shared_ptr<InputSource> source, char const* password)
258 { 273 {
259 - m->file_sp = source;  
260 - m->file = source.get(); 274 + m->file = source;
261 parse(password); 275 parse(password);
262 } 276 }
263 277
264 void 278 void
265 QPDF::closeInputSource() 279 QPDF::closeInputSource()
266 { 280 {
267 - m->no_input_name = "closed input source";  
268 - m->file_sp = std::shared_ptr<InputSource>(new InvalidInputSource(m->no_input_name));  
269 - m->file = m->file_sp.get(); 281 + m->file = std::shared_ptr<InputSource>(new InvalidInputSource());
270 } 282 }
271 283
272 void 284 void
@@ -278,9 +290,7 @@ QPDF::setPasswordIsHexKey(bool val) @@ -278,9 +290,7 @@ QPDF::setPasswordIsHexKey(bool val)
278 void 290 void
279 QPDF::emptyPDF() 291 QPDF::emptyPDF()
280 { 292 {
281 - m->pdf_version = "1.3";  
282 - m->no_input_name = "empty PDF";  
283 - m->xref_table.initialize_empty(); 293 + processMemoryFile("empty PDF", EMPTY_PDF, strlen(EMPTY_PDF));
284 } 294 }
285 295
286 void 296 void
@@ -293,7 +303,7 @@ QPDF::registerStreamFilter( @@ -293,7 +303,7 @@ QPDF::registerStreamFilter(
293 void 303 void
294 QPDF::setIgnoreXRefStreams(bool val) 304 QPDF::setIgnoreXRefStreams(bool val)
295 { 305 {
296 - m->xref_table.ignore_streams(val); 306 + m->ignore_xref_streams = val;
297 } 307 }
298 308
299 std::shared_ptr<QPDFLogger> 309 std::shared_ptr<QPDFLogger>
@@ -331,7 +341,6 @@ void @@ -331,7 +341,6 @@ void
331 QPDF::setAttemptRecovery(bool val) 341 QPDF::setAttemptRecovery(bool val)
332 { 342 {
333 m->attempt_recovery = val; 343 m->attempt_recovery = val;
334 - m->xref_table.attempt_recovery(val);  
335 } 344 }
336 345
337 void 346 void
@@ -401,9 +410,7 @@ QPDF::findHeader() @@ -401,9 +410,7 @@ QPDF::findHeader()
401 // PDF header, all explicit offsets in the file are such that 0 points to the beginning 410 // PDF header, all explicit offsets in the file are such that 0 points to the beginning
402 // of the header. 411 // of the header.
403 QTC::TC("qpdf", "QPDF global offset"); 412 QTC::TC("qpdf", "QPDF global offset");
404 - m->file_sp =  
405 - std::shared_ptr<InputSource>(new OffsetInputSource(m->file_sp, global_offset));  
406 - m->file = m->file_sp.get(); 413 + m->file = std::shared_ptr<InputSource>(new OffsetInputSource(m->file, global_offset));
407 } 414 }
408 } 415 }
409 return valid; 416 return valid;
@@ -436,8 +443,46 @@ QPDF::parse(char const* password) @@ -436,8 +443,46 @@ QPDF::parse(char const* password)
436 m->pdf_version = "1.2"; 443 m->pdf_version = "1.2";
437 } 444 }
438 445
439 - m->xref_table.initialize(); 446 + // PDF spec says %%EOF must be found within the last 1024 bytes of/ the file. We add an extra
  447 + // 30 characters to leave room for the startxref stuff.
  448 + m->file->seek(0, SEEK_END);
  449 + qpdf_offset_t end_offset = m->file->tell();
  450 + m->xref_table_max_offset = end_offset;
  451 + // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic
  452 + // scenarios at least 3 bytes are required.
  453 + if (m->xref_table_max_id > m->xref_table_max_offset / 3) {
  454 + m->xref_table_max_id = static_cast<int>(m->xref_table_max_offset / 3);
  455 + }
  456 + qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
  457 + PatternFinder sf(*this, &QPDF::findStartxref);
  458 + qpdf_offset_t xref_offset = 0;
  459 + if (m->file->findLast("startxref", start_offset, 0, sf)) {
  460 + xref_offset = QUtil::string_to_ll(readToken(*m->file).getValue().c_str());
  461 + }
  462 +
  463 + try {
  464 + if (xref_offset == 0) {
  465 + QTC::TC("qpdf", "QPDF can't find startxref");
  466 + throw damagedPDF("", 0, "can't find startxref");
  467 + }
  468 + try {
  469 + read_xref(xref_offset);
  470 + } catch (QPDFExc&) {
  471 + throw;
  472 + } catch (std::exception& e) {
  473 + throw damagedPDF("", 0, std::string("error reading xref: ") + e.what());
  474 + }
  475 + } catch (QPDFExc& e) {
  476 + if (m->attempt_recovery) {
  477 + reconstruct_xref(e);
  478 + QTC::TC("qpdf", "QPDF reconstructed xref table");
  479 + } else {
  480 + throw;
  481 + }
  482 + }
  483 +
440 initializeEncryption(); 484 initializeEncryption();
  485 + m->parsed = true;
441 if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) { 486 if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) {
442 // QPDFs created from JSON have an empty xref table and no root object yet. 487 // QPDFs created from JSON have an empty xref table and no root object yet.
443 throw damagedPDF("", 0, "unable to find page tree"); 488 throw damagedPDF("", 0, "unable to find page tree");
@@ -479,77 +524,18 @@ QPDF::warn( @@ -479,77 +524,18 @@ QPDF::warn(
479 } 524 }
480 525
481 void 526 void
482 -QPDF::Xref_table::initialize_empty() 527 +QPDF::setTrailer(QPDFObjectHandle obj)
483 { 528 {
484 - initialized_ = true;  
485 - trailer_ = QPDFObjectHandle::newDictionary();  
486 - auto rt = qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary());  
487 - auto pgs = qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary());  
488 - pgs.replaceKey("/Type", QPDFObjectHandle::newName("/Pages"));  
489 - pgs.replaceKey("/Kids", QPDFObjectHandle::newArray());  
490 - pgs.replaceKey("/Count", QPDFObjectHandle::newInteger(0));  
491 - rt.replaceKey("/Type", QPDFObjectHandle::newName("/Catalog"));  
492 - rt.replaceKey("/Pages", pgs);  
493 - trailer_.replaceKey("/Root", rt);  
494 - trailer_.replaceKey("/Size", QPDFObjectHandle::newInteger(3));  
495 -}  
496 -  
497 -void  
498 -QPDF::Xref_table::initialize_json()  
499 -{  
500 - initialized_ = true;  
501 - table.resize(1);  
502 - trailer_ = QPDFObjectHandle::newDictionary();  
503 - trailer_.replaceKey("/Size", QPDFObjectHandle::newInteger(1));  
504 -}  
505 -  
506 -void  
507 -QPDF::Xref_table::initialize()  
508 -{  
509 - // PDF spec says %%EOF must be found within the last 1024 bytes of/ the file. We add an extra  
510 - // 30 characters to leave room for the startxref stuff.  
511 - file->seek(0, SEEK_END);  
512 - qpdf_offset_t end_offset = file->tell();  
513 - // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic  
514 - // scenarios at least 3 bytes are required.  
515 - if (max_id_ > end_offset / 3) {  
516 - max_id_ = static_cast<int>(end_offset / 3);  
517 - }  
518 - qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);  
519 - PatternFinder sf(qpdf, &QPDF::findStartxref);  
520 - qpdf_offset_t xref_offset = 0;  
521 - if (file->findLast("startxref", start_offset, 0, sf)) {  
522 - xref_offset = QUtil::string_to_ll(read_token().getValue().c_str());  
523 - }  
524 -  
525 - try {  
526 - if (xref_offset == 0) {  
527 - QTC::TC("qpdf", "QPDF can't find startxref");  
528 - throw damaged_pdf("can't find startxref");  
529 - }  
530 - try {  
531 - read(xref_offset);  
532 - } catch (QPDFExc&) {  
533 - throw;  
534 - } catch (std::exception& e) {  
535 - throw damaged_pdf(std::string("error reading xref: ") + e.what());  
536 - }  
537 - } catch (QPDFExc& e) {  
538 - if (attempt_recovery_) {  
539 - reconstruct(e);  
540 - QTC::TC("qpdf", "QPDF reconstructed xref table");  
541 - } else {  
542 - throw;  
543 - } 529 + if (m->trailer) {
  530 + return;
544 } 531 }
545 -  
546 - initialized_ = true; 532 + m->trailer = obj;
547 } 533 }
548 534
549 void 535 void
550 -QPDF::Xref_table::reconstruct(QPDFExc& e) 536 +QPDF::reconstruct_xref(QPDFExc& e)
551 { 537 {
552 - if (reconstructed_) { 538 + if (m->reconstructed_xref) {
553 // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because 539 // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because
554 // qpdf is throwing many fewer exceptions while parsing. Most situations are warnings now. 540 // qpdf is throwing many fewer exceptions while parsing. Most situations are warnings now.
555 throw e; 541 throw e;
@@ -557,93 +543,78 @@ QPDF::Xref_table::reconstruct(QPDFExc& e) @@ -557,93 +543,78 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
557 543
558 // If recovery generates more than 1000 warnings, the file is so severely damaged that there 544 // If recovery generates more than 1000 warnings, the file is so severely damaged that there
559 // probably is no point trying to continue. 545 // probably is no point trying to continue.
560 - const auto max_warnings = qpdf.m->warnings.size() + 1000U; 546 + const auto max_warnings = m->warnings.size() + 1000U;
561 auto check_warnings = [this, max_warnings]() { 547 auto check_warnings = [this, max_warnings]() {
562 - if (qpdf.m->warnings.size() > max_warnings) {  
563 - throw damaged_pdf("too many errors while reconstructing cross-reference table"); 548 + if (m->warnings.size() > max_warnings) {
  549 + throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table");
564 } 550 }
565 }; 551 };
566 552
567 - reconstructed_ = true; 553 + m->reconstructed_xref = true;
568 // We may find more objects, which may contain dangling references. 554 // We may find more objects, which may contain dangling references.
569 - qpdf.m->fixed_dangling_refs = false; 555 + m->fixed_dangling_refs = false;
570 556
571 - warn_damaged("file is damaged");  
572 - qpdf.warn(e);  
573 - warn_damaged("Attempting to reconstruct cross-reference table"); 557 + warn(damagedPDF("", 0, "file is damaged"));
  558 + warn(e);
  559 + warn(damagedPDF("", 0, "Attempting to reconstruct cross-reference table"));
574 560
575 // Delete all references to type 1 (uncompressed) objects 561 // Delete all references to type 1 (uncompressed) objects
576 - for (auto& iter: table) {  
577 - if (iter.type() == 1) {  
578 - iter = {}; 562 + std::set<QPDFObjGen> to_delete;
  563 + for (auto const& iter: m->xref_table) {
  564 + if (iter.second.getType() == 1) {
  565 + to_delete.insert(iter.first);
579 } 566 }
580 } 567 }
  568 + for (auto const& iter: to_delete) {
  569 + m->xref_table.erase(iter);
  570 + }
581 571
582 - std::vector<std::tuple<int, int, qpdf_offset_t>> objects;  
583 - std::vector<qpdf_offset_t> trailers;  
584 - int max_found = 0;  
585 -  
586 - file->seek(0, SEEK_END);  
587 - qpdf_offset_t eof = file->tell();  
588 - file->seek(0, SEEK_SET); 572 + m->file->seek(0, SEEK_END);
  573 + qpdf_offset_t eof = m->file->tell();
  574 + m->file->seek(0, SEEK_SET);
589 // Don't allow very long tokens here during recovery. All the interesting tokens are covered. 575 // Don't allow very long tokens here during recovery. All the interesting tokens are covered.
590 static size_t const MAX_LEN = 10; 576 static size_t const MAX_LEN = 10;
591 - while (file->tell() < eof) {  
592 - QPDFTokenizer::Token t1 = read_token(MAX_LEN);  
593 - qpdf_offset_t token_start = file->tell() - toO(t1.getValue().length()); 577 + while (m->file->tell() < eof) {
  578 + QPDFTokenizer::Token t1 = readToken(*m->file, MAX_LEN);
  579 + qpdf_offset_t token_start = m->file->tell() - toO(t1.getValue().length());
594 if (t1.isInteger()) { 580 if (t1.isInteger()) {
595 - auto pos = file->tell();  
596 - QPDFTokenizer::Token t2 = read_token(MAX_LEN);  
597 - if (t2.isInteger() && read_token(MAX_LEN).isWord("obj")) { 581 + auto pos = m->file->tell();
  582 + QPDFTokenizer::Token t2 = readToken(*m->file, MAX_LEN);
  583 + if ((t2.isInteger()) && (readToken(*m->file, MAX_LEN).isWord("obj"))) {
598 int obj = QUtil::string_to_int(t1.getValue().c_str()); 584 int obj = QUtil::string_to_int(t1.getValue().c_str());
599 int gen = QUtil::string_to_int(t2.getValue().c_str()); 585 int gen = QUtil::string_to_int(t2.getValue().c_str());
600 - if (obj <= max_id_) {  
601 - objects.emplace_back(obj, gen, token_start);  
602 - if (obj > max_found) {  
603 - max_found = obj;  
604 - } 586 + if (obj <= m->xref_table_max_id) {
  587 + insertReconstructedXrefEntry(obj, token_start, gen);
605 } else { 588 } else {
606 - warn_damaged("ignoring object with impossibly large id " + std::to_string(obj)); 589 + warn(damagedPDF(
  590 + "", 0, "ignoring object with impossibly large id " + std::to_string(obj)));
607 } 591 }
608 } 592 }
609 - file->seek(pos, SEEK_SET);  
610 - } else if (!trailer_ && t1.isWord("trailer")) {  
611 - trailers.emplace_back(file->tell());  
612 - }  
613 - file->findAndSkipNextEOL();  
614 - }  
615 -  
616 - table.resize(toS(max_found) + 1);  
617 -  
618 - for (auto tr: trailers) {  
619 - file->seek(tr, SEEK_SET);  
620 - auto t = read_trailer();  
621 - if (!t.isDictionary()) {  
622 - // Oh well. It was worth a try.  
623 - } else {  
624 - trailer_ = t;  
625 - break; 593 + m->file->seek(pos, SEEK_SET);
  594 + } else if (!m->trailer && t1.isWord("trailer")) {
  595 + auto pos = m->file->tell();
  596 + QPDFObjectHandle t = readTrailer();
  597 + if (!t.isDictionary()) {
  598 + // Oh well. It was worth a try.
  599 + } else {
  600 + setTrailer(t);
  601 + }
  602 + m->file->seek(pos, SEEK_SET);
626 } 603 }
627 check_warnings(); 604 check_warnings();
  605 + m->file->findAndSkipNextEOL();
628 } 606 }
  607 + m->deleted_objects.clear();
629 608
630 - auto rend = objects.rend();  
631 - for (auto it = objects.rbegin(); it != rend; it++) {  
632 - auto [obj, gen, token_start] = *it;  
633 - insert(obj, 1, token_start, gen);  
634 - check_warnings();  
635 - }  
636 -  
637 - if (!trailer_) { 609 + if (!m->trailer) {
638 qpdf_offset_t max_offset{0}; 610 qpdf_offset_t max_offset{0};
639 // If there are any xref streams, take the last one to appear. 611 // If there are any xref streams, take the last one to appear.
640 - int i = -1;  
641 - for (auto const& item: table) {  
642 - ++i;  
643 - if (item.type() != 1) { 612 + for (auto const& iter: m->xref_table) {
  613 + auto entry = iter.second;
  614 + if (entry.getType() != 1) {
644 continue; 615 continue;
645 } 616 }
646 - auto oh = qpdf.getObject(i, item.gen()); 617 + auto oh = getObjectByObjGen(iter.first);
647 try { 618 try {
648 if (!oh.isStreamOfType("/XRef")) { 619 if (!oh.isStreamOfType("/XRef")) {
649 continue; 620 continue;
@@ -651,44 +622,44 @@ QPDF::Xref_table::reconstruct(QPDFExc& e) @@ -651,44 +622,44 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
651 } catch (std::exception&) { 622 } catch (std::exception&) {
652 continue; 623 continue;
653 } 624 }
654 - auto offset = item.offset(); 625 + auto offset = entry.getOffset();
655 if (offset > max_offset) { 626 if (offset > max_offset) {
656 max_offset = offset; 627 max_offset = offset;
657 - trailer_ = oh.getDict(); 628 + setTrailer(oh.getDict());
658 } 629 }
659 check_warnings(); 630 check_warnings();
660 } 631 }
661 if (max_offset > 0) { 632 if (max_offset > 0) {
662 try { 633 try {
663 - read(max_offset); 634 + read_xref(max_offset);
664 } catch (std::exception&) { 635 } catch (std::exception&) {
665 - throw damaged_pdf(  
666 - "error decoding candidate xref stream while recovering damaged file"); 636 + throw damagedPDF(
  637 + "", 0, "error decoding candidate xref stream while recovering damaged file");
667 } 638 }
668 QTC::TC("qpdf", "QPDF recover xref stream"); 639 QTC::TC("qpdf", "QPDF recover xref stream");
669 } 640 }
670 } 641 }
671 642
672 - if (!trailer_) { 643 + if (!m->trailer) {
673 // We could check the last encountered object to see if it was an xref stream. If so, we 644 // We could check the last encountered object to see if it was an xref stream. If so, we
674 // could try to get the trailer from there. This may make it possible to recover files with 645 // could try to get the trailer from there. This may make it possible to recover files with
675 // bad startxref pointers even when they have object streams. 646 // bad startxref pointers even when they have object streams.
676 647
677 - throw damaged_pdf("unable to find trailer dictionary while recovering damaged file"); 648 + throw damagedPDF("", 0, "unable to find trailer dictionary while recovering damaged file");
678 } 649 }
679 - if (table.empty()) { 650 + if (m->xref_table.empty()) {
680 // We cannot check for an empty xref table in parse because empty tables are valid when 651 // We cannot check for an empty xref table in parse because empty tables are valid when
681 // creating QPDF objects from JSON. 652 // creating QPDF objects from JSON.
682 - throw damaged_pdf("unable to find objects while recovering damaged file"); 653 + throw damagedPDF("", 0, "unable to find objects while recovering damaged file");
683 } 654 }
684 check_warnings(); 655 check_warnings();
685 - if (!initialized_) {  
686 - initialized_ = true;  
687 - qpdf.getAllPages(); 656 + if (!m->parsed) {
  657 + m->parsed = true;
  658 + getAllPages();
688 check_warnings(); 659 check_warnings();
689 - if (qpdf.m->all_pages.empty()) {  
690 - initialized_ = false;  
691 - throw damaged_pdf("unable to find any pages while recovering damaged file"); 660 + if (m->all_pages.empty()) {
  661 + m->parsed = false;
  662 + throw damagedPDF("", 0, "unable to find any pages while recovering damaged file");
692 } 663 }
693 } 664 }
694 // We could iterate through the objects looking for streams and try to find objects inside of 665 // We could iterate through the objects looking for streams and try to find objects inside of
@@ -699,7 +670,7 @@ QPDF::Xref_table::reconstruct(QPDFExc& e) @@ -699,7 +670,7 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
699 } 670 }
700 671
701 void 672 void
702 -QPDF::Xref_table::read(qpdf_offset_t xref_offset) 673 +QPDF::read_xref(qpdf_offset_t xref_offset)
703 { 674 {
704 std::map<int, int> free_table; 675 std::map<int, int> free_table;
705 std::set<qpdf_offset_t> visited; 676 std::set<qpdf_offset_t> visited;
@@ -707,7 +678,7 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset) @@ -707,7 +678,7 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset)
707 visited.insert(xref_offset); 678 visited.insert(xref_offset);
708 char buf[7]; 679 char buf[7];
709 memset(buf, 0, sizeof(buf)); 680 memset(buf, 0, sizeof(buf));
710 - file->seek(xref_offset, SEEK_SET); 681 + m->file->seek(xref_offset, SEEK_SET);
711 // Some files miss the mark a little with startxref. We could do a better job of searching 682 // Some files miss the mark a little with startxref. We could do a better job of searching
712 // in the neighborhood for something that looks like either an xref table or stream, but the 683 // in the neighborhood for something that looks like either an xref table or stream, but the
713 // simple heuristic of skipping whitespace can help with the xref table case and is harmless 684 // simple heuristic of skipping whitespace can help with the xref table case and is harmless
@@ -716,11 +687,11 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset) @@ -716,11 +687,11 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset)
716 bool skipped_space = false; 687 bool skipped_space = false;
717 while (!done) { 688 while (!done) {
718 char ch; 689 char ch;
719 - if (1 == file->read(&ch, 1)) { 690 + if (1 == m->file->read(&ch, 1)) {
720 if (QUtil::is_space(ch)) { 691 if (QUtil::is_space(ch)) {
721 skipped_space = true; 692 skipped_space = true;
722 } else { 693 } else {
723 - file->unreadCh(ch); 694 + m->file->unreadCh(ch);
724 done = true; 695 done = true;
725 } 696 }
726 } else { 697 } else {
@@ -729,13 +700,13 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset) @@ -729,13 +700,13 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset)
729 } 700 }
730 } 701 }
731 702
732 - file->read(buf, sizeof(buf) - 1); 703 + m->file->read(buf, sizeof(buf) - 1);
733 // The PDF spec says xref must be followed by a line terminator, but files exist in the wild 704 // The PDF spec says xref must be followed by a line terminator, but files exist in the wild
734 // where it is terminated by arbitrary whitespace. 705 // where it is terminated by arbitrary whitespace.
735 if ((strncmp(buf, "xref", 4) == 0) && QUtil::is_space(buf[4])) { 706 if ((strncmp(buf, "xref", 4) == 0) && QUtil::is_space(buf[4])) {
736 if (skipped_space) { 707 if (skipped_space) {
737 QTC::TC("qpdf", "QPDF xref skipped space"); 708 QTC::TC("qpdf", "QPDF xref skipped space");
738 - warn_damaged("extraneous whitespace seen before xref"); 709 + warn(damagedPDF("", 0, "extraneous whitespace seen before xref"));
739 } 710 }
740 QTC::TC( 711 QTC::TC(
741 "qpdf", 712 "qpdf",
@@ -749,38 +720,54 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset) @@ -749,38 +720,54 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset)
749 while (QUtil::is_space(buf[skip])) { 720 while (QUtil::is_space(buf[skip])) {
750 ++skip; 721 ++skip;
751 } 722 }
752 - xref_offset = process_section(xref_offset + skip); 723 + xref_offset = read_xrefTable(xref_offset + skip);
753 } else { 724 } else {
754 - xref_offset = read_stream(xref_offset); 725 + xref_offset = read_xrefStream(xref_offset);
755 } 726 }
756 if (visited.count(xref_offset) != 0) { 727 if (visited.count(xref_offset) != 0) {
757 QTC::TC("qpdf", "QPDF xref loop"); 728 QTC::TC("qpdf", "QPDF xref loop");
758 - throw damaged_pdf("loop detected following xref tables"); 729 + throw damagedPDF("", 0, "loop detected following xref tables");
759 } 730 }
760 } 731 }
761 732
762 - if (!trailer_) {  
763 - throw damaged_pdf("unable to find trailer while reading xref"); 733 + if (!m->trailer) {
  734 + throw damagedPDF("", 0, "unable to find trailer while reading xref");
764 } 735 }
765 - int size = trailer_.getKey("/Size").getIntValueAsInt();  
766 -  
767 - if (size < 3) {  
768 - throw damaged_pdf("too few objects - file can't have a page tree"); 736 + int size = m->trailer.getKey("/Size").getIntValueAsInt();
  737 + int max_obj = 0;
  738 + if (!m->xref_table.empty()) {
  739 + max_obj = m->xref_table.rbegin()->first.getObj();
  740 + }
  741 + if (!m->deleted_objects.empty()) {
  742 + max_obj = std::max(max_obj, *(m->deleted_objects.rbegin()));
  743 + }
  744 + if ((size < 1) || (size - 1 != max_obj)) {
  745 + QTC::TC("qpdf", "QPDF xref size mismatch");
  746 + warn(damagedPDF(
  747 + "",
  748 + 0,
  749 + ("reported number of objects (" + std::to_string(size) +
  750 + ") is not one plus the highest object number (" + std::to_string(max_obj) + ")")));
769 } 751 }
770 752
771 - // We are no longer reporting what the highest id in the xref table is. I don't think it adds  
772 - // anything. If we want to report more detail, we should report the total number of missing  
773 - // entries, including missing entries before the last actual entry. 753 + // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we
  754 + // never depend on its being set.
  755 + m->deleted_objects.clear();
  756 +
  757 + // Make sure we keep only the highest generation for any object.
  758 + QPDFObjGen last_og{-1, 0};
  759 + for (auto const& item: m->xref_table) {
  760 + auto id = item.first.getObj();
  761 + if (id == last_og.getObj() && id > 0) {
  762 + removeObject(last_og);
  763 + }
  764 + last_og = item.first;
  765 + }
774 } 766 }
775 767
776 -QPDF::Xref_table::Subsection  
777 -QPDF::Xref_table::subsection(std::string const& line) 768 +bool
  769 +QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes)
778 { 770 {
779 - auto terminate = [this]() -> void {  
780 - QTC::TC("qpdf", "QPDF invalid xref");  
781 - throw damaged_table("xref syntax invalid");  
782 - };  
783 -  
784 // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated 771 // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
785 // buffer. 772 // buffer.
786 char const* p = line.c_str(); 773 char const* p = line.c_str();
@@ -792,7 +779,7 @@ QPDF::Xref_table::subsection(std::string const& line) @@ -792,7 +779,7 @@ QPDF::Xref_table::subsection(std::string const& line)
792 } 779 }
793 // Require digit 780 // Require digit
794 if (!QUtil::is_digit(*p)) { 781 if (!QUtil::is_digit(*p)) {
795 - terminate(); 782 + return false;
796 } 783 }
797 // Gather digits 784 // Gather digits
798 std::string obj_str; 785 std::string obj_str;
@@ -801,7 +788,7 @@ QPDF::Xref_table::subsection(std::string const& line) @@ -801,7 +788,7 @@ QPDF::Xref_table::subsection(std::string const& line)
801 } 788 }
802 // Require space 789 // Require space
803 if (!QUtil::is_space(*p)) { 790 if (!QUtil::is_space(*p)) {
804 - terminate(); 791 + return false;
805 } 792 }
806 // Skip spaces 793 // Skip spaces
807 while (QUtil::is_space(*p)) { 794 while (QUtil::is_space(*p)) {
@@ -809,7 +796,7 @@ QPDF::Xref_table::subsection(std::string const& line) @@ -809,7 +796,7 @@ QPDF::Xref_table::subsection(std::string const& line)
809 } 796 }
810 // Require digit 797 // Require digit
811 if (!QUtil::is_digit(*p)) { 798 if (!QUtil::is_digit(*p)) {
812 - terminate(); 799 + return false;
813 } 800 }
814 // Gather digits 801 // Gather digits
815 std::string num_str; 802 std::string num_str;
@@ -820,82 +807,18 @@ QPDF::Xref_table::subsection(std::string const& line) @@ -820,82 +807,18 @@ QPDF::Xref_table::subsection(std::string const& line)
820 while (QUtil::is_space(*p)) { 807 while (QUtil::is_space(*p)) {
821 ++p; 808 ++p;
822 } 809 }
823 - auto obj = QUtil::string_to_int(obj_str.c_str());  
824 - auto count = QUtil::string_to_int(num_str.c_str());  
825 - if (obj > max_id() || count > max_id() || (obj + count) > max_id()) {  
826 - throw damaged_table("xref table subsection header contains impossibly large entry");  
827 - }  
828 - return {obj, count, file->getLastOffset() + toI(p - start)};  
829 -}  
830 -  
831 -std::vector<QPDF::Xref_table::Subsection>  
832 -QPDF::Xref_table::bad_subsections(std::string& line, qpdf_offset_t start)  
833 -{  
834 - std::vector<QPDF::Xref_table::Subsection> result;  
835 - qpdf_offset_t f1 = 0;  
836 - int f2 = 0;  
837 - char type = '\0';  
838 -  
839 - file->seek(start, SEEK_SET);  
840 -  
841 - while (true) {  
842 - line.assign(50, '\0');  
843 - file->read(line.data(), line.size());  
844 - auto [obj, num, offset] = result.emplace_back(subsection(line));  
845 - file->seek(offset, SEEK_SET);  
846 - for (qpdf_offset_t i = obj; i - num < obj; ++i) {  
847 - if (!read_entry(f1, f2, type)) {  
848 - QTC::TC("qpdf", "QPDF invalid xref entry");  
849 - throw damaged_table("invalid xref entry (obj=" + std::to_string(i) + ")");  
850 - }  
851 - }  
852 - qpdf_offset_t pos = file->tell();  
853 - if (read_token().isWord("trailer")) {  
854 - return result;  
855 - } else {  
856 - file->seek(pos, SEEK_SET);  
857 - }  
858 - }  
859 -}  
860 -  
861 -// Optimistically read and parse all subsection headers. If an error is encountered return the  
862 -// result of bad_subsections.  
863 -std::vector<QPDF::Xref_table::Subsection>  
864 -QPDF::Xref_table::subsections(std::string& line)  
865 -{  
866 - auto recovery_offset = file->tell();  
867 - try {  
868 - std::vector<QPDF::Xref_table::Subsection> result;  
869 -  
870 - while (true) {  
871 - line.assign(50, '\0');  
872 - file->read(line.data(), line.size());  
873 - auto& sub = result.emplace_back(subsection(line));  
874 - auto count = std::get<1>(sub);  
875 - auto offset = std::get<2>(sub);  
876 - file->seek(offset + 20 * toO(count) - 1, SEEK_SET);  
877 - file->read(line.data(), 1);  
878 - if (!(line[0] == '\n' || line[0] == '\n')) {  
879 - return bad_subsections(line, recovery_offset);  
880 - }  
881 - qpdf_offset_t pos = file->tell();  
882 - if (read_token().isWord("trailer")) {  
883 - return result;  
884 - } else {  
885 - file->seek(pos, SEEK_SET);  
886 - }  
887 - }  
888 - } catch (...) {  
889 - return bad_subsections(line, recovery_offset);  
890 - } 810 + bytes = toI(p - start);
  811 + obj = QUtil::string_to_int(obj_str.c_str());
  812 + num = QUtil::string_to_int(num_str.c_str());
  813 + return true;
891 } 814 }
892 815
893 bool 816 bool
894 -QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type) 817 +QPDF::read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type)
895 { 818 {
896 // Reposition after initial read attempt and reread. 819 // Reposition after initial read attempt and reread.
897 - file->seek(file->getLastOffset(), SEEK_SET);  
898 - auto line = file->readLine(30); 820 + m->file->seek(m->file->getLastOffset(), SEEK_SET);
  821 + auto line = m->file->readLine(30);
899 822
900 // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated 823 // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
901 // buffer. 824 // buffer.
@@ -961,7 +884,7 @@ QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type) @@ -961,7 +884,7 @@ QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type)
961 } 884 }
962 885
963 if (invalid) { 886 if (invalid) {
964 - qpdf.warn(damaged_table("accepting invalid xref table entry")); 887 + warn(damagedPDF("xref table", "accepting invalid xref table entry"));
965 } 888 }
966 889
967 f1 = QUtil::string_to_ll(f1_str.c_str()); 890 f1 = QUtil::string_to_ll(f1_str.c_str());
@@ -973,10 +896,10 @@ QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type) @@ -973,10 +896,10 @@ QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type)
973 // Optimistically read and parse xref entry. If entry is bad, call read_bad_xrefEntry and return 896 // Optimistically read and parse xref entry. If entry is bad, call read_bad_xrefEntry and return
974 // result. 897 // result.
975 bool 898 bool
976 -QPDF::Xref_table::read_entry(qpdf_offset_t& f1, int& f2, char& type) 899 +QPDF::read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type)
977 { 900 {
978 std::array<char, 21> line; 901 std::array<char, 21> line;
979 - if (file->read(line.data(), 20) != 20) { 902 + if (m->file->read(line.data(), 20) != 20) {
980 // C++20: [[unlikely]] 903 // C++20: [[unlikely]]
981 return false; 904 return false;
982 } 905 }
@@ -1022,78 +945,84 @@ QPDF::Xref_table::read_entry(qpdf_offset_t& f1, int& f2, char& type) @@ -1022,78 +945,84 @@ QPDF::Xref_table::read_entry(qpdf_offset_t& f1, int& f2, char& type)
1022 return true; 945 return true;
1023 } 946 }
1024 } 947 }
1025 - return read_bad_entry(f1, f2, type); 948 + return read_bad_xrefEntry(f1, f2, type);
1026 } 949 }
1027 950
1028 // Read a single cross-reference table section and associated trailer. 951 // Read a single cross-reference table section and associated trailer.
1029 qpdf_offset_t 952 qpdf_offset_t
1030 -QPDF::Xref_table::process_section(qpdf_offset_t xref_offset) 953 +QPDF::read_xrefTable(qpdf_offset_t xref_offset)
1031 { 954 {
1032 - file->seek(xref_offset, SEEK_SET); 955 + m->file->seek(xref_offset, SEEK_SET);
1033 std::string line; 956 std::string line;
1034 - auto subs = subsections(line);  
1035 -  
1036 - auto cur_trailer_offset = file->tell();  
1037 - auto cur_trailer = read_trailer();  
1038 - if (!cur_trailer.isDictionary()) {  
1039 - QTC::TC("qpdf", "QPDF missing trailer");  
1040 - throw qpdf.damagedPDF("", "expected trailer dictionary");  
1041 - }  
1042 -  
1043 - if (!trailer_) {  
1044 - unsigned int sz;  
1045 - trailer_ = cur_trailer;  
1046 -  
1047 - if (!trailer_.hasKey("/Size")) {  
1048 - QTC::TC("qpdf", "QPDF trailer lacks size");  
1049 - throw qpdf.damagedPDF("trailer", "trailer dictionary lacks /Size key");  
1050 - }  
1051 - if (!trailer_.getKey("/Size").getValueAsUInt(sz)) {  
1052 - QTC::TC("qpdf", "QPDF trailer size not integer");  
1053 - throw qpdf.damagedPDF("trailer", "/Size key in trailer dictionary is not an integer");  
1054 - }  
1055 -  
1056 - table.resize(sz);  
1057 - }  
1058 -  
1059 - for (auto [obj, num, offset]: subs) {  
1060 - file->seek(offset, SEEK_SET); 957 + while (true) {
  958 + line.assign(50, '\0');
  959 + m->file->read(line.data(), line.size());
  960 + int obj = 0;
  961 + int num = 0;
  962 + int bytes = 0;
  963 + if (!parse_xrefFirst(line, obj, num, bytes)) {
  964 + QTC::TC("qpdf", "QPDF invalid xref");
  965 + throw damagedPDF("xref table", "xref syntax invalid");
  966 + }
  967 + m->file->seek(m->file->getLastOffset() + bytes, SEEK_SET);
1061 for (qpdf_offset_t i = obj; i - num < obj; ++i) { 968 for (qpdf_offset_t i = obj; i - num < obj; ++i) {
1062 if (i == 0) { 969 if (i == 0) {
1063 // This is needed by checkLinearization() 970 // This is needed by checkLinearization()
1064 - first_item_offset_ = file->tell(); 971 + m->first_xref_item_offset = m->file->tell();
1065 } 972 }
1066 // For xref_table, these will always be small enough to be ints 973 // For xref_table, these will always be small enough to be ints
1067 qpdf_offset_t f1 = 0; 974 qpdf_offset_t f1 = 0;
1068 int f2 = 0; 975 int f2 = 0;
1069 char type = '\0'; 976 char type = '\0';
1070 - if (!read_entry(f1, f2, type)) {  
1071 - throw damaged_table("invalid xref entry (obj=" + std::to_string(i) + ")"); 977 + if (!read_xrefEntry(f1, f2, type)) {
  978 + QTC::TC("qpdf", "QPDF invalid xref entry");
  979 + throw damagedPDF(
  980 + "xref table", "invalid xref entry (obj=" + std::to_string(i) + ")");
1072 } 981 }
1073 if (type == 'f') { 982 if (type == 'f') {
1074 - insert_free(QPDFObjGen(toI(i), f2)); 983 + insertFreeXrefEntry(QPDFObjGen(toI(i), f2));
1075 } else { 984 } else {
1076 - insert(toI(i), 1, f1, f2); 985 + insertXrefEntry(toI(i), 1, f1, f2);
1077 } 986 }
1078 } 987 }
1079 - qpdf_offset_t pos = file->tell();  
1080 - if (read_token().isWord("trailer")) { 988 + qpdf_offset_t pos = m->file->tell();
  989 + if (readToken(*m->file).isWord("trailer")) {
1081 break; 990 break;
1082 } else { 991 } else {
1083 - file->seek(pos, SEEK_SET); 992 + m->file->seek(pos, SEEK_SET);
  993 + }
  994 + }
  995 +
  996 + // Set offset to previous xref table if any
  997 + QPDFObjectHandle cur_trailer = readTrailer();
  998 + if (!cur_trailer.isDictionary()) {
  999 + QTC::TC("qpdf", "QPDF missing trailer");
  1000 + throw damagedPDF("", "expected trailer dictionary");
  1001 + }
  1002 +
  1003 + if (!m->trailer) {
  1004 + setTrailer(cur_trailer);
  1005 +
  1006 + if (!m->trailer.hasKey("/Size")) {
  1007 + QTC::TC("qpdf", "QPDF trailer lacks size");
  1008 + throw damagedPDF("trailer", "trailer dictionary lacks /Size key");
  1009 + }
  1010 + if (!m->trailer.getKey("/Size").isInteger()) {
  1011 + QTC::TC("qpdf", "QPDF trailer size not integer");
  1012 + throw damagedPDF("trailer", "/Size key in trailer dictionary is not an integer");
1084 } 1013 }
1085 } 1014 }
1086 1015
1087 if (cur_trailer.hasKey("/XRefStm")) { 1016 if (cur_trailer.hasKey("/XRefStm")) {
1088 - if (ignore_streams_) { 1017 + if (m->ignore_xref_streams) {
1089 QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer"); 1018 QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer");
1090 } else { 1019 } else {
1091 if (cur_trailer.getKey("/XRefStm").isInteger()) { 1020 if (cur_trailer.getKey("/XRefStm").isInteger()) {
1092 // Read the xref stream but disregard any return value -- we'll use our trailer's 1021 // Read the xref stream but disregard any return value -- we'll use our trailer's
1093 // /Prev key instead of the xref stream's. 1022 // /Prev key instead of the xref stream's.
1094 - (void)read_stream(cur_trailer.getKey("/XRefStm").getIntValue()); 1023 + (void)read_xrefStream(cur_trailer.getKey("/XRefStm").getIntValue());
1095 } else { 1024 } else {
1096 - throw qpdf.damagedPDF("xref stream", cur_trailer_offset, "invalid /XRefStm"); 1025 + throw damagedPDF("xref stream", xref_offset, "invalid /XRefStm");
1097 } 1026 }
1098 } 1027 }
1099 } 1028 }
@@ -1101,8 +1030,7 @@ QPDF::Xref_table::process_section(qpdf_offset_t xref_offset) @@ -1101,8 +1030,7 @@ QPDF::Xref_table::process_section(qpdf_offset_t xref_offset)
1101 if (cur_trailer.hasKey("/Prev")) { 1030 if (cur_trailer.hasKey("/Prev")) {
1102 if (!cur_trailer.getKey("/Prev").isInteger()) { 1031 if (!cur_trailer.getKey("/Prev").isInteger()) {
1103 QTC::TC("qpdf", "QPDF trailer prev not integer"); 1032 QTC::TC("qpdf", "QPDF trailer prev not integer");
1104 - throw qpdf.damagedPDF(  
1105 - "trailer", cur_trailer_offset, "/Prev key in trailer dictionary is not an integer"); 1033 + throw damagedPDF("trailer", "/Prev key in trailer dictionary is not an integer");
1106 } 1034 }
1107 QTC::TC("qpdf", "QPDF prev key in trailer dictionary"); 1035 QTC::TC("qpdf", "QPDF prev key in trailer dictionary");
1108 return cur_trailer.getKey("/Prev").getIntValue(); 1036 return cur_trailer.getKey("/Prev").getIntValue();
@@ -1113,35 +1041,34 @@ QPDF::Xref_table::process_section(qpdf_offset_t xref_offset) @@ -1113,35 +1041,34 @@ QPDF::Xref_table::process_section(qpdf_offset_t xref_offset)
1113 1041
1114 // Read a single cross-reference stream. 1042 // Read a single cross-reference stream.
1115 qpdf_offset_t 1043 qpdf_offset_t
1116 -QPDF::Xref_table::read_stream(qpdf_offset_t xref_offset) 1044 +QPDF::read_xrefStream(qpdf_offset_t xref_offset)
1117 { 1045 {
1118 - if (!ignore_streams_) { 1046 + if (!m->ignore_xref_streams) {
1119 QPDFObjGen x_og; 1047 QPDFObjGen x_og;
1120 QPDFObjectHandle xref_obj; 1048 QPDFObjectHandle xref_obj;
1121 try { 1049 try {
1122 - xref_obj = qpdf.readObjectAtOffset(  
1123 - false, xref_offset, "xref stream", QPDFObjGen(0, 0), x_og, true); 1050 + xref_obj =
  1051 + readObjectAtOffset(false, xref_offset, "xref stream", QPDFObjGen(0, 0), x_og, true);
1124 } catch (QPDFExc&) { 1052 } catch (QPDFExc&) {
1125 // ignore -- report error below 1053 // ignore -- report error below
1126 } 1054 }
1127 if (xref_obj.isStreamOfType("/XRef")) { 1055 if (xref_obj.isStreamOfType("/XRef")) {
1128 QTC::TC("qpdf", "QPDF found xref stream"); 1056 QTC::TC("qpdf", "QPDF found xref stream");
1129 - return process_stream(xref_offset, xref_obj); 1057 + return processXRefStream(xref_offset, xref_obj);
1130 } 1058 }
1131 } 1059 }
1132 1060
1133 QTC::TC("qpdf", "QPDF can't find xref"); 1061 QTC::TC("qpdf", "QPDF can't find xref");
1134 - throw qpdf.damagedPDF("", xref_offset, "xref not found"); 1062 + throw damagedPDF("", xref_offset, "xref not found");
1135 return 0; // unreachable 1063 return 0; // unreachable
1136 } 1064 }
1137 1065
1138 // Return the entry size of the xref stream and the processed W array. 1066 // Return the entry size of the xref stream and the processed W array.
1139 std::pair<int, std::array<int, 3>> 1067 std::pair<int, std::array<int, 3>>
1140 -QPDF::Xref_table::process_W(  
1141 - QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged) 1068 +QPDF::processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged)
1142 { 1069 {
1143 auto W_obj = dict.getKey("/W"); 1070 auto W_obj = dict.getKey("/W");
1144 - if (!(W_obj.isArray() && W_obj.getArrayNItems() >= 3 && W_obj.getArrayItem(0).isInteger() && 1071 + if (!(W_obj.isArray() && (W_obj.getArrayNItems() >= 3) && W_obj.getArrayItem(0).isInteger() &&
1145 W_obj.getArrayItem(1).isInteger() && W_obj.getArrayItem(2).isInteger())) { 1072 W_obj.getArrayItem(1).isInteger() && W_obj.getArrayItem(2).isInteger())) {
1146 throw damaged("Cross-reference stream does not have a proper /W key"); 1073 throw damaged("Cross-reference stream does not have a proper /W key");
1147 } 1074 }
@@ -1166,10 +1093,9 @@ QPDF::Xref_table::process_W( @@ -1166,10 +1093,9 @@ QPDF::Xref_table::process_W(
1166 return {entry_size, W}; 1093 return {entry_size, W};
1167 } 1094 }
1168 1095
1169 -// Validate Size entry and return the maximum number of entries that the xref stream can contain and  
1170 -// the value of the Size entry.  
1171 -std::pair<int, size_t>  
1172 -QPDF::Xref_table::process_Size( 1096 +// Validate Size key and return the maximum number of entries that the xref stream can contain.
  1097 +int
  1098 +QPDF::processXRefSize(
1173 QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged) 1099 QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged)
1174 { 1100 {
1175 // Number of entries is limited by the highest possible object id and stream size. 1101 // Number of entries is limited by the highest possible object id and stream size.
@@ -1188,12 +1114,12 @@ QPDF::Xref_table::process_Size( @@ -1188,12 +1114,12 @@ QPDF::Xref_table::process_Size(
1188 throw damaged("Cross-reference stream has an impossibly large /Size key"); 1114 throw damaged("Cross-reference stream has an impossibly large /Size key");
1189 } 1115 }
1190 // We are not validating that Size <= (Size key of parent xref / trailer). 1116 // We are not validating that Size <= (Size key of parent xref / trailer).
1191 - return {max_num_entries, toS(size)}; 1117 + return max_num_entries;
1192 } 1118 }
1193 1119
1194 // Return the number of entries of the xref stream and the processed Index array. 1120 // Return the number of entries of the xref stream and the processed Index array.
1195 std::pair<int, std::vector<std::pair<int, int>>> 1121 std::pair<int, std::vector<std::pair<int, int>>>
1196 -QPDF::Xref_table::process_Index( 1122 +QPDF::processXRefIndex(
1197 QPDFObjectHandle& dict, int max_num_entries, std::function<QPDFExc(std::string_view)> damaged) 1123 QPDFObjectHandle& dict, int max_num_entries, std::function<QPDFExc(std::string_view)> damaged)
1198 { 1124 {
1199 auto size = dict.getKey("/Size").getIntValueAsInt(); 1125 auto size = dict.getKey("/Size").getIntValueAsInt();
@@ -1260,17 +1186,17 @@ QPDF::Xref_table::process_Index( @@ -1260,17 +1186,17 @@ QPDF::Xref_table::process_Index(
1260 } 1186 }
1261 1187
1262 qpdf_offset_t 1188 qpdf_offset_t
1263 -QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) 1189 +QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
1264 { 1190 {
1265 auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc { 1191 auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc {
1266 - return qpdf.damagedPDF("xref stream", xref_offset, msg.data()); 1192 + return damagedPDF("xref stream", xref_offset, msg.data());
1267 }; 1193 };
1268 1194
1269 auto dict = xref_obj.getDict(); 1195 auto dict = xref_obj.getDict();
1270 1196
1271 - auto [entry_size, W] = process_W(dict, damaged);  
1272 - auto [max_num_entries, size] = process_Size(dict, entry_size, damaged);  
1273 - auto [num_entries, indx] = process_Index(dict, max_num_entries, damaged); 1197 + auto [entry_size, W] = processXRefW(dict, damaged);
  1198 + int max_num_entries = processXRefSize(dict, entry_size, damaged);
  1199 + auto [num_entries, indx] = processXRefIndex(dict, max_num_entries, damaged);
1274 1200
1275 std::shared_ptr<Buffer> bp = xref_obj.getStreamData(qpdf_dl_specialized); 1201 std::shared_ptr<Buffer> bp = xref_obj.getStreamData(qpdf_dl_specialized);
1276 size_t actual_size = bp->getSize(); 1202 size_t actual_size = bp->getSize();
@@ -1283,16 +1209,8 @@ QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xr @@ -1283,16 +1209,8 @@ QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xr
1283 if (expected_size > actual_size) { 1209 if (expected_size > actual_size) {
1284 throw x; 1210 throw x;
1285 } else { 1211 } else {
1286 - qpdf.warn(x);  
1287 - }  
1288 - }  
1289 -  
1290 - if (!trailer_) {  
1291 - trailer_ = dict;  
1292 - if (size > toS(max_id_)) {  
1293 - throw damaged("Cross-reference stream /Size entry is impossibly large"); 1212 + warn(x);
1294 } 1213 }
1295 - table.resize(size);  
1296 } 1214 }
1297 1215
1298 bool saw_first_compressed_object = false; 1216 bool saw_first_compressed_object = false;
@@ -1320,29 +1238,33 @@ QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xr @@ -1320,29 +1238,33 @@ QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xr
1320 // object record, in which case the generation number appears as the third field. 1238 // object record, in which case the generation number appears as the third field.
1321 if (saw_first_compressed_object) { 1239 if (saw_first_compressed_object) {
1322 if (fields[0] != 2) { 1240 if (fields[0] != 2) {
1323 - uncompressed_after_compressed_ = true; 1241 + m->uncompressed_after_compressed = true;
1324 } 1242 }
1325 } else if (fields[0] == 2) { 1243 } else if (fields[0] == 2) {
1326 saw_first_compressed_object = true; 1244 saw_first_compressed_object = true;
1327 } 1245 }
1328 if (obj == 0) { 1246 if (obj == 0) {
1329 // This is needed by checkLinearization() 1247 // This is needed by checkLinearization()
1330 - first_item_offset_ = xref_offset; 1248 + m->first_xref_item_offset = xref_offset;
1331 } else if (fields[0] == 0) { 1249 } else if (fields[0] == 0) {
1332 // Ignore fields[2], which we don't care about in this case. This works around the 1250 // Ignore fields[2], which we don't care about in this case. This works around the
1333 // issue of some PDF files that put invalid values, like -1, here for deleted 1251 // issue of some PDF files that put invalid values, like -1, here for deleted
1334 // objects. 1252 // objects.
1335 - insert_free(QPDFObjGen(obj, 0)); 1253 + insertFreeXrefEntry(QPDFObjGen(obj, 0));
1336 } else { 1254 } else {
1337 - insert(obj, toI(fields[0]), fields[1], toI(fields[2])); 1255 + insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2]));
1338 } 1256 }
1339 ++obj; 1257 ++obj;
1340 } 1258 }
1341 } 1259 }
1342 1260
  1261 + if (!m->trailer) {
  1262 + setTrailer(dict);
  1263 + }
  1264 +
1343 if (dict.hasKey("/Prev")) { 1265 if (dict.hasKey("/Prev")) {
1344 if (!dict.getKey("/Prev").isInteger()) { 1266 if (!dict.getKey("/Prev").isInteger()) {
1345 - throw qpdf.damagedPDF( 1267 + throw damagedPDF(
1346 "xref stream", "/Prev key in xref stream dictionary is not an integer"); 1268 "xref stream", "/Prev key in xref stream dictionary is not an integer");
1347 } 1269 }
1348 QTC::TC("qpdf", "QPDF prev key in xref stream dictionary"); 1270 QTC::TC("qpdf", "QPDF prev key in xref stream dictionary");
@@ -1353,7 +1275,7 @@ QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xr @@ -1353,7 +1275,7 @@ QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xr
1353 } 1275 }
1354 1276
1355 void 1277 void
1356 -QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) 1278 +QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2)
1357 { 1279 {
1358 // Populate the xref table in such a way that the first reference to an object that we see, 1280 // Populate the xref table in such a way that the first reference to an object that we see,
1359 // which is the one in the latest xref table in which it appears, is the one that gets stored. 1281 // which is the one in the latest xref table in which it appears, is the one that gets stored.
@@ -1362,35 +1284,23 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) @@ -1362,35 +1284,23 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
1362 // If there is already an entry for this object and generation in the table, it means that a 1284 // If there is already an entry for this object and generation in the table, it means that a
1363 // later xref table has registered this object. Disregard this one. 1285 // later xref table has registered this object. Disregard this one.
1364 1286
1365 - int new_gen = f0 == 2 ? 0 : f2;  
1366 -  
1367 - if (!(obj > 0 && static_cast<size_t>(obj) < table.size() && 0 <= f2 && new_gen < 65535)) {  
1368 - // We are ignoring invalid objgens. Most will arrive here from xref reconstruction. There  
1369 - // is probably no point having another warning but we could count invalid items in order to  
1370 - // decide when to give up.  
1371 - QTC::TC("qpdf", "QPDF xref overwrite invalid objgen"); 1287 + if (obj > m->xref_table_max_id) {
  1288 + // ignore impossibly large object ids or object ids > Size.
1372 return; 1289 return;
1373 } 1290 }
1374 1291
1375 - auto& entry = table[static_cast<size_t>(obj)];  
1376 - auto old_type = entry.type();  
1377 -  
1378 - if (!old_type && entry.gen() > 0) {  
1379 - // At the moment we are processing the updates last to first and therefore the gen doesn't  
1380 - // matter as long as it > 0 to distinguish it from an uninitialized entry. This will need  
1381 - // to be revisited when we want to support incremental updates or more comprhensive  
1382 - // checking. 1292 + if (m->deleted_objects.count(obj)) {
1383 QTC::TC("qpdf", "QPDF xref deleted object"); 1293 QTC::TC("qpdf", "QPDF xref deleted object");
1384 return; 1294 return;
1385 } 1295 }
1386 1296
1387 if (f0 == 2 && static_cast<int>(f1) == obj) { 1297 if (f0 == 2 && static_cast<int>(f1) == obj) {
1388 - qpdf.warn(qpdf.damagedPDF(  
1389 - "xref stream", "self-referential object stream " + std::to_string(obj))); 1298 + warn(damagedPDF("xref stream", "self-referential object stream " + std::to_string(obj)));
1390 return; 1299 return;
1391 } 1300 }
1392 1301
1393 - if (old_type && entry.gen() >= new_gen) { 1302 + auto [iter, created] = m->xref_table.try_emplace(QPDFObjGen(obj, (f0 == 2 ? 0 : f2)));
  1303 + if (!created) {
1394 QTC::TC("qpdf", "QPDF xref reused object"); 1304 QTC::TC("qpdf", "QPDF xref reused object");
1395 return; 1305 return;
1396 } 1306 }
@@ -1398,129 +1308,85 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) @@ -1398,129 +1308,85 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
1398 switch (f0) { 1308 switch (f0) {
1399 case 1: 1309 case 1:
1400 // f2 is generation 1310 // f2 is generation
1401 - QTC::TC("qpdf", "QPDF xref gen > 0", (f2 > 0) ? 1 : 0);  
1402 - entry = {f2, Uncompressed(f1)}; 1311 + QTC::TC("qpdf", "QPDF xref gen > 0", ((f2 > 0) ? 1 : 0));
  1312 + iter->second = QPDFXRefEntry(f1);
1403 break; 1313 break;
1404 1314
1405 case 2: 1315 case 2:
1406 - entry = {0, Compressed(toI(f1), f2)};  
1407 - object_streams_ = true; 1316 + iter->second = QPDFXRefEntry(toI(f1), f2);
1408 break; 1317 break;
1409 1318
1410 default: 1319 default:
1411 - throw qpdf.damagedPDF(  
1412 - "xref stream", "unknown xref stream entry type " + std::to_string(f0)); 1320 + throw damagedPDF("xref stream", "unknown xref stream entry type " + std::to_string(f0));
1413 break; 1321 break;
1414 } 1322 }
1415 } 1323 }
1416 1324
1417 void 1325 void
1418 -QPDF::Xref_table::insert_free(QPDFObjGen og) 1326 +QPDF::insertFreeXrefEntry(QPDFObjGen og)
1419 { 1327 {
1420 - // At the moment we are processing the updates last to first and therefore the gen doesn't  
1421 - // matter as long as it > 0 to distinguish it from an uninitialized entry. This will need to be  
1422 - // revisited when we want to support incremental updates or more comprhensive checking.  
1423 - if (og.getObj() < 1) {  
1424 - return;  
1425 - }  
1426 - size_t id = static_cast<size_t>(og.getObj());  
1427 - if (id < table.size() && !type(id)) {  
1428 - table[id] = {1, {}}; 1328 + if (!m->xref_table.count(og)) {
  1329 + m->deleted_objects.insert(og.getObj());
1429 } 1330 }
1430 } 1331 }
1431 1332
1432 -QPDFObjGen  
1433 -QPDF::Xref_table::at_offset(qpdf_offset_t offset) const noexcept 1333 +// Replace uncompressed object. This is used in xref recovery mode, which reads the file from
  1334 +// beginning to end.
  1335 +void
  1336 +QPDF::insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2)
1434 { 1337 {
1435 - int id = 0;  
1436 - int gen = 0;  
1437 - qpdf_offset_t start = 0;  
1438 -  
1439 - int i = 0;  
1440 - for (auto const& item: table) {  
1441 - auto o = item.offset();  
1442 - if (start < o && o <= offset) {  
1443 - start = o;  
1444 - id = i;  
1445 - gen = item.gen();  
1446 - }  
1447 - ++i; 1338 + if (!(obj > 0 && obj <= m->xref_table_max_id && 0 <= f2 && f2 < 65535)) {
  1339 + QTC::TC("qpdf", "QPDF xref overwrite invalid objgen");
  1340 + return;
1448 } 1341 }
1449 - return QPDFObjGen(id, gen);  
1450 -}  
1451 1342
1452 -std::map<QPDFObjGen, QPDFXRefEntry>  
1453 -QPDF::Xref_table::as_map() const  
1454 -{  
1455 - std::map<QPDFObjGen, QPDFXRefEntry> result;  
1456 - int i{0};  
1457 - for (auto const& item: table) {  
1458 - switch (item.type()) {  
1459 - case 0:  
1460 - break;  
1461 - case 1:  
1462 - result.emplace(QPDFObjGen(i, item.gen()), item.offset());  
1463 - break;  
1464 - case 2:  
1465 - result.emplace(  
1466 - QPDFObjGen(i, 0), QPDFXRefEntry(item.stream_number(), item.stream_index()));  
1467 - break;  
1468 - default:  
1469 - throw std::logic_error("Xref_table: invalid entry type");  
1470 - }  
1471 - ++i; 1343 + QPDFObjGen og(obj, f2);
  1344 + if (!m->deleted_objects.count(obj)) {
  1345 + // deleted_objects stores the uncompressed objects removed from the xref table at the start
  1346 + // of recovery.
  1347 + QTC::TC("qpdf", "QPDF xref overwrite object");
  1348 + m->xref_table[QPDFObjGen(obj, f2)] = QPDFXRefEntry(f1);
1472 } 1349 }
1473 - return result;  
1474 } 1350 }
1475 1351
1476 void 1352 void
1477 QPDF::showXRefTable() 1353 QPDF::showXRefTable()
1478 { 1354 {
1479 - m->xref_table.show();  
1480 -}  
1481 -  
1482 -void  
1483 -QPDF::Xref_table::show()  
1484 -{  
1485 - auto& cout = *qpdf.m->log->getInfo();  
1486 - int i = -1;  
1487 - for (auto const& item: table) {  
1488 - ++i;  
1489 - if (item.type()) {  
1490 - cout << std::to_string(i) << "/" << std::to_string(item.gen()) << ": ";  
1491 - switch (item.type()) {  
1492 - case 1:  
1493 - cout << "uncompressed; offset = " << item.offset() << "\n";  
1494 - break; 1355 + auto& cout = *m->log->getInfo();
  1356 + for (auto const& iter: m->xref_table) {
  1357 + QPDFObjGen const& og = iter.first;
  1358 + QPDFXRefEntry const& entry = iter.second;
  1359 + cout << og.unparse('/') << ": ";
  1360 + switch (entry.getType()) {
  1361 + case 1:
  1362 + cout << "uncompressed; offset = " << entry.getOffset();
  1363 + break;
1495 1364
1496 - case 2:  
1497 - cout << "compressed; stream = " << item.stream_number()  
1498 - << ", index = " << item.stream_index() << "\n";  
1499 - break; 1365 + case 2:
  1366 + *m->log->getInfo() << "compressed; stream = " << entry.getObjStreamNumber()
  1367 + << ", index = " << entry.getObjStreamIndex();
  1368 + break;
1500 1369
1501 - default:  
1502 - throw std::logic_error(  
1503 - "unknown cross-reference table type while showing xref_table");  
1504 - } 1370 + default:
  1371 + throw std::logic_error("unknown cross-reference table type while"
  1372 + " showing xref_table");
  1373 + break;
1505 } 1374 }
  1375 + m->log->info("\n");
1506 } 1376 }
1507 } 1377 }
1508 1378
1509 // Resolve all objects in the xref table. If this triggers a xref table reconstruction abort and 1379 // Resolve all objects in the xref table. If this triggers a xref table reconstruction abort and
1510 // return false. Otherwise return true. 1380 // return false. Otherwise return true.
1511 bool 1381 bool
1512 -QPDF::Xref_table::resolve()  
1513 -{  
1514 - bool may_change = !reconstructed_;  
1515 - int i = -1;  
1516 - for (auto& item: table) {  
1517 - ++i;  
1518 - if (item.type()) {  
1519 - if (qpdf.isUnresolved(QPDFObjGen(i, item.gen()))) {  
1520 - qpdf.resolve(QPDFObjGen(i, item.gen()));  
1521 - if (may_change && reconstructed_) {  
1522 - return false;  
1523 - } 1382 +QPDF::resolveXRefTable()
  1383 +{
  1384 + bool may_change = !m->reconstructed_xref;
  1385 + for (auto& iter: m->xref_table) {
  1386 + if (isUnresolved(iter.first)) {
  1387 + resolve(iter.first);
  1388 + if (may_change && m->reconstructed_xref) {
  1389 + return false;
1524 } 1390 }
1525 } 1391 }
1526 } 1392 }
@@ -1535,9 +1401,9 @@ QPDF::fixDanglingReferences(bool force) @@ -1535,9 +1401,9 @@ QPDF::fixDanglingReferences(bool force)
1535 if (m->fixed_dangling_refs) { 1401 if (m->fixed_dangling_refs) {
1536 return; 1402 return;
1537 } 1403 }
1538 - if (!m->xref_table.resolve()) { 1404 + if (!resolveXRefTable()) {
1539 QTC::TC("qpdf", "QPDF fix dangling triggered xref reconstruction"); 1405 QTC::TC("qpdf", "QPDF fix dangling triggered xref reconstruction");
1540 - m->xref_table.resolve(); 1406 + resolveXRefTable();
1541 } 1407 }
1542 m->fixed_dangling_refs = true; 1408 m->fixed_dangling_refs = true;
1543 } 1409 }
@@ -1584,21 +1450,21 @@ QPDF::setLastObjectDescription(std::string const& description, QPDFObjGen const& @@ -1584,21 +1450,21 @@ QPDF::setLastObjectDescription(std::string const& description, QPDFObjGen const&
1584 } 1450 }
1585 1451
1586 QPDFObjectHandle 1452 QPDFObjectHandle
1587 -QPDF::Xref_table::read_trailer() 1453 +QPDF::readTrailer()
1588 { 1454 {
1589 - qpdf_offset_t offset = file->tell(); 1455 + qpdf_offset_t offset = m->file->tell();
1590 bool empty = false; 1456 bool empty = false;
1591 auto object = 1457 auto object =
1592 - QPDFParser(*file, "trailer", tokenizer, nullptr, &qpdf, true).parse(empty, false); 1458 + QPDFParser(*m->file, "trailer", m->tokenizer, nullptr, this, true).parse(empty, false);
1593 if (empty) { 1459 if (empty) {
1594 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in 1460 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1595 // actual PDF files and Adobe Reader appears to ignore them. 1461 // actual PDF files and Adobe Reader appears to ignore them.
1596 - qpdf.warn(qpdf.damagedPDF("trailer", "empty object treated as null"));  
1597 - } else if (object.isDictionary() && read_token().isWord("stream")) {  
1598 - qpdf.warn(qpdf.damagedPDF("trailer", file->tell(), "stream keyword found in trailer")); 1462 + warn(damagedPDF("trailer", "empty object treated as null"));
  1463 + } else if (object.isDictionary() && readToken(*m->file).isWord("stream")) {
  1464 + warn(damagedPDF("trailer", m->file->tell(), "stream keyword found in trailer"));
1599 } 1465 }
1600 // Override last_offset so that it points to the beginning of the object we just read 1466 // Override last_offset so that it points to the beginning of the object we just read
1601 - file->setLastOffset(offset); 1467 + m->file->setLastOffset(offset);
1602 return object; 1468 return object;
1603 } 1469 }
1604 1470
@@ -1666,7 +1532,7 @@ QPDF::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset) @@ -1666,7 +1532,7 @@ QPDF::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
1666 } catch (QPDFExc& e) { 1532 } catch (QPDFExc& e) {
1667 if (m->attempt_recovery) { 1533 if (m->attempt_recovery) {
1668 warn(e); 1534 warn(e);
1669 - length = recoverStreamLength(m->file_sp, og, stream_offset); 1535 + length = recoverStreamLength(m->file, og, stream_offset);
1670 } else { 1536 } else {
1671 throw; 1537 throw;
1672 } 1538 }
@@ -1773,9 +1639,21 @@ QPDF::recoverStreamLength( @@ -1773,9 +1639,21 @@ QPDF::recoverStreamLength(
1773 } 1639 }
1774 1640
1775 if (length) { 1641 if (length) {
  1642 + auto end = stream_offset + toO(length);
  1643 + qpdf_offset_t found_offset = 0;
  1644 + QPDFObjGen found_og;
  1645 +
1776 // Make sure this is inside this object 1646 // Make sure this is inside this object
1777 - auto found = m->xref_table.at_offset(stream_offset + toO(length));  
1778 - if (found == QPDFObjGen() || found == og) { 1647 + for (auto const& [current_og, entry]: m->xref_table) {
  1648 + if (entry.getType() == 1) {
  1649 + qpdf_offset_t obj_offset = entry.getOffset();
  1650 + if (found_offset < obj_offset && obj_offset < end) {
  1651 + found_offset = obj_offset;
  1652 + found_og = current_og;
  1653 + }
  1654 + }
  1655 + }
  1656 + if (!found_offset || found_og == og) {
1779 // If we are trying to recover an XRef stream the xref table will not contain and 1657 // If we are trying to recover an XRef stream the xref table will not contain and
1780 // won't contain any entries, therefore we cannot check the found length. Otherwise we 1658 // won't contain any entries, therefore we cannot check the found length. Otherwise we
1781 // found endstream\nendobj within the space allowed for this object, so we're probably 1659 // found endstream\nendobj within the space allowed for this object, so we're probably
@@ -1884,18 +1762,21 @@ QPDF::readObjectAtOffset( @@ -1884,18 +1762,21 @@ QPDF::readObjectAtOffset(
1884 } catch (QPDFExc& e) { 1762 } catch (QPDFExc& e) {
1885 if (try_recovery) { 1763 if (try_recovery) {
1886 // Try again after reconstructing xref table 1764 // Try again after reconstructing xref table
1887 - m->xref_table.reconstruct(e);  
1888 - if (m->xref_table.type(exp_og) == 1) { 1765 + reconstruct_xref(e);
  1766 + if (m->xref_table.count(exp_og) && (m->xref_table[exp_og].getType() == 1)) {
  1767 + qpdf_offset_t new_offset = m->xref_table[exp_og].getOffset();
  1768 + QPDFObjectHandle result =
  1769 + readObjectAtOffset(false, new_offset, description, exp_og, og, false);
1889 QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset"); 1770 QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset");
1890 - return readObjectAtOffset(  
1891 - false, m->xref_table.offset(exp_og), description, exp_og, og, false); 1771 + return result;
1892 } else { 1772 } else {
1893 QTC::TC("qpdf", "QPDF object gone after xref reconstruction"); 1773 QTC::TC("qpdf", "QPDF object gone after xref reconstruction");
1894 warn(damagedPDF( 1774 warn(damagedPDF(
1895 "", 1775 "",
1896 0, 1776 0,
1897 ("object " + exp_og.unparse(' ') + 1777 ("object " + exp_og.unparse(' ') +
1898 - " not found in file after regenerating cross reference table"))); 1778 + " not found in file after regenerating cross reference "
  1779 + "table")));
1899 return QPDFObjectHandle::newNull(); 1780 return QPDFObjectHandle::newNull();
1900 } 1781 }
1901 } else { 1782 } else {
@@ -1928,7 +1809,7 @@ QPDF::readObjectAtOffset( @@ -1928,7 +1809,7 @@ QPDF::readObjectAtOffset(
1928 } 1809 }
1929 } 1810 }
1930 qpdf_offset_t end_after_space = m->file->tell(); 1811 qpdf_offset_t end_after_space = m->file->tell();
1931 - if (skip_cache_if_in_xref && m->xref_table.type(og)) { 1812 + if (skip_cache_if_in_xref && m->xref_table.count(og)) {
1932 // Ordinarily, an object gets read here when resolved through xref table or stream. In 1813 // Ordinarily, an object gets read here when resolved through xref table or stream. In
1933 // the special case of the xref stream and linearization hint tables, the offset comes 1814 // the special case of the xref stream and linearization hint tables, the offset comes
1934 // from another source. For the specific case of xref streams, the xref stream is read 1815 // from another source. For the specific case of xref streams, the xref stream is read
@@ -1956,9 +1837,7 @@ QPDF::readObjectAtOffset( @@ -1956,9 +1837,7 @@ QPDF::readObjectAtOffset(
1956 // could use !check_og in place of skip_cache_if_in_xref. 1837 // could use !check_og in place of skip_cache_if_in_xref.
1957 QTC::TC("qpdf", "QPDF skipping cache for known unchecked object"); 1838 QTC::TC("qpdf", "QPDF skipping cache for known unchecked object");
1958 } else { 1839 } else {
1959 - m->xref_table.linearization_offsets(  
1960 - toS(og.getObj()), end_before_space, end_after_space);  
1961 - updateCache(og, oh.getObj()); 1840 + updateCache(og, oh.getObj(), end_before_space, end_after_space);
1962 } 1841 }
1963 } 1842 }
1964 1843
@@ -1977,43 +1856,44 @@ QPDF::resolve(QPDFObjGen og) @@ -1977,43 +1856,44 @@ QPDF::resolve(QPDFObjGen og)
1977 // has to be resolved during object parsing, such as stream length. 1856 // has to be resolved during object parsing, such as stream length.
1978 QTC::TC("qpdf", "QPDF recursion loop in resolve"); 1857 QTC::TC("qpdf", "QPDF recursion loop in resolve");
1979 warn(damagedPDF("", "loop detected resolving object " + og.unparse(' '))); 1858 warn(damagedPDF("", "loop detected resolving object " + og.unparse(' ')));
1980 - updateCache(og, QPDF_Null::create()); 1859 + updateCache(og, QPDF_Null::create(), -1, -1);
1981 return m->obj_cache[og].object.get(); 1860 return m->obj_cache[og].object.get();
1982 } 1861 }
1983 ResolveRecorder rr(this, og); 1862 ResolveRecorder rr(this, og);
1984 1863
1985 - try {  
1986 - switch (m->xref_table.type(og)) {  
1987 - case 0:  
1988 - break;  
1989 - case 1:  
1990 - {  
1991 - // Object stored in cache by readObjectAtOffset  
1992 - QPDFObjGen a_og;  
1993 - QPDFObjectHandle oh =  
1994 - readObjectAtOffset(true, m->xref_table.offset(og), "", og, a_og, false);  
1995 - }  
1996 - break; 1864 + if (m->xref_table.count(og) != 0) {
  1865 + QPDFXRefEntry const& entry = m->xref_table[og];
  1866 + try {
  1867 + switch (entry.getType()) {
  1868 + case 1:
  1869 + {
  1870 + qpdf_offset_t offset = entry.getOffset();
  1871 + // Object stored in cache by readObjectAtOffset
  1872 + QPDFObjGen a_og;
  1873 + QPDFObjectHandle oh = readObjectAtOffset(true, offset, "", og, a_og, false);
  1874 + }
  1875 + break;
1997 1876
1998 - case 2:  
1999 - resolveObjectsInStream(m->xref_table.stream_number(og.getObj()));  
2000 - break; 1877 + case 2:
  1878 + resolveObjectsInStream(entry.getObjStreamNumber());
  1879 + break;
2001 1880
2002 - default:  
2003 - throw damagedPDF(  
2004 - "", 0, ("object " + og.unparse('/') + " has unexpected xref entry type")); 1881 + default:
  1882 + throw damagedPDF(
  1883 + "", 0, ("object " + og.unparse('/') + " has unexpected xref entry type"));
  1884 + }
  1885 + } catch (QPDFExc& e) {
  1886 + warn(e);
  1887 + } catch (std::exception& e) {
  1888 + warn(damagedPDF(
  1889 + "", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what())));
2005 } 1890 }
2006 - } catch (QPDFExc& e) {  
2007 - warn(e);  
2008 - } catch (std::exception& e) {  
2009 - warn(damagedPDF(  
2010 - "", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what())));  
2011 } 1891 }
2012 1892
2013 if (isUnresolved(og)) { 1893 if (isUnresolved(og)) {
2014 // PDF spec says unknown objects resolve to the null object. 1894 // PDF spec says unknown objects resolve to the null object.
2015 QTC::TC("qpdf", "QPDF resolve failure to null"); 1895 QTC::TC("qpdf", "QPDF resolve failure to null");
2016 - updateCache(og, QPDF_Null::create()); 1896 + updateCache(og, QPDF_Null::create(), -1, -1);
2017 } 1897 }
2018 1898
2019 auto result(m->obj_cache[og].object); 1899 auto result(m->obj_cache[og].object);
@@ -2035,6 +1915,12 @@ QPDF::resolveObjectsInStream(int obj_stream_number) @@ -2035,6 +1915,12 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
2035 "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream"); 1915 "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream");
2036 } 1916 }
2037 1917
  1918 + // For linearization data in the object, use the data from the object stream for the objects in
  1919 + // the stream.
  1920 + QPDFObjGen stream_og(obj_stream_number, 0);
  1921 + qpdf_offset_t end_before_space = m->obj_cache[stream_og].end_before_space;
  1922 + qpdf_offset_t end_after_space = m->obj_cache[stream_og].end_after_space;
  1923 +
2038 QPDFObjectHandle dict = obj_stream.getDict(); 1924 QPDFObjectHandle dict = obj_stream.getDict();
2039 if (!dict.isDictionaryOfType("/ObjStm")) { 1925 if (!dict.isDictionaryOfType("/ObjStm")) {
2040 QTC::TC("qpdf", "QPDF ERR object stream with wrong type"); 1926 QTC::TC("qpdf", "QPDF ERR object stream with wrong type");
@@ -2059,7 +1945,6 @@ QPDF::resolveObjectsInStream(int obj_stream_number) @@ -2059,7 +1945,6 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
2059 (m->file->getName() + " object stream " + std::to_string(obj_stream_number)), 1945 (m->file->getName() + " object stream " + std::to_string(obj_stream_number)),
2060 bp.get())); 1946 bp.get()));
2061 1947
2062 - qpdf_offset_t last_offset = -1;  
2063 for (int i = 0; i < n; ++i) { 1948 for (int i = 0; i < n; ++i) {
2064 QPDFTokenizer::Token tnum = readToken(*input); 1949 QPDFTokenizer::Token tnum = readToken(*input);
2065 QPDFTokenizer::Token toffset = readToken(*input); 1950 QPDFTokenizer::Token toffset = readToken(*input);
@@ -2073,7 +1958,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number) @@ -2073,7 +1958,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
2073 1958
2074 int num = QUtil::string_to_int(tnum.getValue().c_str()); 1959 int num = QUtil::string_to_int(tnum.getValue().c_str());
2075 long long offset = QUtil::string_to_int(toffset.getValue().c_str()); 1960 long long offset = QUtil::string_to_int(toffset.getValue().c_str());
2076 - if (num > m->xref_table.max_id()) { 1961 + if (num > m->xref_table_max_id) {
2077 continue; 1962 continue;
2078 } 1963 }
2079 if (num == obj_stream_number) { 1964 if (num == obj_stream_number) {
@@ -2085,15 +1970,6 @@ QPDF::resolveObjectsInStream(int obj_stream_number) @@ -2085,15 +1970,6 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
2085 "object stream claims to contain itself")); 1970 "object stream claims to contain itself"));
2086 continue; 1971 continue;
2087 } 1972 }
2088 - if (offset <= last_offset) {  
2089 - throw damagedPDF(  
2090 - *input,  
2091 - m->last_object_description,  
2092 - input->getLastOffset(),  
2093 - "expected offsets in object stream to be increasing");  
2094 - }  
2095 - last_offset = offset;  
2096 -  
2097 offsets[num] = toI(offset + first); 1973 offsets[num] = toI(offset + first);
2098 } 1974 }
2099 1975
@@ -2105,12 +1981,13 @@ QPDF::resolveObjectsInStream(int obj_stream_number) @@ -2105,12 +1981,13 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
2105 m->last_object_description += "object "; 1981 m->last_object_description += "object ";
2106 for (auto const& iter: offsets) { 1982 for (auto const& iter: offsets) {
2107 QPDFObjGen og(iter.first, 0); 1983 QPDFObjGen og(iter.first, 0);
2108 - if (m->xref_table.type(og) == 2 &&  
2109 - m->xref_table.stream_number(og.getObj()) == obj_stream_number) { 1984 + auto entry = m->xref_table.find(og);
  1985 + if (entry != m->xref_table.end() && entry->second.getType() == 2 &&
  1986 + entry->second.getObjStreamNumber() == obj_stream_number) {
2110 int offset = iter.second; 1987 int offset = iter.second;
2111 input->seek(offset, SEEK_SET); 1988 input->seek(offset, SEEK_SET);
2112 QPDFObjectHandle oh = readObjectInStream(input, iter.first); 1989 QPDFObjectHandle oh = readObjectInStream(input, iter.first);
2113 - updateCache(og, oh.getObj()); 1990 + updateCache(og, oh.getObj(), end_before_space, end_after_space);
2114 } else { 1991 } else {
2115 QTC::TC("qpdf", "QPDF not caching overridden objstm object"); 1992 QTC::TC("qpdf", "QPDF not caching overridden objstm object");
2116 } 1993 }
@@ -2125,14 +2002,20 @@ QPDF::newIndirect(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& obj) @@ -2125,14 +2002,20 @@ QPDF::newIndirect(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& obj)
2125 } 2002 }
2126 2003
2127 void 2004 void
2128 -QPDF::updateCache(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& object) 2005 +QPDF::updateCache(
  2006 + QPDFObjGen const& og,
  2007 + std::shared_ptr<QPDFObject> const& object,
  2008 + qpdf_offset_t end_before_space,
  2009 + qpdf_offset_t end_after_space)
2129 { 2010 {
2130 object->setObjGen(this, og); 2011 object->setObjGen(this, og);
2131 if (isCached(og)) { 2012 if (isCached(og)) {
2132 auto& cache = m->obj_cache[og]; 2013 auto& cache = m->obj_cache[og];
2133 cache.object->assign(object); 2014 cache.object->assign(object);
  2015 + cache.end_before_space = end_before_space;
  2016 + cache.end_after_space = end_after_space;
2134 } else { 2017 } else {
2135 - m->obj_cache[og] = ObjCache(object); 2018 + m->obj_cache[og] = ObjCache(object, end_before_space, end_after_space);
2136 } 2019 }
2137 } 2020 }
2138 2021
@@ -2162,7 +2045,7 @@ QPDFObjectHandle @@ -2162,7 +2045,7 @@ QPDFObjectHandle
2162 QPDF::makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj) 2045 QPDF::makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj)
2163 { 2046 {
2164 QPDFObjGen next{nextObjGen()}; 2047 QPDFObjGen next{nextObjGen()};
2165 - m->obj_cache[next] = ObjCache(obj); 2048 + m->obj_cache[next] = ObjCache(obj, -1, -1);
2166 return newIndirect(next, m->obj_cache[next].object); 2049 return newIndirect(next, m->obj_cache[next].object);
2167 } 2050 }
2168 2051
@@ -2218,7 +2101,7 @@ QPDF::getObjectForParser(int id, int gen, bool parse_pdf) @@ -2218,7 +2101,7 @@ QPDF::getObjectForParser(int id, int gen, bool parse_pdf)
2218 if (auto iter = m->obj_cache.find(og); iter != m->obj_cache.end()) { 2101 if (auto iter = m->obj_cache.find(og); iter != m->obj_cache.end()) {
2219 return iter->second.object; 2102 return iter->second.object;
2220 } 2103 }
2221 - if (m->xref_table.type(og) || !m->xref_table.initialized()) { 2104 + if (m->xref_table.count(og) || !m->parsed) {
2222 return m->obj_cache.insert({og, QPDF_Unresolved::create(this, og)}).first->second.object; 2105 return m->obj_cache.insert({og, QPDF_Unresolved::create(this, og)}).first->second.object;
2223 } 2106 }
2224 if (parse_pdf) { 2107 if (parse_pdf) {
@@ -2234,9 +2117,8 @@ QPDF::getObjectForJSON(int id, int gen) @@ -2234,9 +2117,8 @@ QPDF::getObjectForJSON(int id, int gen)
2234 auto [it, inserted] = m->obj_cache.try_emplace(og); 2117 auto [it, inserted] = m->obj_cache.try_emplace(og);
2235 auto& obj = it->second.object; 2118 auto& obj = it->second.object;
2236 if (inserted) { 2119 if (inserted) {
2237 - obj = (m->xref_table.initialized() && !m->xref_table.type(og))  
2238 - ? QPDF_Null::create(this, og)  
2239 - : QPDF_Unresolved::create(this, og); 2120 + obj = (m->parsed && !m->xref_table.count(og)) ? QPDF_Null::create(this, og)
  2121 + : QPDF_Unresolved::create(this, og);
2240 } 2122 }
2241 return obj; 2123 return obj;
2242 } 2124 }
@@ -2246,10 +2128,10 @@ QPDF::getObject(QPDFObjGen const& og) @@ -2246,10 +2128,10 @@ QPDF::getObject(QPDFObjGen const& og)
2246 { 2128 {
2247 if (auto it = m->obj_cache.find(og); it != m->obj_cache.end()) { 2129 if (auto it = m->obj_cache.find(og); it != m->obj_cache.end()) {
2248 return {it->second.object}; 2130 return {it->second.object};
2249 - } else if (m->xref_table.initialized() && !m->xref_table.type(og)) { 2131 + } else if (m->parsed && !m->xref_table.count(og)) {
2250 return QPDF_Null::create(); 2132 return QPDF_Null::create();
2251 } else { 2133 } else {
2252 - auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og)); 2134 + auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og), -1, -1);
2253 return {result.first->second.object}; 2135 return {result.first->second.object};
2254 } 2136 }
2255 } 2137 }
@@ -2285,12 +2167,13 @@ QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh) @@ -2285,12 +2167,13 @@ QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh)
2285 QTC::TC("qpdf", "QPDF replaceObject called with indirect object"); 2167 QTC::TC("qpdf", "QPDF replaceObject called with indirect object");
2286 throw std::logic_error("QPDF::replaceObject called with indirect object handle"); 2168 throw std::logic_error("QPDF::replaceObject called with indirect object handle");
2287 } 2169 }
2288 - updateCache(og, oh.getObj()); 2170 + updateCache(og, oh.getObj(), -1, -1);
2289 } 2171 }
2290 2172
2291 void 2173 void
2292 QPDF::removeObject(QPDFObjGen og) 2174 QPDF::removeObject(QPDFObjGen og)
2293 { 2175 {
  2176 + m->xref_table.erase(og);
2294 if (auto cached = m->obj_cache.find(og); cached != m->obj_cache.end()) { 2177 if (auto cached = m->obj_cache.find(og); cached != m->obj_cache.end()) {
2295 // Take care of any object handles that may be floating around. 2178 // Take care of any object handles that may be floating around.
2296 cached->second.object->assign(QPDF_Null::create()); 2179 cached->second.object->assign(QPDF_Null::create());
@@ -2559,7 +2442,7 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) @@ -2559,7 +2442,7 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
2559 } else { 2442 } else {
2560 auto foreign_stream_data = std::make_shared<ForeignStreamData>( 2443 auto foreign_stream_data = std::make_shared<ForeignStreamData>(
2561 foreign_stream_qpdf.m->encp, 2444 foreign_stream_qpdf.m->encp,
2562 - foreign_stream_qpdf.m->file_sp, 2445 + foreign_stream_qpdf.m->file,
2563 foreign.getObjGen(), 2446 foreign.getObjGen(),
2564 stream->getParsedOffset(), 2447 stream->getParsedOffset(),
2565 stream->getLength(), 2448 stream->getLength(),
@@ -2643,13 +2526,13 @@ QPDF::getExtensionLevel() @@ -2643,13 +2526,13 @@ QPDF::getExtensionLevel()
2643 QPDFObjectHandle 2526 QPDFObjectHandle
2644 QPDF::getTrailer() 2527 QPDF::getTrailer()
2645 { 2528 {
2646 - return m->xref_table.trailer(); 2529 + return m->trailer;
2647 } 2530 }
2648 2531
2649 QPDFObjectHandle 2532 QPDFObjectHandle
2650 QPDF::getRoot() 2533 QPDF::getRoot()
2651 { 2534 {
2652 - QPDFObjectHandle root = m->xref_table.trailer().getKey("/Root"); 2535 + QPDFObjectHandle root = m->trailer.getKey("/Root");
2653 if (!root.isDictionary()) { 2536 if (!root.isDictionary()) {
2654 throw damagedPDF("", 0, "unable to find /Root dictionary"); 2537 throw damagedPDF("", 0, "unable to find /Root dictionary");
2655 } else if ( 2538 } else if (
@@ -2665,10 +2548,17 @@ QPDF::getRoot() @@ -2665,10 +2548,17 @@ QPDF::getRoot()
2665 std::map<QPDFObjGen, QPDFXRefEntry> 2548 std::map<QPDFObjGen, QPDFXRefEntry>
2666 QPDF::getXRefTable() 2549 QPDF::getXRefTable()
2667 { 2550 {
2668 - if (!m->xref_table.initialized()) { 2551 + return getXRefTableInternal();
  2552 +}
  2553 +
  2554 +std::map<QPDFObjGen, QPDFXRefEntry> const&
  2555 +QPDF::getXRefTableInternal()
  2556 +{
  2557 + if (!m->parsed) {
2669 throw std::logic_error("QPDF::getXRefTable called before parsing."); 2558 throw std::logic_error("QPDF::getXRefTable called before parsing.");
2670 } 2559 }
2671 - return m->xref_table.as_map(); 2560 +
  2561 + return m->xref_table;
2672 } 2562 }
2673 2563
2674 size_t 2564 size_t
@@ -2676,10 +2566,7 @@ QPDF::tableSize() @@ -2676,10 +2566,7 @@ QPDF::tableSize()
2676 { 2566 {
2677 // If obj_cache is dense, accommodate all object in tables,else accommodate only original 2567 // If obj_cache is dense, accommodate all object in tables,else accommodate only original
2678 // objects. 2568 // objects.
2679 - auto max_xref = toI(m->xref_table.size());  
2680 - if (max_xref > 0) {  
2681 - --max_xref;  
2682 - } 2569 + auto max_xref = m->xref_table.size() ? m->xref_table.crbegin()->first.getObj() : 0;
2683 auto max_obj = m->obj_cache.size() ? m->obj_cache.crbegin()->first.getObj() : 0; 2570 auto max_obj = m->obj_cache.size() ? m->obj_cache.crbegin()->first.getObj() : 0;
2684 auto max_id = std::numeric_limits<int>::max() - 1; 2571 auto max_id = std::numeric_limits<int>::max() - 1;
2685 if (max_obj >= max_id || max_xref >= max_id) { 2572 if (max_obj >= max_id || max_xref >= max_id) {
@@ -2717,14 +2604,14 @@ QPDF::getCompressibleObjGens() @@ -2717,14 +2604,14 @@ QPDF::getCompressibleObjGens()
2717 // iterating through the xref table since it avoids preserving orphaned items. 2604 // iterating through the xref table since it avoids preserving orphaned items.
2718 2605
2719 // Exclude encryption dictionary, if any 2606 // Exclude encryption dictionary, if any
2720 - QPDFObjectHandle encryption_dict = m->xref_table.trailer().getKey("/Encrypt"); 2607 + QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt");
2721 QPDFObjGen encryption_dict_og = encryption_dict.getObjGen(); 2608 QPDFObjGen encryption_dict_og = encryption_dict.getObjGen();
2722 2609
2723 const size_t max_obj = getObjectCount(); 2610 const size_t max_obj = getObjectCount();
2724 std::vector<bool> visited(max_obj, false); 2611 std::vector<bool> visited(max_obj, false);
2725 std::vector<QPDFObjectHandle> queue; 2612 std::vector<QPDFObjectHandle> queue;
2726 queue.reserve(512); 2613 queue.reserve(512);
2727 - queue.push_back(m->xref_table.trailer()); 2614 + queue.push_back(m->trailer);
2728 std::vector<T> result; 2615 std::vector<T> result;
2729 if constexpr (std::is_same_v<T, QPDFObjGen>) { 2616 if constexpr (std::is_same_v<T, QPDFObjGen>) {
2730 result.reserve(m->obj_cache.size()); 2617 result.reserve(m->obj_cache.size());
@@ -2879,7 +2766,7 @@ QPDF::pipeStreamData( @@ -2879,7 +2766,7 @@ QPDF::pipeStreamData(
2879 { 2766 {
2880 return pipeStreamData( 2767 return pipeStreamData(
2881 m->encp, 2768 m->encp,
2882 - m->file_sp, 2769 + m->file,
2883 *this, 2770 *this,
2884 og, 2771 og,
2885 offset, 2772 offset,
libqpdf/QPDFJob.cc
@@ -13,6 +13,7 @@ @@ -13,6 +13,7 @@
13 #include <qpdf/Pl_StdioFile.hh> 13 #include <qpdf/Pl_StdioFile.hh>
14 #include <qpdf/Pl_String.hh> 14 #include <qpdf/Pl_String.hh>
15 #include <qpdf/QIntC.hh> 15 #include <qpdf/QIntC.hh>
  16 +#include <qpdf/QPDF.hh>
16 #include <qpdf/QPDFAcroFormDocumentHelper.hh> 17 #include <qpdf/QPDFAcroFormDocumentHelper.hh>
17 #include <qpdf/QPDFCryptoProvider.hh> 18 #include <qpdf/QPDFCryptoProvider.hh>
18 #include <qpdf/QPDFEmbeddedFileDocumentHelper.hh> 19 #include <qpdf/QPDFEmbeddedFileDocumentHelper.hh>
@@ -25,7 +26,6 @@ @@ -25,7 +26,6 @@
25 #include <qpdf/QPDFSystemError.hh> 26 #include <qpdf/QPDFSystemError.hh>
26 #include <qpdf/QPDFUsage.hh> 27 #include <qpdf/QPDFUsage.hh>
27 #include <qpdf/QPDFWriter.hh> 28 #include <qpdf/QPDFWriter.hh>
28 -#include <qpdf/QPDF_private.hh>  
29 #include <qpdf/QTC.hh> 29 #include <qpdf/QTC.hh>
30 #include <qpdf/QUtil.hh> 30 #include <qpdf/QUtil.hh>
31 31
libqpdf/QPDFWriter.cc
@@ -14,10 +14,10 @@ @@ -14,10 +14,10 @@
14 #include <qpdf/Pl_RC4.hh> 14 #include <qpdf/Pl_RC4.hh>
15 #include <qpdf/Pl_StdioFile.hh> 15 #include <qpdf/Pl_StdioFile.hh>
16 #include <qpdf/QIntC.hh> 16 #include <qpdf/QIntC.hh>
  17 +#include <qpdf/QPDF.hh>
17 #include <qpdf/QPDFObjectHandle.hh> 18 #include <qpdf/QPDFObjectHandle.hh>
18 #include <qpdf/QPDF_Name.hh> 19 #include <qpdf/QPDF_Name.hh>
19 #include <qpdf/QPDF_String.hh> 20 #include <qpdf/QPDF_String.hh>
20 -#include <qpdf/QPDF_private.hh>  
21 #include <qpdf/QTC.hh> 21 #include <qpdf/QTC.hh>
22 #include <qpdf/QUtil.hh> 22 #include <qpdf/QUtil.hh>
23 #include <qpdf/RC4.hh> 23 #include <qpdf/RC4.hh>
@@ -1698,6 +1698,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) @@ -1698,6 +1698,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1698 if (obj_to_write.isStream()) { 1698 if (obj_to_write.isStream()) {
1699 // This condition occurred in a fuzz input. Ideally we should block it at parse 1699 // This condition occurred in a fuzz input. Ideally we should block it at parse
1700 // time, but it's not clear to me how to construct a case for this. 1700 // time, but it's not clear to me how to construct a case for this.
  1701 + QTC::TC("qpdf", "QPDFWriter stream in ostream");
1701 obj_to_write.warnIfPossible("stream found inside object stream; treating as null"); 1702 obj_to_write.warnIfPossible("stream found inside object stream; treating as null");
1702 obj_to_write = QPDFObjectHandle::newNull(); 1703 obj_to_write = QPDFObjectHandle::newNull();
1703 } 1704 }
@@ -1936,26 +1937,47 @@ void @@ -1936,26 +1937,47 @@ void
1936 QPDFWriter::preserveObjectStreams() 1937 QPDFWriter::preserveObjectStreams()
1937 { 1938 {
1938 auto const& xref = QPDF::Writer::getXRefTable(m->pdf); 1939 auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
1939 - m->obj.streams_empty = !xref.object_streams();  
1940 - if (m->obj.streams_empty) {  
1941 - return;  
1942 - }  
1943 - // This code filters out objects that are not allowed to be in object streams. In addition to  
1944 - // removing objects that were erroneously included in object streams in the source PDF, it also  
1945 - // prevents unreferenced objects from being included. 1940 + // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
  1941 + // streams out of old objects that have generation numbers greater than zero. However in an
  1942 + // existing PDF, all object stream objects and all objects in them must have generation 0
  1943 + // because the PDF spec does not provide any way to do otherwise. This code filters out objects
  1944 + // that are not allowed to be in object streams. In addition to removing objects that were
  1945 + // erroneously included in object streams in the source PDF, it also prevents unreferenced
  1946 + // objects from being included.
  1947 + auto end = xref.cend();
  1948 + m->obj.streams_empty = true;
1946 if (m->preserve_unreferenced_objects) { 1949 if (m->preserve_unreferenced_objects) {
1947 - QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");  
1948 - for (auto [id, stream]: xref.compressed_objects()) {  
1949 - m->obj[id].object_stream = stream; 1950 + for (auto iter = xref.cbegin(); iter != end; ++iter) {
  1951 + if (iter->second.getType() == 2) {
  1952 + // Pdf contains object streams.
  1953 + QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
  1954 + m->obj.streams_empty = false;
  1955 + m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
  1956 + }
1950 } 1957 }
1951 } else { 1958 } else {
1952 - QTC::TC("qpdf", "QPDFWriter preserve object streams");  
1953 - auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);  
1954 - for (auto [id, stream]: xref.compressed_objects()) {  
1955 - if (eligible[id]) {  
1956 - m->obj[id].object_stream = stream;  
1957 - } else {  
1958 - QTC::TC("qpdf", "QPDFWriter exclude from object stream"); 1959 + // Start by scanning for first compressed object in case we don't have any object streams to
  1960 + // process.
  1961 + for (auto iter = xref.cbegin(); iter != end; ++iter) {
  1962 + if (iter->second.getType() == 2) {
  1963 + // Pdf contains object streams.
  1964 + QTC::TC("qpdf", "QPDFWriter preserve object streams");
  1965 + m->obj.streams_empty = false;
  1966 + auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
  1967 + // The object pointed to by iter may be a previous generation, in which case it is
  1968 + // removed by getCompressibleObjSet. We need to restart the loop (while the object
  1969 + // table may contain multiple generations of an object).
  1970 + for (iter = xref.cbegin(); iter != end; ++iter) {
  1971 + if (iter->second.getType() == 2) {
  1972 + auto id = static_cast<size_t>(iter->first.getObj());
  1973 + if (id < eligible.size() && eligible[id]) {
  1974 + m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
  1975 + } else {
  1976 + QTC::TC("qpdf", "QPDFWriter exclude from object stream");
  1977 + }
  1978 + }
  1979 + }
  1980 + return;
1959 } 1981 }
1960 } 1982 }
1961 } 1983 }
libqpdf/QPDF_Stream.cc
@@ -10,8 +10,8 @@ @@ -10,8 +10,8 @@
10 #include <qpdf/Pl_Flate.hh> 10 #include <qpdf/Pl_Flate.hh>
11 #include <qpdf/Pl_QPDFTokenizer.hh> 11 #include <qpdf/Pl_QPDFTokenizer.hh>
12 #include <qpdf/QIntC.hh> 12 #include <qpdf/QIntC.hh>
  13 +#include <qpdf/QPDF.hh>
13 #include <qpdf/QPDFExc.hh> 14 #include <qpdf/QPDFExc.hh>
14 -#include <qpdf/QPDF_private.hh>  
15 #include <qpdf/QTC.hh> 15 #include <qpdf/QTC.hh>
16 #include <qpdf/QUtil.hh> 16 #include <qpdf/QUtil.hh>
17 #include <qpdf/SF_ASCII85Decode.hh> 17 #include <qpdf/SF_ASCII85Decode.hh>
libqpdf/QPDF_encryption.cc
@@ -3,7 +3,7 @@ @@ -3,7 +3,7 @@
3 3
4 #include <qpdf/assert_debug.h> 4 #include <qpdf/assert_debug.h>
5 5
6 -#include <qpdf/QPDF_private.hh> 6 +#include <qpdf/QPDF.hh>
7 7
8 #include <qpdf/QPDFExc.hh> 8 #include <qpdf/QPDFExc.hh>
9 9
@@ -727,7 +727,7 @@ QPDF::initializeEncryption() @@ -727,7 +727,7 @@ QPDF::initializeEncryption()
727 // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption 727 // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption
728 // dictionary. 728 // dictionary.
729 729
730 - if (!m->xref_table.trailer().hasKey("/Encrypt")) { 730 + if (!m->trailer.hasKey("/Encrypt")) {
731 return; 731 return;
732 } 732 }
733 733
@@ -736,7 +736,7 @@ QPDF::initializeEncryption() @@ -736,7 +736,7 @@ QPDF::initializeEncryption()
736 m->encp->encrypted = true; 736 m->encp->encrypted = true;
737 737
738 std::string id1; 738 std::string id1;
739 - QPDFObjectHandle id_obj = m->xref_table.trailer().getKey("/ID"); 739 + QPDFObjectHandle id_obj = m->trailer.getKey("/ID");
740 if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) { 740 if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) {
741 id1 = id_obj.getArrayItem(0).getStringValue(); 741 id1 = id_obj.getArrayItem(0).getStringValue();
742 } else { 742 } else {
@@ -745,7 +745,7 @@ QPDF::initializeEncryption() @@ -745,7 +745,7 @@ QPDF::initializeEncryption()
745 warn(damagedPDF("trailer", "invalid /ID in trailer dictionary")); 745 warn(damagedPDF("trailer", "invalid /ID in trailer dictionary"));
746 } 746 }
747 747
748 - QPDFObjectHandle encryption_dict = m->xref_table.trailer().getKey("/Encrypt"); 748 + QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt");
749 if (!encryption_dict.isDictionary()) { 749 if (!encryption_dict.isDictionary()) {
750 throw damagedPDF("/Encrypt in trailer dictionary is not a dictionary"); 750 throw damagedPDF("/Encrypt in trailer dictionary is not a dictionary");
751 } 751 }
libqpdf/QPDF_json.cc
@@ -51,6 +51,17 @@ @@ -51,6 +51,17 @@
51 // ] | <- st_top 51 // ] | <- st_top
52 // } | 52 // } |
53 53
  54 +static char const* JSON_PDF = (
  55 + // force line break
  56 + "%PDF-1.3\n"
  57 + "xref\n"
  58 + "0 1\n"
  59 + "0000000000 65535 f \n"
  60 + "trailer << /Size 1 >>\n"
  61 + "startxref\n"
  62 + "9\n"
  63 + "%%EOF\n");
  64 +
54 // Validator methods -- these are much more performant than std::regex. 65 // Validator methods -- these are much more performant than std::regex.
55 static bool 66 static bool
56 is_indirect_object(std::string const& v, int& obj, int& gen) 67 is_indirect_object(std::string const& v, int& obj, int& gen)
@@ -256,10 +267,10 @@ class QPDF::JSONReactor: public JSON::Reactor @@ -256,10 +267,10 @@ class QPDF::JSONReactor: public JSON::Reactor
256 struct StackFrame 267 struct StackFrame
257 { 268 {
258 StackFrame(state_e state) : 269 StackFrame(state_e state) :
259 - state(state){}; 270 + state(state) {};
260 StackFrame(state_e state, QPDFObjectHandle&& object) : 271 StackFrame(state_e state, QPDFObjectHandle&& object) :
261 state(state), 272 state(state),
262 - object(object){}; 273 + object(object) {};
263 state_e state; 274 state_e state;
264 QPDFObjectHandle object; 275 QPDFObjectHandle object;
265 }; 276 };
@@ -582,7 +593,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) @@ -582,7 +593,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
582 this->saw_value = true; 593 this->saw_value = true;
583 // The trailer must be a dictionary, so we can use setNextStateIfDictionary. 594 // The trailer must be a dictionary, so we can use setNextStateIfDictionary.
584 if (setNextStateIfDictionary("trailer.value", value, st_object)) { 595 if (setNextStateIfDictionary("trailer.value", value, st_object)) {
585 - pdf.m->xref_table.trailer(makeObject(value)); 596 + this->pdf.m->trailer = makeObject(value);
  597 + setObjectDescription(this->pdf.m->trailer, value);
586 } 598 }
587 } else if (key == "stream") { 599 } else if (key == "stream") {
588 // Don't need to set saw_stream here since there's already an error. 600 // Don't need to set saw_stream here since there's already an error.
@@ -774,9 +786,7 @@ QPDF::createFromJSON(std::string const& json_file) @@ -774,9 +786,7 @@ QPDF::createFromJSON(std::string const& json_file)
774 void 786 void
775 QPDF::createFromJSON(std::shared_ptr<InputSource> is) 787 QPDF::createFromJSON(std::shared_ptr<InputSource> is)
776 { 788 {
777 - m->pdf_version = "1.3";  
778 - m->no_input_name = is->getName();  
779 - m->xref_table.initialize_json(); 789 + processMemoryFile(is->getName().c_str(), JSON_PDF, strlen(JSON_PDF));
780 importJSON(is, true); 790 importJSON(is, true);
781 } 791 }
782 792
libqpdf/QPDF_linearization.cc
1 // See doc/linearization. 1 // See doc/linearization.
2 2
3 -#include <qpdf/QPDF_private.hh> 3 +#include <qpdf/QPDF.hh>
4 4
5 #include <qpdf/BitStream.hh> 5 #include <qpdf/BitStream.hh>
6 #include <qpdf/BitWriter.hh> 6 #include <qpdf/BitWriter.hh>
@@ -288,8 +288,9 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) @@ -288,8 +288,9 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
288 QPDFObjGen og; 288 QPDFObjGen og;
289 QPDFObjectHandle H = 289 QPDFObjectHandle H =
290 readObjectAtOffset(false, offset, "linearization hint stream", QPDFObjGen(0, 0), og, false); 290 readObjectAtOffset(false, offset, "linearization hint stream", QPDFObjGen(0, 0), og, false);
291 - qpdf_offset_t min_end_offset = m->xref_table.end_before_space(og);  
292 - qpdf_offset_t max_end_offset = m->xref_table.end_after_space(og); 291 + ObjCache& oc = m->obj_cache[og];
  292 + qpdf_offset_t min_end_offset = oc.end_before_space;
  293 + qpdf_offset_t max_end_offset = oc.end_after_space;
293 if (!H.isStream()) { 294 if (!H.isStream()) {
294 throw damagedPDF("linearization dictionary", "hint table is not a stream"); 295 throw damagedPDF("linearization dictionary", "hint table is not a stream");
295 } 296 }
@@ -300,11 +301,14 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) @@ -300,11 +301,14 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
300 // increasing length to cover it, even though the specification says all objects in the 301 // increasing length to cover it, even though the specification says all objects in the
301 // linearization parameter dictionary must be direct. We have to get the file position of the 302 // linearization parameter dictionary must be direct. We have to get the file position of the
302 // end of length in this case. 303 // end of length in this case.
303 - auto length_og = Hdict.getKey("/Length").getObjGen();  
304 - if (length_og.isIndirect()) { 304 + QPDFObjectHandle length_obj = Hdict.getKey("/Length");
  305 + if (length_obj.isIndirect()) {
305 QTC::TC("qpdf", "QPDF hint table length indirect"); 306 QTC::TC("qpdf", "QPDF hint table length indirect");
306 - min_end_offset = m->xref_table.end_before_space(length_og);  
307 - max_end_offset = m->xref_table.end_after_space(length_og); 307 + // Force resolution
  308 + (void)length_obj.getIntValue();
  309 + ObjCache& oc2 = m->obj_cache[length_obj.getObjGen()];
  310 + min_end_offset = oc2.end_before_space;
  311 + max_end_offset = oc2.end_after_space;
308 } else { 312 } else {
309 QTC::TC("qpdf", "QPDF hint table length direct"); 313 QTC::TC("qpdf", "QPDF hint table length direct");
310 } 314 }
@@ -441,7 +445,7 @@ QPDF::checkLinearizationInternal() @@ -441,7 +445,7 @@ QPDF::checkLinearizationInternal()
441 for (size_t i = 0; i < toS(npages); ++i) { 445 for (size_t i = 0; i < toS(npages); ++i) {
442 QPDFObjectHandle const& page = pages.at(i); 446 QPDFObjectHandle const& page = pages.at(i);
443 QPDFObjGen og(page.getObjGen()); 447 QPDFObjGen og(page.getObjGen());
444 - if (m->xref_table.type(og) == 2) { 448 + if (m->xref_table[og].getType() == 2) {
445 linearizationWarning( 449 linearizationWarning(
446 "page dictionary for page " + std::to_string(i) + " is compressed"); 450 "page dictionary for page " + std::to_string(i) + " is compressed");
447 } 451 }
@@ -457,11 +461,12 @@ QPDF::checkLinearizationInternal() @@ -457,11 +461,12 @@ QPDF::checkLinearizationInternal()
457 break; 461 break;
458 } 462 }
459 } 463 }
460 - if (m->file->tell() != m->xref_table.first_item_offset()) { 464 + if (m->file->tell() != m->first_xref_item_offset) {
461 QTC::TC("qpdf", "QPDF err /T mismatch"); 465 QTC::TC("qpdf", "QPDF err /T mismatch");
462 linearizationWarning( 466 linearizationWarning(
463 - "space before first xref item (/T) mismatch (computed = " +  
464 - std::to_string(m->xref_table.first_item_offset()) + 467 + "space before first xref item (/T) mismatch "
  468 + "(computed = " +
  469 + std::to_string(m->first_xref_item_offset) +
465 "; file = " + std::to_string(m->file->tell())); 470 "; file = " + std::to_string(m->file->tell()));
466 } 471 }
467 472
@@ -472,7 +477,7 @@ QPDF::checkLinearizationInternal() @@ -472,7 +477,7 @@ QPDF::checkLinearizationInternal()
472 // compressed objects are supposed to be at the end of the containing xref section if any object 477 // compressed objects are supposed to be at the end of the containing xref section if any object
473 // streams are in use. 478 // streams are in use.
474 479
475 - if (m->xref_table.uncompressed_after_compressed()) { 480 + if (m->uncompressed_after_compressed) {
476 linearizationWarning("linearized file contains an uncompressed object after a compressed " 481 linearizationWarning("linearized file contains an uncompressed object after a compressed "
477 "one in a cross-reference stream"); 482 "one in a cross-reference stream");
478 } 483 }
@@ -480,9 +485,18 @@ QPDF::checkLinearizationInternal() @@ -480,9 +485,18 @@ QPDF::checkLinearizationInternal()
480 // Further checking requires optimization and order calculation. Don't allow optimization to 485 // Further checking requires optimization and order calculation. Don't allow optimization to
481 // make changes. If it has to, then the file is not properly linearized. We use the xref table 486 // make changes. If it has to, then the file is not properly linearized. We use the xref table
482 // to figure out which objects are compressed and which are uncompressed. 487 // to figure out which objects are compressed and which are uncompressed.
483 -  
484 - optimize(m->xref_table);  
485 - calculateLinearizationData(m->xref_table); 488 + { // local scope
  489 + std::map<int, int> object_stream_data;
  490 + for (auto const& iter: m->xref_table) {
  491 + QPDFObjGen const& og = iter.first;
  492 + QPDFXRefEntry const& entry = iter.second;
  493 + if (entry.getType() == 2) {
  494 + object_stream_data[og.getObj()] = entry.getObjStreamNumber();
  495 + }
  496 + }
  497 + optimize(object_stream_data, false);
  498 + calculateLinearizationData(object_stream_data);
  499 + }
486 500
487 // E: offset of end of first page -- Implementation note 123 says Acrobat includes on extra 501 // E: offset of end of first page -- Implementation note 123 says Acrobat includes on extra
488 // object here by mistake. pdlin fails to place thumbnail images in section 9, so when 502 // object here by mistake. pdlin fails to place thumbnail images in section 9, so when
@@ -499,14 +513,13 @@ QPDF::checkLinearizationInternal() @@ -499,14 +513,13 @@ QPDF::checkLinearizationInternal()
499 qpdf_offset_t max_E = -1; 513 qpdf_offset_t max_E = -1;
500 for (auto const& oh: m->part6) { 514 for (auto const& oh: m->part6) {
501 QPDFObjGen og(oh.getObjGen()); 515 QPDFObjGen og(oh.getObjGen());
502 - auto before = m->xref_table.end_before_space(og);  
503 - auto after = m->xref_table.end_after_space(og);  
504 - if (before <= 0) { 516 + if (m->obj_cache.count(og) == 0) {
505 // All objects have to have been dereferenced to be classified. 517 // All objects have to have been dereferenced to be classified.
506 throw std::logic_error("linearization part6 object not in cache"); 518 throw std::logic_error("linearization part6 object not in cache");
507 } 519 }
508 - min_E = std::max(min_E, before);  
509 - max_E = std::max(max_E, after); 520 + ObjCache const& oc = m->obj_cache[og];
  521 + min_E = std::max(min_E, oc.end_before_space);
  522 + max_E = std::max(max_E, oc.end_after_space);
510 } 523 }
511 if ((p.first_page_end < min_E) || (p.first_page_end > max_E)) { 524 if ((p.first_page_end < min_E) || (p.first_page_end > max_E)) {
512 QTC::TC("qpdf", "QPDF warn /E mismatch"); 525 QTC::TC("qpdf", "QPDF warn /E mismatch");
@@ -533,11 +546,10 @@ QPDF::maxEnd(ObjUser const&amp; ou) @@ -533,11 +546,10 @@ QPDF::maxEnd(ObjUser const&amp; ou)
533 } 546 }
534 qpdf_offset_t end = 0; 547 qpdf_offset_t end = 0;
535 for (auto const& og: m->obj_user_to_objects[ou]) { 548 for (auto const& og: m->obj_user_to_objects[ou]) {
536 - auto e = m->xref_table.end_after_space(og);  
537 - if (e <= 0) { 549 + if (m->obj_cache.count(og) == 0) {
538 stopOnError("unknown object referenced in object user table"); 550 stopOnError("unknown object referenced in object user table");
539 } 551 }
540 - end = std::max(end, e); 552 + end = std::max(end, m->obj_cache[og].end_after_space);
541 } 553 }
542 return end; 554 return end;
543 } 555 }
@@ -545,18 +557,23 @@ QPDF::maxEnd(ObjUser const&amp; ou) @@ -545,18 +557,23 @@ QPDF::maxEnd(ObjUser const&amp; ou)
545 qpdf_offset_t 557 qpdf_offset_t
546 QPDF::getLinearizationOffset(QPDFObjGen const& og) 558 QPDF::getLinearizationOffset(QPDFObjGen const& og)
547 { 559 {
548 - switch (m->xref_table.type(og)) { 560 + QPDFXRefEntry entry = m->xref_table[og];
  561 + qpdf_offset_t result = 0;
  562 + switch (entry.getType()) {
549 case 1: 563 case 1:
550 - return m->xref_table.offset(og); 564 + result = entry.getOffset();
  565 + break;
551 566
552 case 2: 567 case 2:
553 // For compressed objects, return the offset of the object stream that contains them. 568 // For compressed objects, return the offset of the object stream that contains them.
554 - return getLinearizationOffset(QPDFObjGen(m->xref_table.stream_number(og.getObj()), 0)); 569 + result = getLinearizationOffset(QPDFObjGen(entry.getObjStreamNumber(), 0));
  570 + break;
555 571
556 default: 572 default:
557 stopOnError("getLinearizationOffset called for xref entry not of type 1 or 2"); 573 stopOnError("getLinearizationOffset called for xref entry not of type 1 or 2");
558 - return 0; // unreachable 574 + break;
559 } 575 }
  576 + return result;
560 } 577 }
561 578
562 QPDFObjectHandle 579 QPDFObjectHandle
@@ -571,16 +588,6 @@ QPDF::getUncompressedObject(QPDFObjectHandle&amp; obj, std::map&lt;int, int&gt; const&amp; obj @@ -571,16 +588,6 @@ QPDF::getUncompressedObject(QPDFObjectHandle&amp; obj, std::map&lt;int, int&gt; const&amp; obj
571 } 588 }
572 589
573 QPDFObjectHandle 590 QPDFObjectHandle
574 -QPDF::getUncompressedObject(QPDFObjectHandle& obj, Xref_table const& xref)  
575 -{  
576 - auto og = obj.getObjGen();  
577 - if (obj.isNull() || xref.type(og) != 2) {  
578 - return obj;  
579 - }  
580 - return getObject(xref.stream_number(og.getObj()), 0);  
581 -}  
582 -  
583 -QPDFObjectHandle  
584 QPDF::getUncompressedObject(QPDFObjectHandle& oh, QPDFWriter::ObjTable const& obj) 591 QPDF::getUncompressedObject(QPDFObjectHandle& oh, QPDFWriter::ObjTable const& obj)
585 { 592 {
586 if (obj.contains(oh)) { 593 if (obj.contains(oh)) {
@@ -597,13 +604,15 @@ QPDF::lengthNextN(int first_object, int n) @@ -597,13 +604,15 @@ QPDF::lengthNextN(int first_object, int n)
597 int length = 0; 604 int length = 0;
598 for (int i = 0; i < n; ++i) { 605 for (int i = 0; i < n; ++i) {
599 QPDFObjGen og(first_object + i, 0); 606 QPDFObjGen og(first_object + i, 0);
600 - auto end = m->xref_table.end_after_space(og);  
601 - if (end <= 0) { 607 + if (m->xref_table.count(og) == 0) {
602 linearizationWarning( 608 linearizationWarning(
603 "no xref table entry for " + std::to_string(first_object + i) + " 0"); 609 "no xref table entry for " + std::to_string(first_object + i) + " 0");
604 - continue; 610 + } else {
  611 + if (m->obj_cache.count(og) == 0) {
  612 + stopOnError("found unknown object while calculating length for linearization data");
  613 + }
  614 + length += toI(m->obj_cache[og].end_after_space - getLinearizationOffset(og));
605 } 615 }
606 - length += toI(end - getLinearizationOffset(og));  
607 } 616 }
608 return length; 617 return length;
609 } 618 }
@@ -627,7 +636,7 @@ QPDF::checkHPageOffset( @@ -627,7 +636,7 @@ QPDF::checkHPageOffset(
627 int npages = toI(pages.size()); 636 int npages = toI(pages.size());
628 qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset); 637 qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset);
629 QPDFObjGen first_page_og(pages.at(0).getObjGen()); 638 QPDFObjGen first_page_og(pages.at(0).getObjGen());
630 - if (m->xref_table.type(first_page_og) == 0) { 639 + if (m->xref_table.count(first_page_og) == 0) {
631 stopOnError("supposed first page object is not known"); 640 stopOnError("supposed first page object is not known");
632 } 641 }
633 qpdf_offset_t offset = getLinearizationOffset(first_page_og); 642 qpdf_offset_t offset = getLinearizationOffset(first_page_og);
@@ -638,7 +647,7 @@ QPDF::checkHPageOffset( @@ -638,7 +647,7 @@ QPDF::checkHPageOffset(
638 for (int pageno = 0; pageno < npages; ++pageno) { 647 for (int pageno = 0; pageno < npages; ++pageno) {
639 QPDFObjGen page_og(pages.at(toS(pageno)).getObjGen()); 648 QPDFObjGen page_og(pages.at(toS(pageno)).getObjGen());
640 int first_object = page_og.getObj(); 649 int first_object = page_og.getObj();
641 - if (m->xref_table.type(page_og) == 0) { 650 + if (m->xref_table.count(page_og) == 0) {
642 stopOnError("unknown object in page offset hint table"); 651 stopOnError("unknown object in page offset hint table");
643 } 652 }
644 offset = getLinearizationOffset(page_og); 653 offset = getLinearizationOffset(page_og);
@@ -760,7 +769,7 @@ QPDF::checkHSharedObject(std::vector&lt;QPDFObjectHandle&gt; const&amp; pages, std::map&lt;in @@ -760,7 +769,7 @@ QPDF::checkHSharedObject(std::vector&lt;QPDFObjectHandle&gt; const&amp; pages, std::map&lt;in
760 cur_object = so.first_shared_obj; 769 cur_object = so.first_shared_obj;
761 770
762 QPDFObjGen og(cur_object, 0); 771 QPDFObjGen og(cur_object, 0);
763 - if (m->xref_table.type(og) == 0) { 772 + if (m->xref_table.count(og) == 0) {
764 stopOnError("unknown object in shared object hint table"); 773 stopOnError("unknown object in shared object hint table");
765 } 774 }
766 qpdf_offset_t offset = getLinearizationOffset(og); 775 qpdf_offset_t offset = getLinearizationOffset(og);
@@ -811,7 +820,7 @@ QPDF::checkHOutlines() @@ -811,7 +820,7 @@ QPDF::checkHOutlines()
811 return; 820 return;
812 } 821 }
813 QPDFObjGen og(outlines.getObjGen()); 822 QPDFObjGen og(outlines.getObjGen());
814 - if (m->xref_table.type(og) == 0) { 823 + if (m->xref_table.count(og) == 0) {
815 stopOnError("unknown object in outlines hint table"); 824 stopOnError("unknown object in outlines hint table");
816 } 825 }
817 qpdf_offset_t offset = getLinearizationOffset(og); 826 qpdf_offset_t offset = getLinearizationOffset(og);
@@ -830,7 +839,8 @@ QPDF::checkHOutlines() @@ -830,7 +839,8 @@ QPDF::checkHOutlines()
830 std::to_string(table_length) + "; computed = " + std::to_string(length)); 839 std::to_string(table_length) + "; computed = " + std::to_string(length));
831 } 840 }
832 } else { 841 } else {
833 - linearizationWarning("incorrect first object number in outline hints table."); 842 + linearizationWarning("incorrect first object number in outline "
  843 + "hints table.");
834 } 844 }
835 } else { 845 } else {
836 linearizationWarning("incorrect object count in outline hint table"); 846 linearizationWarning("incorrect object count in outline hint table");
libqpdf/QPDF_optimization.cc
@@ -2,7 +2,7 @@ @@ -2,7 +2,7 @@
2 2
3 #include <qpdf/assert_debug.h> 3 #include <qpdf/assert_debug.h>
4 4
5 -#include <qpdf/QPDF_private.hh> 5 +#include <qpdf/QPDF.hh>
6 6
7 #include <qpdf/QPDFExc.hh> 7 #include <qpdf/QPDFExc.hh>
8 #include <qpdf/QPDFWriter_private.hh> 8 #include <qpdf/QPDFWriter_private.hh>
@@ -78,12 +78,6 @@ QPDF::optimize( @@ -78,12 +78,6 @@ QPDF::optimize(
78 optimize_internal(obj, true, skip_stream_parameters); 78 optimize_internal(obj, true, skip_stream_parameters);
79 } 79 }
80 80
81 -void  
82 -QPDF::optimize(QPDF::Xref_table const& xref)  
83 -{  
84 - optimize_internal(xref, false, nullptr);  
85 -}  
86 -  
87 template <typename T> 81 template <typename T>
88 void 82 void
89 QPDF::optimize_internal( 83 QPDF::optimize_internal(
@@ -121,13 +115,13 @@ QPDF::optimize_internal( @@ -121,13 +115,13 @@ QPDF::optimize_internal(
121 } 115 }
122 116
123 // Traverse document-level items 117 // Traverse document-level items
124 - for (auto const& key: m->xref_table.trailer().getKeys()) { 118 + for (auto const& key: m->trailer.getKeys()) {
125 if (key == "/Root") { 119 if (key == "/Root") {
126 // handled separately 120 // handled separately
127 } else { 121 } else {
128 updateObjectMaps( 122 updateObjectMaps(
129 ObjUser(ObjUser::ou_trailer_key, key), 123 ObjUser(ObjUser::ou_trailer_key, key),
130 - m->xref_table.trailer().getKey(key), 124 + m->trailer.getKey(key),
131 skip_stream_parameters); 125 skip_stream_parameters);
132 } 126 }
133 } 127 }
@@ -175,13 +169,13 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) @@ -175,13 +169,13 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
175 // values for them. 169 // values for them.
176 std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors; 170 std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors;
177 pushInheritedAttributesToPageInternal( 171 pushInheritedAttributesToPageInternal(
178 - m->xref_table.trailer().getKey("/Root").getKey("/Pages"), 172 + m->trailer.getKey("/Root").getKey("/Pages"),
179 key_ancestors, 173 key_ancestors,
180 allow_changes, 174 allow_changes,
181 warn_skipped_keys); 175 warn_skipped_keys);
182 if (!key_ancestors.empty()) { 176 if (!key_ancestors.empty()) {
183 - throw std::logic_error(  
184 - "key_ancestors not empty after pushing inherited attributes to pages"); 177 + throw std::logic_error("key_ancestors not empty after"
  178 + " pushing inherited attributes to pages");
185 } 179 }
186 m->pushed_inherited_attributes_to_pages = true; 180 m->pushed_inherited_attributes_to_pages = true;
187 m->ever_pushed_inherited_attributes_to_pages = true; 181 m->ever_pushed_inherited_attributes_to_pages = true;
@@ -448,45 +442,3 @@ QPDF::filterCompressedObjects(QPDFWriter::ObjTable const&amp; obj) @@ -448,45 +442,3 @@ QPDF::filterCompressedObjects(QPDFWriter::ObjTable const&amp; obj)
448 m->obj_user_to_objects = t_obj_user_to_objects; 442 m->obj_user_to_objects = t_obj_user_to_objects;
449 m->object_to_obj_users = t_object_to_obj_users; 443 m->object_to_obj_users = t_object_to_obj_users;
450 } 444 }
451 -  
452 -void  
453 -QPDF::filterCompressedObjects(QPDF::Xref_table const& xref)  
454 -{  
455 - if (!xref.object_streams()) {  
456 - return;  
457 - }  
458 -  
459 - // Transform object_to_obj_users and obj_user_to_objects so that they refer only to uncompressed  
460 - // objects. If something is a user of a compressed object, then it is really a user of the  
461 - // object stream that contains it.  
462 -  
463 - std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects;  
464 - std::map<QPDFObjGen, std::set<ObjUser>> t_object_to_obj_users;  
465 -  
466 - for (auto const& i1: m->obj_user_to_objects) {  
467 - ObjUser const& ou = i1.first;  
468 - // Loop over objects.  
469 - for (auto const& og: i1.second) {  
470 - if (auto stream = xref.stream_number(og.getObj())) {  
471 - t_obj_user_to_objects[ou].insert(QPDFObjGen(stream, 0));  
472 - } else {  
473 - t_obj_user_to_objects[ou].insert(og);  
474 - }  
475 - }  
476 - }  
477 -  
478 - for (auto const& i1: m->object_to_obj_users) {  
479 - QPDFObjGen const& og = i1.first;  
480 - // Loop over obj_users.  
481 - for (auto const& ou: i1.second) {  
482 - if (auto stream = xref.stream_number(og.getObj())) {  
483 - t_object_to_obj_users[QPDFObjGen(stream, 0)].insert(ou);  
484 - } else {  
485 - t_object_to_obj_users[og].insert(ou);  
486 - }  
487 - }  
488 - }  
489 -  
490 - m->obj_user_to_objects = t_obj_user_to_objects;  
491 - m->object_to_obj_users = t_object_to_obj_users;  
492 -}  
libqpdf/QPDF_pages.cc
1 -#include <qpdf/QPDF_private.hh> 1 +#include <qpdf/QPDF.hh>
2 2
3 #include <qpdf/QPDFExc.hh> 3 #include <qpdf/QPDFExc.hh>
4 #include <qpdf/QTC.hh> 4 #include <qpdf/QTC.hh>
libqpdf/qpdf/ObjTable.hh
@@ -46,12 +46,6 @@ class ObjTable: public std::vector&lt;T&gt; @@ -46,12 +46,6 @@ class ObjTable: public std::vector&lt;T&gt;
46 } 46 }
47 47
48 inline T const& 48 inline T const&
49 - operator[](unsigned int idx) const  
50 - {  
51 - return element(idx);  
52 - }  
53 -  
54 - inline T const&  
55 operator[](QPDFObjGen og) const 49 operator[](QPDFObjGen og) const
56 { 50 {
57 return element(static_cast<size_t>(og.getObj())); 51 return element(static_cast<size_t>(og.getObj()));
libqpdf/qpdf/QPDFObject_private.hh
@@ -6,13 +6,14 @@ @@ -6,13 +6,14 @@
6 6
7 #include <qpdf/Constants.h> 7 #include <qpdf/Constants.h>
8 #include <qpdf/JSON.hh> 8 #include <qpdf/JSON.hh>
  9 +#include <qpdf/QPDF.hh>
9 #include <qpdf/QPDFValue.hh> 10 #include <qpdf/QPDFValue.hh>
10 -#include <qpdf/QPDF_private.hh>  
11 #include <qpdf/Types.h> 11 #include <qpdf/Types.h>
12 12
13 #include <string> 13 #include <string>
14 #include <string_view> 14 #include <string_view>
15 15
  16 +class QPDF;
16 class QPDFObjectHandle; 17 class QPDFObjectHandle;
17 18
18 class QPDFObject 19 class QPDFObject
libqpdf/qpdf/QPDF_private.hh deleted
1 -#ifndef QPDF_PRIVATE_HH  
2 -#define QPDF_PRIVATE_HH  
3 -  
4 -#include <qpdf/QPDF.hh>  
5 -  
6 -#include <variant>  
7 -  
8 -// Xref_table encapsulates the pdf's xref table and trailer.  
9 -class QPDF::Xref_table  
10 -{  
11 - public:  
12 - Xref_table(QPDF& qpdf, InputSource* const& file) :  
13 - qpdf(qpdf),  
14 - file(file)  
15 - {  
16 - tokenizer.allowEOF();  
17 - }  
18 -  
19 - void initialize();  
20 - void initialize_empty();  
21 - void initialize_json();  
22 - void reconstruct(QPDFExc& e);  
23 - void show();  
24 - bool resolve();  
25 -  
26 - QPDFObjectHandle  
27 - trailer() const  
28 - {  
29 - return trailer_;  
30 - }  
31 -  
32 - void  
33 - trailer(QPDFObjectHandle&& oh)  
34 - {  
35 - trailer_ = std::move(oh);  
36 - }  
37 -  
38 - // Returns 0 if og is not in table.  
39 - size_t  
40 - type(QPDFObjGen og) const  
41 - {  
42 - int id = og.getObj();  
43 - if (id < 1 || static_cast<size_t>(id) >= table.size()) {  
44 - return 0;  
45 - }  
46 - auto& e = table[static_cast<size_t>(id)];  
47 - return e.gen() == og.getGen() ? e.type() : 0;  
48 - }  
49 -  
50 - // Returns 0 if og is not in table.  
51 - size_t  
52 - type(size_t id) const noexcept  
53 - {  
54 - if (id >= table.size()) {  
55 - return 0;  
56 - }  
57 - return table[id].type();  
58 - }  
59 -  
60 - // Returns 0 if og is not in table.  
61 - qpdf_offset_t  
62 - offset(QPDFObjGen og) const noexcept  
63 - {  
64 - int id = og.getObj();  
65 - if (id < 1 || static_cast<size_t>(id) >= table.size()) {  
66 - return 0;  
67 - }  
68 - return table[static_cast<size_t>(id)].offset();  
69 - }  
70 -  
71 - // Returns 0 if id is not in table.  
72 - int  
73 - stream_number(int id) const noexcept  
74 - {  
75 - if (id < 1 || static_cast<size_t>(id) >= table.size()) {  
76 - return 0;  
77 - }  
78 - return table[static_cast<size_t>(id)].stream_number();  
79 - }  
80 -  
81 - int  
82 - stream_index(int id) const noexcept  
83 - {  
84 - if (id < 1 || static_cast<size_t>(id) >= table.size()) {  
85 - return 0;  
86 - }  
87 - return table[static_cast<size_t>(id)].stream_index();  
88 - }  
89 -  
90 - QPDFObjGen at_offset(qpdf_offset_t offset) const noexcept;  
91 -  
92 - std::map<QPDFObjGen, QPDFXRefEntry> as_map() const;  
93 -  
94 - bool  
95 - object_streams() const noexcept  
96 - {  
97 - return object_streams_;  
98 - }  
99 -  
100 - // Return a vector of object id and stream number for each compressed object.  
101 - std::vector<std::pair<unsigned int, int>>  
102 - compressed_objects() const  
103 - {  
104 - if (!initialized()) {  
105 - throw std::logic_error("Xref_table::compressed_objects called before parsing.");  
106 - }  
107 -  
108 - std::vector<std::pair<unsigned int, int>> result;  
109 - result.reserve(table.size());  
110 -  
111 - unsigned int i{0};  
112 - for (auto const& item: table) {  
113 - if (item.type() == 2) {  
114 - result.emplace_back(i, item.stream_number());  
115 - }  
116 - ++i;  
117 - }  
118 - return result;  
119 - }  
120 -  
121 - // Temporary access to underlying table size  
122 - size_t  
123 - size() const noexcept  
124 - {  
125 - return table.size();  
126 - }  
127 -  
128 - void  
129 - ignore_streams(bool val) noexcept  
130 - {  
131 - ignore_streams_ = val;  
132 - }  
133 -  
134 - bool  
135 - initialized() const noexcept  
136 - {  
137 - return initialized_;  
138 - }  
139 -  
140 - void  
141 - attempt_recovery(bool val) noexcept  
142 - {  
143 - attempt_recovery_ = val;  
144 - }  
145 -  
146 - int  
147 - max_id() const noexcept  
148 - {  
149 - return max_id_;  
150 - }  
151 -  
152 - // For Linearization  
153 -  
154 - qpdf_offset_t  
155 - end_after_space(QPDFObjGen og)  
156 - {  
157 - auto& e = entry(toS(og.getObj()));  
158 - switch (e.type()) {  
159 - case 1:  
160 - return e.end_after_space_;  
161 - case 2:  
162 - {  
163 - auto es = entry(toS(e.stream_number()));  
164 - return es.type() == 1 ? es.end_after_space_ : 0;  
165 - }  
166 - default:  
167 - return 0;  
168 - }  
169 - }  
170 -  
171 - qpdf_offset_t  
172 - end_before_space(QPDFObjGen og)  
173 - {  
174 - auto& e = entry(toS(og.getObj()));  
175 - switch (e.type()) {  
176 - case 1:  
177 - return e.end_before_space_;  
178 - case 2:  
179 - {  
180 - auto es = entry(toS(e.stream_number()));  
181 - return es.type() == 1 ? es.end_before_space_ : 0;  
182 - }  
183 - default:  
184 - return 0;  
185 - }  
186 - }  
187 -  
188 - void  
189 - linearization_offsets(size_t id, qpdf_offset_t before, qpdf_offset_t after)  
190 - {  
191 - if (type(id)) {  
192 - table[id].end_before_space_ = before;  
193 - table[id].end_after_space_ = after;  
194 - }  
195 - }  
196 -  
197 - bool  
198 - uncompressed_after_compressed() const noexcept  
199 - {  
200 - return uncompressed_after_compressed_;  
201 - }  
202 -  
203 - // Actual value from file  
204 - qpdf_offset_t  
205 - first_item_offset() const noexcept  
206 - {  
207 - return first_item_offset_;  
208 - }  
209 -  
210 - private:  
211 - // Object, count, offset of first entry  
212 - typedef std::tuple<int, int, qpdf_offset_t> Subsection;  
213 -  
214 - struct Uncompressed  
215 - {  
216 - Uncompressed(qpdf_offset_t offset) :  
217 - offset(offset)  
218 - {  
219 - }  
220 - qpdf_offset_t offset;  
221 - };  
222 -  
223 - struct Compressed  
224 - {  
225 - Compressed(int stream_number, int stream_index) :  
226 - stream_number(stream_number),  
227 - stream_index(stream_index)  
228 - {  
229 - }  
230 - int stream_number{0};  
231 - int stream_index{0};  
232 - };  
233 -  
234 - typedef std::variant<std::monostate, Uncompressed, Compressed> Xref;  
235 -  
236 - struct Entry  
237 - {  
238 - Entry() = default;  
239 -  
240 - Entry(int gen, Xref entry) :  
241 - gen_(gen),  
242 - entry(entry)  
243 - {  
244 - }  
245 -  
246 - int  
247 - gen() const noexcept  
248 - {  
249 - return gen_;  
250 - }  
251 -  
252 - size_t  
253 - type() const noexcept  
254 - {  
255 - return entry.index();  
256 - }  
257 -  
258 - qpdf_offset_t  
259 - offset() const noexcept  
260 - {  
261 - return type() == 1 ? std::get<1>(entry).offset : 0;  
262 - }  
263 -  
264 - int  
265 - stream_number() const noexcept  
266 - {  
267 - return type() == 2 ? std::get<2>(entry).stream_number : 0;  
268 - }  
269 -  
270 - int  
271 - stream_index() const noexcept  
272 - {  
273 - return type() == 2 ? std::get<2>(entry).stream_index : 0;  
274 - }  
275 -  
276 - int gen_{0};  
277 - Xref entry;  
278 - qpdf_offset_t end_before_space_{0};  
279 - qpdf_offset_t end_after_space_{0};  
280 - };  
281 -  
282 - Entry&  
283 - entry(size_t id)  
284 - {  
285 - return id < table.size() ? table[id] : table[0];  
286 - }  
287 -  
288 - void read(qpdf_offset_t offset);  
289 -  
290 - // Methods to parse tables  
291 - qpdf_offset_t process_section(qpdf_offset_t offset);  
292 - std::vector<Subsection> subsections(std::string& line);  
293 - std::vector<Subsection> bad_subsections(std::string& line, qpdf_offset_t offset);  
294 - Subsection subsection(std::string const& line);  
295 - bool read_entry(qpdf_offset_t& f1, int& f2, char& type);  
296 - bool read_bad_entry(qpdf_offset_t& f1, int& f2, char& type);  
297 -  
298 - // Methods to parse streams  
299 - qpdf_offset_t read_stream(qpdf_offset_t offset);  
300 - qpdf_offset_t process_stream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);  
301 - std::pair<int, std::array<int, 3>>  
302 - process_W(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);  
303 - std::pair<int, size_t> process_Size(  
304 - QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged);  
305 - std::pair<int, std::vector<std::pair<int, int>>> process_Index(  
306 - QPDFObjectHandle& dict,  
307 - int max_num_entries,  
308 - std::function<QPDFExc(std::string_view)> damaged);  
309 -  
310 - QPDFObjectHandle read_trailer();  
311 -  
312 - QPDFTokenizer::Token  
313 - read_token(size_t max_len = 0)  
314 - {  
315 - return tokenizer.readToken(*file, "", true, max_len);  
316 - }  
317 -  
318 - // Methods to insert table entries  
319 - void insert(int obj, int f0, qpdf_offset_t f1, int f2);  
320 - void insert_free(QPDFObjGen);  
321 -  
322 - QPDFExc  
323 - damaged_pdf(std::string const& msg)  
324 - {  
325 - return qpdf.damagedPDF("", 0, msg);  
326 - }  
327 -  
328 - QPDFExc  
329 - damaged_table(std::string const& msg)  
330 - {  
331 - return qpdf.damagedPDF("xref table", msg);  
332 - }  
333 -  
334 - void  
335 - warn_damaged(std::string const& msg)  
336 - {  
337 - qpdf.warn(damaged_pdf(msg));  
338 - }  
339 -  
340 - QPDF& qpdf;  
341 - InputSource* const& file;  
342 - QPDFTokenizer tokenizer;  
343 -  
344 - std::vector<Entry> table;  
345 - QPDFObjectHandle trailer_;  
346 -  
347 - bool attempt_recovery_{true};  
348 - bool initialized_{false};  
349 - bool ignore_streams_{false};  
350 - bool reconstructed_{false};  
351 - bool object_streams_{false};  
352 - // Before the xref table is initialized, max_id_ is an upper bound on the possible object ids  
353 - // that could be present in the PDF file. Once the trailer has been read, max_id_ is set to the  
354 - // value of /Size. If the file is damaged, max_id_ becomes the maximum object id in the xref  
355 - // table after reconstruction.  
356 - int max_id_{std::numeric_limits<int>::max() - 1};  
357 -  
358 - // Linearization data  
359 - bool uncompressed_after_compressed_{false};  
360 - qpdf_offset_t first_item_offset_{0}; // actual value from file  
361 -};  
362 -  
363 -// The Resolver class is restricted to QPDFObject so that only it can resolve indirect  
364 -// references.  
365 -class QPDF::Resolver  
366 -{  
367 - friend class QPDFObject;  
368 - friend class QPDF_Unresolved;  
369 -  
370 - private:  
371 - static QPDFObject*  
372 - resolved(QPDF* qpdf, QPDFObjGen og)  
373 - {  
374 - return qpdf->resolve(og);  
375 - }  
376 -};  
377 -  
378 -// StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data.  
379 -class QPDF::StreamCopier  
380 -{  
381 - friend class QPDFObjectHandle;  
382 -  
383 - private:  
384 - static void  
385 - copyStreamData(QPDF* qpdf, QPDFObjectHandle const& dest, QPDFObjectHandle const& src)  
386 - {  
387 - qpdf->copyStreamData(dest, src);  
388 - }  
389 -};  
390 -  
391 -// The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides  
392 -// special access to allow the parser to create unresolved objects and dangling references.  
393 -class QPDF::ParseGuard  
394 -{  
395 - friend class QPDFParser;  
396 -  
397 - private:  
398 - ParseGuard(QPDF* qpdf) :  
399 - qpdf(qpdf)  
400 - {  
401 - if (qpdf) {  
402 - qpdf->inParse(true);  
403 - }  
404 - }  
405 -  
406 - static std::shared_ptr<QPDFObject>  
407 - getObject(QPDF* qpdf, int id, int gen, bool parse_pdf)  
408 - {  
409 - return qpdf->getObjectForParser(id, gen, parse_pdf);  
410 - }  
411 -  
412 - ~ParseGuard()  
413 - {  
414 - if (qpdf) {  
415 - qpdf->inParse(false);  
416 - }  
417 - }  
418 - QPDF* qpdf;  
419 -};  
420 -  
421 -// Pipe class is restricted to QPDF_Stream.  
422 -class QPDF::Pipe  
423 -{  
424 - friend class QPDF_Stream;  
425 -  
426 - private:  
427 - static bool  
428 - pipeStreamData(  
429 - QPDF* qpdf,  
430 - QPDFObjGen const& og,  
431 - qpdf_offset_t offset,  
432 - size_t length,  
433 - QPDFObjectHandle dict,  
434 - Pipeline* pipeline,  
435 - bool suppress_warnings,  
436 - bool will_retry)  
437 - {  
438 - return qpdf->pipeStreamData(  
439 - og, offset, length, dict, pipeline, suppress_warnings, will_retry);  
440 - }  
441 -};  
442 -  
443 -class QPDF::ObjCache  
444 -{  
445 - public:  
446 - ObjCache() = default;  
447 -  
448 - ObjCache(std::shared_ptr<QPDFObject> object) :  
449 - object(object)  
450 - {  
451 - }  
452 -  
453 - std::shared_ptr<QPDFObject> object;  
454 -};  
455 -  
456 -class QPDF::ObjCopier  
457 -{  
458 - public:  
459 - std::map<QPDFObjGen, QPDFObjectHandle> object_map;  
460 - std::vector<QPDFObjectHandle> to_copy;  
461 - QPDFObjGen::set visiting;  
462 -};  
463 -  
464 -class QPDF::EncryptionParameters  
465 -{  
466 - friend class QPDF;  
467 -  
468 - public:  
469 - EncryptionParameters();  
470 -  
471 - private:  
472 - bool encrypted;  
473 - bool encryption_initialized;  
474 - int encryption_V;  
475 - int encryption_R;  
476 - bool encrypt_metadata;  
477 - std::map<std::string, encryption_method_e> crypt_filters;  
478 - encryption_method_e cf_stream;  
479 - encryption_method_e cf_string;  
480 - encryption_method_e cf_file;  
481 - std::string provided_password;  
482 - std::string user_password;  
483 - std::string encryption_key;  
484 - std::string cached_object_encryption_key;  
485 - QPDFObjGen cached_key_og;  
486 - bool user_password_matched;  
487 - bool owner_password_matched;  
488 -};  
489 -  
490 -class QPDF::ForeignStreamData  
491 -{  
492 - friend class QPDF;  
493 -  
494 - public:  
495 - ForeignStreamData(  
496 - std::shared_ptr<EncryptionParameters> encp,  
497 - std::shared_ptr<InputSource> file,  
498 - QPDFObjGen const& foreign_og,  
499 - qpdf_offset_t offset,  
500 - size_t length,  
501 - QPDFObjectHandle local_dict);  
502 -  
503 - private:  
504 - std::shared_ptr<EncryptionParameters> encp;  
505 - std::shared_ptr<InputSource> file;  
506 - QPDFObjGen foreign_og;  
507 - qpdf_offset_t offset;  
508 - size_t length;  
509 - QPDFObjectHandle local_dict;  
510 -};  
511 -  
512 -class QPDF::CopiedStreamDataProvider: public QPDFObjectHandle::StreamDataProvider  
513 -{  
514 - public:  
515 - CopiedStreamDataProvider(QPDF& destination_qpdf);  
516 - ~CopiedStreamDataProvider() override = default;  
517 - bool provideStreamData(  
518 - QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry) override;  
519 - void registerForeignStream(QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream);  
520 - void registerForeignStream(QPDFObjGen const& local_og, std::shared_ptr<ForeignStreamData>);  
521 -  
522 - private:  
523 - QPDF& destination_qpdf;  
524 - std::map<QPDFObjGen, QPDFObjectHandle> foreign_streams;  
525 - std::map<QPDFObjGen, std::shared_ptr<ForeignStreamData>> foreign_stream_data;  
526 -};  
527 -  
528 -class QPDF::StringDecrypter: public QPDFObjectHandle::StringDecrypter  
529 -{  
530 - friend class QPDF;  
531 -  
532 - public:  
533 - StringDecrypter(QPDF* qpdf, QPDFObjGen const& og);  
534 - ~StringDecrypter() override = default;  
535 - void decryptString(std::string& val) override;  
536 -  
537 - private:  
538 - QPDF* qpdf;  
539 - QPDFObjGen og;  
540 -};  
541 -  
542 -// PDF 1.4: Table F.4  
543 -struct QPDF::HPageOffsetEntry  
544 -{  
545 - int delta_nobjects{0}; // 1  
546 - qpdf_offset_t delta_page_length{0}; // 2  
547 - // vectors' sizes = nshared_objects  
548 - int nshared_objects{0}; // 3  
549 - std::vector<int> shared_identifiers; // 4  
550 - std::vector<int> shared_numerators; // 5  
551 - qpdf_offset_t delta_content_offset{0}; // 6  
552 - qpdf_offset_t delta_content_length{0}; // 7  
553 -};  
554 -  
555 -// PDF 1.4: Table F.3  
556 -struct QPDF::HPageOffset  
557 -{  
558 - int min_nobjects{0}; // 1  
559 - qpdf_offset_t first_page_offset{0}; // 2  
560 - int nbits_delta_nobjects{0}; // 3  
561 - int min_page_length{0}; // 4  
562 - int nbits_delta_page_length{0}; // 5  
563 - int min_content_offset{0}; // 6  
564 - int nbits_delta_content_offset{0}; // 7  
565 - int min_content_length{0}; // 8  
566 - int nbits_delta_content_length{0}; // 9  
567 - int nbits_nshared_objects{0}; // 10  
568 - int nbits_shared_identifier{0}; // 11  
569 - int nbits_shared_numerator{0}; // 12  
570 - int shared_denominator{0}; // 13  
571 - // vector size is npages  
572 - std::vector<HPageOffsetEntry> entries;  
573 -};  
574 -  
575 -// PDF 1.4: Table F.6  
576 -struct QPDF::HSharedObjectEntry  
577 -{  
578 - // Item 3 is a 128-bit signature (unsupported by Acrobat)  
579 - int delta_group_length{0}; // 1  
580 - int signature_present{0}; // 2 -- always 0  
581 - int nobjects_minus_one{0}; // 4 -- always 0  
582 -};  
583 -  
584 -// PDF 1.4: Table F.5  
585 -struct QPDF::HSharedObject  
586 -{  
587 - int first_shared_obj{0}; // 1  
588 - qpdf_offset_t first_shared_offset{0}; // 2  
589 - int nshared_first_page{0}; // 3  
590 - int nshared_total{0}; // 4  
591 - int nbits_nobjects{0}; // 5  
592 - int min_group_length{0}; // 6  
593 - int nbits_delta_group_length{0}; // 7  
594 - // vector size is nshared_total  
595 - std::vector<HSharedObjectEntry> entries;  
596 -};  
597 -  
598 -// PDF 1.4: Table F.9  
599 -struct QPDF::HGeneric  
600 -{  
601 - int first_object{0}; // 1  
602 - qpdf_offset_t first_object_offset{0}; // 2  
603 - int nobjects{0}; // 3  
604 - int group_length{0}; // 4  
605 -};  
606 -  
607 -// Other linearization data structures  
608 -  
609 -// Initialized from Linearization Parameter dictionary  
610 -struct QPDF::LinParameters  
611 -{  
612 - qpdf_offset_t file_size{0}; // /L  
613 - int first_page_object{0}; // /O  
614 - qpdf_offset_t first_page_end{0}; // /E  
615 - int npages{0}; // /N  
616 - qpdf_offset_t xref_zero_offset{0}; // /T  
617 - int first_page{0}; // /P  
618 - qpdf_offset_t H_offset{0}; // offset of primary hint stream  
619 - qpdf_offset_t H_length{0}; // length of primary hint stream  
620 -};  
621 -  
622 -// Computed hint table value data structures. These tables contain the computed values on which  
623 -// the hint table values are based. They exclude things like number of bits and store actual  
624 -// values instead of mins and deltas. File offsets are also absolute rather than being offset  
625 -// by the size of the primary hint table. We populate the hint table structures from these  
626 -// during writing and compare the hint table values with these during validation. We ignore  
627 -// some values for various reasons described in the code. Those values are omitted from these  
628 -// structures. Note also that object numbers are object numbers from the input file, not the  
629 -// output file.  
630 -  
631 -// Naming convention: CHSomething is analogous to HSomething above. "CH" is computed hint.  
632 -  
633 -struct QPDF::CHPageOffsetEntry  
634 -{  
635 - int nobjects{0};  
636 - int nshared_objects{0};  
637 - // vectors' sizes = nshared_objects  
638 - std::vector<int> shared_identifiers;  
639 -};  
640 -  
641 -struct QPDF::CHPageOffset  
642 -{  
643 - // vector size is npages  
644 - std::vector<CHPageOffsetEntry> entries;  
645 -};  
646 -  
647 -struct QPDF::CHSharedObjectEntry  
648 -{  
649 - CHSharedObjectEntry(int object) :  
650 - object(object)  
651 - {  
652 - }  
653 -  
654 - int object;  
655 -};  
656 -  
657 -// PDF 1.4: Table F.5  
658 -struct QPDF::CHSharedObject  
659 -{  
660 - int first_shared_obj{0};  
661 - int nshared_first_page{0};  
662 - int nshared_total{0};  
663 - // vector size is nshared_total  
664 - std::vector<CHSharedObjectEntry> entries;  
665 -};  
666 -  
667 -// No need for CHGeneric -- HGeneric is fine as is.  
668 -  
669 -// Data structures to support optimization -- implemented in QPDF_optimization.cc  
670 -  
671 -class QPDF::ObjUser  
672 -{  
673 - public:  
674 - enum user_e { ou_bad, ou_page, ou_thumb, ou_trailer_key, ou_root_key, ou_root };  
675 -  
676 - // type is set to ou_bad  
677 - ObjUser();  
678 -  
679 - // type must be ou_root  
680 - ObjUser(user_e type);  
681 -  
682 - // type must be one of ou_page or ou_thumb  
683 - ObjUser(user_e type, int pageno);  
684 -  
685 - // type must be one of ou_trailer_key or ou_root_key  
686 - ObjUser(user_e type, std::string const& key);  
687 -  
688 - bool operator<(ObjUser const&) const;  
689 -  
690 - user_e ou_type;  
691 - int pageno; // if ou_page;  
692 - std::string key; // if ou_trailer_key or ou_root_key  
693 -};  
694 -  
695 -struct QPDF::UpdateObjectMapsFrame  
696 -{  
697 - UpdateObjectMapsFrame(ObjUser const& ou, QPDFObjectHandle oh, bool top);  
698 -  
699 - ObjUser const& ou;  
700 - QPDFObjectHandle oh;  
701 - bool top;  
702 -};  
703 -  
704 -class QPDF::PatternFinder: public InputSource::Finder  
705 -{  
706 - public:  
707 - PatternFinder(QPDF& qpdf, bool (QPDF::*checker)()) :  
708 - qpdf(qpdf),  
709 - checker(checker)  
710 - {  
711 - }  
712 - ~PatternFinder() override = default;  
713 - bool  
714 - check() override  
715 - {  
716 - return (this->qpdf.*checker)();  
717 - }  
718 -  
719 - private:  
720 - QPDF& qpdf;  
721 - bool (QPDF::*checker)();  
722 -};  
723 -  
724 -class QPDF::Members  
725 -{  
726 - friend class QPDF;  
727 - friend class ResolveRecorder;  
728 -  
729 - public:  
730 - QPDF_DLL  
731 - ~Members() = default;  
732 -  
733 - private:  
734 - Members(QPDF& qpdf);  
735 - Members(Members const&) = delete;  
736 -  
737 - std::shared_ptr<QPDFLogger> log;  
738 - unsigned long long unique_id{0};  
739 - QPDFTokenizer tokenizer;  
740 - // Filename to use if there is no input PDF  
741 - std::string no_input_name{"closed input source"};  
742 - // If file_sp is updated, file must also be updated.  
743 - std::shared_ptr<InputSource> file_sp;  
744 - InputSource* file;  
745 - std::string last_object_description;  
746 - bool provided_password_is_hex_key{false};  
747 - bool suppress_warnings{false};  
748 - size_t max_warnings{0};  
749 - bool attempt_recovery{true};  
750 - bool check_mode{false};  
751 - std::shared_ptr<EncryptionParameters> encp;  
752 - std::string pdf_version;  
753 - Xref_table xref_table;  
754 - std::map<QPDFObjGen, ObjCache> obj_cache;  
755 - std::set<QPDFObjGen> resolving;  
756 - std::vector<QPDFObjectHandle> all_pages;  
757 - bool invalid_page_found{false};  
758 - std::map<QPDFObjGen, int> pageobj_to_pages_pos;  
759 - bool pushed_inherited_attributes_to_pages{false};  
760 - bool ever_pushed_inherited_attributes_to_pages{false};  
761 - bool ever_called_get_all_pages{false};  
762 - std::vector<QPDFExc> warnings;  
763 - std::map<unsigned long long, ObjCopier> object_copiers;  
764 - std::shared_ptr<QPDFObjectHandle::StreamDataProvider> copied_streams;  
765 - // copied_stream_data_provider is owned by copied_streams  
766 - CopiedStreamDataProvider* copied_stream_data_provider{nullptr};  
767 - bool fixed_dangling_refs{false};  
768 - bool immediate_copy_from{false};  
769 - bool in_parse{false};  
770 - std::set<int> resolved_object_streams;  
771 -  
772 - // Linearization data  
773 - bool linearization_warnings{false};  
774 -  
775 - // Linearization parameter dictionary and hint table data: may be read from file or computed  
776 - // prior to writing a linearized file  
777 - QPDFObjectHandle lindict;  
778 - LinParameters linp;  
779 - HPageOffset page_offset_hints;  
780 - HSharedObject shared_object_hints;  
781 - HGeneric outline_hints;  
782 -  
783 - // Computed linearization data: used to populate above tables during writing and to compare  
784 - // with them during validation. c_ means computed.  
785 - LinParameters c_linp;  
786 - CHPageOffset c_page_offset_data;  
787 - CHSharedObject c_shared_object_data;  
788 - HGeneric c_outline_data;  
789 -  
790 - // Object ordering data for linearized files: initialized by calculateLinearizationData().  
791 - // Part numbers refer to the PDF 1.4 specification.  
792 - std::vector<QPDFObjectHandle> part4;  
793 - std::vector<QPDFObjectHandle> part6;  
794 - std::vector<QPDFObjectHandle> part7;  
795 - std::vector<QPDFObjectHandle> part8;  
796 - std::vector<QPDFObjectHandle> part9;  
797 -  
798 - // Optimization data  
799 - std::map<ObjUser, std::set<QPDFObjGen>> obj_user_to_objects;  
800 - std::map<QPDFObjGen, std::set<ObjUser>> object_to_obj_users;  
801 -};  
802 -  
803 -// JobSetter class is restricted to QPDFJob.  
804 -class QPDF::JobSetter  
805 -{  
806 - friend class QPDFJob;  
807 -  
808 - private:  
809 - // Enable enhanced warnings for pdf file checking.  
810 - static void  
811 - setCheckMode(QPDF& qpdf, bool val)  
812 - {  
813 - qpdf.m->check_mode = val;  
814 - }  
815 -};  
816 -  
817 -class QPDF::ResolveRecorder  
818 -{  
819 - public:  
820 - ResolveRecorder(QPDF* qpdf, QPDFObjGen const& og) :  
821 - qpdf(qpdf),  
822 - iter(qpdf->m->resolving.insert(og).first)  
823 - {  
824 - }  
825 - virtual ~ResolveRecorder()  
826 - {  
827 - this->qpdf->m->resolving.erase(iter);  
828 - }  
829 -  
830 - private:  
831 - QPDF* qpdf;  
832 - std::set<QPDFObjGen>::const_iterator iter;  
833 -};  
834 -  
835 -// Writer class is restricted to QPDFWriter so that only it can call certain methods.  
836 -class QPDF::Writer  
837 -{  
838 - friend class QPDFWriter;  
839 -  
840 - private:  
841 - static void  
842 - optimize(  
843 - QPDF& qpdf,  
844 - QPDFWriter::ObjTable const& obj,  
845 - std::function<int(QPDFObjectHandle&)> skip_stream_parameters)  
846 - {  
847 - return qpdf.optimize(obj, skip_stream_parameters);  
848 - }  
849 -  
850 - static void  
851 - getLinearizedParts(  
852 - QPDF& qpdf,  
853 - QPDFWriter::ObjTable const& obj,  
854 - std::vector<QPDFObjectHandle>& part4,  
855 - std::vector<QPDFObjectHandle>& part6,  
856 - std::vector<QPDFObjectHandle>& part7,  
857 - std::vector<QPDFObjectHandle>& part8,  
858 - std::vector<QPDFObjectHandle>& part9)  
859 - {  
860 - qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9);  
861 - }  
862 -  
863 - static void  
864 - generateHintStream(  
865 - QPDF& qpdf,  
866 - QPDFWriter::NewObjTable const& new_obj,  
867 - QPDFWriter::ObjTable const& obj,  
868 - std::shared_ptr<Buffer>& hint_stream,  
869 - int& S,  
870 - int& O,  
871 - bool compressed)  
872 - {  
873 - return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed);  
874 - }  
875 -  
876 - static std::vector<QPDFObjGen>  
877 - getCompressibleObjGens(QPDF& qpdf)  
878 - {  
879 - return qpdf.getCompressibleObjVector();  
880 - }  
881 -  
882 - static std::vector<bool>  
883 - getCompressibleObjSet(QPDF& qpdf)  
884 - {  
885 - return qpdf.getCompressibleObjSet();  
886 - }  
887 -  
888 - static Xref_table const&  
889 - getXRefTable(QPDF& qpdf)  
890 - {  
891 - return qpdf.m->xref_table;  
892 - }  
893 -  
894 - static size_t  
895 - tableSize(QPDF& qpdf)  
896 - {  
897 - return qpdf.tableSize();  
898 - }  
899 -};  
900 -  
901 -#endif // QPDF_PRIVATE_HH  
libqpdf/qpdf/qpdf-c_impl.hh
@@ -16,7 +16,7 @@ struct _qpdf_data @@ -16,7 +16,7 @@ struct _qpdf_data
16 _qpdf_data() = default; 16 _qpdf_data() = default;
17 17
18 _qpdf_data(std::unique_ptr<QPDF>&& qpdf) : 18 _qpdf_data(std::unique_ptr<QPDF>&& qpdf) :
19 - qpdf(std::move(qpdf)){}; 19 + qpdf(std::move(qpdf)) {};
20 20
21 ~_qpdf_data() = default; 21 ~_qpdf_data() = default;
22 22
qpdf/qpdf.testcov
@@ -48,6 +48,7 @@ QPDFWriter encrypted hint stream 0 @@ -48,6 +48,7 @@ QPDFWriter encrypted hint stream 0
48 QPDF opt inherited scalar 0 48 QPDF opt inherited scalar 0
49 QPDF xref reused object 0 49 QPDF xref reused object 0
50 QPDF xref gen > 0 1 50 QPDF xref gen > 0 1
  51 +QPDF xref size mismatch 0
51 QPDF not a pdf file 0 52 QPDF not a pdf file 0
52 QPDF can't find startxref 0 53 QPDF can't find startxref 0
53 QPDF invalid xref 0 54 QPDF invalid xref 0
@@ -104,6 +105,7 @@ QPDFWriter not recompressing /FlateDecode 0 @@ -104,6 +105,7 @@ QPDFWriter not recompressing /FlateDecode 0
104 QPDF_encryption xref stream from encrypted file 0 105 QPDF_encryption xref stream from encrypted file 0
105 QPDFJob unable to filter 0 106 QPDFJob unable to filter 0
106 QUtil non-trivial UTF-16 0 107 QUtil non-trivial UTF-16 0
  108 +QPDF xref overwrite object 0
107 QPDF xref overwrite invalid objgen 0 109 QPDF xref overwrite invalid objgen 0
108 QPDF decoding error warning 0 110 QPDF decoding error warning 0
109 qpdf-c called qpdf_init 0 111 qpdf-c called qpdf_init 0
@@ -435,6 +437,7 @@ QPDF xref skipped space 0 @@ -435,6 +437,7 @@ QPDF xref skipped space 0
435 QPDF eof skipping spaces before xref 1 437 QPDF eof skipping spaces before xref 1
436 QPDF_encryption user matches owner V < 5 0 438 QPDF_encryption user matches owner V < 5 0
437 QPDF_encryption same password 1 439 QPDF_encryption same password 1
  440 +QPDFWriter stream in ostream 0
438 QPDFParser duplicate dict key 0 441 QPDFParser duplicate dict key 0
439 QPDFWriter no encryption sig contents 0 442 QPDFWriter no encryption sig contents 0
440 QPDFPageObjectHelper colorspace lookup 0 443 QPDFPageObjectHelper colorspace lookup 0
qpdf/qtest/qpdf/bad12-recover.out
  1 +WARNING: bad12.pdf: reported number of objects (9) is not one plus the highest object number (7)
1 WARNING: bad12.pdf (object 2 0, offset 128): expected endobj 2 WARNING: bad12.pdf (object 2 0, offset 128): expected endobj
2 /QTest is implicit 3 /QTest is implicit
3 /QTest is direct and has type null (2) 4 /QTest is direct and has type null (2)
qpdf/qtest/qpdf/bad12.out
  1 +WARNING: bad12.pdf: reported number of objects (9) is not one plus the highest object number (7)
1 WARNING: bad12.pdf (object 2 0, offset 128): expected endobj 2 WARNING: bad12.pdf (object 2 0, offset 128): expected endobj
2 /QTest is implicit 3 /QTest is implicit
3 /QTest is direct and has type null (2) 4 /QTest is direct and has type null (2)
qpdf/qtest/qpdf/fuzz-16214.out
@@ -11,9 +11,11 @@ WARNING: fuzz-16214.pdf (object 1 0, offset 7189): expected n n obj @@ -11,9 +11,11 @@ WARNING: fuzz-16214.pdf (object 1 0, offset 7189): expected n n obj
11 WARNING: fuzz-16214.pdf: Attempting to reconstruct cross-reference table 11 WARNING: fuzz-16214.pdf: Attempting to reconstruct cross-reference table
12 WARNING: fuzz-16214.pdf (offset 7207): error decoding stream data for object 2 0: stream inflate: inflate: data: invalid code lengths set 12 WARNING: fuzz-16214.pdf (offset 7207): error decoding stream data for object 2 0: stream inflate: inflate: data: invalid code lengths set
13 WARNING: fuzz-16214.pdf (offset 7207): getStreamData called on unfilterable stream 13 WARNING: fuzz-16214.pdf (offset 7207): getStreamData called on unfilterable stream
14 -WARNING: fuzz-16214.pdf (object 7 0, offset 7207): supposed object stream 5 has wrong type  
15 -WARNING: fuzz-16214.pdf (object 7 0, offset 7207): object stream 5 has incorrect keys 14 +WARNING: fuzz-16214.pdf (object 8 0, offset 7207): supposed object stream 5 has wrong type
  15 +WARNING: fuzz-16214.pdf (object 8 0, offset 7207): object stream 5 has incorrect keys
16 WARNING: fuzz-16214.pdf (object 21 0, offset 3639): expected endstream 16 WARNING: fuzz-16214.pdf (object 21 0, offset 3639): expected endstream
17 WARNING: fuzz-16214.pdf (object 21 0, offset 3112): attempting to recover stream length 17 WARNING: fuzz-16214.pdf (object 21 0, offset 3112): attempting to recover stream length
18 WARNING: fuzz-16214.pdf (object 21 0, offset 3112): recovered stream length: 340 18 WARNING: fuzz-16214.pdf (object 21 0, offset 3112): recovered stream length: 340
  19 +WARNING: fuzz-16214.pdf, stream object 8 0: stream found inside object stream; treating as null
  20 +WARNING: fuzz-16214.pdf, stream object 8 0: stream found inside object stream; treating as null
19 qpdf: operation succeeded with warnings; resulting file may have some problems 21 qpdf: operation succeeded with warnings; resulting file may have some problems
qpdf/qtest/qpdf/issue-147.out
@@ -2,6 +2,6 @@ WARNING: issue-147.pdf: can't find PDF header @@ -2,6 +2,6 @@ WARNING: issue-147.pdf: can't find PDF header
2 WARNING: issue-147.pdf: file is damaged 2 WARNING: issue-147.pdf: file is damaged
3 WARNING: issue-147.pdf: can't find startxref 3 WARNING: issue-147.pdf: can't find startxref
4 WARNING: issue-147.pdf: Attempting to reconstruct cross-reference table 4 WARNING: issue-147.pdf: Attempting to reconstruct cross-reference table
5 -WARNING: issue-147.pdf: ignoring object with impossibly large id 62  
6 WARNING: issue-147.pdf (trailer, offset 9): expected dictionary key but found non-name object; inserting key /QPDFFake1 5 WARNING: issue-147.pdf (trailer, offset 9): expected dictionary key but found non-name object; inserting key /QPDFFake1
7 -qpdf: issue-147.pdf: unable to find /Root dictionary 6 +WARNING: issue-147.pdf: ignoring object with impossibly large id 62
  7 +qpdf: issue-147.pdf: unable to find objects while recovering damaged file
qpdf/qtest/qpdf/issue-335b.out
1 WARNING: issue-335b.pdf: can't find PDF header 1 WARNING: issue-335b.pdf: can't find PDF header
2 WARNING: issue-335b.pdf: file is damaged 2 WARNING: issue-335b.pdf: file is damaged
3 -WARNING: issue-335b.pdf (xref table, offset 11): xref table subsection header contains impossibly large entry 3 +WARNING: issue-335b.pdf (xref table, offset 23): invalid xref entry (obj=6)
4 WARNING: issue-335b.pdf: Attempting to reconstruct cross-reference table 4 WARNING: issue-335b.pdf: Attempting to reconstruct cross-reference table
5 qpdf: issue-335b.pdf: unable to find trailer dictionary while recovering damaged file 5 qpdf: issue-335b.pdf: unable to find trailer dictionary while recovering damaged file
qpdf/qtest/qpdf/recover-xref-stream.out
1 WARNING: recover-xref-stream.pdf: file is damaged 1 WARNING: recover-xref-stream.pdf: file is damaged
2 WARNING: recover-xref-stream.pdf: can't find startxref 2 WARNING: recover-xref-stream.pdf: can't find startxref
3 WARNING: recover-xref-stream.pdf: Attempting to reconstruct cross-reference table 3 WARNING: recover-xref-stream.pdf: Attempting to reconstruct cross-reference table
  4 +WARNING: recover-xref-stream.pdf: reported number of objects (14) is not one plus the highest object number (15)
4 qpdf: operation succeeded with warnings; resulting file may have some problems 5 qpdf: operation succeeded with warnings; resulting file may have some problems
qpdf/qtest/qpdf/recover-xref-stream.pdf
No preview for this file type
qpdf/qtest/qpdf/xref-errors.out
@@ -3,11 +3,6 @@ WARNING: xref-errors.pdf (xref table, offset 606): accepting invalid xref table @@ -3,11 +3,6 @@ WARNING: xref-errors.pdf (xref table, offset 606): accepting invalid xref table
3 WARNING: xref-errors.pdf (xref table, offset 627): accepting invalid xref table entry 3 WARNING: xref-errors.pdf (xref table, offset 627): accepting invalid xref table entry
4 WARNING: xref-errors.pdf (xref table, offset 648): accepting invalid xref table entry 4 WARNING: xref-errors.pdf (xref table, offset 648): accepting invalid xref table entry
5 WARNING: xref-errors.pdf (xref table, offset 667): accepting invalid xref table entry 5 WARNING: xref-errors.pdf (xref table, offset 667): accepting invalid xref table entry
6 -WARNING: xref-errors.pdf (xref table, offset 585): accepting invalid xref table entry  
7 -WARNING: xref-errors.pdf (xref table, offset 606): accepting invalid xref table entry  
8 -WARNING: xref-errors.pdf (xref table, offset 627): accepting invalid xref table entry  
9 -WARNING: xref-errors.pdf (xref table, offset 648): accepting invalid xref table entry  
10 -WARNING: xref-errors.pdf (xref table, offset 667): accepting invalid xref table entry  
11 checking xref-errors.pdf 6 checking xref-errors.pdf
12 PDF Version: 1.3 7 PDF Version: 1.3
13 File is not encrypted 8 File is not encrypted
qpdf/qtest/specific-bugs.test
@@ -16,7 +16,7 @@ my $td = new TestDriver('specific-bugs'); @@ -16,7 +16,7 @@ my $td = new TestDriver('specific-bugs');
16 16
17 # The number is the github issue number in which the bug was reported. 17 # The number is the github issue number in which the bug was reported.
18 my @bug_tests = ( 18 my @bug_tests = (
19 -# ["51", "resolve loop", 2], 19 + ["51", "resolve loop", 2],
20 ["99", "object 0", 2], 20 ["99", "object 0", 2],
21 ["99b", "object 0", 2], 21 ["99b", "object 0", 2],
22 ["100", "xref reconstruction loop", 2], 22 ["100", "xref reconstruction loop", 2],
@@ -28,7 +28,7 @@ my @bug_tests = ( @@ -28,7 +28,7 @@ my @bug_tests = (
28 ["106", "zlib data error", 3], 28 ["106", "zlib data error", 3],
29 ["141a", "/W entry size 0", 2], 29 ["141a", "/W entry size 0", 2],
30 ["141b", "/W entry size 0", 2], 30 ["141b", "/W entry size 0", 2],
31 -# ["143", "self-referential ostream", 2, "--preserve-unreferenced"], 31 + ["143", "self-referential ostream", 2, "--preserve-unreferenced"],
32 ["146", "very deeply nested array", 2], 32 ["146", "very deeply nested array", 2],
33 ["147", "previously caused memory error", 2], 33 ["147", "previously caused memory error", 2],
34 ["148", "free memory on bad flate", 2], 34 ["148", "free memory on bad flate", 2],
@@ -38,7 +38,7 @@ my @bug_tests = ( @@ -38,7 +38,7 @@ my @bug_tests = (
38 ["263", "empty xref stream", 2], 38 ["263", "empty xref stream", 2],
39 ["335a", "ozz-fuzz-12152", 2], 39 ["335a", "ozz-fuzz-12152", 2],
40 ["335b", "ozz-fuzz-14845", 2], 40 ["335b", "ozz-fuzz-14845", 2],
41 -# ["fuzz-16214", "stream in object stream", 3, "--preserve-unreferenced"], 41 + ["fuzz-16214", "stream in object stream", 3, "--preserve-unreferenced"],
42 # When adding to this list, consider adding to CORPUS_FROM_TEST in 42 # When adding to this list, consider adding to CORPUS_FROM_TEST in
43 # fuzz/CMakeLists.txt and updating the count in 43 # fuzz/CMakeLists.txt and updating the count in
44 # fuzz/qtest/fuzz.test. 44 # fuzz/qtest/fuzz.test.