Commit 0d5c57c151a35be81b98eef6051303301d253112

Authored by m-holger
1 parent f1800410

Revert "Merge pull request #1272 from m-holger/xref_table"

This reverts commit ff2a78f579ebdd06b417e34260a17dba06e71137, reversing
changes made to 8f54319f7a6514110f4b05cbbf1cb1c9fc8cb6a0.
include/qpdf/QPDF.hh
... ... @@ -725,15 +725,165 @@ class QPDF
725 725 void removePage(QPDFObjectHandle page);
726 726 // End legacy page helpers
727 727  
728   - // End of the public API. The following classes and methods are for qpdf internal use only.
  728 + // Writer class is restricted to QPDFWriter so that only it can call certain methods.
  729 + class Writer
  730 + {
  731 + friend class QPDFWriter;
  732 +
  733 + private:
  734 + static void
  735 + optimize(
  736 + QPDF& qpdf,
  737 + QPDFWriter::ObjTable const& obj,
  738 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
  739 + {
  740 + return qpdf.optimize(obj, skip_stream_parameters);
  741 + }
  742 +
  743 + static void
  744 + getLinearizedParts(
  745 + QPDF& qpdf,
  746 + QPDFWriter::ObjTable const& obj,
  747 + std::vector<QPDFObjectHandle>& part4,
  748 + std::vector<QPDFObjectHandle>& part6,
  749 + std::vector<QPDFObjectHandle>& part7,
  750 + std::vector<QPDFObjectHandle>& part8,
  751 + std::vector<QPDFObjectHandle>& part9)
  752 + {
  753 + qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9);
  754 + }
  755 +
  756 + static void
  757 + generateHintStream(
  758 + QPDF& qpdf,
  759 + QPDFWriter::NewObjTable const& new_obj,
  760 + QPDFWriter::ObjTable const& obj,
  761 + std::shared_ptr<Buffer>& hint_stream,
  762 + int& S,
  763 + int& O,
  764 + bool compressed)
  765 + {
  766 + return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed);
  767 + }
  768 +
  769 + static std::vector<QPDFObjGen>
  770 + getCompressibleObjGens(QPDF& qpdf)
  771 + {
  772 + return qpdf.getCompressibleObjVector();
  773 + }
  774 +
  775 + static std::vector<bool>
  776 + getCompressibleObjSet(QPDF& qpdf)
  777 + {
  778 + return qpdf.getCompressibleObjSet();
  779 + }
  780 +
  781 + static std::map<QPDFObjGen, QPDFXRefEntry> const&
  782 + getXRefTable(QPDF& qpdf)
  783 + {
  784 + return qpdf.getXRefTableInternal();
  785 + }
  786 +
  787 + static size_t
  788 + tableSize(QPDF& qpdf)
  789 + {
  790 + return qpdf.tableSize();
  791 + }
  792 + };
  793 +
  794 + // The Resolver class is restricted to QPDFObject so that only it can resolve indirect
  795 + // references.
  796 + class Resolver
  797 + {
  798 + friend class QPDFObject;
  799 + friend class QPDF_Unresolved;
  800 +
  801 + private:
  802 + static QPDFObject*
  803 + resolved(QPDF* qpdf, QPDFObjGen og)
  804 + {
  805 + return qpdf->resolve(og);
  806 + }
  807 + };
  808 +
  809 + // StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data.
  810 + class StreamCopier
  811 + {
  812 + friend class QPDFObjectHandle;
  813 +
  814 + private:
  815 + static void
  816 + copyStreamData(QPDF* qpdf, QPDFObjectHandle const& dest, QPDFObjectHandle const& src)
  817 + {
  818 + qpdf->copyStreamData(dest, src);
  819 + }
  820 + };
  821 +
  822 + // The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides
  823 + // special access to allow the parser to create unresolved objects and dangling references.
  824 + class ParseGuard
  825 + {
  826 + friend class QPDFParser;
  827 +
  828 + private:
  829 + ParseGuard(QPDF* qpdf) :
  830 + qpdf(qpdf)
  831 + {
  832 + if (qpdf) {
  833 + qpdf->inParse(true);
  834 + }
  835 + }
  836 +
  837 + static std::shared_ptr<QPDFObject>
  838 + getObject(QPDF* qpdf, int id, int gen, bool parse_pdf)
  839 + {
  840 + return qpdf->getObjectForParser(id, gen, parse_pdf);
  841 + }
729 842  
730   - class Writer;
731   - class Resolver;
732   - class StreamCopier;
733   - class ParseGuard;
734   - class Pipe;
735   - class JobSetter;
736   - class Xref_table;
  843 + ~ParseGuard()
  844 + {
  845 + if (qpdf) {
  846 + qpdf->inParse(false);
  847 + }
  848 + }
  849 + QPDF* qpdf;
  850 + };
  851 +
  852 + // Pipe class is restricted to QPDF_Stream.
  853 + class Pipe
  854 + {
  855 + friend class QPDF_Stream;
  856 +
  857 + private:
  858 + static bool
  859 + pipeStreamData(
  860 + QPDF* qpdf,
  861 + QPDFObjGen const& og,
  862 + qpdf_offset_t offset,
  863 + size_t length,
  864 + QPDFObjectHandle dict,
  865 + Pipeline* pipeline,
  866 + bool suppress_warnings,
  867 + bool will_retry)
  868 + {
  869 + return qpdf->pipeStreamData(
  870 + og, offset, length, dict, pipeline, suppress_warnings, will_retry);
  871 + }
  872 + };
  873 +
  874 + // JobSetter class is restricted to QPDFJob.
  875 + class JobSetter
  876 + {
  877 + friend class QPDFJob;
  878 +
  879 + private:
  880 + // Enable enhanced warnings for pdf file checking.
  881 + static void
  882 + setCheckMode(QPDF& qpdf, bool val)
  883 + {
  884 + qpdf.m->check_mode = val;
  885 + }
  886 + };
737 887  
738 888 // For testing only -- do not add to DLL
739 889 static bool test_json_validators();
... ... @@ -748,18 +898,163 @@ class QPDF
748 898  
749 899 static std::string const qpdf_version;
750 900  
751   - class ObjCache;
752   - class ObjCopier;
753   - class EncryptionParameters;
754   - class ForeignStreamData;
755   - class CopiedStreamDataProvider;
756   - class StringDecrypter;
757   - class ResolveRecorder;
  901 + class ObjCache
  902 + {
  903 + public:
  904 + ObjCache() :
  905 + end_before_space(0),
  906 + end_after_space(0)
  907 + {
  908 + }
  909 + ObjCache(
  910 + std::shared_ptr<QPDFObject> object,
  911 + qpdf_offset_t end_before_space = 0,
  912 + qpdf_offset_t end_after_space = 0) :
  913 + object(object),
  914 + end_before_space(end_before_space),
  915 + end_after_space(end_after_space)
  916 + {
  917 + }
  918 +
  919 + std::shared_ptr<QPDFObject> object;
  920 + qpdf_offset_t end_before_space;
  921 + qpdf_offset_t end_after_space;
  922 + };
  923 +
  924 + class ObjCopier
  925 + {
  926 + public:
  927 + std::map<QPDFObjGen, QPDFObjectHandle> object_map;
  928 + std::vector<QPDFObjectHandle> to_copy;
  929 + QPDFObjGen::set visiting;
  930 + };
  931 +
  932 + class EncryptionParameters
  933 + {
  934 + friend class QPDF;
  935 +
  936 + public:
  937 + EncryptionParameters();
  938 +
  939 + private:
  940 + bool encrypted;
  941 + bool encryption_initialized;
  942 + int encryption_V;
  943 + int encryption_R;
  944 + bool encrypt_metadata;
  945 + std::map<std::string, encryption_method_e> crypt_filters;
  946 + encryption_method_e cf_stream;
  947 + encryption_method_e cf_string;
  948 + encryption_method_e cf_file;
  949 + std::string provided_password;
  950 + std::string user_password;
  951 + std::string encryption_key;
  952 + std::string cached_object_encryption_key;
  953 + QPDFObjGen cached_key_og;
  954 + bool user_password_matched;
  955 + bool owner_password_matched;
  956 + };
  957 +
  958 + class ForeignStreamData
  959 + {
  960 + friend class QPDF;
  961 +
  962 + public:
  963 + ForeignStreamData(
  964 + std::shared_ptr<EncryptionParameters> encp,
  965 + std::shared_ptr<InputSource> file,
  966 + QPDFObjGen const& foreign_og,
  967 + qpdf_offset_t offset,
  968 + size_t length,
  969 + QPDFObjectHandle local_dict);
  970 +
  971 + private:
  972 + std::shared_ptr<EncryptionParameters> encp;
  973 + std::shared_ptr<InputSource> file;
  974 + QPDFObjGen foreign_og;
  975 + qpdf_offset_t offset;
  976 + size_t length;
  977 + QPDFObjectHandle local_dict;
  978 + };
  979 +
  980 + class CopiedStreamDataProvider: public QPDFObjectHandle::StreamDataProvider
  981 + {
  982 + public:
  983 + CopiedStreamDataProvider(QPDF& destination_qpdf);
  984 + ~CopiedStreamDataProvider() override = default;
  985 + bool provideStreamData(
  986 + QPDFObjGen const& og,
  987 + Pipeline* pipeline,
  988 + bool suppress_warnings,
  989 + bool will_retry) override;
  990 + void registerForeignStream(QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream);
  991 + void registerForeignStream(QPDFObjGen const& local_og, std::shared_ptr<ForeignStreamData>);
  992 +
  993 + private:
  994 + QPDF& destination_qpdf;
  995 + std::map<QPDFObjGen, QPDFObjectHandle> foreign_streams;
  996 + std::map<QPDFObjGen, std::shared_ptr<ForeignStreamData>> foreign_stream_data;
  997 + };
  998 +
  999 + class StringDecrypter: public QPDFObjectHandle::StringDecrypter
  1000 + {
  1001 + friend class QPDF;
  1002 +
  1003 + public:
  1004 + StringDecrypter(QPDF* qpdf, QPDFObjGen const& og);
  1005 + ~StringDecrypter() override = default;
  1006 + void decryptString(std::string& val) override;
  1007 +
  1008 + private:
  1009 + QPDF* qpdf;
  1010 + QPDFObjGen og;
  1011 + };
  1012 +
  1013 + class ResolveRecorder
  1014 + {
  1015 + public:
  1016 + ResolveRecorder(QPDF* qpdf, QPDFObjGen const& og) :
  1017 + qpdf(qpdf),
  1018 + iter(qpdf->m->resolving.insert(og).first)
  1019 + {
  1020 + }
  1021 + virtual ~ResolveRecorder()
  1022 + {
  1023 + this->qpdf->m->resolving.erase(iter);
  1024 + }
  1025 +
  1026 + private:
  1027 + QPDF* qpdf;
  1028 + std::set<QPDFObjGen>::const_iterator iter;
  1029 + };
  1030 +
758 1031 class JSONReactor;
759 1032  
760 1033 void parse(char const* password);
761 1034 void inParse(bool);
  1035 + void setTrailer(QPDFObjectHandle obj);
  1036 + void read_xref(qpdf_offset_t offset);
  1037 + bool resolveXRefTable();
  1038 + void reconstruct_xref(QPDFExc& e);
  1039 + bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes);
  1040 + bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
  1041 + bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
  1042 + qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
  1043 + qpdf_offset_t read_xrefStream(qpdf_offset_t offset);
  1044 + qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
  1045 + std::pair<int, std::array<int, 3>>
  1046 + processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);
  1047 + int processXRefSize(
  1048 + QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged);
  1049 + std::pair<int, std::vector<std::pair<int, int>>> processXRefIndex(
  1050 + QPDFObjectHandle& dict,
  1051 + int max_num_entries,
  1052 + std::function<QPDFExc(std::string_view)> damaged);
  1053 + void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2);
  1054 + void insertFreeXrefEntry(QPDFObjGen);
  1055 + void insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2);
762 1056 void setLastObjectDescription(std::string const& description, QPDFObjGen const& og);
  1057 + QPDFObjectHandle readTrailer();
763 1058 QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og);
764 1059 void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
765 1060 void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
... ... @@ -786,7 +1081,11 @@ class QPDF
786 1081 std::shared_ptr<QPDFObject> getObjectForParser(int id, int gen, bool parse_pdf);
787 1082 std::shared_ptr<QPDFObject> getObjectForJSON(int id, int gen);
788 1083 void removeObject(QPDFObjGen og);
789   - void updateCache(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& object);
  1084 + void updateCache(
  1085 + QPDFObjGen const& og,
  1086 + std::shared_ptr<QPDFObject> const& object,
  1087 + qpdf_offset_t end_before_space,
  1088 + qpdf_offset_t end_after_space);
790 1089 static QPDFExc damagedPDF(
791 1090 InputSource& input,
792 1091 std::string const& object,
... ... @@ -823,6 +1122,7 @@ class QPDF
823 1122  
824 1123 // For QPDFWriter:
825 1124  
  1125 + std::map<QPDFObjGen, QPDFXRefEntry> const& getXRefTableInternal();
826 1126 template <typename T>
827 1127 void optimize_internal(
828 1128 T const& object_stream_data,
... ... @@ -831,7 +1131,6 @@ class QPDF
831 1131 void optimize(
832 1132 QPDFWriter::ObjTable const& obj,
833 1133 std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
834   - void optimize(Xref_table const& obj);
835 1134 size_t tableSize();
836 1135  
837 1136 // Get lists of all objects in order according to the part of a linearized file that they belong
... ... @@ -897,19 +1196,200 @@ class QPDF
897 1196 replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top);
898 1197 void copyStreamData(QPDFObjectHandle dest_stream, QPDFObjectHandle src_stream);
899 1198  
900   - struct HPageOffsetEntry;
901   - struct HPageOffset;
902   - struct HSharedObjectEntry;
903   - struct HSharedObject;
904   - struct HGeneric;
905   - struct LinParameters;
906   - struct CHPageOffsetEntry;
907   - struct CHPageOffset;
908   - struct CHSharedObjectEntry;
909   - struct CHSharedObject;
910   - class ObjUser;
911   - struct UpdateObjectMapsFrame;
912   - class PatternFinder;
  1199 + // Linearization Hint table structures.
  1200 + // Naming conventions:
  1201 +
  1202 + // HSomething is the Something Hint Table or table header
  1203 + // HSomethingEntry is an entry in the Something table
  1204 +
  1205 + // delta_something + min_something = something
  1206 + // nbits_something = number of bits required for something
  1207 +
  1208 + // something_offset is the pre-adjusted offset in the file. If >=
  1209 + // H0_offset, H0_length must be added to get an actual file
  1210 + // offset.
  1211 +
  1212 + // PDF 1.4: Table F.4
  1213 + struct HPageOffsetEntry
  1214 + {
  1215 + int delta_nobjects{0}; // 1
  1216 + qpdf_offset_t delta_page_length{0}; // 2
  1217 + // vectors' sizes = nshared_objects
  1218 + int nshared_objects{0}; // 3
  1219 + std::vector<int> shared_identifiers; // 4
  1220 + std::vector<int> shared_numerators; // 5
  1221 + qpdf_offset_t delta_content_offset{0}; // 6
  1222 + qpdf_offset_t delta_content_length{0}; // 7
  1223 + };
  1224 +
  1225 + // PDF 1.4: Table F.3
  1226 + struct HPageOffset
  1227 + {
  1228 + int min_nobjects{0}; // 1
  1229 + qpdf_offset_t first_page_offset{0}; // 2
  1230 + int nbits_delta_nobjects{0}; // 3
  1231 + int min_page_length{0}; // 4
  1232 + int nbits_delta_page_length{0}; // 5
  1233 + int min_content_offset{0}; // 6
  1234 + int nbits_delta_content_offset{0}; // 7
  1235 + int min_content_length{0}; // 8
  1236 + int nbits_delta_content_length{0}; // 9
  1237 + int nbits_nshared_objects{0}; // 10
  1238 + int nbits_shared_identifier{0}; // 11
  1239 + int nbits_shared_numerator{0}; // 12
  1240 + int shared_denominator{0}; // 13
  1241 + // vector size is npages
  1242 + std::vector<HPageOffsetEntry> entries;
  1243 + };
  1244 +
  1245 + // PDF 1.4: Table F.6
  1246 + struct HSharedObjectEntry
  1247 + {
  1248 + // Item 3 is a 128-bit signature (unsupported by Acrobat)
  1249 + int delta_group_length{0}; // 1
  1250 + int signature_present{0}; // 2 -- always 0
  1251 + int nobjects_minus_one{0}; // 4 -- always 0
  1252 + };
  1253 +
  1254 + // PDF 1.4: Table F.5
  1255 + struct HSharedObject
  1256 + {
  1257 + int first_shared_obj{0}; // 1
  1258 + qpdf_offset_t first_shared_offset{0}; // 2
  1259 + int nshared_first_page{0}; // 3
  1260 + int nshared_total{0}; // 4
  1261 + int nbits_nobjects{0}; // 5
  1262 + int min_group_length{0}; // 6
  1263 + int nbits_delta_group_length{0}; // 7
  1264 + // vector size is nshared_total
  1265 + std::vector<HSharedObjectEntry> entries;
  1266 + };
  1267 +
  1268 + // PDF 1.4: Table F.9
  1269 + struct HGeneric
  1270 + {
  1271 + int first_object{0}; // 1
  1272 + qpdf_offset_t first_object_offset{0}; // 2
  1273 + int nobjects{0}; // 3
  1274 + int group_length{0}; // 4
  1275 + };
  1276 +
  1277 + // Other linearization data structures
  1278 +
  1279 + // Initialized from Linearization Parameter dictionary
  1280 + struct LinParameters
  1281 + {
  1282 + qpdf_offset_t file_size{0}; // /L
  1283 + int first_page_object{0}; // /O
  1284 + qpdf_offset_t first_page_end{0}; // /E
  1285 + int npages{0}; // /N
  1286 + qpdf_offset_t xref_zero_offset{0}; // /T
  1287 + int first_page{0}; // /P
  1288 + qpdf_offset_t H_offset{0}; // offset of primary hint stream
  1289 + qpdf_offset_t H_length{0}; // length of primary hint stream
  1290 + };
  1291 +
  1292 + // Computed hint table value data structures. These tables contain the computed values on which
  1293 + // the hint table values are based. They exclude things like number of bits and store actual
  1294 + // values instead of mins and deltas. File offsets are also absolute rather than being offset
  1295 + // by the size of the primary hint table. We populate the hint table structures from these
  1296 + // during writing and compare the hint table values with these during validation. We ignore
  1297 + // some values for various reasons described in the code. Those values are omitted from these
  1298 + // structures. Note also that object numbers are object numbers from the input file, not the
  1299 + // output file.
  1300 +
  1301 + // Naming convention: CHSomething is analogous to HSomething above. "CH" is computed hint.
  1302 +
  1303 + struct CHPageOffsetEntry
  1304 + {
  1305 + int nobjects{0};
  1306 + int nshared_objects{0};
  1307 + // vectors' sizes = nshared_objects
  1308 + std::vector<int> shared_identifiers;
  1309 + };
  1310 +
  1311 + struct CHPageOffset
  1312 + {
  1313 + // vector size is npages
  1314 + std::vector<CHPageOffsetEntry> entries;
  1315 + };
  1316 +
  1317 + struct CHSharedObjectEntry
  1318 + {
  1319 + CHSharedObjectEntry(int object) :
  1320 + object(object)
  1321 + {
  1322 + }
  1323 +
  1324 + int object;
  1325 + };
  1326 +
  1327 + // PDF 1.4: Table F.5
  1328 + struct CHSharedObject
  1329 + {
  1330 + int first_shared_obj{0};
  1331 + int nshared_first_page{0};
  1332 + int nshared_total{0};
  1333 + // vector size is nshared_total
  1334 + std::vector<CHSharedObjectEntry> entries;
  1335 + };
  1336 +
  1337 + // No need for CHGeneric -- HGeneric is fine as is.
  1338 +
  1339 + // Data structures to support optimization -- implemented in QPDF_optimization.cc
  1340 +
  1341 + class ObjUser
  1342 + {
  1343 + public:
  1344 + enum user_e { ou_bad, ou_page, ou_thumb, ou_trailer_key, ou_root_key, ou_root };
  1345 +
  1346 + // type is set to ou_bad
  1347 + ObjUser();
  1348 +
  1349 + // type must be ou_root
  1350 + ObjUser(user_e type);
  1351 +
  1352 + // type must be one of ou_page or ou_thumb
  1353 + ObjUser(user_e type, int pageno);
  1354 +
  1355 + // type must be one of ou_trailer_key or ou_root_key
  1356 + ObjUser(user_e type, std::string const& key);
  1357 +
  1358 + bool operator<(ObjUser const&) const;
  1359 +
  1360 + user_e ou_type;
  1361 + int pageno; // if ou_page or ou_thumb
  1362 + std::string key; // if ou_trailer_key or ou_root_key
  1363 + };
  1364 +
  1365 + struct UpdateObjectMapsFrame
  1366 + {
  1367 + UpdateObjectMapsFrame(ObjUser const& ou, QPDFObjectHandle oh, bool top);
  1368 +
  1369 + ObjUser const& ou;
  1370 + QPDFObjectHandle oh;
  1371 + bool top;
  1372 + };
  1373 +
  1374 + class PatternFinder: public InputSource::Finder
  1375 + {
  1376 + public:
  1377 + PatternFinder(QPDF& qpdf, bool (QPDF::*checker)()) :
  1378 + qpdf(qpdf),
  1379 + checker(checker)
  1380 + {
  1381 + }
  1382 + ~PatternFinder() override = default;
  1383 + bool
  1384 + check() override
  1385 + {
  1386 + return (this->qpdf.*checker)();
  1387 + }
  1388 +
  1389 + private:
  1390 + QPDF& qpdf;
  1391 + bool (QPDF::*checker)();
  1392 + };
913 1393  
914 1394 // Methods to support pattern finding
915 1395 static bool validatePDFVersion(char const*&, std::string& version);
... ... @@ -931,7 +1411,6 @@ class QPDF
931 1411 QPDFObjectHandle
932 1412 getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data);
933 1413 QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj);
934   - QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, Xref_table const& obj);
935 1414 int lengthNextN(int first_object, int n);
936 1415 void
937 1416 checkHPageOffset(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj);
... ... @@ -977,7 +1456,6 @@ class QPDF
977 1456 std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
978 1457 void filterCompressedObjects(std::map<int, int> const& object_stream_data);
979 1458 void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data);
980   - void filterCompressedObjects(Xref_table const& object_stream_data);
981 1459  
982 1460 // JSON import
983 1461 void importJSON(std::shared_ptr<InputSource>, bool must_be_complete);
... ... @@ -1008,7 +1486,90 @@ class QPDF
1008 1486 return QIntC::to_ulonglong(i);
1009 1487 }
1010 1488  
1011   - class Members;
  1489 + class Members
  1490 + {
  1491 + friend class QPDF;
  1492 + friend class ResolveRecorder;
  1493 +
  1494 + public:
  1495 + QPDF_DLL
  1496 + ~Members() = default;
  1497 +
  1498 + private:
  1499 + Members();
  1500 + Members(Members const&) = delete;
  1501 +
  1502 + std::shared_ptr<QPDFLogger> log;
  1503 + unsigned long long unique_id{0};
  1504 + QPDFTokenizer tokenizer;
  1505 + std::shared_ptr<InputSource> file;
  1506 + std::string last_object_description;
  1507 + bool provided_password_is_hex_key{false};
  1508 + bool ignore_xref_streams{false};
  1509 + bool suppress_warnings{false};
  1510 + size_t max_warnings{0};
  1511 + bool attempt_recovery{true};
  1512 + bool check_mode{false};
  1513 + std::shared_ptr<EncryptionParameters> encp;
  1514 + std::string pdf_version;
  1515 + std::map<QPDFObjGen, QPDFXRefEntry> xref_table;
  1516 + // Various tables are indexed by object id, with potential size id + 1
  1517 + int xref_table_max_id{std::numeric_limits<int>::max() - 1};
  1518 + qpdf_offset_t xref_table_max_offset{0};
  1519 + std::set<int> deleted_objects;
  1520 + std::map<QPDFObjGen, ObjCache> obj_cache;
  1521 + std::set<QPDFObjGen> resolving;
  1522 + QPDFObjectHandle trailer;
  1523 + std::vector<QPDFObjectHandle> all_pages;
  1524 + bool invalid_page_found{false};
  1525 + std::map<QPDFObjGen, int> pageobj_to_pages_pos;
  1526 + bool pushed_inherited_attributes_to_pages{false};
  1527 + bool ever_pushed_inherited_attributes_to_pages{false};
  1528 + bool ever_called_get_all_pages{false};
  1529 + std::vector<QPDFExc> warnings;
  1530 + std::map<unsigned long long, ObjCopier> object_copiers;
  1531 + std::shared_ptr<QPDFObjectHandle::StreamDataProvider> copied_streams;
  1532 + // copied_stream_data_provider is owned by copied_streams
  1533 + CopiedStreamDataProvider* copied_stream_data_provider{nullptr};
  1534 + bool reconstructed_xref{false};
  1535 + bool fixed_dangling_refs{false};
  1536 + bool immediate_copy_from{false};
  1537 + bool in_parse{false};
  1538 + bool parsed{false};
  1539 + std::set<int> resolved_object_streams;
  1540 +
  1541 + // Linearization data
  1542 + qpdf_offset_t first_xref_item_offset{0}; // actual value from file
  1543 + bool uncompressed_after_compressed{false};
  1544 + bool linearization_warnings{false};
  1545 +
  1546 + // Linearization parameter dictionary and hint table data: may be read from file or computed
  1547 + // prior to writing a linearized file
  1548 + QPDFObjectHandle lindict;
  1549 + LinParameters linp;
  1550 + HPageOffset page_offset_hints;
  1551 + HSharedObject shared_object_hints;
  1552 + HGeneric outline_hints;
  1553 +
  1554 + // Computed linearization data: used to populate above tables during writing and to compare
  1555 + // with them during validation. c_ means computed.
  1556 + LinParameters c_linp;
  1557 + CHPageOffset c_page_offset_data;
  1558 + CHSharedObject c_shared_object_data;
  1559 + HGeneric c_outline_data;
  1560 +
  1561 + // Object ordering data for linearized files: initialized by calculateLinearizationData().
  1562 + // Part numbers refer to the PDF 1.4 specification.
  1563 + std::vector<QPDFObjectHandle> part4;
  1564 + std::vector<QPDFObjectHandle> part6;
  1565 + std::vector<QPDFObjectHandle> part7;
  1566 + std::vector<QPDFObjectHandle> part8;
  1567 + std::vector<QPDFObjectHandle> part9;
  1568 +
  1569 + // Optimization data
  1570 + std::map<ObjUser, std::set<QPDFObjGen>> obj_user_to_objects;
  1571 + std::map<QPDFObjGen, std::set<ObjUser>> object_to_obj_users;
  1572 + };
1012 1573  
1013 1574 // Keep all member variables inside the Members object, which we dynamically allocate. This
1014 1575 // makes it possible to add new private members without breaking binary compatibility.
... ...
libqpdf/QPDF.cc
1 1 #include <qpdf/qpdf-config.h> // include first for large file support
2 2  
3   -#include <qpdf/QPDF_private.hh>
  3 +#include <qpdf/QPDF.hh>
4 4  
5 5 #include <array>
6 6 #include <atomic>
... ... @@ -32,51 +32,67 @@
32 32 // being static as well.
33 33 std::string const QPDF::qpdf_version(QPDF_VERSION);
34 34  
  35 +static char const* EMPTY_PDF = (
  36 + // force line break
  37 + "%PDF-1.3\n"
  38 + "1 0 obj\n"
  39 + "<< /Type /Catalog /Pages 2 0 R >>\n"
  40 + "endobj\n"
  41 + "2 0 obj\n"
  42 + "<< /Type /Pages /Kids [] /Count 0 >>\n"
  43 + "endobj\n"
  44 + "xref\n"
  45 + "0 3\n"
  46 + "0000000000 65535 f \n"
  47 + "0000000009 00000 n \n"
  48 + "0000000058 00000 n \n"
  49 + "trailer << /Size 3 /Root 1 0 R >>\n"
  50 + "startxref\n"
  51 + "110\n"
  52 + "%%EOF\n");
  53 +
35 54 namespace
36 55 {
37   - class InvalidInputSource final: public InputSource
  56 + class InvalidInputSource: public InputSource
38 57 {
39 58 public:
40   - InvalidInputSource(std::string const& name) :
41   - name(name)
42   - {
43   - }
44   - ~InvalidInputSource() final = default;
  59 + ~InvalidInputSource() override = default;
45 60 qpdf_offset_t
46   - findAndSkipNextEOL() final
  61 + findAndSkipNextEOL() override
47 62 {
48 63 throwException();
49 64 return 0;
50 65 }
51 66 std::string const&
52   - getName() const final
  67 + getName() const override
53 68 {
  69 + static std::string name("closed input source");
54 70 return name;
55 71 }
56 72 qpdf_offset_t
57   - tell() final
  73 + tell() override
58 74 {
59 75 throwException();
60 76 return 0;
61 77 }
62 78 void
63   - seek(qpdf_offset_t offset, int whence) final
  79 + seek(qpdf_offset_t offset, int whence) override
64 80 {
65 81 throwException();
66 82 }
67 83 void
68   - rewind() final
  84 + rewind() override
69 85 {
70 86 throwException();
71 87 }
72 88 size_t
73   - read(char* buffer, size_t length) final
  89 + read(char* buffer, size_t length) override
74 90 {
75 91 throwException();
76 92 return 0;
77 93 }
78 94 void
79   - unreadCh(char ch) final
  95 + unreadCh(char ch) override
80 96 {
81 97 throwException();
82 98 }
... ... @@ -89,8 +105,6 @@ namespace
89 105 "source. QPDF operations are invalid before processFile (or "
90 106 "another process method) or after closeInputSource");
91 107 }
92   -
93   - std::string const& name;
94 108 };
95 109 } // namespace
96 110  
... ... @@ -182,17 +196,15 @@ QPDF::EncryptionParameters::EncryptionParameters() :
182 196 {
183 197 }
184 198  
185   -QPDF::Members::Members(QPDF& qpdf) :
  199 +QPDF::Members::Members() :
186 200 log(QPDFLogger::defaultLogger()),
187   - file_sp(new InvalidInputSource(no_input_name)),
188   - file(file_sp.get()),
189   - encp(new EncryptionParameters),
190   - xref_table(qpdf, file)
  201 + file(new InvalidInputSource()),
  202 + encp(new EncryptionParameters)
191 203 {
192 204 }
193 205  
194 206 QPDF::QPDF() :
195   - m(new Members(*this))
  207 + m(new Members())
196 208 {
197 209 m->tokenizer.allowEOF();
198 210 // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout
... ... @@ -213,6 +225,9 @@ QPDF::~QPDF()
213 225 // are reachable from this object to release their association with this QPDF. Direct objects
214 226 // are not destroyed since they can be moved to other QPDF objects safely.
215 227  
  228 + // At this point, obviously no one is still using the QPDF object, but we'll explicitly clear
  229 + // the xref table anyway just to prevent any possibility of resolve() succeeding.
  230 + m->xref_table.clear();
216 231 for (auto const& iter: m->obj_cache) {
217 232 iter.second.object->disconnect();
218 233 if (iter.second.object->getTypeCode() != ::ot_null) {
... ... @@ -256,17 +271,14 @@ QPDF::processMemoryFile(
256 271 void
257 272 QPDF::processInputSource(std::shared_ptr<InputSource> source, char const* password)
258 273 {
259   - m->file_sp = source;
260   - m->file = source.get();
  274 + m->file = source;
261 275 parse(password);
262 276 }
263 277  
264 278 void
265 279 QPDF::closeInputSource()
266 280 {
267   - m->no_input_name = "closed input source";
268   - m->file_sp = std::shared_ptr<InputSource>(new InvalidInputSource(m->no_input_name));
269   - m->file = m->file_sp.get();
  281 + m->file = std::shared_ptr<InputSource>(new InvalidInputSource());
270 282 }
271 283  
272 284 void
... ... @@ -278,9 +290,7 @@ QPDF::setPasswordIsHexKey(bool val)
278 290 void
279 291 QPDF::emptyPDF()
280 292 {
281   - m->pdf_version = "1.3";
282   - m->no_input_name = "empty PDF";
283   - m->xref_table.initialize_empty();
  293 + processMemoryFile("empty PDF", EMPTY_PDF, strlen(EMPTY_PDF));
284 294 }
285 295  
286 296 void
... ... @@ -293,7 +303,7 @@ QPDF::registerStreamFilter(
293 303 void
294 304 QPDF::setIgnoreXRefStreams(bool val)
295 305 {
296   - m->xref_table.ignore_streams(val);
  306 + m->ignore_xref_streams = val;
297 307 }
298 308  
299 309 std::shared_ptr<QPDFLogger>
... ... @@ -331,7 +341,6 @@ void
331 341 QPDF::setAttemptRecovery(bool val)
332 342 {
333 343 m->attempt_recovery = val;
334   - m->xref_table.attempt_recovery(val);
335 344 }
336 345  
337 346 void
... ... @@ -401,9 +410,7 @@ QPDF::findHeader()
401 410 // PDF header, all explicit offsets in the file are such that 0 points to the beginning
402 411 // of the header.
403 412 QTC::TC("qpdf", "QPDF global offset");
404   - m->file_sp =
405   - std::shared_ptr<InputSource>(new OffsetInputSource(m->file_sp, global_offset));
406   - m->file = m->file_sp.get();
  413 + m->file = std::shared_ptr<InputSource>(new OffsetInputSource(m->file, global_offset));
407 414 }
408 415 }
409 416 return valid;
... ... @@ -436,8 +443,46 @@ QPDF::parse(char const* password)
436 443 m->pdf_version = "1.2";
437 444 }
438 445  
439   - m->xref_table.initialize();
  446 + // PDF spec says %%EOF must be found within the last 1024 bytes of/ the file. We add an extra
  447 + // 30 characters to leave room for the startxref stuff.
  448 + m->file->seek(0, SEEK_END);
  449 + qpdf_offset_t end_offset = m->file->tell();
  450 + m->xref_table_max_offset = end_offset;
  451 + // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic
  452 + // scenarios at least 3 bytes are required.
  453 + if (m->xref_table_max_id > m->xref_table_max_offset / 3) {
  454 + m->xref_table_max_id = static_cast<int>(m->xref_table_max_offset / 3);
  455 + }
  456 + qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
  457 + PatternFinder sf(*this, &QPDF::findStartxref);
  458 + qpdf_offset_t xref_offset = 0;
  459 + if (m->file->findLast("startxref", start_offset, 0, sf)) {
  460 + xref_offset = QUtil::string_to_ll(readToken(*m->file).getValue().c_str());
  461 + }
  462 +
  463 + try {
  464 + if (xref_offset == 0) {
  465 + QTC::TC("qpdf", "QPDF can't find startxref");
  466 + throw damagedPDF("", 0, "can't find startxref");
  467 + }
  468 + try {
  469 + read_xref(xref_offset);
  470 + } catch (QPDFExc&) {
  471 + throw;
  472 + } catch (std::exception& e) {
  473 + throw damagedPDF("", 0, std::string("error reading xref: ") + e.what());
  474 + }
  475 + } catch (QPDFExc& e) {
  476 + if (m->attempt_recovery) {
  477 + reconstruct_xref(e);
  478 + QTC::TC("qpdf", "QPDF reconstructed xref table");
  479 + } else {
  480 + throw;
  481 + }
  482 + }
  483 +
440 484 initializeEncryption();
  485 + m->parsed = true;
441 486 if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) {
442 487 // QPDFs created from JSON have an empty xref table and no root object yet.
443 488 throw damagedPDF("", 0, "unable to find page tree");
... ... @@ -479,77 +524,18 @@ QPDF::warn(
479 524 }
480 525  
481 526 void
482   -QPDF::Xref_table::initialize_empty()
  527 +QPDF::setTrailer(QPDFObjectHandle obj)
483 528 {
484   - initialized_ = true;
485   - trailer_ = QPDFObjectHandle::newDictionary();
486   - auto rt = qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary());
487   - auto pgs = qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary());
488   - pgs.replaceKey("/Type", QPDFObjectHandle::newName("/Pages"));
489   - pgs.replaceKey("/Kids", QPDFObjectHandle::newArray());
490   - pgs.replaceKey("/Count", QPDFObjectHandle::newInteger(0));
491   - rt.replaceKey("/Type", QPDFObjectHandle::newName("/Catalog"));
492   - rt.replaceKey("/Pages", pgs);
493   - trailer_.replaceKey("/Root", rt);
494   - trailer_.replaceKey("/Size", QPDFObjectHandle::newInteger(3));
495   -}
496   -
497   -void
498   -QPDF::Xref_table::initialize_json()
499   -{
500   - initialized_ = true;
501   - table.resize(1);
502   - trailer_ = QPDFObjectHandle::newDictionary();
503   - trailer_.replaceKey("/Size", QPDFObjectHandle::newInteger(1));
504   -}
505   -
506   -void
507   -QPDF::Xref_table::initialize()
508   -{
509   - // PDF spec says %%EOF must be found within the last 1024 bytes of/ the file. We add an extra
510   - // 30 characters to leave room for the startxref stuff.
511   - file->seek(0, SEEK_END);
512   - qpdf_offset_t end_offset = file->tell();
513   - // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic
514   - // scenarios at least 3 bytes are required.
515   - if (max_id_ > end_offset / 3) {
516   - max_id_ = static_cast<int>(end_offset / 3);
517   - }
518   - qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
519   - PatternFinder sf(qpdf, &QPDF::findStartxref);
520   - qpdf_offset_t xref_offset = 0;
521   - if (file->findLast("startxref", start_offset, 0, sf)) {
522   - xref_offset = QUtil::string_to_ll(read_token().getValue().c_str());
523   - }
524   -
525   - try {
526   - if (xref_offset == 0) {
527   - QTC::TC("qpdf", "QPDF can't find startxref");
528   - throw damaged_pdf("can't find startxref");
529   - }
530   - try {
531   - read(xref_offset);
532   - } catch (QPDFExc&) {
533   - throw;
534   - } catch (std::exception& e) {
535   - throw damaged_pdf(std::string("error reading xref: ") + e.what());
536   - }
537   - } catch (QPDFExc& e) {
538   - if (attempt_recovery_) {
539   - reconstruct(e);
540   - QTC::TC("qpdf", "QPDF reconstructed xref table");
541   - } else {
542   - throw;
543   - }
  529 + if (m->trailer) {
  530 + return;
544 531 }
545   -
546   - initialized_ = true;
  532 + m->trailer = obj;
547 533 }
548 534  
549 535 void
550   -QPDF::Xref_table::reconstruct(QPDFExc& e)
  536 +QPDF::reconstruct_xref(QPDFExc& e)
551 537 {
552   - if (reconstructed_) {
  538 + if (m->reconstructed_xref) {
553 539 // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because
554 540 // qpdf is throwing many fewer exceptions while parsing. Most situations are warnings now.
555 541 throw e;
... ... @@ -557,93 +543,78 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
557 543  
558 544 // If recovery generates more than 1000 warnings, the file is so severely damaged that there
559 545 // probably is no point trying to continue.
560   - const auto max_warnings = qpdf.m->warnings.size() + 1000U;
  546 + const auto max_warnings = m->warnings.size() + 1000U;
561 547 auto check_warnings = [this, max_warnings]() {
562   - if (qpdf.m->warnings.size() > max_warnings) {
563   - throw damaged_pdf("too many errors while reconstructing cross-reference table");
  548 + if (m->warnings.size() > max_warnings) {
  549 + throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table");
564 550 }
565 551 };
566 552  
567   - reconstructed_ = true;
  553 + m->reconstructed_xref = true;
568 554 // We may find more objects, which may contain dangling references.
569   - qpdf.m->fixed_dangling_refs = false;
  555 + m->fixed_dangling_refs = false;
570 556  
571   - warn_damaged("file is damaged");
572   - qpdf.warn(e);
573   - warn_damaged("Attempting to reconstruct cross-reference table");
  557 + warn(damagedPDF("", 0, "file is damaged"));
  558 + warn(e);
  559 + warn(damagedPDF("", 0, "Attempting to reconstruct cross-reference table"));
574 560  
575 561 // Delete all references to type 1 (uncompressed) objects
576   - for (auto& iter: table) {
577   - if (iter.type() == 1) {
578   - iter = {};
  562 + std::set<QPDFObjGen> to_delete;
  563 + for (auto const& iter: m->xref_table) {
  564 + if (iter.second.getType() == 1) {
  565 + to_delete.insert(iter.first);
579 566 }
580 567 }
  568 + for (auto const& iter: to_delete) {
  569 + m->xref_table.erase(iter);
  570 + }
581 571  
582   - std::vector<std::tuple<int, int, qpdf_offset_t>> objects;
583   - std::vector<qpdf_offset_t> trailers;
584   - int max_found = 0;
585   -
586   - file->seek(0, SEEK_END);
587   - qpdf_offset_t eof = file->tell();
588   - file->seek(0, SEEK_SET);
  572 + m->file->seek(0, SEEK_END);
  573 + qpdf_offset_t eof = m->file->tell();
  574 + m->file->seek(0, SEEK_SET);
589 575 // Don't allow very long tokens here during recovery. All the interesting tokens are covered.
590 576 static size_t const MAX_LEN = 10;
591   - while (file->tell() < eof) {
592   - QPDFTokenizer::Token t1 = read_token(MAX_LEN);
593   - qpdf_offset_t token_start = file->tell() - toO(t1.getValue().length());
  577 + while (m->file->tell() < eof) {
  578 + QPDFTokenizer::Token t1 = readToken(*m->file, MAX_LEN);
  579 + qpdf_offset_t token_start = m->file->tell() - toO(t1.getValue().length());
594 580 if (t1.isInteger()) {
595   - auto pos = file->tell();
596   - QPDFTokenizer::Token t2 = read_token(MAX_LEN);
597   - if (t2.isInteger() && read_token(MAX_LEN).isWord("obj")) {
  581 + auto pos = m->file->tell();
  582 + QPDFTokenizer::Token t2 = readToken(*m->file, MAX_LEN);
  583 + if ((t2.isInteger()) && (readToken(*m->file, MAX_LEN).isWord("obj"))) {
598 584 int obj = QUtil::string_to_int(t1.getValue().c_str());
599 585 int gen = QUtil::string_to_int(t2.getValue().c_str());
600   - if (obj <= max_id_) {
601   - objects.emplace_back(obj, gen, token_start);
602   - if (obj > max_found) {
603   - max_found = obj;
604   - }
  586 + if (obj <= m->xref_table_max_id) {
  587 + insertReconstructedXrefEntry(obj, token_start, gen);
605 588 } else {
606   - warn_damaged("ignoring object with impossibly large id " + std::to_string(obj));
  589 + warn(damagedPDF(
  590 + "", 0, "ignoring object with impossibly large id " + std::to_string(obj)));
607 591 }
608 592 }
609   - file->seek(pos, SEEK_SET);
610   - } else if (!trailer_ && t1.isWord("trailer")) {
611   - trailers.emplace_back(file->tell());
612   - }
613   - file->findAndSkipNextEOL();
614   - }
615   -
616   - table.resize(toS(max_found) + 1);
617   -
618   - for (auto tr: trailers) {
619   - file->seek(tr, SEEK_SET);
620   - auto t = read_trailer();
621   - if (!t.isDictionary()) {
622   - // Oh well. It was worth a try.
623   - } else {
624   - trailer_ = t;
625   - break;
  593 + m->file->seek(pos, SEEK_SET);
  594 + } else if (!m->trailer && t1.isWord("trailer")) {
  595 + auto pos = m->file->tell();
  596 + QPDFObjectHandle t = readTrailer();
  597 + if (!t.isDictionary()) {
  598 + // Oh well. It was worth a try.
  599 + } else {
  600 + setTrailer(t);
  601 + }
  602 + m->file->seek(pos, SEEK_SET);
626 603 }
627 604 check_warnings();
  605 + m->file->findAndSkipNextEOL();
628 606 }
  607 + m->deleted_objects.clear();
629 608  
630   - auto rend = objects.rend();
631   - for (auto it = objects.rbegin(); it != rend; it++) {
632   - auto [obj, gen, token_start] = *it;
633   - insert(obj, 1, token_start, gen);
634   - check_warnings();
635   - }
636   -
637   - if (!trailer_) {
  609 + if (!m->trailer) {
638 610 qpdf_offset_t max_offset{0};
639 611 // If there are any xref streams, take the last one to appear.
640   - int i = -1;
641   - for (auto const& item: table) {
642   - ++i;
643   - if (item.type() != 1) {
  612 + for (auto const& iter: m->xref_table) {
  613 + auto entry = iter.second;
  614 + if (entry.getType() != 1) {
644 615 continue;
645 616 }
646   - auto oh = qpdf.getObject(i, item.gen());
  617 + auto oh = getObjectByObjGen(iter.first);
647 618 try {
648 619 if (!oh.isStreamOfType("/XRef")) {
649 620 continue;
... ... @@ -651,44 +622,44 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
651 622 } catch (std::exception&) {
652 623 continue;
653 624 }
654   - auto offset = item.offset();
  625 + auto offset = entry.getOffset();
655 626 if (offset > max_offset) {
656 627 max_offset = offset;
657   - trailer_ = oh.getDict();
  628 + setTrailer(oh.getDict());
658 629 }
659 630 check_warnings();
660 631 }
661 632 if (max_offset > 0) {
662 633 try {
663   - read(max_offset);
  634 + read_xref(max_offset);
664 635 } catch (std::exception&) {
665   - throw damaged_pdf(
666   - "error decoding candidate xref stream while recovering damaged file");
  636 + throw damagedPDF(
  637 + "", 0, "error decoding candidate xref stream while recovering damaged file");
667 638 }
668 639 QTC::TC("qpdf", "QPDF recover xref stream");
669 640 }
670 641 }
671 642  
672   - if (!trailer_) {
  643 + if (!m->trailer) {
673 644 // We could check the last encountered object to see if it was an xref stream. If so, we
674 645 // could try to get the trailer from there. This may make it possible to recover files with
675 646 // bad startxref pointers even when they have object streams.
676 647  
677   - throw damaged_pdf("unable to find trailer dictionary while recovering damaged file");
  648 + throw damagedPDF("", 0, "unable to find trailer dictionary while recovering damaged file");
678 649 }
679   - if (table.empty()) {
  650 + if (m->xref_table.empty()) {
680 651 // We cannot check for an empty xref table in parse because empty tables are valid when
681 652 // creating QPDF objects from JSON.
682   - throw damaged_pdf("unable to find objects while recovering damaged file");
  653 + throw damagedPDF("", 0, "unable to find objects while recovering damaged file");
683 654 }
684 655 check_warnings();
685   - if (!initialized_) {
686   - initialized_ = true;
687   - qpdf.getAllPages();
  656 + if (!m->parsed) {
  657 + m->parsed = true;
  658 + getAllPages();
688 659 check_warnings();
689   - if (qpdf.m->all_pages.empty()) {
690   - initialized_ = false;
691   - throw damaged_pdf("unable to find any pages while recovering damaged file");
  660 + if (m->all_pages.empty()) {
  661 + m->parsed = false;
  662 + throw damagedPDF("", 0, "unable to find any pages while recovering damaged file");
692 663 }
693 664 }
694 665 // We could iterate through the objects looking for streams and try to find objects inside of
... ... @@ -699,7 +670,7 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
699 670 }
700 671  
701 672 void
702   -QPDF::Xref_table::read(qpdf_offset_t xref_offset)
  673 +QPDF::read_xref(qpdf_offset_t xref_offset)
703 674 {
704 675 std::map<int, int> free_table;
705 676 std::set<qpdf_offset_t> visited;
... ... @@ -707,7 +678,7 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset)
707 678 visited.insert(xref_offset);
708 679 char buf[7];
709 680 memset(buf, 0, sizeof(buf));
710   - file->seek(xref_offset, SEEK_SET);
  681 + m->file->seek(xref_offset, SEEK_SET);
711 682 // Some files miss the mark a little with startxref. We could do a better job of searching
712 683 // in the neighborhood for something that looks like either an xref table or stream, but the
713 684 // simple heuristic of skipping whitespace can help with the xref table case and is harmless
... ... @@ -716,11 +687,11 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset)
716 687 bool skipped_space = false;
717 688 while (!done) {
718 689 char ch;
719   - if (1 == file->read(&ch, 1)) {
  690 + if (1 == m->file->read(&ch, 1)) {
720 691 if (QUtil::is_space(ch)) {
721 692 skipped_space = true;
722 693 } else {
723   - file->unreadCh(ch);
  694 + m->file->unreadCh(ch);
724 695 done = true;
725 696 }
726 697 } else {
... ... @@ -729,13 +700,13 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset)
729 700 }
730 701 }
731 702  
732   - file->read(buf, sizeof(buf) - 1);
  703 + m->file->read(buf, sizeof(buf) - 1);
733 704 // The PDF spec says xref must be followed by a line terminator, but files exist in the wild
734 705 // where it is terminated by arbitrary whitespace.
735 706 if ((strncmp(buf, "xref", 4) == 0) && QUtil::is_space(buf[4])) {
736 707 if (skipped_space) {
737 708 QTC::TC("qpdf", "QPDF xref skipped space");
738   - warn_damaged("extraneous whitespace seen before xref");
  709 + warn(damagedPDF("", 0, "extraneous whitespace seen before xref"));
739 710 }
740 711 QTC::TC(
741 712 "qpdf",
... ... @@ -749,38 +720,54 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset)
749 720 while (QUtil::is_space(buf[skip])) {
750 721 ++skip;
751 722 }
752   - xref_offset = process_section(xref_offset + skip);
  723 + xref_offset = read_xrefTable(xref_offset + skip);
753 724 } else {
754   - xref_offset = read_stream(xref_offset);
  725 + xref_offset = read_xrefStream(xref_offset);
755 726 }
756 727 if (visited.count(xref_offset) != 0) {
757 728 QTC::TC("qpdf", "QPDF xref loop");
758   - throw damaged_pdf("loop detected following xref tables");
  729 + throw damagedPDF("", 0, "loop detected following xref tables");
759 730 }
760 731 }
761 732  
762   - if (!trailer_) {
763   - throw damaged_pdf("unable to find trailer while reading xref");
  733 + if (!m->trailer) {
  734 + throw damagedPDF("", 0, "unable to find trailer while reading xref");
764 735 }
765   - int size = trailer_.getKey("/Size").getIntValueAsInt();
766   -
767   - if (size < 3) {
768   - throw damaged_pdf("too few objects - file can't have a page tree");
  736 + int size = m->trailer.getKey("/Size").getIntValueAsInt();
  737 + int max_obj = 0;
  738 + if (!m->xref_table.empty()) {
  739 + max_obj = m->xref_table.rbegin()->first.getObj();
  740 + }
  741 + if (!m->deleted_objects.empty()) {
  742 + max_obj = std::max(max_obj, *(m->deleted_objects.rbegin()));
  743 + }
  744 + if ((size < 1) || (size - 1 != max_obj)) {
  745 + QTC::TC("qpdf", "QPDF xref size mismatch");
  746 + warn(damagedPDF(
  747 + "",
  748 + 0,
  749 + ("reported number of objects (" + std::to_string(size) +
  750 + ") is not one plus the highest object number (" + std::to_string(max_obj) + ")")));
769 751 }
770 752  
771   - // We are no longer reporting what the highest id in the xref table is. I don't think it adds
772   - // anything. If we want to report more detail, we should report the total number of missing
773   - // entries, including missing entries before the last actual entry.
  753 + // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we
  754 + // never depend on its being set.
  755 + m->deleted_objects.clear();
  756 +
  757 + // Make sure we keep only the highest generation for any object.
  758 + QPDFObjGen last_og{-1, 0};
  759 + for (auto const& item: m->xref_table) {
  760 + auto id = item.first.getObj();
  761 + if (id == last_og.getObj() && id > 0) {
  762 + removeObject(last_og);
  763 + }
  764 + last_og = item.first;
  765 + }
774 766 }
775 767  
776   -QPDF::Xref_table::Subsection
777   -QPDF::Xref_table::subsection(std::string const& line)
  768 +bool
  769 +QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes)
778 770 {
779   - auto terminate = [this]() -> void {
780   - QTC::TC("qpdf", "QPDF invalid xref");
781   - throw damaged_table("xref syntax invalid");
782   - };
783   -
784 771 // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
785 772 // buffer.
786 773 char const* p = line.c_str();
... ... @@ -792,7 +779,7 @@ QPDF::Xref_table::subsection(std::string const& line)
792 779 }
793 780 // Require digit
794 781 if (!QUtil::is_digit(*p)) {
795   - terminate();
  782 + return false;
796 783 }
797 784 // Gather digits
798 785 std::string obj_str;
... ... @@ -801,7 +788,7 @@ QPDF::Xref_table::subsection(std::string const& line)
801 788 }
802 789 // Require space
803 790 if (!QUtil::is_space(*p)) {
804   - terminate();
  791 + return false;
805 792 }
806 793 // Skip spaces
807 794 while (QUtil::is_space(*p)) {
... ... @@ -809,7 +796,7 @@ QPDF::Xref_table::subsection(std::string const& line)
809 796 }
810 797 // Require digit
811 798 if (!QUtil::is_digit(*p)) {
812   - terminate();
  799 + return false;
813 800 }
814 801 // Gather digits
815 802 std::string num_str;
... ... @@ -820,82 +807,18 @@ QPDF::Xref_table::subsection(std::string const& line)
820 807 while (QUtil::is_space(*p)) {
821 808 ++p;
822 809 }
823   - auto obj = QUtil::string_to_int(obj_str.c_str());
824   - auto count = QUtil::string_to_int(num_str.c_str());
825   - if (obj > max_id() || count > max_id() || (obj + count) > max_id()) {
826   - throw damaged_table("xref table subsection header contains impossibly large entry");
827   - }
828   - return {obj, count, file->getLastOffset() + toI(p - start)};
829   -}
830   -
831   -std::vector<QPDF::Xref_table::Subsection>
832   -QPDF::Xref_table::bad_subsections(std::string& line, qpdf_offset_t start)
833   -{
834   - std::vector<QPDF::Xref_table::Subsection> result;
835   - qpdf_offset_t f1 = 0;
836   - int f2 = 0;
837   - char type = '\0';
838   -
839   - file->seek(start, SEEK_SET);
840   -
841   - while (true) {
842   - line.assign(50, '\0');
843   - file->read(line.data(), line.size());
844   - auto [obj, num, offset] = result.emplace_back(subsection(line));
845   - file->seek(offset, SEEK_SET);
846   - for (qpdf_offset_t i = obj; i - num < obj; ++i) {
847   - if (!read_entry(f1, f2, type)) {
848   - QTC::TC("qpdf", "QPDF invalid xref entry");
849   - throw damaged_table("invalid xref entry (obj=" + std::to_string(i) + ")");
850   - }
851   - }
852   - qpdf_offset_t pos = file->tell();
853   - if (read_token().isWord("trailer")) {
854   - return result;
855   - } else {
856   - file->seek(pos, SEEK_SET);
857   - }
858   - }
859   -}
860   -
861   -// Optimistically read and parse all subsection headers. If an error is encountered return the
862   -// result of bad_subsections.
863   -std::vector<QPDF::Xref_table::Subsection>
864   -QPDF::Xref_table::subsections(std::string& line)
865   -{
866   - auto recovery_offset = file->tell();
867   - try {
868   - std::vector<QPDF::Xref_table::Subsection> result;
869   -
870   - while (true) {
871   - line.assign(50, '\0');
872   - file->read(line.data(), line.size());
873   - auto& sub = result.emplace_back(subsection(line));
874   - auto count = std::get<1>(sub);
875   - auto offset = std::get<2>(sub);
876   - file->seek(offset + 20 * toO(count) - 1, SEEK_SET);
877   - file->read(line.data(), 1);
878   - if (!(line[0] == '\n' || line[0] == '\n')) {
879   - return bad_subsections(line, recovery_offset);
880   - }
881   - qpdf_offset_t pos = file->tell();
882   - if (read_token().isWord("trailer")) {
883   - return result;
884   - } else {
885   - file->seek(pos, SEEK_SET);
886   - }
887   - }
888   - } catch (...) {
889   - return bad_subsections(line, recovery_offset);
890   - }
  810 + bytes = toI(p - start);
  811 + obj = QUtil::string_to_int(obj_str.c_str());
  812 + num = QUtil::string_to_int(num_str.c_str());
  813 + return true;
891 814 }
892 815  
893 816 bool
894   -QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type)
  817 +QPDF::read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type)
895 818 {
896 819 // Reposition after initial read attempt and reread.
897   - file->seek(file->getLastOffset(), SEEK_SET);
898   - auto line = file->readLine(30);
  820 + m->file->seek(m->file->getLastOffset(), SEEK_SET);
  821 + auto line = m->file->readLine(30);
899 822  
900 823 // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
901 824 // buffer.
... ... @@ -961,7 +884,7 @@ QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type)
961 884 }
962 885  
963 886 if (invalid) {
964   - qpdf.warn(damaged_table("accepting invalid xref table entry"));
  887 + warn(damagedPDF("xref table", "accepting invalid xref table entry"));
965 888 }
966 889  
967 890 f1 = QUtil::string_to_ll(f1_str.c_str());
... ... @@ -973,10 +896,10 @@ QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type)
973 896 // Optimistically read and parse xref entry. If entry is bad, call read_bad_xrefEntry and return
974 897 // result.
975 898 bool
976   -QPDF::Xref_table::read_entry(qpdf_offset_t& f1, int& f2, char& type)
  899 +QPDF::read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type)
977 900 {
978 901 std::array<char, 21> line;
979   - if (file->read(line.data(), 20) != 20) {
  902 + if (m->file->read(line.data(), 20) != 20) {
980 903 // C++20: [[unlikely]]
981 904 return false;
982 905 }
... ... @@ -1022,78 +945,84 @@ QPDF::Xref_table::read_entry(qpdf_offset_t& f1, int& f2, char& type)
1022 945 return true;
1023 946 }
1024 947 }
1025   - return read_bad_entry(f1, f2, type);
  948 + return read_bad_xrefEntry(f1, f2, type);
1026 949 }
1027 950  
1028 951 // Read a single cross-reference table section and associated trailer.
1029 952 qpdf_offset_t
1030   -QPDF::Xref_table::process_section(qpdf_offset_t xref_offset)
  953 +QPDF::read_xrefTable(qpdf_offset_t xref_offset)
1031 954 {
1032   - file->seek(xref_offset, SEEK_SET);
  955 + m->file->seek(xref_offset, SEEK_SET);
1033 956 std::string line;
1034   - auto subs = subsections(line);
1035   -
1036   - auto cur_trailer_offset = file->tell();
1037   - auto cur_trailer = read_trailer();
1038   - if (!cur_trailer.isDictionary()) {
1039   - QTC::TC("qpdf", "QPDF missing trailer");
1040   - throw qpdf.damagedPDF("", "expected trailer dictionary");
1041   - }
1042   -
1043   - if (!trailer_) {
1044   - unsigned int sz;
1045   - trailer_ = cur_trailer;
1046   -
1047   - if (!trailer_.hasKey("/Size")) {
1048   - QTC::TC("qpdf", "QPDF trailer lacks size");
1049   - throw qpdf.damagedPDF("trailer", "trailer dictionary lacks /Size key");
1050   - }
1051   - if (!trailer_.getKey("/Size").getValueAsUInt(sz)) {
1052   - QTC::TC("qpdf", "QPDF trailer size not integer");
1053   - throw qpdf.damagedPDF("trailer", "/Size key in trailer dictionary is not an integer");
1054   - }
1055   -
1056   - table.resize(sz);
1057   - }
1058   -
1059   - for (auto [obj, num, offset]: subs) {
1060   - file->seek(offset, SEEK_SET);
  957 + while (true) {
  958 + line.assign(50, '\0');
  959 + m->file->read(line.data(), line.size());
  960 + int obj = 0;
  961 + int num = 0;
  962 + int bytes = 0;
  963 + if (!parse_xrefFirst(line, obj, num, bytes)) {
  964 + QTC::TC("qpdf", "QPDF invalid xref");
  965 + throw damagedPDF("xref table", "xref syntax invalid");
  966 + }
  967 + m->file->seek(m->file->getLastOffset() + bytes, SEEK_SET);
1061 968 for (qpdf_offset_t i = obj; i - num < obj; ++i) {
1062 969 if (i == 0) {
1063 970 // This is needed by checkLinearization()
1064   - first_item_offset_ = file->tell();
  971 + m->first_xref_item_offset = m->file->tell();
1065 972 }
1066 973 // For xref_table, these will always be small enough to be ints
1067 974 qpdf_offset_t f1 = 0;
1068 975 int f2 = 0;
1069 976 char type = '\0';
1070   - if (!read_entry(f1, f2, type)) {
1071   - throw damaged_table("invalid xref entry (obj=" + std::to_string(i) + ")");
  977 + if (!read_xrefEntry(f1, f2, type)) {
  978 + QTC::TC("qpdf", "QPDF invalid xref entry");
  979 + throw damagedPDF(
  980 + "xref table", "invalid xref entry (obj=" + std::to_string(i) + ")");
1072 981 }
1073 982 if (type == 'f') {
1074   - insert_free(QPDFObjGen(toI(i), f2));
  983 + insertFreeXrefEntry(QPDFObjGen(toI(i), f2));
1075 984 } else {
1076   - insert(toI(i), 1, f1, f2);
  985 + insertXrefEntry(toI(i), 1, f1, f2);
1077 986 }
1078 987 }
1079   - qpdf_offset_t pos = file->tell();
1080   - if (read_token().isWord("trailer")) {
  988 + qpdf_offset_t pos = m->file->tell();
  989 + if (readToken(*m->file).isWord("trailer")) {
1081 990 break;
1082 991 } else {
1083   - file->seek(pos, SEEK_SET);
  992 + m->file->seek(pos, SEEK_SET);
  993 + }
  994 + }
  995 +
  996 + // Set offset to previous xref table if any
  997 + QPDFObjectHandle cur_trailer = readTrailer();
  998 + if (!cur_trailer.isDictionary()) {
  999 + QTC::TC("qpdf", "QPDF missing trailer");
  1000 + throw damagedPDF("", "expected trailer dictionary");
  1001 + }
  1002 +
  1003 + if (!m->trailer) {
  1004 + setTrailer(cur_trailer);
  1005 +
  1006 + if (!m->trailer.hasKey("/Size")) {
  1007 + QTC::TC("qpdf", "QPDF trailer lacks size");
  1008 + throw damagedPDF("trailer", "trailer dictionary lacks /Size key");
  1009 + }
  1010 + if (!m->trailer.getKey("/Size").isInteger()) {
  1011 + QTC::TC("qpdf", "QPDF trailer size not integer");
  1012 + throw damagedPDF("trailer", "/Size key in trailer dictionary is not an integer");
1084 1013 }
1085 1014 }
1086 1015  
1087 1016 if (cur_trailer.hasKey("/XRefStm")) {
1088   - if (ignore_streams_) {
  1017 + if (m->ignore_xref_streams) {
1089 1018 QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer");
1090 1019 } else {
1091 1020 if (cur_trailer.getKey("/XRefStm").isInteger()) {
1092 1021 // Read the xref stream but disregard any return value -- we'll use our trailer's
1093 1022 // /Prev key instead of the xref stream's.
1094   - (void)read_stream(cur_trailer.getKey("/XRefStm").getIntValue());
  1023 + (void)read_xrefStream(cur_trailer.getKey("/XRefStm").getIntValue());
1095 1024 } else {
1096   - throw qpdf.damagedPDF("xref stream", cur_trailer_offset, "invalid /XRefStm");
  1025 + throw damagedPDF("xref stream", xref_offset, "invalid /XRefStm");
1097 1026 }
1098 1027 }
1099 1028 }
... ... @@ -1101,8 +1030,7 @@ QPDF::Xref_table::process_section(qpdf_offset_t xref_offset)
1101 1030 if (cur_trailer.hasKey("/Prev")) {
1102 1031 if (!cur_trailer.getKey("/Prev").isInteger()) {
1103 1032 QTC::TC("qpdf", "QPDF trailer prev not integer");
1104   - throw qpdf.damagedPDF(
1105   - "trailer", cur_trailer_offset, "/Prev key in trailer dictionary is not an integer");
  1033 + throw damagedPDF("trailer", "/Prev key in trailer dictionary is not an integer");
1106 1034 }
1107 1035 QTC::TC("qpdf", "QPDF prev key in trailer dictionary");
1108 1036 return cur_trailer.getKey("/Prev").getIntValue();
... ... @@ -1113,35 +1041,34 @@ QPDF::Xref_table::process_section(qpdf_offset_t xref_offset)
1113 1041  
1114 1042 // Read a single cross-reference stream.
1115 1043 qpdf_offset_t
1116   -QPDF::Xref_table::read_stream(qpdf_offset_t xref_offset)
  1044 +QPDF::read_xrefStream(qpdf_offset_t xref_offset)
1117 1045 {
1118   - if (!ignore_streams_) {
  1046 + if (!m->ignore_xref_streams) {
1119 1047 QPDFObjGen x_og;
1120 1048 QPDFObjectHandle xref_obj;
1121 1049 try {
1122   - xref_obj = qpdf.readObjectAtOffset(
1123   - false, xref_offset, "xref stream", QPDFObjGen(0, 0), x_og, true);
  1050 + xref_obj =
  1051 + readObjectAtOffset(false, xref_offset, "xref stream", QPDFObjGen(0, 0), x_og, true);
1124 1052 } catch (QPDFExc&) {
1125 1053 // ignore -- report error below
1126 1054 }
1127 1055 if (xref_obj.isStreamOfType("/XRef")) {
1128 1056 QTC::TC("qpdf", "QPDF found xref stream");
1129   - return process_stream(xref_offset, xref_obj);
  1057 + return processXRefStream(xref_offset, xref_obj);
1130 1058 }
1131 1059 }
1132 1060  
1133 1061 QTC::TC("qpdf", "QPDF can't find xref");
1134   - throw qpdf.damagedPDF("", xref_offset, "xref not found");
  1062 + throw damagedPDF("", xref_offset, "xref not found");
1135 1063 return 0; // unreachable
1136 1064 }
1137 1065  
1138 1066 // Return the entry size of the xref stream and the processed W array.
1139 1067 std::pair<int, std::array<int, 3>>
1140   -QPDF::Xref_table::process_W(
1141   - QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged)
  1068 +QPDF::processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged)
1142 1069 {
1143 1070 auto W_obj = dict.getKey("/W");
1144   - if (!(W_obj.isArray() && W_obj.getArrayNItems() >= 3 && W_obj.getArrayItem(0).isInteger() &&
  1071 + if (!(W_obj.isArray() && (W_obj.getArrayNItems() >= 3) && W_obj.getArrayItem(0).isInteger() &&
1145 1072 W_obj.getArrayItem(1).isInteger() && W_obj.getArrayItem(2).isInteger())) {
1146 1073 throw damaged("Cross-reference stream does not have a proper /W key");
1147 1074 }
... ... @@ -1166,10 +1093,9 @@ QPDF::Xref_table::process_W(
1166 1093 return {entry_size, W};
1167 1094 }
1168 1095  
1169   -// Validate Size entry and return the maximum number of entries that the xref stream can contain and
1170   -// the value of the Size entry.
1171   -std::pair<int, size_t>
1172   -QPDF::Xref_table::process_Size(
  1096 +// Validate Size key and return the maximum number of entries that the xref stream can contain.
  1097 +int
  1098 +QPDF::processXRefSize(
1173 1099 QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged)
1174 1100 {
1175 1101 // Number of entries is limited by the highest possible object id and stream size.
... ... @@ -1188,12 +1114,12 @@ QPDF::Xref_table::process_Size(
1188 1114 throw damaged("Cross-reference stream has an impossibly large /Size key");
1189 1115 }
1190 1116 // We are not validating that Size <= (Size key of parent xref / trailer).
1191   - return {max_num_entries, toS(size)};
  1117 + return max_num_entries;
1192 1118 }
1193 1119  
1194 1120 // Return the number of entries of the xref stream and the processed Index array.
1195 1121 std::pair<int, std::vector<std::pair<int, int>>>
1196   -QPDF::Xref_table::process_Index(
  1122 +QPDF::processXRefIndex(
1197 1123 QPDFObjectHandle& dict, int max_num_entries, std::function<QPDFExc(std::string_view)> damaged)
1198 1124 {
1199 1125 auto size = dict.getKey("/Size").getIntValueAsInt();
... ... @@ -1260,17 +1186,17 @@ QPDF::Xref_table::process_Index(
1260 1186 }
1261 1187  
1262 1188 qpdf_offset_t
1263   -QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
  1189 +QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
1264 1190 {
1265 1191 auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc {
1266   - return qpdf.damagedPDF("xref stream", xref_offset, msg.data());
  1192 + return damagedPDF("xref stream", xref_offset, msg.data());
1267 1193 };
1268 1194  
1269 1195 auto dict = xref_obj.getDict();
1270 1196  
1271   - auto [entry_size, W] = process_W(dict, damaged);
1272   - auto [max_num_entries, size] = process_Size(dict, entry_size, damaged);
1273   - auto [num_entries, indx] = process_Index(dict, max_num_entries, damaged);
  1197 + auto [entry_size, W] = processXRefW(dict, damaged);
  1198 + int max_num_entries = processXRefSize(dict, entry_size, damaged);
  1199 + auto [num_entries, indx] = processXRefIndex(dict, max_num_entries, damaged);
1274 1200  
1275 1201 std::shared_ptr<Buffer> bp = xref_obj.getStreamData(qpdf_dl_specialized);
1276 1202 size_t actual_size = bp->getSize();
... ... @@ -1283,16 +1209,8 @@ QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xr
1283 1209 if (expected_size > actual_size) {
1284 1210 throw x;
1285 1211 } else {
1286   - qpdf.warn(x);
1287   - }
1288   - }
1289   -
1290   - if (!trailer_) {
1291   - trailer_ = dict;
1292   - if (size > toS(max_id_)) {
1293   - throw damaged("Cross-reference stream /Size entry is impossibly large");
  1212 + warn(x);
1294 1213 }
1295   - table.resize(size);
1296 1214 }
1297 1215  
1298 1216 bool saw_first_compressed_object = false;
... ... @@ -1320,29 +1238,33 @@ QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xr
1320 1238 // object record, in which case the generation number appears as the third field.
1321 1239 if (saw_first_compressed_object) {
1322 1240 if (fields[0] != 2) {
1323   - uncompressed_after_compressed_ = true;
  1241 + m->uncompressed_after_compressed = true;
1324 1242 }
1325 1243 } else if (fields[0] == 2) {
1326 1244 saw_first_compressed_object = true;
1327 1245 }
1328 1246 if (obj == 0) {
1329 1247 // This is needed by checkLinearization()
1330   - first_item_offset_ = xref_offset;
  1248 + m->first_xref_item_offset = xref_offset;
1331 1249 } else if (fields[0] == 0) {
1332 1250 // Ignore fields[2], which we don't care about in this case. This works around the
1333 1251 // issue of some PDF files that put invalid values, like -1, here for deleted
1334 1252 // objects.
1335   - insert_free(QPDFObjGen(obj, 0));
  1253 + insertFreeXrefEntry(QPDFObjGen(obj, 0));
1336 1254 } else {
1337   - insert(obj, toI(fields[0]), fields[1], toI(fields[2]));
  1255 + insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2]));
1338 1256 }
1339 1257 ++obj;
1340 1258 }
1341 1259 }
1342 1260  
  1261 + if (!m->trailer) {
  1262 + setTrailer(dict);
  1263 + }
  1264 +
1343 1265 if (dict.hasKey("/Prev")) {
1344 1266 if (!dict.getKey("/Prev").isInteger()) {
1345   - throw qpdf.damagedPDF(
  1267 + throw damagedPDF(
1346 1268 "xref stream", "/Prev key in xref stream dictionary is not an integer");
1347 1269 }
1348 1270 QTC::TC("qpdf", "QPDF prev key in xref stream dictionary");
... ... @@ -1353,7 +1275,7 @@ QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle&amp; xr
1353 1275 }
1354 1276  
1355 1277 void
1356   -QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
  1278 +QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2)
1357 1279 {
1358 1280 // Populate the xref table in such a way that the first reference to an object that we see,
1359 1281 // which is the one in the latest xref table in which it appears, is the one that gets stored.
... ... @@ -1362,35 +1284,23 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
1362 1284 // If there is already an entry for this object and generation in the table, it means that a
1363 1285 // later xref table has registered this object. Disregard this one.
1364 1286  
1365   - int new_gen = f0 == 2 ? 0 : f2;
1366   -
1367   - if (!(obj > 0 && static_cast<size_t>(obj) < table.size() && 0 <= f2 && new_gen < 65535)) {
1368   - // We are ignoring invalid objgens. Most will arrive here from xref reconstruction. There
1369   - // is probably no point having another warning but we could count invalid items in order to
1370   - // decide when to give up.
1371   - QTC::TC("qpdf", "QPDF xref overwrite invalid objgen");
  1287 + if (obj > m->xref_table_max_id) {
  1288 + // ignore impossibly large object ids or object ids > Size.
1372 1289 return;
1373 1290 }
1374 1291  
1375   - auto& entry = table[static_cast<size_t>(obj)];
1376   - auto old_type = entry.type();
1377   -
1378   - if (!old_type && entry.gen() > 0) {
1379   - // At the moment we are processing the updates last to first and therefore the gen doesn't
1380   - // matter as long as it > 0 to distinguish it from an uninitialized entry. This will need
1381   - // to be revisited when we want to support incremental updates or more comprhensive
1382   - // checking.
  1292 + if (m->deleted_objects.count(obj)) {
1383 1293 QTC::TC("qpdf", "QPDF xref deleted object");
1384 1294 return;
1385 1295 }
1386 1296  
1387 1297 if (f0 == 2 && static_cast<int>(f1) == obj) {
1388   - qpdf.warn(qpdf.damagedPDF(
1389   - "xref stream", "self-referential object stream " + std::to_string(obj)));
  1298 + warn(damagedPDF("xref stream", "self-referential object stream " + std::to_string(obj)));
1390 1299 return;
1391 1300 }
1392 1301  
1393   - if (old_type && entry.gen() >= new_gen) {
  1302 + auto [iter, created] = m->xref_table.try_emplace(QPDFObjGen(obj, (f0 == 2 ? 0 : f2)));
  1303 + if (!created) {
1394 1304 QTC::TC("qpdf", "QPDF xref reused object");
1395 1305 return;
1396 1306 }
... ... @@ -1398,129 +1308,85 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
1398 1308 switch (f0) {
1399 1309 case 1:
1400 1310 // f2 is generation
1401   - QTC::TC("qpdf", "QPDF xref gen > 0", (f2 > 0) ? 1 : 0);
1402   - entry = {f2, Uncompressed(f1)};
  1311 + QTC::TC("qpdf", "QPDF xref gen > 0", ((f2 > 0) ? 1 : 0));
  1312 + iter->second = QPDFXRefEntry(f1);
1403 1313 break;
1404 1314  
1405 1315 case 2:
1406   - entry = {0, Compressed(toI(f1), f2)};
1407   - object_streams_ = true;
  1316 + iter->second = QPDFXRefEntry(toI(f1), f2);
1408 1317 break;
1409 1318  
1410 1319 default:
1411   - throw qpdf.damagedPDF(
1412   - "xref stream", "unknown xref stream entry type " + std::to_string(f0));
  1320 + throw damagedPDF("xref stream", "unknown xref stream entry type " + std::to_string(f0));
1413 1321 break;
1414 1322 }
1415 1323 }
1416 1324  
1417 1325 void
1418   -QPDF::Xref_table::insert_free(QPDFObjGen og)
  1326 +QPDF::insertFreeXrefEntry(QPDFObjGen og)
1419 1327 {
1420   - // At the moment we are processing the updates last to first and therefore the gen doesn't
1421   - // matter as long as it > 0 to distinguish it from an uninitialized entry. This will need to be
1422   - // revisited when we want to support incremental updates or more comprhensive checking.
1423   - if (og.getObj() < 1) {
1424   - return;
1425   - }
1426   - size_t id = static_cast<size_t>(og.getObj());
1427   - if (id < table.size() && !type(id)) {
1428   - table[id] = {1, {}};
  1328 + if (!m->xref_table.count(og)) {
  1329 + m->deleted_objects.insert(og.getObj());
1429 1330 }
1430 1331 }
1431 1332  
1432   -QPDFObjGen
1433   -QPDF::Xref_table::at_offset(qpdf_offset_t offset) const noexcept
  1333 +// Replace uncompressed object. This is used in xref recovery mode, which reads the file from
  1334 +// beginning to end.
  1335 +void
  1336 +QPDF::insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2)
1434 1337 {
1435   - int id = 0;
1436   - int gen = 0;
1437   - qpdf_offset_t start = 0;
1438   -
1439   - int i = 0;
1440   - for (auto const& item: table) {
1441   - auto o = item.offset();
1442   - if (start < o && o <= offset) {
1443   - start = o;
1444   - id = i;
1445   - gen = item.gen();
1446   - }
1447   - ++i;
  1338 + if (!(obj > 0 && obj <= m->xref_table_max_id && 0 <= f2 && f2 < 65535)) {
  1339 + QTC::TC("qpdf", "QPDF xref overwrite invalid objgen");
  1340 + return;
1448 1341 }
1449   - return QPDFObjGen(id, gen);
1450   -}
1451 1342  
1452   -std::map<QPDFObjGen, QPDFXRefEntry>
1453   -QPDF::Xref_table::as_map() const
1454   -{
1455   - std::map<QPDFObjGen, QPDFXRefEntry> result;
1456   - int i{0};
1457   - for (auto const& item: table) {
1458   - switch (item.type()) {
1459   - case 0:
1460   - break;
1461   - case 1:
1462   - result.emplace(QPDFObjGen(i, item.gen()), item.offset());
1463   - break;
1464   - case 2:
1465   - result.emplace(
1466   - QPDFObjGen(i, 0), QPDFXRefEntry(item.stream_number(), item.stream_index()));
1467   - break;
1468   - default:
1469   - throw std::logic_error("Xref_table: invalid entry type");
1470   - }
1471   - ++i;
  1343 + QPDFObjGen og(obj, f2);
  1344 + if (!m->deleted_objects.count(obj)) {
  1345 + // deleted_objects stores the uncompressed objects removed from the xref table at the start
  1346 + // of recovery.
  1347 + QTC::TC("qpdf", "QPDF xref overwrite object");
  1348 + m->xref_table[QPDFObjGen(obj, f2)] = QPDFXRefEntry(f1);
1472 1349 }
1473   - return result;
1474 1350 }
1475 1351  
1476 1352 void
1477 1353 QPDF::showXRefTable()
1478 1354 {
1479   - m->xref_table.show();
1480   -}
1481   -
1482   -void
1483   -QPDF::Xref_table::show()
1484   -{
1485   - auto& cout = *qpdf.m->log->getInfo();
1486   - int i = -1;
1487   - for (auto const& item: table) {
1488   - ++i;
1489   - if (item.type()) {
1490   - cout << std::to_string(i) << "/" << std::to_string(item.gen()) << ": ";
1491   - switch (item.type()) {
1492   - case 1:
1493   - cout << "uncompressed; offset = " << item.offset() << "\n";
1494   - break;
  1355 + auto& cout = *m->log->getInfo();
  1356 + for (auto const& iter: m->xref_table) {
  1357 + QPDFObjGen const& og = iter.first;
  1358 + QPDFXRefEntry const& entry = iter.second;
  1359 + cout << og.unparse('/') << ": ";
  1360 + switch (entry.getType()) {
  1361 + case 1:
  1362 + cout << "uncompressed; offset = " << entry.getOffset();
  1363 + break;
1495 1364  
1496   - case 2:
1497   - cout << "compressed; stream = " << item.stream_number()
1498   - << ", index = " << item.stream_index() << "\n";
1499   - break;
  1365 + case 2:
  1366 + *m->log->getInfo() << "compressed; stream = " << entry.getObjStreamNumber()
  1367 + << ", index = " << entry.getObjStreamIndex();
  1368 + break;
1500 1369  
1501   - default:
1502   - throw std::logic_error(
1503   - "unknown cross-reference table type while showing xref_table");
1504   - }
  1370 + default:
  1371 + throw std::logic_error("unknown cross-reference table type while"
  1372 + " showing xref_table");
  1373 + break;
1505 1374 }
  1375 + m->log->info("\n");
1506 1376 }
1507 1377 }
1508 1378  
1509 1379 // Resolve all objects in the xref table. If this triggers a xref table reconstruction abort and
1510 1380 // return false. Otherwise return true.
1511 1381 bool
1512   -QPDF::Xref_table::resolve()
1513   -{
1514   - bool may_change = !reconstructed_;
1515   - int i = -1;
1516   - for (auto& item: table) {
1517   - ++i;
1518   - if (item.type()) {
1519   - if (qpdf.isUnresolved(QPDFObjGen(i, item.gen()))) {
1520   - qpdf.resolve(QPDFObjGen(i, item.gen()));
1521   - if (may_change && reconstructed_) {
1522   - return false;
1523   - }
  1382 +QPDF::resolveXRefTable()
  1383 +{
  1384 + bool may_change = !m->reconstructed_xref;
  1385 + for (auto& iter: m->xref_table) {
  1386 + if (isUnresolved(iter.first)) {
  1387 + resolve(iter.first);
  1388 + if (may_change && m->reconstructed_xref) {
  1389 + return false;
1524 1390 }
1525 1391 }
1526 1392 }
... ... @@ -1535,9 +1401,9 @@ QPDF::fixDanglingReferences(bool force)
1535 1401 if (m->fixed_dangling_refs) {
1536 1402 return;
1537 1403 }
1538   - if (!m->xref_table.resolve()) {
  1404 + if (!resolveXRefTable()) {
1539 1405 QTC::TC("qpdf", "QPDF fix dangling triggered xref reconstruction");
1540   - m->xref_table.resolve();
  1406 + resolveXRefTable();
1541 1407 }
1542 1408 m->fixed_dangling_refs = true;
1543 1409 }
... ... @@ -1584,21 +1450,21 @@ QPDF::setLastObjectDescription(std::string const&amp; description, QPDFObjGen const&amp;
1584 1450 }
1585 1451  
1586 1452 QPDFObjectHandle
1587   -QPDF::Xref_table::read_trailer()
  1453 +QPDF::readTrailer()
1588 1454 {
1589   - qpdf_offset_t offset = file->tell();
  1455 + qpdf_offset_t offset = m->file->tell();
1590 1456 bool empty = false;
1591 1457 auto object =
1592   - QPDFParser(*file, "trailer", tokenizer, nullptr, &qpdf, true).parse(empty, false);
  1458 + QPDFParser(*m->file, "trailer", m->tokenizer, nullptr, this, true).parse(empty, false);
1593 1459 if (empty) {
1594 1460 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1595 1461 // actual PDF files and Adobe Reader appears to ignore them.
1596   - qpdf.warn(qpdf.damagedPDF("trailer", "empty object treated as null"));
1597   - } else if (object.isDictionary() && read_token().isWord("stream")) {
1598   - qpdf.warn(qpdf.damagedPDF("trailer", file->tell(), "stream keyword found in trailer"));
  1462 + warn(damagedPDF("trailer", "empty object treated as null"));
  1463 + } else if (object.isDictionary() && readToken(*m->file).isWord("stream")) {
  1464 + warn(damagedPDF("trailer", m->file->tell(), "stream keyword found in trailer"));
1599 1465 }
1600 1466 // Override last_offset so that it points to the beginning of the object we just read
1601   - file->setLastOffset(offset);
  1467 + m->file->setLastOffset(offset);
1602 1468 return object;
1603 1469 }
1604 1470  
... ... @@ -1666,7 +1532,7 @@ QPDF::readStream(QPDFObjectHandle&amp; object, QPDFObjGen og, qpdf_offset_t offset)
1666 1532 } catch (QPDFExc& e) {
1667 1533 if (m->attempt_recovery) {
1668 1534 warn(e);
1669   - length = recoverStreamLength(m->file_sp, og, stream_offset);
  1535 + length = recoverStreamLength(m->file, og, stream_offset);
1670 1536 } else {
1671 1537 throw;
1672 1538 }
... ... @@ -1773,9 +1639,21 @@ QPDF::recoverStreamLength(
1773 1639 }
1774 1640  
1775 1641 if (length) {
  1642 + auto end = stream_offset + toO(length);
  1643 + qpdf_offset_t found_offset = 0;
  1644 + QPDFObjGen found_og;
  1645 +
1776 1646 // Make sure this is inside this object
1777   - auto found = m->xref_table.at_offset(stream_offset + toO(length));
1778   - if (found == QPDFObjGen() || found == og) {
  1647 + for (auto const& [current_og, entry]: m->xref_table) {
  1648 + if (entry.getType() == 1) {
  1649 + qpdf_offset_t obj_offset = entry.getOffset();
  1650 + if (found_offset < obj_offset && obj_offset < end) {
  1651 + found_offset = obj_offset;
  1652 + found_og = current_og;
  1653 + }
  1654 + }
  1655 + }
  1656 + if (!found_offset || found_og == og) {
1779 1657 // If we are trying to recover an XRef stream the xref table will not contain and
1780 1658 // won't contain any entries, therefore we cannot check the found length. Otherwise we
1781 1659 // found endstream\nendobj within the space allowed for this object, so we're probably
... ... @@ -1884,18 +1762,21 @@ QPDF::readObjectAtOffset(
1884 1762 } catch (QPDFExc& e) {
1885 1763 if (try_recovery) {
1886 1764 // Try again after reconstructing xref table
1887   - m->xref_table.reconstruct(e);
1888   - if (m->xref_table.type(exp_og) == 1) {
  1765 + reconstruct_xref(e);
  1766 + if (m->xref_table.count(exp_og) && (m->xref_table[exp_og].getType() == 1)) {
  1767 + qpdf_offset_t new_offset = m->xref_table[exp_og].getOffset();
  1768 + QPDFObjectHandle result =
  1769 + readObjectAtOffset(false, new_offset, description, exp_og, og, false);
1889 1770 QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset");
1890   - return readObjectAtOffset(
1891   - false, m->xref_table.offset(exp_og), description, exp_og, og, false);
  1771 + return result;
1892 1772 } else {
1893 1773 QTC::TC("qpdf", "QPDF object gone after xref reconstruction");
1894 1774 warn(damagedPDF(
1895 1775 "",
1896 1776 0,
1897 1777 ("object " + exp_og.unparse(' ') +
1898   - " not found in file after regenerating cross reference table")));
  1778 + " not found in file after regenerating cross reference "
  1779 + "table")));
1899 1780 return QPDFObjectHandle::newNull();
1900 1781 }
1901 1782 } else {
... ... @@ -1928,7 +1809,7 @@ QPDF::readObjectAtOffset(
1928 1809 }
1929 1810 }
1930 1811 qpdf_offset_t end_after_space = m->file->tell();
1931   - if (skip_cache_if_in_xref && m->xref_table.type(og)) {
  1812 + if (skip_cache_if_in_xref && m->xref_table.count(og)) {
1932 1813 // Ordinarily, an object gets read here when resolved through xref table or stream. In
1933 1814 // the special case of the xref stream and linearization hint tables, the offset comes
1934 1815 // from another source. For the specific case of xref streams, the xref stream is read
... ... @@ -1956,9 +1837,7 @@ QPDF::readObjectAtOffset(
1956 1837 // could use !check_og in place of skip_cache_if_in_xref.
1957 1838 QTC::TC("qpdf", "QPDF skipping cache for known unchecked object");
1958 1839 } else {
1959   - m->xref_table.linearization_offsets(
1960   - toS(og.getObj()), end_before_space, end_after_space);
1961   - updateCache(og, oh.getObj());
  1840 + updateCache(og, oh.getObj(), end_before_space, end_after_space);
1962 1841 }
1963 1842 }
1964 1843  
... ... @@ -1977,43 +1856,44 @@ QPDF::resolve(QPDFObjGen og)
1977 1856 // has to be resolved during object parsing, such as stream length.
1978 1857 QTC::TC("qpdf", "QPDF recursion loop in resolve");
1979 1858 warn(damagedPDF("", "loop detected resolving object " + og.unparse(' ')));
1980   - updateCache(og, QPDF_Null::create());
  1859 + updateCache(og, QPDF_Null::create(), -1, -1);
1981 1860 return m->obj_cache[og].object.get();
1982 1861 }
1983 1862 ResolveRecorder rr(this, og);
1984 1863  
1985   - try {
1986   - switch (m->xref_table.type(og)) {
1987   - case 0:
1988   - break;
1989   - case 1:
1990   - {
1991   - // Object stored in cache by readObjectAtOffset
1992   - QPDFObjGen a_og;
1993   - QPDFObjectHandle oh =
1994   - readObjectAtOffset(true, m->xref_table.offset(og), "", og, a_og, false);
1995   - }
1996   - break;
  1864 + if (m->xref_table.count(og) != 0) {
  1865 + QPDFXRefEntry const& entry = m->xref_table[og];
  1866 + try {
  1867 + switch (entry.getType()) {
  1868 + case 1:
  1869 + {
  1870 + qpdf_offset_t offset = entry.getOffset();
  1871 + // Object stored in cache by readObjectAtOffset
  1872 + QPDFObjGen a_og;
  1873 + QPDFObjectHandle oh = readObjectAtOffset(true, offset, "", og, a_og, false);
  1874 + }
  1875 + break;
1997 1876  
1998   - case 2:
1999   - resolveObjectsInStream(m->xref_table.stream_number(og.getObj()));
2000   - break;
  1877 + case 2:
  1878 + resolveObjectsInStream(entry.getObjStreamNumber());
  1879 + break;
2001 1880  
2002   - default:
2003   - throw damagedPDF(
2004   - "", 0, ("object " + og.unparse('/') + " has unexpected xref entry type"));
  1881 + default:
  1882 + throw damagedPDF(
  1883 + "", 0, ("object " + og.unparse('/') + " has unexpected xref entry type"));
  1884 + }
  1885 + } catch (QPDFExc& e) {
  1886 + warn(e);
  1887 + } catch (std::exception& e) {
  1888 + warn(damagedPDF(
  1889 + "", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what())));
2005 1890 }
2006   - } catch (QPDFExc& e) {
2007   - warn(e);
2008   - } catch (std::exception& e) {
2009   - warn(damagedPDF(
2010   - "", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what())));
2011 1891 }
2012 1892  
2013 1893 if (isUnresolved(og)) {
2014 1894 // PDF spec says unknown objects resolve to the null object.
2015 1895 QTC::TC("qpdf", "QPDF resolve failure to null");
2016   - updateCache(og, QPDF_Null::create());
  1896 + updateCache(og, QPDF_Null::create(), -1, -1);
2017 1897 }
2018 1898  
2019 1899 auto result(m->obj_cache[og].object);
... ... @@ -2035,6 +1915,12 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
2035 1915 "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream");
2036 1916 }
2037 1917  
  1918 + // For linearization data in the object, use the data from the object stream for the objects in
  1919 + // the stream.
  1920 + QPDFObjGen stream_og(obj_stream_number, 0);
  1921 + qpdf_offset_t end_before_space = m->obj_cache[stream_og].end_before_space;
  1922 + qpdf_offset_t end_after_space = m->obj_cache[stream_og].end_after_space;
  1923 +
2038 1924 QPDFObjectHandle dict = obj_stream.getDict();
2039 1925 if (!dict.isDictionaryOfType("/ObjStm")) {
2040 1926 QTC::TC("qpdf", "QPDF ERR object stream with wrong type");
... ... @@ -2059,7 +1945,6 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
2059 1945 (m->file->getName() + " object stream " + std::to_string(obj_stream_number)),
2060 1946 bp.get()));
2061 1947  
2062   - qpdf_offset_t last_offset = -1;
2063 1948 for (int i = 0; i < n; ++i) {
2064 1949 QPDFTokenizer::Token tnum = readToken(*input);
2065 1950 QPDFTokenizer::Token toffset = readToken(*input);
... ... @@ -2073,7 +1958,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
2073 1958  
2074 1959 int num = QUtil::string_to_int(tnum.getValue().c_str());
2075 1960 long long offset = QUtil::string_to_int(toffset.getValue().c_str());
2076   - if (num > m->xref_table.max_id()) {
  1961 + if (num > m->xref_table_max_id) {
2077 1962 continue;
2078 1963 }
2079 1964 if (num == obj_stream_number) {
... ... @@ -2085,15 +1970,6 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
2085 1970 "object stream claims to contain itself"));
2086 1971 continue;
2087 1972 }
2088   - if (offset <= last_offset) {
2089   - throw damagedPDF(
2090   - *input,
2091   - m->last_object_description,
2092   - input->getLastOffset(),
2093   - "expected offsets in object stream to be increasing");
2094   - }
2095   - last_offset = offset;
2096   -
2097 1973 offsets[num] = toI(offset + first);
2098 1974 }
2099 1975  
... ... @@ -2105,12 +1981,13 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
2105 1981 m->last_object_description += "object ";
2106 1982 for (auto const& iter: offsets) {
2107 1983 QPDFObjGen og(iter.first, 0);
2108   - if (m->xref_table.type(og) == 2 &&
2109   - m->xref_table.stream_number(og.getObj()) == obj_stream_number) {
  1984 + auto entry = m->xref_table.find(og);
  1985 + if (entry != m->xref_table.end() && entry->second.getType() == 2 &&
  1986 + entry->second.getObjStreamNumber() == obj_stream_number) {
2110 1987 int offset = iter.second;
2111 1988 input->seek(offset, SEEK_SET);
2112 1989 QPDFObjectHandle oh = readObjectInStream(input, iter.first);
2113   - updateCache(og, oh.getObj());
  1990 + updateCache(og, oh.getObj(), end_before_space, end_after_space);
2114 1991 } else {
2115 1992 QTC::TC("qpdf", "QPDF not caching overridden objstm object");
2116 1993 }
... ... @@ -2125,14 +2002,20 @@ QPDF::newIndirect(QPDFObjGen const&amp; og, std::shared_ptr&lt;QPDFObject&gt; const&amp; obj)
2125 2002 }
2126 2003  
2127 2004 void
2128   -QPDF::updateCache(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& object)
  2005 +QPDF::updateCache(
  2006 + QPDFObjGen const& og,
  2007 + std::shared_ptr<QPDFObject> const& object,
  2008 + qpdf_offset_t end_before_space,
  2009 + qpdf_offset_t end_after_space)
2129 2010 {
2130 2011 object->setObjGen(this, og);
2131 2012 if (isCached(og)) {
2132 2013 auto& cache = m->obj_cache[og];
2133 2014 cache.object->assign(object);
  2015 + cache.end_before_space = end_before_space;
  2016 + cache.end_after_space = end_after_space;
2134 2017 } else {
2135   - m->obj_cache[og] = ObjCache(object);
  2018 + m->obj_cache[og] = ObjCache(object, end_before_space, end_after_space);
2136 2019 }
2137 2020 }
2138 2021  
... ... @@ -2162,7 +2045,7 @@ QPDFObjectHandle
2162 2045 QPDF::makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj)
2163 2046 {
2164 2047 QPDFObjGen next{nextObjGen()};
2165   - m->obj_cache[next] = ObjCache(obj);
  2048 + m->obj_cache[next] = ObjCache(obj, -1, -1);
2166 2049 return newIndirect(next, m->obj_cache[next].object);
2167 2050 }
2168 2051  
... ... @@ -2218,7 +2101,7 @@ QPDF::getObjectForParser(int id, int gen, bool parse_pdf)
2218 2101 if (auto iter = m->obj_cache.find(og); iter != m->obj_cache.end()) {
2219 2102 return iter->second.object;
2220 2103 }
2221   - if (m->xref_table.type(og) || !m->xref_table.initialized()) {
  2104 + if (m->xref_table.count(og) || !m->parsed) {
2222 2105 return m->obj_cache.insert({og, QPDF_Unresolved::create(this, og)}).first->second.object;
2223 2106 }
2224 2107 if (parse_pdf) {
... ... @@ -2234,9 +2117,8 @@ QPDF::getObjectForJSON(int id, int gen)
2234 2117 auto [it, inserted] = m->obj_cache.try_emplace(og);
2235 2118 auto& obj = it->second.object;
2236 2119 if (inserted) {
2237   - obj = (m->xref_table.initialized() && !m->xref_table.type(og))
2238   - ? QPDF_Null::create(this, og)
2239   - : QPDF_Unresolved::create(this, og);
  2120 + obj = (m->parsed && !m->xref_table.count(og)) ? QPDF_Null::create(this, og)
  2121 + : QPDF_Unresolved::create(this, og);
2240 2122 }
2241 2123 return obj;
2242 2124 }
... ... @@ -2246,10 +2128,10 @@ QPDF::getObject(QPDFObjGen const&amp; og)
2246 2128 {
2247 2129 if (auto it = m->obj_cache.find(og); it != m->obj_cache.end()) {
2248 2130 return {it->second.object};
2249   - } else if (m->xref_table.initialized() && !m->xref_table.type(og)) {
  2131 + } else if (m->parsed && !m->xref_table.count(og)) {
2250 2132 return QPDF_Null::create();
2251 2133 } else {
2252   - auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og));
  2134 + auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og), -1, -1);
2253 2135 return {result.first->second.object};
2254 2136 }
2255 2137 }
... ... @@ -2285,12 +2167,13 @@ QPDF::replaceObject(QPDFObjGen const&amp; og, QPDFObjectHandle oh)
2285 2167 QTC::TC("qpdf", "QPDF replaceObject called with indirect object");
2286 2168 throw std::logic_error("QPDF::replaceObject called with indirect object handle");
2287 2169 }
2288   - updateCache(og, oh.getObj());
  2170 + updateCache(og, oh.getObj(), -1, -1);
2289 2171 }
2290 2172  
2291 2173 void
2292 2174 QPDF::removeObject(QPDFObjGen og)
2293 2175 {
  2176 + m->xref_table.erase(og);
2294 2177 if (auto cached = m->obj_cache.find(og); cached != m->obj_cache.end()) {
2295 2178 // Take care of any object handles that may be floating around.
2296 2179 cached->second.object->assign(QPDF_Null::create());
... ... @@ -2559,7 +2442,7 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
2559 2442 } else {
2560 2443 auto foreign_stream_data = std::make_shared<ForeignStreamData>(
2561 2444 foreign_stream_qpdf.m->encp,
2562   - foreign_stream_qpdf.m->file_sp,
  2445 + foreign_stream_qpdf.m->file,
2563 2446 foreign.getObjGen(),
2564 2447 stream->getParsedOffset(),
2565 2448 stream->getLength(),
... ... @@ -2643,13 +2526,13 @@ QPDF::getExtensionLevel()
2643 2526 QPDFObjectHandle
2644 2527 QPDF::getTrailer()
2645 2528 {
2646   - return m->xref_table.trailer();
  2529 + return m->trailer;
2647 2530 }
2648 2531  
2649 2532 QPDFObjectHandle
2650 2533 QPDF::getRoot()
2651 2534 {
2652   - QPDFObjectHandle root = m->xref_table.trailer().getKey("/Root");
  2535 + QPDFObjectHandle root = m->trailer.getKey("/Root");
2653 2536 if (!root.isDictionary()) {
2654 2537 throw damagedPDF("", 0, "unable to find /Root dictionary");
2655 2538 } else if (
... ... @@ -2665,10 +2548,17 @@ QPDF::getRoot()
2665 2548 std::map<QPDFObjGen, QPDFXRefEntry>
2666 2549 QPDF::getXRefTable()
2667 2550 {
2668   - if (!m->xref_table.initialized()) {
  2551 + return getXRefTableInternal();
  2552 +}
  2553 +
  2554 +std::map<QPDFObjGen, QPDFXRefEntry> const&
  2555 +QPDF::getXRefTableInternal()
  2556 +{
  2557 + if (!m->parsed) {
2669 2558 throw std::logic_error("QPDF::getXRefTable called before parsing.");
2670 2559 }
2671   - return m->xref_table.as_map();
  2560 +
  2561 + return m->xref_table;
2672 2562 }
2673 2563  
2674 2564 size_t
... ... @@ -2676,10 +2566,7 @@ QPDF::tableSize()
2676 2566 {
2677 2567 // If obj_cache is dense, accommodate all object in tables,else accommodate only original
2678 2568 // objects.
2679   - auto max_xref = toI(m->xref_table.size());
2680   - if (max_xref > 0) {
2681   - --max_xref;
2682   - }
  2569 + auto max_xref = m->xref_table.size() ? m->xref_table.crbegin()->first.getObj() : 0;
2683 2570 auto max_obj = m->obj_cache.size() ? m->obj_cache.crbegin()->first.getObj() : 0;
2684 2571 auto max_id = std::numeric_limits<int>::max() - 1;
2685 2572 if (max_obj >= max_id || max_xref >= max_id) {
... ... @@ -2717,14 +2604,14 @@ QPDF::getCompressibleObjGens()
2717 2604 // iterating through the xref table since it avoids preserving orphaned items.
2718 2605  
2719 2606 // Exclude encryption dictionary, if any
2720   - QPDFObjectHandle encryption_dict = m->xref_table.trailer().getKey("/Encrypt");
  2607 + QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt");
2721 2608 QPDFObjGen encryption_dict_og = encryption_dict.getObjGen();
2722 2609  
2723 2610 const size_t max_obj = getObjectCount();
2724 2611 std::vector<bool> visited(max_obj, false);
2725 2612 std::vector<QPDFObjectHandle> queue;
2726 2613 queue.reserve(512);
2727   - queue.push_back(m->xref_table.trailer());
  2614 + queue.push_back(m->trailer);
2728 2615 std::vector<T> result;
2729 2616 if constexpr (std::is_same_v<T, QPDFObjGen>) {
2730 2617 result.reserve(m->obj_cache.size());
... ... @@ -2879,7 +2766,7 @@ QPDF::pipeStreamData(
2879 2766 {
2880 2767 return pipeStreamData(
2881 2768 m->encp,
2882   - m->file_sp,
  2769 + m->file,
2883 2770 *this,
2884 2771 og,
2885 2772 offset,
... ...
libqpdf/QPDFJob.cc
... ... @@ -13,6 +13,7 @@
13 13 #include <qpdf/Pl_StdioFile.hh>
14 14 #include <qpdf/Pl_String.hh>
15 15 #include <qpdf/QIntC.hh>
  16 +#include <qpdf/QPDF.hh>
16 17 #include <qpdf/QPDFAcroFormDocumentHelper.hh>
17 18 #include <qpdf/QPDFCryptoProvider.hh>
18 19 #include <qpdf/QPDFEmbeddedFileDocumentHelper.hh>
... ... @@ -25,7 +26,6 @@
25 26 #include <qpdf/QPDFSystemError.hh>
26 27 #include <qpdf/QPDFUsage.hh>
27 28 #include <qpdf/QPDFWriter.hh>
28   -#include <qpdf/QPDF_private.hh>
29 29 #include <qpdf/QTC.hh>
30 30 #include <qpdf/QUtil.hh>
31 31  
... ...
libqpdf/QPDFWriter.cc
... ... @@ -14,10 +14,10 @@
14 14 #include <qpdf/Pl_RC4.hh>
15 15 #include <qpdf/Pl_StdioFile.hh>
16 16 #include <qpdf/QIntC.hh>
  17 +#include <qpdf/QPDF.hh>
17 18 #include <qpdf/QPDFObjectHandle.hh>
18 19 #include <qpdf/QPDF_Name.hh>
19 20 #include <qpdf/QPDF_String.hh>
20   -#include <qpdf/QPDF_private.hh>
21 21 #include <qpdf/QTC.hh>
22 22 #include <qpdf/QUtil.hh>
23 23 #include <qpdf/RC4.hh>
... ... @@ -1698,6 +1698,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1698 1698 if (obj_to_write.isStream()) {
1699 1699 // This condition occurred in a fuzz input. Ideally we should block it at parse
1700 1700 // time, but it's not clear to me how to construct a case for this.
  1701 + QTC::TC("qpdf", "QPDFWriter stream in ostream");
1701 1702 obj_to_write.warnIfPossible("stream found inside object stream; treating as null");
1702 1703 obj_to_write = QPDFObjectHandle::newNull();
1703 1704 }
... ... @@ -1936,26 +1937,47 @@ void
1936 1937 QPDFWriter::preserveObjectStreams()
1937 1938 {
1938 1939 auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
1939   - m->obj.streams_empty = !xref.object_streams();
1940   - if (m->obj.streams_empty) {
1941   - return;
1942   - }
1943   - // This code filters out objects that are not allowed to be in object streams. In addition to
1944   - // removing objects that were erroneously included in object streams in the source PDF, it also
1945   - // prevents unreferenced objects from being included.
  1940 + // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
  1941 + // streams out of old objects that have generation numbers greater than zero. However in an
  1942 + // existing PDF, all object stream objects and all objects in them must have generation 0
  1943 + // because the PDF spec does not provide any way to do otherwise. This code filters out objects
  1944 + // that are not allowed to be in object streams. In addition to removing objects that were
  1945 + // erroneously included in object streams in the source PDF, it also prevents unreferenced
  1946 + // objects from being included.
  1947 + auto end = xref.cend();
  1948 + m->obj.streams_empty = true;
1946 1949 if (m->preserve_unreferenced_objects) {
1947   - QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
1948   - for (auto [id, stream]: xref.compressed_objects()) {
1949   - m->obj[id].object_stream = stream;
  1950 + for (auto iter = xref.cbegin(); iter != end; ++iter) {
  1951 + if (iter->second.getType() == 2) {
  1952 + // Pdf contains object streams.
  1953 + QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
  1954 + m->obj.streams_empty = false;
  1955 + m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
  1956 + }
1950 1957 }
1951 1958 } else {
1952   - QTC::TC("qpdf", "QPDFWriter preserve object streams");
1953   - auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
1954   - for (auto [id, stream]: xref.compressed_objects()) {
1955   - if (eligible[id]) {
1956   - m->obj[id].object_stream = stream;
1957   - } else {
1958   - QTC::TC("qpdf", "QPDFWriter exclude from object stream");
  1959 + // Start by scanning for first compressed object in case we don't have any object streams to
  1960 + // process.
  1961 + for (auto iter = xref.cbegin(); iter != end; ++iter) {
  1962 + if (iter->second.getType() == 2) {
  1963 + // Pdf contains object streams.
  1964 + QTC::TC("qpdf", "QPDFWriter preserve object streams");
  1965 + m->obj.streams_empty = false;
  1966 + auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
  1967 + // The object pointed to by iter may be a previous generation, in which case it is
  1968 + // removed by getCompressibleObjSet. We need to restart the loop (while the object
  1969 + // table may contain multiple generations of an object).
  1970 + for (iter = xref.cbegin(); iter != end; ++iter) {
  1971 + if (iter->second.getType() == 2) {
  1972 + auto id = static_cast<size_t>(iter->first.getObj());
  1973 + if (id < eligible.size() && eligible[id]) {
  1974 + m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
  1975 + } else {
  1976 + QTC::TC("qpdf", "QPDFWriter exclude from object stream");
  1977 + }
  1978 + }
  1979 + }
  1980 + return;
1959 1981 }
1960 1982 }
1961 1983 }
... ...
libqpdf/QPDF_Stream.cc
... ... @@ -10,8 +10,8 @@
10 10 #include <qpdf/Pl_Flate.hh>
11 11 #include <qpdf/Pl_QPDFTokenizer.hh>
12 12 #include <qpdf/QIntC.hh>
  13 +#include <qpdf/QPDF.hh>
13 14 #include <qpdf/QPDFExc.hh>
14   -#include <qpdf/QPDF_private.hh>
15 15 #include <qpdf/QTC.hh>
16 16 #include <qpdf/QUtil.hh>
17 17 #include <qpdf/SF_ASCII85Decode.hh>
... ...
libqpdf/QPDF_encryption.cc
... ... @@ -3,7 +3,7 @@
3 3  
4 4 #include <qpdf/assert_debug.h>
5 5  
6   -#include <qpdf/QPDF_private.hh>
  6 +#include <qpdf/QPDF.hh>
7 7  
8 8 #include <qpdf/QPDFExc.hh>
9 9  
... ... @@ -727,7 +727,7 @@ QPDF::initializeEncryption()
727 727 // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption
728 728 // dictionary.
729 729  
730   - if (!m->xref_table.trailer().hasKey("/Encrypt")) {
  730 + if (!m->trailer.hasKey("/Encrypt")) {
731 731 return;
732 732 }
733 733  
... ... @@ -736,7 +736,7 @@ QPDF::initializeEncryption()
736 736 m->encp->encrypted = true;
737 737  
738 738 std::string id1;
739   - QPDFObjectHandle id_obj = m->xref_table.trailer().getKey("/ID");
  739 + QPDFObjectHandle id_obj = m->trailer.getKey("/ID");
740 740 if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) {
741 741 id1 = id_obj.getArrayItem(0).getStringValue();
742 742 } else {
... ... @@ -745,7 +745,7 @@ QPDF::initializeEncryption()
745 745 warn(damagedPDF("trailer", "invalid /ID in trailer dictionary"));
746 746 }
747 747  
748   - QPDFObjectHandle encryption_dict = m->xref_table.trailer().getKey("/Encrypt");
  748 + QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt");
749 749 if (!encryption_dict.isDictionary()) {
750 750 throw damagedPDF("/Encrypt in trailer dictionary is not a dictionary");
751 751 }
... ...
libqpdf/QPDF_json.cc
... ... @@ -51,6 +51,17 @@
51 51 // ] | <- st_top
52 52 // } |
53 53  
  54 +static char const* JSON_PDF = (
  55 + // force line break
  56 + "%PDF-1.3\n"
  57 + "xref\n"
  58 + "0 1\n"
  59 + "0000000000 65535 f \n"
  60 + "trailer << /Size 1 >>\n"
  61 + "startxref\n"
  62 + "9\n"
  63 + "%%EOF\n");
  64 +
54 65 // Validator methods -- these are much more performant than std::regex.
55 66 static bool
56 67 is_indirect_object(std::string const& v, int& obj, int& gen)
... ... @@ -256,10 +267,10 @@ class QPDF::JSONReactor: public JSON::Reactor
256 267 struct StackFrame
257 268 {
258 269 StackFrame(state_e state) :
259   - state(state){};
  270 + state(state) {};
260 271 StackFrame(state_e state, QPDFObjectHandle&& object) :
261 272 state(state),
262   - object(object){};
  273 + object(object) {};
263 274 state_e state;
264 275 QPDFObjectHandle object;
265 276 };
... ... @@ -582,7 +593,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
582 593 this->saw_value = true;
583 594 // The trailer must be a dictionary, so we can use setNextStateIfDictionary.
584 595 if (setNextStateIfDictionary("trailer.value", value, st_object)) {
585   - pdf.m->xref_table.trailer(makeObject(value));
  596 + this->pdf.m->trailer = makeObject(value);
  597 + setObjectDescription(this->pdf.m->trailer, value);
586 598 }
587 599 } else if (key == "stream") {
588 600 // Don't need to set saw_stream here since there's already an error.
... ... @@ -774,9 +786,7 @@ QPDF::createFromJSON(std::string const& json_file)
774 786 void
775 787 QPDF::createFromJSON(std::shared_ptr<InputSource> is)
776 788 {
777   - m->pdf_version = "1.3";
778   - m->no_input_name = is->getName();
779   - m->xref_table.initialize_json();
  789 + processMemoryFile(is->getName().c_str(), JSON_PDF, strlen(JSON_PDF));
780 790 importJSON(is, true);
781 791 }
782 792  
... ...
libqpdf/QPDF_linearization.cc
1 1 // See doc/linearization.
2 2  
3   -#include <qpdf/QPDF_private.hh>
  3 +#include <qpdf/QPDF.hh>
4 4  
5 5 #include <qpdf/BitStream.hh>
6 6 #include <qpdf/BitWriter.hh>
... ... @@ -288,8 +288,9 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
288 288 QPDFObjGen og;
289 289 QPDFObjectHandle H =
290 290 readObjectAtOffset(false, offset, "linearization hint stream", QPDFObjGen(0, 0), og, false);
291   - qpdf_offset_t min_end_offset = m->xref_table.end_before_space(og);
292   - qpdf_offset_t max_end_offset = m->xref_table.end_after_space(og);
  291 + ObjCache& oc = m->obj_cache[og];
  292 + qpdf_offset_t min_end_offset = oc.end_before_space;
  293 + qpdf_offset_t max_end_offset = oc.end_after_space;
293 294 if (!H.isStream()) {
294 295 throw damagedPDF("linearization dictionary", "hint table is not a stream");
295 296 }
... ... @@ -300,11 +301,14 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
300 301 // increasing length to cover it, even though the specification says all objects in the
301 302 // linearization parameter dictionary must be direct. We have to get the file position of the
302 303 // end of length in this case.
303   - auto length_og = Hdict.getKey("/Length").getObjGen();
304   - if (length_og.isIndirect()) {
  304 + QPDFObjectHandle length_obj = Hdict.getKey("/Length");
  305 + if (length_obj.isIndirect()) {
305 306 QTC::TC("qpdf", "QPDF hint table length indirect");
306   - min_end_offset = m->xref_table.end_before_space(length_og);
307   - max_end_offset = m->xref_table.end_after_space(length_og);
  307 + // Force resolution
  308 + (void)length_obj.getIntValue();
  309 + ObjCache& oc2 = m->obj_cache[length_obj.getObjGen()];
  310 + min_end_offset = oc2.end_before_space;
  311 + max_end_offset = oc2.end_after_space;
308 312 } else {
309 313 QTC::TC("qpdf", "QPDF hint table length direct");
310 314 }
... ... @@ -441,7 +445,7 @@ QPDF::checkLinearizationInternal()
441 445 for (size_t i = 0; i < toS(npages); ++i) {
442 446 QPDFObjectHandle const& page = pages.at(i);
443 447 QPDFObjGen og(page.getObjGen());
444   - if (m->xref_table.type(og) == 2) {
  448 + if (m->xref_table[og].getType() == 2) {
445 449 linearizationWarning(
446 450 "page dictionary for page " + std::to_string(i) + " is compressed");
447 451 }
... ... @@ -457,11 +461,12 @@ QPDF::checkLinearizationInternal()
457 461 break;
458 462 }
459 463 }
460   - if (m->file->tell() != m->xref_table.first_item_offset()) {
  464 + if (m->file->tell() != m->first_xref_item_offset) {
461 465 QTC::TC("qpdf", "QPDF err /T mismatch");
462 466 linearizationWarning(
463   - "space before first xref item (/T) mismatch (computed = " +
464   - std::to_string(m->xref_table.first_item_offset()) +
  467 + "space before first xref item (/T) mismatch "
  468 + "(computed = " +
  469 + std::to_string(m->first_xref_item_offset) +
465 470 "; file = " + std::to_string(m->file->tell()));
466 471 }
467 472  
... ... @@ -472,7 +477,7 @@ QPDF::checkLinearizationInternal()
472 477 // compressed objects are supposed to be at the end of the containing xref section if any object
473 478 // streams are in use.
474 479  
475   - if (m->xref_table.uncompressed_after_compressed()) {
  480 + if (m->uncompressed_after_compressed) {
476 481 linearizationWarning("linearized file contains an uncompressed object after a compressed "
477 482 "one in a cross-reference stream");
478 483 }
... ... @@ -480,9 +485,18 @@ QPDF::checkLinearizationInternal()
480 485 // Further checking requires optimization and order calculation. Don't allow optimization to
481 486 // make changes. If it has to, then the file is not properly linearized. We use the xref table
482 487 // to figure out which objects are compressed and which are uncompressed.
483   -
484   - optimize(m->xref_table);
485   - calculateLinearizationData(m->xref_table);
  488 + { // local scope
  489 + std::map<int, int> object_stream_data;
  490 + for (auto const& iter: m->xref_table) {
  491 + QPDFObjGen const& og = iter.first;
  492 + QPDFXRefEntry const& entry = iter.second;
  493 + if (entry.getType() == 2) {
  494 + object_stream_data[og.getObj()] = entry.getObjStreamNumber();
  495 + }
  496 + }
  497 + optimize(object_stream_data, false);
  498 + calculateLinearizationData(object_stream_data);
  499 + }
486 500  
487 501 // E: offset of end of first page -- Implementation note 123 says Acrobat includes on extra
488 502 // object here by mistake. pdlin fails to place thumbnail images in section 9, so when
... ... @@ -499,14 +513,13 @@ QPDF::checkLinearizationInternal()
499 513 qpdf_offset_t max_E = -1;
500 514 for (auto const& oh: m->part6) {
501 515 QPDFObjGen og(oh.getObjGen());
502   - auto before = m->xref_table.end_before_space(og);
503   - auto after = m->xref_table.end_after_space(og);
504   - if (before <= 0) {
  516 + if (m->obj_cache.count(og) == 0) {
505 517 // All objects have to have been dereferenced to be classified.
506 518 throw std::logic_error("linearization part6 object not in cache");
507 519 }
508   - min_E = std::max(min_E, before);
509   - max_E = std::max(max_E, after);
  520 + ObjCache const& oc = m->obj_cache[og];
  521 + min_E = std::max(min_E, oc.end_before_space);
  522 + max_E = std::max(max_E, oc.end_after_space);
510 523 }
511 524 if ((p.first_page_end < min_E) || (p.first_page_end > max_E)) {
512 525 QTC::TC("qpdf", "QPDF warn /E mismatch");
... ... @@ -533,11 +546,10 @@ QPDF::maxEnd(ObjUser const& ou)
533 546 }
534 547 qpdf_offset_t end = 0;
535 548 for (auto const& og: m->obj_user_to_objects[ou]) {
536   - auto e = m->xref_table.end_after_space(og);
537   - if (e <= 0) {
  549 + if (m->obj_cache.count(og) == 0) {
538 550 stopOnError("unknown object referenced in object user table");
539 551 }
540   - end = std::max(end, e);
  552 + end = std::max(end, m->obj_cache[og].end_after_space);
541 553 }
542 554 return end;
543 555 }
... ... @@ -545,18 +557,23 @@ QPDF::maxEnd(ObjUser const& ou)
545 557 qpdf_offset_t
546 558 QPDF::getLinearizationOffset(QPDFObjGen const& og)
547 559 {
548   - switch (m->xref_table.type(og)) {
  560 + QPDFXRefEntry entry = m->xref_table[og];
  561 + qpdf_offset_t result = 0;
  562 + switch (entry.getType()) {
549 563 case 1:
550   - return m->xref_table.offset(og);
  564 + result = entry.getOffset();
  565 + break;
551 566  
552 567 case 2:
553 568 // For compressed objects, return the offset of the object stream that contains them.
554   - return getLinearizationOffset(QPDFObjGen(m->xref_table.stream_number(og.getObj()), 0));
  569 + result = getLinearizationOffset(QPDFObjGen(entry.getObjStreamNumber(), 0));
  570 + break;
555 571  
556 572 default:
557 573 stopOnError("getLinearizationOffset called for xref entry not of type 1 or 2");
558   - return 0; // unreachable
  574 + break;
559 575 }
  576 + return result;
560 577 }
561 578  
562 579 QPDFObjectHandle
... ... @@ -571,16 +588,6 @@ QPDF::getUncompressedObject(QPDFObjectHandle& obj, std::map<int, int> const& obj
571 588 }
572 589  
573 590 QPDFObjectHandle
574   -QPDF::getUncompressedObject(QPDFObjectHandle& obj, Xref_table const& xref)
575   -{
576   - auto og = obj.getObjGen();
577   - if (obj.isNull() || xref.type(og) != 2) {
578   - return obj;
579   - }
580   - return getObject(xref.stream_number(og.getObj()), 0);
581   -}
582   -
583   -QPDFObjectHandle
584 591 QPDF::getUncompressedObject(QPDFObjectHandle& oh, QPDFWriter::ObjTable const& obj)
585 592 {
586 593 if (obj.contains(oh)) {
... ... @@ -597,13 +604,15 @@ QPDF::lengthNextN(int first_object, int n)
597 604 int length = 0;
598 605 for (int i = 0; i < n; ++i) {
599 606 QPDFObjGen og(first_object + i, 0);
600   - auto end = m->xref_table.end_after_space(og);
601   - if (end <= 0) {
  607 + if (m->xref_table.count(og) == 0) {
602 608 linearizationWarning(
603 609 "no xref table entry for " + std::to_string(first_object + i) + " 0");
604   - continue;
  610 + } else {
  611 + if (m->obj_cache.count(og) == 0) {
  612 + stopOnError("found unknown object while calculating length for linearization data");
  613 + }
  614 + length += toI(m->obj_cache[og].end_after_space - getLinearizationOffset(og));
605 615 }
606   - length += toI(end - getLinearizationOffset(og));
607 616 }
608 617 return length;
609 618 }
... ... @@ -627,7 +636,7 @@ QPDF::checkHPageOffset(
627 636 int npages = toI(pages.size());
628 637 qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset);
629 638 QPDFObjGen first_page_og(pages.at(0).getObjGen());
630   - if (m->xref_table.type(first_page_og) == 0) {
  639 + if (m->xref_table.count(first_page_og) == 0) {
631 640 stopOnError("supposed first page object is not known");
632 641 }
633 642 qpdf_offset_t offset = getLinearizationOffset(first_page_og);
... ... @@ -638,7 +647,7 @@ QPDF::checkHPageOffset(
638 647 for (int pageno = 0; pageno < npages; ++pageno) {
639 648 QPDFObjGen page_og(pages.at(toS(pageno)).getObjGen());
640 649 int first_object = page_og.getObj();
641   - if (m->xref_table.type(page_og) == 0) {
  650 + if (m->xref_table.count(page_og) == 0) {
642 651 stopOnError("unknown object in page offset hint table");
643 652 }
644 653 offset = getLinearizationOffset(page_og);
... ... @@ -760,7 +769,7 @@ QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<in
760 769 cur_object = so.first_shared_obj;
761 770  
762 771 QPDFObjGen og(cur_object, 0);
763   - if (m->xref_table.type(og) == 0) {
  772 + if (m->xref_table.count(og) == 0) {
764 773 stopOnError("unknown object in shared object hint table");
765 774 }
766 775 qpdf_offset_t offset = getLinearizationOffset(og);
... ... @@ -811,7 +820,7 @@ QPDF::checkHOutlines()
811 820 return;
812 821 }
813 822 QPDFObjGen og(outlines.getObjGen());
814   - if (m->xref_table.type(og) == 0) {
  823 + if (m->xref_table.count(og) == 0) {
815 824 stopOnError("unknown object in outlines hint table");
816 825 }
817 826 qpdf_offset_t offset = getLinearizationOffset(og);
... ... @@ -830,7 +839,8 @@ QPDF::checkHOutlines()
830 839 std::to_string(table_length) + "; computed = " + std::to_string(length));
831 840 }
832 841 } else {
833   - linearizationWarning("incorrect first object number in outline hints table.");
  842 + linearizationWarning("incorrect first object number in outline "
  843 + "hints table.");
834 844 }
835 845 } else {
836 846 linearizationWarning("incorrect object count in outline hint table");
... ...
libqpdf/QPDF_optimization.cc
... ... @@ -2,7 +2,7 @@
2 2  
3 3 #include <qpdf/assert_debug.h>
4 4  
5   -#include <qpdf/QPDF_private.hh>
  5 +#include <qpdf/QPDF.hh>
6 6  
7 7 #include <qpdf/QPDFExc.hh>
8 8 #include <qpdf/QPDFWriter_private.hh>
... ... @@ -78,12 +78,6 @@ QPDF::optimize(
78 78 optimize_internal(obj, true, skip_stream_parameters);
79 79 }
80 80  
81   -void
82   -QPDF::optimize(QPDF::Xref_table const& xref)
83   -{
84   - optimize_internal(xref, false, nullptr);
85   -}
86   -
87 81 template <typename T>
88 82 void
89 83 QPDF::optimize_internal(
... ... @@ -121,13 +115,13 @@ QPDF::optimize_internal(
121 115 }
122 116  
123 117 // Traverse document-level items
124   - for (auto const& key: m->xref_table.trailer().getKeys()) {
  118 + for (auto const& key: m->trailer.getKeys()) {
125 119 if (key == "/Root") {
126 120 // handled separately
127 121 } else {
128 122 updateObjectMaps(
129 123 ObjUser(ObjUser::ou_trailer_key, key),
130   - m->xref_table.trailer().getKey(key),
  124 + m->trailer.getKey(key),
131 125 skip_stream_parameters);
132 126 }
133 127 }
... ... @@ -175,13 +169,13 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
175 169 // values for them.
176 170 std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors;
177 171 pushInheritedAttributesToPageInternal(
178   - m->xref_table.trailer().getKey("/Root").getKey("/Pages"),
  172 + m->trailer.getKey("/Root").getKey("/Pages"),
179 173 key_ancestors,
180 174 allow_changes,
181 175 warn_skipped_keys);
182 176 if (!key_ancestors.empty()) {
183   - throw std::logic_error(
184   - "key_ancestors not empty after pushing inherited attributes to pages");
  177 + throw std::logic_error("key_ancestors not empty after"
  178 + " pushing inherited attributes to pages");
185 179 }
186 180 m->pushed_inherited_attributes_to_pages = true;
187 181 m->ever_pushed_inherited_attributes_to_pages = true;
... ... @@ -448,45 +442,3 @@ QPDF::filterCompressedObjects(QPDFWriter::ObjTable const& obj)
448 442 m->obj_user_to_objects = t_obj_user_to_objects;
449 443 m->object_to_obj_users = t_object_to_obj_users;
450 444 }
451   -
452   -void
453   -QPDF::filterCompressedObjects(QPDF::Xref_table const& xref)
454   -{
455   - if (!xref.object_streams()) {
456   - return;
457   - }
458   -
459   - // Transform object_to_obj_users and obj_user_to_objects so that they refer only to uncompressed
460   - // objects. If something is a user of a compressed object, then it is really a user of the
461   - // object stream that contains it.
462   -
463   - std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects;
464   - std::map<QPDFObjGen, std::set<ObjUser>> t_object_to_obj_users;
465   -
466   - for (auto const& i1: m->obj_user_to_objects) {
467   - ObjUser const& ou = i1.first;
468   - // Loop over objects.
469   - for (auto const& og: i1.second) {
470   - if (auto stream = xref.stream_number(og.getObj())) {
471   - t_obj_user_to_objects[ou].insert(QPDFObjGen(stream, 0));
472   - } else {
473   - t_obj_user_to_objects[ou].insert(og);
474   - }
475   - }
476   - }
477   -
478   - for (auto const& i1: m->object_to_obj_users) {
479   - QPDFObjGen const& og = i1.first;
480   - // Loop over obj_users.
481   - for (auto const& ou: i1.second) {
482   - if (auto stream = xref.stream_number(og.getObj())) {
483   - t_object_to_obj_users[QPDFObjGen(stream, 0)].insert(ou);
484   - } else {
485   - t_object_to_obj_users[og].insert(ou);
486   - }
487   - }
488   - }
489   -
490   - m->obj_user_to_objects = t_obj_user_to_objects;
491   - m->object_to_obj_users = t_object_to_obj_users;
492   -}
... ...
libqpdf/QPDF_pages.cc
1   -#include <qpdf/QPDF_private.hh>
  1 +#include <qpdf/QPDF.hh>
2 2  
3 3 #include <qpdf/QPDFExc.hh>
4 4 #include <qpdf/QTC.hh>
... ...
libqpdf/qpdf/ObjTable.hh
... ... @@ -46,12 +46,6 @@ class ObjTable: public std::vector<T>
46 46 }
47 47  
48 48 inline T const&
49   - operator[](unsigned int idx) const
50   - {
51   - return element(idx);
52   - }
53   -
54   - inline T const&
55 49 operator[](QPDFObjGen og) const
56 50 {
57 51 return element(static_cast<size_t>(og.getObj()));
... ...
libqpdf/qpdf/QPDFObject_private.hh
... ... @@ -6,13 +6,14 @@
6 6  
7 7 #include <qpdf/Constants.h>
8 8 #include <qpdf/JSON.hh>
  9 +#include <qpdf/QPDF.hh>
9 10 #include <qpdf/QPDFValue.hh>
10   -#include <qpdf/QPDF_private.hh>
11 11 #include <qpdf/Types.h>
12 12  
13 13 #include <string>
14 14 #include <string_view>
15 15  
  16 +class QPDF;
16 17 class QPDFObjectHandle;
17 18  
18 19 class QPDFObject
... ...
libqpdf/qpdf/QPDF_private.hh deleted
1   -#ifndef QPDF_PRIVATE_HH
2   -#define QPDF_PRIVATE_HH
3   -
4   -#include <qpdf/QPDF.hh>
5   -
6   -#include <variant>
7   -
8   -// Xref_table encapsulates the pdf's xref table and trailer.
9   -class QPDF::Xref_table
10   -{
11   - public:
12   - Xref_table(QPDF& qpdf, InputSource* const& file) :
13   - qpdf(qpdf),
14   - file(file)
15   - {
16   - tokenizer.allowEOF();
17   - }
18   -
19   - void initialize();
20   - void initialize_empty();
21   - void initialize_json();
22   - void reconstruct(QPDFExc& e);
23   - void show();
24   - bool resolve();
25   -
26   - QPDFObjectHandle
27   - trailer() const
28   - {
29   - return trailer_;
30   - }
31   -
32   - void
33   - trailer(QPDFObjectHandle&& oh)
34   - {
35   - trailer_ = std::move(oh);
36   - }
37   -
38   - // Returns 0 if og is not in table.
39   - size_t
40   - type(QPDFObjGen og) const
41   - {
42   - int id = og.getObj();
43   - if (id < 1 || static_cast<size_t>(id) >= table.size()) {
44   - return 0;
45   - }
46   - auto& e = table[static_cast<size_t>(id)];
47   - return e.gen() == og.getGen() ? e.type() : 0;
48   - }
49   -
50   - // Returns 0 if og is not in table.
51   - size_t
52   - type(size_t id) const noexcept
53   - {
54   - if (id >= table.size()) {
55   - return 0;
56   - }
57   - return table[id].type();
58   - }
59   -
60   - // Returns 0 if og is not in table.
61   - qpdf_offset_t
62   - offset(QPDFObjGen og) const noexcept
63   - {
64   - int id = og.getObj();
65   - if (id < 1 || static_cast<size_t>(id) >= table.size()) {
66   - return 0;
67   - }
68   - return table[static_cast<size_t>(id)].offset();
69   - }
70   -
71   - // Returns 0 if id is not in table.
72   - int
73   - stream_number(int id) const noexcept
74   - {
75   - if (id < 1 || static_cast<size_t>(id) >= table.size()) {
76   - return 0;
77   - }
78   - return table[static_cast<size_t>(id)].stream_number();
79   - }
80   -
81   - int
82   - stream_index(int id) const noexcept
83   - {
84   - if (id < 1 || static_cast<size_t>(id) >= table.size()) {
85   - return 0;
86   - }
87   - return table[static_cast<size_t>(id)].stream_index();
88   - }
89   -
90   - QPDFObjGen at_offset(qpdf_offset_t offset) const noexcept;
91   -
92   - std::map<QPDFObjGen, QPDFXRefEntry> as_map() const;
93   -
94   - bool
95   - object_streams() const noexcept
96   - {
97   - return object_streams_;
98   - }
99   -
100   - // Return a vector of object id and stream number for each compressed object.
101   - std::vector<std::pair<unsigned int, int>>
102   - compressed_objects() const
103   - {
104   - if (!initialized()) {
105   - throw std::logic_error("Xref_table::compressed_objects called before parsing.");
106   - }
107   -
108   - std::vector<std::pair<unsigned int, int>> result;
109   - result.reserve(table.size());
110   -
111   - unsigned int i{0};
112   - for (auto const& item: table) {
113   - if (item.type() == 2) {
114   - result.emplace_back(i, item.stream_number());
115   - }
116   - ++i;
117   - }
118   - return result;
119   - }
120   -
121   - // Temporary access to underlying table size
122   - size_t
123   - size() const noexcept
124   - {
125   - return table.size();
126   - }
127   -
128   - void
129   - ignore_streams(bool val) noexcept
130   - {
131   - ignore_streams_ = val;
132   - }
133   -
134   - bool
135   - initialized() const noexcept
136   - {
137   - return initialized_;
138   - }
139   -
140   - void
141   - attempt_recovery(bool val) noexcept
142   - {
143   - attempt_recovery_ = val;
144   - }
145   -
146   - int
147   - max_id() const noexcept
148   - {
149   - return max_id_;
150   - }
151   -
152   - // For Linearization
153   -
154   - qpdf_offset_t
155   - end_after_space(QPDFObjGen og)
156   - {
157   - auto& e = entry(toS(og.getObj()));
158   - switch (e.type()) {
159   - case 1:
160   - return e.end_after_space_;
161   - case 2:
162   - {
163   - auto es = entry(toS(e.stream_number()));
164   - return es.type() == 1 ? es.end_after_space_ : 0;
165   - }
166   - default:
167   - return 0;
168   - }
169   - }
170   -
171   - qpdf_offset_t
172   - end_before_space(QPDFObjGen og)
173   - {
174   - auto& e = entry(toS(og.getObj()));
175   - switch (e.type()) {
176   - case 1:
177   - return e.end_before_space_;
178   - case 2:
179   - {
180   - auto es = entry(toS(e.stream_number()));
181   - return es.type() == 1 ? es.end_before_space_ : 0;
182   - }
183   - default:
184   - return 0;
185   - }
186   - }
187   -
188   - void
189   - linearization_offsets(size_t id, qpdf_offset_t before, qpdf_offset_t after)
190   - {
191   - if (type(id)) {
192   - table[id].end_before_space_ = before;
193   - table[id].end_after_space_ = after;
194   - }
195   - }
196   -
197   - bool
198   - uncompressed_after_compressed() const noexcept
199   - {
200   - return uncompressed_after_compressed_;
201   - }
202   -
203   - // Actual value from file
204   - qpdf_offset_t
205   - first_item_offset() const noexcept
206   - {
207   - return first_item_offset_;
208   - }
209   -
210   - private:
211   - // Object, count, offset of first entry
212   - typedef std::tuple<int, int, qpdf_offset_t> Subsection;
213   -
214   - struct Uncompressed
215   - {
216   - Uncompressed(qpdf_offset_t offset) :
217   - offset(offset)
218   - {
219   - }
220   - qpdf_offset_t offset;
221   - };
222   -
223   - struct Compressed
224   - {
225   - Compressed(int stream_number, int stream_index) :
226   - stream_number(stream_number),
227   - stream_index(stream_index)
228   - {
229   - }
230   - int stream_number{0};
231   - int stream_index{0};
232   - };
233   -
234   - typedef std::variant<std::monostate, Uncompressed, Compressed> Xref;
235   -
236   - struct Entry
237   - {
238   - Entry() = default;
239   -
240   - Entry(int gen, Xref entry) :
241   - gen_(gen),
242   - entry(entry)
243   - {
244   - }
245   -
246   - int
247   - gen() const noexcept
248   - {
249   - return gen_;
250   - }
251   -
252   - size_t
253   - type() const noexcept
254   - {
255   - return entry.index();
256   - }
257   -
258   - qpdf_offset_t
259   - offset() const noexcept
260   - {
261   - return type() == 1 ? std::get<1>(entry).offset : 0;
262   - }
263   -
264   - int
265   - stream_number() const noexcept
266   - {
267   - return type() == 2 ? std::get<2>(entry).stream_number : 0;
268   - }
269   -
270   - int
271   - stream_index() const noexcept
272   - {
273   - return type() == 2 ? std::get<2>(entry).stream_index : 0;
274   - }
275   -
276   - int gen_{0};
277   - Xref entry;
278   - qpdf_offset_t end_before_space_{0};
279   - qpdf_offset_t end_after_space_{0};
280   - };
281   -
282   - Entry&
283   - entry(size_t id)
284   - {
285   - return id < table.size() ? table[id] : table[0];
286   - }
287   -
288   - void read(qpdf_offset_t offset);
289   -
290   - // Methods to parse tables
291   - qpdf_offset_t process_section(qpdf_offset_t offset);
292   - std::vector<Subsection> subsections(std::string& line);
293   - std::vector<Subsection> bad_subsections(std::string& line, qpdf_offset_t offset);
294   - Subsection subsection(std::string const& line);
295   - bool read_entry(qpdf_offset_t& f1, int& f2, char& type);
296   - bool read_bad_entry(qpdf_offset_t& f1, int& f2, char& type);
297   -
298   - // Methods to parse streams
299   - qpdf_offset_t read_stream(qpdf_offset_t offset);
300   - qpdf_offset_t process_stream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
301   - std::pair<int, std::array<int, 3>>
302   - process_W(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);
303   - std::pair<int, size_t> process_Size(
304   - QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged);
305   - std::pair<int, std::vector<std::pair<int, int>>> process_Index(
306   - QPDFObjectHandle& dict,
307   - int max_num_entries,
308   - std::function<QPDFExc(std::string_view)> damaged);
309   -
310   - QPDFObjectHandle read_trailer();
311   -
312   - QPDFTokenizer::Token
313   - read_token(size_t max_len = 0)
314   - {
315   - return tokenizer.readToken(*file, "", true, max_len);
316   - }
317   -
318   - // Methods to insert table entries
319   - void insert(int obj, int f0, qpdf_offset_t f1, int f2);
320   - void insert_free(QPDFObjGen);
321   -
322   - QPDFExc
323   - damaged_pdf(std::string const& msg)
324   - {
325   - return qpdf.damagedPDF("", 0, msg);
326   - }
327   -
328   - QPDFExc
329   - damaged_table(std::string const& msg)
330   - {
331   - return qpdf.damagedPDF("xref table", msg);
332   - }
333   -
334   - void
335   - warn_damaged(std::string const& msg)
336   - {
337   - qpdf.warn(damaged_pdf(msg));
338   - }
339   -
340   - QPDF& qpdf;
341   - InputSource* const& file;
342   - QPDFTokenizer tokenizer;
343   -
344   - std::vector<Entry> table;
345   - QPDFObjectHandle trailer_;
346   -
347   - bool attempt_recovery_{true};
348   - bool initialized_{false};
349   - bool ignore_streams_{false};
350   - bool reconstructed_{false};
351   - bool object_streams_{false};
352   - // Before the xref table is initialized, max_id_ is an upper bound on the possible object ids
353   - // that could be present in the PDF file. Once the trailer has been read, max_id_ is set to the
354   - // value of /Size. If the file is damaged, max_id_ becomes the maximum object id in the xref
355   - // table after reconstruction.
356   - int max_id_{std::numeric_limits<int>::max() - 1};
357   -
358   - // Linearization data
359   - bool uncompressed_after_compressed_{false};
360   - qpdf_offset_t first_item_offset_{0}; // actual value from file
361   -};
362   -
363   -// The Resolver class is restricted to QPDFObject so that only it can resolve indirect
364   -// references.
365   -class QPDF::Resolver
366   -{
367   - friend class QPDFObject;
368   - friend class QPDF_Unresolved;
369   -
370   - private:
371   - static QPDFObject*
372   - resolved(QPDF* qpdf, QPDFObjGen og)
373   - {
374   - return qpdf->resolve(og);
375   - }
376   -};
377   -
378   -// StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data.
379   -class QPDF::StreamCopier
380   -{
381   - friend class QPDFObjectHandle;
382   -
383   - private:
384   - static void
385   - copyStreamData(QPDF* qpdf, QPDFObjectHandle const& dest, QPDFObjectHandle const& src)
386   - {
387   - qpdf->copyStreamData(dest, src);
388   - }
389   -};
390   -
391   -// The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides
392   -// special access to allow the parser to create unresolved objects and dangling references.
393   -class QPDF::ParseGuard
394   -{
395   - friend class QPDFParser;
396   -
397   - private:
398   - ParseGuard(QPDF* qpdf) :
399   - qpdf(qpdf)
400   - {
401   - if (qpdf) {
402   - qpdf->inParse(true);
403   - }
404   - }
405   -
406   - static std::shared_ptr<QPDFObject>
407   - getObject(QPDF* qpdf, int id, int gen, bool parse_pdf)
408   - {
409   - return qpdf->getObjectForParser(id, gen, parse_pdf);
410   - }
411   -
412   - ~ParseGuard()
413   - {
414   - if (qpdf) {
415   - qpdf->inParse(false);
416   - }
417   - }
418   - QPDF* qpdf;
419   -};
420   -
421   -// Pipe class is restricted to QPDF_Stream.
422   -class QPDF::Pipe
423   -{
424   - friend class QPDF_Stream;
425   -
426   - private:
427   - static bool
428   - pipeStreamData(
429   - QPDF* qpdf,
430   - QPDFObjGen const& og,
431   - qpdf_offset_t offset,
432   - size_t length,
433   - QPDFObjectHandle dict,
434   - Pipeline* pipeline,
435   - bool suppress_warnings,
436   - bool will_retry)
437   - {
438   - return qpdf->pipeStreamData(
439   - og, offset, length, dict, pipeline, suppress_warnings, will_retry);
440   - }
441   -};
442   -
443   -class QPDF::ObjCache
444   -{
445   - public:
446   - ObjCache() = default;
447   -
448   - ObjCache(std::shared_ptr<QPDFObject> object) :
449   - object(object)
450   - {
451   - }
452   -
453   - std::shared_ptr<QPDFObject> object;
454   -};
455   -
456   -class QPDF::ObjCopier
457   -{
458   - public:
459   - std::map<QPDFObjGen, QPDFObjectHandle> object_map;
460   - std::vector<QPDFObjectHandle> to_copy;
461   - QPDFObjGen::set visiting;
462   -};
463   -
464   -class QPDF::EncryptionParameters
465   -{
466   - friend class QPDF;
467   -
468   - public:
469   - EncryptionParameters();
470   -
471   - private:
472   - bool encrypted;
473   - bool encryption_initialized;
474   - int encryption_V;
475   - int encryption_R;
476   - bool encrypt_metadata;
477   - std::map<std::string, encryption_method_e> crypt_filters;
478   - encryption_method_e cf_stream;
479   - encryption_method_e cf_string;
480   - encryption_method_e cf_file;
481   - std::string provided_password;
482   - std::string user_password;
483   - std::string encryption_key;
484   - std::string cached_object_encryption_key;
485   - QPDFObjGen cached_key_og;
486   - bool user_password_matched;
487   - bool owner_password_matched;
488   -};
489   -
490   -class QPDF::ForeignStreamData
491   -{
492   - friend class QPDF;
493   -
494   - public:
495   - ForeignStreamData(
496   - std::shared_ptr<EncryptionParameters> encp,
497   - std::shared_ptr<InputSource> file,
498   - QPDFObjGen const& foreign_og,
499   - qpdf_offset_t offset,
500   - size_t length,
501   - QPDFObjectHandle local_dict);
502   -
503   - private:
504   - std::shared_ptr<EncryptionParameters> encp;
505   - std::shared_ptr<InputSource> file;
506   - QPDFObjGen foreign_og;
507   - qpdf_offset_t offset;
508   - size_t length;
509   - QPDFObjectHandle local_dict;
510   -};
511   -
512   -class QPDF::CopiedStreamDataProvider: public QPDFObjectHandle::StreamDataProvider
513   -{
514   - public:
515   - CopiedStreamDataProvider(QPDF& destination_qpdf);
516   - ~CopiedStreamDataProvider() override = default;
517   - bool provideStreamData(
518   - QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry) override;
519   - void registerForeignStream(QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream);
520   - void registerForeignStream(QPDFObjGen const& local_og, std::shared_ptr<ForeignStreamData>);
521   -
522   - private:
523   - QPDF& destination_qpdf;
524   - std::map<QPDFObjGen, QPDFObjectHandle> foreign_streams;
525   - std::map<QPDFObjGen, std::shared_ptr<ForeignStreamData>> foreign_stream_data;
526   -};
527   -
528   -class QPDF::StringDecrypter: public QPDFObjectHandle::StringDecrypter
529   -{
530   - friend class QPDF;
531   -
532   - public:
533   - StringDecrypter(QPDF* qpdf, QPDFObjGen const& og);
534   - ~StringDecrypter() override = default;
535   - void decryptString(std::string& val) override;
536   -
537   - private:
538   - QPDF* qpdf;
539   - QPDFObjGen og;
540   -};
541   -
542   -// PDF 1.4: Table F.4
543   -struct QPDF::HPageOffsetEntry
544   -{
545   - int delta_nobjects{0}; // 1
546   - qpdf_offset_t delta_page_length{0}; // 2
547   - // vectors' sizes = nshared_objects
548   - int nshared_objects{0}; // 3
549   - std::vector<int> shared_identifiers; // 4
550   - std::vector<int> shared_numerators; // 5
551   - qpdf_offset_t delta_content_offset{0}; // 6
552   - qpdf_offset_t delta_content_length{0}; // 7
553   -};
554   -
555   -// PDF 1.4: Table F.3
556   -struct QPDF::HPageOffset
557   -{
558   - int min_nobjects{0}; // 1
559   - qpdf_offset_t first_page_offset{0}; // 2
560   - int nbits_delta_nobjects{0}; // 3
561   - int min_page_length{0}; // 4
562   - int nbits_delta_page_length{0}; // 5
563   - int min_content_offset{0}; // 6
564   - int nbits_delta_content_offset{0}; // 7
565   - int min_content_length{0}; // 8
566   - int nbits_delta_content_length{0}; // 9
567   - int nbits_nshared_objects{0}; // 10
568   - int nbits_shared_identifier{0}; // 11
569   - int nbits_shared_numerator{0}; // 12
570   - int shared_denominator{0}; // 13
571   - // vector size is npages
572   - std::vector<HPageOffsetEntry> entries;
573   -};
574   -
575   -// PDF 1.4: Table F.6
576   -struct QPDF::HSharedObjectEntry
577   -{
578   - // Item 3 is a 128-bit signature (unsupported by Acrobat)
579   - int delta_group_length{0}; // 1
580   - int signature_present{0}; // 2 -- always 0
581   - int nobjects_minus_one{0}; // 4 -- always 0
582   -};
583   -
584   -// PDF 1.4: Table F.5
585   -struct QPDF::HSharedObject
586   -{
587   - int first_shared_obj{0}; // 1
588   - qpdf_offset_t first_shared_offset{0}; // 2
589   - int nshared_first_page{0}; // 3
590   - int nshared_total{0}; // 4
591   - int nbits_nobjects{0}; // 5
592   - int min_group_length{0}; // 6
593   - int nbits_delta_group_length{0}; // 7
594   - // vector size is nshared_total
595   - std::vector<HSharedObjectEntry> entries;
596   -};
597   -
598   -// PDF 1.4: Table F.9
599   -struct QPDF::HGeneric
600   -{
601   - int first_object{0}; // 1
602   - qpdf_offset_t first_object_offset{0}; // 2
603   - int nobjects{0}; // 3
604   - int group_length{0}; // 4
605   -};
606   -
607   -// Other linearization data structures
608   -
609   -// Initialized from Linearization Parameter dictionary
610   -struct QPDF::LinParameters
611   -{
612   - qpdf_offset_t file_size{0}; // /L
613   - int first_page_object{0}; // /O
614   - qpdf_offset_t first_page_end{0}; // /E
615   - int npages{0}; // /N
616   - qpdf_offset_t xref_zero_offset{0}; // /T
617   - int first_page{0}; // /P
618   - qpdf_offset_t H_offset{0}; // offset of primary hint stream
619   - qpdf_offset_t H_length{0}; // length of primary hint stream
620   -};
621   -
622   -// Computed hint table value data structures. These tables contain the computed values on which
623   -// the hint table values are based. They exclude things like number of bits and store actual
624   -// values instead of mins and deltas. File offsets are also absolute rather than being offset
625   -// by the size of the primary hint table. We populate the hint table structures from these
626   -// during writing and compare the hint table values with these during validation. We ignore
627   -// some values for various reasons described in the code. Those values are omitted from these
628   -// structures. Note also that object numbers are object numbers from the input file, not the
629   -// output file.
630   -
631   -// Naming convention: CHSomething is analogous to HSomething above. "CH" is computed hint.
632   -
633   -struct QPDF::CHPageOffsetEntry
634   -{
635   - int nobjects{0};
636   - int nshared_objects{0};
637   - // vectors' sizes = nshared_objects
638   - std::vector<int> shared_identifiers;
639   -};
640   -
641   -struct QPDF::CHPageOffset
642   -{
643   - // vector size is npages
644   - std::vector<CHPageOffsetEntry> entries;
645   -};
646   -
647   -struct QPDF::CHSharedObjectEntry
648   -{
649   - CHSharedObjectEntry(int object) :
650   - object(object)
651   - {
652   - }
653   -
654   - int object;
655   -};
656   -
657   -// PDF 1.4: Table F.5
658   -struct QPDF::CHSharedObject
659   -{
660   - int first_shared_obj{0};
661   - int nshared_first_page{0};
662   - int nshared_total{0};
663   - // vector size is nshared_total
664   - std::vector<CHSharedObjectEntry> entries;
665   -};
666   -
667   -// No need for CHGeneric -- HGeneric is fine as is.
668   -
669   -// Data structures to support optimization -- implemented in QPDF_optimization.cc
670   -
671   -class QPDF::ObjUser
672   -{
673   - public:
674   - enum user_e { ou_bad, ou_page, ou_thumb, ou_trailer_key, ou_root_key, ou_root };
675   -
676   - // type is set to ou_bad
677   - ObjUser();
678   -
679   - // type must be ou_root
680   - ObjUser(user_e type);
681   -
682   - // type must be one of ou_page or ou_thumb
683   - ObjUser(user_e type, int pageno);
684   -
685   - // type must be one of ou_trailer_key or ou_root_key
686   - ObjUser(user_e type, std::string const& key);
687   -
688   - bool operator<(ObjUser const&) const;
689   -
690   - user_e ou_type;
691   - int pageno; // if ou_page;
692   - std::string key; // if ou_trailer_key or ou_root_key
693   -};
694   -
695   -struct QPDF::UpdateObjectMapsFrame
696   -{
697   - UpdateObjectMapsFrame(ObjUser const& ou, QPDFObjectHandle oh, bool top);
698   -
699   - ObjUser const& ou;
700   - QPDFObjectHandle oh;
701   - bool top;
702   -};
703   -
704   -class QPDF::PatternFinder: public InputSource::Finder
705   -{
706   - public:
707   - PatternFinder(QPDF& qpdf, bool (QPDF::*checker)()) :
708   - qpdf(qpdf),
709   - checker(checker)
710   - {
711   - }
712   - ~PatternFinder() override = default;
713   - bool
714   - check() override
715   - {
716   - return (this->qpdf.*checker)();
717   - }
718   -
719   - private:
720   - QPDF& qpdf;
721   - bool (QPDF::*checker)();
722   -};
723   -
724   -class QPDF::Members
725   -{
726   - friend class QPDF;
727   - friend class ResolveRecorder;
728   -
729   - public:
730   - QPDF_DLL
731   - ~Members() = default;
732   -
733   - private:
734   - Members(QPDF& qpdf);
735   - Members(Members const&) = delete;
736   -
737   - std::shared_ptr<QPDFLogger> log;
738   - unsigned long long unique_id{0};
739   - QPDFTokenizer tokenizer;
740   - // Filename to use if there is no input PDF
741   - std::string no_input_name{"closed input source"};
742   - // If file_sp is updated, file must also be updated.
743   - std::shared_ptr<InputSource> file_sp;
744   - InputSource* file;
745   - std::string last_object_description;
746   - bool provided_password_is_hex_key{false};
747   - bool suppress_warnings{false};
748   - size_t max_warnings{0};
749   - bool attempt_recovery{true};
750   - bool check_mode{false};
751   - std::shared_ptr<EncryptionParameters> encp;
752   - std::string pdf_version;
753   - Xref_table xref_table;
754   - std::map<QPDFObjGen, ObjCache> obj_cache;
755   - std::set<QPDFObjGen> resolving;
756   - std::vector<QPDFObjectHandle> all_pages;
757   - bool invalid_page_found{false};
758   - std::map<QPDFObjGen, int> pageobj_to_pages_pos;
759   - bool pushed_inherited_attributes_to_pages{false};
760   - bool ever_pushed_inherited_attributes_to_pages{false};
761   - bool ever_called_get_all_pages{false};
762   - std::vector<QPDFExc> warnings;
763   - std::map<unsigned long long, ObjCopier> object_copiers;
764   - std::shared_ptr<QPDFObjectHandle::StreamDataProvider> copied_streams;
765   - // copied_stream_data_provider is owned by copied_streams
766   - CopiedStreamDataProvider* copied_stream_data_provider{nullptr};
767   - bool fixed_dangling_refs{false};
768   - bool immediate_copy_from{false};
769   - bool in_parse{false};
770   - std::set<int> resolved_object_streams;
771   -
772   - // Linearization data
773   - bool linearization_warnings{false};
774   -
775   - // Linearization parameter dictionary and hint table data: may be read from file or computed
776   - // prior to writing a linearized file
777   - QPDFObjectHandle lindict;
778   - LinParameters linp;
779   - HPageOffset page_offset_hints;
780   - HSharedObject shared_object_hints;
781   - HGeneric outline_hints;
782   -
783   - // Computed linearization data: used to populate above tables during writing and to compare
784   - // with them during validation. c_ means computed.
785   - LinParameters c_linp;
786   - CHPageOffset c_page_offset_data;
787   - CHSharedObject c_shared_object_data;
788   - HGeneric c_outline_data;
789   -
790   - // Object ordering data for linearized files: initialized by calculateLinearizationData().
791   - // Part numbers refer to the PDF 1.4 specification.
792   - std::vector<QPDFObjectHandle> part4;
793   - std::vector<QPDFObjectHandle> part6;
794   - std::vector<QPDFObjectHandle> part7;
795   - std::vector<QPDFObjectHandle> part8;
796   - std::vector<QPDFObjectHandle> part9;
797   -
798   - // Optimization data
799   - std::map<ObjUser, std::set<QPDFObjGen>> obj_user_to_objects;
800   - std::map<QPDFObjGen, std::set<ObjUser>> object_to_obj_users;
801   -};
802   -
803   -// JobSetter class is restricted to QPDFJob.
804   -class QPDF::JobSetter
805   -{
806   - friend class QPDFJob;
807   -
808   - private:
809   - // Enable enhanced warnings for pdf file checking.
810   - static void
811   - setCheckMode(QPDF& qpdf, bool val)
812   - {
813   - qpdf.m->check_mode = val;
814   - }
815   -};
816   -
817   -class QPDF::ResolveRecorder
818   -{
819   - public:
820   - ResolveRecorder(QPDF* qpdf, QPDFObjGen const& og) :
821   - qpdf(qpdf),
822   - iter(qpdf->m->resolving.insert(og).first)
823   - {
824   - }
825   - virtual ~ResolveRecorder()
826   - {
827   - this->qpdf->m->resolving.erase(iter);
828   - }
829   -
830   - private:
831   - QPDF* qpdf;
832   - std::set<QPDFObjGen>::const_iterator iter;
833   -};
834   -
835   -// Writer class is restricted to QPDFWriter so that only it can call certain methods.
836   -class QPDF::Writer
837   -{
838   - friend class QPDFWriter;
839   -
840   - private:
841   - static void
842   - optimize(
843   - QPDF& qpdf,
844   - QPDFWriter::ObjTable const& obj,
845   - std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
846   - {
847   - return qpdf.optimize(obj, skip_stream_parameters);
848   - }
849   -
850   - static void
851   - getLinearizedParts(
852   - QPDF& qpdf,
853   - QPDFWriter::ObjTable const& obj,
854   - std::vector<QPDFObjectHandle>& part4,
855   - std::vector<QPDFObjectHandle>& part6,
856   - std::vector<QPDFObjectHandle>& part7,
857   - std::vector<QPDFObjectHandle>& part8,
858   - std::vector<QPDFObjectHandle>& part9)
859   - {
860   - qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9);
861   - }
862   -
863   - static void
864   - generateHintStream(
865   - QPDF& qpdf,
866   - QPDFWriter::NewObjTable const& new_obj,
867   - QPDFWriter::ObjTable const& obj,
868   - std::shared_ptr<Buffer>& hint_stream,
869   - int& S,
870   - int& O,
871   - bool compressed)
872   - {
873   - return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed);
874   - }
875   -
876   - static std::vector<QPDFObjGen>
877   - getCompressibleObjGens(QPDF& qpdf)
878   - {
879   - return qpdf.getCompressibleObjVector();
880   - }
881   -
882   - static std::vector<bool>
883   - getCompressibleObjSet(QPDF& qpdf)
884   - {
885   - return qpdf.getCompressibleObjSet();
886   - }
887   -
888   - static Xref_table const&
889   - getXRefTable(QPDF& qpdf)
890   - {
891   - return qpdf.m->xref_table;
892   - }
893   -
894   - static size_t
895   - tableSize(QPDF& qpdf)
896   - {
897   - return qpdf.tableSize();
898   - }
899   -};
900   -
901   -#endif // QPDF_PRIVATE_HH
libqpdf/qpdf/qpdf-c_impl.hh
... ... @@ -16,7 +16,7 @@ struct _qpdf_data
16 16 _qpdf_data() = default;
17 17  
18 18 _qpdf_data(std::unique_ptr<QPDF>&& qpdf) :
19   - qpdf(std::move(qpdf)){};
  19 + qpdf(std::move(qpdf)) {};
20 20  
21 21 ~_qpdf_data() = default;
22 22  
... ...
qpdf/qpdf.testcov
... ... @@ -48,6 +48,7 @@ QPDFWriter encrypted hint stream 0
48 48 QPDF opt inherited scalar 0
49 49 QPDF xref reused object 0
50 50 QPDF xref gen > 0 1
  51 +QPDF xref size mismatch 0
51 52 QPDF not a pdf file 0
52 53 QPDF can't find startxref 0
53 54 QPDF invalid xref 0
... ... @@ -104,6 +105,7 @@ QPDFWriter not recompressing /FlateDecode 0
104 105 QPDF_encryption xref stream from encrypted file 0
105 106 QPDFJob unable to filter 0
106 107 QUtil non-trivial UTF-16 0
  108 +QPDF xref overwrite object 0
107 109 QPDF xref overwrite invalid objgen 0
108 110 QPDF decoding error warning 0
109 111 qpdf-c called qpdf_init 0
... ... @@ -435,6 +437,7 @@ QPDF xref skipped space 0
435 437 QPDF eof skipping spaces before xref 1
436 438 QPDF_encryption user matches owner V < 5 0
437 439 QPDF_encryption same password 1
  440 +QPDFWriter stream in ostream 0
438 441 QPDFParser duplicate dict key 0
439 442 QPDFWriter no encryption sig contents 0
440 443 QPDFPageObjectHelper colorspace lookup 0
... ...
qpdf/qtest/qpdf/bad12-recover.out
  1 +WARNING: bad12.pdf: reported number of objects (9) is not one plus the highest object number (7)
1 2 WARNING: bad12.pdf (object 2 0, offset 128): expected endobj
2 3 /QTest is implicit
3 4 /QTest is direct and has type null (2)
... ...
qpdf/qtest/qpdf/bad12.out
  1 +WARNING: bad12.pdf: reported number of objects (9) is not one plus the highest object number (7)
1 2 WARNING: bad12.pdf (object 2 0, offset 128): expected endobj
2 3 /QTest is implicit
3 4 /QTest is direct and has type null (2)
... ...
qpdf/qtest/qpdf/fuzz-16214.out
... ... @@ -11,9 +11,11 @@ WARNING: fuzz-16214.pdf (object 1 0, offset 7189): expected n n obj
11 11 WARNING: fuzz-16214.pdf: Attempting to reconstruct cross-reference table
12 12 WARNING: fuzz-16214.pdf (offset 7207): error decoding stream data for object 2 0: stream inflate: inflate: data: invalid code lengths set
13 13 WARNING: fuzz-16214.pdf (offset 7207): getStreamData called on unfilterable stream
14   -WARNING: fuzz-16214.pdf (object 7 0, offset 7207): supposed object stream 5 has wrong type
15   -WARNING: fuzz-16214.pdf (object 7 0, offset 7207): object stream 5 has incorrect keys
  14 +WARNING: fuzz-16214.pdf (object 8 0, offset 7207): supposed object stream 5 has wrong type
  15 +WARNING: fuzz-16214.pdf (object 8 0, offset 7207): object stream 5 has incorrect keys
16 16 WARNING: fuzz-16214.pdf (object 21 0, offset 3639): expected endstream
17 17 WARNING: fuzz-16214.pdf (object 21 0, offset 3112): attempting to recover stream length
18 18 WARNING: fuzz-16214.pdf (object 21 0, offset 3112): recovered stream length: 340
  19 +WARNING: fuzz-16214.pdf, stream object 8 0: stream found inside object stream; treating as null
  20 +WARNING: fuzz-16214.pdf, stream object 8 0: stream found inside object stream; treating as null
19 21 qpdf: operation succeeded with warnings; resulting file may have some problems
... ...
qpdf/qtest/qpdf/issue-147.out
... ... @@ -2,6 +2,6 @@ WARNING: issue-147.pdf: can't find PDF header
2 2 WARNING: issue-147.pdf: file is damaged
3 3 WARNING: issue-147.pdf: can't find startxref
4 4 WARNING: issue-147.pdf: Attempting to reconstruct cross-reference table
5   -WARNING: issue-147.pdf: ignoring object with impossibly large id 62
6 5 WARNING: issue-147.pdf (trailer, offset 9): expected dictionary key but found non-name object; inserting key /QPDFFake1
7   -qpdf: issue-147.pdf: unable to find /Root dictionary
  6 +WARNING: issue-147.pdf: ignoring object with impossibly large id 62
  7 +qpdf: issue-147.pdf: unable to find objects while recovering damaged file
... ...
qpdf/qtest/qpdf/issue-335b.out
1 1 WARNING: issue-335b.pdf: can't find PDF header
2 2 WARNING: issue-335b.pdf: file is damaged
3   -WARNING: issue-335b.pdf (xref table, offset 11): xref table subsection header contains impossibly large entry
  3 +WARNING: issue-335b.pdf (xref table, offset 23): invalid xref entry (obj=6)
4 4 WARNING: issue-335b.pdf: Attempting to reconstruct cross-reference table
5 5 qpdf: issue-335b.pdf: unable to find trailer dictionary while recovering damaged file
... ...
qpdf/qtest/qpdf/recover-xref-stream.out
1 1 WARNING: recover-xref-stream.pdf: file is damaged
2 2 WARNING: recover-xref-stream.pdf: can't find startxref
3 3 WARNING: recover-xref-stream.pdf: Attempting to reconstruct cross-reference table
  4 +WARNING: recover-xref-stream.pdf: reported number of objects (14) is not one plus the highest object number (15)
4 5 qpdf: operation succeeded with warnings; resulting file may have some problems
... ...
qpdf/qtest/qpdf/recover-xref-stream.pdf
No preview for this file type
qpdf/qtest/qpdf/xref-errors.out
... ... @@ -3,11 +3,6 @@ WARNING: xref-errors.pdf (xref table, offset 606): accepting invalid xref table
3 3 WARNING: xref-errors.pdf (xref table, offset 627): accepting invalid xref table entry
4 4 WARNING: xref-errors.pdf (xref table, offset 648): accepting invalid xref table entry
5 5 WARNING: xref-errors.pdf (xref table, offset 667): accepting invalid xref table entry
6   -WARNING: xref-errors.pdf (xref table, offset 585): accepting invalid xref table entry
7   -WARNING: xref-errors.pdf (xref table, offset 606): accepting invalid xref table entry
8   -WARNING: xref-errors.pdf (xref table, offset 627): accepting invalid xref table entry
9   -WARNING: xref-errors.pdf (xref table, offset 648): accepting invalid xref table entry
10   -WARNING: xref-errors.pdf (xref table, offset 667): accepting invalid xref table entry
11 6 checking xref-errors.pdf
12 7 PDF Version: 1.3
13 8 File is not encrypted
... ...
qpdf/qtest/specific-bugs.test
... ... @@ -16,7 +16,7 @@ my $td = new TestDriver('specific-bugs');
16 16  
17 17 # The number is the github issue number in which the bug was reported.
18 18 my @bug_tests = (
19   -# ["51", "resolve loop", 2],
  19 + ["51", "resolve loop", 2],
20 20 ["99", "object 0", 2],
21 21 ["99b", "object 0", 2],
22 22 ["100", "xref reconstruction loop", 2],
... ... @@ -28,7 +28,7 @@ my @bug_tests = (
28 28 ["106", "zlib data error", 3],
29 29 ["141a", "/W entry size 0", 2],
30 30 ["141b", "/W entry size 0", 2],
31   -# ["143", "self-referential ostream", 2, "--preserve-unreferenced"],
  31 + ["143", "self-referential ostream", 2, "--preserve-unreferenced"],
32 32 ["146", "very deeply nested array", 2],
33 33 ["147", "previously caused memory error", 2],
34 34 ["148", "free memory on bad flate", 2],
... ... @@ -38,7 +38,7 @@ my @bug_tests = (
38 38 ["263", "empty xref stream", 2],
39 39 ["335a", "ozz-fuzz-12152", 2],
40 40 ["335b", "ozz-fuzz-14845", 2],
41   -# ["fuzz-16214", "stream in object stream", 3, "--preserve-unreferenced"],
  41 + ["fuzz-16214", "stream in object stream", 3, "--preserve-unreferenced"],
42 42 # When adding to this list, consider adding to CORPUS_FROM_TEST in
43 43 # fuzz/CMakeLists.txt and updating the count in
44 44 # fuzz/qtest/fuzz.test.
... ...