Commit 2532db5737aedb39cc17a97f8d2510ff3d4779bb

Authored by m-holger
Committed by GitHub
2 parents 807dbf42 8b379756

Merge pull request #1382 from m-holger/tokenizer

Use Tokenizer instead of QPDFTokenizer internally in qpdf
include/qpdf/BufferInputSource.hh
... ... @@ -30,6 +30,8 @@ class QPDF_DLL_CLASS BufferInputSource: public InputSource
30 30 // Otherwise, the caller owns the memory.
31 31 QPDF_DLL
32 32 BufferInputSource(std::string const& description, Buffer* buf, bool own_memory = false);
  33 +
  34 + // NB This overload copies the string contents.
33 35 QPDF_DLL
34 36 BufferInputSource(std::string const& description, std::string const& contents);
35 37 QPDF_DLL
... ...
include/qpdf/QPDF.hh
... ... @@ -726,167 +726,14 @@ class QPDF
726 726 void removePage(QPDFObjectHandle page);
727 727 // End legacy page helpers
728 728  
729   - // Writer class is restricted to QPDFWriter so that only it can call certain methods.
730   - class Writer
731   - {
732   - friend class QPDFWriter;
733   -
734   - private:
735   - static void
736   - optimize(
737   - QPDF& qpdf,
738   - QPDFWriter::ObjTable const& obj,
739   - std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
740   - {
741   - return qpdf.optimize(obj, skip_stream_parameters);
742   - }
743   -
744   - static void
745   - getLinearizedParts(
746   - QPDF& qpdf,
747   - QPDFWriter::ObjTable const& obj,
748   - std::vector<QPDFObjectHandle>& part4,
749   - std::vector<QPDFObjectHandle>& part6,
750   - std::vector<QPDFObjectHandle>& part7,
751   - std::vector<QPDFObjectHandle>& part8,
752   - std::vector<QPDFObjectHandle>& part9)
753   - {
754   - qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9);
755   - }
756   -
757   - static void
758   - generateHintStream(
759   - QPDF& qpdf,
760   - QPDFWriter::NewObjTable const& new_obj,
761   - QPDFWriter::ObjTable const& obj,
762   - std::shared_ptr<Buffer>& hint_stream,
763   - int& S,
764   - int& O,
765   - bool compressed)
766   - {
767   - return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed);
768   - }
769   -
770   - static std::vector<QPDFObjGen>
771   - getCompressibleObjGens(QPDF& qpdf)
772   - {
773   - return qpdf.getCompressibleObjVector();
774   - }
775   -
776   - static std::vector<bool>
777   - getCompressibleObjSet(QPDF& qpdf)
778   - {
779   - return qpdf.getCompressibleObjSet();
780   - }
781   -
782   - static std::map<QPDFObjGen, QPDFXRefEntry> const&
783   - getXRefTable(QPDF& qpdf)
784   - {
785   - return qpdf.getXRefTableInternal();
786   - }
  729 + // End of the public API. The following classes and methods are for qpdf internal use only.
787 730  
788   - static size_t
789   - tableSize(QPDF& qpdf)
790   - {
791   - return qpdf.tableSize();
792   - }
793   - };
794   -
795   - // The Resolver class is restricted to QPDFObject so that only it can resolve indirect
796   - // references.
797   - class Resolver
798   - {
799   - friend class QPDFObject;
800   - friend class QPDF_Unresolved;
801   - friend class qpdf::BaseHandle;
802   -
803   - private:
804   - static std::shared_ptr<QPDFObject> const&
805   - resolved(QPDF* qpdf, QPDFObjGen og)
806   - {
807   - return qpdf->resolve(og);
808   - }
809   - };
810   -
811   - // StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data.
812   - class StreamCopier
813   - {
814   - friend class QPDFObjectHandle;
815   -
816   - private:
817   - static void
818   - copyStreamData(QPDF* qpdf, QPDFObjectHandle const& dest, QPDFObjectHandle const& src)
819   - {
820   - qpdf->copyStreamData(dest, src);
821   - }
822   - };
823   -
824   - // The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides
825   - // special access to allow the parser to create unresolved objects and dangling references.
826   - class ParseGuard
827   - {
828   - friend class QPDFParser;
829   -
830   - private:
831   - ParseGuard(QPDF* qpdf) :
832   - qpdf(qpdf)
833   - {
834   - if (qpdf) {
835   - qpdf->inParse(true);
836   - }
837   - }
838   -
839   - static std::shared_ptr<QPDFObject>
840   - getObject(QPDF* qpdf, int id, int gen, bool parse_pdf)
841   - {
842   - return qpdf->getObjectForParser(id, gen, parse_pdf);
843   - }
844   -
845   - ~ParseGuard()
846   - {
847   - if (qpdf) {
848   - qpdf->inParse(false);
849   - }
850   - }
851   - QPDF* qpdf;
852   - };
853   -
854   - // Pipe class is restricted to QPDF_Stream.
855   - class Pipe
856   - {
857   - friend class QPDF_Stream;
858   - friend class qpdf::Stream;
859   -
860   - private:
861   - static bool
862   - pipeStreamData(
863   - QPDF* qpdf,
864   - QPDFObjGen og,
865   - qpdf_offset_t offset,
866   - size_t length,
867   - QPDFObjectHandle dict,
868   - Pipeline* pipeline,
869   - bool suppress_warnings,
870   - bool will_retry)
871   - {
872   - return qpdf->pipeStreamData(
873   - og, offset, length, dict, pipeline, suppress_warnings, will_retry);
874   - }
875   - };
876   -
877   - // JobSetter class is restricted to QPDFJob.
878   - class JobSetter
879   - {
880   - friend class QPDFJob;
881   -
882   - private:
883   - // Enable enhanced warnings for pdf file checking.
884   - static void
885   - setCheckMode(QPDF& qpdf, bool val)
886   - {
887   - qpdf.m->check_mode = val;
888   - }
889   - };
  731 + class Writer;
  732 + class Resolver;
  733 + class StreamCopier;
  734 + class ParseGuard;
  735 + class Pipe;
  736 + class JobSetter;
890 737  
891 738 // For testing only -- do not add to DLL
892 739 static bool test_json_validators();
... ... @@ -901,136 +748,13 @@ class QPDF
901 748  
902 749 static std::string const qpdf_version;
903 750  
904   - class ObjCache
905   - {
906   - public:
907   - ObjCache() :
908   - end_before_space(0),
909   - end_after_space(0)
910   - {
911   - }
912   - ObjCache(
913   - std::shared_ptr<QPDFObject> object,
914   - qpdf_offset_t end_before_space = 0,
915   - qpdf_offset_t end_after_space = 0) :
916   - object(object),
917   - end_before_space(end_before_space),
918   - end_after_space(end_after_space)
919   - {
920   - }
921   -
922   - std::shared_ptr<QPDFObject> object;
923   - qpdf_offset_t end_before_space;
924   - qpdf_offset_t end_after_space;
925   - };
926   -
927   - class ObjCopier
928   - {
929   - public:
930   - std::map<QPDFObjGen, QPDFObjectHandle> object_map;
931   - std::vector<QPDFObjectHandle> to_copy;
932   - QPDFObjGen::set visiting;
933   - };
934   -
935   - class EncryptionParameters
936   - {
937   - friend class QPDF;
938   -
939   - public:
940   - EncryptionParameters();
941   -
942   - private:
943   - bool encrypted;
944   - bool encryption_initialized;
945   - int encryption_V;
946   - int encryption_R;
947   - bool encrypt_metadata;
948   - std::map<std::string, encryption_method_e> crypt_filters;
949   - encryption_method_e cf_stream;
950   - encryption_method_e cf_string;
951   - encryption_method_e cf_file;
952   - std::string provided_password;
953   - std::string user_password;
954   - std::string encryption_key;
955   - std::string cached_object_encryption_key;
956   - QPDFObjGen cached_key_og;
957   - bool user_password_matched;
958   - bool owner_password_matched;
959   - };
960   -
961   - class ForeignStreamData
962   - {
963   - friend class QPDF;
964   -
965   - public:
966   - ForeignStreamData(
967   - std::shared_ptr<EncryptionParameters> encp,
968   - std::shared_ptr<InputSource> file,
969   - QPDFObjGen foreign_og,
970   - qpdf_offset_t offset,
971   - size_t length,
972   - QPDFObjectHandle local_dict);
973   -
974   - private:
975   - std::shared_ptr<EncryptionParameters> encp;
976   - std::shared_ptr<InputSource> file;
977   - QPDFObjGen foreign_og;
978   - qpdf_offset_t offset;
979   - size_t length;
980   - QPDFObjectHandle local_dict;
981   - };
982   -
983   - class CopiedStreamDataProvider: public QPDFObjectHandle::StreamDataProvider
984   - {
985   - public:
986   - CopiedStreamDataProvider(QPDF& destination_qpdf);
987   - ~CopiedStreamDataProvider() override = default;
988   - bool provideStreamData(
989   - QPDFObjGen const& og,
990   - Pipeline* pipeline,
991   - bool suppress_warnings,
992   - bool will_retry) override;
993   - void registerForeignStream(QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream);
994   - void registerForeignStream(QPDFObjGen const& local_og, std::shared_ptr<ForeignStreamData>);
995   -
996   - private:
997   - QPDF& destination_qpdf;
998   - std::map<QPDFObjGen, QPDFObjectHandle> foreign_streams;
999   - std::map<QPDFObjGen, std::shared_ptr<ForeignStreamData>> foreign_stream_data;
1000   - };
1001   -
1002   - class StringDecrypter: public QPDFObjectHandle::StringDecrypter
1003   - {
1004   - friend class QPDF;
1005   -
1006   - public:
1007   - StringDecrypter(QPDF* qpdf, QPDFObjGen og);
1008   - ~StringDecrypter() override = default;
1009   - void decryptString(std::string& val) override;
1010   -
1011   - private:
1012   - QPDF* qpdf;
1013   - QPDFObjGen og;
1014   - };
1015   -
1016   - class ResolveRecorder
1017   - {
1018   - public:
1019   - ResolveRecorder(QPDF* qpdf, QPDFObjGen og) :
1020   - qpdf(qpdf),
1021   - iter(qpdf->m->resolving.insert(og).first)
1022   - {
1023   - }
1024   - virtual ~ResolveRecorder()
1025   - {
1026   - this->qpdf->m->resolving.erase(iter);
1027   - }
1028   -
1029   - private:
1030   - QPDF* qpdf;
1031   - std::set<QPDFObjGen>::const_iterator iter;
1032   - };
1033   -
  751 + class ObjCache;
  752 + class ObjCopier;
  753 + class EncryptionParameters;
  754 + class ForeignStreamData;
  755 + class CopiedStreamDataProvider;
  756 + class StringDecrypter;
  757 + class ResolveRecorder;
1034 758 class JSONReactor;
1035 759  
1036 760 void parse(char const* password);
... ... @@ -1200,200 +924,19 @@ class QPDF
1200 924 replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top);
1201 925 void copyStreamData(QPDFObjectHandle dest_stream, QPDFObjectHandle src_stream);
1202 926  
1203   - // Linearization Hint table structures.
1204   - // Naming conventions:
1205   -
1206   - // HSomething is the Something Hint Table or table header
1207   - // HSomethingEntry is an entry in the Something table
1208   -
1209   - // delta_something + min_something = something
1210   - // nbits_something = number of bits required for something
1211   -
1212   - // something_offset is the pre-adjusted offset in the file. If >=
1213   - // H0_offset, H0_length must be added to get an actual file
1214   - // offset.
1215   -
1216   - // PDF 1.4: Table F.4
1217   - struct HPageOffsetEntry
1218   - {
1219   - int delta_nobjects{0}; // 1
1220   - qpdf_offset_t delta_page_length{0}; // 2
1221   - // vectors' sizes = nshared_objects
1222   - int nshared_objects{0}; // 3
1223   - std::vector<int> shared_identifiers; // 4
1224   - std::vector<int> shared_numerators; // 5
1225   - qpdf_offset_t delta_content_offset{0}; // 6
1226   - qpdf_offset_t delta_content_length{0}; // 7
1227   - };
1228   -
1229   - // PDF 1.4: Table F.3
1230   - struct HPageOffset
1231   - {
1232   - int min_nobjects{0}; // 1
1233   - qpdf_offset_t first_page_offset{0}; // 2
1234   - int nbits_delta_nobjects{0}; // 3
1235   - int min_page_length{0}; // 4
1236   - int nbits_delta_page_length{0}; // 5
1237   - int min_content_offset{0}; // 6
1238   - int nbits_delta_content_offset{0}; // 7
1239   - int min_content_length{0}; // 8
1240   - int nbits_delta_content_length{0}; // 9
1241   - int nbits_nshared_objects{0}; // 10
1242   - int nbits_shared_identifier{0}; // 11
1243   - int nbits_shared_numerator{0}; // 12
1244   - int shared_denominator{0}; // 13
1245   - // vector size is npages
1246   - std::vector<HPageOffsetEntry> entries;
1247   - };
1248   -
1249   - // PDF 1.4: Table F.6
1250   - struct HSharedObjectEntry
1251   - {
1252   - // Item 3 is a 128-bit signature (unsupported by Acrobat)
1253   - int delta_group_length{0}; // 1
1254   - int signature_present{0}; // 2 -- always 0
1255   - int nobjects_minus_one{0}; // 4 -- always 0
1256   - };
1257   -
1258   - // PDF 1.4: Table F.5
1259   - struct HSharedObject
1260   - {
1261   - int first_shared_obj{0}; // 1
1262   - qpdf_offset_t first_shared_offset{0}; // 2
1263   - int nshared_first_page{0}; // 3
1264   - int nshared_total{0}; // 4
1265   - int nbits_nobjects{0}; // 5
1266   - int min_group_length{0}; // 6
1267   - int nbits_delta_group_length{0}; // 7
1268   - // vector size is nshared_total
1269   - std::vector<HSharedObjectEntry> entries;
1270   - };
1271   -
1272   - // PDF 1.4: Table F.9
1273   - struct HGeneric
1274   - {
1275   - int first_object{0}; // 1
1276   - qpdf_offset_t first_object_offset{0}; // 2
1277   - int nobjects{0}; // 3
1278   - int group_length{0}; // 4
1279   - };
1280   -
1281   - // Other linearization data structures
1282   -
1283   - // Initialized from Linearization Parameter dictionary
1284   - struct LinParameters
1285   - {
1286   - qpdf_offset_t file_size{0}; // /L
1287   - int first_page_object{0}; // /O
1288   - qpdf_offset_t first_page_end{0}; // /E
1289   - int npages{0}; // /N
1290   - qpdf_offset_t xref_zero_offset{0}; // /T
1291   - int first_page{0}; // /P
1292   - qpdf_offset_t H_offset{0}; // offset of primary hint stream
1293   - qpdf_offset_t H_length{0}; // length of primary hint stream
1294   - };
1295   -
1296   - // Computed hint table value data structures. These tables contain the computed values on which
1297   - // the hint table values are based. They exclude things like number of bits and store actual
1298   - // values instead of mins and deltas. File offsets are also absolute rather than being offset
1299   - // by the size of the primary hint table. We populate the hint table structures from these
1300   - // during writing and compare the hint table values with these during validation. We ignore
1301   - // some values for various reasons described in the code. Those values are omitted from these
1302   - // structures. Note also that object numbers are object numbers from the input file, not the
1303   - // output file.
1304   -
1305   - // Naming convention: CHSomething is analogous to HSomething above. "CH" is computed hint.
1306   -
1307   - struct CHPageOffsetEntry
1308   - {
1309   - int nobjects{0};
1310   - int nshared_objects{0};
1311   - // vectors' sizes = nshared_objects
1312   - std::vector<int> shared_identifiers;
1313   - };
1314   -
1315   - struct CHPageOffset
1316   - {
1317   - // vector size is npages
1318   - std::vector<CHPageOffsetEntry> entries;
1319   - };
1320   -
1321   - struct CHSharedObjectEntry
1322   - {
1323   - CHSharedObjectEntry(int object) :
1324   - object(object)
1325   - {
1326   - }
1327   -
1328   - int object;
1329   - };
1330   -
1331   - // PDF 1.4: Table F.5
1332   - struct CHSharedObject
1333   - {
1334   - int first_shared_obj{0};
1335   - int nshared_first_page{0};
1336   - int nshared_total{0};
1337   - // vector size is nshared_total
1338   - std::vector<CHSharedObjectEntry> entries;
1339   - };
1340   -
1341   - // No need for CHGeneric -- HGeneric is fine as is.
1342   -
1343   - // Data structures to support optimization -- implemented in QPDF_optimization.cc
1344   -
1345   - class ObjUser
1346   - {
1347   - public:
1348   - enum user_e { ou_bad, ou_page, ou_thumb, ou_trailer_key, ou_root_key, ou_root };
1349   -
1350   - // type is set to ou_bad
1351   - ObjUser();
1352   -
1353   - // type must be ou_root
1354   - ObjUser(user_e type);
1355   -
1356   - // type must be one of ou_page or ou_thumb
1357   - ObjUser(user_e type, int pageno);
1358   -
1359   - // type must be one of ou_trailer_key or ou_root_key
1360   - ObjUser(user_e type, std::string const& key);
1361   -
1362   - bool operator<(ObjUser const&) const;
1363   -
1364   - user_e ou_type;
1365   - int pageno; // if ou_page;
1366   - std::string key; // if ou_trailer_key or ou_root_key
1367   - };
1368   -
1369   - struct UpdateObjectMapsFrame
1370   - {
1371   - UpdateObjectMapsFrame(ObjUser const& ou, QPDFObjectHandle oh, bool top);
1372   -
1373   - ObjUser const& ou;
1374   - QPDFObjectHandle oh;
1375   - bool top;
1376   - };
1377   -
1378   - class PatternFinder: public InputSource::Finder
1379   - {
1380   - public:
1381   - PatternFinder(QPDF& qpdf, bool (QPDF::*checker)()) :
1382   - qpdf(qpdf),
1383   - checker(checker)
1384   - {
1385   - }
1386   - ~PatternFinder() override = default;
1387   - bool
1388   - check() override
1389   - {
1390   - return (this->qpdf.*checker)();
1391   - }
1392   -
1393   - private:
1394   - QPDF& qpdf;
1395   - bool (QPDF::*checker)();
1396   - };
  927 + struct HPageOffsetEntry;
  928 + struct HPageOffset;
  929 + struct HSharedObjectEntry;
  930 + struct HSharedObject;
  931 + struct HGeneric;
  932 + struct LinParameters;
  933 + struct CHPageOffsetEntry;
  934 + struct CHPageOffset;
  935 + struct CHSharedObjectEntry;
  936 + struct CHSharedObject;
  937 + class ObjUser;
  938 + struct UpdateObjectMapsFrame;
  939 + class PatternFinder;
1397 940  
1398 941 // Methods to support pattern finding
1399 942 static bool validatePDFVersion(char const*&, std::string& version);
... ... @@ -1490,88 +1033,7 @@ class QPDF
1490 1033 return QIntC::to_ulonglong(i);
1491 1034 }
1492 1035  
1493   - class Members
1494   - {
1495   - friend class QPDF;
1496   - friend class ResolveRecorder;
1497   -
1498   - public:
1499   - Members();
1500   - Members(Members const&) = delete;
1501   - ~Members() = default;
1502   -
1503   - private:
1504   - std::shared_ptr<QPDFLogger> log;
1505   - unsigned long long unique_id{0};
1506   - QPDFTokenizer tokenizer;
1507   - std::shared_ptr<InputSource> file;
1508   - std::string last_object_description;
1509   - bool provided_password_is_hex_key{false};
1510   - bool ignore_xref_streams{false};
1511   - bool suppress_warnings{false};
1512   - size_t max_warnings{0};
1513   - bool attempt_recovery{true};
1514   - bool check_mode{false};
1515   - std::shared_ptr<EncryptionParameters> encp;
1516   - std::string pdf_version;
1517   - std::map<QPDFObjGen, QPDFXRefEntry> xref_table;
1518   - // Various tables are indexed by object id, with potential size id + 1
1519   - int xref_table_max_id{std::numeric_limits<int>::max() - 1};
1520   - qpdf_offset_t xref_table_max_offset{0};
1521   - std::set<int> deleted_objects;
1522   - std::map<QPDFObjGen, ObjCache> obj_cache;
1523   - std::set<QPDFObjGen> resolving;
1524   - QPDFObjectHandle trailer;
1525   - std::vector<QPDFObjectHandle> all_pages;
1526   - bool invalid_page_found{false};
1527   - std::map<QPDFObjGen, int> pageobj_to_pages_pos;
1528   - bool pushed_inherited_attributes_to_pages{false};
1529   - bool ever_pushed_inherited_attributes_to_pages{false};
1530   - bool ever_called_get_all_pages{false};
1531   - std::vector<QPDFExc> warnings;
1532   - std::map<unsigned long long, ObjCopier> object_copiers;
1533   - std::shared_ptr<QPDFObjectHandle::StreamDataProvider> copied_streams;
1534   - // copied_stream_data_provider is owned by copied_streams
1535   - CopiedStreamDataProvider* copied_stream_data_provider{nullptr};
1536   - bool reconstructed_xref{false};
1537   - bool fixed_dangling_refs{false};
1538   - bool immediate_copy_from{false};
1539   - bool in_parse{false};
1540   - bool parsed{false};
1541   - std::set<int> resolved_object_streams;
1542   -
1543   - // Linearization data
1544   - qpdf_offset_t first_xref_item_offset{0}; // actual value from file
1545   - bool uncompressed_after_compressed{false};
1546   - bool linearization_warnings{false};
1547   -
1548   - // Linearization parameter dictionary and hint table data: may be read from file or computed
1549   - // prior to writing a linearized file
1550   - QPDFObjectHandle lindict;
1551   - LinParameters linp;
1552   - HPageOffset page_offset_hints;
1553   - HSharedObject shared_object_hints;
1554   - HGeneric outline_hints;
1555   -
1556   - // Computed linearization data: used to populate above tables during writing and to compare
1557   - // with them during validation. c_ means computed.
1558   - LinParameters c_linp;
1559   - CHPageOffset c_page_offset_data;
1560   - CHSharedObject c_shared_object_data;
1561   - HGeneric c_outline_data;
1562   -
1563   - // Object ordering data for linearized files: initialized by calculateLinearizationData().
1564   - // Part numbers refer to the PDF 1.4 specification.
1565   - std::vector<QPDFObjectHandle> part4;
1566   - std::vector<QPDFObjectHandle> part6;
1567   - std::vector<QPDFObjectHandle> part7;
1568   - std::vector<QPDFObjectHandle> part8;
1569   - std::vector<QPDFObjectHandle> part9;
1570   -
1571   - // Optimization data
1572   - std::map<ObjUser, std::set<QPDFObjGen>> obj_user_to_objects;
1573   - std::map<QPDFObjGen, std::set<ObjUser>> object_to_obj_users;
1574   - };
  1036 + class Members;
1575 1037  
1576 1038 // Keep all member variables inside the Members object, which we dynamically allocate. This
1577 1039 // makes it possible to add new private members without breaking binary compatibility.
... ...
libqpdf/QPDF.cc
1 1 #include <qpdf/qpdf-config.h> // include first for large file support
2 2  
3   -#include <qpdf/QPDF.hh>
  3 +#include <qpdf/QPDF_private.hh>
4 4  
5 5 #include <array>
6 6 #include <atomic>
... ... @@ -168,12 +168,6 @@ QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen og) :
168 168 {
169 169 }
170 170  
171   -void
172   -QPDF::StringDecrypter::decryptString(std::string& val)
173   -{
174   - qpdf->decryptString(val, og);
175   -}
176   -
177 171 std::string const&
178 172 QPDF::QPDFVersion()
179 173 {
... ... @@ -181,20 +175,6 @@ QPDF::QPDFVersion()
181 175 return QPDF::qpdf_version;
182 176 }
183 177  
184   -QPDF::EncryptionParameters::EncryptionParameters() :
185   - encrypted(false),
186   - encryption_initialized(false),
187   - encryption_V(0),
188   - encryption_R(0),
189   - encrypt_metadata(true),
190   - cf_stream(e_none),
191   - cf_string(e_none),
192   - cf_file(e_none),
193   - user_password_matched(false),
194   - owner_password_matched(false)
195   -{
196   -}
197   -
198 178 QPDF::Members::Members() :
199 179 log(QPDFLogger::defaultLogger()),
200 180 file(new InvalidInputSource()),
... ...
libqpdf/QPDFJob.cc
... ... @@ -13,7 +13,6 @@
13 13 #include <qpdf/Pl_StdioFile.hh>
14 14 #include <qpdf/Pl_String.hh>
15 15 #include <qpdf/QIntC.hh>
16   -#include <qpdf/QPDF.hh>
17 16 #include <qpdf/QPDFAcroFormDocumentHelper.hh>
18 17 #include <qpdf/QPDFCryptoProvider.hh>
19 18 #include <qpdf/QPDFEmbeddedFileDocumentHelper.hh>
... ... @@ -27,6 +26,7 @@
27 26 #include <qpdf/QPDFSystemError.hh>
28 27 #include <qpdf/QPDFUsage.hh>
29 28 #include <qpdf/QPDFWriter.hh>
  29 +#include <qpdf/QPDF_private.hh>
30 30 #include <qpdf/QTC.hh>
31 31 #include <qpdf/QUtil.hh>
32 32 #include <qpdf/Util.hh>
... ...
libqpdf/QPDFObjectHandle.cc
... ... @@ -1495,19 +1495,23 @@ QPDFObjectHandle
1495 1495 QPDFObjectHandle::parse(
1496 1496 QPDF* context, std::string const& object_str, std::string const& object_description)
1497 1497 {
1498   - auto input = std::shared_ptr<InputSource>(new BufferInputSource("parsed object", object_str));
1499   - QPDFTokenizer tokenizer;
  1498 + // BufferInputSource does not modify the input, but Buffer either requires a string& or copies
  1499 + // the string.
  1500 + Buffer buf(const_cast<std::string&>(object_str));
  1501 + auto input = BufferInputSource("parsed object", &buf);
  1502 + qpdf::Tokenizer tokenizer;
1500 1503 bool empty = false;
1501   - QPDFObjectHandle result = parse(input, object_description, tokenizer, empty, nullptr, context);
1502   - size_t offset = QIntC::to_size(input->tell());
  1504 + auto result = QPDFParser(input, object_description, tokenizer, nullptr, context, false)
  1505 + .parse(empty, false);
  1506 + size_t offset = QIntC::to_size(input.tell());
1503 1507 while (offset < object_str.length()) {
1504 1508 if (!isspace(object_str.at(offset))) {
1505 1509 QTC::TC("qpdf", "QPDFObjectHandle trailing data in parse");
1506 1510 throw QPDFExc(
1507 1511 qpdf_e_damaged_pdf,
1508   - input->getName(),
  1512 + "parsed object",
1509 1513 object_description,
1510   - input->getLastOffset(),
  1514 + input.getLastOffset(),
1511 1515 "trailing data found parsing object from string");
1512 1516 }
1513 1517 ++offset;
... ... @@ -1614,51 +1618,52 @@ QPDFObjectHandle::parseContentStream_data(
1614 1618 QPDF* context)
1615 1619 {
1616 1620 size_t stream_length = stream_data->getSize();
1617   - auto input =
1618   - std::shared_ptr<InputSource>(new BufferInputSource(description, stream_data.get()));
1619   - QPDFTokenizer tokenizer;
  1621 + auto input = BufferInputSource(description, stream_data.get());
  1622 + Tokenizer tokenizer;
1620 1623 tokenizer.allowEOF();
  1624 + auto sp_description = QPDFParser::make_description(description, "content");
1621 1625 bool empty = false;
1622   - while (QIntC::to_size(input->tell()) < stream_length) {
  1626 + while (QIntC::to_size(input.tell()) < stream_length) {
1623 1627 // Read a token and seek to the beginning. The offset we get from this process is the
1624 1628 // beginning of the next non-ignorable (space, comment) token. This way, the offset and
1625 1629 // length don't include ignorable content.
1626   - tokenizer.readToken(input, "content", true);
1627   - qpdf_offset_t offset = input->getLastOffset();
1628   - input->seek(offset, SEEK_SET);
  1630 + tokenizer.nextToken(input, "content", true);
  1631 + qpdf_offset_t offset = input.getLastOffset();
  1632 + input.seek(offset, SEEK_SET);
1629 1633 auto obj =
1630   - QPDFParser(*input, "content", tokenizer, nullptr, context, false).parse(empty, true);
  1634 + QPDFParser(input, sp_description, "content", tokenizer, context).parse(empty, true);
1631 1635 if (!obj) {
1632 1636 // EOF
1633 1637 break;
1634 1638 }
1635   - size_t length = QIntC::to_size(input->tell() - offset);
  1639 + size_t length = QIntC::to_size(input.tell() - offset);
1636 1640  
1637 1641 callbacks->handleObject(obj, QIntC::to_size(offset), length);
1638 1642 if (obj.isOperator() && (obj.getOperatorValue() == "ID")) {
1639 1643 // Discard next character; it is the space after ID that terminated the token. Read
1640 1644 // until end of inline image.
1641 1645 char ch;
1642   - input->read(&ch, 1);
  1646 + input.read(&ch, 1);
1643 1647 tokenizer.expectInlineImage(input);
1644   - QPDFTokenizer::Token t = tokenizer.readToken(input, description, true);
1645   - offset = input->getLastOffset();
1646   - length = QIntC::to_size(input->tell() - offset);
1647   - if (t.getType() == QPDFTokenizer::tt_bad) {
  1648 + tokenizer.nextToken(input, description);
  1649 + offset = input.getLastOffset();
  1650 + length = QIntC::to_size(input.tell() - offset);
  1651 + if (tokenizer.getType() == QPDFTokenizer::tt_bad) {
1648 1652 QTC::TC("qpdf", "QPDFObjectHandle EOF in inline image");
1649 1653 warn(
1650 1654 context,
1651 1655 QPDFExc(
1652 1656 qpdf_e_damaged_pdf,
1653   - input->getName(),
  1657 + description,
1654 1658 "stream data",
1655   - input->tell(),
  1659 + input.tell(),
1656 1660 "EOF found while reading inline image"));
1657 1661 } else {
1658   - std::string inline_image = t.getValue();
1659 1662 QTC::TC("qpdf", "QPDFObjectHandle inline image token");
1660 1663 callbacks->handleObject(
1661   - QPDFObjectHandle::newInlineImage(inline_image), QIntC::to_size(offset), length);
  1664 + QPDFObjectHandle::newInlineImage(tokenizer.getValue()),
  1665 + QIntC::to_size(offset),
  1666 + length);
1662 1667 }
1663 1668 }
1664 1669 }
... ...
libqpdf/QPDFTokenizer.cc
... ... @@ -52,10 +52,10 @@ QPDFWordTokenFinder::check()
52 52 {
53 53 // Find a word token matching the given string, preceded by a delimiter, and followed by a
54 54 // delimiter or EOF.
55   - QPDFTokenizer tokenizer;
56   - QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true, str.size() + 2);
  55 + Tokenizer tokenizer;
  56 + tokenizer.nextToken(is, "finder", str.size() + 2);
57 57 qpdf_offset_t pos = is.tell();
58   - if (!(t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, str))) {
  58 + if (tokenizer.getType() != tt::tt_word || tokenizer.getValue() != str) {
59 59 QTC::TC("qpdf", "QPDFTokenizer finder found wrong word");
60 60 return false;
61 61 }
... ... @@ -845,7 +845,7 @@ Tokenizer::findEI(InputSource& input)
845 845 }
846 846 inline_image_bytes = QIntC::to_size(input.tell() - pos - 2);
847 847  
848   - QPDFTokenizer check;
  848 + Tokenizer check;
849 849 bool found_bad = false;
850 850 // Look at the next 10 tokens or up to EOF. The next inline image's image data would look
851 851 // like bad tokens, but there will always be at least 10 tokens between one inline image's
... ... @@ -853,13 +853,13 @@ Tokenizer::findEI(InputSource& input)
853 853 // all required as well as a BI and ID. If we get 10 good tokens in a row or hit EOF, we can
854 854 // be pretty sure we've found the actual EI.
855 855 for (int i = 0; i < 10; ++i) {
856   - QPDFTokenizer::Token t = check.readToken(input, "checker", true);
857   - QPDFTokenizer::token_type_e type = t.getType();
858   - if (type == tt::tt_eof) {
  856 + check.nextToken(input, "checker");
  857 + auto typ = check.getType();
  858 + if (typ == tt::tt_eof) {
859 859 okay = true;
860   - } else if (type == tt::tt_bad) {
  860 + } else if (typ == tt::tt_bad) {
861 861 found_bad = true;
862   - } else if (t.isWord()) {
  862 + } else if (typ == tt::tt_word) {
863 863 // The qpdf tokenizer lumps alphabetic and otherwise uncategorized characters into
864 864 // "words". We recognize strings of alphabetic characters as potential valid
865 865 // operators for purposes of telling whether we're in valid content or not. It's not
... ... @@ -868,13 +868,12 @@ Tokenizer::findEI(InputSource& input)
868 868 bool found_alpha = false;
869 869 bool found_non_printable = false;
870 870 bool found_other = false;
871   - for (char ch: t.getValue()) {
872   - if (((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z')) ||
873   - (ch == '*')) {
  871 + for (char ch: check.getValue()) {
  872 + if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch == '*')) {
874 873 // Treat '*' as alpha since there are valid PDF operators that contain *
875 874 // along with alphabetic characters.
876 875 found_alpha = true;
877   - } else if ((static_cast<signed char>(ch) < 32) && (!isSpace(ch))) {
  876 + } else if (static_cast<signed char>(ch) < 32 && !isSpace(ch)) {
878 877 // Compare ch as a signed char so characters outside of 7-bit will be < 0.
879 878 found_non_printable = true;
880 879 break;
... ...
libqpdf/QPDFWriter.cc
... ... @@ -14,9 +14,9 @@
14 14 #include <qpdf/Pl_RC4.hh>
15 15 #include <qpdf/Pl_StdioFile.hh>
16 16 #include <qpdf/QIntC.hh>
17   -#include <qpdf/QPDF.hh>
18 17 #include <qpdf/QPDFObjectHandle_private.hh>
19 18 #include <qpdf/QPDFObject_private.hh>
  19 +#include <qpdf/QPDF_private.hh>
20 20 #include <qpdf/QTC.hh>
21 21 #include <qpdf/QUtil.hh>
22 22 #include <qpdf/RC4.hh>
... ...
libqpdf/QPDF_Stream.cc
... ... @@ -10,8 +10,8 @@
10 10 #include <qpdf/Pl_Flate.hh>
11 11 #include <qpdf/Pl_QPDFTokenizer.hh>
12 12 #include <qpdf/QIntC.hh>
13   -#include <qpdf/QPDF.hh>
14 13 #include <qpdf/QPDFExc.hh>
  14 +#include <qpdf/QPDF_private.hh>
15 15 #include <qpdf/QTC.hh>
16 16 #include <qpdf/QUtil.hh>
17 17 #include <qpdf/SF_ASCII85Decode.hh>
... ...
libqpdf/QPDF_encryption.cc
... ... @@ -3,7 +3,7 @@
3 3  
4 4 #include <qpdf/assert_debug.h>
5 5  
6   -#include <qpdf/QPDF.hh>
  6 +#include <qpdf/QPDF_private.hh>
7 7  
8 8 #include <qpdf/QPDFExc.hh>
9 9  
... ...
libqpdf/QPDF_linearization.cc
1 1 // See doc/linearization.
2 2  
3   -#include <qpdf/QPDF.hh>
  3 +#include <qpdf/QPDF_private.hh>
4 4  
5 5 #include <qpdf/BitStream.hh>
6 6 #include <qpdf/BitWriter.hh>
... ...
libqpdf/QPDF_optimization.cc
... ... @@ -2,22 +2,15 @@
2 2  
3 3 #include <qpdf/assert_debug.h>
4 4  
5   -#include <qpdf/QPDF.hh>
  5 +#include <qpdf/QPDF_private.hh>
6 6  
7 7 #include <qpdf/QPDFExc.hh>
8 8 #include <qpdf/QPDFObjectHandle_private.hh>
9 9 #include <qpdf/QPDFWriter_private.hh>
10 10 #include <qpdf/QTC.hh>
11 11  
12   -QPDF::ObjUser::ObjUser() :
13   - ou_type(ou_bad),
14   - pageno(0)
15   -{
16   -}
17   -
18 12 QPDF::ObjUser::ObjUser(user_e type) :
19   - ou_type(type),
20   - pageno(0)
  13 + ou_type(type)
21 14 {
22 15 qpdf_assert_debug(type == ou_root);
23 16 }
... ... @@ -31,7 +24,6 @@ QPDF::ObjUser::ObjUser(user_e type, int pageno) :
31 24  
32 25 QPDF::ObjUser::ObjUser(user_e type, std::string const& key) :
33 26 ou_type(type),
34   - pageno(0),
35 27 key(key)
36 28 {
37 29 qpdf_assert_debug((type == ou_trailer_key) || (type == ou_root_key));
... ... @@ -40,16 +32,17 @@ QPDF::ObjUser::ObjUser(user_e type, std::string const& key) :
40 32 bool
41 33 QPDF::ObjUser::operator<(ObjUser const& rhs) const
42 34 {
43   - if (this->ou_type < rhs.ou_type) {
  35 + if (ou_type < rhs.ou_type) {
44 36 return true;
45   - } else if (this->ou_type == rhs.ou_type) {
46   - if (this->pageno < rhs.pageno) {
  37 + }
  38 + if (ou_type == rhs.ou_type) {
  39 + if (pageno < rhs.pageno) {
47 40 return true;
48   - } else if (this->pageno == rhs.pageno) {
49   - return (this->key < rhs.key);
  41 + }
  42 + if (pageno == rhs.pageno) {
  43 + return key < rhs.key;
50 44 }
51 45 }
52   -
53 46 return false;
54 47 }
55 48  
... ...
libqpdf/QPDF_pages.cc
1   -#include <qpdf/QPDF.hh>
  1 +#include <qpdf/QPDF_private.hh>
2 2  
3 3 #include <qpdf/QPDFExc.hh>
4 4 #include <qpdf/QPDFObjectHandle_private.hh>
... ...
libqpdf/qpdf/QPDFObject_private.hh
... ... @@ -7,8 +7,8 @@
7 7 #include <qpdf/Constants.h>
8 8 #include <qpdf/JSON.hh>
9 9 #include <qpdf/JSON_writer.hh>
10   -#include <qpdf/QPDF.hh>
11 10 #include <qpdf/QPDFObjGen.hh>
  11 +#include <qpdf/QPDF_private.hh>
12 12 #include <qpdf/Types.h>
13 13  
14 14 #include <map>
... ...
libqpdf/qpdf/QPDFParser.hh
... ... @@ -12,6 +12,8 @@ class QPDFParser
12 12 {
13 13 public:
14 14 QPDFParser() = delete;
  15 +
  16 + // This constructor is only used by QPDFObjectHandle::parse overload taking a QPDFTokenizer.
15 17 QPDFParser(
16 18 InputSource& input,
17 19 std::string const& object_description,
... ... @@ -24,16 +26,56 @@ class QPDFParser
24 26 tokenizer(*tokenizer.m),
25 27 decrypter(decrypter),
26 28 context(context),
27   - description(
28   - std::make_shared<QPDFObject::Description>(
29   - std::string(input.getName() + ", " + object_description + " at offset $PO"))),
  29 + description(make_description(input.getName(), object_description)),
30 30 parse_pdf(parse_pdf)
31 31 {
32 32 }
33   - virtual ~QPDFParser() = default;
  33 +
  34 + QPDFParser(
  35 + InputSource& input,
  36 + std::string const& object_description,
  37 + qpdf::Tokenizer& tokenizer,
  38 + QPDFObjectHandle::StringDecrypter* decrypter,
  39 + QPDF* context,
  40 + bool parse_pdf) :
  41 + input(input),
  42 + object_description(object_description),
  43 + tokenizer(tokenizer),
  44 + decrypter(decrypter),
  45 + context(context),
  46 + description(make_description(input.getName(), object_description)),
  47 + parse_pdf(parse_pdf)
  48 + {
  49 + }
  50 +
  51 + // Used by parseContentStream_data only
  52 + QPDFParser(
  53 + InputSource& input,
  54 + std::shared_ptr<QPDFObject::Description> sp_description,
  55 + std::string const& object_description,
  56 + qpdf::Tokenizer& tokenizer,
  57 + QPDF* context) :
  58 + input(input),
  59 + object_description(object_description),
  60 + tokenizer(tokenizer),
  61 + decrypter(nullptr),
  62 + context(context),
  63 + description(std::move(sp_description)),
  64 + parse_pdf(false)
  65 + {
  66 + }
  67 + ~QPDFParser() = default;
34 68  
35 69 QPDFObjectHandle parse(bool& empty, bool content_stream);
36 70  
  71 + static std::shared_ptr<QPDFObject::Description>
  72 + make_description(std::string const& input_name, std::string const& object_description)
  73 + {
  74 + using namespace std::literals;
  75 + return std::make_shared<QPDFObject::Description>(
  76 + input_name + ", " + object_description + " at offset $PO");
  77 + }
  78 +
37 79 private:
38 80 // Parser state. Note:
39 81 // state <= st_dictionary_value == (state = st_dictionary_key || state = st_dictionary_value)
... ... @@ -83,7 +125,7 @@ class QPDFParser
83 125 bool parse_pdf;
84 126  
85 127 std::vector<StackFrame> stack;
86   - StackFrame* frame;
  128 + StackFrame* frame{nullptr};
87 129 // Number of recent bad tokens. This will always be > 0 once a bad token has been encountered as
88 130 // it only gets incremented or reset when a bad token is encountered.
89 131 int bad_count{0};
... ... @@ -92,9 +134,9 @@ class QPDFParser
92 134 // Number of good tokens since last bad token. Irrelevant if bad_count == 0.
93 135 int good_count{0};
94 136 // Start offset including any leading whitespace.
95   - qpdf_offset_t start;
  137 + qpdf_offset_t start{0};
96 138 // Number of successive integer tokens.
97   - int int_count = 0;
  139 + int int_count{0};
98 140 long long int_buffer[2]{0, 0};
99 141 qpdf_offset_t last_offset_buffer[2]{0, 0};
100 142 };
... ...
libqpdf/qpdf/QPDF_private.hh 0 → 100644
  1 +#ifndef QPDF_PRIVATE_HH
  2 +#define QPDF_PRIVATE_HH
  3 +
  4 +#include <qpdf/QPDF.hh>
  5 +
  6 +#include <qpdf/QPDFTokenizer_private.hh>
  7 +
  8 +// Writer class is restricted to QPDFWriter so that only it can call certain methods.
  9 +class QPDF::Writer
  10 +{
  11 + friend class QPDFWriter;
  12 +
  13 + private:
  14 + static void
  15 + optimize(
  16 + QPDF& qpdf,
  17 + QPDFWriter::ObjTable const& obj,
  18 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
  19 + {
  20 + qpdf.optimize(obj, skip_stream_parameters);
  21 + }
  22 +
  23 + static void
  24 + getLinearizedParts(
  25 + QPDF& qpdf,
  26 + QPDFWriter::ObjTable const& obj,
  27 + std::vector<QPDFObjectHandle>& part4,
  28 + std::vector<QPDFObjectHandle>& part6,
  29 + std::vector<QPDFObjectHandle>& part7,
  30 + std::vector<QPDFObjectHandle>& part8,
  31 + std::vector<QPDFObjectHandle>& part9)
  32 + {
  33 + qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9);
  34 + }
  35 +
  36 + static void
  37 + generateHintStream(
  38 + QPDF& qpdf,
  39 + QPDFWriter::NewObjTable const& new_obj,
  40 + QPDFWriter::ObjTable const& obj,
  41 + std::shared_ptr<Buffer>& hint_stream,
  42 + int& S,
  43 + int& O,
  44 + bool compressed)
  45 + {
  46 + qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed);
  47 + }
  48 +
  49 + static std::vector<QPDFObjGen>
  50 + getCompressibleObjGens(QPDF& qpdf)
  51 + {
  52 + return qpdf.getCompressibleObjVector();
  53 + }
  54 +
  55 + static std::vector<bool>
  56 + getCompressibleObjSet(QPDF& qpdf)
  57 + {
  58 + return qpdf.getCompressibleObjSet();
  59 + }
  60 +
  61 + static std::map<QPDFObjGen, QPDFXRefEntry> const&
  62 + getXRefTable(QPDF& qpdf)
  63 + {
  64 + return qpdf.getXRefTableInternal();
  65 + }
  66 +
  67 + static size_t
  68 + tableSize(QPDF& qpdf)
  69 + {
  70 + return qpdf.tableSize();
  71 + }
  72 +};
  73 +
  74 +// The Resolver class is restricted to QPDFObject so that only it can resolve indirect
  75 +// references.
  76 +class QPDF::Resolver
  77 +{
  78 + friend class QPDFObject;
  79 + friend class qpdf::BaseHandle;
  80 +
  81 + private:
  82 + static std::shared_ptr<QPDFObject> const&
  83 + resolved(QPDF* qpdf, QPDFObjGen og)
  84 + {
  85 + return qpdf->resolve(og);
  86 + }
  87 +};
  88 +
  89 +// StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data.
  90 +class QPDF::StreamCopier
  91 +{
  92 + friend class QPDFObjectHandle;
  93 +
  94 + private:
  95 + static void
  96 + copyStreamData(QPDF* qpdf, QPDFObjectHandle const& dest, QPDFObjectHandle const& src)
  97 + {
  98 + qpdf->copyStreamData(dest, src);
  99 + }
  100 +};
  101 +
  102 +// The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides
  103 +// special access to allow the parser to create unresolved objects and dangling references.
  104 +class QPDF::ParseGuard
  105 +{
  106 + friend class QPDFParser;
  107 +
  108 + private:
  109 + ParseGuard(QPDF* qpdf) :
  110 + qpdf(qpdf)
  111 + {
  112 + if (qpdf) {
  113 + qpdf->inParse(true);
  114 + }
  115 + }
  116 +
  117 + static std::shared_ptr<QPDFObject>
  118 + getObject(QPDF* qpdf, int id, int gen, bool parse_pdf)
  119 + {
  120 + return qpdf->getObjectForParser(id, gen, parse_pdf);
  121 + }
  122 +
  123 + ~ParseGuard()
  124 + {
  125 + if (qpdf) {
  126 + qpdf->inParse(false);
  127 + }
  128 + }
  129 + QPDF* qpdf;
  130 +};
  131 +
  132 +// Pipe class is restricted to QPDF_Stream.
  133 +class QPDF::Pipe
  134 +{
  135 + friend class qpdf::Stream;
  136 +
  137 + private:
  138 + static bool
  139 + pipeStreamData(
  140 + QPDF* qpdf,
  141 + QPDFObjGen og,
  142 + qpdf_offset_t offset,
  143 + size_t length,
  144 + QPDFObjectHandle dict,
  145 + Pipeline* pipeline,
  146 + bool suppress_warnings,
  147 + bool will_retry)
  148 + {
  149 + return qpdf->pipeStreamData(
  150 + og, offset, length, dict, pipeline, suppress_warnings, will_retry);
  151 + }
  152 +};
  153 +
  154 +class QPDF::ObjCache
  155 +{
  156 + public:
  157 + ObjCache() = default;
  158 + ObjCache(
  159 + std::shared_ptr<QPDFObject> object,
  160 + qpdf_offset_t end_before_space = 0,
  161 + qpdf_offset_t end_after_space = 0) :
  162 + object(std::move(object)),
  163 + end_before_space(end_before_space),
  164 + end_after_space(end_after_space)
  165 + {
  166 + }
  167 +
  168 + std::shared_ptr<QPDFObject> object;
  169 + qpdf_offset_t end_before_space{0};
  170 + qpdf_offset_t end_after_space{0};
  171 +};
  172 +
  173 +class QPDF::ObjCopier
  174 +{
  175 + public:
  176 + std::map<QPDFObjGen, QPDFObjectHandle> object_map;
  177 + std::vector<QPDFObjectHandle> to_copy;
  178 + QPDFObjGen::set visiting;
  179 +};
  180 +
  181 +class QPDF::EncryptionParameters
  182 +{
  183 + friend class QPDF;
  184 +
  185 + public:
  186 + EncryptionParameters() = default;
  187 +
  188 + private:
  189 + bool encrypted{false};
  190 + bool encryption_initialized{false};
  191 + int encryption_V{0};
  192 + int encryption_R{0};
  193 + bool encrypt_metadata{true};
  194 + std::map<std::string, encryption_method_e> crypt_filters;
  195 + encryption_method_e cf_stream{e_none};
  196 + encryption_method_e cf_string{e_none};
  197 + encryption_method_e cf_file{e_none};
  198 + std::string provided_password;
  199 + std::string user_password;
  200 + std::string encryption_key;
  201 + std::string cached_object_encryption_key;
  202 + QPDFObjGen cached_key_og{};
  203 + bool user_password_matched{false};
  204 + bool owner_password_matched{false};
  205 +};
  206 +
  207 +class QPDF::ForeignStreamData
  208 +{
  209 + friend class QPDF;
  210 +
  211 + public:
  212 + ForeignStreamData(
  213 + std::shared_ptr<EncryptionParameters> encp,
  214 + std::shared_ptr<InputSource> file,
  215 + QPDFObjGen foreign_og,
  216 + qpdf_offset_t offset,
  217 + size_t length,
  218 + QPDFObjectHandle local_dict);
  219 +
  220 + private:
  221 + std::shared_ptr<EncryptionParameters> encp;
  222 + std::shared_ptr<InputSource> file;
  223 + QPDFObjGen foreign_og;
  224 + qpdf_offset_t offset;
  225 + size_t length;
  226 + QPDFObjectHandle local_dict;
  227 +};
  228 +
  229 +class QPDF::CopiedStreamDataProvider: public QPDFObjectHandle::StreamDataProvider
  230 +{
  231 + public:
  232 + CopiedStreamDataProvider(QPDF& destination_qpdf);
  233 + ~CopiedStreamDataProvider() override = default;
  234 + bool provideStreamData(
  235 + QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry) override;
  236 + void registerForeignStream(QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream);
  237 + void registerForeignStream(QPDFObjGen const& local_og, std::shared_ptr<ForeignStreamData>);
  238 +
  239 + private:
  240 + QPDF& destination_qpdf;
  241 + std::map<QPDFObjGen, QPDFObjectHandle> foreign_streams;
  242 + std::map<QPDFObjGen, std::shared_ptr<ForeignStreamData>> foreign_stream_data;
  243 +};
  244 +
  245 +class QPDF::StringDecrypter final: public QPDFObjectHandle::StringDecrypter
  246 +{
  247 + friend class QPDF;
  248 +
  249 + public:
  250 + StringDecrypter(QPDF* qpdf, QPDFObjGen og);
  251 + ~StringDecrypter() final = default;
  252 + void
  253 + decryptString(std::string& val) final
  254 + {
  255 + qpdf->decryptString(val, og);
  256 + }
  257 +
  258 + private:
  259 + QPDF* qpdf;
  260 + QPDFObjGen og;
  261 +};
  262 +
  263 +// PDF 1.4: Table F.4
  264 +struct QPDF::HPageOffsetEntry
  265 +{
  266 + int delta_nobjects{0}; // 1
  267 + qpdf_offset_t delta_page_length{0}; // 2
  268 + // vectors' sizes = nshared_objects
  269 + int nshared_objects{0}; // 3
  270 + std::vector<int> shared_identifiers; // 4
  271 + std::vector<int> shared_numerators; // 5
  272 + qpdf_offset_t delta_content_offset{0}; // 6
  273 + qpdf_offset_t delta_content_length{0}; // 7
  274 +};
  275 +
  276 +// PDF 1.4: Table F.3
  277 +struct QPDF::HPageOffset
  278 +{
  279 + int min_nobjects{0}; // 1
  280 + qpdf_offset_t first_page_offset{0}; // 2
  281 + int nbits_delta_nobjects{0}; // 3
  282 + int min_page_length{0}; // 4
  283 + int nbits_delta_page_length{0}; // 5
  284 + int min_content_offset{0}; // 6
  285 + int nbits_delta_content_offset{0}; // 7
  286 + int min_content_length{0}; // 8
  287 + int nbits_delta_content_length{0}; // 9
  288 + int nbits_nshared_objects{0}; // 10
  289 + int nbits_shared_identifier{0}; // 11
  290 + int nbits_shared_numerator{0}; // 12
  291 + int shared_denominator{0}; // 13
  292 + // vector size is npages
  293 + std::vector<HPageOffsetEntry> entries;
  294 +};
  295 +
  296 +// PDF 1.4: Table F.6
  297 +struct QPDF::HSharedObjectEntry
  298 +{
  299 + // Item 3 is a 128-bit signature (unsupported by Acrobat)
  300 + int delta_group_length{0}; // 1
  301 + int signature_present{0}; // 2 -- always 0
  302 + int nobjects_minus_one{0}; // 4 -- always 0
  303 +};
  304 +
  305 +// PDF 1.4: Table F.5
  306 +struct QPDF::HSharedObject
  307 +{
  308 + int first_shared_obj{0}; // 1
  309 + qpdf_offset_t first_shared_offset{0}; // 2
  310 + int nshared_first_page{0}; // 3
  311 + int nshared_total{0}; // 4
  312 + int nbits_nobjects{0}; // 5
  313 + int min_group_length{0}; // 6
  314 + int nbits_delta_group_length{0}; // 7
  315 + // vector size is nshared_total
  316 + std::vector<HSharedObjectEntry> entries;
  317 +};
  318 +
  319 +// PDF 1.4: Table F.9
  320 +struct QPDF::HGeneric
  321 +{
  322 + int first_object{0}; // 1
  323 + qpdf_offset_t first_object_offset{0}; // 2
  324 + int nobjects{0}; // 3
  325 + int group_length{0}; // 4
  326 +};
  327 +
  328 +// Other linearization data structures
  329 +
  330 +// Initialized from Linearization Parameter dictionary
  331 +struct QPDF::LinParameters
  332 +{
  333 + qpdf_offset_t file_size{0}; // /L
  334 + int first_page_object{0}; // /O
  335 + qpdf_offset_t first_page_end{0}; // /E
  336 + int npages{0}; // /N
  337 + qpdf_offset_t xref_zero_offset{0}; // /T
  338 + int first_page{0}; // /P
  339 + qpdf_offset_t H_offset{0}; // offset of primary hint stream
  340 + qpdf_offset_t H_length{0}; // length of primary hint stream
  341 +};
  342 +
  343 +// Computed hint table value data structures. These tables contain the computed values on which
  344 +// the hint table values are based. They exclude things like number of bits and store actual
  345 +// values instead of mins and deltas. File offsets are also absolute rather than being offset
  346 +// by the size of the primary hint table. We populate the hint table structures from these
  347 +// during writing and compare the hint table values with these during validation. We ignore
  348 +// some values for various reasons described in the code. Those values are omitted from these
  349 +// structures. Note also that object numbers are object numbers from the input file, not the
  350 +// output file.
  351 +
  352 +// Naming convention: CHSomething is analogous to HSomething above. "CH" is computed hint.
  353 +
  354 +struct QPDF::CHPageOffsetEntry
  355 +{
  356 + int nobjects{0};
  357 + int nshared_objects{0};
  358 + // vectors' sizes = nshared_objects
  359 + std::vector<int> shared_identifiers;
  360 +};
  361 +
  362 +struct QPDF::CHPageOffset
  363 +{
  364 + // vector size is npages
  365 + std::vector<CHPageOffsetEntry> entries;
  366 +};
  367 +
  368 +struct QPDF::CHSharedObjectEntry
  369 +{
  370 + CHSharedObjectEntry(int object) :
  371 + object(object)
  372 + {
  373 + }
  374 +
  375 + int object;
  376 +};
  377 +
  378 +// PDF 1.4: Table F.5
  379 +struct QPDF::CHSharedObject
  380 +{
  381 + int first_shared_obj{0};
  382 + int nshared_first_page{0};
  383 + int nshared_total{0};
  384 + // vector size is nshared_total
  385 + std::vector<CHSharedObjectEntry> entries;
  386 +};
  387 +
  388 +// No need for CHGeneric -- HGeneric is fine as is.
  389 +
  390 +// Data structures to support optimization -- implemented in QPDF_optimization.cc
  391 +
  392 +class QPDF::ObjUser
  393 +{
  394 + public:
  395 + enum user_e { ou_bad, ou_page, ou_thumb, ou_trailer_key, ou_root_key, ou_root };
  396 +
  397 + ObjUser() = default;
  398 +
  399 + // type must be ou_root
  400 + ObjUser(user_e type);
  401 +
  402 + // type must be one of ou_page or ou_thumb
  403 + ObjUser(user_e type, int pageno);
  404 +
  405 + // type must be one of ou_trailer_key or ou_root_key
  406 + ObjUser(user_e type, std::string const& key);
  407 +
  408 + bool operator<(ObjUser const&) const;
  409 +
  410 + user_e ou_type{ou_bad};
  411 + int pageno{0}; // if ou_page;
  412 + std::string key; // if ou_trailer_key or ou_root_key
  413 +};
  414 +
  415 +struct QPDF::UpdateObjectMapsFrame
  416 +{
  417 + UpdateObjectMapsFrame(ObjUser const& ou, QPDFObjectHandle oh, bool top);
  418 +
  419 + ObjUser const& ou;
  420 + QPDFObjectHandle oh;
  421 + bool top;
  422 +};
  423 +
  424 +class QPDF::PatternFinder final: public InputSource::Finder
  425 +{
  426 + public:
  427 + PatternFinder(QPDF& qpdf, bool (QPDF::*checker)()) :
  428 + qpdf(qpdf),
  429 + checker(checker)
  430 + {
  431 + }
  432 + ~PatternFinder() final = default;
  433 + bool
  434 + check() final
  435 + {
  436 + return (this->qpdf.*checker)();
  437 + }
  438 +
  439 + private:
  440 + QPDF& qpdf;
  441 + bool (QPDF::*checker)();
  442 +};
  443 +
  444 +class QPDF::Members
  445 +{
  446 + friend class QPDF;
  447 + friend class ResolveRecorder;
  448 +
  449 + public:
  450 + Members();
  451 + Members(Members const&) = delete;
  452 + ~Members() = default;
  453 +
  454 + private:
  455 + std::shared_ptr<QPDFLogger> log;
  456 + unsigned long long unique_id{0};
  457 + qpdf::Tokenizer tokenizer;
  458 + std::shared_ptr<InputSource> file;
  459 + std::string last_object_description;
  460 + bool provided_password_is_hex_key{false};
  461 + bool ignore_xref_streams{false};
  462 + bool suppress_warnings{false};
  463 + size_t max_warnings{0};
  464 + bool attempt_recovery{true};
  465 + bool check_mode{false};
  466 + std::shared_ptr<EncryptionParameters> encp;
  467 + std::string pdf_version;
  468 + std::map<QPDFObjGen, QPDFXRefEntry> xref_table;
  469 + // Various tables are indexed by object id, with potential size id + 1
  470 + int xref_table_max_id{std::numeric_limits<int>::max() - 1};
  471 + qpdf_offset_t xref_table_max_offset{0};
  472 + std::set<int> deleted_objects;
  473 + std::map<QPDFObjGen, ObjCache> obj_cache;
  474 + std::set<QPDFObjGen> resolving;
  475 + QPDFObjectHandle trailer;
  476 + std::vector<QPDFObjectHandle> all_pages;
  477 + bool invalid_page_found{false};
  478 + std::map<QPDFObjGen, int> pageobj_to_pages_pos;
  479 + bool pushed_inherited_attributes_to_pages{false};
  480 + bool ever_pushed_inherited_attributes_to_pages{false};
  481 + bool ever_called_get_all_pages{false};
  482 + std::vector<QPDFExc> warnings;
  483 + std::map<unsigned long long, ObjCopier> object_copiers;
  484 + std::shared_ptr<QPDFObjectHandle::StreamDataProvider> copied_streams;
  485 + // copied_stream_data_provider is owned by copied_streams
  486 + CopiedStreamDataProvider* copied_stream_data_provider{nullptr};
  487 + bool reconstructed_xref{false};
  488 + bool fixed_dangling_refs{false};
  489 + bool immediate_copy_from{false};
  490 + bool in_parse{false};
  491 + bool parsed{false};
  492 + std::set<int> resolved_object_streams;
  493 +
  494 + // Linearization data
  495 + qpdf_offset_t first_xref_item_offset{0}; // actual value from file
  496 + bool uncompressed_after_compressed{false};
  497 + bool linearization_warnings{false};
  498 +
  499 + // Linearization parameter dictionary and hint table data: may be read from file or computed
  500 + // prior to writing a linearized file
  501 + QPDFObjectHandle lindict;
  502 + LinParameters linp;
  503 + HPageOffset page_offset_hints;
  504 + HSharedObject shared_object_hints;
  505 + HGeneric outline_hints;
  506 +
  507 + // Computed linearization data: used to populate above tables during writing and to compare
  508 + // with them during validation. c_ means computed.
  509 + LinParameters c_linp;
  510 + CHPageOffset c_page_offset_data;
  511 + CHSharedObject c_shared_object_data;
  512 + HGeneric c_outline_data;
  513 +
  514 + // Object ordering data for linearized files: initialized by calculateLinearizationData().
  515 + // Part numbers refer to the PDF 1.4 specification.
  516 + std::vector<QPDFObjectHandle> part4;
  517 + std::vector<QPDFObjectHandle> part6;
  518 + std::vector<QPDFObjectHandle> part7;
  519 + std::vector<QPDFObjectHandle> part8;
  520 + std::vector<QPDFObjectHandle> part9;
  521 +
  522 + // Optimization data
  523 + std::map<ObjUser, std::set<QPDFObjGen>> obj_user_to_objects;
  524 + std::map<QPDFObjGen, std::set<ObjUser>> object_to_obj_users;
  525 +};
  526 +
  527 +// JobSetter class is restricted to QPDFJob.
  528 +class QPDF::JobSetter
  529 +{
  530 + friend class QPDFJob;
  531 +
  532 + private:
  533 + // Enable enhanced warnings for pdf file checking.
  534 + static void
  535 + setCheckMode(QPDF& qpdf, bool val)
  536 + {
  537 + qpdf.m->check_mode = val;
  538 + }
  539 +};
  540 +
  541 +class QPDF::ResolveRecorder
  542 +{
  543 + public:
  544 + ResolveRecorder(QPDF* qpdf, QPDFObjGen const& og) :
  545 + qpdf(qpdf),
  546 + iter(qpdf->m->resolving.insert(og).first)
  547 + {
  548 + }
  549 + virtual ~ResolveRecorder()
  550 + {
  551 + this->qpdf->m->resolving.erase(iter);
  552 + }
  553 +
  554 + private:
  555 + QPDF* qpdf;
  556 + std::set<QPDFObjGen>::const_iterator iter;
  557 +};
  558 +
  559 +#endif // QPDF_PRIVATE_HH
... ...