Commit f49f43fed125eef4f7dd72d1ad73d4c2e306c5bd
1 parent
9641626c
Add separate FUTURE header file for QPDFObjectHandle
Showing
2 changed files
with
1661 additions
and
26 deletions
include/qpdf/QPDFObjectHandle.hh
| ... | ... | @@ -22,23 +22,27 @@ |
| 22 | 22 | #ifndef QPDFOBJECTHANDLE_HH |
| 23 | 23 | #define QPDFOBJECTHANDLE_HH |
| 24 | 24 | |
| 25 | -#include <qpdf/Constants.h> | |
| 26 | -#include <qpdf/DLL.h> | |
| 27 | -#include <qpdf/Types.h> | |
| 28 | - | |
| 29 | -#include <functional> | |
| 30 | -#include <map> | |
| 31 | -#include <memory> | |
| 32 | -#include <set> | |
| 33 | -#include <string> | |
| 34 | -#include <vector> | |
| 35 | - | |
| 36 | -#include <qpdf/Buffer.hh> | |
| 37 | -#include <qpdf/InputSource.hh> | |
| 38 | -#include <qpdf/JSON.hh> | |
| 39 | -#include <qpdf/PointerHolder.hh> // unused -- remove in qpdf 12 (see #785) | |
| 40 | -#include <qpdf/QPDFObjGen.hh> | |
| 41 | -#include <qpdf/QPDFTokenizer.hh> | |
| 25 | +#ifdef QPDF_FUTURE | |
| 26 | +# include <qpdf/QPDFObjectHandle_future.hh> | |
| 27 | +#else | |
| 28 | + | |
| 29 | +# include <qpdf/Constants.h> | |
| 30 | +# include <qpdf/DLL.h> | |
| 31 | +# include <qpdf/Types.h> | |
| 32 | + | |
| 33 | +# include <functional> | |
| 34 | +# include <map> | |
| 35 | +# include <memory> | |
| 36 | +# include <set> | |
| 37 | +# include <string> | |
| 38 | +# include <vector> | |
| 39 | + | |
| 40 | +# include <qpdf/Buffer.hh> | |
| 41 | +# include <qpdf/InputSource.hh> | |
| 42 | +# include <qpdf/JSON.hh> | |
| 43 | +# include <qpdf/PointerHolder.hh> // unused -- remove in qpdf 12 (see #785) | |
| 44 | +# include <qpdf/QPDFObjGen.hh> | |
| 45 | +# include <qpdf/QPDFTokenizer.hh> | |
| 42 | 46 | |
| 43 | 47 | class Pipeline; |
| 44 | 48 | class QPDF; |
| ... | ... | @@ -291,13 +295,6 @@ class QPDFObjectHandle |
| 291 | 295 | QPDF_DLL |
| 292 | 296 | QPDFObjectHandle& operator=(QPDFObjectHandle const&) = default; |
| 293 | 297 | |
| 294 | -#ifdef QPDF_FUTURE | |
| 295 | - QPDF_DLL | |
| 296 | - QPDFObjectHandle(QPDFObjectHandle&&) = default; | |
| 297 | - QPDF_DLL | |
| 298 | - QPDFObjectHandle& operator=(QPDFObjectHandle&&) = default; | |
| 299 | -#endif | |
| 300 | - | |
| 301 | 298 | QPDF_DLL |
| 302 | 299 | inline bool isInitialized() const; |
| 303 | 300 | |
| ... | ... | @@ -1399,7 +1396,7 @@ class QPDFObjectHandle |
| 1399 | 1396 | std::shared_ptr<QPDFObject> obj; |
| 1400 | 1397 | }; |
| 1401 | 1398 | |
| 1402 | -#ifndef QPDF_NO_QPDF_STRING | |
| 1399 | +# ifndef QPDF_NO_QPDF_STRING | |
| 1403 | 1400 | // This is short for QPDFObjectHandle::parse, so you can do |
| 1404 | 1401 | |
| 1405 | 1402 | // auto oh = "<< /Key (value) >>"_qpdf; |
| ... | ... | @@ -1415,7 +1412,7 @@ QPDF_DLL |
| 1415 | 1412 | QPDFObjectHandle operator ""_qpdf(char const* v, size_t len); |
| 1416 | 1413 | /* clang-format on */ |
| 1417 | 1414 | |
| 1418 | -#endif // QPDF_NO_QPDF_STRING | |
| 1415 | +# endif // QPDF_NO_QPDF_STRING | |
| 1419 | 1416 | |
| 1420 | 1417 | class QPDFObjectHandle::QPDFDictItems |
| 1421 | 1418 | { |
| ... | ... | @@ -1637,4 +1634,5 @@ QPDFObjectHandle::isInitialized() const |
| 1637 | 1634 | return obj != nullptr; |
| 1638 | 1635 | } |
| 1639 | 1636 | |
| 1637 | +#endif // QPDF_FUTURE | |
| 1640 | 1638 | #endif // QPDFOBJECTHANDLE_HH | ... | ... |
include/qpdf/QPDFObjectHandle_future.hh
0 → 100644
| 1 | +// Copyright (c) 2005-2024 Jay Berkenbilt | |
| 2 | +// | |
| 3 | +// This file is part of qpdf. | |
| 4 | +// | |
| 5 | +// Licensed under the Apache License, Version 2.0 (the "License"); | |
| 6 | +// you may not use this file except in compliance with the License. | |
| 7 | +// You may obtain a copy of the License at | |
| 8 | +// | |
| 9 | +// http://www.apache.org/licenses/LICENSE-2.0 | |
| 10 | +// | |
| 11 | +// Unless required by applicable law or agreed to in writing, software | |
| 12 | +// distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 | +// See the License for the specific language governing permissions and | |
| 15 | +// limitations under the License. | |
| 16 | +// | |
| 17 | +// Versions of qpdf prior to version 7 were released under the terms | |
| 18 | +// of version 2.0 of the Artistic License. At your option, you may | |
| 19 | +// continue to consider qpdf to be licensed under those terms. Please | |
| 20 | +// see the manual for additional information. | |
| 21 | + | |
| 22 | +#ifndef QPDFOBJECTHANDLE_FUTURE_HH | |
| 23 | +#define QPDFOBJECTHANDLE_FUTURE_HH | |
| 24 | + | |
| 25 | +#include <qpdf/Constants.h> | |
| 26 | +#include <qpdf/DLL.h> | |
| 27 | +#include <qpdf/Types.h> | |
| 28 | + | |
| 29 | +#include <functional> | |
| 30 | +#include <map> | |
| 31 | +#include <memory> | |
| 32 | +#include <set> | |
| 33 | +#include <string> | |
| 34 | +#include <vector> | |
| 35 | + | |
| 36 | +#include <qpdf/Buffer.hh> | |
| 37 | +#include <qpdf/InputSource.hh> | |
| 38 | +#include <qpdf/JSON.hh> | |
| 39 | +#include <qpdf/QPDFObjGen.hh> | |
| 40 | +#include <qpdf/QPDFTokenizer.hh> | |
| 41 | + | |
| 42 | +class Pipeline; | |
| 43 | +class QPDF; | |
| 44 | +class QPDF_Array; | |
| 45 | +class QPDF_Bool; | |
| 46 | +class QPDF_Dictionary; | |
| 47 | +class QPDF_InlineImage; | |
| 48 | +class QPDF_Integer; | |
| 49 | +class QPDF_Name; | |
| 50 | +class QPDF_Null; | |
| 51 | +class QPDF_Operator; | |
| 52 | +class QPDF_Real; | |
| 53 | +class QPDF_Reserved; | |
| 54 | +class QPDF_Stream; | |
| 55 | +class QPDF_String; | |
| 56 | +class QPDFObject; | |
| 57 | +class QPDFTokenizer; | |
| 58 | +class QPDFExc; | |
| 59 | +class Pl_QPDFTokenizer; | |
| 60 | +class QPDFMatrix; | |
| 61 | +class QPDFParser; | |
| 62 | + | |
| 63 | +class QPDFObjectHandle | |
| 64 | +{ | |
| 65 | + friend class QPDFParser; | |
| 66 | + | |
| 67 | + public: | |
| 68 | + // This class is used by replaceStreamData. It provides an alternative way of associating | |
| 69 | + // stream data with a stream. See comments on replaceStreamData and newStream for additional | |
| 70 | + // details. | |
| 71 | + class QPDF_DLL_CLASS StreamDataProvider | |
| 72 | + { | |
| 73 | + public: | |
| 74 | + QPDF_DLL | |
| 75 | + StreamDataProvider(bool supports_retry = false); | |
| 76 | + | |
| 77 | + QPDF_DLL | |
| 78 | + virtual ~StreamDataProvider(); | |
| 79 | + // The implementation of this function must write stream data to the given pipeline. The | |
| 80 | + // stream data must conform to whatever filters are explicitly associated with the stream. | |
| 81 | + // QPDFWriter may, in some cases, add compression, but if it does, it will update the | |
| 82 | + // filters as needed. Every call to provideStreamData for a given stream must write the same | |
| 83 | + // data. Note that, when writing linearized files, qpdf will call your provideStreamData | |
| 84 | + // twice, and if it generates different output, you risk generating invalid output or having | |
| 85 | + // qpdf throw an exception. The object ID and generation passed to this method are those | |
| 86 | + // that belong to the stream on behalf of which the provider is called. They may be ignored | |
| 87 | + // or used by the implementation for indexing or other purposes. This information is made | |
| 88 | + // available just to make it more convenient to use a single StreamDataProvider object to | |
| 89 | + // provide data for multiple streams. | |
| 90 | + | |
| 91 | + // A few things to keep in mind: | |
| 92 | + // | |
| 93 | + // * Stream data providers must not modify any objects since they may be called after some | |
| 94 | + // parts of the file have already been written. | |
| 95 | + // | |
| 96 | + // * Since qpdf may call provideStreamData multiple times when writing linearized files, if | |
| 97 | + // the work done by your stream data provider is slow or computationally intensive, you | |
| 98 | + // might want to implement your own cache. | |
| 99 | + // | |
| 100 | + // * Once you have called replaceStreamData, the original stream data is no longer directly | |
| 101 | + // accessible from the stream, but this is easy to work around by copying the stream to | |
| 102 | + // a separate QPDF object. The qpdf library implements this very efficiently without | |
| 103 | + // actually making a copy of the stream data. You can find examples of this pattern in | |
| 104 | + // some of the examples, including pdf-custom-filter.cc and pdf-invert-images.cc. | |
| 105 | + | |
| 106 | + // Prior to qpdf 10.0.0, it was not possible to handle errors the way pipeStreamData does or | |
| 107 | + // to pass back success. Starting in qpdf 10.0.0, those capabilities have been added by | |
| 108 | + // allowing an alternative provideStreamData to be implemented. You must implement at least | |
| 109 | + // one of the versions of provideStreamData below. If you implement the version that | |
| 110 | + // supports retry and returns a value, you should pass true as the value of supports_retry | |
| 111 | + // in the base class constructor. This will cause the library to call that version of the | |
| 112 | + // method, which should also return a boolean indicating whether it ran without errors. | |
| 113 | + QPDF_DLL | |
| 114 | + virtual void provideStreamData(QPDFObjGen const& og, Pipeline* pipeline); | |
| 115 | + QPDF_DLL | |
| 116 | + virtual bool provideStreamData( | |
| 117 | + QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry); | |
| 118 | + QPDF_DLL virtual void provideStreamData(int objid, int generation, Pipeline* pipeline); | |
| 119 | + QPDF_DLL virtual bool provideStreamData( | |
| 120 | + int objid, int generation, Pipeline* pipeline, bool suppress_warnings, bool will_retry); | |
| 121 | + QPDF_DLL | |
| 122 | + bool supportsRetry(); | |
| 123 | + | |
| 124 | + private: | |
| 125 | + bool supports_retry; | |
| 126 | + }; | |
| 127 | + | |
| 128 | + // The TokenFilter class provides a way to filter content streams in a lexically aware fashion. | |
| 129 | + // TokenFilters can be attached to streams using the addTokenFilter or addContentTokenFilter | |
| 130 | + // methods or can be applied on the spot by filterPageContents. You may also use | |
| 131 | + // Pl_QPDFTokenizer directly if you need full control. | |
| 132 | + // | |
| 133 | + // The handleToken method is called for each token, including the eof token, and then handleEOF | |
| 134 | + // is called at the very end. Handlers may call write (or writeToken) to pass data downstream. | |
| 135 | + // Please see examples/pdf-filter-tokens.cc and examples/pdf-count-strings.cc for examples of | |
| 136 | + // using TokenFilters. | |
| 137 | + // | |
| 138 | + // Please note that when you call token.getValue() on a token of type tt_string or tt_name, you | |
| 139 | + // get the canonical, "parsed" representation of the token. For a string, this means that there | |
| 140 | + // are no delimiters, and for a name, it means that all escaping (# followed by two hex digits) | |
| 141 | + // has been resolved. qpdf's internal representation of a name includes the leading slash. As | |
| 142 | + // such, you can't write the value of token.getValue() directly to output that is supposed to be | |
| 143 | + // valid PDF syntax. If you want to do that, you need to call writeToken() instead, or you can | |
| 144 | + // retrieve the token as it appeared in the input with token.getRawValue(). To construct a new | |
| 145 | + // string or name token from a canonical representation, use | |
| 146 | + // QPDFTokenizer::Token(QPDFTokenizer::tt_string, "parsed-str") or | |
| 147 | + // QPDFTokenizer::Token(QPDFTokenizer::tt_name, | |
| 148 | + // "/Canonical-Name"). Tokens created this way won't have a PDF-syntax raw value, but you can | |
| 149 | + // still write them with writeToken(). Example: | |
| 150 | + // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_name, "/text/plain")) | |
| 151 | + // would write `/text#2fplain`, and | |
| 152 | + // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_string, "a\\(b")) would write `(a\(b)`. | |
| 153 | + class QPDF_DLL_CLASS TokenFilter | |
| 154 | + { | |
| 155 | + public: | |
| 156 | + QPDF_DLL | |
| 157 | + TokenFilter() = default; | |
| 158 | + QPDF_DLL | |
| 159 | + virtual ~TokenFilter() = default; | |
| 160 | + virtual void handleToken(QPDFTokenizer::Token const&) = 0; | |
| 161 | + QPDF_DLL | |
| 162 | + virtual void handleEOF(); | |
| 163 | + | |
| 164 | + class PipelineAccessor | |
| 165 | + { | |
| 166 | + friend class Pl_QPDFTokenizer; | |
| 167 | + | |
| 168 | + private: | |
| 169 | + static void | |
| 170 | + setPipeline(TokenFilter* f, Pipeline* p) | |
| 171 | + { | |
| 172 | + f->setPipeline(p); | |
| 173 | + } | |
| 174 | + }; | |
| 175 | + | |
| 176 | + protected: | |
| 177 | + QPDF_DLL | |
| 178 | + void write(char const* data, size_t len); | |
| 179 | + QPDF_DLL | |
| 180 | + void write(std::string const& str); | |
| 181 | + QPDF_DLL | |
| 182 | + void writeToken(QPDFTokenizer::Token const&); | |
| 183 | + | |
| 184 | + private: | |
| 185 | + QPDF_DLL_PRIVATE | |
| 186 | + void setPipeline(Pipeline*); | |
| 187 | + | |
| 188 | + Pipeline* pipeline; | |
| 189 | + }; | |
| 190 | + | |
| 191 | + // This class is used by parse to decrypt strings when reading an object that contains encrypted | |
| 192 | + // strings. | |
| 193 | + class StringDecrypter | |
| 194 | + { | |
| 195 | + public: | |
| 196 | + QPDF_DLL | |
| 197 | + virtual ~StringDecrypter() = default; | |
| 198 | + virtual void decryptString(std::string& val) = 0; | |
| 199 | + }; | |
| 200 | + | |
| 201 | + // This class is used by parsePageContents. Callers must instantiate a subclass of this with | |
| 202 | + // handlers defined to accept QPDFObjectHandles that are parsed from the stream. | |
| 203 | + class QPDF_DLL_CLASS ParserCallbacks | |
| 204 | + { | |
| 205 | + public: | |
| 206 | + QPDF_DLL | |
| 207 | + virtual ~ParserCallbacks() = default; | |
| 208 | + // One of the handleObject methods must be overridden. | |
| 209 | + QPDF_DLL | |
| 210 | + virtual void handleObject(QPDFObjectHandle); | |
| 211 | + QPDF_DLL | |
| 212 | + virtual void handleObject(QPDFObjectHandle, size_t offset, size_t length); | |
| 213 | + | |
| 214 | + virtual void handleEOF() = 0; | |
| 215 | + | |
| 216 | + // Override this if you want to know the full size of the contents, possibly after | |
| 217 | + // concatenation of multiple streams. This is called before the first call to handleObject. | |
| 218 | + QPDF_DLL | |
| 219 | + virtual void contentSize(size_t); | |
| 220 | + | |
| 221 | + protected: | |
| 222 | + // Implementors may call this method during parsing to terminate parsing early. This method | |
| 223 | + // throws an exception that is caught by parsePageContents, so its effect is immediate. | |
| 224 | + QPDF_DLL | |
| 225 | + void terminateParsing(); | |
| 226 | + }; | |
| 227 | + | |
| 228 | + // Convenience object for rectangles | |
| 229 | + class Rectangle | |
| 230 | + { | |
| 231 | + public: | |
| 232 | + Rectangle() : | |
| 233 | + llx(0.0), | |
| 234 | + lly(0.0), | |
| 235 | + urx(0.0), | |
| 236 | + ury(0.0) | |
| 237 | + { | |
| 238 | + } | |
| 239 | + Rectangle(double llx, double lly, double urx, double ury) : | |
| 240 | + llx(llx), | |
| 241 | + lly(lly), | |
| 242 | + urx(urx), | |
| 243 | + ury(ury) | |
| 244 | + { | |
| 245 | + } | |
| 246 | + | |
| 247 | + double llx; | |
| 248 | + double lly; | |
| 249 | + double urx; | |
| 250 | + double ury; | |
| 251 | + }; | |
| 252 | + | |
| 253 | + // Convenience object for transformation matrices. See also QPDFMatrix. Unfortunately we can't | |
| 254 | + // replace this with QPDFMatrix because QPDFMatrix's default constructor creates the identity | |
| 255 | + // transform matrix and this one is all zeroes. | |
| 256 | + class Matrix | |
| 257 | + { | |
| 258 | + public: | |
| 259 | + Matrix() : | |
| 260 | + a(0.0), | |
| 261 | + b(0.0), | |
| 262 | + c(0.0), | |
| 263 | + d(0.0), | |
| 264 | + e(0.0), | |
| 265 | + f(0.0) | |
| 266 | + { | |
| 267 | + } | |
| 268 | + Matrix(double a, double b, double c, double d, double e, double f) : | |
| 269 | + a(a), | |
| 270 | + b(b), | |
| 271 | + c(c), | |
| 272 | + d(d), | |
| 273 | + e(e), | |
| 274 | + f(f) | |
| 275 | + { | |
| 276 | + } | |
| 277 | + | |
| 278 | + double a; | |
| 279 | + double b; | |
| 280 | + double c; | |
| 281 | + double d; | |
| 282 | + double e; | |
| 283 | + double f; | |
| 284 | + }; | |
| 285 | + | |
| 286 | + QPDF_DLL | |
| 287 | + QPDFObjectHandle() = default; | |
| 288 | + QPDF_DLL | |
| 289 | + QPDFObjectHandle(QPDFObjectHandle const&) = default; | |
| 290 | + QPDF_DLL | |
| 291 | + QPDFObjectHandle& operator=(QPDFObjectHandle const&) = default; | |
| 292 | + | |
| 293 | + QPDF_DLL | |
| 294 | + QPDFObjectHandle(QPDFObjectHandle&&) = default; | |
| 295 | + QPDF_DLL | |
| 296 | + QPDFObjectHandle& operator=(QPDFObjectHandle&&) = default; | |
| 297 | + | |
| 298 | + QPDF_DLL | |
| 299 | + inline bool isInitialized() const; | |
| 300 | + | |
| 301 | + // This method returns true if the QPDFObjectHandle objects point to exactly the same underlying | |
| 302 | + // object, meaning that changes to one are reflected in the other, or "if you paint one, the | |
| 303 | + // other one changes color." This does not perform a structural comparison of the contents of | |
| 304 | + // the objects. | |
| 305 | + QPDF_DLL | |
| 306 | + bool isSameObjectAs(QPDFObjectHandle const&) const; | |
| 307 | + | |
| 308 | + // Return type code and type name of underlying object. These are useful for doing rapid type | |
| 309 | + // tests (like switch statements) or for testing and debugging. | |
| 310 | + QPDF_DLL | |
| 311 | + qpdf_object_type_e getTypeCode(); | |
| 312 | + QPDF_DLL | |
| 313 | + char const* getTypeName(); | |
| 314 | + | |
| 315 | + // Exactly one of these will return true for any initialized object. Operator and InlineImage | |
| 316 | + // are only allowed in content streams. | |
| 317 | + QPDF_DLL | |
| 318 | + bool isBool(); | |
| 319 | + QPDF_DLL | |
| 320 | + bool isNull(); | |
| 321 | + QPDF_DLL | |
| 322 | + bool isInteger(); | |
| 323 | + QPDF_DLL | |
| 324 | + bool isReal(); | |
| 325 | + QPDF_DLL | |
| 326 | + bool isName(); | |
| 327 | + QPDF_DLL | |
| 328 | + bool isString(); | |
| 329 | + QPDF_DLL | |
| 330 | + bool isOperator(); | |
| 331 | + QPDF_DLL | |
| 332 | + bool isInlineImage(); | |
| 333 | + QPDF_DLL | |
| 334 | + bool isArray(); | |
| 335 | + QPDF_DLL | |
| 336 | + bool isDictionary(); | |
| 337 | + QPDF_DLL | |
| 338 | + bool isStream(); | |
| 339 | + QPDF_DLL | |
| 340 | + bool isReserved(); | |
| 341 | + | |
| 342 | + // True for objects that are direct nulls. Does not attempt to resolve objects. This is intended | |
| 343 | + // for internal use, but it can be used as an efficient way to check for nulls that are not | |
| 344 | + // indirect objects. | |
| 345 | + QPDF_DLL | |
| 346 | + bool isDirectNull() const; | |
| 347 | + | |
| 348 | + // This returns true in addition to the query for the specific type for indirect objects. | |
| 349 | + QPDF_DLL | |
| 350 | + inline bool isIndirect() const; | |
| 351 | + | |
| 352 | +    // This returns true for indirect objects from a QPDF that has been destroyed. Trying to unparse | |
| 353 | + // such an object will throw a logic_error. | |
| 354 | + QPDF_DLL | |
| 355 | + bool isDestroyed(); | |
| 356 | + | |
| 357 | + // True for everything except array, dictionary, stream, word, and inline image. | |
| 358 | + QPDF_DLL | |
| 359 | + bool isScalar(); | |
| 360 | + | |
| 361 | + // True if the object is a name object representing the provided name. | |
| 362 | + QPDF_DLL | |
| 363 | + bool isNameAndEquals(std::string const& name); | |
| 364 | + | |
| 365 | + // True if the object is a dictionary of the specified type and subtype, if any. | |
| 366 | + QPDF_DLL | |
| 367 | + bool isDictionaryOfType(std::string const& type, std::string const& subtype = ""); | |
| 368 | + | |
| 369 | + // True if the object is a stream of the specified type and subtype, if any. | |
| 370 | + QPDF_DLL | |
| 371 | + bool isStreamOfType(std::string const& type, std::string const& subtype = ""); | |
| 372 | + | |
| 373 | + // Public factory methods | |
| 374 | + | |
| 375 | + // Wrap an object in an array if it is not already an array. This is a helper for cases in which | |
| 376 | + // something in a PDF may either be a single item or an array of items, which is a common idiom. | |
| 377 | + QPDF_DLL | |
| 378 | + QPDFObjectHandle wrapInArray(); | |
| 379 | + | |
| 380 | + // Construct an object of any type from a string representation of the object. Throws QPDFExc | |
| 381 | + // with an empty filename and an offset into the string if there is an error. Any indirect | |
| 382 | + // object syntax (obj gen R) will cause a logic_error exception to be thrown. If | |
| 383 | + // object_description is provided, it will appear in the message of any QPDFExc exception thrown | |
| 384 | + // for invalid syntax. See also the global `operator ""_qpdf` defined below. | |
| 385 | + QPDF_DLL | |
| 386 | + static QPDFObjectHandle | |
| 387 | + parse(std::string const& object_str, std::string const& object_description = ""); | |
| 388 | + | |
| 389 | + // Construct an object of any type from a string representation of the object. Indirect object | |
| 390 | + // syntax (obj gen R) is allowed and will create indirect references within the passed-in | |
| 391 | + // context. If object_description is provided, it will appear in the message of any QPDFExc | |
| 392 | + // exception thrown for invalid syntax. Note that you can't parse an indirect object reference | |
| 393 | + // all by itself as parse will stop at the end of the first complete object, which will just be | |
| 394 | + // the first number and will report that there is trailing data at the end of the string. | |
| 395 | + QPDF_DLL | |
| 396 | + static QPDFObjectHandle | |
| 397 | + parse(QPDF* context, std::string const& object_str, std::string const& object_description = ""); | |
| 398 | + | |
| 399 | + // Construct an object as above by reading from the given InputSource at its current position | |
| 400 | + // and using the tokenizer you supply. Indirect objects and encrypted strings are permitted. | |
| 401 | +    // This method was intended to be called by QPDF for parsing objects that are read from the | |
| 402 | + // object's input stream. | |
| 403 | + QPDF_DLL | |
| 404 | + static QPDFObjectHandle parse( | |
| 405 | + std::shared_ptr<InputSource> input, | |
| 406 | + std::string const& object_description, | |
| 407 | + QPDFTokenizer&, | |
| 408 | + bool& empty, | |
| 409 | + StringDecrypter* decrypter, | |
| 410 | + QPDF* context); | |
| 411 | + | |
| 412 | + // Return the offset where the object was found when parsed. A negative value means that the | |
| 413 | + // object was created without parsing. If the object is in a stream, the offset is from the | |
| 414 | + // beginning of the stream. Otherwise, the offset is from the beginning of the file. | |
| 415 | + QPDF_DLL | |
| 416 | + qpdf_offset_t getParsedOffset(); | |
| 417 | + | |
| 418 | + // Older method: stream_or_array should be the value of /Contents from a page object. It's more | |
| 419 | + // convenient to just call QPDFPageObjectHelper::parsePageContents on the page object, and error | |
| 420 | + // messages will also be more useful because the page object information will be known. | |
| 421 | + QPDF_DLL | |
| 422 | + static void parseContentStream(QPDFObjectHandle stream_or_array, ParserCallbacks* callbacks); | |
| 423 | + | |
| 424 | + // When called on a stream or stream array that is some page's content streams, do the same as | |
| 425 | + // pipePageContents. This method is a lower level way to do what | |
| 426 | + // QPDFPageObjectHelper::pipePageContents does, but it allows you to perform this operation on a | |
| 427 | + // contents object that is disconnected from a page object. The description argument should | |
| 428 | + // describe the containing page and is used in error messages. The all_description argument is | |
| 429 | + // initialized to something that could be used to describe the result of the pipeline. It is the | |
| 430 | + // description amended with the identifiers of the underlying objects. Please note that if there | |
| 431 | + // is an array of content streams, p->finish() is called after each stream. If you pass a | |
| 432 | + // pipeline that doesn't allow write() to be called after finish(), you can wrap it in an | |
| 433 | + // instance of Pl_Concatenate and then call manualFinish() on the Pl_Concatenate pipeline at the | |
| 434 | + // end. | |
| 435 | + QPDF_DLL | |
| 436 | + void | |
| 437 | + pipeContentStreams(Pipeline* p, std::string const& description, std::string& all_description); | |
| 438 | + | |
| 439 | + // As of qpdf 8, it is possible to add custom token filters to a stream. The tokenized stream | |
| 440 | + // data is passed through the token filter after all original filters but before content stream | |
| 441 | + // normalization if requested. This is a low-level interface to add it to a stream. You will | |
| 442 | + // usually want to call QPDFPageObjectHelper::addContentTokenFilter instead, which can be | |
| 443 | + // applied to a page object, and which will automatically handle the case of pages whose | |
| 444 | + // contents are split across multiple streams. | |
| 445 | + QPDF_DLL | |
| 446 | + void addTokenFilter(std::shared_ptr<TokenFilter> token_filter); | |
| 447 | + | |
| 448 | + // Legacy helpers for parsing content streams. These methods are not going away, but newer code | |
| 449 | +    // should call the corresponding methods in QPDFPageObjectHelper instead. The specification and | |
| 450 | + // behavior of these methods are the same as the identically named methods in that class, but | |
| 451 | + // newer functionality will be added there. | |
| 452 | + QPDF_DLL | |
| 453 | + void parsePageContents(ParserCallbacks* callbacks); | |
| 454 | + QPDF_DLL | |
| 455 | + void filterPageContents(TokenFilter* filter, Pipeline* next = nullptr); | |
| 456 | + // See comments for QPDFPageObjectHelper::pipeContents. | |
| 457 | + QPDF_DLL | |
| 458 | + void pipePageContents(Pipeline* p); | |
| 459 | + QPDF_DLL | |
| 460 | + void addContentTokenFilter(std::shared_ptr<TokenFilter> token_filter); | |
| 461 | + // End legacy content stream helpers | |
| 462 | + | |
| 463 | + // Called on a stream to filter the stream as if it were page contents. This can be used to | |
| 464 | + // apply a TokenFilter to a form XObject, whose data is in the same format as a content stream. | |
| 465 | + QPDF_DLL | |
| 466 | + void filterAsContents(TokenFilter* filter, Pipeline* next = nullptr); | |
| 467 | + // Called on a stream to parse the stream as page contents. This can be used to parse a form | |
| 468 | + // XObject. | |
| 469 | + QPDF_DLL | |
| 470 | + void parseAsContents(ParserCallbacks* callbacks); | |
| 471 | + | |
| 472 | + // Type-specific factories | |
| 473 | + QPDF_DLL | |
| 474 | + static QPDFObjectHandle newNull(); | |
| 475 | + QPDF_DLL | |
| 476 | + static QPDFObjectHandle newBool(bool value); | |
| 477 | + QPDF_DLL | |
| 478 | + static QPDFObjectHandle newInteger(long long value); | |
| 479 | + QPDF_DLL | |
| 480 | + static QPDFObjectHandle newReal(std::string const& value); | |
| 481 | + QPDF_DLL | |
| 482 | + static QPDFObjectHandle | |
| 483 | + newReal(double value, int decimal_places = 0, bool trim_trailing_zeroes = true); | |
| 484 | + // Note about name objects: qpdf's internal representation of a PDF name is a sequence of bytes, | |
| 485 | + // excluding the NUL character, and starting with a slash. Name objects as represented in the | |
| 486 | + // PDF specification can contain characters escaped with #, but such escaping is not of concern | |
| 487 | + // when calling QPDFObjectHandle methods not directly relating to parsing. For example, | |
| 488 | + // newName("/text/plain").getName() and parse("/text#2fplain").getName() both return | |
| 489 | + // "/text/plain", while newName("/text/plain").unparse() and parse("/text#2fplain").unparse() | |
| 490 | + // both return "/text#2fplain". When working with the qpdf API for creating, retrieving, and | |
| 491 | + // modifying objects, you want to work with the internal, canonical representation. For names | |
| 492 | + // containing alphanumeric characters, dashes, and underscores, there is no difference between | |
| 493 | + // the two representations. For a lengthy discussion, see | |
| 494 | + // https://github.com/qpdf/qpdf/discussions/625. | |
| 495 | + QPDF_DLL | |
| 496 | + static QPDFObjectHandle newName(std::string const& name); | |
| 497 | + QPDF_DLL | |
| 498 | + static QPDFObjectHandle newString(std::string const& str); | |
| 499 | + // Create a string encoded from the given utf8-encoded string appropriately encoded to appear in | |
| 500 | + // PDF files outside of content streams, such as in document metadata form field values, page | |
| 501 | + // labels, outlines, and similar locations. We try ASCII first, then PDFDocEncoding, then UTF-16 | |
| 502 | + // as needed to successfully encode all the characters. | |
| 503 | + QPDF_DLL | |
| 504 | + static QPDFObjectHandle newUnicodeString(std::string const& utf8_str); | |
| 505 | + QPDF_DLL | |
| 506 | + static QPDFObjectHandle newOperator(std::string const&); | |
| 507 | + QPDF_DLL | |
| 508 | + static QPDFObjectHandle newInlineImage(std::string const&); | |
| 509 | + QPDF_DLL | |
| 510 | + static QPDFObjectHandle newArray(); | |
| 511 | + QPDF_DLL | |
| 512 | + static QPDFObjectHandle newArray(std::vector<QPDFObjectHandle> const& items); | |
| 513 | + QPDF_DLL | |
| 514 | + static QPDFObjectHandle newArray(Rectangle const&); | |
| 515 | + QPDF_DLL | |
| 516 | + static QPDFObjectHandle newArray(Matrix const&); | |
| 517 | + QPDF_DLL | |
| 518 | + static QPDFObjectHandle newArray(QPDFMatrix const&); | |
| 519 | + QPDF_DLL | |
| 520 | + static QPDFObjectHandle newDictionary(); | |
| 521 | + QPDF_DLL | |
| 522 | + static QPDFObjectHandle newDictionary(std::map<std::string, QPDFObjectHandle> const& items); | |
| 523 | + | |
| 524 | + // Create an array from a rectangle. Equivalent to the rectangle form of newArray. | |
| 525 | + QPDF_DLL | |
| 526 | + static QPDFObjectHandle newFromRectangle(Rectangle const&); | |
| 527 | + // Create an array from a matrix. Equivalent to the matrix form of newArray. | |
| 528 | + QPDF_DLL | |
| 529 | + static QPDFObjectHandle newFromMatrix(Matrix const&); | |
| 530 | + QPDF_DLL | |
| 531 | + static QPDFObjectHandle newFromMatrix(QPDFMatrix const&); | |
| 532 | + | |
| 533 | +    // Note: new stream creation methods were added to the QPDF class starting with | |
| 534 | + // version 11.2.0. The ones in this class are here for backward compatibility. | |
| 535 | + | |
| 536 | + // Create a new stream and associate it with the given qpdf object. A subsequent call must be | |
| 537 | + // made to replaceStreamData() to provide data for the stream. The stream's dictionary may be | |
| 538 | + // retrieved by calling getDict(), and the resulting dictionary may be modified. Alternatively, | |
| 539 | + // you can create a new dictionary and call replaceDict to install it. From QPDF 11.2, you can | |
| 540 | + // call QPDF::newStream() instead. | |
| 541 | + QPDF_DLL | |
| 542 | + static QPDFObjectHandle newStream(QPDF* qpdf); | |
| 543 | + | |
| 544 | + // Create a new stream and associate it with the given qpdf object. Use the given buffer as the | |
| 545 | + // stream data. The stream dictionary's /Length key will automatically be set to the size of the | |
| 546 | + // data buffer. If additional keys are required, the stream's dictionary may be retrieved by | |
| 547 | + // calling getDict(), and the resulting dictionary may be modified. This method is just a | |
| 548 | + // convenient wrapper around newStream() and replaceStreamData(). It is a convenience | |
| 549 | + // method for streams that require no parameters beyond the stream length. Note that you don't | |
| 550 | + // have to deal with compression yourself if you use QPDFWriter. By default, QPDFWriter will | |
| 551 | + // automatically compress uncompressed stream data. Example programs are provided that | |
| 552 | + // illustrate this. From QPDF 11.2, you can call QPDF::newStream() | |
| 553 | + // instead. | |
| 554 | + QPDF_DLL | |
| 555 | + static QPDFObjectHandle newStream(QPDF* qpdf, std::shared_ptr<Buffer> data); | |
| 556 | + | |
| 557 | + // Create new stream with data from string. This method will create a copy of the data rather | |
| 558 | + // than using the user-provided buffer as in the std::shared_ptr<Buffer> version of newStream. | |
| 559 | + // From QPDF 11.2, you can call QPDF::newStream() instead. | |
| 560 | + QPDF_DLL | |
| 561 | + static QPDFObjectHandle newStream(QPDF* qpdf, std::string const& data); | |
| 562 | + | |
| 563 | + // A reserved object is a special sentinel used for qpdf to reserve a spot for an object that is | |
| 564 | + // going to be added to the QPDF object. Normally you don't have to use this type since you can | |
| 565 | + // just call QPDF::makeIndirectObject. However, in some cases, if you have to create objects | |
| 566 | + // with circular references, you may need to create a reserved object so that you can have a | |
| 567 | + // reference to it and then replace the object later. Reserved objects have the special | |
| 568 | + // property that they can't be resolved to direct objects. This makes it possible to replace a | |
| 569 | + // reserved object with a new object while preserving existing references to them. When you are | |
| 570 | + // ready to replace a reserved object with its replacement, use QPDF::replaceReserved for this | |
| 571 | + // purpose rather than the more general QPDF::replaceObject. It is an error to try to write a | |
| 572 | + // QPDF with QPDFWriter if it has any reserved objects in it. From QPDF 11.4, you can call | |
| 573 | + // QPDF::newReserved() instead. | |
| 574 | + QPDF_DLL | |
| 575 | + static QPDFObjectHandle newReserved(QPDF* qpdf); | |
| 576 | + | |
| 577 | + // Provide an owning qpdf and object description. The library does this automatically with | |
| 578 | + // objects that are read from the input PDF and with objects that are created programmatically | |
| 579 | + // and inserted into the QPDF as a new indirect object. Most end user code will not need to call | |
| 580 | + // this. If an object has an owning qpdf and object description, it enables qpdf to give | |
| 581 | + // warnings with proper context in some cases where it would otherwise raise exceptions. It is | |
| 582 | + // okay to add objects without an owning_qpdf to objects that have one, but it is an error to | |
| 583 | + // have a QPDF contain objects with owning_qpdf set to something else. To add objects from | |
| 584 | + // another qpdf, use copyForeignObject instead. | |
| 585 | + QPDF_DLL | |
| 586 | + void setObjectDescription(QPDF* owning_qpdf, std::string const& object_description); | |
| 587 | + QPDF_DLL | |
| 588 | + bool hasObjectDescription(); | |
| 589 | + | |
| 590 | + // Accessor methods | |
| 591 | + // | |
| 592 | + // (Note: this comment is referenced in qpdf-c.h and the manual.) | |
| 593 | + // | |
| 594 | + // In PDF files, objects have specific types, but there is nothing that prevents PDF files from | |
| 595 | + // containing objects of types that aren't expected by the specification. | |
| 596 | + // | |
| 597 | + // There are two flavors of accessor methods: | |
| 598 | + // | |
| 599 | + // * getSomethingValue() returns the value and issues a type warning if the type is incorrect. | |
| 600 | + // | |
| 601 | + // * getValueAsSomething() returns false if the value is the wrong type. Otherwise, it returns | |
| 602 | + // true and initializes a reference of the appropriate type. These methods never issue type | |
| 603 | + // warnings. | |
| 604 | + // | |
| 605 | + // The getSomethingValue() accessors and some of the other methods expect objects of a | |
| 606 | + // particular type. Prior to qpdf 8, calling an accessor on an object of the wrong type, such as | |
| 607 | + // trying to get a dictionary key from an array, trying to get the string value of a number, | |
| 608 | + // etc., would throw an exception, but since qpdf 8, qpdf issues a warning and recovers using | |
| 609 | + // the following behavior: | |
| 610 | + // | |
| 611 | + // * Requesting a value of the wrong type (int value from string, array item from a scalar or | |
| 612 | + // dictionary, etc.) will return a zero-like value for that type: false for boolean, 0 for | |
| 613 | + // number, the empty string for string, or the null object for an object handle. | |
| 614 | + // | |
| 615 | + // * Accessing an array item that is out of bounds will return a null object. | |
| 616 | + // | |
| 617 | + // * Attempts to mutate an object of the wrong type (e.g., attempting to add a dictionary key to | |
| 618 | + // a scalar or array) will be ignored. | |
| 619 | + // | |
| 620 | + // When any of these fallback behaviors are used, qpdf issues a warning. Starting in qpdf 10.5, | |
| 621 | + // these warnings have the error code qpdf_e_object. Prior to 10.5, they had the error code | |
| 622 | + // qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with a QPDF object (as is the case | |
| 623 | + // for all objects whose origin was a PDF file), the warning is issued using the normal warning | |
| 624 | + // mechanism (as described in QPDF.hh), making it possible to suppress or otherwise detect them. | |
| 625 | + // If the QPDFObjectHandle is not associated with a QPDF object (meaning it was created | |
| 626 | + // programmatically), an exception will be thrown. | |
| 627 | + // | |
| 628 | + // The way to avoid getting any type warnings or exceptions, even when working with malformed | |
| 629 | + // PDF files, is to always check the type of a QPDFObjectHandle before accessing it (for | |
| 630 | + // example, make sure that isString() returns true before calling getStringValue()) and to | |
| 631 | + // always be sure that any array indices are in bounds. | |
| 632 | + // | |
| 633 | + // For additional discussion and rationale for this behavior, see the section in the QPDF manual | |
| 634 | + // entitled "Object Accessor Methods". | |
| 635 | + | |
| 636 | + // Methods for bool objects | |
| 637 | + QPDF_DLL | |
| 638 | + bool getBoolValue(); | |
| 639 | + QPDF_DLL | |
| 640 | + bool getValueAsBool(bool&); | |
| 641 | + | |
| 642 | + // Methods for integer objects. Note: if an integer value is too big (too far away from zero in | |
| 643 | + // either direction) to fit in the requested return type, the maximum or minimum value for that | |
| 644 | + // return type may be returned. For example, on a system with 32-bit int, a numeric object with | |
| 645 | + // a value of 2^40 (or anything too big for 32 bits) will be returned as INT_MAX. | |
| 646 | + QPDF_DLL | |
| 647 | + long long getIntValue(); | |
| 648 | + QPDF_DLL | |
| 649 | + bool getValueAsInt(long long&); | |
| 650 | + QPDF_DLL | |
| 651 | + int getIntValueAsInt(); | |
| 652 | + QPDF_DLL | |
| 653 | + bool getValueAsInt(int&); | |
| 654 | + QPDF_DLL | |
| 655 | + unsigned long long getUIntValue(); | |
| 656 | + QPDF_DLL | |
| 657 | + bool getValueAsUInt(unsigned long long&); | |
| 658 | + QPDF_DLL | |
| 659 | + unsigned int getUIntValueAsUInt(); | |
| 660 | + QPDF_DLL | |
| 661 | + bool getValueAsUInt(unsigned int&); | |
| 662 | + | |
| 663 | + // Methods for real objects | |
| 664 | + QPDF_DLL | |
| 665 | + std::string getRealValue(); | |
| 666 | + QPDF_DLL | |
| 667 | + bool getValueAsReal(std::string&); | |
| 668 | + | |
| 669 | + // Methods that work for both integer and real objects | |
| 670 | + QPDF_DLL | |
| 671 | + bool isNumber(); | |
| 672 | + QPDF_DLL | |
| 673 | + double getNumericValue(); | |
| 674 | + QPDF_DLL | |
| 675 | + bool getValueAsNumber(double&); | |
| 676 | + | |
| 677 | + // Methods for name objects. The returned name value is in qpdf's canonical form with all | |
| 678 | + // escaping resolved. See comments for newName() for details. | |
| 679 | + QPDF_DLL | |
| 680 | + std::string getName(); | |
| 681 | + QPDF_DLL | |
| 682 | + bool getValueAsName(std::string&); | |
| 683 | + | |
| 684 | + // Methods for string objects | |
| 685 | + QPDF_DLL | |
| 686 | + std::string getStringValue(); | |
| 687 | + QPDF_DLL | |
| 688 | + bool getValueAsString(std::string&); | |
| 689 | + | |
| 690 | + // If a string starts with the UTF-16 marker, it is converted from UTF-16 to UTF-8. Otherwise, | |
| 691 | + // it is treated as a string encoded with PDF Doc Encoding. PDF Doc Encoding is identical to | |
| 692 | + // ISO-8859-1 except in the range from 0200 through 0240, where there is a mapping of characters | |
| 693 | + // to Unicode. QPDF versions prior to version 8.0.0 erroneously left characters in that range | |
| 694 | + // unmapped. | |
| 695 | + QPDF_DLL | |
| 696 | + std::string getUTF8Value(); | |
| 697 | + QPDF_DLL | |
| 698 | + bool getValueAsUTF8(std::string&); | |
| 699 | + | |
| 700 | + // Methods for content stream objects | |
| 701 | + QPDF_DLL | |
| 702 | + std::string getOperatorValue(); | |
| 703 | + QPDF_DLL | |
| 704 | + bool getValueAsOperator(std::string&); | |
| 705 | + QPDF_DLL | |
| 706 | + std::string getInlineImageValue(); | |
| 707 | + QPDF_DLL | |
| 708 | + bool getValueAsInlineImage(std::string&); | |
| 709 | + | |
| 710 | + // Methods for array objects; see also name and array objects. | |
| 711 | + | |
| 712 | + // Return an object that enables iteration over members. You can do | |
| 713 | + // | |
| 714 | + // for (auto iter: obj.aitems()) | |
| 715 | + // { | |
| 716 | + // // iter is an array element | |
| 717 | + // } | |
| 718 | + class QPDFArrayItems; | |
| 719 | + QPDF_DLL | |
| 720 | + QPDFArrayItems aitems(); | |
| 721 | + | |
| 722 | + QPDF_DLL | |
| 723 | + int getArrayNItems(); | |
| 724 | + QPDF_DLL | |
| 725 | + QPDFObjectHandle getArrayItem(int n); | |
| 726 | + // Note: QPDF arrays internally optimize memory for arrays containing lots of nulls. Calling | |
| 727 | + // getArrayAsVector may cause a lot of memory to be allocated for very large arrays with lots of | |
| 728 | + // nulls. | |
| 729 | + QPDF_DLL | |
| 730 | + std::vector<QPDFObjectHandle> getArrayAsVector(); | |
| 731 | + QPDF_DLL | |
| 732 | + bool isRectangle(); | |
| 733 | + // If the array is an array of four numeric values, return as a rectangle. Otherwise, return the | |
| 734 | + // rectangle [0, 0, 0, 0] | |
| 735 | + QPDF_DLL | |
| 736 | + Rectangle getArrayAsRectangle(); | |
| 737 | + QPDF_DLL | |
| 738 | + bool isMatrix(); | |
| 739 | + // If the array is an array of six numeric values, return as a matrix. Otherwise, return the | |
| 740 | + // matrix [1, 0, 0, 1, 0, 0] | |
| 741 | + QPDF_DLL | |
| 742 | + Matrix getArrayAsMatrix(); | |
| 743 | + | |
| 744 | + // Methods for dictionary objects. In all dictionary methods, keys are specified/represented as | |
| 745 | + // canonical name strings starting with a leading slash and not containing any PDF syntax | |
| 746 | + // escaping. See comments for getName() for details. | |
| 747 | + | |
| 748 | + // Return an object that enables iteration over members. You can do | |
| 749 | + // | |
| 750 | + // for (auto iter: obj.ditems()) | |
| 751 | + // { | |
| 752 | + // // iter.first is the key | |
| 753 | + // // iter.second is the value | |
| 754 | + // } | |
| 755 | + class QPDFDictItems; | |
| 756 | + QPDF_DLL | |
| 757 | + QPDFDictItems ditems(); | |
| 758 | + | |
| 759 | + // Return true if key is present. Keys with null values are treated as if they are not present. | |
| 760 | + // This is as per the PDF spec. | |
| 761 | + QPDF_DLL | |
| 762 | + bool hasKey(std::string const&); | |
| 763 | + // Return the value for the key. If the key is not present, null is returned. | |
| 764 | + QPDF_DLL | |
| 765 | + QPDFObjectHandle getKey(std::string const&); | |
| 766 | + // If the object is null, return null. Otherwise, call getKey(). This makes it easier to access | |
| 767 | + // lower-level dictionaries, as in | |
| 768 | + // auto font = page.getKeyIfDict("/Resources").getKeyIfDict("/Font"); | |
| 769 | + QPDF_DLL | |
| 770 | + QPDFObjectHandle getKeyIfDict(std::string const&); | |
| 771 | + // Return all keys. Keys with null values are treated as if they are not present. This is as | |
| 772 | + // per the PDF spec. | |
| 773 | + QPDF_DLL | |
| 774 | + std::set<std::string> getKeys(); | |
| 775 | + // Return dictionary as a map. Entries with null values are included. | |
| 776 | + QPDF_DLL | |
| 777 | + std::map<std::string, QPDFObjectHandle> getDictAsMap(); | |
| 778 | + | |
| 779 | + // Methods for name and array objects. The name value is in qpdf's canonical form with all | |
| 780 | + // escaping resolved. See comments for newName() for details. | |
| 781 | + QPDF_DLL | |
| 782 | + bool isOrHasName(std::string const&); | |
| 783 | + | |
| 784 | + // Make all resources in a resource dictionary indirect. This just goes through all entries of | |
| 785 | + // top-level subdictionaries and converts any direct objects to indirect objects. This can be | |
| 786 | + // useful to call before mergeResources if it is going to be called multiple times to prevent | |
| 787 | + // resources from being copied multiple times. | |
| 788 | + QPDF_DLL | |
| 789 | + void makeResourcesIndirect(QPDF& owning_qpdf); | |
| 790 | + | |
| 791 | + // Merge resource dictionaries. If the "conflicts" parameter is provided, conflicts in | |
| 792 | + // dictionary subitems are resolved, and "conflicts" is initialized to a map such that | |
| 793 | + // conflicts[resource_type][old_key] == [new_key] | |
| 794 | + // | |
| 795 | + // See also makeResourcesIndirect, which can be useful to call before calling this. | |
| 796 | + // | |
| 797 | + // This method does nothing if both this object and the other object are not dictionaries. | |
| 798 | + // Otherwise, it has the following behavior, where "object" refers to the object whose method is | |
| 799 | + // invoked, and "other" refers to the argument: | |
| 800 | + // | |
| 801 | + // * For each key in "other" whose value is an array: | |
| 802 | + // * If "object" does not have that entry, shallow copy it. | |
| 803 | + // * Otherwise, if "object" has an array in the same place, append to that array any objects | |
| 804 | + // in "other"'s array that are not already present. | |
| 805 | + // * For each key in "other" whose value is a dictionary: | |
| 806 | + // * If "object" does not have that entry, shallow copy it. | |
| 807 | + // * Otherwise, for each key in the subdictionary: | |
| 808 | + // * If key is not present in "object"'s entry, shallow copy it if direct or just add it if | |
| 809 | + // indirect. | |
| 810 | + // * Otherwise, if conflicts are being detected: | |
| 811 | + // * If there is a key (oldkey) already in the dictionary that points to the same indirect | |
| 812 | + // destination as key, indicate that key was replaced by oldkey. This would happen if | |
| 813 | + // these two resource dictionaries have previously been merged. | |
| 814 | + // * Otherwise pick a new key (newkey) that is unique within the resource dictionary, | |
| 815 | + // store that in the resource dictionary with key's destination as its destination, and | |
| 816 | + // indicate that key was replaced by newkey. | |
| 817 | + // | |
| 818 | + // The primary purpose of this method is to facilitate merging of resource dictionaries that are | |
| 819 | + // supposed to have the same scope as each other. For example, this can be used to merge a form | |
| 820 | + // XObject's /Resources dictionary with a form field's /DR or to merge two /DR dictionaries. The | |
| 821 | + // "conflicts" parameter may be previously initialized. This method adds to whatever is already | |
| 822 | + // there, which can be useful when merging with multiple things. | |
| 823 | + QPDF_DLL | |
| 824 | + void mergeResources( | |
| 825 | + QPDFObjectHandle other, | |
| 826 | + std::map<std::string, std::map<std::string, std::string>>* conflicts = nullptr); | |
| 827 | + | |
| 828 | + // Get all resource names from a resource dictionary. If this object is a dictionary, this | |
| 829 | + // method returns a set of all the keys in all top-level subdictionaries. For resources | |
| 830 | + // dictionaries, this is the collection of names that may be referenced in the content stream. | |
| 831 | + QPDF_DLL | |
| 832 | + std::set<std::string> getResourceNames(); | |
| 833 | + | |
| 834 | + // Find a unique name within a resource dictionary starting with a given prefix. This method | |
| 835 | + // works by appending a number to the given prefix. It searches starting with min_suffix and | |
| 836 | + // sets min_suffix to the selected value upon return. This can be used to increase efficiency if | |
| 837 | + // adding multiple items with the same prefix. (Why doesn't it set min_suffix to the next | |
| 838 | + // number? Well, maybe you aren't going to actually use the name it returns.) If you are calling | |
| 839 | + // this multiple times on the same resource dictionary, you can initialize resource_names by | |
| 840 | + // calling getResourceNames(), incrementally update it as you add resources, and keep passing it | |
| 841 | + // in so that getUniqueResourceName doesn't have to traverse the resource dictionary each time | |
| 842 | + // it's called. | |
| 843 | + QPDF_DLL | |
| 844 | + std::string getUniqueResourceName( | |
| 845 | + std::string const& prefix, | |
| 846 | + int& min_suffix, | |
| 847 | + std::set<std::string>* resource_names = nullptr); | |
| 848 | + | |
| 849 | + // A QPDFObjectHandle has an owning QPDF if it is associated with ("owned by") a specific QPDF | |
| 850 | + // object. Indirect objects always have an owning QPDF. Direct objects that are read from the | |
| 851 | + // input source will also have an owning QPDF. Programmatically created objects will only have | |
| 852 | + // one if setObjectDescription was called. | |
| 853 | + // | |
| 854 | + // When the QPDF object that owns an object is destroyed, the object is changed into a null, and | |
| 855 | + // its owner is cleared. Therefore you should not retain the value of an owning QPDF beyond the | |
| 856 | + // life of the QPDF. If in doubt, ask for it each time you need it. | |
| 857 | + | |
| 858 | + // getOwningQPDF returns a pointer to the owning QPDF if the object has one. Otherwise, it | |
| 859 | + // returns a null pointer. Use this when you are able to handle the case of an object that | |
| 860 | + // doesn't have an owning QPDF. | |
| 861 | + QPDF_DLL | |
| 862 | + QPDF* getOwningQPDF() const; | |
| 863 | + // getQPDF, new in qpdf 11, returns a reference to the owning QPDF. If there is none, it throws a | |
| 864 | + // runtime_error. Use this when you know the object has to have an owning QPDF, such as when | |
| 865 | + // it's a known indirect object. Since streams are always indirect objects, this method can be | |
| 866 | + // used safely for streams. If error_msg is specified, it will be used as the contents of the | |
| 867 | + // runtime_error if there is no owner. | |
| 868 | + QPDF_DLL | |
| 869 | + QPDF& getQPDF(std::string const& error_msg = "") const; | |
| 870 | + | |
| 871 | + // Create a shallow copy of an object as a direct object, but do not traverse across indirect | |
| 872 | + // object boundaries. That means that, for dictionaries and arrays, any keys or items that were | |
| 873 | + // indirect objects will still be indirect objects that point to the same place. In the | |
| 874 | + // strictest sense, this is not a shallow copy because it recursively descends arrays and | |
| 875 | + // dictionaries; it just doesn't cross over indirect objects. See also unsafeShallowCopy(). You | |
| 876 | + // can't copy a stream this way. See copyStream() instead. | |
| 877 | + QPDF_DLL | |
| 878 | + QPDFObjectHandle shallowCopy(); | |
| 879 | + | |
| 880 | + // Create a true shallow copy of an array or dictionary, just copying the immediate items | |
| 881 | + // (array) or keys (dictionary). This is "unsafe" because, if you *modify* any of the items in | |
| 882 | + // the copy, you are modifying the original, which is almost never what you want. However, if | |
| 883 | + // your intention is merely to *replace* top-level items or keys and not to modify lower-level | |
| 884 | + // items in the copy, this method is much faster than shallowCopy(). | |
| 885 | + QPDF_DLL | |
| 886 | + QPDFObjectHandle unsafeShallowCopy(); | |
| 887 | + | |
| 888 | + // Create a copy of this stream. The new stream and the old stream are independent: after the | |
| 889 | + // copy, either the original or the copy's dictionary or data can be modified without affecting | |
| 890 | + // the other. This uses StreamDataProvider internally, so no unnecessary copies of the stream's | |
| 891 | + // data are made. If the source stream's data is already being provided by a StreamDataProvider, | |
| 892 | + // the new stream will use the same one, so you have to make sure your StreamDataProvider can | |
| 893 | + // handle that case. But if you're already using a StreamDataProvider, you probably don't need | |
| 894 | + // to call this method. | |
| 895 | + QPDF_DLL | |
| 896 | + QPDFObjectHandle copyStream(); | |
| 897 | + | |
| 898 | + // Mutator methods. | |
| 899 | + | |
| 900 | + // Since qpdf 11: for mutators that may add or remove an item, there are additional versions | |
| 901 | + // whose names contain "AndGet" that return the added or removed item. For example: | |
| 902 | + // | |
| 903 | + // auto new_dict = dict.replaceKeyAndGetNew( | |
| 904 | + // "/New", QPDFObjectHandle::newDictionary()); | |
| 905 | + // | |
| 906 | + // auto old_value = dict.replaceKeyAndGetOld( | |
| 907 | + // "/New", "(something)"_qpdf); | |
| 908 | + | |
| 909 | + // Recursively copy this object, making it direct. An exception is thrown if a loop is detected. | |
| 910 | + // With allow_streams true, keep indirect object references to streams. Otherwise, throw an | |
| 911 | + // exception if any sub-object is a stream. Note that, when allow_streams is true and a stream | |
| 912 | + // is found, the resulting object is still associated with the containing qpdf. When | |
| 913 | + // allow_streams is false, the object will no longer be connected to the original QPDF object | |
| 914 | + // after this call completes successfully. | |
| 915 | + QPDF_DLL | |
| 916 | + void makeDirect(bool allow_streams = false); | |
| 917 | + | |
| 918 | + // Mutator methods for array objects | |
| 919 | + QPDF_DLL | |
| 920 | + void setArrayItem(int, QPDFObjectHandle const&); | |
| 921 | + QPDF_DLL | |
| 922 | + void setArrayFromVector(std::vector<QPDFObjectHandle> const& items); | |
| 923 | + // Insert an item before the item at the given position ("at") so that it has that position | |
| 924 | + // after insertion. If "at" is equal to the size of the array, insert the item at the end. | |
| 925 | + QPDF_DLL | |
| 926 | + void insertItem(int at, QPDFObjectHandle const& item); | |
| 927 | + // Like insertItem but return the item that was inserted. | |
| 928 | + QPDF_DLL | |
| 929 | + QPDFObjectHandle insertItemAndGetNew(int at, QPDFObjectHandle const& item); | |
| 930 | + // Append an item to an array. | |
| 931 | + QPDF_DLL | |
| 932 | + void appendItem(QPDFObjectHandle const& item); | |
| 933 | + // Append an item, and return the newly added item. | |
| 934 | + QPDF_DLL | |
| 935 | + QPDFObjectHandle appendItemAndGetNew(QPDFObjectHandle const& item); | |
| 936 | + // Remove the item at that position, reducing the size of the array by one. | |
| 937 | + QPDF_DLL | |
| 938 | + void eraseItem(int at); | |
| 939 | + // Erase an item and return the item that was removed. | |
| 940 | + QPDF_DLL | |
| 941 | + QPDFObjectHandle eraseItemAndGetOld(int at); | |
| 942 | + | |
| 943 | + // Mutator methods for dictionary objects | |
| 944 | + | |
| 945 | + // Replace value of key, adding it if it does not exist. If value is null, remove the key. | |
| 946 | + QPDF_DLL | |
| 947 | + void replaceKey(std::string const& key, QPDFObjectHandle const& value); | |
| 948 | + // Replace value of key and return the value. | |
| 949 | + QPDF_DLL | |
| 950 | + QPDFObjectHandle replaceKeyAndGetNew(std::string const& key, QPDFObjectHandle const& value); | |
| 951 | + // Replace value of key and return the old value, or null if the key was previously not present. | |
| 952 | + QPDF_DLL | |
| 953 | + QPDFObjectHandle replaceKeyAndGetOld(std::string const& key, QPDFObjectHandle const& value); | |
| 954 | + // Remove key, doing nothing if key does not exist. | |
| 955 | + QPDF_DLL | |
| 956 | + void removeKey(std::string const& key); | |
| 957 | + // Remove key and return the old value. If the old value didn't exist, return a null object. | |
| 958 | + QPDF_DLL | |
| 959 | + QPDFObjectHandle removeKeyAndGetOld(std::string const& key); | |
| 960 | + | |
| 961 | + // ABI: Remove in qpdf 12 | |
| 962 | + [[deprecated("use replaceKey -- it does the same thing")]] QPDF_DLL void | |
| 963 | + replaceOrRemoveKey(std::string const& key, QPDFObjectHandle const&); | |
| 964 | + | |
| 965 | + // Methods for stream objects | |
| 966 | + QPDF_DLL | |
| 967 | + QPDFObjectHandle getDict(); | |
| 968 | + | |
| 969 | + // By default, or if true passed, QPDFWriter will attempt to filter a stream based on decode | |
| 970 | + // level, whether compression is enabled, and its ability to filter. Passing false will prevent | |
| 971 | + // QPDFWriter from attempting to filter the stream even if it can. This includes both decoding | |
| 972 | + // and compressing. This makes it possible for you to prevent QPDFWriter from uncompressing and | |
| 973 | + // recompressing a stream that it knows how to operate on for any application-specific reason, | |
| 974 | + // such as that you have already optimized its filtering. Note that this doesn't affect any | |
| 975 | + // other ways to get the stream's data, such as pipeStreamData or getStreamData. | |
| 976 | + QPDF_DLL | |
| 977 | + void setFilterOnWrite(bool); | |
| 978 | + QPDF_DLL | |
| 979 | + bool getFilterOnWrite(); | |
| 980 | + | |
| 981 | + // If addTokenFilter has been called for this stream, then the original data should be | |
| 982 | + // considered to be modified. This means we should avoid optimizations such as not filtering a | |
| 983 | + // stream that is already compressed. | |
| 984 | + QPDF_DLL | |
| 985 | + bool isDataModified(); | |
| 986 | + | |
| 987 | + // Returns filtered (uncompressed) stream data. Throws an exception if the stream is filtered | |
| 988 | + // and we can't decode it. | |
| 989 | + QPDF_DLL | |
| 990 | + std::shared_ptr<Buffer> getStreamData(qpdf_stream_decode_level_e level = qpdf_dl_generalized); | |
| 991 | + | |
| 992 | + // Returns unfiltered (raw) stream data. | |
| 993 | + QPDF_DLL | |
| 994 | + std::shared_ptr<Buffer> getRawStreamData(); | |
| 995 | + | |
| 996 | + // Write stream data through the given pipeline. A null pipeline value may be used if all you | |
| 997 | + // want to do is determine whether a stream is filterable and would be filtered based on the | |
| 998 | + // provided flags. If flags is 0, write raw stream data and return false. Otherwise, the flags | |
| 999 | + // alter the behavior in the following way: | |
| 1000 | + // | |
| 1001 | + // encode_flags: | |
| 1002 | + // | |
| 1003 | + // qpdf_sf_compress -- compress data with /FlateDecode if no other compression filters are | |
| 1004 | + // applied. | |
| 1005 | + // | |
| 1006 | + // qpdf_sf_normalize -- tokenize as content stream and normalize tokens | |
| 1007 | + // | |
| 1008 | + // decode_level: | |
| 1009 | + // | |
| 1010 | + // qpdf_dl_none -- do not decode any streams. | |
| 1011 | + // | |
| 1012 | + // qpdf_dl_generalized -- decode supported general-purpose filters. This includes | |
| 1013 | + // /ASCIIHexDecode, /ASCII85Decode, /LZWDecode, and /FlateDecode. | |
| 1014 | + // | |
| 1015 | + // qpdf_dl_specialized -- in addition to generalized filters, also decode supported non-lossy | |
| 1016 | + // specialized filters. This includes /RunLengthDecode. | |
| 1017 | + // | |
| 1018 | + // qpdf_dl_all -- in addition to generalized and non-lossy specialized filters, decode supported | |
| 1019 | + // lossy filters. This includes /DCTDecode. | |
| 1020 | + // | |
| 1021 | + // If, based on the flags and the filters and decode parameters, we determine that we know how | |
| 1022 | + // to apply all requested filters, do so and return true if we are successful. | |
| 1023 | + // | |
| 1024 | + // The exact meaning of the return value differs among the different versions of this function, but | |
| 1025 | + // for any version, the meaning has been the same. For the main version, added in qpdf 10, the | |
| 1026 | + // return value indicates whether the overall operation succeeded. The filter parameter, if | |
| 1027 | + // specified, will be set to whether or not filtering was attempted. If filtering was not | |
| 1028 | + // requested, this value will be false even if the overall operation succeeded. | |
| 1029 | + // | |
| 1030 | + // If filtering is requested but this method returns false, it means there was some error in the | |
| 1031 | + // filtering, in which case the resulting data is likely partially filtered and/or incomplete | |
| 1032 | + // and may not be consistent with the configured filters. QPDFWriter handles this by attempting | |
| 1033 | + // to get the stream data without filtering, but callers should consider a false return value | |
| 1034 | + // when decode_level is not qpdf_dl_none to be a potential loss of data. If you intend to retry | |
| 1035 | + // in that case, pass true as the value of will_retry. This changes the warning issued by the | |
| 1036 | + // library to indicate that the operation will be retried without filtering to avoid data loss. | |
| 1037 | + | |
| 1038 | + // Return value is overall success, even if filtering is not requested. | |
| 1039 | + QPDF_DLL | |
| 1040 | + bool pipeStreamData( | |
| 1041 | + Pipeline*, | |
| 1042 | + bool* filtering_attempted, | |
| 1043 | + int encode_flags, | |
| 1044 | + qpdf_stream_decode_level_e decode_level, | |
| 1045 | + bool suppress_warnings = false, | |
| 1046 | + bool will_retry = false); | |
| 1047 | + | |
| 1048 | + // Legacy version. Return value is whether filtering was attempted. There is no way to determine | |
| 1049 | + // success if filtering was not attempted. | |
| 1050 | + QPDF_DLL | |
| 1051 | + bool pipeStreamData( | |
| 1052 | + Pipeline*, | |
| 1053 | + int encode_flags, | |
| 1054 | + qpdf_stream_decode_level_e decode_level, | |
| 1055 | + bool suppress_warnings = false, | |
| 1056 | + bool will_retry = false); | |
| 1057 | + | |
| 1058 | + // Legacy pipeStreamData. This maps to the flags-based pipeStreamData as follows: | |
| 1059 | + // filter = false -> encode_flags = 0 | |
| 1060 | + // filter = true -> decode_level = qpdf_dl_generalized | |
| 1061 | + // normalize = true -> encode_flags |= qpdf_sf_normalize | |
| 1062 | + // compress = true -> encode_flags |= qpdf_sf_compress | |
| 1063 | + // Return value is whether filtering was attempted. | |
| 1064 | + QPDF_DLL | |
| 1065 | + bool pipeStreamData(Pipeline*, bool filter, bool normalize, bool compress); | |
| 1066 | + | |
| 1067 | + // Replace a stream's dictionary. The new dictionary must be consistent with the stream's data. | |
| 1068 | + // This is most appropriately used when creating streams from scratch that will use a stream | |
| 1069 | + // data provider and therefore start with an empty dictionary. It may be more convenient in | |
| 1070 | + // this case than calling getDict and modifying it for each key. The pdf-create example does | |
| 1071 | + // this. | |
| 1072 | + QPDF_DLL | |
| 1073 | + void replaceDict(QPDFObjectHandle const&); | |
| 1074 | + | |
| 1075 | + // REPLACING STREAM DATA | |
| 1076 | + | |
| 1077 | + // Note about all replaceStreamData methods: whatever values are passed as filter and | |
| 1078 | + // decode_parms will overwrite /Filter and /DecodeParms in the stream. Passing a null object | |
| 1079 | + // (QPDFObjectHandle::newNull()) will remove those values from the stream dictionary. From qpdf | |
| 1080 | + // 11, passing an *uninitialized* QPDFObjectHandle (QPDFObjectHandle()) will leave any existing | |
| 1081 | + // values untouched. | |
| 1082 | + | |
| 1083 | + // Replace this stream's stream data with the given data buffer. The stream's /Length key is | |
| 1084 | + // replaced with the length of the data buffer. The stream is interpreted as if the data read | |
| 1085 | + // from the file, after any decryption filters have been applied, is as presented. | |
| 1086 | + QPDF_DLL | |
| 1087 | + void replaceStreamData( | |
| 1088 | + std::shared_ptr<Buffer> data, | |
| 1089 | + QPDFObjectHandle const& filter, | |
| 1090 | + QPDFObjectHandle const& decode_parms); | |
| 1091 | + | |
| 1092 | + // Replace the stream's stream data with the given string. This method will create a copy of the | |
| 1093 | + // data rather than using the user-provided buffer as in the std::shared_ptr<Buffer> version of | |
| 1094 | + // replaceStreamData. | |
| 1095 | + QPDF_DLL | |
| 1096 | + void replaceStreamData( | |
| 1097 | + std::string const& data, | |
| 1098 | + QPDFObjectHandle const& filter, | |
| 1099 | + QPDFObjectHandle const& decode_parms); | |
| 1100 | + | |
| 1101 | + // As above, replace this stream's stream data. Instead of directly providing a buffer with the | |
| 1102 | + // stream data, call the given provider's provideStreamData method. See comments on the | |
| 1103 | + // StreamDataProvider class (defined above) for details on the method. The data must be | |
| 1104 | + // consistent with filter and decode_parms as provided. Although it is more complex to use this | |
| 1105 | + // form of replaceStreamData than the one that takes a buffer, it makes it possible to avoid | |
| 1106 | + // allocating memory for the stream data. Example programs are provided that use both forms of | |
| 1107 | + // replaceStreamData. | |
| 1108 | + | |
| 1109 | + // Note about stream length: for any given stream, the provider must provide the same amount of | |
| 1110 | + // data each time it is called. This is critical for making linearization work properly. | |
| 1111 | + // Versions of qpdf before 3.0.0 required a length to be specified here. Starting with | |
| 1112 | + // version 3.0.0, this is no longer necessary (or permitted). The first time the stream data | |
| 1113 | + // provider is invoked for a given stream, the actual length is stored. Subsequent times, it is | |
| 1114 | + // enforced that the length be the same as the first time. | |
| 1115 | + | |
| 1116 | + // If you have gotten a compile error here while building code that worked with older versions | |
| 1117 | + // of qpdf, just omit the length parameter. You can also simplify your code by not having to | |
| 1118 | + // compute the length in advance. | |
| 1119 | + QPDF_DLL | |
| 1120 | + void replaceStreamData( | |
| 1121 | + std::shared_ptr<StreamDataProvider> provider, | |
| 1122 | + QPDFObjectHandle const& filter, | |
| 1123 | + QPDFObjectHandle const& decode_parms); | |
| 1124 | + | |
| 1125 | + // Starting in qpdf 10.2, you can use C++-11 function objects instead of StreamDataProvider. | |
| 1126 | + | |
| 1127 | + // The provider should write the stream data to the pipeline. For a one-liner to replace stream | |
| 1128 | + // data with the contents of a file, pass QUtil::file_provider(filename) as provider. | |
| 1129 | + QPDF_DLL | |
| 1130 | + void replaceStreamData( | |
| 1131 | + std::function<void(Pipeline*)> provider, | |
| 1132 | + QPDFObjectHandle const& filter, | |
| 1133 | + QPDFObjectHandle const& decode_parms); | |
| 1134 | + // The provider should write the stream data to the pipeline, returning true if it succeeded | |
| 1135 | + // without errors. | |
| 1136 | + QPDF_DLL | |
| 1137 | + void replaceStreamData( | |
| 1138 | + std::function<bool(Pipeline*, bool suppress_warnings, bool will_retry)> provider, | |
| 1139 | + QPDFObjectHandle const& filter, | |
| 1140 | + QPDFObjectHandle const& decode_parms); | |
| 1141 | + | |
| 1142 | + // Access object ID and generation. For direct objects, return object ID 0. | |
| 1143 | + | |
| 1144 | + // NOTE: Be careful about calling getObjectID() and getGeneration() directly as this can lead to | |
| 1145 | + // the pattern of depending on object ID or generation without the other. In general, when | |
| 1146 | + // keeping track of object IDs, it's better to use QPDFObjGen instead. | |
| 1147 | + | |
| 1148 | + QPDF_DLL | |
| 1149 | + QPDFObjGen getObjGen() const; | |
| 1150 | + QPDF_DLL | |
| 1151 | + inline int getObjectID() const; | |
| 1152 | + QPDF_DLL | |
| 1153 | + inline int getGeneration() const; | |
| 1154 | + | |
| 1155 | + QPDF_DLL | |
| 1156 | + std::string unparse(); | |
| 1157 | + QPDF_DLL | |
| 1158 | + std::string unparseResolved(); | |
| 1159 | + // For strings only, force binary representation. Otherwise, same as unparse. | |
| 1160 | + QPDF_DLL | |
| 1161 | + std::string unparseBinary(); | |
| 1162 | + | |
| 1163 | + // Return encoded as JSON. The constant JSON::LATEST can be used to specify the latest available | |
| 1164 | + // JSON version. The JSON is generated as follows: | |
| 1165 | + // * Arrays, dictionaries, booleans, nulls, integers, and real numbers are represented by their | |
| 1166 | + // native JSON types. | |
| 1167 | + // * Names are encoded as strings representing the canonical representation (after parsing #xx) | |
| 1168 | + // and preceded by a slash, just as unparse() returns. For example, the JSON for the | |
| 1169 | + // PDF-syntax name /Text#2fPlain would be "/Text/Plain". | |
| 1170 | + // * Indirect references are encoded as strings containing "obj gen R" | |
| 1171 | + // * Strings | |
| 1172 | + // * JSON v1: Strings are encoded as UTF-8 strings with unrepresentable binary characters | |
| 1173 | + // encoded as \uHHHH. Characters in PDF Doc encoding that don't have bidirectional Unicode | |
| 1174 | + // mappings are not reversible. There is no way to tell the difference between a string that | |
| 1175 | + // looks like a name or indirect object from an actual name or indirect object. | |
| 1176 | + // * JSON v2: | |
| 1177 | + // * Unicode strings and strings encoded with PDF Doc encoding that can be bidirectionally | |
| 1178 | + // mapped to Unicode (which is all strings without undefined characters) are represented | |
| 1179 | + // as "u:" followed by the UTF-8 encoded string. Example: | |
| 1180 | + // "u:potato". | |
| 1181 | + // * All other strings are represented as "b:" followed by a hexadecimal encoding of the | |
| 1182 | + // string. Example: "b:0102cacb" | |
| 1183 | + // * Streams | |
| 1184 | + // * JSON v1: Only the stream's dictionary is encoded. There is no way to tell a stream from a | |
| 1185 | + // dictionary other than context. | |
| 1186 | + // * JSON v2: A stream is encoded as {"dict": {...}} with the value being the encoding of the | |
| 1187 | + // stream's dictionary. Since "dict" does not otherwise represent anything, this is | |
| 1188 | + // unambiguous. The getStreamJSON() call can be used to add encoding of the stream's data. | |
| 1189 | + // * Object types that are only valid in content streams (inline image, operator) are serialized | |
| 1190 | + // as "null". Attempting to serialize a "reserved" object is an error. | |
| 1191 | + // If dereference_indirect is true and this is an indirect object, show the actual contents of | |
| 1192 | + // the object. The effect of dereference_indirect applies only to this object. It is not | |
| 1193 | + // recursive. | |
| 1194 | + QPDF_DLL | |
| 1195 | + JSON getJSON(int json_version, bool dereference_indirect = false); | |
| 1196 | + | |
| 1197 | + // Write the object encoded as JSON to a pipeline. This is equivalent to, but more efficient | |
| 1198 | + // than, calling getJSON(json_version, dereference_indirect).write(p, depth). See the | |
| 1199 | + // documentation for getJSON and JSON::write for further detail. | |
| 1200 | + QPDF_DLL | |
| 1201 | + void | |
| 1202 | + writeJSON(int json_version, Pipeline* p, bool dereference_indirect = false, size_t depth = 0); | |
| 1203 | + | |
| 1204 | + // Deprecated version uses v1 for backward compatibility. | |
| 1205 | + // ABI: remove for qpdf 12 | |
| 1206 | + [[deprecated("Use getJSON(int version)")]] QPDF_DLL JSON | |
| 1207 | + getJSON(bool dereference_indirect = false); | |
| 1208 | + | |
| 1209 | + // This method can be called on a stream to get a more extended JSON representation of the | |
| 1210 | + // stream that includes the stream's data. The JSON object returned is always a dictionary whose | |
| 1211 | + // "dict" key is an encoding of the stream's dictionary. The representation of the data is | |
| 1212 | + // determined by the json_data field. | |
| 1213 | + // | |
| 1214 | + // The json_data field may have the value qpdf_sj_none, qpdf_sj_inline, or qpdf_sj_file. | |
| 1215 | + // | |
| 1216 | + // If json_data is qpdf_sj_none, stream data is not represented. | |
| 1217 | + // | |
| 1218 | + // If json_data is qpdf_sj_inline or qpdf_sj_file, then stream data is filtered or not based on | |
| 1219 | + // the value of decode_level, which has the same meaning as with pipeStreamData. | |
| 1220 | + // | |
| 1221 | + // If json_data is qpdf_sj_inline, the base64-encoded stream data is included in the "data" | |
| 1222 | + // field of the dictionary that is returned. | |
| 1223 | + // | |
| 1224 | + // If json_data is qpdf_sj_file, then the Pipeline ("p") and data_filename argument must be | |
| 1225 | + // supplied. The value of data_filename is stored in the resulting json in the "datafile" key | |
| 1226 | + // but is not otherwise used. The stream data itself (raw or filtered depending on decode | |
| 1227 | + // level) is written to the pipeline via pipeStreamData(). | |
| 1228 | + // | |
| 1229 | + // NOTE: When json_data is qpdf_sj_inline, the QPDF object from which the stream originates must | |
| 1230 | + // remain valid until after the JSON object is written. | |
| 1231 | + QPDF_DLL | |
| 1232 | + JSON getStreamJSON( | |
| 1233 | + int json_version, | |
| 1234 | + qpdf_json_stream_data_e json_data, | |
| 1235 | + qpdf_stream_decode_level_e decode_level, | |
| 1236 | + Pipeline* p, | |
| 1237 | + std::string const& data_filename); | |
| 1238 | + | |
| 1239 | + // Legacy helper methods for commonly performed operations on pages. Newer code should use | |
| 1240 | + // QPDFPageObjectHelper instead. The specification and behavior of these methods are the same as | |
| 1241 | + // the identically named methods in that class, but newer functionality will be added there. | |
| 1242 | + QPDF_DLL | |
| 1243 | + std::map<std::string, QPDFObjectHandle> getPageImages(); | |
| 1244 | + QPDF_DLL | |
| 1245 | + std::vector<QPDFObjectHandle> getPageContents(); | |
| 1246 | + QPDF_DLL | |
| 1247 | + void addPageContents(QPDFObjectHandle contents, bool first); | |
| 1248 | + QPDF_DLL | |
| 1249 | + void rotatePage(int angle, bool relative); | |
| 1250 | + QPDF_DLL | |
| 1251 | + void coalesceContentStreams(); | |
| 1252 | + // End legacy page helpers | |
| 1253 | + | |
| 1254 | + // Issue a warning about this object if possible. If the object has a description, a warning | |
| 1255 | + // will be issued using the owning QPDF as context. Otherwise, a message will be written to the | |
| 1256 | + // default logger's error stream, which is standard error if not overridden. Objects read | |
| 1257 | + // normally from the file have descriptions. See comments on setObjectDescription for additional | |
| 1258 | + // details. | |
| 1259 | + QPDF_DLL | |
| 1260 | + void warnIfPossible(std::string const& warning); | |
| 1261 | + | |
| 1262 | + // Provide the friend classes listed below with access to the private disconnect() method so that they can disconnect recursively. | |
| 1263 | + class DisconnectAccess | |
| 1264 | + { | |
| 1265 | + friend class QPDF_Dictionary; | |
| 1266 | + friend class QPDF_Stream; | |
| 1267 | + | |
| 1268 | + private: | |
| 1269 | + static void | |
| 1270 | + disconnect(QPDFObjectHandle o) | |
| 1271 | + { | |
| 1272 | + o.disconnect(); // forwards to QPDFObjectHandle::disconnect(), which is declared private | |
| 1273 | + } | |
| 1274 | + }; | |
| 1275 | + | |
| 1276 | + // Convenience routines: each throws if the assumption is violated. Your code will be better if you | |
| 1277 | + // call one of the isType methods and handle the case of the type being wrong, but these can be | |
| 1278 | + // convenient if you have already verified the type. | |
| 1279 | + QPDF_DLL | |
| 1280 | + void assertInitialized() const; | |
| 1281 | + | |
| 1282 | + QPDF_DLL | |
| 1283 | + void assertNull(); | |
| 1284 | + QPDF_DLL | |
| 1285 | + void assertBool(); | |
| 1286 | + QPDF_DLL | |
| 1287 | + void assertInteger(); | |
| 1288 | + QPDF_DLL | |
| 1289 | + void assertReal(); | |
| 1290 | + QPDF_DLL | |
| 1291 | + void assertName(); | |
| 1292 | + QPDF_DLL | |
| 1293 | + void assertString(); | |
| 1294 | + QPDF_DLL | |
| 1295 | + void assertOperator(); | |
| 1296 | + QPDF_DLL | |
| 1297 | + void assertInlineImage(); | |
| 1298 | + QPDF_DLL | |
| 1299 | + void assertArray(); | |
| 1300 | + QPDF_DLL | |
| 1301 | + void assertDictionary(); | |
| 1302 | + QPDF_DLL | |
| 1303 | + void assertStream(); | |
| 1304 | + QPDF_DLL | |
| 1305 | + void assertReserved(); | |
| 1306 | + | |
| 1307 | + QPDF_DLL | |
| 1308 | + void assertIndirect(); | |
| 1309 | + QPDF_DLL | |
| 1310 | + void assertScalar(); | |
| 1311 | + QPDF_DLL | |
| 1312 | + void assertNumber(); | |
| 1313 | + | |
| 1314 | + // The isPageObject method checks the /Type key of the object. This is not completely reliable | |
| 1315 | + // as there are some otherwise valid files whose /Type is wrong for page objects. qpdf is | |
| 1316 | + // slightly more accepting but may still return false here when treating the object as a page | |
| 1317 | + // would work. Use this sparingly. | |
| 1318 | + QPDF_DLL | |
| 1319 | + bool isPageObject(); | |
| 1320 | + QPDF_DLL | |
| 1321 | + bool isPagesObject(); | |
| 1322 | + QPDF_DLL | |
| 1323 | + void assertPageObject(); | |
| 1324 | + | |
| 1325 | + QPDF_DLL | |
| 1326 | + bool isFormXObject(); | |
| 1327 | + | |
| 1328 | + // Indicate if this is an image. If exclude_imagemask is true, don't count image masks as | |
| 1329 | + // images. | |
| 1330 | + QPDF_DLL | |
| 1331 | + bool isImage(bool exclude_imagemask = true); | |
| 1332 | + | |
| 1333 | + // The following methods do not form part of the public API and are for internal use only. | |
| 1334 | + | |
| 1335 | + QPDFObjectHandle(std::shared_ptr<QPDFObject> const& obj) : // not explicit: a std::shared_ptr<QPDFObject> converts implicitly to a handle | |
| 1336 | + obj(obj) | |
| 1337 | + { | |
| 1338 | + } | |
| 1339 | + std::shared_ptr<QPDFObject> | |
| 1340 | + getObj() | |
| 1341 | + { | |
| 1342 | + return obj; // returned by value: the caller receives its own shared_ptr to the same object | |
| 1343 | + } | |
| 1344 | + std::shared_ptr<QPDFObject> | |
| 1345 | + getObj() const | |
| 1346 | + { | |
| 1347 | + return obj; // const overload; also returns the shared_ptr by value | |
| 1348 | + } | |
| 1349 | + QPDFObject* | |
| 1350 | + getObjectPtr() | |
| 1351 | + { | |
| 1352 | + return obj.get(); // non-owning raw pointer; null when the handle holds no object | |
| 1353 | + } | |
| 1354 | + QPDFObject* const // NOTE(review): this const qualifies the returned pointer value, not the pointee; confirm whether QPDFObject const* was intended | |
| 1355 | + getObjectPtr() const | |
| 1356 | + { | |
| 1357 | + return obj.get(); // non-owning raw pointer; null when the handle holds no object | |
| 1358 | + } | |
| 1359 | + | |
| 1360 | + void writeJSON(int json_version, JSON::Writer& p, bool dereference_indirect = false); | |
| 1361 | + | |
| 1362 | + private: | |
| 1363 | + QPDF_Array* asArray() const; | |
| 1364 | + QPDF_Bool* asBool() const; | |
| 1365 | + QPDF_Dictionary* asDictionary() const; | |
| 1366 | + QPDF_InlineImage* asInlineImage() const; | |
| 1367 | + QPDF_Integer* asInteger() const; | |
| 1368 | + QPDF_Name* asName() const; | |
| 1369 | + QPDF_Null* asNull() const; | |
| 1370 | + QPDF_Operator* asOperator() const; | |
| 1371 | + QPDF_Real* asReal() const; | |
| 1372 | + QPDF_Reserved* asReserved() const; | |
| 1373 | + QPDF_Stream* asStream() const; | |
| 1374 | + QPDF_Stream* asStreamWithAssert(); | |
| 1375 | + QPDF_String* asString() const; | |
| 1376 | + | |
| 1377 | + void typeWarning(char const* expected_type, std::string const& warning); | |
| 1378 | + void objectWarning(std::string const& warning); | |
| 1379 | + void assertType(char const* type_name, bool istype); | |
| 1380 | + void makeDirect(QPDFObjGen::set& visited, bool stop_at_streams); | |
| 1381 | + void disconnect(); | |
| 1382 | + void setParsedOffset(qpdf_offset_t offset); | |
| 1383 | + void parseContentStream_internal(std::string const& description, ParserCallbacks* callbacks); | |
| 1384 | + static void parseContentStream_data( | |
| 1385 | + std::shared_ptr<Buffer>, | |
| 1386 | + std::string const& description, | |
| 1387 | + ParserCallbacks* callbacks, | |
| 1388 | + QPDF* context); | |
| 1389 | + std::vector<QPDFObjectHandle> | |
| 1390 | + arrayOrStreamToStreamArray(std::string const& description, std::string& all_description); | |
| 1391 | + static void warn(QPDF*, QPDFExc const&); | |
| 1392 | + void checkOwnership(QPDFObjectHandle const&) const; | |
| 1393 | + | |
| 1394 | + // Moving members of QPDFObjectHandle into a smart pointer incurs a substantial performance | |
| 1395 | + // penalty since QPDFObjectHandle objects are copied around so frequently. | |
| 1396 | + std::shared_ptr<QPDFObject> obj; | |
| 1397 | +}; | |
| 1398 | + | |
| 1399 | +#ifndef QPDF_NO_QPDF_STRING | |
| 1400 | +// This is short for QPDFObjectHandle::parse, so you can do | |
| 1401 | + | |
| 1402 | +// auto oh = "<< /Key (value) >>"_qpdf; | |
| 1403 | + | |
| 1404 | +// If this is causing problems in your code, define QPDF_NO_QPDF_STRING to prevent the declaration | |
| 1405 | +// from being here. | |
| 1406 | + | |
| 1407 | +/* clang-format off */ | |
| 1408 | +// Disable formatting for this declaration: emacs font-lock in cc-mode (as of 28.1) treats the rest | |
| 1409 | +// of the file as a string if clang-format removes the space after "operator", and as of | |
| 1410 | +// clang-format 15, there's no way to prevent it from doing so. | |
| 1411 | +QPDF_DLL | |
| 1412 | +QPDFObjectHandle operator ""_qpdf(char const* v, size_t len); | |
| 1413 | +/* clang-format on */ | |
| 1414 | + | |
| 1415 | +#endif // QPDF_NO_QPDF_STRING | |
| 1416 | + | |
| 1417 | +class QPDFObjectHandle::QPDFDictItems | |
| 1418 | +{ | |
| 1419 | + // This class allows C++-style iteration, including range-for iteration, over dictionaries. | |
| 1420 | + // You can write | |
| 1421 | + | |
| 1422 | + // for (auto iter: QPDFDictItems(dictionary_obj)) | |
| 1423 | + // { | |
| 1424 | + // // iter.first is a string | |
| 1425 | + // // iter.second is a QPDFObjectHandle | |
| 1426 | + // } | |
| 1427 | + | |
| 1428 | + // See examples/pdf-name-number-tree.cc for a demonstration of using this API. | |
| 1429 | + | |
| 1430 | + public: | |
| 1431 | + QPDF_DLL | |
| 1432 | + QPDFDictItems(QPDFObjectHandle const& oh); | |
| 1433 | + | |
| 1434 | + class iterator | |
| 1435 | + { | |
| 1436 | + friend class QPDFDictItems; | |
| 1437 | + | |
| 1438 | + public: | |
| 1439 | + typedef std::pair<std::string, QPDFObjectHandle> T; | |
| 1440 | + using iterator_category = std::bidirectional_iterator_tag; // both ++ and -- are declared below | |
| 1441 | + using value_type = T; | |
| 1442 | + using difference_type = long; | |
| 1443 | + using pointer = T*; | |
| 1444 | + using reference = T&; | |
| 1445 | + | |
| 1446 | + QPDF_DLL | |
| 1447 | + virtual ~iterator() = default; | |
| 1448 | + QPDF_DLL | |
| 1449 | + iterator& operator++(); | |
| 1450 | + QPDF_DLL | |
| 1451 | + iterator | |
| 1452 | + operator++(int) // postfix form: copies *this, applies the prefix operator, returns the copy | |
| 1453 | + { | |
| 1454 | + iterator t = *this; | |
| 1455 | + ++(*this); | |
| 1456 | + return t; | |
| 1457 | + } | |
| 1458 | + QPDF_DLL | |
| 1459 | + iterator& operator--(); | |
| 1460 | + QPDF_DLL | |
| 1461 | + iterator | |
| 1462 | + operator--(int) // postfix form: copies *this, applies the prefix operator, returns the copy | |
| 1463 | + { | |
| 1464 | + iterator t = *this; | |
| 1465 | + --(*this); | |
| 1466 | + return t; | |
| 1467 | + } | |
| 1468 | + QPDF_DLL | |
| 1469 | + reference operator*(); | |
| 1470 | + QPDF_DLL | |
| 1471 | + pointer operator->(); | |
| 1472 | + QPDF_DLL | |
| 1473 | + bool operator==(iterator const& other) const; | |
| 1474 | + QPDF_DLL | |
| 1475 | + bool | |
| 1476 | + operator!=(iterator const& other) const | |
| 1477 | + { | |
| 1478 | + return !operator==(other); // defined as the negation of operator== | |
| 1479 | + } | |
| 1480 | + | |
| 1481 | + private: | |
| 1482 | + iterator(QPDFObjectHandle& oh, bool for_begin); | |
| 1483 | + void updateIValue(); | |
| 1484 | + | |
| 1485 | + class Members | |
| 1486 | + { | |
| 1487 | + friend class QPDFDictItems::iterator; | |
| 1488 | + | |
| 1489 | + public: | |
| 1490 | + QPDF_DLL | |
| 1491 | + ~Members() = default; | |
| 1492 | + | |
| 1493 | + private: | |
| 1494 | + Members(QPDFObjectHandle& oh, bool for_begin); | |
| 1495 | + Members() = delete; | |
| 1496 | + Members(Members const&) = delete; | |
| 1497 | + | |
| 1498 | + QPDFObjectHandle& oh; // held by reference -- NOTE(review): the referenced handle presumably must outlive the iterator; confirm | |
| 1499 | + std::set<std::string> keys; | |
| 1500 | + std::set<std::string>::iterator iter; | |
| 1501 | + bool is_end; | |
| 1502 | + }; | |
| 1503 | + std::shared_ptr<Members> m; // NOTE(review): copying an iterator copies this shared_ptr, so copies share one Members; confirm this is intended for the postfix operators | |
| 1504 | + value_type ivalue; // NOTE(review): presumably the current element, refreshed by updateIValue(); confirm | |
| 1505 | + }; | |
| 1506 | + | |
| 1507 | + QPDF_DLL | |
| 1508 | + iterator begin(); | |
| 1509 | + QPDF_DLL | |
| 1510 | + iterator end(); | |
| 1511 | + | |
| 1512 | + private: | |
| 1513 | + QPDFObjectHandle oh; | |
| 1514 | +}; | |
| 1515 | + | |
| 1516 | +class QPDFObjectHandle::QPDFArrayItems | |
| 1517 | +{ | |
| 1518 | + // This class allows C++-style iteration, including range-for iteration, over arrays. You can | |
| 1519 | + // write | |
| 1520 | + | |
| 1521 | + // for (auto iter: QPDFArrayItems(array_obj)) | |
| 1522 | + // { | |
| 1523 | + // // iter is a QPDFObjectHandle | |
| 1524 | + // } | |
| 1525 | + | |
| 1526 | + // See examples/pdf-name-number-tree.cc for a demonstration of using this API. | |
| 1527 | + | |
| 1528 | + public: | |
| 1529 | + QPDF_DLL | |
| 1530 | + QPDFArrayItems(QPDFObjectHandle const& oh); | |
| 1531 | + | |
| 1532 | + class iterator | |
| 1533 | + { | |
| 1534 | + friend class QPDFArrayItems; | |
| 1535 | + | |
| 1536 | + public: | |
| 1537 | + typedef QPDFObjectHandle T; | |
| 1538 | + using iterator_category = std::bidirectional_iterator_tag; // both ++ and -- are declared below | |
| 1539 | + using value_type = T; | |
| 1540 | + using difference_type = long; | |
| 1541 | + using pointer = T*; | |
| 1542 | + using reference = T&; | |
| 1543 | + | |
| 1544 | + QPDF_DLL | |
| 1545 | + virtual ~iterator() = default; | |
| 1546 | + QPDF_DLL | |
| 1547 | + iterator& operator++(); | |
| 1548 | + QPDF_DLL | |
| 1549 | + iterator | |
| 1550 | + operator++(int) // postfix form: copies *this, applies the prefix operator, returns the copy | |
| 1551 | + { | |
| 1552 | + iterator t = *this; | |
| 1553 | + ++(*this); | |
| 1554 | + return t; | |
| 1555 | + } | |
| 1556 | + QPDF_DLL | |
| 1557 | + iterator& operator--(); | |
| 1558 | + QPDF_DLL | |
| 1559 | + iterator | |
| 1560 | + operator--(int) // postfix form: copies *this, applies the prefix operator, returns the copy | |
| 1561 | + { | |
| 1562 | + iterator t = *this; | |
| 1563 | + --(*this); | |
| 1564 | + return t; | |
| 1565 | + } | |
| 1566 | + QPDF_DLL | |
| 1567 | + reference operator*(); | |
| 1568 | + QPDF_DLL | |
| 1569 | + pointer operator->(); | |
| 1570 | + QPDF_DLL | |
| 1571 | + bool operator==(iterator const& other) const; | |
| 1572 | + QPDF_DLL | |
| 1573 | + bool | |
| 1574 | + operator!=(iterator const& other) const | |
| 1575 | + { | |
| 1576 | + return !operator==(other); // defined as the negation of operator== | |
| 1577 | + } | |
| 1578 | + | |
| 1579 | + private: | |
| 1580 | + iterator(QPDFObjectHandle& oh, bool for_begin); | |
| 1581 | + void updateIValue(); | |
| 1582 | + | |
| 1583 | + class Members | |
| 1584 | + { | |
| 1585 | + friend class QPDFArrayItems::iterator; | |
| 1586 | + | |
| 1587 | + public: | |
| 1588 | + QPDF_DLL | |
| 1589 | + ~Members() = default; | |
| 1590 | + | |
| 1591 | + private: | |
| 1592 | + Members(QPDFObjectHandle& oh, bool for_begin); | |
| 1593 | + Members() = delete; | |
| 1594 | + Members(Members const&) = delete; | |
| 1595 | + | |
| 1596 | + QPDFObjectHandle& oh; // held by reference -- NOTE(review): the referenced handle presumably must outlive the iterator; confirm | |
| 1597 | + int item_number; | |
| 1598 | + bool is_end; | |
| 1599 | + }; | |
| 1600 | + std::shared_ptr<Members> m; // NOTE(review): copying an iterator copies this shared_ptr, so copies share one Members; confirm this is intended for the postfix operators | |
| 1601 | + value_type ivalue; // NOTE(review): presumably the current element, refreshed by updateIValue(); confirm | |
| 1602 | + }; | |
| 1603 | + | |
| 1604 | + QPDF_DLL | |
| 1605 | + iterator begin(); | |
| 1606 | + QPDF_DLL | |
| 1607 | + iterator end(); | |
| 1608 | + | |
| 1609 | + private: | |
| 1610 | + QPDFObjectHandle oh; | |
| 1611 | +}; | |
| 1612 | + | |
| 1613 | +inline int | |
| 1614 | +QPDFObjectHandle::getObjectID() const | |
| 1615 | +{ | |
| 1616 | + return getObjGen().getObj(); // object ID taken from this handle's QPDFObjGen | |
| 1617 | +} | |
| 1618 | + | |
| 1619 | +inline int | |
| 1620 | +QPDFObjectHandle::getGeneration() const | |
| 1621 | +{ | |
| 1622 | + return getObjGen().getGen(); // generation taken from this handle's QPDFObjGen | |
| 1623 | +} | |
| 1624 | + | |
| 1625 | +inline bool | |
| 1626 | +QPDFObjectHandle::isIndirect() const | |
| 1627 | +{ | |
| 1628 | + return (obj != nullptr) && (getObjectID() != 0); // the null check short-circuits, so getObjectID() runs only on an initialized handle | |
| 1629 | +} | |
| 1630 | + | |
| 1631 | +inline bool | |
| 1632 | +QPDFObjectHandle::isInitialized() const | |
| 1633 | +{ | |
| 1634 | + return obj != nullptr; // initialized means the handle holds a non-null object pointer | |
| 1635 | +} | |
| 1636 | + | |
| 1637 | +#endif // QPDFOBJECTHANDLE_FUTURE_HH | ... | ... |