diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 5f990b7..8d6e149 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -62,75 +62,82 @@ class QPDF QPDF_DLL static std::shared_ptr create(); - // Associate a file with a QPDF object and do initial parsing of the file. PDF objects are not - // read until they are needed. A QPDF object may be associated with only one file in its - // lifetime. This method must be called before any methods that potentially ask for information - // about the PDF file are called. Prior to calling this, the only methods that are allowed are - // those that set parameters. If the input file is not encrypted, either a null password or an - // empty password can be used. If the file is encrypted, either the user password or the owner - // password may be supplied. The method setPasswordIsHexKey may be called prior to calling this - // method or any of the other process methods to force the password to be interpreted as a raw - // encryption key. See comments on setPasswordIsHexKey for more information. + /*! \brief Associate a file with a QPDF object and do initial parsing of the file. PDF objects are not + * read until they are needed. A QPDF object may be associated with only one file in its + * lifetime. This method must be called before any methods that potentially ask for information + * about the PDF file are called. Prior to calling this, the only methods that are allowed are + * those that set parameters. If the input file is not encrypted, either a null password or an + * empty password can be used. If the file is encrypted, either the user password or the owner + * password may be supplied. The method setPasswordIsHexKey may be called prior to calling this + * method or any of the other process methods to force the password to be interpreted as a raw + * encryption key. See comments on setPasswordIsHexKey for more information. 
+ */ QPDF_DLL void processFile(char const* filename, char const* password = nullptr); - // Parse a PDF from a stdio FILE*. The FILE must be open in binary mode and must be seekable. - // It may be open read only. This works exactly like processFile except that the PDF file is - // read from an already opened FILE*. If close_file is true, the file will be closed at the - // end. Otherwise, the caller is responsible for closing the file. + /*! \brief Parse a PDF from a stdio FILE*. The FILE must be open in binary mode and must be seekable. + * It may be open read only. This works exactly like processFile except that the PDF file is + * read from an already opened FILE*. If close_file is true, the file will be closed at the + * end. Otherwise, the caller is responsible for closing the file. + */ QPDF_DLL void processFile( char const* description, FILE* file, bool close_file, char const* password = nullptr); - // Parse a PDF file loaded into a memory buffer. This works exactly like processFile except - // that the PDF file is in memory instead of on disk. The description appears in any warning or - // error message in place of the file name. The buffer is owned by the caller and must remain - // valid for the lifetime of the QPDF object. + /*! \brief Parse a PDF file loaded into a memory buffer. This works exactly like processFile except + * that the PDF file is in memory instead of on disk. The description appears in any warning or + * error message in place of the file name. The buffer is owned by the caller and must remain + * valid for the lifetime of the QPDF object. + */ QPDF_DLL void processMemoryFile( char const* description, char const* buf, size_t length, char const* password = nullptr); - // Parse a PDF file loaded from a custom InputSource. If you have your own method of retrieving - // a PDF file, you can subclass InputSource and use this method. + /*! \brief Parse a PDF file loaded from a custom InputSource. 
If you have your own method of retrieving + * a PDF file, you can subclass InputSource and use this method. + */ QPDF_DLL void processInputSource(std::shared_ptr, char const* password = nullptr); - // Create a PDF from an input source that contains JSON as written by writeJSON (or qpdf - // --json-output, version 2 or higher). The JSON must be a complete representation of a PDF. See - // "qpdf JSON" in the manual for details. The input JSON may be arbitrarily large. QPDF does not - // load stream data into memory for more than one stream at a time, even if the stream data is - // specified inline. + /*! \brief Create a PDF from an input source that contains JSON as written by writeJSON (or qpdf + * --json-output, version 2 or higher). The JSON must be a complete representation of a PDF. See + * "qpdf JSON" in the manual for details. The input JSON may be arbitrarily large. QPDF does not + * load stream data into memory for more than one stream at a time, even if the stream data is + * specified inline. + */ QPDF_DLL void createFromJSON(std::string const& json_file); QPDF_DLL void createFromJSON(std::shared_ptr); - // Update a PDF from an input source that contains JSON in the same format as is written by - // writeJSON (or qpdf --json-output, version 2 or higher). Objects in the PDF and not in the - // JSON are not modified. See "qpdf JSON" in the manual for details. As with createFromJSON, the - // input JSON may be arbitrarily large. + /*! \brief Update a PDF from an input source that contains JSON in the same format as is written by + * writeJSON (or qpdf --json-output, version 2 or higher). Objects in the PDF and not in the + * JSON are not modified. See "qpdf JSON" in the manual for details. As with createFromJSON, the + * input JSON may be arbitrarily large. + */ QPDF_DLL void updateFromJSON(std::string const& json_file); QPDF_DLL void updateFromJSON(std::shared_ptr); - // Write qpdf JSON format to the pipeline "p". The only supported version is 2. 
The finish() - // method is not called on the pipeline. - // - // The decode_level parameter controls which streams are uncompressed in the JSON. Use - // qpdf_dl_none to preserve all stream data exactly as it appears in the input. The possible - // values for json_stream_data can be found in qpdf/Constants.h and correspond to the - // --json-stream-data command-line argument. If json_stream_data is qpdf_sj_file, file_prefix - // must be specified. Each stream will be written to a file whose path is constructed by - // appending "-nnn" to file_prefix, where "nnn" is the object number (not zero-filled). If - // wanted_objects is empty, write all objects. Otherwise, write only objects whose keys are in - // wanted_objects. Keys may be either "trailer" or of the form "obj:n n R". Invalid keys are - // ignored. This corresponds to the --json-object command-line argument. - // - // QPDF is efficient with regard to memory when writing, allowing you to write arbitrarily large - // PDF files to a pipeline. You can use a pipeline like Pl_Buffer or Pl_String to capture the - // JSON output in memory, but do so with caution as this will allocate enough memory to hold the - // entire PDF file. + /*! \brief Write qpdf JSON format to the pipeline "p". The only supported version is 2. The finish() + * method is not called on the pipeline. + * + * The decode_level parameter controls which streams are uncompressed in the JSON. Use + * qpdf_dl_none to preserve all stream data exactly as it appears in the input. The possible + * values for json_stream_data can be found in qpdf/Constants.h and correspond to the + * --json-stream-data command-line argument. If json_stream_data is qpdf_sj_file, file_prefix + * must be specified. Each stream will be written to a file whose path is constructed by + * appending "-nnn" to file_prefix, where "nnn" is the object number (not zero-filled). If + * wanted_objects is empty, write all objects. 
Otherwise, write only objects whose keys are in + * wanted_objects. Keys may be either "trailer" or of the form "obj:n n R". Invalid keys are + * ignored. This corresponds to the --json-object command-line argument. + * + * QPDF is efficient with regard to memory when writing, allowing you to write arbitrarily large + * PDF files to a pipeline. You can use a pipeline like Pl_Buffer or Pl_String to capture the + * JSON output in memory, but do so with caution as this will allocate enough memory to hold the + * entire PDF file. + */ QPDF_DLL void writeJSON( int version, @@ -140,13 +147,14 @@ class QPDF std::string const& file_prefix, std::set wanted_objects); - // This version of writeJSON enables writing only the "qpdf" key of an in-progress dictionary. - // If the value of "complete" is true, a complete JSON object containing only the "qpdf" key is - // written to the pipeline. If the value of "complete" is false, the "qpdf" key and its value - // are written to the pipeline assuming that a dictionary is already open. The parameter - // first_key indicates whether this is the first key in an in-progress dictionary. It will be - // set to false by writeJSON. The "qpdf" key and value are written as if at depth 1 in a - // prettified JSON output. Remaining arguments are the same as the above version. + /*! \brief This version of writeJSON enables writing only the "qpdf" key of an in-progress dictionary. + * If the value of "complete" is true, a complete JSON object containing only the "qpdf" key is + * written to the pipeline. If the value of "complete" is false, the "qpdf" key and its value + * are written to the pipeline assuming that a dictionary is already open. The parameter + * first_key indicates whether this is the first key in an in-progress dictionary. It will be + * set to false by writeJSON. The "qpdf" key and value are written as if at depth 1 in a + * prettified JSON output. Remaining arguments are the same as the above version. 
+ */ QPDF_DLL void writeJSON( int version, @@ -158,140 +166,155 @@ class QPDF std::string const& file_prefix, std::set wanted_objects); - // Close or otherwise release the input source. Once this has been called, no other methods of - // qpdf can be called safely except for getWarnings and anyWarnings(). After this has been - // called, it is safe to perform operations on the input file such as deleting or renaming it. + /*! \brief Close or otherwise release the input source. Once this has been called, no other methods of + * qpdf can be called safely except for getWarnings and anyWarnings(). After this has been + * called, it is safe to perform operations on the input file such as deleting or renaming it. + */ QPDF_DLL void closeInputSource(); - // For certain forensic or investigatory purposes, it may sometimes be useful to specify the - // encryption key directly, even though regular PDF applications do not provide a way to do - // this. Calling setPasswordIsHexKey(true) before calling any of the process methods will bypass - // the normal encryption key computation or recovery mechanisms and interpret the bytes in the - // password as a hex-encoded encryption key. Note that we hex-encode the key because it may - // contain null bytes and therefore can't be represented in a char const*. + /*! \brief For certain forensic or investigatory purposes, it may sometimes be useful to specify the + * encryption key directly, even though regular PDF applications do not provide a way to do + * this. Calling setPasswordIsHexKey(true) before calling any of the process methods will bypass + * the normal encryption key computation or recovery mechanisms and interpret the bytes in the + * password as a hex-encoded encryption key. Note that we hex-encode the key because it may + * contain null bytes and therefore can't be represented in a char const*. + */ QPDF_DLL void setPasswordIsHexKey(bool); - // Create a QPDF object for an empty PDF. 
This PDF has no pages or objects other than a minimal - // trailer, a document catalog, and a /Pages tree containing zero pages. Pages and other - // objects can be added to the file in the normal way, and the trailer and document catalog can - // be mutated. Calling this method is equivalent to calling processFile on an equivalent PDF - // file. See the pdf-create.cc example for a demonstration of how to use this method to create - // a PDF file from scratch. + /*! \brief Create a QPDF object for an empty PDF. This PDF has no pages or objects other than a minimal + * trailer, a document catalog, and a /Pages tree containing zero pages. Pages and other + * objects can be added to the file in the normal way, and the trailer and document catalog can + * be mutated. Calling this method is equivalent to calling processFile on an equivalent PDF + * file. See the pdf-create.cc example for a demonstration of how to use this method to create + * a PDF file from scratch. + */ QPDF_DLL void emptyPDF(); - // From 10.1: register a new filter implementation for a specific stream filter. You can add - // your own implementations for new filter types or override existing ones provided by the - // library. Registered stream filters are used for decoding only as you can override encoding - // with stream data providers. For example, you could use this method to add support for one of - // the other filter types by using additional third-party libraries that qpdf does not presently - // use. The standard filters are implemented using QPDFStreamFilter classes. + /*! \brief From 10.1: register a new filter implementation for a specific stream filter. You can add + * your own implementations for new filter types or override existing ones provided by the + * library. Registered stream filters are used for decoding only as you can override encoding + * with stream data providers. 
For example, you could use this method to add support for one of + * the other filter types by using additional third-party libraries that qpdf does not presently + * use. The standard filters are implemented using QPDFStreamFilter classes. + */ QPDF_DLL static void registerStreamFilter( std::string const& filter_name, std::function()> factory); // Parameter settings - // To capture or redirect output, configure the logger returned by getLogger(). By default, all - // QPDF and QPDFJob objects share the global logger. If you need a private logger for some - // reason, pass a new one to setLogger(). See comments in QPDFLogger.hh for details on - // configuring the logger. - // - // Note that no normal QPDF operations generate output to standard output, so for applications - // that just wish to avoid creating output for warnings and don't call any check functions, - // calling setSuppressWarnings(true) is sufficient. + /*! \brief To capture or redirect output, configure the logger returned by getLogger(). By default, all + * QPDF and QPDFJob objects share the global logger. If you need a private logger for some + * reason, pass a new one to setLogger(). See comments in QPDFLogger.hh for details on + * configuring the logger. + * + * Note that no normal QPDF operations generate output to standard output, so for applications + * that just wish to avoid creating output for warnings and don't call any check functions, + * calling setSuppressWarnings(true) is sufficient. + */ QPDF_DLL std::shared_ptr getLogger(); QPDF_DLL void setLogger(std::shared_ptr); - // This deprecated method is the old way to capture output, but it didn't capture all output. - // See comments above for getLogger and setLogger. This will be removed in QPDF 12. For now, it - // configures a private logger, separating this object from the default logger, and calls - // setOutputStreams on that logger. See QPDFLogger.hh for additional details. + /*! 
\brief This deprecated method is the old way to capture output, but it didn't capture all output. + * See comments above for getLogger and setLogger. This will be removed in QPDF 12. For now, it + * configures a private logger, separating this object from the default logger, and calls + * setOutputStreams on that logger. See QPDFLogger.hh for additional details. + */ [[deprecated("configure logger from getLogger() or call setLogger()")]] QPDF_DLL void setOutputStreams(std::ostream* out_stream, std::ostream* err_stream); - // If true, ignore any cross-reference streams in a hybrid file (one that contains both - // cross-reference streams and cross-reference tables). This can be useful for testing to - // ensure that a hybrid file would work with an older reader. + /*! \brief If true, ignore any cross-reference streams in a hybrid file (one that contains both + * cross-reference streams and cross-reference tables). This can be useful for testing to + * ensure that a hybrid file would work with an older reader. + */ QPDF_DLL void setIgnoreXRefStreams(bool); - // By default, any warnings are issued to std::cerr or the error stream specified in a call to - // setOutputStreams as they are encountered. If this method is called with a true value, - // reporting of warnings is suppressed. You may still retrieve warnings by calling getWarnings. + /*! \brief By default, any warnings are issued to std::cerr or the error stream specified in a call to + * setOutputStreams as they are encountered. If this method is called with a true value, + * reporting of warnings is suppressed. You may still retrieve warnings by calling getWarnings. + */ QPDF_DLL void setSuppressWarnings(bool); - // Set the maximum number of warnings. A QPDFExc is thrown if the limit is exceeded. + /*! \brief Set the maximum number of warnings. A QPDFExc is thrown if the limit is exceeded. 
*/ QPDF_DLL void setMaxWarnings(size_t); - // By default, QPDF will try to recover if it finds certain types of errors in PDF files. If - // turned off, it will throw an exception on the first such problem it finds without attempting - // recovery. + /*! \brief By default, QPDF will try to recover if it finds certain types of errors in PDF files. If + * turned off, it will throw an exception on the first such problem it finds without attempting + * recovery. + */ QPDF_DLL void setAttemptRecovery(bool); - // Tell other QPDF objects that streams copied from this QPDF need to be fully copied when - // copyForeignObject is called on them. Calling setIgnoreXRefStreams(true) on a QPDF object - // makes it possible for the object and its input source to disappear before streams copied from - // it are written with the destination QPDF object. Confused? Ordinarily, if you are going to - // copy objects from a source QPDF object to a destination QPDF object using copyForeignObject - // or addPage, the source object's input source must stick around until after the destination - // PDF is written. If you call this method on the source QPDF object, it sends a signal to the - // destination object that it must fully copy the stream data when copyForeignObject. It will do - // this by making a copy in RAM. Ordinarily the stream data is copied lazily to avoid - // unnecessary duplication of the stream data. Note that the stream data is copied into RAM only - // once regardless of how many objects the stream is copied into. The result is that, if you - // called setImmediateCopyFrom(true) on a given QPDF object prior to copying any of its streams, - // you do not need to keep it or its input source around after copying its objects to another - // QPDF. This is true even if the source streams use StreamDataProvider. Note that this method - // is called on the QPDF object you are copying FROM, not the one you are copying to. 
The - // reasoning for this is that there's no reason a given QPDF may not get objects copied to it - // from a variety of other objects, some transient and some not. Since what's relevant is - // whether the source QPDF is transient, the method must be called on the source QPDF, not the - // destination one. This method will make a copy of the stream in RAM, so be sure you have - // enough memory to simultaneously hold all the streams you're copying. + /*! \brief Tell other QPDF objects that streams copied from this QPDF need to be fully copied when + * copyForeignObject is called on them. Calling setImmediateCopyFrom(true) on a QPDF object + * makes it possible for the object and its input source to disappear before streams copied from + * it are written with the destination QPDF object. Confused? Ordinarily, if you are going to + * copy objects from a source QPDF object to a destination QPDF object using copyForeignObject + * or addPage, the source object's input source must stick around until after the destination + * PDF is written. If you call this method on the source QPDF object, it sends a signal to the + * destination object that it must fully copy the stream data when copyForeignObject is called. It will do + * this by making a copy in RAM. Ordinarily the stream data is copied lazily to avoid + * unnecessary duplication of the stream data. Note that the stream data is copied into RAM only + * once regardless of how many objects the stream is copied into. The result is that, if you + * called setImmediateCopyFrom(true) on a given QPDF object prior to copying any of its streams, + * you do not need to keep it or its input source around after copying its objects to another + * QPDF. This is true even if the source streams use StreamDataProvider. Note that this method + * is called on the QPDF object you are copying FROM, not the one you are copying to. 
The + * reasoning for this is that there's no reason a given QPDF may not get objects copied to it + * from a variety of other objects, some transient and some not. Since what's relevant is + * whether the source QPDF is transient, the method must be called on the source QPDF, not the + * destination one. This method will make a copy of the stream in RAM, so be sure you have + * enough memory to simultaneously hold all the streams you're copying. + */ QPDF_DLL void setImmediateCopyFrom(bool); // Other public methods - // Return the list of warnings that have been issued so far and clear the list. This method may - // be called even if processFile throws an exception. Note that if setSuppressWarnings was not - // called or was called with a false value, any warnings retrieved here will have already been - // output. + /*! \brief Return the list of warnings that have been issued so far and clear the list. This method may + * be called even if processFile throws an exception. Note that if setSuppressWarnings was not + * called or was called with a false value, any warnings retrieved here will have already been + * output. + */ QPDF_DLL std::vector getWarnings(); - // Indicate whether any warnings have been issued so far. Does not clear the list of warnings. + /*! \brief Indicate whether any warnings have been issued so far. Does not clear the list of warnings. */ QPDF_DLL bool anyWarnings() const; - // Indicate the number of warnings that have been issued since the last call to getWarnings. - // Does not clear the list of warnings. + /*! \brief Indicate the number of warnings that have been issued since the last call to getWarnings. + * Does not clear the list of warnings. + */ QPDF_DLL size_t numWarnings() const; - // Return an application-scoped unique ID for this QPDF object. This is not a globally unique - // ID. 
It is constructed using a timestamp and a random number and is intended to be unique - // among QPDF objects that are created by a single run of an application. While it's very likely - // that these are actually globally unique, it is not recommended to use them for long-term - // purposes. + /*! \brief Return an application-scoped unique ID for this QPDF object. This is not a globally unique + * ID. It is constructed using a timestamp and a random number and is intended to be unique + * among QPDF objects that are created by a single run of an application. While it's very likely + * that these are actually globally unique, it is not recommended to use them for long-term + * purposes. + */ QPDF_DLL unsigned long long getUniqueId() const; - // Issue a warning on behalf of this QPDF object. It will be emitted with other warnings, - // following warning suppression rules, and it will be available with getWarnings(). + /*! \brief Issue a warning on behalf of this QPDF object. It will be emitted with other warnings, + * following warning suppression rules, and it will be available with getWarnings(). + */ QPDF_DLL void warn(QPDFExc const& e); - // Same as above but creates the QPDFExc object using the arguments passed to warn. The filename - // argument to QPDFExc is omitted. This method uses the filename associated with the QPDF - // object. + /*! \brief Same as above but creates the QPDFExc object using the arguments passed to warn. The filename + * argument to QPDFExc is omitted. This method uses the filename associated with the QPDF + * object. + */ QPDF_DLL void warn( qpdf_error_code_e error_code, @@ -299,13 +322,13 @@ class QPDF qpdf_offset_t offset, std::string const& message); - // Return the filename associated with the QPDF object. + /*! \brief Return the filename associated with the QPDF object. */ QPDF_DLL std::string getFilename() const; - // Return PDF Version and extension level together as a PDFVersion object + /*! 
\brief Return PDF Version and extension level together as a PDFVersion object */ QPDF_DLL PDFVersion getVersionAsPDFVersion(); - // Return just the PDF version from the file + /*! \brief Return just the PDF version from the file */ QPDF_DLL std::string getPDFVersion() const; QPDF_DLL @@ -319,118 +342,128 @@ class QPDF // Public factory methods - // Create a new stream. A subsequent call must be made to replaceStreamData() to provide data - // for the stream. The stream's dictionary may be retrieved by calling getDict(), and the - // resulting dictionary may be modified. Alternatively, you can create a new dictionary and - // call replaceDict to install it. + /*! \brief Create a new stream. A subsequent call must be made to replaceStreamData() to provide data + * for the stream. The stream's dictionary may be retrieved by calling getDict(), and the + * resulting dictionary may be modified. Alternatively, you can create a new dictionary and + * call replaceDict to install it. + */ QPDF_DLL QPDFObjectHandle newStream(); - // Create a new stream. Use the given buffer as the stream data. The stream dictionary's - // /Length key will automatically be set to the size of the data buffer. If additional keys are - // required, the stream's dictionary may be retrieved by calling getDict(), and the resulting - // dictionary may be modified. This method is just a convenient wrapper around the newStream() - // and replaceStreamData(). It is a convenience methods for streams that require no parameters - // beyond the stream length. Note that you don't have to deal with compression yourself if you - // use QPDFWriter. By default, QPDFWriter will automatically compress uncompressed stream data. - // Example programs are provided that illustrate this. + /*! \brief Create a new stream. Use the given buffer as the stream data. The stream dictionary's + * /Length key will automatically be set to the size of the data buffer. 
If additional keys are + * required, the stream's dictionary may be retrieved by calling getDict(), and the resulting + * dictionary may be modified. This method is just a convenient wrapper around the newStream() + * and replaceStreamData(). It is a convenience method for streams that require no parameters + * beyond the stream length. Note that you don't have to deal with compression yourself if you + * use QPDFWriter. By default, QPDFWriter will automatically compress uncompressed stream data. + * Example programs are provided that illustrate this. + */ QPDF_DLL QPDFObjectHandle newStream(std::shared_ptr data); - // Create new stream with data from string. This method will create a copy of the data rather - // than using the user-provided buffer as in the std::shared_ptr version of newStream. + /*! \brief Create new stream with data from string. This method will create a copy of the data rather + * than using the user-provided buffer as in the std::shared_ptr version of newStream. + */ QPDF_DLL QPDFObjectHandle newStream(std::string const& data); - // A reserved object is a special sentinel used for qpdf to reserve a spot for an object that is - // going to be added to the QPDF object. Normally you don't have to use this type since you can - // just call QPDF::makeIndirectObject. However, in some cases, if you have to create objects - // with circular references, you may need to create a reserved object so that you can have a - // reference to it and then replace the object later. Reserved objects have the special - // property that they can't be resolved to direct objects. This makes it possible to replace a - // reserved object with a new object while preserving existing references to them. When you are - // ready to replace a reserved object with its replacement, use QPDF::replaceReserved for this - // purpose rather than the more general QPDF::replaceObject. It is an error to try to write a - // QPDF with QPDFWriter if it has any reserved objects in it. 
+ /*! \brief A reserved object is a special sentinel used for qpdf to reserve a spot for an object that is + * going to be added to the QPDF object. Normally you don't have to use this type since you can + * just call QPDF::makeIndirectObject. However, in some cases, if you have to create objects + * with circular references, you may need to create a reserved object so that you can have a + * reference to it and then replace the object later. Reserved objects have the special + * property that they can't be resolved to direct objects. This makes it possible to replace a + * reserved object with a new object while preserving existing references to them. When you are + * ready to replace a reserved object with its replacement, use QPDF::replaceReserved for this + * purpose rather than the more general QPDF::replaceObject. It is an error to try to write a + * QPDF with QPDFWriter if it has any reserved objects in it. + */ QPDF_DLL QPDFObjectHandle newReserved(); QPDF_DLL QPDFObjectHandle newIndirectNull(); - // Install this object handle as an indirect object and return an indirect reference to it. + /*! \brief Install this object handle as an indirect object and return an indirect reference to it. */ QPDF_DLL QPDFObjectHandle makeIndirectObject(QPDFObjectHandle); - // Retrieve an object by object ID and generation. Returns an indirect reference to it. The - // getObject() methods were added for qpdf 11. + /*! \brief Retrieve an object by object ID and generation. Returns an indirect reference to it. The + * getObject() methods were added for qpdf 11. + */ QPDF_DLL QPDFObjectHandle getObject(QPDFObjGen); QPDF_DLL QPDFObjectHandle getObject(int objid, int generation); - // These are older methods, but there is no intention to deprecate - // them. + /*! \brief These are older methods, but there is no intention to deprecate + * them. 
+ */ QPDF_DLL QPDFObjectHandle getObjectByObjGen(QPDFObjGen); QPDF_DLL QPDFObjectHandle getObjectByID(int objid, int generation); - // Replace the object with the given object id with the given object. The object handle passed - // in must be a direct object, though it may contain references to other indirect objects within - // it. Prior to qpdf 10.2.1, after calling this method, existing QPDFObjectHandle instances that - // pointed to the original object still pointed to the original object, resulting in confusing - // and incorrect behavior. This was fixed in 10.2.1, so existing QPDFObjectHandle objects will - // start pointing to the newly replaced object. Note that replacing an object with - // QPDFObjectHandle::newNull() effectively removes the object from the file since a non-existent - // object is treated as a null object. To replace a reserved object, call replaceReserved - // instead. + /*! \brief Replace the object with the given object id with the given object. The object handle passed + * in must be a direct object, though it may contain references to other indirect objects within + * it. Prior to qpdf 10.2.1, after calling this method, existing QPDFObjectHandle instances that + * pointed to the original object still pointed to the original object, resulting in confusing + * and incorrect behavior. This was fixed in 10.2.1, so existing QPDFObjectHandle objects will + * start pointing to the newly replaced object. Note that replacing an object with + * QPDFObjectHandle::newNull() effectively removes the object from the file since a non-existent + * object is treated as a null object. To replace a reserved object, call replaceReserved + * instead. + */ QPDF_DLL void replaceObject(QPDFObjGen og, QPDFObjectHandle); QPDF_DLL void replaceObject(int objid, int generation, QPDFObjectHandle); - // Swap two objects given by ID. 
Prior to qpdf 10.2.1, existing QPDFObjectHandle instances that - // reference them objects not notice the swap, but this was fixed in 10.2.1. + /*! \brief Swap two objects given by ID. Prior to qpdf 10.2.1, existing QPDFObjectHandle instances that + * referenced the objects did not notice the swap, but this was fixed in 10.2.1. + */ QPDF_DLL void swapObjects(QPDFObjGen og1, QPDFObjGen og2); QPDF_DLL void swapObjects(int objid1, int generation1, int objid2, int generation2); - // Replace a reserved object. This is a wrapper around replaceObject but it guarantees that the - // underlying object is a reserved object or a null object. After this call, reserved will - // be a reference to replacement. + /*! \brief Replace a reserved object. This is a wrapper around replaceObject but it guarantees that the + * underlying object is a reserved object or a null object. After this call, reserved will + * be a reference to replacement. + */ QPDF_DLL void replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement); - // Copy an object from another QPDF to this one. Starting with qpdf version 8.3.0, it is no - // longer necessary to keep the original QPDF around after the call to copyForeignObject as long - // as the source of any copied stream data is still available. Usually this means you just have - // to keep the input file around, not the QPDF object. The exception to this is if you copy a - // stream that gets its data from a QPDFObjectHandle::StreamDataProvider. In this case only, the - // original stream's QPDF object must stick around because the QPDF object is itself the source - // of the original stream data. For a more in-depth discussion, please see the TODO file. - // Starting in 8.4.0, you can call setImmediateCopyFrom(true) on the SOURCE QPDF object (the one - // you're copying FROM). 
If you do this prior to copying any of its objects, then neither the - // source QPDF object nor its input source needs to stick around at all regardless of the - // source. The cost is that the stream data is copied into RAM at the time copyForeignObject is - // called. See setImmediateCopyFrom for more information. - // - // The return value of this method is an indirect reference to the copied object in this file. - // This method is intended to be used to copy non-page objects. To copy page objects, pass the - // foreign page object directly to addPage (or addPageAt). If you copy objects that contain - // references to pages, you should copy the pages first using addPage(At). Otherwise references - // to the pages that have not been copied will be replaced with nulls. It is possible to use - // copyForeignObject on page objects if you are not going to use them as pages. Doing so copies - // the object normally but does not update the page structure. For example, it is a valid use - // case to use copyForeignObject for a page that you are going to turn into a form XObject, - // though you can also use QPDFPageObjectHelper::getFormXObjectForPage for that purpose. - // - // When copying objects with this method, object structure will be preserved, so all indirectly - // referenced indirect objects will be copied as well. This includes any circular references - // that may exist. The QPDF object keeps a record of what has already been copied, so shared - // objects will not be copied multiple times. This also means that if you mutate an object that - // has already been copied and try to copy it again, it won't work since the modified object - // will not be recopied. Therefore, you should do all mutation on the original file that you - // are going to do before you start copying its objects to a new file. + /*! \brief Copy an object from another QPDF to this one. 
Starting with qpdf version 8.3.0, it is no + * longer necessary to keep the original QPDF around after the call to copyForeignObject as long + * as the source of any copied stream data is still available. Usually this means you just have + * to keep the input file around, not the QPDF object. The exception to this is if you copy a + * stream that gets its data from a QPDFObjectHandle::StreamDataProvider. In this case only, the + * original stream's QPDF object must stick around because the QPDF object is itself the source + * of the original stream data. For a more in-depth discussion, please see the TODO file. + * Starting in 8.4.0, you can call setImmediateCopyFrom(true) on the SOURCE QPDF object (the one + * you're copying FROM). If you do this prior to copying any of its objects, then neither the + * source QPDF object nor its input source needs to stick around at all regardless of the + * source. The cost is that the stream data is copied into RAM at the time copyForeignObject is + * called. See setImmediateCopyFrom for more information. + * + * The return value of this method is an indirect reference to the copied object in this file. + * This method is intended to be used to copy non-page objects. To copy page objects, pass the + * foreign page object directly to addPage (or addPageAt). If you copy objects that contain + * references to pages, you should copy the pages first using addPage(At). Otherwise references + * to the pages that have not been copied will be replaced with nulls. It is possible to use + * copyForeignObject on page objects if you are not going to use them as pages. Doing so copies + * the object normally but does not update the page structure. For example, it is a valid use + * case to use copyForeignObject for a page that you are going to turn into a form XObject, + * though you can also use QPDFPageObjectHelper::getFormXObjectForPage for that purpose. 
+ * + * When copying objects with this method, object structure will be preserved, so all indirectly + * referenced indirect objects will be copied as well. This includes any circular references + * that may exist. The QPDF object keeps a record of what has already been copied, so shared + * objects will not be copied multiple times. This also means that if you mutate an object that + * has already been copied and try to copy it again, it won't work since the modified object + * will not be recopied. Therefore, you should do all mutation on the original file that you + * are going to do before you start copying its objects to a new file. + */ QPDF_DLL QPDFObjectHandle copyForeignObject(QPDFObjectHandle foreign); @@ -438,8 +471,9 @@ class QPDF enum encryption_method_e { e_none, e_unknown, e_rc4, e_aes, e_aesv3 }; - // To be removed from the public API in qpdf 13. See - // . + /*! \brief To be removed from the public API in qpdf 13. See + * . + */ class EncryptionData { public: @@ -548,8 +582,9 @@ class QPDF QPDF_DLL bool allowModifyAll(); - // Helper function to trim padding from user password. Calling trim_user_password on the result - // of getPaddedUserPassword gives getTrimmedUserPassword's result. + /*! \brief Helper function to trim padding from user password. Calling trim_user_password on the result + * of getPaddedUserPassword gives getTrimmedUserPassword's result. + */ QPDF_DLL static void trim_user_password(std::string& user_password); QPDF_DLL @@ -561,7 +596,7 @@ class QPDF int encryption_V, int encryption_R); - // To be removed in qpdf 13. See . + /*! \brief To be removed in qpdf 13. See . */ [[deprecated("to be removed in qpdf 13")]] QPDF_DLL static std::string compute_encryption_key(std::string const& password, EncryptionData const& data); @@ -594,94 +629,108 @@ class QPDF std::string& OE, std::string& UE, std::string& Perms); - // Return the full user password as stored in the PDF file. 
For files encrypted with 40-bit or - // 128-bit keys, the user password can be recovered when the file is opened using the owner - // password. This is not possible with newer encryption formats. If you are attempting to - // recover the user password in a user-presentable form, call getTrimmedUserPassword() instead. + + /*! \brief Return the full user password as stored in the PDF file. For files encrypted with 40-bit or + * 128-bit keys, the user password can be recovered when the file is opened using the owner + * password. This is not possible with newer encryption formats. If you are attempting to + * recover the user password in a user-presentable form, call getTrimmedUserPassword() instead. + */ QPDF_DLL std::string const& getPaddedUserPassword() const; - // Return human-readable form of user password subject to same limitations as - // getPaddedUserPassword(). + + /*! \brief Return human-readable form of user password subject to same limitations as + * getPaddedUserPassword(). + */ QPDF_DLL std::string getTrimmedUserPassword() const; - // Return the previously computed or retrieved encryption key for this file + + /*! \brief Return the previously computed or retrieved encryption key for this file */ QPDF_DLL std::string getEncryptionKey() const; - // Remove security restrictions associated with digitally signed files. From qpdf 11.7.0, this - // is called by QPDFAcroFormDocumentHelper::disableDigitalSignatures and is more useful when - // called from there than when just called by itself. + /*! \brief Remove security restrictions associated with digitally signed files. From qpdf 11.7.0, this + * is called by QPDFAcroFormDocumentHelper::disableDigitalSignatures and is more useful when + * called from there than when just called by itself. + */ QPDF_DLL void removeSecurityRestrictions(); // Linearization support - // Returns true iff the file starts with a linearization parameter dictionary. Does no - // additional validation. + /*! 
\brief Returns true iff the file starts with a linearization parameter dictionary. Does no + * additional validation. + */ QPDF_DLL bool isLinearized(); - // Performs various sanity checks on a linearized file. Return true if no errors or warnings. - // Otherwise, return false and output errors and warnings to the default output stream - // (std::cout or whatever is configured in the logger). It is recommended for linearization - // errors to be treated as warnings. + /*! \brief Performs various sanity checks on a linearized file. Return true if no errors or warnings. + * Otherwise, return false and output errors and warnings to the default output stream + * (std::cout or whatever is configured in the logger). It is recommended for linearization + * errors to be treated as warnings. + */ QPDF_DLL bool checkLinearization(); - // Calls checkLinearization() and, if possible, prints normalized contents of some of the hints - // tables to the default output stream. Normalization includes adding min values to delta values - // and adjusting offsets based on the location and size of the primary hint stream. + /*! \brief Calls checkLinearization() and, if possible, prints normalized contents of some of the hints + * tables to the default output stream. Normalization includes adding min values to delta values + * and adjusting offsets based on the location and size of the primary hint stream. + */ QPDF_DLL void showLinearizationData(); - // Shows the contents of the cross-reference table + /*! \brief Shows the contents of the cross-reference table. */ QPDF_DLL void showXRefTable(); - // Starting from qpdf 11.0 user code should not need to call this method. Before 11.0 this - // method was used to detect all indirect references to objects that don't exist and resolve - // them by replacing them with null, which is how the PDF spec says to interpret such dangling - // references. 
This method is called automatically when you try to add any new objects, if you - // call getAllObjects, and before a file is written. The qpdf object caches whether it has run - // this to avoid running it multiple times. Before 11.2.1 you could pass true to force it to run - // again if you had explicitly added new objects that may have additional dangling references. + /*! \brief Starting from qpdf 11.0 user code should not need to call this method. Before 11.0 this + * method was used to detect all indirect references to objects that don't exist and resolve + * them by replacing them with null, which is how the PDF spec says to interpret such dangling + * references. This method is called automatically when you try to add any new objects, if you + * call getAllObjects, and before a file is written. The qpdf object caches whether it has run + * this to avoid running it multiple times. Before 11.2.1 you could pass true to force it to run + * again if you had explicitly added new objects that may have additional dangling references. + */ QPDF_DLL void fixDanglingReferences(bool force = false); - // Return the approximate number of indirect objects. It is/ approximate because not all objects - // in the file are preserved in all cases, and gaps in object numbering are not preserved. + /*! \brief Return the approximate number of indirect objects. It is approximate because not all objects + * in the file are preserved in all cases, and gaps in object numbering are not preserved. + */ QPDF_DLL size_t getObjectCount(); - // Returns a list of indirect objects for every object in the xref table. Useful for discovering - // objects that are not otherwise referenced. + /*! \brief Returns a list of indirect objects for every object in the xref table. Useful for discovering + * objects that are not otherwise referenced. + */ QPDF_DLL std::vector getAllObjects(); // Optimization support -- see doc/optimization. 
Implemented in QPDF_optimization.cc - // The object_stream_data map maps from a "compressed" object to the object stream that contains - // it. This enables optimize to populate the object <-> user maps with only uncompressed - // objects. If allow_changes is false, an exception will be thrown if any changes are made - // during the optimization process. This is available so that the test suite can make sure that - // a linearized file is already optimized. When called in this way, optimize() still populates - // the object <-> user maps. The optional skip_stream_parameters parameter, if present, is - // called for each stream object. The function should return 2 if optimization should discard - // /Length, /Filter, and /DecodeParms; 1 if it should discard /Length, and 0 if it should - // preserve all keys. This is used by QPDFWriter to avoid creation of dangling objects for - // stream dictionary keys it will be regenerating. + /*! \brief The object_stream_data map maps from a "compressed" object to the object stream that contains + * it. This enables optimize to populate the object <-> user maps with only uncompressed + * objects. If allow_changes is false, an exception will be thrown if any changes are made + * during the optimization process. This is available so that the test suite can make sure that + * a linearized file is already optimized. When called in this way, optimize() still populates + * the object <-> user maps. The optional skip_stream_parameters parameter, if present, is + * called for each stream object. The function should return 2 if optimization should discard + * /Length, /Filter, and /DecodeParms; 1 if it should discard /Length, and 0 if it should + * preserve all keys. This is used by QPDFWriter to avoid creation of dangling objects for + * stream dictionary keys it will be regenerating. 
+ */ [[deprecated("Unused - see release notes for qpdf 12.1.0")]] QPDF_DLL void optimize( std::map const& object_stream_data, bool allow_changes = true, std::function skip_stream_parameters = nullptr); - // Traverse page tree return all /Page objects. It also detects and resolves cases in which the - // same /Page object is duplicated. For efficiency, this method returns a const reference to an - // internal vector of pages. Calls to addPage, addPageAt, and removePage safely update this, but - // direct manipulation of the pages tree or pushing inheritable objects to the page level may - // invalidate it. See comments for updateAllPagesCache() for additional notes. Newer code should - // use QPDFPageDocumentHelper::getAllPages instead. The decision to expose this internal cache - // was arguably incorrect, but it is being left here for compatibility. It is, however, - // completely safe to use this for files that you are not modifying. + /*! \brief Traverse the page tree and return all /Page objects. It also detects and resolves cases in which the + * same /Page object is duplicated. For efficiency, this method returns a const reference to an + * internal vector of pages. Calls to addPage, addPageAt, and removePage safely update this, but + * direct manipulation of the pages tree or pushing inheritable objects to the page level may + * invalidate it. See comments for updateAllPagesCache() for additional notes. Newer code should + * use QPDFPageDocumentHelper::getAllPages instead. The decision to expose this internal cache + * was arguably incorrect, but it is being left here for compatibility. It is, however, + * completely safe to use this for files that you are not modifying. + */ QPDF_DLL std::vector const& getAllPages(); @@ -690,32 +739,35 @@ class QPDF QPDF_DLL bool everPushedInheritedAttributesToPages() const; - // These methods, given a page object or its object/generation number, returns the 0-based index - // into the array returned by getAllPages() for that page. 
An exception is thrown if the page is - // not found. + /*! \brief These methods, given a page object or its object/generation number, return the 0-based index + * into the array returned by getAllPages() for that page. An exception is thrown if the page is + * not found. + */ QPDF_DLL int findPage(QPDFObjGen og); QPDF_DLL int findPage(QPDFObjectHandle& page); - // This method synchronizes QPDF's cache of the page structure with the actual /Pages tree. If - // you restrict changes to the /Pages tree, including addition, removal, or replacement of pages - // or changes to any /Pages objects, to calls to these page handling APIs, you never need to - // call this method. If you modify /Pages structures directly, you must call this method - // afterwards. This method updates the internal list of pages, so after calling this method, - // any previous references returned by getAllPages() will be valid again. It also resets any - // state about having pushed inherited attributes in /Pages objects down to the pages, so if you - // add any inheritable attributes to a /Pages object, you should also call this method. + /*! \brief This method synchronizes QPDF's cache of the page structure with the actual /Pages tree. If + * you restrict changes to the /Pages tree, including addition, removal, or replacement of pages + * or changes to any /Pages objects, to calls to these page handling APIs, you never need to + * call this method. If you modify /Pages structures directly, you must call this method + * afterwards. This method updates the internal list of pages, so after calling this method, + * any previous references returned by getAllPages() will be valid again. It also resets any + * state about having pushed inherited attributes in /Pages objects down to the pages, so if you + * add any inheritable attributes to a /Pages object, you should also call this method. + */ QPDF_DLL void updateAllPagesCache(); - // Legacy handling API. 
These methods are not going anywhere, and you should feel free to - // continue using them if it simplifies your code. Newer code should make use of - // QPDFPageDocumentHelper instead as future page handling methods will be added there. The - // functionality and specification of these legacy methods is identical to the identically named - // methods there, except that these versions use QPDFObjectHandle instead of - // QPDFPageObjectHelper, so please see comments in that file for descriptions. There are - // subtleties you need to know about, so please look at the comments there. + /*! \brief Legacy handling API. These methods are not going anywhere, and you should feel free to + * continue using them if it simplifies your code. Newer code should make use of + * QPDFPageDocumentHelper instead as future page handling methods will be added there. The + * functionality and specification of these legacy methods is identical to the identically named + * methods there, except that these versions use QPDFObjectHandle instead of + * QPDFPageObjectHelper, so please see comments in that file for descriptions. There are + * subtleties you need to know about, so please look at the comments there. + */ QPDF_DLL void pushInheritedAttributesToPage(); QPDF_DLL @@ -736,10 +788,11 @@ class QPDF static bool test_json_validators(); private: - // It has never been safe to copy QPDF objects as there is code in the library that assumes - // there are no copies of a QPDF object. Copying QPDF objects was not prevented by the API until - // qpdf 11. If you have been copying QPDF objects, use std::shared_ptr instead. From qpdf - // 11, you can use QPDF::create to create them. + /*! \brief It has never been safe to copy QPDF objects as there is code in the library that assumes + * there are no copies of a QPDF object. Copying QPDF objects was not prevented by the API until + * qpdf 11. If you have been copying QPDF objects, use std::shared_ptr instead. 
From qpdf + * 11, you can use QPDF::create to create them. + */ QPDF(QPDF const&) = delete; QPDF& operator=(QPDF const&) = delete; @@ -753,7 +806,7 @@ class QPDF void removeObject(QPDFObjGen og); - // Calls finish() on the pipeline when done but does not delete it + /*! \brief Calls finish() on the pipeline when done but does not delete it */ bool pipeStreamData( QPDFObjGen og, qpdf_offset_t offset, @@ -763,6 +816,7 @@ class QPDF Pipeline* pipeline, bool suppress_warnings, bool will_retry); + static bool pipeStreamData( std::shared_ptr encp, std::shared_ptr file, @@ -776,7 +830,7 @@ class QPDF bool suppress_warnings, bool will_retry); - // methods to support encryption -- implemented in QPDF_encryption.cc + /*! \brief methods to support encryption -- implemented in QPDF_encryption.cc */ void initializeEncryption(); static std::string getKeyForObject(std::shared_ptr encp, QPDFObjGen og, bool use_aes); @@ -796,8 +850,9 @@ class QPDF class Members; - // Keep all member variables inside the Members object, which we dynamically allocate. This - // makes it possible to add new private members without breaking binary compatibility. + /*! \brief Keep all member variables inside the Members object, which we dynamically allocate. This + * makes it possible to add new private members without breaking binary compatibility. + */ std::unique_ptr m; };