QPDFWriter.hh
21.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
// Copyright (c) 2005-2021 Jay Berkenbilt
// Copyright (c) 2022-2025 Jay Berkenbilt and Manfred Holger
//
// This file is part of qpdf.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under
// the License.
//
// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
// License. At your option, you may continue to consider qpdf to be licensed under those terms.
// Please see the manual for additional information.
#ifndef QPDFWRITER_HH
#define QPDFWRITER_HH
#include <qpdf/Constants.h>
#include <qpdf/DLL.h>
#include <qpdf/Types.h>
#include <qpdf/Buffer.hh>
#include <qpdf/PDFVersion.hh>
#include <qpdf/Pipeline.hh>
#include <qpdf/Pl_Buffer.hh>
#include <qpdf/QPDFObjGen.hh>
#include <qpdf/QPDFObjectHandle.hh>
#include <qpdf/QPDFXRefEntry.hh>
#include <bitset>
#include <cstdio>
#include <functional>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <string_view>
#include <vector>
namespace qpdf
{
class Writer;
}
class QPDF;
// This class implements a simple writer for saving QPDF objects to new PDF files. See comments
// through the header file for additional details.
class QPDFWriter
{
public:
// Construct a QPDFWriter object without specifying output. You must call one of the output
// setting routines defined below.
QPDF_DLL
QPDFWriter(QPDF& pdf);
// Create a QPDFWriter object that writes its output to a file or to stdout. This is equivalent
// to using the previous constructor and then calling setOutputFilename(). See
// setOutputFilename() for details.
QPDF_DLL
QPDFWriter(QPDF& pdf, char const* filename);
// Create a QPDFWriter object that writes its output to an already open FILE*. This is
// equivalent to calling the first constructor and then calling setOutputFile(). See
// setOutputFile() for details.
QPDF_DLL
QPDFWriter(QPDF& pdf, char const* description, FILE* file, bool close_file);
~QPDFWriter() = default;
class QPDF_DLL_CLASS ProgressReporter
{
public:
QPDF_DLL
virtual ~ProgressReporter();
// This method is called with a value from 0 to 100 to indicate approximate progress through
// the write process. See registerProgressReporter.
virtual void reportProgress(int) = 0;
};
// This is a progress reporter that takes a function. It is used by the C APIs, but it is
// available if you want to just register a C function as a handler.
class QPDF_DLL_CLASS FunctionProgressReporter: public ProgressReporter
{
public:
QPDF_DLL
FunctionProgressReporter(std::function<void(int)>);
QPDF_DLL
~FunctionProgressReporter() override;
QPDF_DLL
void reportProgress(int) override;
private:
std::function<void(int)> handler;
};
// Setting Output. Output may be set only one time. If you don't use the filename version of
// the QPDFWriter constructor, you must call exactly one of these methods.
// Passing nullptr as filename means write to stdout. QPDFWriter will create a zero-length
// output file upon construction. If write fails, the empty or partially written file will not
// be deleted. This is by design: sometimes the partial file may be useful for tracking down
// problems. If your application doesn't want the partially written file to be left behind, you
// should delete it if the eventual call to write fails.
QPDF_DLL
void setOutputFilename(char const* filename);
// Write to the given FILE*, which must be opened by the caller. If close_file is true,
// QPDFWriter will close the file. Otherwise, the caller must close the file. The file does not
// need to be seekable; it will be written to in a single pass. It must be open in binary mode.
QPDF_DLL
void setOutputFile(char const* description, FILE* file, bool close_file);
// Indicate that QPDFWriter should create a memory buffer to contain the final PDF file. Obtain
// the memory by calling getBuffer().
QPDF_DLL
void setOutputMemory();
// Return the buffer object containing the PDF file. If setOutputMemory() has been called, this
// method may be called exactly one time after write() has returned. The caller is responsible
// for deleting the buffer when done. See also getBufferSharedPointer().
QPDF_DLL
Buffer* getBuffer();
// Return getBuffer() in a shared pointer.
QPDF_DLL
std::shared_ptr<Buffer> getBufferSharedPointer();
// Supply your own pipeline object. Output will be written to this pipeline, and QPDFWriter
// will call finish() on the pipeline. It is the caller's responsibility to manage the memory
// for the pipeline. The pipeline is never deleted by QPDFWriter, which makes it possible for
// you to call additional methods on the pipeline after the writing is finished.
QPDF_DLL
void setOutputPipeline(Pipeline*);
// Setting Parameters
// Set the value of object stream mode. In disable mode, we never generate any object streams.
// In preserve mode, we preserve object stream structure from the original file. In generate
// mode, we generate our own object streams. In all cases, we generate a conventional
// cross-reference table if there are no object streams and a cross-reference stream if there
// are object streams. The default is o_preserve.
QPDF_DLL
void setObjectStreamMode(qpdf_object_stream_e);
// Set value of stream data mode. This is an older interface. Instead of using this, prefer
// setCompressStreams() and setDecodeLevel(). This method is retained for compatibility, but it
// does not cover the full range of available configurations. The mapping between this and the
// new methods is as follows:
//
// qpdf_s_uncompress:
// setCompressStreams(false)
// setDecodeLevel(qpdf_dl_generalized)
// qpdf_s_preserve:
// setCompressStreams(false)
// setDecodeLevel(qpdf_dl_none)
// qpdf_s_compress:
// setCompressStreams(true)
// setDecodeLevel(qpdf_dl_generalized)
//
// The default is qpdf_s_compress.
QPDF_DLL
void setStreamDataMode(qpdf_stream_data_e);
// If true, compress any uncompressed streams when writing them. Metadata streams are a special
// case and are not compressed even if this is true. This is true by default for QPDFWriter. If
// you want QPDFWriter to leave uncompressed streams uncompressed, pass false to this method.
QPDF_DLL
void setCompressStreams(bool);
// When QPDFWriter encounters streams, this parameter controls the behavior with respect to
// attempting to apply any filters to the streams when copying to the output. The decode levels
// are as follows:
//
// qpdf_dl_none: Do not attempt to apply any filters. Streams remain as they appear in the
// original file. Note that uncompressed streams may still be compressed on output. You can
// disable that by calling setCompressStreams(false).
//
// qpdf_dl_generalized: This is the default. QPDFWriter will apply LZWDecode, ASCII85Decode,
// ASCIIHexDecode, and FlateDecode filters on the input. When combined with
// setCompressStreams(true), which is the default, the effect of this is that streams filtered
// with these older and less efficient filters will be recompressed with the Flate filter. By
// default, as a special case, if a stream is already compressed with FlateDecode and
// setCompressStreams is enabled, the original compressed data will be preserved. This behavior
// can be overridden by calling setRecompressFlate(true).
//
// qpdf_dl_specialized: In addition to uncompressing the generalized compression formats,
// supported non-lossy compression will also be decoded. At present, this includes the
// RunLengthDecode filter.
//
// qpdf_dl_all: In addition to generalized and non-lossy specialized filters, supported lossy
// compression filters will be applied. At present, this includes DCTDecode (JPEG) compression.
// Note that compressing the resulting data with DCTDecode again will accumulate loss, so avoid
// multiple compression and decompression cycles. This is mostly useful for retrieving image
// data.
QPDF_DLL
void setDecodeLevel(qpdf_stream_decode_level_e);
// By default, when both the input and output contents of a stream are compressed with Flate,
// qpdf does not uncompress and recompress the stream. Passing true here causes it to do so.
// This can be useful if recompressing all streams with a higher compression level, which can be
// set by calling the static method Pl_Flate::setCompressionLevel.
QPDF_DLL
void setRecompressFlate(bool);
// Set value of content stream normalization. The default is "false". If true, we attempt to
// normalize newlines inside of content streams. Some constructs such as inline images may
// thwart our efforts. There may be some cases where this can damage the content stream. This
// flag should be used only for debugging and experimenting with PDF content streams. Never use
// it for production files.
QPDF_DLL
void setContentNormalization(bool);
// Set QDF mode. QDF mode causes special "pretty printing" of PDF objects, adds comments for
// easier perusing of files. Resulting PDF files can be edited in a text editor and then run
// through fix-qdf to update cross reference tables and stream lengths.
QPDF_DLL
void setQDFMode(bool);
// Preserve unreferenced objects. The default behavior is to discard any object that is not
// visited during a traversal of the object structure from the trailer.
QPDF_DLL
void setPreserveUnreferencedObjects(bool);
// Always write a newline before the endstream keyword. This helps with PDF/A compliance, though
// it is not sufficient for it.
QPDF_DLL
void setNewlineBeforeEndstream(bool);
// Set the minimum PDF version. If the PDF version of the input file (or previously set minimum
// version) is less than the version passed to this method, the PDF version of the output file
// will be set to this value. If the original PDF file's version or previously set minimum
// version is already this version or later, the original file's version will be used.
// QPDFWriter automatically sets the minimum version to 1.4 when R3 encryption parameters are
// used, and to 1.5 when object streams are used.
QPDF_DLL
void setMinimumPDFVersion(std::string const&, int extension_level = 0);
QPDF_DLL
void setMinimumPDFVersion(PDFVersion const&);
// Force the PDF version of the output file to be a given version. Use of this function may
// create PDF files that will not work properly with older PDF viewers. When a PDF version is
// set using this function, qpdf will use this version even if the file contains features that
// are not supported in that version of PDF. In other words, you should only use this function
// if you are sure the PDF file in question has no features of newer versions of PDF or if you
// are willing to create files that old viewers may try to open but not be able to properly
// interpret. If any encryption has been applied to the document either explicitly or by
// preserving the encryption of the source document, forcing the PDF version to a value too low
// to support that type of encryption will explicitly disable decryption. Additionally, forcing
// to a version below 1.5 will disable object streams.
QPDF_DLL
void forcePDFVersion(std::string const&, int extension_level = 0);
// Provide additional text to insert in the PDF file somewhere near the beginning of the file.
// This can be used to add comments to the beginning of a PDF file, for example, if those
// comments are to be consumed by some other application. No checks are performed to ensure
// that the text inserted here is valid PDF. If you want to insert multiline comments, you will
// need to include \n in the string yourself and start each line with %. An extra newline will
// be appended if one is not already present at the end of your text.
QPDF_DLL
void setExtraHeaderText(std::string const&);
// Causes a deterministic /ID value to be generated. When this is set, the current time and
// output file name are not used as part of /ID generation. Instead, a digest of all significant
// parts of the output file's contents is included in the /ID calculation. Use of a
// deterministic /ID can be handy when it is desirable for a repeat of the same qpdf operation
// on the same inputs being written to the same outputs with the same parameters to generate
// exactly the same results. This feature is incompatible with encrypted files because, for
// encrypted files, the /ID is generated before any part of the file is written since it is an
// input to the encryption process.
QPDF_DLL
void setDeterministicID(bool);
// Cause a static /ID value to be generated. Use only in test suites. See also
// setDeterministicID.
QPDF_DLL
void setStaticID(bool);
// Use a fixed initialization vector for AES-CBC encryption. This is not secure. It should be
// used only in test suites for creating predictable encrypted output.
QPDF_DLL
void setStaticAesIV(bool);
// Suppress inclusion of comments indicating original object IDs when writing QDF files. This
// can also be useful for testing, particularly when using comparison of two qdf files to
// determine whether two PDF files have identical content.
QPDF_DLL
void setSuppressOriginalObjectIDs(bool);
// Preserve encryption. The default is true unless prefiltering, content normalization, or qdf
// mode has been selected in which case encryption is never preserved. Encryption is also not
// preserved if we explicitly set encryption parameters.
QPDF_DLL
void setPreserveEncryption(bool);
// Copy encryption parameters from another QPDF object. If you want to copy encryption from the
// object you are writing, call setPreserveEncryption(true) instead.
QPDF_DLL
void copyEncryptionParameters(QPDF&);
// Set up for encrypted output. User and owner password both must be specified. Either or both
// may be the empty string. Note that qpdf does not apply any special treatment to the empty
// string, which makes it possible to create encrypted files with empty owner passwords and
// non-empty user passwords or with the same password for both user and owner. Some PDF reading
// products don't handle such files very well. Enabling encryption disables stream prefiltering
// and content normalization. Note that setting R2 encryption parameters sets the PDF version
// to at least 1.3, setting R3 encryption parameters pushes the PDF version number to at
// least 1.4, setting R4 parameters pushes the version to at least 1.5, or if AES is used, 1.6,
// and setting R5 or R6 parameters pushes the version to at least 1.7 with extension level 3.
//
// Note about Unicode passwords: the PDF specification requires passwords to be encoded with PDF
// Doc encoding for R <= 4 and UTF-8 for R >= 5. In all cases, these methods take strings of
// bytes as passwords. It is up to the caller to ensure that passwords are properly encoded. The
// qpdf command-line tool tries to do this, as discussed in the manual. If you are doing this
// from your own application, QUtil contains many transcoding functions that could be useful to
// you, most notably utf8_to_pdf_doc.
// R2 uses RC4, which is a weak cryptographic algorithm. Don't use it unless you have to. See
// "Weak Cryptography" in the manual. This encryption format is deprecated in the PDF 2.0
// specification.
QPDF_DLL
void setR2EncryptionParametersInsecure(
char const* user_password,
char const* owner_password,
bool allow_print,
bool allow_modify,
bool allow_extract,
bool allow_annotate);
// R3 uses RC4, which is a weak cryptographic algorithm. Don't use it unless you have to. See
// "Weak Cryptography" in the manual. This encryption format is deprecated in the PDF 2.0
// specification.
QPDF_DLL
void setR3EncryptionParametersInsecure(
char const* user_password,
char const* owner_password,
bool allow_accessibility,
bool allow_extract,
bool allow_assemble,
bool allow_annotate_and_form,
bool allow_form_filling,
bool allow_modify_other,
qpdf_r3_print_e print);
// When use_aes=false, this call enables R4 with RC4, which is a weak cryptographic algorithm.
// Even with use_aes=true, the overall encryption scheme is weak. Don't use it unless you have
// to. See "Weak Cryptography" in the manual. This encryption format is deprecated in the
// PDF 2.0 specification.
QPDF_DLL
void setR4EncryptionParametersInsecure(
char const* user_password,
char const* owner_password,
bool allow_accessibility,
bool allow_extract,
bool allow_assemble,
bool allow_annotate_and_form,
bool allow_form_filling,
bool allow_modify_other,
qpdf_r3_print_e print,
bool encrypt_metadata,
bool use_aes);
// R5 is deprecated. Do not use it for production use. Writing R5 is supported by qpdf
// primarily to generate test files for applications that may need to test R5 support.
QPDF_DLL
void setR5EncryptionParameters(
char const* user_password,
char const* owner_password,
bool allow_accessibility,
bool allow_extract,
bool allow_assemble,
bool allow_annotate_and_form,
bool allow_form_filling,
bool allow_modify_other,
qpdf_r3_print_e print,
bool encrypt_metadata);
// This is the only password-based encryption format supported by the PDF specification.
QPDF_DLL
void setR6EncryptionParameters(
char const* user_password,
char const* owner_password,
bool allow_accessibility,
bool allow_extract,
bool allow_assemble,
bool allow_annotate_and_form,
bool allow_form_filling,
bool allow_modify_other,
qpdf_r3_print_e print,
bool encrypt_metadata_aes);
// Create linearized output. Disables qdf mode, content normalization, and stream prefiltering.
QPDF_DLL
void setLinearization(bool);
// For debugging QPDF: provide the name of a file to write pass1 of linearization to. The only
// reason to use this is to debug QPDF. To linearize, QPDF writes out the file in two passes.
// Usually the first pass is discarded, but lots of computations are made in pass 1. If a
// linearized file comes out wrong, it can be helpful to look at the first pass.
QPDF_DLL
void setLinearizationPass1Filename(std::string const&);
// Create PCLm output. This is only useful for clients that know how to create PCLm files. If a
// file is structured exactly as PCLm requires, this call will tell QPDFWriter to write the PCLm
// header, create certain unreferenced streams required by the standard, and write the objects
// in the required order. Calling this on an ordinary PDF serves no purpose. There is no
// command-line argument that causes this method to be called.
QPDF_DLL
void setPCLm(bool);
// If you want to be notified of progress, derive a class from ProgressReporter and override the
// reportProgress method.
QPDF_DLL
void registerProgressReporter(std::shared_ptr<ProgressReporter>);
// Return the PDF version that will be written into the header. Calling this method does all the
// preparation for writing, so it is an error to call any methods that may cause a change to the
// version. Adding new objects to the original file after calling this may also cause problems.
// It is safe to update existing objects or stream contents after calling this method, e.g., to
// include the final version number in metadata.
QPDF_DLL
std::string getFinalVersion();
// Write the final file. There is no expectation of being able to call write() more than once.
QPDF_DLL
void write();
// Return renumbered ObjGen that was written into the final file. This method can be used after
// calling write().
QPDF_DLL
QPDFObjGen getRenumberedObjGen(QPDFObjGen);
// Return XRef entry that was written into the final file. This method can be used after calling
// write().
QPDF_DLL
std::map<QPDFObjGen, QPDFXRefEntry> getWrittenXRefTable();
// The following structs / classes are not part of the public API.
struct Object;
struct NewObject;
class ObjTable;
class NewObjTable;
private:
friend class qpdf::Writer;
class Members;
std::shared_ptr<Members> m;
};
#endif // QPDFWRITER_HH